In [1]:
import numpy as np
import pandas as pd

In [2]:
data_frame = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=["A", "B", "C"]
)

In [3]:
# get a cell by row and column index
print(f"Cell at row 1, column 2: {data_frame.iloc[1, 2]}")
print()

Cell at row 1, column 2: 6



In [4]:
# get a cell by row and column name
print(f'Cell at row 1, column B: {data_frame.loc[1, "B"]}')
print()

Cell at row 1, column B: 5



In [5]:
# change a cell by row and column index
value = 100
data_frame.iloc[1, 2] = value
print(f"Change cell at row 1, column 2 to: {value}")
print(data_frame)
print()

Change cell at row 1, column 2 to: 100
   A  B    C
0  1  2    3
1  4  5  100
2  7  8    9



In [6]:
# apply lambda x : x**2 to the first column
data_frame["A"] = data_frame["A"].apply(lambda x: x ** 2)
print("Apply lambda x : x**2 to the first column")
print(data_frame)
print()

Apply lambda x : x**2 to the first column
    A  B    C
0   1  2    3
1  16  5  100
2  49  8    9



In [7]:
# add column 'sex' with values 'male' and 'female'
data_frame["sex"] = ["male", "female", "female"]
print(data_frame)
print()

    A  B    C     sex
0   1  2    3    male
1  16  5  100  female
2  49  8    9  female



In [8]:
# change male to 0 and female to 1
data_frame["sex"] = [0 if x == "male" else 1 for x in data_frame["sex"]]
print("Encoding male as 0 and female as 1")
print(data_frame)
print()

Encoding male as 0 and female as 1
    A  B    C  sex
0   1  2    3    0
1  16  5  100    1
2  49  8    9    1



In [9]:
# show how many null values in the data frame
null_values = data_frame.isnull().sum()
print("Number of null values in each column")
print(null_values)
print()

Number of null values in each column
A      0
B      0
C      0
sex    0
dtype: int64



In [10]:
# let's put null values at 5 random cells
for x, y in zip(np.random.randint(0, 3, 5), np.random.randint(0, 3, 5)):
    data_frame.iloc[x, y] = np.nan

print("Data frame with null values")
print(data_frame)
print()

Data frame with null values
      A    B      C  sex
0   NaN  2.0    3.0    0
1  16.0  NaN  100.0    1
2  49.0  8.0    NaN    1



In [11]:
# show how many null values in the data frame
null_values = data_frame.isnull().sum()
print("Number of null values in each column")
print(null_values)
print()

Number of null values in each column
A      1
B      1
C      1
sex    0
dtype: int64



In [12]:
def any_null_value(data_frame):
    return data_frame.isnull().values.any()


any_null_value(data_frame)

True

In [13]:
# copy the data frame and drop rows with null values
data_frame_without_nulls = data_frame.copy()
data_frame_without_nulls.dropna(inplace=True)
print("Data frame without null values")
print(data_frame_without_nulls)
print()

Data frame without null values
Empty DataFrame
Columns: [A, B, C, sex]
Index: []



In [14]:
# fill the null values with mean of the column in the original data frame
data_frame.fillna(data_frame.mean(), inplace=True)
print("Data frame with null values filled with mean")
print(data_frame)
print()

Data frame with null values filled with mean
      A    B      C  sex
0  32.5  2.0    3.0    0
1  16.0  5.0  100.0    1
2  49.0  8.0   51.5    1

