# Dataset

In [22]:
import pandas as pd

# Toy student table: every column contains exactly one missing entry (None),
# so each fill strategy demonstrated below has something to act on.
df = pd.DataFrame(
    data={
        'Name': [None, 'Bob', 'Charlie', 'Dave', 'Eve'],
        'Age': [21, None, 23, 24, 25],
        'Gender': ['Female', 'Male', None, 'Male', 'Female'],
        'Grade': [80, 85, 90, 95, None],
        'Final Exam Score': [95, 88, None, 91, 80],
    }
)

# Bare last expression: render the frame as the cell output.
df

Unnamed: 0,Name,Age,Gender,Grade,Final Exam Score
0,,21.0,Female,80.0,95.0
1,Bob,,Male,85.0,88.0
2,Charlie,23.0,,90.0,
3,Dave,24.0,Male,95.0,91.0
4,Eve,25.0,Female,,80.0


## `fillna()`

- Replace all NaN elements with 0s.

In [21]:
# One scalar fills every missing cell in every column; the result is a new
# frame shown as the cell output (nothing is assigned back to df).
df.fillna(value=0)

Unnamed: 0,Name,Age,Gender,Grade,Final Exam Score
0,0,21.0,Female,80.0,95.0
1,Bob,0.0,Male,85.0,88.0
2,Charlie,23.0,0,90.0,0.0
3,Dave,24.0,Male,95.0,91.0
4,Eve,25.0,Female,0.0,80.0


- Propagate non-null values forward
- Used to fill missing values with the value from the previous row

In [24]:
# Forward fill: each missing cell takes the value from the closest non-missing
# row above it in the same column.
# Notice that the 1st row of 'Name' is still missing: there is no earlier row
# to copy a value from.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in recent pandas releases.
df.ffill()

Unnamed: 0,Name,Age,Gender,Grade,Final Exam Score
0,,21.0,Female,80.0,95.0
1,Bob,21.0,Male,85.0,88.0
2,Charlie,23.0,Male,90.0,88.0
3,Dave,24.0,Male,95.0,91.0
4,Eve,25.0,Female,95.0,80.0


- Propagate non-null values backward
- Used to fill missing values with the value from the next row

In [26]:
# Backward fill: each missing cell takes the value from the closest non-missing
# row below it in the same column.
# Notice that the last row of 'Grade' is still missing: there is no later row
# to copy a value from.
# DataFrame.bfill() replaces fillna(method='bfill'), whose `method` argument
# is deprecated in recent pandas releases.
df.bfill()

Unnamed: 0,Name,Age,Gender,Grade,Final Exam Score
0,Bob,21.0,Female,80.0,95.0
1,Bob,23.0,Male,85.0,88.0
2,Charlie,23.0,Male,90.0,91.0
3,Dave,24.0,Male,95.0,91.0
4,Eve,25.0,Female,,80.0


- Pass a dictionary that maps column names to fill values to use a different replacement for each column

In [32]:
# Per-column replacements: each key is a column name and each value is what
# goes into that column's missing cells.
df.fillna(
    value={
        'Name': 'XYZ',
        'Age': 18,
        'Gender': 'Unknown',
        'Grade': 50,
        'Final Exam Score': 45,
    }
)

Unnamed: 0,Name,Age,Gender,Grade,Final Exam Score
0,XYZ,21.0,Female,80.0,95.0
1,Bob,18.0,Male,85.0,88.0
2,Charlie,23.0,Unknown,90.0,45.0
3,Dave,24.0,Male,95.0,91.0
4,Eve,25.0,Female,50.0,80.0
