Filling missing values in dataframe

In [4]:
import pandas as pd
import numpy as np

data = {
    'Name': ['Aryan', 'Riya', 'Megha', None],
    'Score': [90, None, 75, None],
    'Subject': ['Math', None, 'Science', 'English']
}

df = pd.DataFrame(data)
print(df)


    Name  Score  Subject
0  Aryan   90.0     Math
1   Riya    NaN     None
2  Megha   75.0  Science
3   None    NaN  English


Filling the null values with a particular value

In [5]:
print("Filling the null values with 0")
print(df.fillna(0))

Filling the null values with 0
    Name  Score  Subject
0  Aryan   90.0     Math
1   Riya    0.0        0
2  Megha   75.0  Science
3      0    0.0  English


Filling the null values with either : mean, mode or median

1. Mean

In [6]:
print(df["Score"].fillna(df["Score"].mean()))

0    90.0
1    82.5
2    75.0
3    82.5
Name: Score, dtype: float64


2. Mode

In [7]:
print(df["Score"].fillna(df["Score"].mode()))

0    90.0
1    90.0
2    75.0
3     NaN
Name: Score, dtype: float64


3. Median

In [8]:
print(df["Score"].fillna(df["Score"].median()))

0    90.0
1    82.5
2    75.0
3    82.5
Name: Score, dtype: float64


Filling the values with forward value

In [9]:
print(df.fillna(method="ffill"))

    Name  Score  Subject
0  Aryan   90.0     Math
1   Riya   90.0     Math
2  Megha   75.0  Science
3  Megha   75.0  English


  print(df.fillna(method="ffill"))


Filling the values with backward value

In [10]:
print(df.fillna(method="bfill"))

    Name  Score  Subject
0  Aryan   90.0     Math
1   Riya   75.0  Science
2  Megha   75.0  Science
3   None    NaN  English


  print(df.fillna(method="bfill"))


Filling only one null value in each column

In [11]:
print(df.fillna(0,limit=1))

    Name  Score  Subject
0  Aryan   90.0     Math
1   Riya    0.0        0
2  Megha   75.0  Science
3      0    NaN  English


### Practice Task

Q1. Fill all missing values in the 'Age' column with the mean age.

Q2. Fill the 'City' column using forward fill (ffill).

Q3. Fill the 'Score' column with the median score.

Q4. Replace all missing values in the entire DataFrame with the word 'Unknown' (for both text & numbers).

Q5. Create a copy of the DataFrame and only fill the first NaN in each column with 0.
(Use the limit parameter in fillna())

Q6. Create a version of the DataFrame where:

'Name' missing values = 'Guest'

'Age' missing values = mean age

'City' missing values = 'Not Provided'

'Score' missing values = mode score

Do this in one go (no line-by-line filling)

In [25]:
import pandas as pd
import numpy as np

data = {
    'Name': ['Aman', 'Divya', None, 'Ravi', 'Simran'],
    'Age': [24, np.nan, 29, np.nan, 26],
    'City': ['Delhi', 'Mumbai', 'Delhi', None, None],
    'Score': [88, np.nan, 92, 75, np.nan]
}

df = pd.DataFrame(data)
print(df)

#q1 fill null values in age column with age mean
print(df["Age"].fillna(df['Age'].mean()))

#q2 fill city column using ffill
print(df["City"].fillna(method="ffill"))

#q3 fill score column with median score
print(df["Score"].fillna(df['Score'].median()))

#q4 replace all missing values with unknown
print(df.fillna("Unknown"))

#q5
copy_df = df.copy()
copy_df.fillna(0,limit=1,inplace=True)
print(copy_df)

#q6
print(df.fillna({"Name":"Guest","Age":df['Age'].mean(),"City":"Not Provided","Score": df['Score'].mode()[0]}))


     Name   Age    City  Score
0    Aman  24.0   Delhi   88.0
1   Divya   NaN  Mumbai    NaN
2    None  29.0   Delhi   92.0
3    Ravi   NaN    None   75.0
4  Simran  26.0    None    NaN
0    24.000000
1    26.333333
2    29.000000
3    26.333333
4    26.000000
Name: Age, dtype: float64
0     Delhi
1    Mumbai
2     Delhi
3     Delhi
4     Delhi
Name: City, dtype: object
0    88.0
1    88.0
2    92.0
3    75.0
4    88.0
Name: Score, dtype: float64
      Name      Age     City    Score
0     Aman     24.0    Delhi     88.0
1    Divya  Unknown   Mumbai  Unknown
2  Unknown     29.0    Delhi     92.0
3     Ravi  Unknown  Unknown     75.0
4   Simran     26.0  Unknown  Unknown
     Name   Age    City  Score
0    Aman  24.0   Delhi   88.0
1   Divya   0.0  Mumbai    0.0
2       0  29.0   Delhi   92.0
3    Ravi   NaN       0   75.0
4  Simran  26.0    None    NaN
     Name        Age          City  Score
0    Aman  24.000000         Delhi   88.0
1   Divya  26.333333        Mumbai   75.0
2   Guest

  print(df["City"].fillna(method="ffill"))
