In [1]:
import pandas as pd
import numpy as np

In [2]:
# Unit-test (UT) marks for four students over four tests, stored column-wise:
# every list has 16 entries, one per (student, UT) pair.
# Raman's UT 4 marks for Maths, Science and Eng are missing and recorded as NaN.
# `np.nan` is used because the capitalised alias `np.NaN` was removed in NumPy 2.0.
Marks = {
    'Name': ['Raman', 'Raman', 'Raman', 'Raman',
             'Zuhaire', 'Zuhaire', 'Zuhaire', 'Zuhaire',
             'Ashravy', 'Ashravy', 'Ashravy', 'Ashravy',
             'Mishti', 'Mishti', 'Mishti', 'Mishti'],
    'UT': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
    'Maths': [22, 21, 14, np.nan, 20, 23, 22, 19, 23, 24, 12, 15, 15, 18, 17, 14],
    'Science': [21, 20, 19, np.nan, 17, 15, 18, 20, 19, 22, 25, 20, 22, 21, 18, 20],
    'S.St': [18, 17, 15, 19, 22, 21, 19, 17, 20, 24, 19, 20, 25, 25, 20, 19],
    'Hindi': [20, 22, 24, 18, 24, 25, 23, 21, 15, 17, 21, 20, 22, 24, 25, 20],
    'Eng': [21, 24, 23, np.nan, 19, 15, 13, 16, 22, 21, 23, 17, 22, 23, 20, 18],
}

In [3]:
# Build the working table from the marks dictionary:
# each key becomes a column and each list position becomes a row.
df = pd.DataFrame(data=Marks)

In [4]:
# Flag each column that holds at least one NaN, then keep only the flagged column names
# (the original column order is preserved).
column_has_nan = df.isna().any()
missing_values_attributes = column_has_nan[column_has_nan].index.tolist()
print(f"Attributes with missing values: {missing_values_attributes}")

Attributes with missing values: ['Maths', 'Science', 'Eng']


In [5]:
# Count the NaN cells in each row, then add the row counts up into one grand total.
nan_count_per_row = df.isna().sum(axis=1)
total_missing_values = nan_count_per_row.sum()
print(f"Total missing values: {total_missing_values}")

Total missing values: 3


In [6]:
# Forward fill: every NaN takes the most recent non-missing value above it in the same column.
# DataFrame.ffill() is used because the `method=` argument of fillna() is deprecated in pandas 2.1+.
# The fill follows row order and ignores the 'Name' column; here the only NaNs sit in
# Raman's UT 4 row, so they are filled from Raman's own UT 3 marks.
df_ffill = df.ffill()
print("\nData after forward fill:\n", df_ffill)


Data after forward fill:
        Name  UT  Maths  Science  S.St  Hindi   Eng
0     Raman   1   22.0     21.0    18     20  21.0
1     Raman   2   21.0     20.0    17     22  24.0
2     Raman   3   14.0     19.0    15     24  23.0
3     Raman   4   14.0     19.0    19     18  23.0
4   Zuhaire   1   20.0     17.0    22     24  19.0
5   Zuhaire   2   23.0     15.0    21     25  15.0
6   Zuhaire   3   22.0     18.0    19     23  13.0
7   Zuhaire   4   19.0     20.0    17     21  16.0
8   Ashravy   1   23.0     19.0    20     15  22.0
9   Ashravy   2   24.0     22.0    24     17  21.0
10  Ashravy   3   12.0     25.0    19     21  23.0
11  Ashravy   4   15.0     20.0    20     20  17.0
12   Mishti   1   15.0     22.0    25     22  22.0
13   Mishti   2   18.0     21.0    25     24  23.0
14   Mishti   3   17.0     18.0    20     25  20.0
15   Mishti   4   14.0     20.0    19     20  18.0


In [7]:
# Keep only the rows in which every column has a value;
# any row containing at least one NaN is left out. The original index labels are retained.
row_is_complete = df.notna().all(axis=1)
df_dropna = df[row_is_complete]
print("\nData after dropping rows with missing values:\n", df_dropna)


Data after dropping rows with missing values:
        Name  UT  Maths  Science  S.St  Hindi   Eng
0     Raman   1   22.0     21.0    18     20  21.0
1     Raman   2   21.0     20.0    17     22  24.0
2     Raman   3   14.0     19.0    15     24  23.0
4   Zuhaire   1   20.0     17.0    22     24  19.0
5   Zuhaire   2   23.0     15.0    21     25  15.0
6   Zuhaire   3   22.0     18.0    19     23  13.0
7   Zuhaire   4   19.0     20.0    17     21  16.0
8   Ashravy   1   23.0     19.0    20     15  22.0
9   Ashravy   2   24.0     22.0    24     17  21.0
10  Ashravy   3   12.0     25.0    19     21  23.0
11  Ashravy   4   15.0     20.0    20     20  17.0
12   Mishti   1   15.0     22.0    25     22  22.0
13   Mishti   2   18.0     21.0    25     24  23.0
14   Mishti   3   17.0     18.0    20     25  20.0
15   Mishti   4   14.0     20.0    19     20  18.0


In [8]:
# Mode imputation: replace each subject's NaN entries with that subject's most frequent mark.
# Work on a copy so the original `df` stays untouched for the other strategies.
df_mode = df.copy()

# Select the subject columns by name rather than by position, so the identifier
# columns are skipped even if the column order changes.
subject_columns = [column for column in df_mode.columns if column not in ('Name', 'UT')]

for column in subject_columns:
    # mode() returns its values in sorted order, so [0] is the smallest mark when several tie.
    most_frequent_mark = df_mode[column].mode()[0]
    # Assign the filled column back instead of calling fillna(inplace=True) on df_mode[column]:
    # the chained in-place form warns on pandas 2.x and does not update df_mode under Copy-on-Write.
    df_mode[column] = df_mode[column].fillna(most_frequent_mark)

print("\nData after filling NaN with mode:\n", df_mode)


Data after filling NaN with mode:
        Name  UT  Maths  Science  S.St  Hindi   Eng
0     Raman   1   22.0     21.0    18     20  21.0
1     Raman   2   21.0     20.0    17     22  24.0
2     Raman   3   14.0     19.0    15     24  23.0
3     Raman   4   14.0     20.0    19     18  23.0
4   Zuhaire   1   20.0     17.0    22     24  19.0
5   Zuhaire   2   23.0     15.0    21     25  15.0
6   Zuhaire   3   22.0     18.0    19     23  13.0
7   Zuhaire   4   19.0     20.0    17     21  16.0
8   Ashravy   1   23.0     19.0    20     15  22.0
9   Ashravy   2   24.0     22.0    24     17  21.0
10  Ashravy   3   12.0     25.0    19     21  23.0
11  Ashravy   4   15.0     20.0    20     20  17.0
12   Mishti   1   15.0     22.0    25     22  22.0
13   Mishti   2   18.0     21.0    25     24  23.0
14   Mishti   3   17.0     18.0    20     25  20.0
15   Mishti   4   14.0     20.0    19     20  18.0


In [10]:
# Baseline: Raman's mean Hindi mark taken from the original frame, before any imputation.
# Select rows and column in a single .loc call.
raman_hindi_original = df.loc[df['Name'] == 'Raman', 'Hindi']
raman_before = raman_hindi_original.mean()
print(f"Raman's average Hindi marks before handling missing data: {raman_before:.2f}")

Raman's average Hindi marks before handling missing data: 21.00


In [11]:
# Same statistic, this time read from the mode-imputed frame (df_mode).
# Select rows and column in a single .loc call.
raman_hindi_imputed = df_mode.loc[df_mode['Name'] == 'Raman', 'Hindi']
raman_after = raman_hindi_imputed.mean()
print(f"Raman's average Hindi marks after handling missing data: {raman_after:.2f}")

Raman's average Hindi marks after handling missing data: 21.00


In [12]:
# Linear interpolation: each NaN is replaced by a value on the straight line between
# its nearest non-missing neighbours in the same column (by row position).
# Only numeric columns are interpolated; calling interpolate() on a frame that still
# contains the object-dtype 'Name' column is deprecated in pandas 2.x.
# NOTE(review): interpolation follows row order and ignores 'Name', so Raman's missing
# UT 4 marks are computed from Raman's UT 3 row and Zuhaire's UT 1 row.
numeric_columns = df.select_dtypes(include='number').columns
df_interpolate = df.copy()
df_interpolate[numeric_columns] = df_interpolate[numeric_columns].interpolate()
print("\nData after linear interpolation:\n", df_interpolate)


Data after linear interpolation:
        Name  UT  Maths  Science  S.St  Hindi   Eng
0     Raman   1   22.0     21.0    18     20  21.0
1     Raman   2   21.0     20.0    17     22  24.0
2     Raman   3   14.0     19.0    15     24  23.0
3     Raman   4   17.0     18.0    19     18  21.0
4   Zuhaire   1   20.0     17.0    22     24  19.0
5   Zuhaire   2   23.0     15.0    21     25  15.0
6   Zuhaire   3   22.0     18.0    19     23  13.0
7   Zuhaire   4   19.0     20.0    17     21  16.0
8   Ashravy   1   23.0     19.0    20     15  22.0
9   Ashravy   2   24.0     22.0    24     17  21.0
10  Ashravy   3   12.0     25.0    19     21  23.0
11  Ashravy   4   15.0     20.0    20     20  17.0
12   Mishti   1   15.0     22.0    25     22  22.0
13   Mishti   2   18.0     21.0    25     24  23.0
14   Mishti   3   17.0     18.0    20     25  20.0
15   Mishti   4   14.0     20.0    19     20  18.0
