# **Data Collection**

In [None]:
import pandas as pd
import numpy as np

# Raw marks for five students. Each subject deliberately has exactly one
# missing entry (np.nan or None) so the cleaning section has gaps to fill.
data = dict(
    Student=["Rohit", "Amit", "Sneha", "Pooja", "Rahul"],
    Maths=[78, 85, np.nan, 66, 72],
    Science=[82, 88, 94, None, 76],
    English=[75, 80, 85, 68, None],
)

# One row per student, one column per dict key.
df = pd.DataFrame(data)
df


Unnamed: 0,Student,Maths,Science,English
0,Rohit,78.0,82.0,75.0
1,Amit,85.0,88.0,80.0
2,Sneha,,94.0,85.0
3,Pooja,66.0,,68.0
4,Rahul,72.0,76.0,


# **Data Cleaning**

Check Missing Values

In [None]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()


Unnamed: 0,Student,Maths,Science,English
0,False,False,False,False
1,False,False,False,False
2,False,True,False,False
3,False,False,True,False
4,False,False,False,True


In [None]:
# Count the missing entries in each column (True counts as 1 when summed).
df.isna().sum()


Unnamed: 0,0
Student,0
Maths,1
Science,1
English,1


Fill Missing Values (Cleaning)

In [None]:
# Replace each subject's missing mark with that subject's mean.
#
# The result is assigned back to df instead of calling
# df[col].fillna(..., inplace=True): pandas warns that the chained in-place
# form operates on an intermediate copy and will stop working in pandas 3.0.
subject_cols = ["Maths", "Science", "English"]

# .mean() yields one value per subject column; fillna applies each value to
# the column with the matching label.
df[subject_cols] = df[subject_cols].fillna(df[subject_cols].mean())

df


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Maths"].fillna(df["Maths"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Science"].fillna(df["Science"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we ar

Unnamed: 0,Student,Maths,Science,English
0,Rohit,78.0,82.0,75.0
1,Amit,85.0,88.0,80.0
2,Sneha,75.25,94.0,85.0
3,Pooja,66.0,85.0,68.0
4,Rahul,72.0,76.0,77.0


# **Data Preprocessing**

Check Data Types (convert if required)


In [None]:
# Show the dtype of every column. This cell only inspects the dtypes;
# it does not convert anything.
df.dtypes

Unnamed: 0,0
Student,object
Maths,float64
Science,float64
English,float64


Feature Engineering (New Columns)

In [None]:
# Derive per-student Total and Average from the subject marks.
#
# Both values come from the same column list, so the divisor always matches
# the number of subjects being summed (previously a hard-coded 3).
subject_cols = ["Maths", "Science", "English"]

# skipna=False keeps the behaviour of adding the columns with `+`: a student
# with any missing mark gets NaN rather than a partial total.
df["Total"] = df[subject_cols].sum(axis=1, skipna=False)
df["Average"] = df["Total"] / len(subject_cols)

df


Unnamed: 0,Student,Maths,Science,English,Total,Average
0,Rohit,78.0,82.0,75.0,235.0,78.333333
1,Amit,85.0,88.0,80.0,253.0,84.333333
2,Sneha,75.25,94.0,85.0,254.25,84.75
3,Pooja,66.0,85.0,68.0,219.0,73.0
4,Rahul,72.0,76.0,77.0,225.0,75.0


Convert Pandas Data → NumPy Array

In [None]:
# Extract only the three subject columns as a NumPy array
# (rows follow df's row order, columns follow the list order below).
subject_cols = ["Maths", "Science", "English"]
marks_array = df[subject_cols].to_numpy()
marks_array


array([[78.  , 82.  , 75.  ],
       [85.  , 88.  , 80.  ],
       [75.25, 94.  , 85.  ],
       [66.  , 85.  , 68.  ],
       [72.  , 76.  , 77.  ]])

# **Data Analysis**

NumPy Calculations


In [None]:
# Summarise the "Average" column with NumPy: mean, maximum and minimum,
# unpacked in that order.
average_col = df["Average"]
class_average, highest_avg, lowest_avg = (
    np_reduce(average_col) for np_reduce in (np.mean, np.max, np.min)
)

class_average, highest_avg, lowest_avg

(np.float64(79.08333333333333), 84.75, 73.0)

Identify Top Performer

In [None]:
# Find the index label of the highest "Average", then pull that student's
# full row out of df as a Series.
best_row_label = df["Average"].idxmax()
top_student = df.loc[best_row_label]
top_student


Unnamed: 0,2
Student,Sneha
Maths,75.25
Science,94.0
English,85.0
Total,254.25
Average,84.75


Pass / Fail Analysis

In [None]:
# Label every student "Pass" or "Fail" from their average.
# The comparison is inclusive: an average exactly equal to the pass mark passes.
PASS_MARK = 75
meets_pass_mark = df["Average"] >= PASS_MARK
df["Result"] = np.where(meets_pass_mark, "Pass", "Fail")
df


Unnamed: 0,Student,Maths,Science,English,Total,Average,Result
0,Rohit,78.0,82.0,75.0,235.0,78.333333,Pass
1,Amit,85.0,88.0,80.0,253.0,84.333333,Pass
2,Sneha,75.25,94.0,85.0,254.25,84.75,Pass
3,Pooja,66.0,85.0,68.0,219.0,73.0,Fail
4,Rahul,72.0,76.0,77.0,225.0,75.0,Pass


Final Insights (For Explanation)


In [None]:
# Print the headline figures, one "label value" pair per line.
# Numeric figures are rounded to two decimals for display only.
summary_lines = [
    ("Class Average:", round(class_average, 2)),
    ("Top Student:", top_student["Student"]),
    ("Lowest Average:", round(lowest_avg, 2)),
]
for label, figure in summary_lines:
    print(label, figure)

Class Average: 79.08
Top Student: Sneha
Lowest Average: 73.0
