# Student Academic Performance Analysis with Pandas

* Import libraries and dataset
* Add columns for Total Scores, Percentage Score, and Status
* Correct and rename column names
* Remove column "School" for anonymity
* Save dataset to CSV

In [2]:
# Importing libraries
import pandas as pd 

# Read the student scores CSV directly from its public GitHub URL.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/ritaafrica/data/refs/heads/main/student_scores.csv"
)

# Preview the first 5 rows as a quick check that the data loaded correctly.
df.head(n=5)

Unnamed: 0,StudentID,Name,Country,Gender,Age,Math_Score,Reading_Score,Writing_Score,School,Class
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,Nelson Mandela School,JSS1
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,Ubuntu Academy,JSS2
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,Ubuntu Academy,SS3
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,Ubuntu Academy,SS2
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,Ubuntu Academy,SS3


In [4]:
# Print a structural summary of the frame: column names, non-null counts,
# dtypes and memory usage.
# NOTE: the recorded output lists "Total_Scores" because the cell that creates
# that column was executed earlier (In [3]) than this one (In [4]). On a
# top-to-bottom run this cell executes before "Total_Scores" is created, so
# that column would not be listed.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   StudentID      100 non-null    int64 
 1   Name           100 non-null    object
 2   Country        100 non-null    object
 3   Gender         100 non-null    object
 4   Age            100 non-null    int64 
 5   Math_Score     100 non-null    int64 
 6   Reading_Score  100 non-null    int64 
 7   Writing_Score  100 non-null    int64 
 8   School         100 non-null    object
 9   Class          100 non-null    object
 10  Total_Scores   100 non-null    int64 
dtypes: int64(6), object(5)
memory usage: 8.7+ KB


In [5]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
# NOTE: the recorded output includes "Total_Scores" because the cell that
# creates it (In [3]) was executed before this one (In [5]); on a
# top-to-bottom run that column does not exist yet at this point.
df.describe()

Unnamed: 0,StudentID,Age,Math_Score,Reading_Score,Writing_Score,Total_Scores
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,50.5,16.19,55.78,49.96,50.41,156.15
std,29.011492,1.433333,28.786073,26.774373,27.849558,42.829088
min,1.0,14.0,3.0,0.0,1.0,24.0
25%,25.75,15.0,30.0,25.0,32.0,126.5
50%,50.5,16.0,57.0,51.5,47.0,156.5
75%,75.25,18.0,82.5,67.25,73.25,182.0
max,100.0,18.0,100.0,100.0,98.0,257.0


In [3]:
# Add a "Total_Scores" column: the row-wise sum of the three subject scores.
df["Total_Scores"] = (
    df["Math_Score"]
    .add(df["Reading_Score"])
    .add(df["Writing_Score"])
)

# Preview the frame to confirm the new column is present.
df.head()

Unnamed: 0,StudentID,Name,Country,Gender,Age,Math_Score,Reading_Score,Writing_Score,School,Class,Total_Scores
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,Nelson Mandela School,JSS1,188
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,Ubuntu Academy,JSS2,187
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,Ubuntu Academy,SS3,179
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,Ubuntu Academy,SS2,136
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,Ubuntu Academy,SS3,76


In [7]:
# Add a "Percentage" column: the total expressed as a percentage of 300,
# rounded to 2 decimal places.
# NOTE(review): 300 is taken to be the maximum combined score of the three
# subjects (i.e. each marked out of 100) — confirm against the grading scheme.
df["Percentage"] = df["Total_Scores"].div(300).mul(100).round(2)

df.head()

Unnamed: 0,StudentID,Name,Country,Gender,Age,Math_Score,Reading_Score,Writing_Score,School,Class,Total_Scores,Percentage
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,Nelson Mandela School,JSS1,188,62.67
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,Ubuntu Academy,JSS2,187,62.33
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,Ubuntu Academy,SS3,179,59.67
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,Ubuntu Academy,SS2,136,45.33
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,Ubuntu Academy,SS3,76,25.33


In [8]:
# Add a "Status" column derived from the percentage score:
# "Passed" when Percentage >= 50, otherwise "Failed".
# The comparison is done on the whole column at once and the resulting
# booleans are mapped to labels, instead of looping over values in Python.
# Missing percentages compare as False and are therefore labelled "Failed",
# the same outcome an element-wise `>=` check gives for NaN.
df["Status"] = df["Percentage"].ge(50).map({True: "Passed", False: "Failed"})

df.head()

Unnamed: 0,StudentID,Name,Country,Gender,Age,Math_Score,Reading_Score,Writing_Score,School,Class,Total_Scores,Percentage,Status
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,Nelson Mandela School,JSS1,188,62.67,Passed
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,Ubuntu Academy,JSS2,187,62.33,Passed
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,Ubuntu Academy,SS3,179,59.67,Passed
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,Ubuntu Academy,SS2,136,45.33,Failed
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,Ubuntu Academy,SS3,76,25.33,Failed


In [9]:
# Rename the "Math_Score" column to the fuller "Mathematics_Score".
df.rename({"Math_Score": "Mathematics_Score"}, axis="columns", inplace=True)

# Preview the frame to check the new column name.
df.head()

Unnamed: 0,StudentID,Name,Country,Gender,Age,Mathematics_Score,Reading_Score,Writing_Score,School,Class,Total_Scores,Percentage,Status
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,Nelson Mandela School,JSS1,188,62.67,Passed
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,Ubuntu Academy,JSS2,187,62.33,Passed
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,Ubuntu Academy,SS3,179,59.67,Passed
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,Ubuntu Academy,SS2,136,45.33,Failed
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,Ubuntu Academy,SS3,76,25.33,Failed


In [10]:
# Remove the "School" column (done for anonymity).
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError once "School" has already been removed.
# NOTE(review): "Name" and "StudentID" are still present after this step —
# confirm whether they should also be removed for anonymity.
df = df.drop(columns=["School"], errors="ignore")

df.head()

Unnamed: 0,StudentID,Name,Country,Gender,Age,Mathematics_Score,Reading_Score,Writing_Score,Class,Total_Scores,Percentage,Status
0,1,Ifeanyi Mugisha,Zimbabwe,Female,18,99,77,12,JSS1,188,62.67,Passed
1,2,Yemi Okeke,Tanzania,Male,16,60,29,98,JSS2,187,62.33,Passed
2,3,Fatou Mugisha,Zimbabwe,Male,15,49,46,84,SS3,179,59.67,Passed
3,4,Chinedu Okafor,Ethiopia,Female,17,34,57,45,SS2,136,45.33,Failed
4,5,Yemi Moyo,Senegal,Male,16,22,16,38,SS3,76,25.33,Failed


In [11]:
# Write the processed dataset to a CSV file, leaving out the row index.
df.to_csv(path_or_buf="updated_student_scores.csv", index=False)