# Student Exam Performance Power BI project

## Import packages

In [1]:
import numpy as np
import pandas as pd
import warnings

# Suppress every Python warning for the rest of the session (no category or
# module filter is given, so this applies to all warnings).
# NOTE(review): this also hides pandas deprecation/dtype warnings — consider
# narrowing the filter to the specific warning that motivated it.
warnings.filterwarnings("ignore")

## Data cleaning

In [2]:
# Load the raw student-performance table from a path relative to the
# notebook's working directory; `df` is the frame every later cell works on.
df = pd.read_csv("data/student_performance_interactions.csv")

In [3]:
# Show the dtype pandas inferred for each column, to spot columns that were
# parsed as `object` when a numeric type was expected.
print(df.dtypes)

student_id                    object
final_score                  float64
grade                         object
pass_fail                      int64
previous_score               float64
math_prev_score              float64
science_prev_score           float64
language_prev_score          float64
daily_study_hours            float64
attendance_percentage        float64
homework_completion_rate     float64
sleep_hours                  float64
screen_time_hours            float64
physical_activity_minutes    float64
motivation_score             float64
exam_anxiety_score           float64
parent_education_level        object
study_environment             object
dtype: object


In [4]:
# Tally the empty (NaN/None) entries in each column and print the per-column
# totals underneath a heading line.
missing_values = df.isna().sum()
print("Missing values: ", missing_values, sep="\n")

Missing values: 
student_id                   0
final_score                  0
grade                        0
pass_fail                    0
previous_score               0
math_prev_score              0
science_prev_score           0
language_prev_score          0
daily_study_hours            0
attendance_percentage        0
homework_completion_rate     0
sleep_hours                  0
screen_time_hours            0
physical_activity_minutes    0
motivation_score             0
exam_anxiety_score           0
parent_education_level       0
study_environment            0
dtype: int64


In [5]:
# True if at least one row repeats an earlier row across all columns
# (pandas marks every occurrence after the first as a duplicate).
df.duplicated().any()

False

In [6]:
# Per-column flag: True when the column holds at least one missing value.
df.isna().any()

student_id                   False
final_score                  False
grade                        False
pass_fail                    False
previous_score               False
math_prev_score              False
science_prev_score           False
language_prev_score          False
daily_study_hours            False
attendance_percentage        False
homework_completion_rate     False
sleep_hours                  False
screen_time_hours            False
physical_activity_minutes    False
motivation_score             False
exam_anxiety_score           False
parent_education_level       False
study_environment            False
dtype: bool

In [7]:
# Display the rows that contain a real missing value OR the literal text
# "null" (any letter case) in any column.
# The text check runs once per column on the stringified frame rather than
# once per row, so it stays vectorised; the resulting row mask is the same.
df[
    df.isnull().any(axis=1)
    | df.astype(str).apply(lambda col: col.str.lower().eq("null")).any(axis=1)
]

Unnamed: 0,student_id,final_score,grade,pass_fail,previous_score,math_prev_score,science_prev_score,language_prev_score,daily_study_hours,attendance_percentage,homework_completion_rate,sleep_hours,screen_time_hours,physical_activity_minutes,motivation_score,exam_anxiety_score,parent_education_level,study_environment


In [8]:
# Preview the first five rows of the loaded frame.
df.head()

Unnamed: 0,student_id,final_score,grade,pass_fail,previous_score,math_prev_score,science_prev_score,language_prev_score,daily_study_hours,attendance_percentage,homework_completion_rate,sleep_hours,screen_time_hours,physical_activity_minutes,motivation_score,exam_anxiety_score,parent_education_level,study_environment
0,S0001,60.137241,D,1,60.599707,61.488212,53.568119,64.972292,1.427203,75.738405,68.534371,6.809352,3.313096,65.059425,4.150025,6.104103,Master,Noisy
1,S0002,99.021977,A,1,92.289287,85.612565,91.873759,89.040461,4.813612,89.602736,91.990197,5.567793,4.925359,76.016617,8.714693,1.982358,High School,Quiet
2,S0003,70.522955,C,1,80.259667,82.160656,72.736065,74.243663,1.240908,81.495426,69.669666,6.702875,5.107888,113.616872,5.92822,4.463662,High School,Moderate
3,S0004,63.448537,D,1,72.926217,75.979145,76.726496,67.715995,2.190601,71.472047,71.976757,7.854439,3.772446,108.68669,4.224928,4.740474,High School,Noisy
4,S0005,66.483019,C,1,48.581025,51.379977,48.993224,46.145011,2.192265,64.276582,68.940591,7.662429,1.898989,42.107294,9.506815,1.143852,Master,Quiet


In [9]:
# Derived column: change from the previous score to the final score
# (positive values mean the student improved).
df["score_improvement"] = df["final_score"].sub(df["previous_score"])

In [10]:
# Bucket final_score into four labelled bands:
#   [0, 60] -> Low, (60, 75] -> Average, (75, 90] -> Good, (90, 100] -> Excellent
# include_lowest=True closes the first interval on the left; without it a
# final_score of exactly 0 falls outside every bin and silently becomes NaN.
# Scores outside the 0-100 range still map to NaN.
df["performance_band"] = pd.cut(
    df["final_score"],
    bins=[0, 60, 75, 90, 100],
    labels=["Low", "Average", "Good", "Excellent"],
    include_lowest=True,
)

In [11]:
# Preview the first five rows again to check the newly added
# score_improvement and performance_band columns.
df.head()

Unnamed: 0,student_id,final_score,grade,pass_fail,previous_score,math_prev_score,science_prev_score,language_prev_score,daily_study_hours,attendance_percentage,homework_completion_rate,sleep_hours,screen_time_hours,physical_activity_minutes,motivation_score,exam_anxiety_score,parent_education_level,study_environment,score_improvement,performance_band
0,S0001,60.137241,D,1,60.599707,61.488212,53.568119,64.972292,1.427203,75.738405,68.534371,6.809352,3.313096,65.059425,4.150025,6.104103,Master,Noisy,-0.462466,Average
1,S0002,99.021977,A,1,92.289287,85.612565,91.873759,89.040461,4.813612,89.602736,91.990197,5.567793,4.925359,76.016617,8.714693,1.982358,High School,Quiet,6.73269,Excellent
2,S0003,70.522955,C,1,80.259667,82.160656,72.736065,74.243663,1.240908,81.495426,69.669666,6.702875,5.107888,113.616872,5.92822,4.463662,High School,Moderate,-9.736712,Average
3,S0004,63.448537,D,1,72.926217,75.979145,76.726496,67.715995,2.190601,71.472047,71.976757,7.854439,3.772446,108.68669,4.224928,4.740474,High School,Noisy,-9.477679,Average
4,S0005,66.483019,C,1,48.581025,51.379977,48.993224,46.145011,2.192265,64.276582,68.940591,7.662429,1.898989,42.107294,9.506815,1.143852,Master,Quiet,17.901994,Average


In [12]:
# Write the frame, including the derived columns, to the cleaned CSV.
# index=False keeps the pandas row index out of the file; by default it is
# written as an extra unnamed first column that is not part of the dataset
# and reappears as "Unnamed: 0" when the file is read back.
df.to_csv("data/student_performance_interactions_cleaned.csv", index=False)