# Student Performance Analysis

A clean, structured exploratory data analysis (EDA) notebook for the student performance dataset (`stud.csv`).

## 1. Import Libraries

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Default figure size as (width, height) in inches for every plot below.
DEFAULT_FIGSIZE = (8, 5)

# Apply seaborn's "whitegrid" style first, then set the default figure size.
sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = DEFAULT_FIGSIZE


## 2. Load Dataset

In [None]:

# Path of the CSV file, resolved relative to the notebook's working directory.
DATA_PATH = "stud.csv"

# Read the whole file into a DataFrame and preview the first five rows.
df = pd.read_csv(DATA_PATH)
df.head(5)


## 3. Data Overview

In [None]:

# Print the structural summary of the DataFrame (columns, dtypes, non-null counts).
df.info()

# Summary statistics; as the cell's last expression the table is rendered.
summary_stats = df.describe()
summary_stats


## 4. Data Quality Check

In [None]:

# Data quality: count missing values per column and fully duplicated rows.
# Only the last bare expression of a cell is displayed (and none when run as
# a script), so both results are printed explicitly to keep either from
# being silently discarded.
missing_per_column = df.isnull().sum()
duplicate_row_count = df.duplicated().sum()

print("Missing values per column:")
print(missing_per_column)
print(f"Duplicate rows: {duplicate_row_count}")


## 5. Feature Engineering

In [None]:

# The three per-subject score columns that make up the average.
score_columns = ['math_score', 'reading_score', 'writing_score']
math_scores, reading_scores, writing_scores = (df[col] for col in score_columns)

# Row-wise arithmetic mean of the three scores; a missing value in any
# subject propagates to the average.
df['average_score'] = (math_scores + reading_scores + writing_scores) / len(score_columns)

# Preview the frame with the new column appended.
df.head()


## 6. Gender vs Performance

In [None]:

# Distribution of the average score for each gender category.
gender_ax = sns.boxplot(data=df, x='gender', y='average_score')
gender_ax.set_title("Average Score by Gender")
plt.show()


## 7. Parental Education Impact

In [None]:

# Mean average score per parental education level, drawn without error bars.
# `errorbar=None` replaces `ci=None`, which is deprecated since seaborn 0.12.
sns.barplot(
    x='parental_level_of_education',
    y='average_score',
    data=df,
    errorbar=None,
)
# Rotate the category labels so they do not overlap.
plt.xticks(rotation=45)
plt.title("Parental Education vs Performance")
plt.show()


## 8. Test Preparation Effect

In [None]:

# Distribution of the average score for each test preparation course value.
prep_ax = sns.boxplot(data=df, x='test_preparation_course', y='average_score')
prep_ax.set_title("Test Preparation Course Impact")
plt.show()


## 9. Lunch Type Analysis

In [None]:

# Mean average score per lunch type, drawn without error bars.
# `errorbar=None` replaces `ci=None`, which is deprecated since seaborn 0.12.
sns.barplot(x='lunch', y='average_score', data=df, errorbar=None)
plt.title("Lunch Type vs Performance")
plt.show()


## 10. Correlation Analysis

In [None]:

# Columns included in the correlation matrix. `average_score` is derived
# from the other three, so its correlation with them is high by construction.
score_columns = ['math_score', 'reading_score', 'writing_score', 'average_score']
score_df = df[score_columns]

# Pairwise correlations, drawn as an annotated heatmap.
correlation_matrix = score_df.corr()
heatmap_ax = sns.heatmap(correlation_matrix, cmap='coolwarm', annot=True)
heatmap_ax.set_title("Score Correlation Matrix")
plt.show()


## 11. Key Insights
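- Completing the test preparation course is associated with higher average scores.
- Average scores vary with the parental level of education.
- Reading and writing scores are strongly correlated with each other.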