# Student Performance EDA
Exploratory data analysis of the StudentPerformanceFactors dataset.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Location of the raw CSV, given relative to the working directory
file_path = "StudentPerformanceFactors(in).csv"

# Parse the CSV into the DataFrame that the later cells operate on
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows as this cell's output
df.head(n=5)


In [None]:
# Print the frame's structure: column names, dtypes and non-null counts
df.info()

# Per-column tally of missing entries (isna is the canonical name for isnull)
df.isna().sum()


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for numerical features.
# With default arguments describe() reports only the numeric columns of a
# mixed-dtype frame, so text columns are not part of this table.
df.describe()


In [None]:
# Names of every object-dtype (text) column; the boxplot cell iterates over this list
cat_cols = list(df.select_dtypes(include=['object']).columns)

# Number of distinct values in each of those categorical columns
df.loc[:, cat_cols].nunique()


In [None]:
# Histogram of the target variable with a kernel density estimate overlaid.
# Title and axis labels are applied directly on the Axes that histplot returns.
sns.histplot(data=df, x='Exam_Score', kde=True).set(
    title="Distribution of Exam Score",
    xlabel="Exam Score",
    ylabel="Frequency",
)
plt.show()


In [None]:
# Features to plot against the exam score, one scatter figure per feature.
# NOTE(review): this list is maintained by hand rather than derived from dtypes;
# confirm against df.info() that every entry really is numeric.
num_cols = [
    'Hours_Studied',
    'Attendance',
    'Sleep_Hours',
    'Previous_Scores',
    'Tutoring_Sessions',
    'Family_Income',
    'Teacher_Quality',
    'Distance_from_Home',
    'Physical_Activity',
]

for col in num_cols:
    # Long-form call: seaborn looks both columns up in df by name
    sns.scatterplot(data=df, x=col, y='Exam_Score').set(
        title=f'Exam Score vs {col}',
        xlabel=col,
        ylabel='Exam Score',
    )
    # Render now so each feature gets its own figure
    plt.show()


In [None]:
# One figure per categorical column: spread of Exam_Score within each category level
for col in cat_cols:
    sns.boxplot(data=df, x=col, y='Exam_Score').set_title(f'Exam Score by {col}')
    # Tilt the category labels so longer level names do not overlap
    plt.xticks(rotation=45)
    plt.show()


In [None]:
# Correlation heatmap for numerical variables.
#
# Only numeric dtypes are passed to corr(). Since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises ValueError instead,
# so a single text column in num_cols would break this cell.
# NOTE(review): several num_cols entries (e.g. Family_Income, Teacher_Quality,
# Distance_from_Home) look like they may hold text levels - confirm with
# df.info(); any such column is left out of the heatmap here.
corr = (
    df[num_cols + ['Exam_Score']]
    .select_dtypes(include='number')
    .corr()
)

# Annotated matrix of pairwise Pearson coefficients (corr()'s default method)
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()
