# Academic Performance Prediction - Data Exploration

This notebook provides interactive exploration of the academic performance dataset.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.data_loader import DataLoader

# Load data
# DataLoader is the project-local loader imported from src.data_loader above.
# NOTE(review): load_data() presumably returns a pandas DataFrame -- it is
# used as one here (.shape, .head()); confirm against src/data_loader.
data_loader = DataLoader()
df = data_loader.load_data()
print(f"Dataset shape: {df.shape}")
# Left as the cell's final expression so the notebook renders the preview.
df.head()

In [None]:
# Basic statistics
# Summary statistics of df; with pandas defaults on a mixed-dtype frame only
# the numeric columns are summarised (count, mean, std, min, quartiles, max).
# Left as the cell's final expression so the notebook renders the table.
df.describe()

In [None]:
# Correlation analysis
# Restrict to numeric columns, then compute their pairwise correlation matrix.
numerical_df = df.select_dtypes(include=[np.number])
correlation_matrix = numerical_df.corr()

# Annotated heatmap with a diverging palette centred on zero correlation.
plt.figure(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
)
plt.title('Feature Correlation Matrix')
plt.show()

In [None]:
# Performance by categorical features
# One grouped bar chart per categorical feature, side by side. Each chart
# shows, for every value of the feature, how many rows fall under each value
# of the 'performance' column.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (column name in df, human-readable label used in the title and x-axis)
categorical_features = [
    ('family_income', 'Family Income'),
    ('parent_education', 'Parent Education'),
]

for ax, (column, label) in zip(axes, categorical_features):
    # crosstab builds a frequency table: feature values as rows,
    # performance values as columns, row counts as cells.
    pd.crosstab(df[column], df['performance']).plot(kind='bar', ax=ax)
    ax.set_title(f'Performance by {label}')
    ax.set_xlabel(label)
    # Bar heights are row counts; label the axis so the chart is
    # self-explanatory.
    ax.set_ylabel('Count')
    ax.legend(title='Performance')

plt.tight_layout()
plt.show()