In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# 1. Load your data
# The input CSV must be named 'student_combined_data.csv'. The path is
# relative, so the file has to be in the current working directory (normally
# the folder this script is run from). Later sections read the columns
# 'course_level', 'aptitude_score' and 'performance_score' from it.
DATA_PATH = 'student_combined_data.csv'
df = pd.read_csv(DATA_PATH)

# --- Q1: Enrollment Patterns ---
# Print a header, then the mean performance_score for each distinct
# course_level value found in the data (one row per level).
print("--- Average Performance per Level ---")
performance_by_level = df.groupby('course_level')['performance_score']
print(performance_by_level.mean())

# --- Q2: Aptitude Variance ---
# One-way ANOVA: tests whether the mean aptitude_score differs between the
# course levels. A small p-value suggests at least one level's mean differs.
#
# Missing scores are dropped per group first: f_oneway does not skip NaN, so
# a single missing value would otherwise turn the p-value into NaN.
groups = [
    group['aptitude_score'].dropna().values
    for _, group in df.groupby('course_level')
]
f_stat, p_val = stats.f_oneway(*groups)
print(f"\nANOVA p-value: {p_val:.4f}")

# --- Q3: Correlation ---
# Pearson correlation between aptitude_score and performance_score.
#
# pearsonr does not skip missing values, so keep only the rows where BOTH
# scores are present. Dropping on the two columns together keeps each
# aptitude value paired with the performance value from the same row.
paired_scores = df[['aptitude_score', 'performance_score']].dropna()
corr, _ = stats.pearsonr(
    paired_scores['aptitude_score'],
    paired_scores['performance_score'],
)
print(f"Correlation between Aptitude and Performance: {corr:.2f}")

# --- Visualization ---
# One 10x5 figure with two side-by-side panels, each drawn on its own Axes.
fig, (scatter_ax, box_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: aptitude against performance, points colored by course level.
sns.scatterplot(
    data=df,
    x='aptitude_score',
    y='performance_score',
    hue='course_level',
    ax=scatter_ax,
)
scatter_ax.set_title('Aptitude vs Performance')

# Right panel: distribution of performance scores within each course level.
sns.boxplot(data=df, x='course_level', y='performance_score', ax=box_ax)
box_ax.set_title('Performance by Course Level')

# Adjust spacing so titles and axis labels do not overlap, then display.
fig.tight_layout()
plt.show()