# In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Configuration and data loading -----------------------------------------
# CSV_FILE is a relative path, so it is resolved against the current working
# directory. OUTPUT_DIR receives the rendered PNG charts.
CSV_FILE = "student_performance.csv"
OUTPUT_DIR = "charts"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # no error if the directory already exists


# Fail early with an actionable message when the dataset is absent.
if not os.path.exists(CSV_FILE):
    raise FileNotFoundError(
        f"'{CSV_FILE}' not found. Put student_performance.csv in the same folder as this script."
    )

df = pd.read_csv(CSV_FILE)


# Validate the schema BEFORE touching any column: indexing df["gender"] first
# would surface a missing column as a bare KeyError instead of this ValueError.
required_cols = {"gender", "attendance_percent", "final_score"}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(f"Dataset missing required columns: {missing}")

# Normalise gender labels (trim surrounding whitespace, Title Case) so that
# values such as " male" or "FEMALE" compare equal to "Male" / "Female".
df["gender"] = df["gender"].astype(str).str.strip().str.title()


# Chart 1: histogram of final scores (missing values dropped), 12 bins.
fig, ax = plt.subplots()
final_scores = df["final_score"].dropna()
ax.hist(final_scores, bins=12, edgecolor="black")
ax.set_title("Score Distribution (Final Score)")
ax.set_xlabel("Final Score")
ax.set_ylabel("Number of Students")
fig.tight_layout()

# Save the figure to disk and close it.
p1 = os.path.join(OUTPUT_DIR, "score_distribution.png")
fig.savefig(p1, dpi=200)
plt.close(fig)


# Chart 2: box plot comparing final scores of "Male" and "Female" rows.
is_male = df["gender"] == "Male"
is_female = df["gender"] == "Female"
male_scores = df.loc[is_male, "final_score"].dropna()
female_scores = df.loc[is_female, "final_score"].dropna()

fig, ax = plt.subplots()
ax.boxplot(
    [male_scores, female_scores],
    tick_labels=["Male", "Female"],
    showmeans=True,  # also mark each group's mean
)
ax.set_title("Gender vs Performance (Final Score)")
ax.set_xlabel("Gender")
ax.set_ylabel("Final Score")
fig.tight_layout()

# Save the figure to disk and close it.
p2 = os.path.join(OUTPUT_DIR, "gender_vs_performance.png")
fig.savefig(p2, dpi=200)
plt.close(fig)


# Chart 3: heatmap of the pairwise correlations between numeric columns.
numeric_df = df.select_dtypes(include=[np.number]).copy()
corr = numeric_df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(8, 6))
heatmap = ax.imshow(corr, interpolation="nearest")
ax.set_title("Correlation Heatmap")
fig.colorbar(heatmap, ax=ax)

# One tick per column on both axes; x labels are rotated and right-aligned.
tick_positions = range(len(corr.columns))
ax.set_xticks(tick_positions)
ax.set_xticklabels(corr.columns, rotation=45, ha="right")
ax.set_yticks(tick_positions)
ax.set_yticklabels(corr.columns)
fig.tight_layout()

# Save the figure to disk and close it.
p3 = os.path.join(OUTPUT_DIR, "correlation_heatmap.png")
fig.savefig(p3, dpi=200)
plt.close(fig)


# Chart 4: attendance vs. final score scatter plot with a least-squares
# (degree-1 polynomial) trend line.
#
# Both columns are coerced to numeric first: np.isnan raises TypeError on
# object-dtype arrays, so unparseable entries are turned into NaN here and
# filtered out together with genuinely missing values.
attendance = pd.to_numeric(df["attendance_percent"], errors="coerce")
scores = pd.to_numeric(df["final_score"], errors="coerce")

# Keep only rows where both values are present so x and y stay aligned.
mask = (attendance.notna() & scores.notna()).to_numpy()
x = attendance[mask].to_numpy(dtype=float)
y = scores[mask].to_numpy(dtype=float)

plt.figure()
plt.scatter(x, y)
plt.title("Attendance vs Final Score (with Trend Line)")
plt.xlabel("Attendance (%)")
plt.ylabel("Final Score")

# A straight-line fit needs at least two points with distinct x values;
# otherwise x.min() / np.polyfit fail or the fit is degenerate. In that case
# the chart is still saved, just without the trend line.
if x.size >= 2 and x.min() < x.max():
    m, b = np.polyfit(x, y, 1)
    x_line = np.linspace(x.min(), x.max(), 100)
    y_line = m * x_line + b
    # Explicit colour so the line does not share the scatter points' default colour.
    plt.plot(x_line, y_line, color="tab:red")
else:
    print("   (trend line skipped: need at least two distinct attendance values)")

plt.tight_layout()

# Save the figure to disk and close it.
p4 = os.path.join(OUTPUT_DIR, "attendance_vs_score_trend.png")
plt.savefig(p4, dpi=200)
plt.close()


# Report the output directory and the absolute path of every generated chart.
output_root = os.path.abspath(OUTPUT_DIR)
print(" Generated all charts in:", output_root)
for chart_path in map(os.path.abspath, (p1, p2, p3, p4)):
    print("   -", chart_path)
