# Student Performance Analysis using Regression Models

In [None]:
# Install required packages (uncomment if needed)
# !pip install numpy pandas scikit-learn matplotlib seaborn


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Load the raw dataset from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="StudentsPerformance.csv")
df.head()


In [None]:
# Missing-value audit: per-column counts first, then the grand total
# (the total is just the sum of the per-column counts).
null_counts = df.isna().sum()
total_nulls = null_counts.sum()

print("Null element count in each column:")
print(null_counts)
print(f"Total null elements in dataset: {total_nulls}")


In [None]:
# Ordinal encoding for parental education: a larger integer means a higher
# level of education, so the ordering is preserved as a single numeric feature.
education_order = {
    "some high school": 0,
    "high school": 1,
    "some college": 2,
    "associate's degree": 3,
    "bachelor's degree": 4,
    "master's degree": 5,
}

education_column = "parental level of education"
education_codes = df[education_column].map(education_order)

# Series.map returns NaN for any label that is not a key of the mapping.
# Detect values that were present before mapping but became NaN afterwards
# (e.g. an unexpected category, or this cell being executed a second time on
# the already-encoded column) and fail loudly instead of silently writing
# NaNs into the feature.
unmapped_labels = df.loc[
    education_codes.isna() & df[education_column].notna(), education_column
].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unmapped values in '{education_column}': {list(unmapped_labels)}"
    )

df[education_column] = education_codes
df.head()


In [None]:
# One-hot encode the nominal (unordered) categorical columns; every other
# column is passed through unchanged.
# NOTE(review): all levels are kept (drop_first is left at its default), so
# each group of dummy columns sums to 1 — confirm this is intended for the
# unregularized linear model.
df_encoded = pd.get_dummies(
    data=df,
    columns=[
        "gender",
        "race/ethnicity",
        "lunch",
        "test preparation course",
    ],
)
df_encoded.head()


In [None]:
# Target is the math score; every remaining column is used as a feature.
y = df_encoded["math score"]
X = df_encoded.drop(columns="math score")


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Ordinary least squares: fit on the training split, then score the
# predictions for the held-out split.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

mse, r2 = (
    mean_squared_error(y_test, y_pred),
    r2_score(y_test, y_pred),
)

print(f"Linear Regression MSE: {mse:.4f}")
print(f"Linear Regression R²: {r2:.4f}")


In [None]:
# Ridge (L2-penalized) regression with alpha=1.0, evaluated on the same
# held-out split as the other models.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)

mse_ridge, r2_ridge = (
    mean_squared_error(y_test, y_pred_ridge),
    r2_score(y_test, y_pred_ridge),
)

print(f"Ridge Regression MSE: {mse_ridge:.4f}")
print(f"Ridge Regression R²: {r2_ridge:.4f}")


In [None]:
# Lasso (L1-penalized) regression with alpha=0.1, evaluated on the same
# held-out split as the other models.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
y_pred_lasso = lasso_model.predict(X_test)

mse_lasso, r2_lasso = (
    mean_squared_error(y_test, y_pred_lasso),
    r2_score(y_test, y_pred_lasso),
)

print(f"Lasso Regression MSE: {mse_lasso:.4f}")
print(f"Lasso Regression R²: {r2_lasso:.4f}")


In [None]:
# Actual vs. predicted math scores for all three models on one set of axes.
fig, ax = plt.subplots(figsize=(10, 5))

for model_label, predictions in (
    ("Linear", y_pred),
    ("Ridge", y_pred_ridge),
    ("Lasso", y_pred_lasso),
):
    ax.scatter(y_test, predictions, alpha=0.6, label=model_label)

# Dashed black diagonal marks perfect prediction; it spans the score range of
# the full target column, not only the test split.
score_min, score_max = y.min(), y.max()
ax.plot([score_min, score_max], [score_min, score_max], '--k')

ax.set_xlabel("Actual Math Score")
ax.set_ylabel("Predicted Math Score")
ax.set_title("Actual vs. Predicted Scores")
ax.legend()
ax.grid(True)
plt.show()
