In [76]:
import pandas as pd
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

## 🔧 1. Preprocessing

In [95]:
# Read the raw student-performance table from the CSV next to the notebook.

dataset_path = 'Student_Performance.csv'
df = pd.read_csv(dataset_path)

In [96]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [97]:
# Encode binary categorical feature ('Yes' -> 1, 'No' -> 0).
#
# Series.map turns every value that is not a key of the mapping into NaN, so
# applying the mapping to an already-encoded (numeric) column would wipe it out.
# The dtype guard makes this cell safe to re-run: once the column is numeric
# the mapping is skipped.

extracurricular_col = "Extracurricular Activities"
if not pd.api.types.is_numeric_dtype(df[extracurricular_col]):
    df[extracurricular_col] = df[extracurricular_col].map({
        'Yes' : 1,
        'No' : 0
    })

In [98]:
# Separate the target column from the predictors and convert both to NumPy arrays.

feature_columns = df.columns.drop('Performance Index')
y = df['Performance Index'].to_numpy()
X = df[feature_columns].to_numpy()

In [108]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition the same from run to run.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=8,
    test_size=0.2,
)

In [109]:
# Fill missing feature values with per-column means. The means are learned
# from the training rows only and then applied to both partitions.

imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)

In [110]:
# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted transform is reused for the test rows.

scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [107]:
# Report whether any NaN is present in the train / test feature matrices.

for split_name, split_matrix in (("X_train", X_train), ("X_test", X_test)):
    print(f"Any NaNs in {split_name}:", np.isnan(split_matrix).any())

Any NaNs in X_train: False
Any NaNs in X_test: False


## 🤖 2. Modeling

In [113]:
# Fit a linear regression on the standardized training features.
# fit() returns the estimator itself, so construction and fitting can be chained;
# the bare `model` on the last line displays the fitted estimator as cell output.
model = LinearRegression().fit(X_train_scaled, y_train)

model

## 🔁 3. Cross-Validation

In [114]:
# Cross-validate scaling and regression together. Passing the pre-scaled
# X_train_scaled would let every validation fold contribute to the scaler's
# statistics, because that scaler was fitted on the whole training set. With a
# pipeline the scaler is re-fitted on the training part of each fold.
# X_train is the imputed, unscaled training matrix; the fitted `model` above is
# left untouched.
# NOTE: imputation was already applied to all of X_train in an earlier cell, so
# fold-wise imputation is not reproduced here.
cv_pipeline = make_pipeline(StandardScaler(), LinearRegression())

cv_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='r2')
print("Cross-validated R² scores:", cv_scores)
print("Mean CV R² Score:", cv_scores.mean())

Cross-validated R² scores: [0.98877938 0.98813359 0.9893458  0.98914073 0.98846   ]
Mean CV R² Score: 0.9887719017426722


## 📊 4. Evaluation & Prediction

In [116]:
# Predict on the held-out test features and report each metric against y_test.
y_pred = model.predict(X_test_scaled)

test_metrics = {
    "Final Test R² Score:": r2_score,
    "Final Test MSE:": mean_squared_error,
    "Final Test MAE:": mean_absolute_error,
}
for metric_label, metric_fn in test_metrics.items():
    print(metric_label, metric_fn(y_test, y_pred))

Final Test R² Score: 0.9884710293673058
Final Test MSE: 4.125785535364117
Final Test MAE: 1.6175266314232444
