MODEL TESTING

In [1]:
# Install CatBoost if not installed
!pip install catboost --quiet

# Imports
import pandas as pd
import numpy as np
import joblib
from google.colab import files


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Upload files: cleaned CSV + regression + classification models + features list
uploaded_files = files.upload()

# Load cleaned CSV
df_test = pd.read_csv("StudentPerformanceFactors_Cleaned.csv")
print("Test data preview:")
display(df_test.head())

# Load saved models
reg_model = joblib.load("best_reg_model.pkl")
clf_model = joblib.load("best_clf_model.pkl")

# Load feature list used in training
all_features = joblib.load("all_features.pkl")
print("Feature columns loaded. Total features:", len(all_features))


Saving best_clf_model.pkl to best_clf_model.pkl
Saving all_features.pkl to all_features.pkl
Saving best_reg_model.pkl to best_reg_model.pkl
Saving StudentPerformanceFactors_Cleaned.csv to StudentPerformanceFactors_Cleaned.csv
Test data preview:


Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Sleep_Hours,Previous_Scores,Motivation_Level,Internet_Access,Tutoring_Sessions,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Physical_Activity,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender,Exam_Score
0,23,84,Low,High,No,7,73,Low,Yes,0,Low,Medium,Public,Positive,3,No,High School,Near,Male,67
1,19,64,Low,Medium,No,8,59,Low,Yes,2,Medium,Medium,Public,Negative,4,No,College,Moderate,Female,61
2,24,98,Medium,Medium,Yes,7,91,Medium,Yes,2,Medium,Medium,Public,Neutral,4,No,Postgraduate,Near,Male,74
3,29,89,Low,Medium,Yes,8,98,Medium,Yes,1,Medium,Medium,Public,Negative,4,No,High School,Moderate,Male,71
4,19,92,Medium,Medium,Yes,6,65,Medium,Yes,3,Medium,High,Public,Neutral,4,No,College,Near,Female,70


Feature columns loaded. Total features: 27


In [3]:
# Identify categorical columns in test data
categorical_cols = df_test.select_dtypes(include=['object']).columns.tolist()

# One-hot encode test features
X_test = df_test.drop(columns=['Exam_Score', 'GradeCategory'], errors='ignore')
X_test_encoded = pd.get_dummies(X_test, drop_first=True)

# Align test columns to training columns
for col in all_features:
    if col not in X_test_encoded.columns:
        X_test_encoded[col] = 0  # Add missing column with default 0

# Ensure order matches training
X_test_encoded = X_test_encoded[all_features]

print("Test data aligned with training features. Shape:", X_test_encoded.shape)


Test data aligned with training features. Shape: (6607, 27)


In [4]:
from sklearn.preprocessing import StandardScaler

# Regression scaling
scaler_reg = StandardScaler()
X_test_scaled = scaler_reg.fit_transform(X_test_encoded)

# Classification scaling (optional, CatBoost does not require scaling)
scaler_clf = StandardScaler()
X_test_scaled_clf = scaler_clf.fit_transform(X_test_encoded)


In [5]:
try:
    exam_score_pred = reg_model.predict(X_test_scaled)
    df_test['Predicted_Exam_Score'] = exam_score_pred
    print("Regression predictions added!")
    display(df_test[['Exam_Score', 'Predicted_Exam_Score']].head())
except Exception as e:
    print("Regression prediction failed:", e)


Regression predictions added!


Unnamed: 0,Exam_Score,Predicted_Exam_Score
0,67,67.090006
1,61,60.791792
2,74,74.182891
3,71,70.677344
4,70,70.840322


In [6]:
try:
    class_pred = clf_model.predict(X_test_scaled_clf)
    df_test['Predicted_GradeCategory'] = class_pred
    print("Classification predictions added!")
    display(df_test[['GradeCategory', 'Predicted_GradeCategory']].head())
except Exception as e:
    print("Classification prediction failed:", e)


Classification predictions added!
Classification prediction failed: "['GradeCategory'] not in index"


In [7]:
# Save predictions for review
df_test.to_csv("StudentPerformanceFactors_Predictions.csv", index=False)
files.download("StudentPerformanceFactors_Predictions.csv")
print("Predictions saved successfully!")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Predictions saved successfully!
