# Batch Inference & Recommendation Generation
This notebook loads the trained classification model and the pre-cleaned test dataset.
It predicts the risk/performance category for every student, generates personalized study recommendations with the rule engine in the `recommender` module, and appends the predictions and recommendations to the DataFrame as new columns before saving it.

In [1]:
import os
import pickle
import pandas as pd

# Import our recommender directly since we are in src/ml/
from recommender import generate_recommendations

# Input/output locations used by the cells below. These are relative paths, so
# they resolve against the kernel's working directory (per the import note
# above, this notebook is expected to run from src/ml/).
# Pre-cleaned test split, read with pd.read_csv.
TEST_PATH = "../../datasets/test_cleaned.csv"
# Directory the pickled model file ("random_forest.pkl") is loaded from.
MODEL_DIR = "models"
# Destination CSV for the test data plus the prediction/recommendation columns.
OUTPUT_PATH = "../../datasets/test_with_recommendations.csv"


### 1. Load Data and Random Forest Model

In [2]:
# Read the pre-cleaned test split into a DataFrame.
print(f"Loading test data from {TEST_PATH}")
df = pd.read_csv(TEST_PATH)

# Deserialize the trained model from MODEL_DIR.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only point
# this at model files produced by this project's own training step.
print("Loading trained Random Forest model...")
model_path = os.path.join(MODEL_DIR, "random_forest.pkl")
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

# Preview the first rows to confirm the data loaded as expected.
display(df.head())

Loading test data from ../../datasets/test_cleaned.csv
Loading trained Random Forest model...


Unnamed: 0,age,parent_education,study_hours,attendance_percentage,internet_access,travel_time,extra_activities,math_score,science_score,english_score,...,gender_other,school_type_private,school_type_public,study_method_coaching,study_method_group study,study_method_mixed,study_method_notes,study_method_online videos,study_method_textbook,final_grade
0,0.304376,-0.884464,0.572402,-0.199818,1,-1.365,1,-0.394337,0.572362,-0.312037,...,1,0,1,0,1,0,0,0,0,3
1,0.890373,0.890776,-1.084567,-0.965184,0,1.341841,1,-0.026578,-1.312553,-0.771564,...,1,0,1,0,0,1,0,0,0,1
2,-0.867617,0.890776,1.677048,0.217655,1,-0.462719,0,0.95252,1.417487,1.736689,...,0,0,1,1,0,0,0,0,0,5
3,-0.28162,0.299029,-0.900459,1.059559,1,0.439561,0,-0.938811,-1.350536,-0.49872,...,0,0,1,0,0,1,0,0,0,1
4,-0.28162,-0.884464,-0.164028,0.454223,1,0.439561,1,1.00028,0.66732,-0.259383,...,1,1,0,0,1,0,0,0,0,3


### 2. Run Inference (Predict Final Grade)

In [3]:
target_col = 'final_grade'

# Columns that this notebook itself appends to `df` (here and in the
# recommendation step). They must never reach the model as features, otherwise
# re-running this cell after it has already executed would feed string columns
# into model.predict.
derived_cols = ['predicted_grade', 'predicted_category', 'study_recommendations']

# Build the feature matrix: everything except the target and the derived
# columns. errors='ignore' makes the drop a no-op for any column that is not
# present (e.g. an unlabeled dataset without `final_grade`, or a first run
# where the derived columns do not exist yet).
X = df.drop(columns=[target_col, *derived_cols], errors='ignore')

predictions = model.predict(X)

# Human-readable name for each predicted grade value.
CATEGORY_LABELS = {
    5: "Exceptional",
    4: "High-Performing",
    3: "Above-Average",
    2: "Average",
    1: "Below-Average",
    0: "At-Risk",
}

# One entry per row of `df`, in row order. A prediction outside
# CATEGORY_LABELS raises KeyError rather than being silently mislabeled.
predicted_grades = [f"Grade {pred}" for pred in predictions]
predicted_labels = [CATEGORY_LABELS[pred] for pred in predictions]

# Assigning plain lists attaches the values positionally, row by row.
df['predicted_grade'] = predicted_grades
df['predicted_category'] = predicted_labels

display(df[['predicted_grade', 'predicted_category']].head())

Unnamed: 0,predicted_grade,predicted_category
0,Grade 3,Above-Average
1,Grade 1,Below-Average
2,Grade 5,Exceptional
3,Grade 1,Below-Average
4,Grade 3,Above-Average


### 3. Generate Study Recommendations per Student

In [4]:
print("Generating individualized recommendations...")
all_recommendations = []

for _, row in df.iterrows():
    student_data = row.to_dict()
    # Take the grade from the row itself instead of indexing the
    # `predicted_grades` list with the DataFrame index label: a label is only
    # a valid list position while `df` has a default 0..n-1 index, so any
    # filtering, sorting or custom index would raise IndexError or silently
    # pair a student with someone else's prediction.
    pred_grade = row['predicted_grade']

    recs = generate_recommendations(student_data, pred_grade)
    # Flatten the recommendations into a single " | "-delimited string so they
    # fit in one CSV cell.
    recs_str = " | ".join(recs)
    all_recommendations.append(recs_str)

# One string per row, in row order, so positional assignment lines up with df.
df['study_recommendations'] = all_recommendations

# Visualize the final output for the first few students
display(df[['predicted_category', 'study_recommendations']].head(30))

Generating individualized recommendations...


Unnamed: 0,predicted_category,study_recommendations
0,Above-Average,Priority: Maintain current momentum while iden...
1,Below-Average,Priority: Schedule a 1-on-1 session with an ac...
2,Exceptional,Priority: Keep up the excellent work! | Action...
3,Below-Average,Priority: Schedule a 1-on-1 session with an ac...
4,Above-Average,Priority: Maintain current momentum while iden...
5,Above-Average,Priority: Maintain current momentum while iden...
6,At-Risk,Priority: Schedule a 1-on-1 session with an ac...
7,Below-Average,Priority: Schedule a 1-on-1 session with an ac...
8,Below-Average,Priority: Schedule a 1-on-1 session with an ac...
9,Above-Average,Priority: Maintain current momentum while iden...


### 4. Save Final Augmented Dataset

In [5]:
# Write the augmented frame (the loaded columns plus the prediction and
# recommendation columns added in the earlier cells) to OUTPUT_PATH.
# index=False keeps the DataFrame index out of the CSV.
df.to_csv(OUTPUT_PATH, index=False)
print(f"Successfully saved predictions and recommendations to {OUTPUT_PATH}")

Successfully saved predictions and recommendations to ../../datasets/test_with_recommendations.csv
