In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import json


In [None]:
# Read the student CSV from the Kaggle input directory and preview the first rows.
data_path = '/kaggle/input/student-data/student-por.csv'
df = pd.read_csv(data_path)
df.head()

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
# so it is clear which columns are numeric before filtering on dtype.
df.info()

In [None]:
# Select only numeric columns.
# "number" matches every NumPy numeric dtype (int32, float32, ...), not just
# int64/float64, so a numeric column is not silently dropped from the feature
# set when it happens to be stored at a different width.
df_numeric = df.select_dtypes(include="number")
df_numeric.head()

In [None]:
# Name of the final-grade column that is held out of the feature matrix.
target_col = "G3"

# Features: every numeric column except the target.
X = df_numeric.drop(columns=[target_col])
# Raw final grade. The classifier further down is trained on the binned
# y_class derived from this column, not on y itself.
y = df_numeric[target_col]

print(X.head(), y.head())

In [None]:
def grade_to_class(g, low_max=9, medium_max=14):
    """Bin a grade into a performance class.

    Parameters
    ----------
    g : number
        Grade to classify.
    low_max : number, default 9
        Highest grade that still counts as Low.
    medium_max : number, default 14
        Highest grade that still counts as Medium.

    Returns
    -------
    int
        0 (Low) if ``g <= low_max``, 1 (Medium) if ``g <= medium_max``,
        otherwise 2 (High).

    Raises
    ------
    ValueError
        If ``g`` is NaN.
    """
    # NaN is the only value that is not equal to itself. Without this guard a
    # missing grade fails both comparisons below and is silently labelled High.
    if g != g:
        raise ValueError("grade is NaN; cannot assign a performance class")
    if g <= low_max:
        return 0      # Low
    if g <= medium_max:
        return 1      # Medium
    return 2          # High

# Bin every final grade into its 0/1/2 performance class; this binned series
# is the label the classifier is trained to predict.
final_grades = df_numeric["G3"]
y_class = final_grades.apply(grade_to_class)


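We will use `RandomForestClassifier` because it:

✔ Works well on numeric, tabular features
✔ Needs no feature scaling (tree splits do not depend on the scale of a feature)
✔ Typically reaches high accuracy on small educational datasets like this one
✔ Trains fast enough on a Kaggle CPU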

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train-test split: hold out 20% of the rows for evaluation.
# stratify=y_class keeps the Low/Medium/High proportions the same in both
# parts, so a smaller class cannot end up under-represented in the test set
# by chance. random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_class, test_size=0.2, random_state=42, stratify=y_class
)

In [None]:
# Build a 300-tree random forest (seeded so results are reproducible) and fit
# it on the training split. fit() returns the estimator, so the calls chain.
model = RandomForestClassifier(n_estimators=300, random_state=42).fit(
    X_train, y_train
)

# Predicted class labels for the held-out rows
pred = model.predict(X_test)

In [None]:
# Score the held-out predictions: overall accuracy as a percentage first,
# then the per-class report.
accuracy_pct = 100 * accuracy_score(y_test, pred)
report_text = classification_report(y_test, pred)

print("Accuracy:", accuracy_pct)
print("\nClassification Report:\n")
print(report_text)

In [None]:
# Features copied next to each prediction for agent insights.
insight_cols = ["G1", "G2", "studytime", "failures", "absences"]

# One record per test row, in test-split order.
# NOTE(review): "id" is the row's position inside X_test, not the row's index
# in the original DataFrame — confirm that is what consumers expect.
output = [
    {
        "id": position,
        # ML prediction output
        "predicted_performance_class": int(label),  # 0=low,1=medium,2=high
        # int() turns each value into a plain Python int so json can serialize it
        **{col: int(value) for col, value in zip(insight_cols, values)},
    }
    for position, (label, values) in enumerate(
        zip(pred, X_test[insight_cols].itertuples(index=False, name=None))
    )
]

with open('/kaggle/working/student_predictions.json', 'w') as f:
    json.dump(output, f, indent=2)

print("JSON file created successfully!")
