In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score

import warnings
# Silence only library deprecation/future-change chatter. A blanket
# filterwarnings('ignore') would also hide scikit-learn's ConvergenceWarning
# (logistic regression stopping at max_iter before converging) and
# UndefinedMetricWarning (a class that is never predicted), both of which
# directly affect how the results in this notebook should be read.
for noisy_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings('ignore', category=noisy_category)

print('All imports loaded successfully!')

## Step 1 — Load Cleaned Data

In [None]:
# Read the cleaned train and test CSVs into separate frames.
train_df = pd.read_csv(filepath_or_buffer='../datasets/train_cleaned.csv')
test_df = pd.read_csv(filepath_or_buffer='../datasets/test_cleaned.csv')

# Report (rows, columns) for each split, then the training column names.
for shape_label, frame in (
    ('Training data shape :', train_df),
    ('Test data shape     :', test_df),
):
    print(shape_label, frame.shape)
print('\nColumns available   :', train_df.columns.tolist())

In [None]:
# Experiment 3 uses every column of the training frame except the target.
target_col = 'final_grade'
exp3_features = [name for name in train_df.columns if name != target_col]

# The same feature columns are taken from both splits; the target column
# becomes the label vector.
X_train, X_test = train_df[exp3_features], test_df[exp3_features]
y_train, y_test = train_df[target_col], test_df[target_col]

print('Experiment 3 feature count :', len(exp3_features))
print('X_train shape              :', X_train.shape)
print('X_test  shape              :', X_test.shape)

# Numbered listing of every feature fed to the models.
print('\nAll features used:')
for position, feature_name in enumerate(exp3_features, start=1):
    print(f'  {position:2d}. {feature_name}')

# Class counts in the training target, ordered by grade value.
train_target_counts = y_train.value_counts().sort_index()
print('\nTarget distribution (train):')
print(train_target_counts)

## Step 2 — Train Models

1. **Logistic Regression** — linear baseline model
2. **Decision Tree** — non-linear model that captures complex patterns

In [None]:
# Linear baseline. fit() returns the estimator itself, so construction and
# training are chained; random_state is fixed for repeatable runs.
lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

print('Logistic Regression trained successfully!')

In [None]:
# Non-linear comparison model with default tree settings. fit() returns the
# estimator itself, so construction and training are chained.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

print('Decision Tree trained successfully!')

## Step 3 — Evaluate Models

- **Accuracy** — overall correct predictions
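- **Precision** — of the samples predicted as a given grade, how many actually have that grade (reported per class)
- **Recall** — of the samples that actually have a given grade, how many are predicted as that grade (reported per class)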
- **F1 Score** — harmonic mean of precision and recall (important for imbalanced classes)
- **Confusion Matrix** — visual breakdown of predictions vs actual

In [None]:
# Score the held-out test features with each fitted model, in the same
# order as before (logistic regression first, then decision tree).
lr_predictions, dt_predictions = (
    fitted_model.predict(X_test) for fitted_model in (lr_model, dt_model)
)

print('Predictions generated for both models!')

In [None]:
print('LOGISTIC REGRESSION — Results (Exp 3)')

# Share of test rows whose predicted grade matches the true grade.
lr_accuracy = accuracy_score(y_true=y_test, y_pred=lr_predictions)
print(f'\nAccuracy: {lr_accuracy:.4f} ({lr_accuracy*100:.2f}%)')

# Per-class precision / recall / F1 table as formatted text.
lr_report = classification_report(y_true=y_test, y_pred=lr_predictions)
print('\nClassification Report:', lr_report, sep='\n')

In [None]:
print('DECISION TREE — Results (Exp 3)')

# Share of test rows whose predicted grade matches the true grade.
dt_accuracy = accuracy_score(y_true=y_test, y_pred=dt_predictions)
print(f'\nAccuracy: {dt_accuracy:.4f} ({dt_accuracy*100:.2f}%)')

# Per-class precision / recall / F1 table as formatted text.
dt_report = classification_report(y_true=y_test, y_pred=dt_predictions)
print('\nClassification Report:', dt_report, sep='\n')

In [None]:
# Display names for the encoded grades. The "letter(code)" names suggest
# final_grade is stored as the integers 0-5 — TODO confirm against the
# cleaning step. Codes without an entry fall back to their string form.
grade_names = {0: 'f(0)', 1: 'e(1)', 2: 'd(2)', 3: 'c(3)', 4: 'b(4)', 5: 'a(5)'}

# Use every grade present in the train or test target as one shared, sorted
# label set. Both models were fit on y_train, so this covers every value they
# can predict. Passing it explicitly keeps the tick labels aligned with the
# matrix even when a grade is absent from the test split, and gives both
# panels the same axes so they can be compared cell by cell.
grade_codes = np.union1d(y_train, y_test)
grade_labels = [grade_names.get(code, str(code)) for code in grade_codes]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One panel per model: (predictions, colour map, panel title).
panels = (
    (lr_predictions, 'Blues', 'Logistic Regression (Exp 3)'),
    (dt_predictions, 'Greens', 'Decision Tree (Exp 3)'),
)
for panel_ax, (panel_predictions, panel_cmap, panel_title) in zip(axes, panels):
    ConfusionMatrixDisplay.from_predictions(
        y_test, panel_predictions,
        labels=grade_codes,
        display_labels=grade_labels,
        cmap=panel_cmap,
        ax=panel_ax
    )
    panel_ax.set_title(panel_title)

plt.suptitle('Experiment 3 — Confusion Matrices', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## Step 4 — Compare Results Across All 3 Experiments



| Experiment | Feature Set | Features Count |
|:---|:---|:---|
| Exp 1 | Academic + Behavioral | 11 |
| Exp 2 | + Contextual | 14 |
| Exp 3 | All Features | 22 |

In [None]:

# Hard-coded (accuracy, weighted F1) scores for the two earlier experiments;
# presumably copied by hand from previous runs — verify the source.
# NOTE(review): the Exp 1 and Exp 2 values are identical to four decimals —
# confirm this is a genuine result and not a copy-paste slip.
exp1_lr_acc, exp1_lr_f1 = 0.7697, 0.7698
exp1_dt_acc, exp1_dt_f1 = 0.6643, 0.6644

exp2_lr_acc, exp2_lr_f1 = 0.7697, 0.7698
exp2_dt_acc, exp2_dt_f1 = 0.6643, 0.6644

# Weighted F1 for this experiment: per-class F1 averaged by class support.
lr_f1 = f1_score(y_test, lr_predictions, average='weighted')
dt_f1 = f1_score(y_test, dt_predictions, average='weighted')

# One record per (feature set, model) pair, in experiment order.
result_rows = [
    ('Exp 1 (Academic + Behavioral)', 'Logistic Regression', exp1_lr_acc, exp1_lr_f1),
    ('Exp 1 (Academic + Behavioral)', 'Decision Tree', exp1_dt_acc, exp1_dt_f1),
    ('Exp 2 (+ Contextual)', 'Logistic Regression', exp2_lr_acc, exp2_lr_f1),
    ('Exp 2 (+ Contextual)', 'Decision Tree', exp2_dt_acc, exp2_dt_f1),
    ('Exp 3 (All Features)', 'Logistic Regression', lr_accuracy, lr_f1),
    ('Exp 3 (All Features)', 'Decision Tree', dt_accuracy, dt_f1),
]

# Each score is rounded to four decimals as the table is assembled.
comparison = pd.DataFrame(
    [
        (feature_set, model_name, round(acc, 4), round(weighted_f1, 4))
        for feature_set, model_name, acc, weighted_f1 in result_rows
    ],
    columns=['Feature Set', 'Model', 'Accuracy', 'Weighted F1'],
)

print('All Experiments — Results Comparison')
print(comparison.to_string(index=False))

## Observations

- **Academic features** (`math_score`, `science_score`, `english_score`, `overall_score`) remain the **strongest predictors** of `final_grade`.
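- **Contextual features** (`internet_access`, `travel_time`, `extra_activities`) show **no measurable improvement** in the recorded results — the Exp 2 scores in the comparison table are identical to Exp 1 — suggesting environmental factors have limited impact on grade prediction.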
- **Demographic features** (`age`, `gender`, `school_type`, `parent_education`) added in Experiment 3 may provide **marginal gains** — these features influence learning environment but are not direct predictors of scores.
- **Logistic Regression** performs consistently well across all experiments — it handles linear relationships between features and grades effectively.
- **Decision Tree** captures non-linear patterns but is more sensitive to feature noise — adding irrelevant features can sometimes hurt its performance.
- Using **all features** tests the maximum predictive power of the dataset, showing whether more features always lead to better predictions.

### Key Takeaway
More features do **not always** mean better performance. The **quality** of features matters more than **quantity**. Academic scores are by far the most important predictors of final grades.