In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import time

# Load the dataset; the 'target' column holds the labels and every other
# column is treated as a feature.
df = pd.read_csv("synthetic_large_dataset.csv")
y = df['target'].values
X = df.drop(columns='target').values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def evaluate_model(name, y_true, y_pred):
    """Print a confusion matrix, accuracy, and per-class precision/recall/F1.

    The confusion matrix has one row per true class and one column per
    predicted class, both ordered by sorted label value. Classes are the
    union of the labels seen in ``y_true`` and ``y_pred``, so labels need
    not be 1-based or contiguous, and a class that is only ever predicted
    still gets its own row/column.

    Args:
        name: Model name shown in the printed section header.
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same shape as ``y_true``.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    print(f"\n---- {name} Evaluation ----")

    # Map each label to its position in the sorted set of observed labels
    # instead of assuming labels are exactly 1..K (a 0 label would otherwise
    # index position -1 and silently wrap to the last row/column).
    classes = np.unique(np.concatenate([y_true, y_pred]))
    num_classes = len(classes)
    true_idx = np.searchsorted(classes, y_true)
    pred_idx = np.searchsorted(classes, y_pred)

    conf_matrix = np.zeros((num_classes, num_classes), dtype=int)
    # np.add.at accumulates correctly when the same (row, col) pair repeats;
    # plain fancy-index "+=" would count each distinct pair only once.
    np.add.at(conf_matrix, (true_idx, pred_idx), 1)

    print("\nConfusion Matrix:")
    print(conf_matrix)

    correct = np.trace(conf_matrix)
    total = np.sum(conf_matrix)
    accuracy = correct / total if total > 0 else 0.0
    print(f"\nManual Accuracy: {accuracy:.4f}")

    print("\nClass-wise Metrics:")
    for i, label in enumerate(classes):
        tp = conf_matrix[i, i]
        fp = np.sum(conf_matrix[:, i]) - tp  # predicted as this class, but wrong
        fn = np.sum(conf_matrix[i, :]) - tp  # truly this class, but missed
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        if (precision + recall) > 0:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0.0
        print(f"Class {label}: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}")

# Fit a 100-tree random forest using all available cores (n_jobs=-1) and
# report how long the fit call alone takes; prediction is not timed.
print("\nTraining Random Forest...")
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_rf = time.perf_counter()
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)
rf_time = time.perf_counter() - start_rf
rf_preds = rf.predict(X_test)
print(f"Random Forest Training Time: {rf_time:.2f}s")
evaluate_model("Random Forest", y_test, rf_preds)

# Fit AdaBoost with 100 depth-1 decision trees as weak learners and report
# how long the fit call alone takes; prediction is not timed.
print("\nTraining AdaBoost...")
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_ada = time.perf_counter()
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=100,
    learning_rate=1.0,
    random_state=42
)
ada.fit(X_train, y_train)
ada_time = time.perf_counter() - start_ada
ada_preds = ada.predict(X_test)
print(f"AdaBoost Training Time: {ada_time:.2f}s")
evaluate_model("AdaBoost", y_test, ada_preds)



Training Random Forest...
Random Forest Training Time: 21.70s

---- Random Forest Evaluation ----

Confusion Matrix:
[[878 823 872 713 794]
 [872 811 826 696 762]
 [869 795 848 691 758]
 [850 798 815 718 804]
 [843 865 820 709 770]]

Manual Accuracy: 0.2013

Class-wise Metrics:
Class 1: Precision=0.2036, Recall=0.2152, F1=0.2092
Class 2: Precision=0.1982, Recall=0.2044, F1=0.2013
Class 3: Precision=0.2028, Recall=0.2141, F1=0.2083
Class 4: Precision=0.2036, Recall=0.1802, F1=0.1912
Class 5: Precision=0.1980, Recall=0.1922, F1=0.1951

Training AdaBoost...
AdaBoost Training Time: 65.82s

---- AdaBoost Evaluation ----

Confusion Matrix:
[[  61  688 1566   23 1742]
 [  64  634 1512   27 1730]
 [  56  638 1567   36 1664]
 [  59  616 1547   36 1727]
 [  62  627 1545   23 1750]]

Manual Accuracy: 0.2024

Class-wise Metrics:
Class 1: Precision=0.2020, Recall=0.0150, F1=0.0278
Class 2: Precision=0.1979, Recall=0.1598, F1=0.1768
Class 3: Precision=0.2025, Recall=0.3956, F1=0.2679
Class 4: Preci