In [1]:
"""
Classical Machine Learning Baselines for Fashion-MNIST
-----------------------------------------------------
This script trains multiple classical ML models on flattened image data.
It provides a comparative benchmark for deep and quantum-inspired models.
"""

import json
import os
import time

import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tensorflow.keras import datasets

# =========================================
# 1. Load Fashion-MNIST Dataset
# =========================================
print("üì• Loading Fashion-MNIST dataset ...")
(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

# Collapse every image into one flat feature row (28x28 -> 784 features)
# and divide the raw pixel values by 255.0.
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1) / 255.0

# Optional: train on a subset for faster experiments (remove this call to
# train on everything). test_size=0.7 means 70% of the training rows are
# discarded and the remaining 30% are kept; the fixed seed keeps the
# selected subset the same from run to run.
x_train, _, y_train, _ = train_test_split(
    x_train,
    y_train,
    test_size=0.7,
    random_state=42,
)

# Standardize features. The scaler's statistics come from the training
# subset only and are then reused unchanged for the test set.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# =========================================
# 2. Define Models
# =========================================
# Maps a human-readable display name to an unfitted scikit-learn estimator.
# The display name is reused later as the log label, as the key in the
# results summary, and (lower-cased, spaces -> underscores) as the file name
# of the saved model, so renaming an entry also renames its output file.
#
# Every estimator that draws random numbers during fitting is seeded with
# random_state=42 so that repeated runs of this benchmark produce the same
# accuracies. Previously only the Random Forest was seeded, which made the
# Gradient Boosting and MLP numbers vary from run to run.
models = {
    "Logistic Regression": LogisticRegression(max_iter=300, n_jobs=-1),
    "SVM (RBF Kernel)": SVC(kernel='rbf', C=3, gamma='scale'),
    "Random Forest": RandomForestClassifier(n_estimators=150, random_state=42, n_jobs=-1),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "KNN (k=5)": KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
    # max_iter=30 keeps MLP training short; scikit-learn may emit a
    # ConvergenceWarning if the optimizer has not converged by then.
    "MLP (Sklearn)": MLPClassifier(
        hidden_layer_sizes=(256, 128),
        activation='relu',
        max_iter=30,
        random_state=42,
    ),
}

# =========================================
# 3. Train & Evaluate Models
# =========================================
os.makedirs("models/classical", exist_ok=True)

# Display name -> test accuracy in percent, rounded to two decimals.
results = {}

# Every model below is trained on standardized features, so a reloaded model
# is only usable together with the exact scaler that was fitted here.
# Persist it next to the models so inference code can reproduce the
# preprocessing.
joblib.dump(scaler, "models/classical/scaler.pkl")

for name, model in models.items():
    print(f"\nüöÄ Training {name} ...")

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed time; time.time() can jump if the system clock is
    # adjusted mid-run. Only the fit() call is timed, not prediction.
    start = time.perf_counter()
    model.fit(x_train, y_train)
    duration = time.perf_counter() - start

    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = round(acc * 100, 2)

    print(f"‚úÖ {name} Accuracy: {acc*100:.2f}% | Time: {duration:.1f}s")
    print(classification_report(y_test, y_pred, digits=3))

    # Save the fitted model. The file name is the display name lower-cased
    # with spaces replaced by underscores; other characters are kept as-is
    # (e.g. "KNN (k=5)" -> "knn_(k=5).pkl").
    joblib.dump(model, f"models/classical/{name.replace(' ','_').lower()}.pkl")

# =========================================
# 4. Save Results Summary
# =========================================
# Write the name -> accuracy (percent) mapping collected during training to
# a JSON file, then echo the same numbers to stdout.
os.makedirs("results", exist_ok=True)

# An explicit encoding keeps the output file independent of the platform's
# locale default.
with open("results/classical_model_accuracies.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4)

print("\nüìä Final Accuracy Summary:")
for k, v in results.items():
    print(f"{k}: {v:.2f}%")

print("\n‚úÖ All classical models trained and saved successfully.")


üì• Loading Fashion-MNIST dataset ...

üöÄ Training Logistic Regression ...
‚úÖ Logistic Regression Accuracy: 80.88% | Time: 23.3s
              precision    recall  f1-score   support

           0      0.757     0.774     0.765      1000
           1      0.950     0.956     0.953      1000
           2      0.686     0.701     0.693      1000
           3      0.818     0.798     0.808      1000
           4      0.679     0.715     0.697      1000
           5      0.912     0.893     0.902      1000
           6      0.570     0.507     0.537      1000
           7      0.889     0.915     0.902      1000
           8      0.899     0.901     0.900      1000
           9      0.910     0.928     0.919      1000

    accuracy                          0.809     10000
   macro avg      0.807     0.809     0.808     10000
weighted avg      0.807     0.809     0.808     10000


üöÄ Training SVM (RBF Kernel) ...
‚úÖ SVM (RBF Kernel) Accuracy: 87.31% | Time: 64.0s
              precis



‚úÖ MLP (Sklearn) Accuracy: 86.51% | Time: 54.1s
              precision    recall  f1-score   support

           0      0.787     0.816     0.801      1000
           1      0.977     0.969     0.973      1000
           2      0.843     0.749     0.793      1000
           3      0.893     0.819     0.854      1000
           4      0.775     0.819     0.796      1000
           5      0.967     0.936     0.951      1000
           6      0.623     0.698     0.658      1000
           7      0.907     0.967     0.936      1000
           8      0.963     0.938     0.950      1000
           9      0.959     0.940     0.949      1000

    accuracy                          0.865     10000
   macro avg      0.869     0.865     0.866     10000
weighted avg      0.869     0.865     0.866     10000


üìä Final Accuracy Summary:
Logistic Regression: 80.88%
SVM (RBF Kernel): 87.31%
Random Forest: 86.18%
Gradient Boosting: 85.53%
KNN (k=5): 82.81%
MLP (Sklearn): 86.51%

‚úÖ All classical mo