In [21]:
import numpy as np
import time
import json
import os
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score
from datetime import datetime

# Root folder holding one sub-directory per preprocessed dataset.
path = "../../data/processed/"

# Dataset folder names share the pattern "mnist_<pair>_pca_<k>"; generate the
# four combinations in the same order they were previously listed by hand.
datasets = [
    f"mnist_{pair}_pca_{k}"
    for pair in ("01", "38")
    for k in (4, 8)
]

# Training-set sizes iterated over by the subsampling experiment.
sample_sizes = [500, 2000]

# Accumulator shared by the experiment cells: run-level metadata plus a list
# that receives one record per fitted model.
all_results = dict(
    experiment_info=dict(
        model_type="classical_svm_rbf",
        date=datetime.now().isoformat(),
        random_state=42,
    ),
    results=[],
)

In [22]:
# Baseline run: fit an RBF-kernel SVM on the complete stored training split of
# every dataset (the original note refers to this as the "5k" run) and record
# accuracy, macro-F1 and wall-clock timings in all_results["results"].
for dataset in datasets:
    # os.path.join works whether or not `path` ends with a separator.
    dataset_path = os.path.join(path, dataset)

    X_train = np.load(os.path.join(dataset_path, "X_train.npy"))
    X_test = np.load(os.path.join(dataset_path, "X_test.npy"))
    y_train = np.load(os.path.join(dataset_path, "y_train.npy"))
    y_test = np.load(os.path.join(dataset_path, "y_test.npy"))

    print(f"\nData loaded successfully from {dataset_path}")
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Test samples: {X_test.shape[0]}")

    # Train
    # perf_counter() is a monotonic, high-resolution clock; time.time() can be
    # too coarse (and may jump) for fits that finish within milliseconds.
    start_time = time.perf_counter()
    svc_classical = SVC(kernel='rbf', random_state=42)
    svc_classical.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Inference
    start_time = time.perf_counter()
    y_pred = svc_classical.predict(X_test)
    inference_time = time.perf_counter() - start_time

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')

    # Store results for this dataset. Values are cast to built-in types so the
    # record is JSON-serialisable; "timestamp" keeps the schema identical to
    # the records written by the subsampling experiment.
    result = {
        "dataset": dataset,
        "n_train": int(X_train.shape[0]),
        "n_test": int(X_test.shape[0]),
        "n_features": int(X_train.shape[1]),
        "accuracy": float(accuracy),
        "f1_score": float(f1),
        "training_time_seconds": float(training_time),
        "inference_time_seconds": float(inference_time),
        "timestamp": datetime.now().isoformat()
    }

    all_results["results"].append(result)

    print(f"Classical SVM Results for {dataset}")
    print(f"  Accuracy: {accuracy:.4f}")
    print(f"  F1-Score: {f1:.4f}")
    print(f"  Training time: {training_time:.4f} seconds")
    print(f"  Inference time: {inference_time:.4f} seconds")
    print(classification_report(y_test, y_pred))

# Destination of the JSON results file. Nothing is written in this cell; the
# file is produced further down, once all experiments have added their records.
output_path = "../../results/classical_svm_baseline_results.json"


Data loaded successfully from ../../data/processed/mnist_01_pca_4
Training samples: 4000
Test samples: 1000
Classical SVM Results for mnist_01_pca_4
  Accuracy: 0.9970
  F1-Score: 0.9970
  Training time: 0.0102 seconds
  Inference time: 0.0030 seconds
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       486
           1       0.99      1.00      1.00       514

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000


Data loaded successfully from ../../data/processed/mnist_01_pca_8
Training samples: 4000
Test samples: 1000
Classical SVM Results for mnist_01_pca_8
  Accuracy: 0.9970
  F1-Score: 0.9970
  Training time: 0.0065 seconds
  Inference time: 0.0023 seconds
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       486
           1       0.99      1.00      1.00       514

   

In [23]:
# Subsampling experiment: for every dataset, train an RBF-kernel SVM on
# stratified subsets of the training split (sizes from `sample_sizes`),
# evaluate on the untouched test split, and add one record per run to
# all_results["results"]. Finally write everything to `output_path`, which is
# defined in an earlier cell.
for dataset in datasets:
    # os.path.join works whether or not `path` ends with a separator.
    dataset_path = os.path.join(path, dataset)

    # Load full training data
    X_train_full = np.load(os.path.join(dataset_path, "X_train.npy"))
    X_test = np.load(os.path.join(dataset_path, "X_test.npy"))
    y_train_full = np.load(os.path.join(dataset_path, "y_train.npy"))
    y_test = np.load(os.path.join(dataset_path, "y_test.npy"))

    print(f"\n{'='*60}")
    print(f"Dataset: {dataset}")
    print(f"{'='*60}")

    n_available = X_train_full.shape[0]

    for n_samples in sample_sizes:
        # train_test_split must leave at least one row in the discarded part,
        # so a request for the whole split (or more) would raise mid-run.
        if n_samples >= n_available:
            print(f"\nSkipping {n_samples} samples: only {n_available} training rows available")
            continue

        # Subsample training data; stratify keeps the class proportions of the
        # full split and the fixed seed makes the subset reproducible.
        X_train, _, y_train, _ = train_test_split(
            X_train_full, y_train_full,
            train_size=n_samples,
            random_state=42,
            stratify=y_train_full
        )

        print(f"\nTraining with {n_samples} samples...")

        # Train
        # perf_counter() is a monotonic, high-resolution clock; time.time()
        # can be too coarse for fits that finish within milliseconds.
        start_time = time.perf_counter()
        svc_classical = SVC(kernel='rbf', random_state=42)
        svc_classical.fit(X_train, y_train)
        training_time = time.perf_counter() - start_time

        # Inference
        start_time = time.perf_counter()
        y_pred = svc_classical.predict(X_test)
        inference_time = time.perf_counter() - start_time

        # Metrics
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='macro')

        # Store result; n_train is the number of rows actually used for the fit.
        result = {
            "dataset": dataset,
            "n_train": int(X_train.shape[0]),
            "n_test": int(X_test.shape[0]),
            "n_features": int(X_train.shape[1]),
            "accuracy": float(accuracy),
            "f1_score": float(f1),
            "training_time_seconds": float(training_time),
            "inference_time_seconds": float(inference_time),
            "timestamp": datetime.now().isoformat()
        }

        all_results["results"].append(result)

        print(f"  Accuracy: {accuracy:.4f}")
        print(f"  F1-Score: {f1:.4f}")
        print(f"  Training time: {training_time:.4f}s")

# Create the results folder if needed so a missing directory cannot discard
# the finished runs; dirname is empty when the file lives in the working dir.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Mode 'w' replaces any previous file with the full in-memory results.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2)

print(f"\n✓ Results appended and saved to {output_path}")


Dataset: mnist_01_pca_4

Training with 500 samples...
  Accuracy: 0.9950
  F1-Score: 0.9950
  Training time: 0.0019s

Training with 2000 samples...
  Accuracy: 0.9970
  F1-Score: 0.9970
  Training time: 0.0041s

Dataset: mnist_01_pca_8

Training with 500 samples...
  Accuracy: 0.9970
  F1-Score: 0.9970
  Training time: 0.0008s

Training with 2000 samples...
  Accuracy: 0.9970
  F1-Score: 0.9970
  Training time: 0.0031s

Dataset: mnist_38_pca_4

Training with 500 samples...
  Accuracy: 0.8550
  F1-Score: 0.8546
  Training time: 0.0022s

Training with 2000 samples...
  Accuracy: 0.8720
  F1-Score: 0.8718
  Training time: 0.0189s

Dataset: mnist_38_pca_8

Training with 500 samples...
  Accuracy: 0.9370
  F1-Score: 0.9368
  Training time: 0.0012s

Training with 2000 samples...
  Accuracy: 0.9550
  F1-Score: 0.9549
  Training time: 0.0081s

✓ Results appended and saved to ../../results/classical_svm_baseline_results.json
