SVM classifiers on MNIST and FashionMNIST datasets



In [2]:

!pip install numpy pandas scikit-learn matplotlib seaborn torch torchvision tqdm joblib



In [3]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides warnings raised by the libraries
# used below -- consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

In [4]:
import numpy as np
import pandas as pd
import time
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import joblib
import os


In [5]:
def load_dataset(dataset_name='MNIST', sample_size=10000, test_sample_size=2000):
    """Load a leading subset of MNIST or FashionMNIST as flattened NumPy arrays.

    Each image goes through ``ToTensor`` followed by ``Normalize`` with mean
    0.5 and std 0.5, and is then flattened to a 1-D vector.

    Args:
        dataset_name: 'MNIST' selects ``torchvision.datasets.MNIST``; any
            other value selects ``torchvision.datasets.FashionMNIST``.
        sample_size: Maximum number of training images to load. The first
            ``sample_size`` items of the training split are used (no shuffling).
        test_sample_size: Maximum number of test images to load, taken from
            the start of the test split. Defaults to 2000.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays, with one
        flattened image per row in the ``X`` arrays and the matching labels in
        the ``y`` arrays.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Both dataset classes share the same constructor signature, so pick the
    # class once instead of duplicating the train/test construction.
    if dataset_name == 'MNIST':
        dataset_cls = torchvision.datasets.MNIST
    else:  # FashionMNIST
        dataset_cls = torchvision.datasets.FashionMNIST

    dataset = dataset_cls(root='./data', train=True, download=True, transform=transform)
    test_dataset = dataset_cls(root='./data', train=False, download=True, transform=transform)

    print(f"Loading {dataset_name} dataset...")

    def _collect(split, limit, desc):
        """Return (features, labels) arrays for the first `limit` items of `split`."""
        features = []
        labels = []
        for i in tqdm(range(min(limit, len(split))), desc=desc):
            img, label = split[i]
            features.append(img.numpy().flatten())
            labels.append(label)
        return np.array(features), np.array(labels)

    # Sample sizes are capped to keep SVM training time manageable.
    X_train, y_train = _collect(dataset, sample_size, "Loading train data")
    X_test, y_test = _collect(test_dataset, test_sample_size, "Loading test data")

    print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

    return X_train, y_train, X_test, y_test

In [6]:
def preprocess_data(X_train, X_test):
    """Standardize both splits using statistics fitted on the training split.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled)``.
    """
    # Fit on the training data only so no test-set statistics leak into scaling.
    standardizer = StandardScaler().fit(X_train)
    return standardizer.transform(X_train), standardizer.transform(X_test)

In [7]:
def train_svm(X_train, y_train, X_test, y_test, kernel='rbf', C=1.0, gamma='scale',
              degree=3, coef0=0.0, verbose=True):
    """Fit one SVC, time fitting and prediction, and report test accuracy.

    Args:
        X_train, y_train: Training features and labels passed to ``SVC.fit``.
        X_test, y_test: Evaluation features and labels.
        kernel: Kernel name forwarded to ``SVC``.
        C: Regularization parameter forwarded to ``SVC``.
        gamma: Kernel coefficient forwarded to ``SVC``.
        degree: Polynomial degree; only forwarded when ``kernel == 'poly'``.
        coef0: Independent kernel term; only forwarded when ``kernel == 'poly'``.
        verbose: When true, print the configuration, timings and accuracy.

    Returns:
        Dict with keys 'kernel', 'C', 'gamma', 'degree', 'coef0',
        'train_time_ms', 'test_time_ms', 'test_accuracy_%' and 'model'.
        'degree' and 'coef0' are ``None`` unless the kernel is 'poly'.
    """
    is_poly = kernel == 'poly'

    if verbose:
        config = f"kernel={kernel}, C={C}, gamma={gamma}"
        if is_poly:
            # degree is only passed to the model for the polynomial kernel, so
            # only report it there (mirrors the returned dict).
            config += f", degree={degree}"
        print(f"\nTraining SVM with: {config}")

    # Create SVM model
    svm_params = {'kernel': kernel, 'C': C, 'gamma': gamma,
                  'random_state': 42, 'verbose': 0}
    if is_poly:
        svm_params.update(degree=degree, coef0=coef0)
    svm = SVC(**svm_params)

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    svm.fit(X_train, y_train)
    train_time = (time.perf_counter() - start_time) * 1000  # milliseconds

    start_time = time.perf_counter()
    y_pred = svm.predict(X_test)
    test_time = (time.perf_counter() - start_time) * 1000  # milliseconds

    accuracy = accuracy_score(y_test, y_pred) * 100  # percent

    if verbose:
        print(f"Training time: {train_time:.2f} ms")
        print(f"Testing time: {test_time:.2f} ms")
        print(f"Test Accuracy: {accuracy:.2f}%")

    return {
        'kernel': kernel,
        'C': C,
        'gamma': gamma,
        'degree': degree if is_poly else None,
        'coef0': coef0 if is_poly else None,
        'train_time_ms': train_time,
        'test_time_ms': test_time,
        'test_accuracy_%': accuracy,
        'model': svm
    }

In [8]:
def run_svm_experiments(dataset_name='MNIST', sample_size=5000):
    """Train and evaluate an SVM for every configuration in a fixed grid.

    The grid covers the 'rbf' and 'poly' kernels. RBF runs vary C and gamma;
    polynomial runs vary C, gamma and degree with coef0 fixed at 1.0.

    Args:
        dataset_name: Dataset name forwarded to ``load_dataset``.
        sample_size: Training sample cap forwarded to ``load_dataset``.

    Returns:
        Tuple ``(df_results, models)``: a DataFrame with one row per
        experiment (every key returned by ``train_svm`` except 'model', plus
        a 'dataset' column) and the list of fitted models in the same order
        as the rows.
    """
    print(f"\n{'='*80}")
    print(f"RUNNING SVM EXPERIMENTS ON {dataset_name.upper()}")
    print(f"{'='*80}")

    # Load dataset
    X_train, y_train, X_test, y_test = load_dataset(dataset_name, sample_size)

    # Preprocess data
    print("\nPreprocessing data...")
    X_train_scaled, X_test_scaled = preprocess_data(X_train, X_test)

    # Define hyperparameter grid
    kernels = ['rbf', 'poly']
    C_values = [0.1, 1.0]
    gamma_values = ['scale', 'auto', 0.01, 0.1]
    degrees = [2, 3]  # For polynomial kernel

    def _param_grid(kernel):
        """Return the list of train_svm keyword sets to try for `kernel`."""
        if kernel == 'poly':
            return [
                {'C': C, 'gamma': gamma, 'degree': degree, 'coef0': 1.0}
                for C in C_values
                for gamma in gamma_values
                for degree in degrees
            ]
        # Any kernel other than rbf/poly is run with gamma='scale' only.
        kernel_gammas = gamma_values if kernel == 'rbf' else ['scale']
        return [
            {'C': C, 'gamma': gamma}
            for C in C_values
            for gamma in kernel_gammas
        ]

    # Build the plan once so the announced total always equals the number of
    # experiments actually run.
    plan = [(kernel, _param_grid(kernel)) for kernel in kernels]
    total_experiments = sum(len(grid) for _, grid in plan)

    print(f"\nTotal experiments to run: {total_experiments}")

    results = []
    models = []
    experiment_count = 0

    # Run experiments for each kernel
    for kernel, grid in plan:
        print(f"\n{'='*60}")
        print(f"Testing {kernel.upper()} kernel")
        print('='*60)

        for params in grid:
            experiment_count += 1
            print(f"\n[{experiment_count}/{total_experiments}] ", end="")

            result = train_svm(
                X_train_scaled, y_train, X_test_scaled, y_test,
                kernel=kernel, verbose=True, **params
            )

            result['dataset'] = dataset_name
            results.append(result)
            models.append(result['model'])

    # Fitted models are returned separately; keep them out of the table.
    df_results = pd.DataFrame([{k: v for k, v in r.items() if k != 'model'} for r in results])

    return df_results, models


In [9]:
def compare_datasets():
    """Run the SVM grid on MNIST and then FashionMNIST and persist the results.

    Returns:
        Tuple ``(combined_results, mnist_models, fashion_models)``. The
        combined frame holds the MNIST rows followed by the FashionMNIST rows
        with a fresh 0..n-1 index; it is also written to 'svm_results.csv'.
    """
    # Dict insertion order keeps MNIST ahead of FashionMNIST.
    runs = {
        name: run_svm_experiments(name, sample_size=10000)
        for name in ('MNIST', 'FashionMNIST')
    }
    _, mnist_models = runs['MNIST']
    _, fashion_models = runs['FashionMNIST']

    combined_results = pd.concat(
        [frame for frame, _ in runs.values()],
        ignore_index=True,
    )

    combined_results.to_csv('svm_results.csv', index=False)
    print(f"\nResults saved to 'svm_results.csv'")

    return combined_results, mnist_models, fashion_models


In [10]:
def analyze_results(results_df):
    """Print the best configuration per dataset and return the per-dataset rows.

    NOTE: a later cell in this notebook defines ``analyze_results`` again with
    additional summary sections; after running all cells that later
    definition is the one in effect.

    Args:
        results_df: DataFrame with 'dataset', 'kernel', 'C', 'gamma',
            'degree', 'test_accuracy_%', 'train_time_ms' and 'test_time_ms'
            columns.

    Returns:
        Tuple ``(mnist_results, fashion_results)``: the rows of ``results_df``
        whose 'dataset' is 'MNIST' and 'FashionMNIST' respectively.
    """
    print(f"\n{'='*80}")
    print("RESULTS ANALYSIS")
    print('='*80)

    # Separate datasets
    mnist_results = results_df[results_df['dataset'] == 'MNIST']
    fashion_results = results_df[results_df['dataset'] == 'FashionMNIST']

    # Best results for each dataset
    print("\nBEST RESULTS BY DATASET:")
    print("-" * 40)

    for dataset_name, df in [('MNIST', mnist_results), ('FashionMNIST', fashion_results)]:
        print(f"\n{dataset_name}:")
        if df.empty:
            # idxmax() raises on an empty selection; report and move on.
            print("  No results available")
            continue

        best_result = df.loc[df['test_accuracy_%'].idxmax()]

        print(f"  Kernel: {best_result['kernel']}")
        print(f"  C: {best_result['C']}")
        print(f"  Gamma: {best_result['gamma']}")
        if best_result['kernel'] == 'poly':
            print(f"  Degree: {best_result['degree']}")
        print(f"  Accuracy: {best_result['test_accuracy_%']:.2f}%")
        print(f"  Training Time: {best_result['train_time_ms']:.2f} ms")
        print(f"  Testing Time: {best_result['test_time_ms']:.2f} ms")

    # Callers unpack two frames from this function.
    return mnist_results, fashion_results


In [11]:
def analyze_results(results_df):
    """Print best-run, summary and per-kernel reports for each dataset.

    Datasets with no rows in ``results_df`` are reported as such and skipped
    instead of raising.

    Args:
        results_df: DataFrame with 'dataset', 'kernel', 'C', 'gamma',
            'degree', 'test_accuracy_%', 'train_time_ms' and 'test_time_ms'
            columns.

    Returns:
        Tuple ``(mnist_results, fashion_results)``: the rows of ``results_df``
        whose 'dataset' is 'MNIST' and 'FashionMNIST' respectively.
    """
    print(f"\n{'='*80}")
    print("RESULTS ANALYSIS")
    print('='*80)

    # Separate datasets
    mnist_results = results_df[results_df['dataset'] == 'MNIST']
    fashion_results = results_df[results_df['dataset'] == 'FashionMNIST']
    per_dataset = [('MNIST', mnist_results), ('FashionMNIST', fashion_results)]
    # Aggregations such as idxmax() raise on an empty selection, so the
    # reports below only cover datasets that actually have rows.
    available = [(name, df) for name, df in per_dataset if not df.empty]

    # Best results for each dataset
    print("\nBEST RESULTS BY DATASET:")
    print("-" * 40)

    for dataset_name, df in per_dataset:
        print(f"\n{dataset_name}:")
        if df.empty:
            print("  No results available")
            continue

        best_result = df.loc[df['test_accuracy_%'].idxmax()]

        print(f"  Kernel: {best_result['kernel']}")
        print(f"  C: {best_result['C']}")
        print(f"  Gamma: {best_result['gamma']}")
        if best_result['kernel'] == 'poly':
            print(f"  Degree: {best_result['degree']}")
        print(f"  Accuracy: {best_result['test_accuracy_%']:.2f}%")
        print(f"  Training Time: {best_result['train_time_ms']:.2f} ms")
        print(f"  Testing Time: {best_result['test_time_ms']:.2f} ms")

    # Summary statistics
    print(f"\n{'='*80}")
    print("SUMMARY STATISTICS")
    print('='*80)

    for dataset_name, df in available:
        accuracy = df['test_accuracy_%']
        print(f"\n{dataset_name}:")
        print(f"  Number of experiments: {len(df)}")
        print(f"  Average accuracy: {accuracy.mean():.2f}%")
        print(f"  Max accuracy: {accuracy.max():.2f}%")
        print(f"  Min accuracy: {accuracy.min():.2f}%")
        print(f"  Average training time: {df['train_time_ms'].mean():.2f} ms")
        print(f"  Average testing time: {df['test_time_ms'].mean():.2f} ms")

    # Results by kernel
    print(f"\n{'='*80}")
    print("RESULTS BY KERNEL")
    print('='*80)

    for dataset_name, df in available:
        print(f"\n{dataset_name}:")
        for kernel in df['kernel'].unique():
            kernel_df = df[df['kernel'] == kernel]
            print(f"  {kernel.upper()} kernel:")
            print(f"    Experiments: {len(kernel_df)}")
            print(f"    Best accuracy: {kernel_df['test_accuracy_%'].max():.2f}%")
            print(f"    Avg accuracy: {kernel_df['test_accuracy_%'].mean():.2f}%")
            print(f"    Avg training time: {kernel_df['train_time_ms'].mean():.2f} ms")

    return mnist_results, fashion_results

In [12]:
def create_results_table(results_df):
    """Print every experiment as a formatted table and return that table.

    Rows are ordered by dataset name (ascending) and, within a dataset, by
    test accuracy (descending). Accuracy is rendered with two decimals and
    the timings with one; a missing polynomial degree is shown as '-'.

    Args:
        results_df: DataFrame with 'dataset', 'kernel', 'C', 'gamma',
            'degree', 'test_accuracy_%', 'train_time_ms' and 'test_time_ms'
            columns.

    Returns:
        DataFrame of display-ready values with columns 'Dataset', 'Kernel',
        'C', 'Gamma', 'Degree', 'Accuracy%', 'TrainTime(ms)', 'TestTime(ms)'.
    """
    print(f"\n{'='*120}")
    print("DETAILED SVM RESULTS")
    print('='*120)

    # Sort by accuracy
    results_sorted = results_df.sort_values(['dataset', 'test_accuracy_%'], ascending=[True, False])

    # Create formatted strings
    formatted_df = pd.DataFrame([
        {
            'Dataset': row['dataset'],
            'Kernel': row['kernel'],
            'C': row['C'],
            'Gamma': row['gamma'],
            'Degree': row['degree'] if pd.notna(row['degree']) else '-',
            'Accuracy%': f"{row['test_accuracy_%']:.2f}",
            'TrainTime(ms)': f"{row['train_time_ms']:.1f}",
            'TestTime(ms)': f"{row['test_time_ms']:.1f}"
        }
        for _, row in results_sorted.iterrows()
    ])

    # option_context restores whatever display settings were active before the
    # call (rather than the library defaults) and does so even if printing fails.
    with pd.option_context('display.max_rows', None,
                           'display.width', None,
                           'display.max_colwidth', None):
        print(formatted_df.to_string(index=False))

    return formatted_df

In [17]:
def save_best_models(mnist_models, fashion_models, results_df):
    """Save the highest-accuracy model of each dataset with joblib.

    The best model is located by its position within that dataset's rows of
    ``results_df``, so the lookup does not depend on the frame's index labels
    or on which dataset's rows come first.

    Args:
        mnist_models: Fitted models for the 'MNIST' rows, in row order.
        fashion_models: Fitted models for the 'FashionMNIST' rows, in row order.
        results_df: DataFrame with 'dataset' and 'test_accuracy_%' columns.

    Raises:
        ValueError: If a dataset has no rows, or its number of rows differs
            from the number of models supplied for it.
    """
    # Create models directory
    os.makedirs('svm_models', exist_ok=True)

    targets = [
        ('MNIST', mnist_models, 'svm_models/mnist_best_svm.pkl'),
        ('FashionMNIST', fashion_models, 'svm_models/fashion_best_svm.pkl'),
    ]

    # Resolve both winners before writing anything, so a mismatch in either
    # dataset is reported before any file is produced.
    to_save = []
    for dataset_name, models, path in targets:
        accuracies = results_df.loc[results_df['dataset'] == dataset_name, 'test_accuracy_%']
        if accuracies.empty:
            raise ValueError(f"No results found for dataset '{dataset_name}'")
        if len(accuracies) != len(models):
            raise ValueError(
                f"{dataset_name}: {len(accuracies)} result rows but "
                f"{len(models)} models; they must correspond one-to-one"
            )
        # Dropping the index turns idxmax() into a position within this
        # dataset's rows, which is what indexes the model list.
        best_position = int(accuracies.reset_index(drop=True).idxmax())
        to_save.append((models[best_position], path))

    # Save best models
    for model, path in to_save:
        joblib.dump(model, path)

    print(f"\nBest models saved to 'svm_models/' directory")
    print(f"MNIST best model: svm_models/mnist_best_svm.pkl")
    print(f"FashionMNIST best model: svm_models/fashion_best_svm.pkl")

In [14]:
# Main execution: run the full grid on both datasets, report, and persist.
if __name__ == "__main__":
    print("SVM CLASSIFIER EXPERIMENTS")
    print("="*80)
    print("Training SVM on MNIST and FashionMNIST with different kernels and hyperparameters")
    # Only the kernels actually present in run_svm_experiments' grid are listed.
    print("Kernels: 'rbf', 'poly'")
    print("="*80)

    # Run experiments (also writes 'svm_results.csv')
    results_df, mnist_models, fashion_models = compare_datasets()

    # Analyze results
    mnist_results, fashion_results = analyze_results(results_df)

    # Create detailed table
    formatted_df = create_results_table(results_df)

    # Save best models
    save_best_models(mnist_models, fashion_models, results_df)

    # Save formatted results
    formatted_df.to_csv('svm_formatted_results.csv', index=False)
    print(f"\nFormatted results saved to 'svm_formatted_results.csv'")

    print(f"\n{'='*80}")
    print("EXPERIMENTS COMPLETED SUCCESSFULLY!")
    print('='*80)

SVM CLASSIFIER EXPERIMENTS
Training SVM on MNIST and FashionMNIST with different kernels and hyperparameters
Kernels: 'poly', 'rbf', 'linear'

RUNNING SVM EXPERIMENTS ON MNIST


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.94MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 131kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.24MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 12.4MB/s]


Loading MNIST dataset...


Loading train data: 100%|██████████| 10000/10000 [00:10<00:00, 996.98it/s]
Loading test data: 100%|██████████| 2000/2000 [00:02<00:00, 962.19it/s]


Train shape: (10000, 784), Test shape: (2000, 784)

Preprocessing data...

Total experiments to run: 24

Testing RBF kernel

[1/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=scale, degree=3
Training time: 20365.05 ms
Testing time: 8962.91 ms
Test Accuracy: 85.70%

[2/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=auto, degree=3
Training time: 18993.12 ms
Testing time: 8624.78 ms
Test Accuracy: 85.95%

[3/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=0.01, degree=3
Training time: 45630.34 ms
Testing time: 13364.85 ms
Test Accuracy: 50.65%

[4/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=0.1, degree=3
Training time: 52109.79 ms
Testing time: 16152.96 ms
Test Accuracy: 11.70%

[5/24] 
Training SVM with: kernel=rbf, C=1.0, gamma=scale, degree=3
Training time: 11038.53 ms
Testing time: 4911.35 ms
Test Accuracy: 92.10%

[6/24] 
Training SVM with: kernel=rbf, C=1.0, gamma=auto, degree=3
Training time: 10291.02 ms
Testing time: 4658.59 ms
Test Accuracy: 91.70%

[7/24] 
Training SVM

100%|██████████| 26.4M/26.4M [00:02<00:00, 9.19MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 174kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.53MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 13.6MB/s]


Loading FashionMNIST dataset...


Loading train data: 100%|██████████| 10000/10000 [00:02<00:00, 3779.66it/s]
Loading test data: 100%|██████████| 2000/2000 [00:00<00:00, 5269.83it/s]


Train shape: (10000, 784), Test shape: (2000, 784)

Preprocessing data...

Total experiments to run: 24

Testing RBF kernel

[1/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=scale, degree=3
Training time: 14319.05 ms
Testing time: 8672.47 ms
Test Accuracy: 80.55%

[2/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=auto, degree=3
Training time: 14515.14 ms
Testing time: 7927.66 ms
Test Accuracy: 80.55%

[3/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=0.01, degree=3
Training time: 44581.84 ms
Testing time: 13518.47 ms
Test Accuracy: 49.40%

[4/24] 
Training SVM with: kernel=rbf, C=0.1, gamma=0.1, degree=3
Training time: 52697.47 ms
Testing time: 14824.00 ms
Test Accuracy: 10.15%

[5/24] 
Training SVM with: kernel=rbf, C=1.0, gamma=scale, degree=3
Training time: 9097.52 ms
Testing time: 6450.05 ms
Test Accuracy: 85.65%

[6/24] 
Training SVM with: kernel=rbf, C=1.0, gamma=auto, degree=3
Training time: 9332.14 ms
Testing time: 5303.51 ms
Test Accuracy: 85.65%

[7/24] 
Training SVM w

IndexError: list index out of range

In [15]:
# Display the MNIST rows returned by analyze_results().
# NOTE(review): relies on `mnist_results` already existing in the kernel from
# an earlier cell; this fails on a fresh kernel if that cell has not run.
mnist_results

Unnamed: 0,kernel,C,gamma,degree,coef0,train_time_ms,test_time_ms,test_accuracy_%,dataset
0,rbf,0.1,scale,,,20365.053177,8962.910175,85.7,MNIST
1,rbf,0.1,auto,,,18993.120432,8624.782801,85.95,MNIST
2,rbf,0.1,0.01,,,45630.337477,13364.848137,50.65,MNIST
3,rbf,0.1,0.1,,,52109.793663,16152.964115,11.7,MNIST
4,rbf,1.0,scale,,,11038.534403,4911.353111,92.1,MNIST
5,rbf,1.0,auto,,,10291.016579,4658.588409,91.7,MNIST
6,rbf,1.0,0.01,,,44529.312611,12100.209713,77.5,MNIST
7,rbf,1.0,0.1,,,53958.088636,16073.771715,18.6,MNIST
8,poly,0.1,scale,2.0,1.0,10682.586432,3091.583252,90.1,MNIST
9,poly,0.1,scale,3.0,1.0,9025.827408,2341.386795,91.45,MNIST


In [16]:
# Display the FashionMNIST rows returned by analyze_results().
# NOTE(review): relies on `fashion_results` already existing in the kernel from
# an earlier cell; this fails on a fresh kernel if that cell has not run.
fashion_results

Unnamed: 0,kernel,C,gamma,degree,coef0,train_time_ms,test_time_ms,test_accuracy_%,dataset
24,rbf,0.1,scale,,,14319.054365,8672.468901,80.55,FashionMNIST
25,rbf,0.1,auto,,,14515.142202,7927.663088,80.55,FashionMNIST
26,rbf,0.1,0.01,,,44581.836939,13518.473625,49.4,FashionMNIST
27,rbf,0.1,0.1,,,52697.471857,14823.996544,10.15,FashionMNIST
28,rbf,1.0,scale,,,9097.523928,6450.047493,85.65,FashionMNIST
29,rbf,1.0,auto,,,9332.140684,5303.508997,85.65,FashionMNIST
30,rbf,1.0,0.01,,,44149.310589,11590.322733,73.6,FashionMNIST
31,rbf,1.0,0.1,,,53481.520891,14534.980774,14.7,FashionMNIST
32,poly,0.1,scale,2.0,1.0,8933.977127,3404.848337,83.3,FashionMNIST
33,poly,0.1,scale,3.0,1.0,7942.418575,2938.693047,85.25,FashionMNIST


In [18]:
# Resume point: skips training and reuses `results_df`, `mnist_models` and
# `fashion_models` left in kernel memory by an earlier compare_datasets() run.

print("Continuing SVM CLASSIFIER EXPERIMENTS from analysis step...")
print("=" * 80)

# Per-dataset reports
mnist_results, fashion_results = analyze_results(results_df)

# Full formatted table
formatted_df = create_results_table(results_df)

# Persist the best model per dataset
save_best_models(mnist_models, fashion_models, results_df)

# Persist the formatted table
formatted_df.to_csv('svm_formatted_results.csv', index=False)
print("\nFormatted results saved to 'svm_formatted_results.csv'")

print("\n" + "=" * 80)
print("EXPERIMENTS COMPLETED SUCCESSFULLY!")
print("=" * 80)

Continuing SVM CLASSIFIER EXPERIMENTS from analysis step...

RESULTS ANALYSIS

BEST RESULTS BY DATASET:
----------------------------------------

MNIST:
  Kernel: poly
  C: 0.1
  Gamma: 0.1
  Degree: 2.0
  Accuracy: 95.00%
  Training Time: 9304.71 ms
  Testing Time: 2266.40 ms

FashionMNIST:
  Kernel: poly
  C: 0.1
  Gamma: 0.01
  Degree: 2.0
  Accuracy: 87.75%
  Training Time: 7130.37 ms
  Testing Time: 2416.25 ms

SUMMARY STATISTICS

MNIST:
  Number of experiments: 24
  Average accuracy: 83.67%
  Max accuracy: 95.00%
  Min accuracy: 11.70%
  Average training time: 16786.09 ms
  Average testing time: 5053.70 ms

FashionMNIST:
  Number of experiments: 24
  Average accuracy: 77.47%
  Max accuracy: 87.75%
  Min accuracy: 10.15%
  Average training time: 15258.58 ms
  Average testing time: 5228.18 ms

RESULTS BY KERNEL

MNIST:
  RBF kernel:
    Experiments: 8
    Best accuracy: 92.10%
    Avg accuracy: 64.24%
    Avg training time: 32114.41 ms
  POLY kernel:
    Experiments: 16
    Best ac