# Building an SVM Classifier for MNIST with Hyperparameter Tuning and Comparative Analysis

## Fetch the Dataset

In [1]:
import sklearn
from sklearn.datasets import fetch_openml
import numpy as np

In [2]:
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

  warn(


In [3]:
mnist

{'data': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'target': array(['5', '0', '4', ..., '4', '5', '6'], dtype=object),
 'frame': None,
 'categories': {},
 'feature_names': ['pixel1',
  'pixel2',
  'pixel3',
  'pixel4',
  'pixel5',
  'pixel6',
  'pixel7',
  'pixel8',
  'pixel9',
  'pixel10',
  'pixel11',
  'pixel12',
  'pixel13',
  'pixel14',
  'pixel15',
  'pixel16',
  'pixel17',
  'pixel18',
  'pixel19',
  'pixel20',
  'pixel21',
  'pixel22',
  'pixel23',
  'pixel24',
  'pixel25',
  'pixel26',
  'pixel27',
  'pixel28',
  'pixel29',
  'pixel30',
  'pixel31',
  'pixel32',
  'pixel33',
  'pixel34',
  'pixel35',
  'pixel36',
  'pixel37',
  'pixel38',
  'pixel39',
  'pixel40',
  'pixel41',
  'pixel42',
  'pixel43',
  'pixel44',
  'pixel45',
  'pixel46',
  'pixel47',
  'pixel48',
  'pixe

In [4]:
X, y = mnist["data"], mnist["target"]
y = y.astype(np.uint8)
y.shape

(70000,)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_opt, y_opt = X[:15000], y[:15000]

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
X_opt_scaled = scaler.fit_transform(X_opt)

## Hyperparameter Tuning of the SVM classifier

### Random Search

In [7]:
from sklearn.svm import SVC
svm_classifier = SVC()

In [8]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint, loguniform
param_distributions_poly = {
    'kernel': ['poly'],
    'degree': randint(2, 6),  
    'C': uniform(0.1, 10),    
    'coef0': uniform(0, 1)   
}


param_distributions_rbf = {
    'kernel': ['rbf'],
    'C': uniform(0.1, 10),   
    'gamma': loguniform(1e-4, 1e1)
}

param_distributions_linear = {
    'kernel': ['linear'],
    'C': uniform(0.1, 10)
}

In [9]:
random_search_svm_poly = RandomizedSearchCV(
    estimator=svm_classifier,
    param_distributions=param_distributions_poly,
    n_iter=20,                
    scoring='accuracy',       
    cv=3,                     
    random_state=42,
    verbose=3
)

In [10]:
random_search_svm_rbf = RandomizedSearchCV(
    estimator=svm_classifier,
    param_distributions=param_distributions_rbf,
    n_iter=20,                
    scoring='accuracy',      
    cv=3,                     
    random_state=42,
    verbose=3
)

In [11]:
random_search_svm_linear = RandomizedSearchCV(
    estimator=svm_classifier,
    param_distributions=param_distributions_linear,
    n_iter=20,                
    scoring='accuracy',      
    cv=3,                     
    random_state=42,
    verbose=3
)

In [12]:
random_search_svm_poly.fit(X_opt_scaled, y_opt)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV 1/3] END C=3.845401188473625, coef0=0.9507143064099162, degree=4, kernel=poly;, score=0.964 total time=   7.9s
[CV 2/3] END C=3.845401188473625, coef0=0.9507143064099162, degree=4, kernel=poly;, score=0.950 total time=   7.8s
[CV 3/3] END C=3.845401188473625, coef0=0.9507143064099162, degree=4, kernel=poly;, score=0.958 total time=   8.6s
[CV 1/3] END C=7.896910002727692, coef0=0.596850157946487, degree=3, kernel=poly;, score=0.965 total time=   8.1s
[CV 2/3] END C=7.896910002727692, coef0=0.596850157946487, degree=3, kernel=poly;, score=0.951 total time=   8.0s
[CV 3/3] END C=7.896910002727692, coef0=0.596850157946487, degree=3, kernel=poly;, score=0.959 total time=   8.0s
[CV 1/3] END C=1.6599452033620266, coef0=0.05808361216819946, degree=5, kernel=poly;, score=0.778 total time=  34.6s
[CV 2/3] END C=1.6599452033620266, coef0=0.05808361216819946, degree=5, kernel=poly;, score=0.742 total time=  35.1s
[CV 3/3] END C=1.6

In [13]:
import pandas as pd
rnd_search_poly_results = random_search_svm_poly.cv_results_
results_rnd_poly = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(rnd_search_poly_results["mean_test_score"], rnd_search_poly_results["params"])
]
rnd_search_poly_results_df = pd.DataFrame(results_rnd_poly)
rnd_search_poly_results_df_sorted = rnd_search_poly_results_df.sort_values(by='Score', ascending=False)
rnd_search_poly_results_df_sorted.to_csv('output/rnd_svm_poly_sorted.csv', index=False)
rnd_search_poly_results_df_sorted

Unnamed: 0,Score,C,coef0,degree,kernel
9,0.963,9.837555,0.232771,3,poly
10,0.961933,7.95176,0.199674,4,poly
18,0.9614,6.725223,0.311711,3,poly
6,0.960867,1.934045,0.304242,3,poly
17,0.960133,1.833647,0.391061,3,poly
16,0.959667,1.320382,0.495177,4,poly
12,0.959267,6.175449,0.170524,4,poly
4,0.958733,0.305845,0.96991,5,poly
1,0.9582,7.89691,0.59685,3,poly
14,0.9582,8.183973,0.304614,2,poly


In [14]:
random_search_svm_rbf.fit(X_opt_scaled, y_opt)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV 1/3] END C=3.845401188473625, gamma=5.669849511478847, kernel=rbf;, score=0.113 total time= 1.3min
[CV 2/3] END C=3.845401188473625, gamma=5.669849511478847, kernel=rbf;, score=0.113 total time= 1.3min
[CV 3/3] END C=3.845401188473625, gamma=5.669849511478847, kernel=rbf;, score=0.113 total time= 1.2min
[CV 1/3] END C=7.41993941811405, gamma=0.09846738873614563, kernel=rbf;, score=0.178 total time= 1.1min
[CV 2/3] END C=7.41993941811405, gamma=0.09846738873614563, kernel=rbf;, score=0.179 total time= 1.1min
[CV 3/3] END C=7.41993941811405, gamma=0.09846738873614563, kernel=rbf;, score=0.175 total time= 1.2min
[CV 1/3] END C=1.6601864044243653, gamma=0.000602521573620386, kernel=rbf;, score=0.944 total time=  18.0s
[CV 2/3] END C=1.6601864044243653, gamma=0.000602521573620386, kernel=rbf;, score=0.932 total time=  18.2s
[CV 3/3] END C=1.6601864044243653, gamma=0.000602521573620386, kernel=rbf;, score=0.933 total time=  18.

In [15]:
rnd_search_rbf_results = random_search_svm_rbf.cv_results_
results_rnd_rbf = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(rnd_search_rbf_results["mean_test_score"], rnd_search_rbf_results["params"])
]
rnd_search_rbf_results_df = pd.DataFrame(results_rnd_rbf)
rnd_search_rbf_results_df_sorted = rnd_search_rbf_results_df.sort_values(by='Score', ascending=False)
rnd_search_rbf_results_df_sorted.to_csv('output/rnd_svm_rbf_sorted.csv', index=False)
rnd_search_rbf_results_df_sorted

Unnamed: 0,Score,C,gamma,kernel
6,0.947533,8.424426,0.001153,rbf
15,0.947133,6.175449,0.000712,rbf
10,0.943667,6.218529,0.000498,rbf
7,0.940733,1.91825,0.000826,rbf
2,0.9364,1.660186,0.000603,rbf
18,0.9348,3.146138,0.000308,rbf
9,0.934067,4.41945,0.002859,rbf
14,0.9336,6.024146,0.000171,rbf
11,0.8584,3.021446,0.006789,rbf
19,0.662133,6.94233,0.015877,rbf


In [16]:
random_search_svm_linear.fit(X_opt_scaled, y_opt)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV 1/3] END C=3.845401188473625, kernel=linear;, score=0.914 total time=   6.0s
[CV 2/3] END C=3.845401188473625, kernel=linear;, score=0.910 total time=   5.7s
[CV 3/3] END C=3.845401188473625, kernel=linear;, score=0.898 total time=   5.8s
[CV 1/3] END .C=9.60714306409916, kernel=linear;, score=0.914 total time=   5.7s
[CV 2/3] END .C=9.60714306409916, kernel=linear;, score=0.910 total time=   5.7s
[CV 3/3] END .C=9.60714306409916, kernel=linear;, score=0.898 total time=   5.5s
[CV 1/3] END .C=7.41993941811405, kernel=linear;, score=0.914 total time=   5.7s
[CV 2/3] END .C=7.41993941811405, kernel=linear;, score=0.910 total time=   5.6s
[CV 3/3] END .C=7.41993941811405, kernel=linear;, score=0.898 total time=   5.6s
[CV 1/3] END C=6.086584841970366, kernel=linear;, score=0.914 total time=   5.7s
[CV 2/3] END C=6.086584841970366, kernel=linear;, score=0.910 total time=   5.8s
[CV 3/3] END C=6.086584841970366, kernel=linear;

In [17]:
rnd_search_linear_results = random_search_svm_linear.cv_results_
results_rnd_linear = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(rnd_search_linear_results["mean_test_score"], rnd_search_linear_results["params"])
]
rnd_search_linear_results_df = pd.DataFrame(results_rnd_linear)
rnd_search_linear_results_df_sorted = rnd_search_linear_results_df.sort_values(by='Score', ascending=False)
rnd_search_linear_results_df_sorted.to_csv('output/rnd_svm_linear_sorted.csv', index=False)
rnd_search_linear_results_df_sorted

Unnamed: 0,Score,C,kernel
10,0.9088,0.305845,linear
1,0.907533,9.607143,linear
18,0.907533,4.41945,linear
17,0.907533,5.347564,linear
16,0.907533,3.142422,linear
15,0.907533,1.934045,linear
14,0.907533,1.91825,linear
13,0.907533,2.223391,linear
12,0.907533,8.424426,linear
11,0.907533,9.799099,linear


### Grid Search

In [22]:
from sklearn.model_selection import GridSearchCV
param_distributions_poly_grid = [{
    'C': [10.0, 25.0, 50.0],
    'coef0': [0.25, 0.4, 0.5],
    'degree': [3],
    'kernel': ['poly']
}]

grid_search_poly = GridSearchCV(svm_classifier, param_distributions_poly_grid, cv=5, verbose=3)
grid_search_poly.fit(X_opt_scaled, y_opt)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END C=10.0, coef0=0.25, degree=3, kernel=poly;, score=0.967 total time=  10.6s
[CV 2/5] END C=10.0, coef0=0.25, degree=3, kernel=poly;, score=0.971 total time=  11.4s
[CV 3/5] END C=10.0, coef0=0.25, degree=3, kernel=poly;, score=0.959 total time=  11.2s
[CV 4/5] END C=10.0, coef0=0.25, degree=3, kernel=poly;, score=0.967 total time=  11.3s
[CV 5/5] END C=10.0, coef0=0.25, degree=3, kernel=poly;, score=0.966 total time=  10.6s
[CV 1/5] END C=10.0, coef0=0.4, degree=3, kernel=poly;, score=0.962 total time=  10.2s
[CV 2/5] END C=10.0, coef0=0.4, degree=3, kernel=poly;, score=0.969 total time=  10.4s
[CV 3/5] END C=10.0, coef0=0.4, degree=3, kernel=poly;, score=0.956 total time=  10.0s
[CV 4/5] END C=10.0, coef0=0.4, degree=3, kernel=poly;, score=0.966 total time=  10.4s
[CV 5/5] END C=10.0, coef0=0.4, degree=3, kernel=poly;, score=0.964 total time=  10.0s
[CV 1/5] END C=10.0, coef0=0.5, degree=3, kernel=poly;, score=0.9

In [23]:
grid_search_poly_results = grid_search_poly.cv_results_
results_grid_poly = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(grid_search_poly_results["mean_test_score"], grid_search_poly_results["params"])
]
grid_search_poly_results_df = pd.DataFrame(results_grid_poly)
grid_search_poly_results_df_sorted = grid_search_poly_results_df.sort_values(by='Score', ascending=False)
grid_search_poly_results_df_sorted.to_csv('output/grid_svm_poly_sorted.csv', index=False)
grid_search_poly_results_df_sorted

Unnamed: 0,Score,C,coef0,degree,kernel
0,0.965867,10.0,0.25,3,poly
6,0.965867,50.0,0.25,3,poly
3,0.9658,25.0,0.25,3,poly
1,0.9634,10.0,0.4,3,poly
7,0.963333,50.0,0.4,3,poly
4,0.963267,25.0,0.4,3,poly
2,0.9612,10.0,0.5,3,poly
5,0.961067,25.0,0.5,3,poly
8,0.961067,50.0,0.5,3,poly


In [26]:
param_distributions_rbf_grid = [{
    'kernel': ['rbf'],
    'C': [10, 25, 50],   
    'gamma': [0.0001, 0.001, 0.01]
}]

grid_search_rbf = GridSearchCV(svm_classifier, param_distributions_rbf_grid, cv=5, verbose=3)
grid_search_rbf.fit(X_opt_scaled, y_opt)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.937 total time=  13.3s
[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.943 total time=  13.9s
[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.927 total time=  13.5s
[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.938 total time=  13.8s
[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.924 total time=  13.2s
[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.952 total time=  16.4s
[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.955 total time=  17.2s
[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.945 total time=  16.6s
[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.954 total time=  16.7s
[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.945 total time=  16.4s
[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.804 total time= 1.1min
[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;,

In [27]:
grid_search_rbf_results = grid_search_rbf.cv_results_
results_grid_rbf = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(grid_search_rbf_results["mean_test_score"], grid_search_rbf_results["params"])
]
grid_search_rbf_results_df = pd.DataFrame(results_grid_rbf)
grid_search_rbf_results_df_sorted = grid_search_rbf_results_df.sort_values(by='Score', ascending=False)
grid_search_rbf_results_df_sorted.to_csv('output/grid_svm_rbf_sorted.csv', index=False)
grid_search_rbf_results_df_sorted

Unnamed: 0,Score,C,gamma,kernel
1,0.95,10,0.001,rbf
4,0.948667,25,0.001,rbf
7,0.9486,50,0.001,rbf
3,0.9374,25,0.0001,rbf
6,0.9374,50,0.0001,rbf
0,0.934,10,0.0001,rbf
2,0.7902,10,0.01,rbf
5,0.7902,25,0.01,rbf
8,0.7902,50,0.01,rbf


In [28]:
param_distributions_linear_grid = [{
    'kernel': ['linear'],
    'C': [10, 25, 50]
}]

grid_search_linear = GridSearchCV(svm_classifier, param_distributions_linear_grid, cv=5, verbose=3)
grid_search_linear.fit(X_opt_scaled, y_opt)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END ...............C=10, kernel=linear;, score=0.908 total time=   7.4s
[CV 2/5] END ...............C=10, kernel=linear;, score=0.918 total time=   7.5s
[CV 3/5] END ...............C=10, kernel=linear;, score=0.903 total time=   7.4s
[CV 4/5] END ...............C=10, kernel=linear;, score=0.917 total time=   7.4s
[CV 5/5] END ...............C=10, kernel=linear;, score=0.888 total time=   7.1s
[CV 1/5] END ...............C=25, kernel=linear;, score=0.908 total time=   7.3s
[CV 2/5] END ...............C=25, kernel=linear;, score=0.918 total time=   7.4s
[CV 3/5] END ...............C=25, kernel=linear;, score=0.903 total time=   7.3s
[CV 4/5] END ...............C=25, kernel=linear;, score=0.917 total time=   7.7s
[CV 5/5] END ...............C=25, kernel=linear;, score=0.888 total time=   7.1s
[CV 1/5] END ...............C=50, kernel=linear;, score=0.908 total time=   7.3s
[CV 2/5] END ...............C=50, kernel=linear;,

In [29]:
grid_search_linear_results = grid_search_linear.cv_results_
results_grid_linear = [
    {'Score': mean_score, **params} 
    for mean_score, params in zip(grid_search_linear_results["mean_test_score"], grid_search_linear_results["params"])
]
grid_search_linear_results_df = pd.DataFrame(results_grid_linear)
grid_search_linear_results_df_sorted = grid_search_linear_results_df.sort_values(by='Score', ascending=False)
grid_search_linear_results_df_sorted.to_csv('output/grid_svm_linear_sorted.csv', index=False)
grid_search_linear_results_df_sorted

Unnamed: 0,Score,C,kernel
0,0.906933,10,linear
1,0.906933,25,linear
2,0.906933,50,linear
