# 3.1 Support Vector Machine (SVM)

In [1]:
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split

Specify the data file you want to load. Here I load a npz-file with 15 classes created with the <u>"2.1 Data Preparation - Split-Train-Test.ipynb"</u> - notebook

In [2]:
npzfile = np.load('../data/image_data_10.npz')

In [3]:
X = npzfile['arr_0']
y = npzfile['arr_1']

In [4]:
# reshape 
X = np.reshape(X, (X.shape[0], X.shape[1]*X.shape[2]*X.shape[3]))

In [5]:
# split into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# define grid for gridsearch
C_range = np.logspace(-2, 1, 4)
gamma_range = np.logspace(-3, 1, 4)
kernel_range = ['rbf', 'linear']
param_grid = dict(gamma=gamma_range, C=C_range, kernel=kernel_range)

In [7]:
# find the best parameters for the model
grid = GridSearchCV(
    SVC(), 
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=3),
    n_jobs=4,
    verbose=1000)

grid.fit(X_train, y_train)

Fitting 3 folds for each of 32 candidates, totalling 96 fits
Memmaping (shape=(1212, 67500), dtype=float64) to new file C:\Users\flori\AppData\Local\Temp\joblib_memmaping_pool_14152_2370023194128\14152-2370080292592-2898dbb030e98a4a31c8c039fc3d6992.pkl
Pickling array (shape=(1212,), dtype=int32).
Pickling array (shape=(806,), dtype=int32).
Pickling array (shape=(406,), dtype=int32).
Memmaping (shape=(1212, 67500), dtype=float64) to old file C:\Users\flori\AppData\Local\Temp\joblib_memmaping_pool_14152_2370023194128\14152-2370080292592-2898dbb030e98a4a31c8c039fc3d6992.pkl
Pickling array (shape=(1212,), dtype=int32).
Pickling array (shape=(808,), dtype=int32).
Pickling array (shape=(404,), dtype=int32).
Memmaping (shape=(1212, 67500), dtype=float64) to old file C:\Users\flori\AppData\Local\Temp\joblib_memmaping_pool_14152_2370023194128\14152-2370080292592-2898dbb030e98a4a31c8c039fc3d6992.pkl
Pickling array (shape=(1212,), dtype=int32).
Pickling array (shape=(810,), dtype=int32).
Pickling

GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=None, shuffle=False),
       error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=4,
       param_grid={'kernel': ['rbf', 'linear'], 'gamma': array([  1.00000e-03,   2.15443e-02,   4.64159e-01,   1.00000e+01]), 'C': array([  0.01,   0.1 ,   1.  ,  10.  ])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=1000)

In [10]:
# {'C': 0.01, 'gamma': 0.001, 'kernel': 'linear'}
grid.best_params_

{'C': 0.01, 'gamma': 0.001, 'kernel': 'linear'}

In [11]:
# use the best parameters for the model
best_C = grid.best_params_['C']
best_gamma = grid.best_params_['gamma']
best_kernel = grid.best_params_['kernel']

In [12]:
# fit the model with the best parameters
clf = SVC(C=best_C, gamma=best_gamma)
clf.fit(X_train, y_train)

SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [13]:
# score on test data
clf.score(X_test, y_test)

0.1617161716171617

In [14]:
# score on training data
clf.score(X_train, y_train)

0.18894389438943895