# 3.1 Support Vector Machine (SVM)

In [1]:
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split

Specify the data file you want to load. Here I load an `.npz` file with 15 classes that was created with the <u>"2.1 Data Preparation - Split-Train-Test.ipynb"</u> notebook.

In [2]:
# relative path of the prepared image archive
data_path = '../data/image_data_10.npz'
npzfile = np.load(data_path)

In [3]:
# the archive stores its arrays under positional keys:
# 'arr_0' is used as the sample data, 'arr_1' as the matching targets
X, y = npzfile['arr_0'], npzfile['arr_1']

In [4]:
# flatten every sample into one feature vector: (n_samples, ...) -> (n_samples, n_features)
# the width is computed from all trailing axes instead of hard-coding axes 1..3,
# so re-running this cell on the already-flattened X is a no-op rather than an IndexError
X = np.reshape(X, (X.shape[0], int(np.prod(X.shape[1:]))))

In [5]:
# hold out 20 % of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# search space for the grid search
# C: 4 log-spaced values from 1e-2 to 1e1 (exact decades)
C_range = np.logspace(start=-2, stop=1, num=4)
# gamma: 4 log-spaced values from 1e-3 to 1e1; 4 points over 4 decades means
# the two inner values are not round powers of ten
gamma_range = np.logspace(start=-3, stop=1, num=4)
kernel_range = ['rbf', 'linear']
param_grid = {
    'gamma': gamma_range,
    'C': C_range,
    'kernel': kernel_range,
}

In [1]:
# exhaustive search over param_grid, evaluated with 3-fold stratified
# cross-validation on the training split, using 4 parallel jobs
cv_folds = StratifiedKFold(n_splits=3)
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=cv_folds, n_jobs=4)
grid.fit(X_train, y_train)

# display the winning parameter combination
grid.best_params_

In [None]:
# unpack the winning hyper-parameters from the finished grid search
best_params = grid.best_params_
best_C, best_gamma, best_kernel = (
    best_params[name] for name in ('C', 'gamma', 'kernel')
)

In [None]:
# fit the final model with the best parameters found by the grid search
# the kernel has to be passed as well: SVC defaults to 'rbf', so leaving it out
# would silently train a different model whenever the search selected 'linear'
clf = SVC(C=best_C, gamma=best_gamma, kernel=best_kernel)
clf.fit(X_train, y_train)

In [None]:
# evaluate the fitted classifier on the held-out test split
test_score = clf.score(X_test, y_test)
test_score

In [None]:
# evaluate the fitted classifier on the data it was trained on
train_score = clf.score(X_train, y_train)
train_score