# 3.1 Support Vector Machine (SVM)

In [1]:
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

Load the small image data set.

In [2]:
# Open the .npz archive; its member arrays are read in the next cell.
npzfile = np.load(file='../data/image_data.npz')

In [3]:
# Pull both arrays out of the archive in one step.
# presumably 'arr_0' holds the images and 'arr_1' the labels — TODO confirm
X, y = npzfile['arr_0'], npzfile['arr_1']

In [4]:
# Flatten every sample into a single feature vector: (n_samples, n_features).
# The trailing axes are collapsed via X.shape[1:] instead of hard-coding three
# of them, so re-running this cell on the already-flattened X is a no-op
# rather than an IndexError, and inputs of other ranks are handled too.
X = X.reshape(X.shape[0], int(np.prod(X.shape[1:])))

In [5]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Log-spaced candidate values for the SVC hyperparameters.
C_range = np.logspace(-2, 1, num=4)      # 0.01, 0.1, 1, 10
# NOTE(review): 4 points across 1e-3..1e1 means the middle values are not
# whole decades (~0.0215, ~0.464); use num=5 if decade steps were intended.
gamma_range = np.logspace(-3, 1, num=4)
param_grid = {'gamma': gamma_range, 'C': C_range}

In [None]:
# Cross-validated grid search over every (gamma, C) pair in param_grid,
# fitted on the training split only. The last line displays the winning pair.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid)
grid.fit(X_train, y_train)
grid.best_params_

In [None]:
# Unpack the winning hyperparameters found by the grid search.
best_C, best_gamma = (grid.best_params_[key] for key in ('C', 'gamma'))

In [8]:
# Train the final classifier on the training split with the selected
# hyperparameters (fit() returns the estimator itself, so it can be chained).
# NOTE(review): the saved output of this cell reports C=0.9, which is not a
# value np.logspace(-2, 1, 4) produces — the saved outputs look stale
# relative to the current grid; re-run the notebook to refresh them.
clf = SVC(C=best_C, gamma=best_gamma).fit(X_train, y_train)
clf

SVC(C=0.9, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [9]:
# Accuracy of the fitted classifier on the held-out test split.
clf.score(X=X_test, y=y_test)

0.47983870967741937

In [10]:
# Accuracy on the data the classifier was trained on, for comparison with
# the test score above.
clf.score(X=X_train, y=y_train)

0.9555106167846309