In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from skimage.io import imread, imshow, show, imshow_collection
from skimage.transform import resize, rescale, rotate

# Silence every warning category for the rest of the notebook session.
warnings.simplefilter("ignore")

# Load Data

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as loading finishes, even when unpickling raises.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Training features and labels are stored as two separate batches.
data_train_1 = _load_pickle("data_train_flatten_batch_1.pkl")
data_train_2 = _load_pickle("data_train_flatten_batch_2.pkl")
labels_train_1 = _load_pickle("labels_train_batch_1.pkl")
labels_train_2 = _load_pickle("labels_train_batch_2.pkl")

In [3]:
# Stack the two training batches along the first axis; features and labels
# are joined in the same batch order so rows stay aligned.
data_train = np.concatenate([data_train_1, data_train_2], axis=0)
labels_train = np.concatenate([labels_train_1, labels_train_2], axis=0)

In [4]:
# Load the held-out test features and labels. Each file is opened in a
# ``with`` block so its handle is closed right after unpickling.
# NOTE(security): unpickling can execute arbitrary code; only load files
# that come from a trusted source.
with open("data_test_flatten.pkl", "rb") as data_test_file:
    data_test = pickle.load(data_test_file)
with open("labels_test.pkl", "rb") as labels_test_file:
    labels_test = pickle.load(labels_test_file)

We don't need to shuffle the data because it is already split into train and test sets.

# Scale Data

In [5]:
# Fit the scaler on the training data only, then apply the same fitted
# transformation to both the training and the test features.
scaler = StandardScaler().fit(data_train)

X_train_std = scaler.transform(data_train)
X_test_std = scaler.transform(data_test)

# Train Model

In [None]:
# Hyper-parameter tuning: exhaustive grid search over gamma and C for an SVC.
m = SVC()

# 3 gamma values x 4 C values = 12 candidate settings.
parameters = [
    {
        "gamma": [0.01, 0.001, 0.0001],
        "C": [1, 10, 100, 1000],
    }
]

# verbose=3 prints one progress line per cross-validation fit.
grid_search = GridSearchCV(estimator=m, param_grid=parameters, verbose=3)
grid_search.fit(X_train_std, labels_train)

# Keep the best estimator found by the search for the cells that follow.
model = grid_search.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END ..................C=1, gamma=0.01;, score=0.156 total time=137.9min
[CV 2/5] END ..................C=1, gamma=0.01;, score=0.155 total time=137.2min
[CV 3/5] END ..................C=1, gamma=0.01;, score=0.158 total time=137.4min
[CV 4/5] END ..................C=1, gamma=0.01;, score=0.155 total time=137.8min
[CV 5/5] END ..................C=1, gamma=0.01;, score=0.160 total time=137.8min


In [None]:
# Report the hyper-parameters of the selected model. The parameter dict is
# fetched once, and both labels use the same "name:" format.
selected_params = model.get_params()
print("MODEL OPTIMAL PARAMETERS")
print("gamma:", selected_params["gamma"])
print("C:", selected_params["C"])

# Test Model

In [None]:
# Predict on the standardized test features and compare against the true labels.
predictions = model.predict(X_test_std)
accuracy = accuracy_score(y_true=labels_test, y_pred=predictions)
print("Accuracy score:", accuracy)

In [None]:
# Build the confusion matrix with rows/columns ordered by the classes the
# model was trained on, then render it with matching tick labels.
class_labels = model.classes_
cm = confusion_matrix(y_true=labels_test, y_pred=predictions, labels=class_labels)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=class_labels,
)
disp.plot()

# Save Model

In [None]:
# Persist the selected model to disk. The ``with`` block guarantees the file
# is flushed and closed once the pickle has been written.
model_file_name = "SVC_32x32.pkl"
with open(model_file_name, "wb") as model_file:
    pickle.dump(model, model_file)