In [6]:
import time
import numpy as np

In [7]:
from shared.helpers import build_set

# Gets the histogram sets with labels
print('Loading datasets...')
load_time_start = time.perf_counter()

labels = np.array([0, 1, 2, 3, 4])
y_train, x_train = build_set('train', binary=False, use_moments=True, with_extensions=True)
y_test, x_test = build_set('test', binary=False, use_moments=True, with_extensions=True)

print(f'Loaded datasets. Time taken: {time.perf_counter() - load_time_start} s')

Loading datasets...
Loading train dataset...
Loaded 17334 images (with extensions) in 6.357823800000006 s
Getting image descriptors...
Loaded descriptors for label 0:  (6858, 2)
Loaded descriptors for label 1:  (3138, 2)
Loaded descriptors for label 2:  (4548, 2)
Loaded descriptors for label 3:  (2271, 2)
Loaded descriptors for label 4:  (519, 2)
Total image loading time: 10.462506000000076 s...
Loading test dataset...
Loaded 4968 images (with extensions) in 1.6939256000000569 s
Getting image descriptors...
Loaded descriptors for label 0:  (1917, 2)
Loaded descriptors for label 1:  (888, 2)
Loaded descriptors for label 2:  (1341, 2)
Loaded descriptors for label 3:  (669, 2)
Loaded descriptors for label 4:  (153, 2)
Total image loading time: 2.890775699999949 s...
Loaded datasets. Time taken: 13.428830700000162 s


In [12]:
import importlib

helpers = importlib.import_module('shared.helpers')
importlib.reload(helpers)

from shared.helpers import scale_dataset

print('unscaled: ', x_train[:2])

x_scaled = x_train
x_test_scaled = x_test

x_scaled = scale_dataset(x_scaled)
x_test_scaled = scale_dataset(x_test_scaled)

print('scaled: ', x_scaled[:2])

unscaled:  [[ -3.1523345  -7.2572093 -11.15301   -12.524306  -24.662037  -16.436357
  -24.42609  ]
 [ -3.1742635  -7.6360016 -11.778712  -12.834935  -25.586607  -17.294075
  -25.171728 ]]
Scaling and fitting dataset...
Scaling and fitting dataset...
scaled:  [[-0.23317589  0.54865736  0.15126559 -0.11027179 -0.06145787  0.07264346
   0.11679873]
 [-0.5273372  -0.59333515 -1.1655784  -0.5280247  -0.7270186  -0.75469023
  -0.46720853]]


In [13]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

params = {
    'n_neighbors': range(1, 120),
    'weights': ['uniform', 'distance']
}

knn = KNeighborsClassifier(n_jobs=-1)
clf = GridSearchCV(knn, params, cv=5)
clf.fit(x_scaled, y_train)

clf = clf.best_estimator_

In [16]:
from sklearn.metrics import confusion_matrix, classification_report

y_predict=clf.predict(x_test_scaled)
accuracy = clf.score(x_test_scaled, y_test)

print(f"Accuracy: {accuracy * 100}%")
print("\nClassification report:")
print(classification_report(y_test, y_predict))

print("\nConfusion matrix:")
print(f"Labels: {labels}\n")
print(confusion_matrix(y_test, y_predict, labels=labels))

Accuracy: 32.46779388083736%

Classification report:
              precision    recall  f1-score   support

           0       0.41      0.54      0.47      1917
           1       0.19      0.15      0.17       888
           2       0.27      0.25      0.26      1341
           3       0.21      0.14      0.17       669
           4       0.14      0.06      0.08       153

    accuracy                           0.32      4968
   macro avg       0.25      0.23      0.23      4968
weighted avg       0.30      0.32      0.31      4968


Confusion matrix:
Labels: [0 1 2 3 4]

[[1032  274  448  148   15]
 [ 448  137  231   68    4]
 [ 671  180  338  130   22]
 [ 280   96  183   97   13]
 [  62   28   33   21    9]]


In [18]:
import os
import joblib

import shared.constants as constants

if os.path.exists(constants.MODELS_DIR):
    joblib.dump(clf, 'shallow_multi.sav')
else:
    print("Cannot save trained svm model to {0}.".format(constants.SHALLOW_MODEL_NAME))