In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterGrid
from sklearn import svm
from sklearn import metrics

In [2]:
# imgs = np.load("Datasets/flat_processed_train_images.npy", encoding="bytes")
imgs = np.load("hog_train_images.npy", encoding="bytes")
print(imgs.shape)
imgs[0]
# plt.imshow(imgs[0].reshape(40,40), cmap = 'gray')

(10000, 3888)


array([0., 0., 0., ..., 0., 0., 0.])

In [3]:
labels = np.reshape(pd.read_csv("train_labels.csv").values[:, 1], (-1, 1))
labels.shape
print(labels[0])

['shovel']


In [4]:
merged = np.append(imgs, labels, axis=1)

In [5]:
merged

array([[0.0, 0.0, 0.0, ..., 0.0, 0.0, 'shovel'],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 'rifle'],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 'scorpion'],
       ...,
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 'rollerskates'],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 'mouth'],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 'pencil']], dtype=object)

In [6]:
X = imgs.tolist()
#X = [x.tolist() for x in X]

In [7]:
y = merged[:,-1].tolist()
y

['shovel',
 'rifle',
 'scorpion',
 'apple',
 'spoon',
 'pineapple',
 'rifle',
 'mouth',
 'skateboard',
 'rollerskates',
 'peanut',
 'rabbit',
 'sink',
 'sailboat',
 'nose',
 'nose',
 'skull',
 'mouth',
 'pool',
 'mouth',
 'peanut',
 'rifle',
 'pear',
 'scorpion',
 'pillow',
 'penguin',
 'skull',
 'skateboard',
 'nail',
 'pencil',
 'penguin',
 'empty',
 'rollerskates',
 'shovel',
 'rifle',
 'pool',
 'peanut',
 'pear',
 'nail',
 'rabbit',
 'scorpion',
 'spoon',
 'spoon',
 'rifle',
 'spoon',
 'pineapple',
 'apple',
 'skull',
 'nose',
 'pineapple',
 'rifle',
 'pineapple',
 'mouth',
 'sailboat',
 'octagon',
 'pineapple',
 'rollerskates',
 'mouth',
 'mouth',
 'mouth',
 'moustache',
 'spoon',
 'mouth',
 'penguin',
 'empty',
 'pool',
 'paintbrush',
 'pineapple',
 'empty',
 'pool',
 'rabbit',
 'panda',
 'paintbrush',
 'scorpion',
 'penguin',
 'parrot',
 'screwdriver',
 'shovel',
 'pear',
 'scorpion',
 'paintbrush',
 'parrot',
 'rifle',
 'paintbrush',
 'mouth',
 'rollerskates',
 'scorpion',
 'si

In [8]:
#do a train test split 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [9]:
svm_params = ParameterGrid({'random_state': [69],
                            'loss': ['hinge','squared_hinge'],
                            'C': [.5, 2.0, 5.0, 50.0]})

In [10]:
best_params_list = []
best_f1_score = 0
for svm_param in svm_params:
    best_score = 0
    best_params = None
    print("Param combinations: ", svm_param)
    
    classifier = svm.LinearSVC(**svm_param)
    classifier.fit(X_train, y_train)
    prediction = classifier.predict(X_test)
    
    #get the f1 score
    test_f1_measure = metrics.f1_score(y_test, prediction, average = 'micro')
    
    if test_f1_measure > best_f1_score:
        best_params_list = svm_param
        best_f1_score = test_f1_measure
    print("F1 score:", test_f1_measure)
    

Param combinations:  {'C': 0.5, 'loss': 'hinge', 'random_state': 69}
F1 score: 0.4575
Param combinations:  {'C': 0.5, 'loss': 'squared_hinge', 'random_state': 69}
F1 score: 0.45
Param combinations:  {'C': 2.0, 'loss': 'hinge', 'random_state': 69}
F1 score: 0.424
Param combinations:  {'C': 2.0, 'loss': 'squared_hinge', 'random_state': 69}
F1 score: 0.429
Param combinations:  {'C': 5.0, 'loss': 'hinge', 'random_state': 69}
F1 score: 0.4215
Param combinations:  {'C': 5.0, 'loss': 'squared_hinge', 'random_state': 69}
F1 score: 0.4235
Param combinations:  {'C': 50.0, 'loss': 'hinge', 'random_state': 69}
F1 score: 0.4195
Param combinations:  {'C': 50.0, 'loss': 'squared_hinge', 'random_state': 69}
F1 score: 0.41999999999999993


In [11]:
print(best_params_list)
print(best_f1_score)

{'C': 0.5, 'loss': 'hinge', 'random_state': 69}
0.4575


In [12]:
best_svm_classifier = svm.LinearSVC(**best_params_list)
best_svm_classifier.fit(X_train, y_train)

LinearSVC(C=0.5, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=69, tol=0.0001, verbose=0)

In [14]:
prediction = best_svm_classifier.predict(X_test)
acc = metrics.accuracy_score(y_test, prediction)
print(acc)

print('Predicted ',best_svm_classifier.predict(X_test[111:112]))
print('Actual ',y_test[111])

0.4575
Predicted  ['skateboard']
Actual  moustache
