In [1]:
from __future__ import print_function, division
from data_utils import load_CIFAR10
import numpy as np

# Load the CIFAR-10 arrays through the project helper from the path below.
CIFAR10_DIR = "cifar-10-batches-py"
X_train, y_train, X_test, y_test = load_CIFAR10(CIFAR10_DIR)

# Print the shape of every returned array as a quick sanity check.
for label, array in (("X_train:", X_train), ("y_train:", y_train),
                     ("X_test:", X_test), ("y_test:", y_test)):
    print(label, array.shape)

X_train: (50000, 32, 32, 3)
y_train: (50000,)
X_test: (10000, 32, 32, 3)
y_test: (10000,)


In [2]:
from skimage.feature import hog
from skimage import data, color, exposure
import matplotlib.pyplot as plt

def extract_hog_features(images):
    """Compute one HOG descriptor per RGB image in a batch.

    Each image is converted to grayscale and described with 8 orientation
    bins, 8x8-pixel cells and 2x2-cell blocks.

    Parameters
    ----------
    images : ndarray of shape (N, H, W, 3)
        Batch of RGB images.

    Returns
    -------
    ndarray of shape (N, D)
        The flattened HOG descriptors, stacked in input order.

    Raises
    ------
    ValueError
        If `images` is not a 4-D batch, e.g. because this cell has already
        replaced the raw images with their features.
    """
    if images.ndim != 4:
        raise ValueError(
            "expected a 4-D batch of RGB images, got shape %s; "
            "reload the raw data before re-running this cell" % (images.shape,))

    # Only the descriptor is used downstream, so the (costly) visualisation
    # image is not requested; hog() then returns the descriptor alone.
    descriptors = [
        hog(color.rgb2gray(image), orientations=8, pixels_per_cell=(8, 8),
            cells_per_block=(2, 2), transform_sqrt=False)
        for image in images
    ]
    return np.vstack(descriptors)


# Replace the raw images with their HOG features; later cells read
# X_train / X_test as feature matrices.
X_train = extract_hog_features(X_train)
X_test = extract_hog_features(X_test)

print(X_train.shape)
print(X_test.shape)


(50000, 288)
(10000, 288)


In [5]:
from sklearn import neighbors
import time 

# Sweep the number of neighbours for a k-NN classifier fitted on
# (X_train, y_train) and record the accuracy on (X_test, y_test) for each K.
group_K = [1, 5, 20, 50, 100]
acc = np.zeros((len(group_K),))

# enumerate() replaces the xrange index loop, which only exists on Python 2.
for i, k in enumerate(group_K):
    print("K: %d" % k)
    clf = neighbors.KNeighborsClassifier(k, n_jobs=-1)
    clf.fit(X_train, y_train)

    # Only the prediction step is timed; fitting is excluded.
    tic = time.time()
    preds = clf.predict(X_test)
    toc = time.time()
    print("time for prediction: %d seconds" % (toc - tic))

    # Fraction of test samples whose predicted label equals the true label.
    acc[i] = (preds == y_test).mean()
    print("accuracy: ", acc[i])

print(acc)

K: 1
time for prediction: 87 seconds
accuracy:  0.4347
K: 5
time for prediction: 91 seconds
accuracy:  0.4518
K: 20
time for prediction: 91 seconds
accuracy:  0.4502
K: 50
time for prediction: 91 seconds
accuracy:  0.4317
K: 100
time for prediction: 83 seconds
accuracy:  0.4107
[ 0.4347  0.4518  0.4502  0.4317  0.4107]


In [6]:
from sklearn import linear_model

# Fit a multinomial logistic regression on (X_train, y_train), then time and
# score its predictions on the test split.
tic = time.time()
lin_reg = linear_model.LogisticRegression(
    multi_class='multinomial',
    solver='sag',
    max_iter=500,
    n_jobs=-1,
)
lin_reg.fit(X_train, y_train)
toc_1 = time.time()
train_seconds = toc_1 - tic
print("time for training: %d seconds" % train_seconds)

preds = lin_reg.predict(X_test)
toc_2 = time.time()
predict_seconds = toc_2 - toc_1
print("time for prediction: %d seconds" % predict_seconds)

# Accuracy = share of test labels predicted correctly.
acc = np.mean(preds == y_test)
print("accuracy: ", acc)

time for training: 21 seconds
time for prediction: 0 seconds
accuracy:  0.4957


In [4]:
from sklearn import svm
import time

# Fit a LinearSVC with its default settings on (X_train, y_train), then time
# and score its predictions on the test split.
tic = time.time()
lin_clf = svm.LinearSVC()
lin_clf.fit(X_train, y_train)
toc_1 = time.time()
train_seconds = toc_1 - tic
print("time for training: %d seconds" % train_seconds)

preds = lin_clf.predict(X_test)
toc_2 = time.time()
predict_seconds = toc_2 - toc_1
print("time for prediction: %d seconds" % predict_seconds)

# Accuracy = share of test labels predicted correctly.
acc = np.mean(preds == y_test)
print("accuracy: ", acc)

time for training: 27 seconds
time for prediction: 0 seconds
accuracy:  0.5005


In [5]:
from sklearn import svm


# Fit an svm.SVC with its default settings on (X_train, y_train), then time
# and score its predictions on the test split.
# NOTE(review): despite the `lin_clf` name this is svm.SVC(), not
# svm.LinearSVC() - presumably a non-linear default kernel; confirm.
tic = time.time()
lin_clf = svm.SVC()
lin_clf.fit(X_train, y_train)
toc_1 = time.time()
train_seconds = toc_1 - tic
print("time for training: %d seconds" % train_seconds)

preds = lin_clf.predict(X_test)
toc_2 = time.time()
predict_seconds = toc_2 - toc_1
print("time for prediction: %d seconds" % predict_seconds)

# Accuracy = share of test labels predicted correctly.
acc = np.mean(preds == y_test)
print("accuracy: ", acc)


time for training: 1891 seconds
time for prediction: 165 seconds
accuracy:  0.3812
