In [1]:
from __future__ import print_function, division
from data_utils import load_CIFAR10

X_train, y_train, X_test, y_test = load_CIFAR10("cifar-10-batches-py")

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

X_train: (50000, 32, 32, 3)
y_train: (50000,)
X_test: (10000, 32, 32, 3)
y_test: (10000,)


In [11]:
import numpy as np

data_train = X_train.copy()
data_train = data_train.astype('uint8') // 64
fea_train = []
for i in xrange(data_train.shape[0]):
    h = np.zeros((64,))
    tmp = data_train[i].reshape(-1, 3)
    tmp = tmp[:, 0]*16 + tmp[:, 1]*4 + tmp[:, 2]
    uni, cnt = np.unique(tmp, return_counts=True)
    h[uni] = cnt
    h = h / h.sum()
    h = h[np.newaxis, :]
    fea_train.append(h)

fea_train = np.concatenate(fea_train, axis=0)

data_test = X_test.copy()
data_test = data_test.astype('uint8') // 64
fea_test = []
for i in xrange(data_test.shape[0]):
    h = np.zeros((64,))
    tmp = data_test[i].reshape(-1, 3)
    tmp = tmp[:, 0]*16 + tmp[:, 1]*4 + tmp[:, 2]
    uni, cnt = np.unique(tmp, return_counts=True)
    h[uni] = cnt
    h = h / h.sum()
    h = h[np.newaxis, :]
    fea_test.append(h)

fea_test = np.concatenate(fea_test, axis=0)

print(fea_train.shape)
print(fea_test.shape)

(50000, 64)
(10000, 64)


In [12]:
from sklearn import neighbors
import time 

group_K = [1, 3, 5, 20, 50, 100]
acc = np.zeros((len(group_K),))
for i in xrange(len(group_K)):
    print("K: %d" %(group_K[i]))
    clf = neighbors.KNeighborsClassifier(group_K[i], n_jobs=-1)
    clf.fit(fea_train, y_train)
    tic = time.time()
    preds = clf.predict(fea_test)
    toc = time.time()
    print("time for prediction: %d seconds" %(toc-tic))

    acc[i] = (preds==y_test).mean()
    print("accuracy: ", acc[i])

print(acc)

K: 1
time for prediction: 4 seconds
accuracy:  0.2813
K: 3
time for prediction: 5 seconds
accuracy:  0.2757
K: 5
time for prediction: 5 seconds
accuracy:  0.2993
K: 20
time for prediction: 6 seconds
accuracy:  0.3323
K: 50
time for prediction: 8 seconds
accuracy:  0.3348
K: 100
time for prediction: 9 seconds
accuracy:  0.3343
[ 0.2813  0.2757  0.2993  0.3323  0.3348  0.3343]


In [13]:
from sklearn import svm

tic = time.time()
lin_clf = svm.LinearSVC() 

lin_clf.fit(fea_train, y_train)
toc_1 = time.time()
print("time for training: %d seconds" %(toc_1-tic))

preds = lin_clf.predict(fea_test)
toc_2 = time.time()
print("time for prediction: %d seconds" %(toc_2-toc_1))

acc = (preds==y_test).mean()
print("accuracy: ", acc)

time for training: 5 seconds
time for prediction: 0 seconds
accuracy:  0.2788


In [14]:
from sklearn import linear_model

tic = time.time()
lin_reg = linear_model.LogisticRegression(multi_class='multinomial', n_jobs=-1, solver='sag', max_iter=500)
lin_reg.fit(fea_train, y_train)
toc_1 = time.time()
print("time for training: %d seconds" %(toc_1-tic))

preds = lin_reg.predict(fea_test)
toc_2 = time.time()
print("time for prediction: %d seconds" %(toc_2-toc_1))

acc = (preds==y_test).mean()
print("accuracy: ", acc)

time for training: 9 seconds
time for prediction: 0 seconds
accuracy:  0.2908


In [16]:
interv = 32
data_train = X_train.copy()
data_train = data_train.astype('uint8') // interv
base = 256 // interv
fea_train = []
for i in xrange(data_train.shape[0]):
    h = np.zeros((base**3,))
    tmp = data_train[i].reshape(-1, 3)
    tmp = tmp[:, 0]*base*base + tmp[:, 1]*base + tmp[:, 2]
    uni, cnt = np.unique(tmp, return_counts=True)
    h[uni] = cnt
    h = h / h.sum()
    h = h[np.newaxis, :]
    fea_train.append(h)

fea_train = np.concatenate(fea_train, axis=0)

data_test = X_test.copy()
data_test = data_test.astype('uint8') // interv
fea_test = []
for i in xrange(data_test.shape[0]):
    h = np.zeros((base**3,))
    tmp = data_test[i].reshape(-1, 3)
    tmp = tmp[:, 0]*base*base + tmp[:, 1]*base + tmp[:, 2]
    uni, cnt = np.unique(tmp, return_counts=True)
    h[uni] = cnt
    h = h / h.sum()
    h = h[np.newaxis, :]
    fea_test.append(h)

fea_test = np.concatenate(fea_test, axis=0)

print(fea_train.shape)
print(fea_test.shape)

(50000, 512)
(10000, 512)


In [21]:
from sklearn import neighbors
import time 

group_K = [1, 3, 5, 20, 50, 100]
acc = np.zeros((len(group_K),))
for i in xrange(len(group_K)):
    print("K: %d" %(group_K[i]))
    clf = neighbors.KNeighborsClassifier(group_K[i], n_jobs=-1)
    clf.fit(fea_train, y_train)
    tic = time.time()
    preds = clf.predict(fea_test)
    toc = time.time()
    print("time for prediction: %d seconds" %(toc-tic))

    acc[i] = (preds==y_test).mean()
    print("accuracy: ", acc[i])

print(acc)

K: 1
time for prediction: 49 seconds
accuracy:  0.3178
K: 3
time for prediction: 56 seconds
accuracy:  0.3129
K: 5
time for prediction: 58 seconds
accuracy:  0.3345
K: 20
time for prediction: 66 seconds
accuracy:  0.3516
K: 50
time for prediction: 71 seconds
accuracy:  0.3462
K: 100
time for prediction: 72 seconds
accuracy:  0.3346
[ 0.3178  0.3129  0.3345  0.3516  0.3462  0.3346]


In [22]:
from sklearn import svm

tic = time.time()
lin_clf = svm.LinearSVC() 

lin_clf.fit(fea_train, y_train)
toc_1 = time.time()
print("time for training: %d seconds" %(toc_1-tic))

preds = lin_clf.predict(fea_test)
toc_2 = time.time()
print("time for prediction: %d seconds" %(toc_2-toc_1))

acc = (preds==y_test).mean()
print("accuracy: ", acc)

time for training: 10 seconds
time for prediction: 0 seconds
accuracy:  0.2972


In [23]:
from sklearn import linear_model

tic = time.time()
lin_reg = linear_model.LogisticRegression(multi_class='multinomial', n_jobs=-1, solver='sag', max_iter=500)
lin_reg.fit(fea_train, y_train)
toc_1 = time.time()
print("time for training: %d seconds" %(toc_1-tic))

preds = lin_reg.predict(fea_test)
toc_2 = time.time()
print("time for prediction: %d seconds" %(toc_2-toc_1))

acc = (preds==y_test).mean()
print("accuracy: ", acc)

time for training: 67 seconds
time for prediction: 0 seconds
accuracy:  0.307
