In [326]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from random import random
from scipy.spatial import distance

In [337]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object the file contains, decoded with ``encoding="latin1"``.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding="latin1")

def create_train_data(path):
    """Load the five CIFAR-10 training batches into two NumPy arrays.

    Reads ``data_batch_1`` ... ``data_batch_5`` via ``unpickle`` and stacks
    them in order. The number of images is taken from the loaded batches
    instead of being hard-coded, so batches of any size are accepted.

    Parameters
    ----------
    path : str
        Directory prefix of the batch files. File names are appended by plain
        string concatenation, so it must end with a path separator.

    Returns
    -------
    tr_X : numpy.ndarray
        float64 array of shape ``(N, 32, 32, 3)`` (channel axis last).
    tr_Y : numpy.ndarray
        float64 array of shape ``(N,)`` holding the labels.

    The shapes of both arrays are printed before returning.
    """
    n_batches = 5
    prefix = path + 'data_batch_'

    images_per_batch = []
    labels_per_batch = []
    for batch_no in range(1, n_batches + 1):
        batch = unpickle(prefix + str(batch_no))
        # Each row is a flat image laid out as (3, 32, 32); move the channel
        # axis to the end to get (32, 32, 3) images.
        images = (
            np.array(batch['data'])
            .reshape(-1, 3, 32, 32)
            .transpose(0, 2, 3, 1)
            .astype("uint8")
        )
        images_per_batch.append(images)
        labels_per_batch.append(np.array(batch['labels']))

    total = sum(len(images) for images in images_per_batch)

    # np.zeros defaults to float64; keeping that dtype means later arithmetic
    # on the images (e.g. differences) cannot wrap around as it would in uint8.
    tr_X = np.zeros((total, 32, 32, 3))
    tr_Y = np.zeros(total)

    start = 0
    for images, labels in zip(images_per_batch, labels_per_batch):
        stop = start + len(images)
        tr_X[start:stop] = images
        tr_Y[start:stop] = labels
        start = stop

    print(tr_X.shape)
    print(tr_Y.shape)
    return tr_X, tr_Y

def create_test_data(path):
    """Load the CIFAR-10 test batch.

    Parameters
    ----------
    path : str
        Directory prefix; ``'test_batch'`` is appended by string concatenation.

    Returns
    -------
    test_X : numpy.ndarray
        uint8 array of shape ``(10000, 32, 32, 3)`` (channel axis last).
    test_Y : numpy.ndarray
        Array built from the batch's ``"labels"`` entry.
    """
    batch = unpickle(path + 'test_batch')
    raw_images, raw_labels = batch["data"], batch["labels"]

    # Flat rows -> (3, 32, 32) planes -> channel-last images.
    planes = raw_images.reshape(10000, 3, 32, 32)
    images = planes.transpose(0, 2, 3, 1).astype("uint8")
    return images, np.array(raw_labels)

def create_label_dict(path):
    """Return the ``"label_names"`` entry of the ``batches.meta`` file.

    Parameters
    ----------
    path : str
        Directory prefix; ``'batches.meta'`` is appended by string
        concatenation.

    Returns
    -------
    object
        The value stored under ``"label_names"`` (presumably the list of
        class names, indexed by label id -- confirm against the data set).
    """
    meta = unpickle(path + 'batches.meta')
    return meta["label_names"]

'''
Evaluating Function
'''
def class_acc(pred,gt):
    """Return the fraction of predictions that equal the ground truth.

    Parameters
    ----------
    pred : numpy.ndarray
        Predicted labels; its first dimension is used as the sample count.
    gt : numpy.ndarray
        Ground-truth labels, compared element-wise with ``pred``.

    Returns
    -------
    float
        Number of matching entries divided by ``pred.shape[0]`` (a value in
        [0, 1], not a percentage).
    """
    n_correct = np.sum(pred == gt)
    n_total = pred.shape[0]
    return n_correct / n_total

'''
Random Classifier Function
'''
def cifar10_classifier_random(x):
    """Assign a uniformly random class label to every sample in ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Input samples; only ``x.shape[0]`` (the sample count) is used.

    Returns
    -------
    numpy.ndarray
        Integer labels drawn from ``0..9`` with NumPy's global random state,
        one per sample.
    """
    n_samples = x.shape[0]
    return np.random.randint(0, 10, size=n_samples)


'''
1-NN Classifier Function
'''
## 1-NN classifier using the squared Euclidean distance between images
def cifar10_classifier_1nn(X,trdata,trlabels):
    """Label every sample in ``X`` with the label of its nearest training sample.

    "Nearest" means smallest squared Euclidean distance, summed over all
    non-sample axes; ties go to the lowest training index (``np.argmin``).

    Parameters
    ----------
    X : array-like
        Samples to classify, iterated along the first axis. Each sample must
        broadcast against a single entry of ``trdata``.
    trdata : array-like
        Reference samples, first axis indexing the samples.
    trlabels : sequence
        Label of each reference sample, indexable by integer position.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(len(X),)`` with the predicted labels
        (empty when ``X`` is empty).

    Raises
    ------
    ValueError
        If ``X`` is non-empty but ``trdata`` contains no samples.
    """
    nearest_labels = []
    for x in X:
        # Promote to float64 *before* subtracting: with uint8 images on both
        # sides the difference and its square would wrap around modulo 256
        # and silently select the wrong neighbour.
        diff = np.asarray(x, dtype=np.float64) - trdata
        # Reduce over every axis except the sample axis.
        sq_dist = np.sum(np.square(diff), axis=tuple(range(1, diff.ndim)))
        nearest_labels.append(trlabels[np.argmin(sq_dist)])
    # Build the result once instead of np.append in the loop, which re-copied
    # the whole array on every iteration.
    return np.array(nearest_labels, dtype=np.float64)

In [338]:
# Root directory of the CIFAR-10 python batches. The loaders append file names
# by plain string concatenation, so the value must end with a path separator.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
# datapath = '/home/trungvan/worktemp/cifar-10-python/cifar-10-batches-py/' ## laptop
datapath = 'C:/Users/vanhut1/AppData/cifar-10-python.tar/cifar-10-batches-py/' ## Workstation

# Load the label names, the training set and the test set.
# create_train_data prints the shapes of the two training arrays.
label_names = create_label_dict(datapath)
tr_X,tr_Y = create_train_data(datapath)
test_X,test_Y = create_test_data(datapath)

(50000, 32, 32, 3)
(50000,)


In [339]:
# Test with test-data
# Quick run: classify the first n test images against only the first n
# training images and print the result as a percentage.
# Note: the value printed as "Precision" is the fraction of exact label
# matches returned by class_acc, multiplied by 100.
n = 100
print('Precision of 1nn classifier - test-data (n = {}): '.format(n),class_acc(cifar10_classifier_1nn(test_X[:n],tr_X[:n],tr_Y[:n]),test_Y[:n])*100,"%")

Precision of 1nn classifier - test-data (n = 100):  13.0 %


In [340]:
# Test evaluate function:
# Baseline: score random labels on the full test set. The value is printed as
# a fraction (no *100 here, unlike the 1-NN cells). No random seed is set, so
# the number changes between runs.
print('Precision of random classifier: ',class_acc(cifar10_classifier_random(test_X),test_Y))

Precision of random classifier:  0.0983


In [341]:
# Test with train-data
# Sanity check: the first 100 training images are classified against
# themselves, so every query has a reference at distance 0. The expected score
# is 100% unless two identical images carry different labels.
print('Precision of 1nn classifier - train-data should be 100%: ',class_acc(cifar10_classifier_1nn(tr_X[:100],tr_X[0:100],tr_Y[0:100]),tr_Y[0:100])*100,"%")

Precision of 1nn classifier - train-data should be 100%:  100.0 %


In [344]:
# Larger run: classify the first n test images against the first n training
# images (only a slice of tr_X is used as the reference set, not all of it).
# The result is printed as a percentage on its own line.
n = 10000
print('Precision of 1nn classifier - test-data (n = {}): '.format(n))
print(class_acc(cifar10_classifier_1nn(test_X[0:n],tr_X[0:n],tr_Y[0:n]),test_Y[0:n])*100,"%")

Precision of 1nn classifier - test-data (n = 10000): 
28.77 %
