In [1]:
import os
import numpy as np

In [2]:
def load_cifar10_batch(file):
    import pickle
    fo = open(file, 'rb') 
    data = pickle.load(fo,encoding='bytes') 
    fo.close() 
    print(data.keys())
    return data[b'data'].reshape(-1, 32, 32, 3), data[b'labels'] # reshaping the data to 32 x 32 x 3  
print('Loading...') 
batch_fns = [os.path.join("./", 'cifar-10-batches-py', 'data_batch_' + str(i)) for i in range(1, 6)] 
data_batches = [load_cifar10_batch(fn) for fn in batch_fns] 

Loading...
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
dict_keys([b'batch_label', b'labels', b'data', b'filenames'])


In [3]:
print(data_batches)

[(array([[[[ 59,  43,  50],
         [ 68,  98, 119],
         [139, 145, 149],
         ...,
         [127, 126, 127],
         [130, 142, 130],
         [118, 120, 109]],

        [[ 33,  38,  87],
         [106, 115, 117],
         [114, 105, 107],
         ...,
         [118, 140, 136],
         [120, 107,  88],
         [ 67,  35,  32]],

        [[ 97, 111, 123],
         [130, 136, 132],
         [122, 121, 127],
         ...,
         [ 76, 107, 135],
         [135, 129, 127],
         [119, 125, 134]],

        ...,

        [[ 72,  44,  42],
         [ 52,  72,  64],
         [ 58,  57,  81],
         ...,
         [ 66,  62,  52],
         [ 39,  39,  44],
         [ 87, 114,  58]],

        [[ 82,  26,  25],
         [ 34,  41,  49],
         [ 53,  56,  58],
         ...,
         [ 55,  61,  68],
         [ 59,  46,  98],
         [ 87,   9,   9]],

        [[ 96,  34,  26],
         [ 27,  34,  32],
         [ 42,  52,  49],
         ...,
         [ 58,  65,  59],
      

In [4]:
data_all = np.vstack([data_batches[i][0] for i in range(len(data_batches))]).astype('float') 
labels_all = np.vstack([data_batches[i][1] for i in range(len(data_batches))]).flatten() 

In [5]:
#Splitting the whole training set into 92:8
seed=7
from sklearn.model_selection import StratifiedShuffleSplit
sss = StratifiedShuffleSplit(1, test_size=0.08,random_state=seed)
data_split = sss.get_n_splits(data_all,labels_all) #creating data_split object with 8% test size 
for train_index, test_index in sss.split(data_all,labels_all):
    split_data_92, split_data_8 = data_all[train_index], data_all[test_index]        
    split_label_92, split_label_8 = labels_all[train_index], labels_all[test_index]

In [6]:
#Splitting the training set into 70 and 30
xxx = StratifiedShuffleSplit(1, test_size=0.3,random_state=seed)
train_test_split = xxx.split(split_data_8,split_label_8) #test_size=0.3 denotes that 30 % of the dataset is used for testing.
for train_index, test_index in train_test_split:
    train_data_70, test_data_30 = split_data_8[train_index], split_data_8[test_index]     
    train_label_70, test_label_30 = split_label_8[train_index], split_label_8[test_index]
train_data = train_data_70 #assigning to variable train_data
train_labels = train_label_70 #assigning to variable train_labels
test_data = test_data_30
test_labels = test_label_30

In [7]:
test_data.shape

(1200, 32, 32, 3)

In [8]:
# definition of normalization function
def normalize(data, eps=1e-8): 
    data -= data.mean(axis=(1, 2, 3), keepdims=True) 
    std = np.sqrt(data.var(axis=(1, 2, 3), ddof=1, keepdims=True)) # calculating standard deviation
    std[std < eps] = 1. 
    data /= std 
    return data 
# calling the function
train_data = normalize(train_data) 
test_data = normalize(test_data) 
# prints the shape of train data and test data 
print('train_data: ', train_data.shape)
print('test_data: ', test_data.shape)

train_data:  (2800, 32, 32, 3)
test_data:  (1200, 32, 32, 3)


In [9]:
# Computing whitening matrix 
train_data_flat = train_data.reshape(train_data.shape[0], -1).T
test_data_flat = test_data.reshape(test_data.shape[0], -1).T
print('train_data_flat: ', train_data_flat.shape)
print('test_data_flat: ', test_data_flat.shape)
train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T

train_data_flat:  (3072, 2800)
test_data_flat:  (3072, 1200)


In [10]:
from sklearn.decomposition import PCA
# n_components specify the no.of components to keep
train_data_pca = PCA(n_components=train_data_flat.shape[1]).fit_transform(train_data_flat)
test_data_pca = PCA(n_components=test_data_flat.shape[1]).fit_transform(test_data_flat)
train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T

In [12]:
from skimage import color
# definition for SVD
def svdFeatures(input_data):
    svdArray_input_data=[]
    size = input_data.shape[0]
    for i in range (0,size):
        img=color.rgb2gray(input_data[i])
        U, s, V = np.linalg.svd(img, full_matrices=False);
        S=[s[i] for i in range(30)]
        svdArray_input_data.append(S)
        svdMatrix_input_data=np.matrix(svdArray_input_data)
    return svdMatrix_input_data
# apply SVD for train and test data
train_data_svd=svdFeatures(train_data)
test_data_svd=svdFeatures(test_data)

In [13]:
from sklearn import svm #Creating a svm classifier model

clf = svm.SVC(gamma=.001,probability=True) #Model training

clf.fit(train_data_flat_t, train_labels) #After being fitted, the model can then be used to predict the output.

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [14]:
predicted=clf.predict(test_data_flat_t)

score= clf.score(test_data_flat_t,test_labels) #classification score.

print("score",score)

score 0.38666666666666666


In [15]:
from sklearn import metrics

conf_matrix=metrics.confusion_matrix(test_labels,predicted)

print("Confusion matrix:",conf_matrix)

Confusion matrix: [[47 13 11  4  1  6  4  5 20  9]
 [ 2 60  4 11  9  7  4  5  7 11]
 [15  8 31 14 15 11  9  7  8  2]
 [ 3  4 10 37 11 27 12  9  3  4]
 [ 7  4 16  8 30 10 19 11  7  8]
 [ 1  4 13 24  9 43 17  6  1  2]
 [ 0  6 18 17 17 11 43  5  0  3]
 [ 4  2  5 11 17  9  8 48  1 15]
 [10 14  1  5  2  6  1  2 62 17]
 [ 3 22  3  6  0  4  5  3 11 63]]


In [17]:
#To see the accuracy of each class. 

accuracy=[]

leng = len(conf_matrix) #finding the length of confusion matrix

for i in range(leng): 



#each diagonal element (conf_matrix[i,i]) is divided by the sum of the elements of that particular row (conf_matrix[i].sum()).

    ac=(conf_matrix[i,i]/((conf_matrix[i].sum())+.0000001))*100 

    accuracy.append(ac)

print(accuracy)

[39.16666663402778, 49.99999995833333, 25.833333311805557, 30.83333330763889, 24.999999979166667, 35.83333330347222, 35.83333330347222, 39.999999966666664, 51.666666623611114, 52.49999995625]


In [18]:
summation=0

no_of_classes = 10

for i in range(0,len(accuracy)):

    summation+=accuracy[i]

overall_accuracy = summation/no_of_classes

print(overall_accuracy)

38.66666663444444
