In [1]:
import os
import numpy as np
import time
def _load_cifar10_batch(file): 
    import pickle as cPickle 
    fo = open(file, 'rb') 
    dict = cPickle.load(fo,encoding='latin1') 
    fo.close() 
    return dict['data'].reshape(-1, 32, 32, 3), dict['labels'] # reshaping the data to 32 x 32 x 3  
print('Loading...') 

batch_fns = [os.path.join("/home/halfmirror/Documents/Python/Image Classification/", 'cifar-10-batches-py', 'data_batch_' + str(i)) for i in range(1, 6)] 
data_batches = [_load_cifar10_batch(fn) for fn in batch_fns] 


Loading...


In [2]:
data_all = np.vstack([data_batches[i][0] for i in range(len(data_batches))]).astype('float') 
labels_all = np.vstack([data_batches[i][1] for i in range(len(data_batches))]).flatten() 

In [3]:
#Splitting the whole training set into 92:8
seed=7
from sklearn.model_selection import StratifiedShuffleSplit
data_split = StratifiedShuffleSplit(n_splits=1, test_size=0.08,random_state=seed) #creating data_split object with 8% test size 
data_split.get_n_splits(data_all,labels_all)
for i, j in data_split.split(data_all,labels_all):
    split_data_92, split_data_8 = data_all[i], data_all[j]        
    split_label_92, split_label_8 = labels_all[i], labels_all[j]

In [4]:
#Splitting the training set into 70 and 30
train_test_split = StratifiedShuffleSplit(1, test_size=0.3,random_state=seed) #test_size=0.3 denotes that 30 % of the dataset is used for testing.
for train_index, test_index in train_test_split.split(split_data_8,split_label_8):
    train_data_70, test_data_30 = split_data_8[train_index], split_data_8[test_index]     
    train_label_70, test_label_30 = split_label_8[train_index], split_label_8[test_index]
train_data = train_data_70 #assigning to variable train_data
train_labels = train_label_70 #assigning to variable train_labels
test_data = test_data_30
test_labels = test_label_30

In [5]:
print ('train_data : ', train_data.shape)
print ('train_labels : ', train_labels.shape)
print ('test_data : ', test_data.shape)
print ('test_labels : ', test_labels.shape)

train_data :  (2800, 32, 32, 3)
train_labels :  (2800,)
test_data :  (1200, 32, 32, 3)
test_labels :  (1200,)


In [6]:
train_data[0]

array([[[ 19.,  26.,  24.],
        [ 26.,  39.,  52.],
        [ 64.,  73.,  81.],
        ...,
        [ 93.,  93.,  94.],
        [ 96.,  98.,  99.],
        [100.,  99.,  95.]],

       [[  0.,   1.,   2.],
        [  3.,  15.,  33.],
        [ 49.,  62.,  74.],
        ...,
        [ 79.,  78.,  82.],
        [ 87.,  92.,  96.],
        [ 99.,  99.,  98.]],

       [[  0.,   0.,   0.],
        [  0.,   6.,  16.],
        [ 27.,  41.,  55.],
        ...,
        [ 75.,  80.,  85.],
        [ 89.,  94.,  98.],
        [ 99., 101., 103.]],

       ...,

       [[ 41.,  46.,  54.],
        [ 59.,  64.,  67.],
        [ 67.,  66.,  65.],
        ...,
        [ 44.,  41.,  61.],
        [ 98.,  89.,  37.],
        [ 20.,  35.,  45.]],

       [[ 46.,  55.,  65.],
        [ 68.,  72.,  74.],
        [ 74.,  74.,  72.],
        ...,
        [ 62.,  52.,  40.],
        [ 31.,  27.,  22.],
        [ 28.,  33.,  36.]],

       [[ 48.,  56.,  64.],
        [ 70.,  73.,  74.],
        [ 73.,  

In [7]:
# definition of normalization function
def normalize(data, eps=1e-8): 
    data -= data.mean(axis=(1, 2, 3), keepdims=True) 
    std = np.sqrt(data.var(axis=(1, 2, 3), ddof=1, keepdims=True)) # calculating standard deviation
    std[std < eps] = 1. 
    data /= std 
    return data 
# calling the function
train_data = normalize(train_data) 
test_data = normalize(test_data)
# prints the shape of train data and test data 
print ('train_data: ', train_data.shape)
print ('test_data: ', test_data.shape)

train_data:  (2800, 32, 32, 3)
test_data:  (1200, 32, 32, 3)


In [8]:
train_data[0]

array([[[-1.10026262, -0.92360678, -0.97407988],
        [-0.92360678, -0.59553163, -0.26745649],
        [ 0.0353821 ,  0.26251105,  0.46440344],
        ...,
        [ 0.76724204,  0.76724204,  0.79247859],
        [ 0.84295169,  0.89342479,  0.91866133],
        [ 0.94389788,  0.91866133,  0.81771514]],

       [[-1.57975707, -1.55452052, -1.52928397],
        [-1.50404742, -1.20120882, -0.74695093],
        [-0.34316614, -0.015091  ,  0.2877476 ],
        ...,
        [ 0.41393034,  0.3886938 ,  0.48963999],
        [ 0.61582274,  0.74200549,  0.84295169],
        [ 0.91866133,  0.91866133,  0.89342479]],

       [[-1.57975707, -1.57975707, -1.57975707],
        [-1.57975707, -1.42833777, -1.17597227],
        [-0.89837023, -0.54505854, -0.19174684],
        ...,
        [ 0.31298415,  0.43916689,  0.56534964],
        [ 0.66629584,  0.79247859,  0.89342479],
        [ 0.91866133,  0.96913443,  1.01960753]],

       ...,

       [[-0.54505854, -0.41887579, -0.21698339],
        [-0

In [9]:
# Computing whitening matrix 
train_data_flat = train_data.reshape(train_data.shape[0], -1).T
test_data_flat = test_data.reshape(test_data.shape[0], -1).T
print('train_data_flat: ', train_data_flat.shape)
print('test_data_flat: ', test_data_flat.shape)
train_data_flat_t = train_data_flat.T
test_data_flat_t = test_data_flat.T

train_data_flat:  (3072, 2800)
test_data_flat:  (3072, 1200)


In [10]:
from sklearn.decomposition import PCA
# n_components specify the no.of components to keep
train_data_pca = PCA(n_components=train_data_flat.shape[1]).fit_transform(train_data_flat)
test_data_pca = PCA(n_components=test_data_flat.shape[1]).fit_transform(test_data_flat)
train_data_pca = train_data_pca.T
test_data_pca = test_data_pca.T

In [13]:
train_data_pca[0]

array([ 31.084122  ,  31.01747292,  31.71379104, ..., -10.19050468,
        -9.38963984,  -8.25183206])

In [15]:
# from skimage import color
# # definition for SVD
# def svdFeatures(input_data):
#     svdArray_input_data=[]
#     size = input_data.shape[0]
#     for i in range (0,size):
#         img=color.rgb2gray(input_data[i])
#         U, s, V = np.linalg.svd(img, full_matrices=False);
#         S=[s[i] for i in range(30)]
#         svdArray_input_data.append(S)
#         svdMatrix_input_data=np.matrix(svdArray_input_data)
#     return svdMatrix_input_data
# # apply SVD for train and test data
# train_data_svd=svdFeatures(train_data)
# test_data_svd=svdFeatures(test_data)

In [16]:

from sklearn import svm #Creating a svm classifier model

clf = svm.SVC(gamma=.001,probability=True) #Model training

clf.fit(train_data_flat_t, train_labels) #After being fitted, the model can then be used to predict the output.



SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)