## Task 2 Classification Algorithm

In [None]:
from __future__ import print_function

import os
import platform
import random

import matplotlib.pyplot as plt
import numpy as np
from builtins import range
from imageio import imread
from six.moves import cPickle as pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout


# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
# Download the CIFAR-10 python archive into the current working directory
# (-O fixes the local file name), then extract it in place.
# NOTE(review): the archive presumably unpacks to `cifar-10-batches-py/` — confirm.
!wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz
!tar -xzvf cifar-10-python.tar.gz

In [None]:
def load_pickle(f):
    """Unpickle an open binary file using the call that fits the running Python.

    Args:
        f: File object opened in binary mode.

    Returns:
        The unpickled object.

    Raises:
        ValueError: If the interpreter's major version is neither 2 nor 3.

    Note:
        Unpickling can execute arbitrary code; only use this on trusted files.
    """
    version = platform.python_version_tuple()
    major = version[0]
    if major == '3':
        # latin1 lets Python 3 decode 8-bit strings stored by Python 2 pickles.
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    Args:
        filename: Path to a pickled batch file holding a dict with a 'data'
            array and a 'labels' sequence.

    Returns:
        Tuple ``(X, Y)``. ``X`` is a float array of shape (N, 32, 32, 3) and
        ``Y`` is an array built from the batch labels.
    """
    # Only the unpickling needs the file handle; close it before reshaping.
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    # -1 lets numpy infer the image count instead of requiring exactly 10000
    # rows. Rows are unpacked as (channel, row, col) and then moved to
    # channels-last order.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y


def load_CIFAR10(ROOT):
    """Load every CIFAR-10 batch found under ``ROOT``.

    Reads ``data_batch_1`` .. ``data_batch_5`` and concatenates them into the
    training set, then reads ``test_batch`` as the test set.

    Args:
        ROOT: Directory containing the batch files.

    Returns:
        Tuple ``(Xtr, Ytr, Xte, Yte)`` of training images, training labels,
        test images and test labels.
    """
    train_batches = [
        load_CIFAR_batch(os.path.join(ROOT, 'data_batch_%d' % (batch_no, )))
        for batch_no in range(1, 6)
    ]
    Xtr = np.concatenate([images for images, _ in train_batches])
    Ytr = np.concatenate([labels for _, labels in train_batches])
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


cifar10_dir = 'cifar-10-batches-py'

# Drop arrays left over from a previous run of this cell so the old copies are
# released before the data is loaded again.
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except NameError:
    # Fresh kernel: nothing has been loaded yet. Only NameError is expected
    # here; anything else (e.g. KeyboardInterrupt) should propagate.
    pass

# `cifar10` is the Keras dataset module imported in the notebook's import cell.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

In [None]:
# Show a grid of random training images: one column per class,
# `samples_per_class` rows.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for col, class_name in enumerate(classes):
    # Positions of every training label equal to this class id.
    candidates = np.flatnonzero(y_train == col)
    chosen = np.random.choice(candidates, samples_per_class, replace=False)
    for row, sample_idx in enumerate(chosen):
        # Subplot numbers are 1-based and run row by row, so a fixed `col`
        # with increasing `row` walks down a single column.
        plt.subplot(samples_per_class, num_classes, row * num_classes + col + 1)
        plt.imshow(X_train[sample_idx].astype('uint8'))
        plt.axis('off')
        if row == 0:
            # Only the top image of each column carries the class name.
            plt.title(class_name)
plt.show()

In [None]:
# Start from a clean slate so re-running this cell always subsamples the full
# dataset rather than the already-truncated arrays from a previous run.
try:
    del X_train, y_train, X_val
    del X_test, y_test,y_val
    print('Clear previously loaded data.')
except NameError:
    # Some of the names do not exist yet (e.g. X_val on the first run).
    # Only NameError is expected; other exceptions should propagate.
    pass
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

num_training=15000
val=1000

# Validation split: the `val` samples that directly follow the training range.
# It must be taken BEFORE X_train/y_train are truncated below.
mask = list(range(num_training, num_training + val))
X_val = X_train[mask]
y_val = y_train[mask]
y_val = np.ravel(y_val)  # flatten labels to 1-D

# Training split: the first `num_training` samples.
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]
y_train = np.ravel(y_train)

# Test split: the first `num_test` samples of the test set.
num_test = 5000
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
y_test = np.ravel(y_test)


# Reshape the image data into rows (one flat feature vector per image)
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))

print('X_train and X_test shapes: ',X_train.shape, X_test.shape)
print('y_train and y_test shapes: ',y_train.shape, y_test.shape)
print('X_val and y_val shapes: ' ,X_val.shape, y_val.shape)

In [None]:
# Implementing K-NN Classifier

from sklearn.neighbors import KNeighborsClassifier
import time
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Per-class report on the validation split.
y_val_predict = knn.predict(X_val)

print("K- Neighbors Prediction")
print(classification_report(y_val, y_val_predict))

# Accuracy on the test split. The predictions must be computed here:
# previously `y_test_predict` was never assigned, so this line raised NameError.
y_test_predict = knn.predict(X_test)

print( "The accuracy of the knn model is: ",accuracy_score(y_test,y_test_predict))

In [None]:
# Support vector classifier with scikit-learn's default settings.
from sklearn.svm import SVC
clf = SVC()
clf.fit(X_train, y_train)

# Per-class report on the validation split.
y_val_predict = clf.predict(X_val)

print("SVC Prediction")
print(classification_report(y_val, y_val_predict))

# Accuracy on the test split. The original referenced `y_test_predicted`,
# which was never assigned. A model-specific name is used so that a stale
# array from another model's cell can never be scored here by accident.
svc_test_predicted = clf.predict(X_test)

print( "The accuracy of the svm model is: ",accuracy_score(y_test,svc_test_predicted))

In [None]:
# Gaussian naive Bayes classifier.
from sklearn.naive_bayes import GaussianNB
naive_bayes_clf=GaussianNB()
naive_bayes_clf.fit(X_train, y_train)

# Per-class report on the validation split.
y_val_pre = naive_bayes_clf.predict(X_val)

print("NB Prediction")
print(classification_report(y_val, y_val_pre))

# Accuracy on the test split. The original referenced `y_test_predicted`,
# which was never assigned. A model-specific name is used so that a stale
# array from another model's cell can never be scored here by accident.
nb_test_predicted = naive_bayes_clf.predict(X_test)

print( "The accuracy of the NB model is: ",accuracy_score(y_test,nb_test_predicted))
