# Create CNN for CIFAR10 dataset

1. read Toronto paper.
2. Make a subroutine to get the predictions of the top 3 probabilities for each test sample and evaluation the overall accuracy of top1 and top3 predictions.
3. read CNN overview pdf
4. download cifar10 dataset.
5. make your first CNN.  It does not need to be a big one.  It is OK to get just one or two convolution layers in your first CNN to save time.


In [65]:
from keras.optimizers import SGD, Adam
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from matplotlib.ticker import FormatStrFormatter
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn import tree, datasets
from scipy.io import arff
from time import time
from os.path import join
from os import system, getcwd, startfile
from timeit import default_timer as timer
import math
import json
import inspect
import concurrent.futures as cf  # doesn't work with sklearn
import pandas as pd
import numpy as np
import copy as copy
import statistics as stt
import seaborn as sns
import pickle
import sys
sns.set_theme()
%matplotlib inline


In [66]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict


def convert(data):
    if isinstance(data, bytes):
        return data.decode('ascii')
    if isinstance(data, dict):
        return dict(map(convert, data.items()))
    if isinstance(data, tuple):
        return map(convert, data)
    return data

def convert1(data):
    if isinstance(data, bytes): return data.decode('utf-8')
    if isinstance(data, dict): return dict(map(convert1, data.items()))
    if isinstance(data, tuple): return map(convert1, data)
    return data

def convert2(data):
    # https://stackoverflow.com/questions/33137741/convert-all-bytes-to-str-in-an-object-consiting-of-random-nested-built-in-ty
    data = {
        key.decode() if isinstance(key, bytes) else key:
        val.decode() if isinstance(val, bytes) else [element.decode() if isinstance(element, bytes) else element for element in val]
        for key, val in data.items()
        }
    return data

def convert3(data):
    # https://stackoverflow.com/questions/33137741/convert-all-bytes-to-str-in-an-object-consiting-of-random-nested-built-in-ty
    if isinstance(data, bytes):
        return data.decode()
    if isinstance(data, (str, int)):
        return str(data)
    if isinstance(data, dict):
        return dict(map(convert3, data.items()))
    if isinstance(data, tuple):
        return tuple(map(convert3, data))
    if isinstance(data, list):
        return list(map(convert3, data))
    if isinstance(data, set):
        return set(map(convert3, data))
    return data


def summarize_diagnostics(history):
	# plot loss
	plt.subplot(211)
	plt.title('Cross Entropy Loss')
	plt.plot(history.history['loss'], color='blue', label='train')
	plt.plot(history.history['val_loss'], color='orange', label='test')
	# plot accuracy
	plt.subplot(212)
	plt.title('Classification Accuracy')
	plt.plot(history.history['accuracy'], color='blue', label='train')
	plt.plot(history.history['val_accuracy'], color='orange', label='test')
	# save plot to file
	filename = sys.argv[0].split('/')[-1]
	plt.savefig(filename + '_plot.png')
	plt.close()


## Test run on test_batch

### Read Datasets

In [67]:
test_batch = unpickle(join(getcwd().rstrip('src'), 'data',
                      'cifar-10-batches-py', 'test_batch'))
test_batch = convert2(test_batch)

# for element in test_batch:
#     print(element)
#     print(test_batch[element])

### Turn into sets

In [68]:
# print(test_batch.keys())
# print(test_batch['data'])
X = np.array(test_batch['data'])
X = X.reshape(X.shape[0], 3, 32, 32).transpose(0, 2, 3, 1)
y = np.array(test_batch['labels'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2018)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(8000, 32, 32, 3)
(8000, 10)
(2000, 32, 32, 3)
(2000, 10)


### Create Model

In [69]:
model = Sequential()
# 3-block vgg style architecture
model.add(Conv2D(32, (3, 3), activation='relu',
          kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
model.add(Conv2D(32, (3, 3), activation='relu',
          kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(64, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(Conv2D(64, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(128, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(Conv2D(128, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(MaxPooling2D((2, 2)))
# output layer
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.001, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [70]:
# # fit model
# # history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test), verbose=1)
# history = model.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=1)
# # evaluate model
# _, acc = model.evaluate(X_test, y_test, verbose=0)
# print('> %.3f' % (acc * 100.0))
# # learning curves
# summarize_diagnostics(history)

## Run with full dataset

### Read Datasets

In [72]:
# meta data
batches_meta = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'batches.meta'))
batches_meta = convert3(batches_meta)
print(batches_meta)

{'num_cases_per_batch': '10000', 'label_names': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], 'num_vis': '3072'}


In [75]:
# batch data
data_batch1 = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'data_batch_1'))
data_batch1 = convert3(data_batch1)
data_batch2 = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'data_batch_2'))
data_batch2 = convert3(data_batch2)
data_batch3 = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'data_batch_3'))
data_batch3 = convert3(data_batch3)
data_batch4 = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'data_batch_4'))
data_batch4 = convert3(data_batch4)
data_batch5 = unpickle(join(getcwd().rstrip('src'), 'data',
                        'cifar-10-batches-py', 'data_batch_5'))
data_batch5 = convert3(data_batch5)
data = np.concatenate((data_batch1['data'], data_batch2['data'], data_batch3['data'],
                          data_batch4['data'], data_batch5['data']))
labels = np.concatenate((data_batch1['labels'], data_batch2['labels'], data_batch3['labels'],
                            data_batch4['labels'], data_batch5['labels']))
print(data.shape)
print(labels.shape)

(50000, 3072)
(50000,)


### Turn into sets

In [76]:
X = np.array(data)
X = X.reshape(X.shape[0], 3, 32, 32).transpose(0, 2, 3, 1)
y = np.array(labels)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2018)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(40000, 32, 32, 3)
(40000, 10)
(10000, 32, 32, 3)
(10000, 10)


### Create Model

In [77]:
model = Sequential()
# 3-block vgg style architecture
model.add(Conv2D(32, (3, 3), activation='relu',
          kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
model.add(Conv2D(32, (3, 3), activation='relu',
          kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(64, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(Conv2D(64, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(MaxPooling2D((2, 2)))

# model.add(Conv2D(128, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(Conv2D(128, (3, 3), activation='relu',
#           kernel_initializer='he_uniform', padding='same'))
# model.add(MaxPooling2D((2, 2)))
# output layer
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.001, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=['accuracy'])


In [78]:
# fit model
# history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test), verbose=1)
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), verbose=1)
# evaluate model
_, acc = model.evaluate(X_test, y_test, verbose=0)
print('> %.3f' % (acc * 100.0))
# learning curves
summarize_diagnostics(history)




KeyboardInterrupt: 

## Result

CNN networks are built successfully and can be executed without bugs.
However, the accuracy is stuck firmly both in test batch and full batch on around 9%.
Moreover, without GPU acceleration the whole process is painfully slow.