# Image Recognition using a Convolutional Neural Network

## Data Exploration

A link to the dataset can be found here:
http://www.cs.utoronto.ca/%7Ekriz/cifar.html

The CIFAR-10 dataset contains pictures of objects, each belonging to one of 10 categories (classes):

labels: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck

The dataset consists of 60,000 32 x 32 color images, with 6,000 images per class.  Each image has a total of 3,072 properties or features (32 * 32 * 3 = 3072): the two 32s are the width and height of the image in pixels, and the product is multiplied by 3 because the images have three color channels (red, green and blue).  The training features come in the form of a two-dimensional array with 50,000 rows and 3,072 columns (split across five batch files of 10,000 rows each), where each row is a uint8 representation of an image.  The test set features are also a two-dimensional array, consisting of 10,000 rows and 3,072 columns.  Both the training and testing datasets contain labels in a one-dimensional format, where the value of the ith label ranges between 0 and 9.  Each image is stored in row-major order, where the first 1024 entries hold the values of the red channel, the next 1024 the values of the green channel, and the last 1024 the values of the blue channel.

### Explore categories

In [23]:
import cPickle
import pandas as pd
import numpy as np
import os
import struct

def unpickle(file):
    """Load and return the object stored in a pickled file.

    Parameters
    ----------
    file : str
        Path to the pickle file (for example a CIFAR batch or the
        ``batches.meta`` file).

    Returns
    -------
    object
        Whatever object was pickled into ``file``.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    # The context manager closes the handle even when loading fails.
    with open(file, 'rb') as fo:
        return cPickle.load(fo)

# Path to the CIFAR metadata file, relative to the working directory.
meta_file = 'Datasets/Cifar/batches.meta'
meta_data = unpickle(meta_file)
# 'label_names' holds the category names; show them as a one-column table.
categories = pd.DataFrame.from_dict(meta_data['label_names'])
print(categories)

            0
0    airplane
1  automobile
2        bird
3         cat
4        deer
5         dog
6        frog
7       horse
8        ship
9       truck


### Explore features and labels

Below is a sample of what the training and test sets look like:

In [33]:
def load_file(batch_1 = None, batch_2 = None, batch_3 = None, batch_4 = None, batch_5 = None, test_batch = None):
    """Print and return the feature matrix of the first training batch.

    Parameters
    ----------
    batch_1 : str
        Path to the pickled training batch whose ``'data'`` entry is returned.
    batch_2, batch_3, batch_4, batch_5 : str, optional
        Accepted for interface compatibility; currently not read.
    test_batch : str, optional
        Path to the pickled test batch.  When given, the file is unpickled
        so that a missing or corrupt file is reported, but its contents are
        not part of the return value.

    Returns
    -------
    object
        The value stored under ``'data'`` in the first training batch.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # Context managers guarantee the handles are closed even on failure.
    with open(batch_1, 'rb') as batch_file:
        training_batch = cPickle.load(batch_file)

    # The test batch is optional, matching its None default in the signature.
    if test_batch is not None:
        with open(test_batch, 'rb') as test_file:
            cPickle.load(test_file)

    training_features_sample = training_batch['data']
    print(training_features_sample)
    return training_features_sample

# Locations of the first training batch and the test batch, relative to
# the working directory.
training_batch_1_path = 'Datasets/Cifar/data_batch_1'
test_set_path = 'Datasets/Cifar/test_batch'
# load_file prints the feature matrix and returns it.
X_train = load_file(batch_1 = training_batch_1_path, test_batch = test_set_path)
print(X_train.shape)

[[ 59  43  50 ..., 140  84  72]
 [154 126 105 ..., 139 142 144]
 [255 253 253 ...,  83  83  84]
 ..., 
 [ 71  60  74 ...,  68  69  68]
 [250 254 211 ..., 215 255 254]
 [ 62  61  60 ..., 130 130 131]]
(10000, 3072)


### 1. Import libraries and specify global variables

In [41]:
import keras
from keras.datasets import cifar10
import h5py
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
# The original statement had no `import` clause, which is a SyntaxError;
# import the module itself so its layers remain reachable by attribute.
from keras.layers import advanced_activations
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.regularizers import l2, activity_l2
from keras.layers.normalization import BatchNormalization
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Training hyper-parameters shared by the cells below.
batch_size = 32       # samples per gradient update
nb_classes = 10       # number of CIFAR-10 categories
nb_epoch = 200        # upper bound on training epochs
lambda_reg = 0.001

# CIFAR-10 images are 32 x 32 pixels ...
img_rows = img_cols = 32
# ... with three colour channels (RGB).
img_channels = 3

### 2.  A function that returns training, validation and testing sets

The dataset was taken from Keras' library of preprocessed datasets.  The data has already been reshaped and is returned as 4D tensors (4-dimensional arrays).  
E.g. X_train = [number of samples, channels (depth), rows (height), columns (width)]

In [44]:
def load_dataset():
    """Load CIFAR-10 through Keras and split it into train/test/validation.

    The final 5000 training examples are held out as the validation set.
    Labels are one-hot encoded over ``nb_classes`` classes and images are
    converted to float32 and divided by 255.  A summary of the split sizes
    is printed.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, X_val, y_val)``.
    """
    (train_x, train_y), (test_x, test_y) = cifar10.load_data()

    # Hold out the tail of the training data for validation.
    holdout = 5000
    val_x, val_y = train_x[-holdout:], train_y[-holdout:]
    train_x, train_y = train_x[:-holdout], train_y[:-holdout]

    # One-hot encode the class vectors.
    train_y, test_y, val_y = [np_utils.to_categorical(labels, nb_classes)
                              for labels in (train_y, test_y, val_y)]

    # Convert the pixel values to float32 and scale them by 1/255.
    train_x, test_x, val_x = [images.astype('float32') / 255
                              for images in (train_x, test_x, val_x)]

    print('X_train shape:', train_x.shape)
    print(train_x.shape[0], 'train samples')
    print(test_x.shape[0], 'test samples')
    print(train_y.shape[0], 'training labels')
    print(test_y.shape[0], 'test labels')
    print(val_x.shape[0], 'validation samples')
    print(val_y.shape[0], 'validation labels')

    return train_x, train_y, test_x, test_y, val_x, val_y

### Callbacks:  Includes metrics from each epoch

In [43]:
class History(keras.callbacks.Callback):
    """Keras callback that records loss and accuracy after every epoch.

    After training, ``training_loss``, ``training_acc``, ``val_loss`` and
    ``val_acc`` each hold one entry per completed epoch.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded metrics; called once when training starts."""
        self.training_loss = []
        self.training_acc = []
        self.val_loss = []
        self.val_acc = []

    def on_epoch_end(self, batch, logs=None):
        """Append the metrics of the epoch that just finished.

        ``batch`` is unused here; it is the first positional argument
        supplied to this hook and the name is kept for compatibility.
        ``logs`` is the metrics dict; a missing key is recorded as ``None``.
        """
        # Avoid a shared mutable default and tolerate logs=None.
        logs = logs or {}
        self.training_loss.append(logs.get('loss'))
        self.training_acc.append(logs.get('acc'))
        self.val_loss.append(logs.get('val_loss'))
        self.val_acc.append(logs.get('val_acc'))

        # Uncomment to save results after each epoch

        #np.save(file = 'Model/2nd_run/performance/training_loss', arr = self.training_loss)
        #np.save(file = 'Model/2nd_run/performance/training_acc', arr = self.training_acc)
        #np.save(file = 'Model/2nd_run/performance/val_loss', arr = self.val_loss)
        #np.save(file = 'Model/2nd_run/performance/val_acc', arr = self.val_acc)

# Shared callback instance used by the fit and plotting cells.
history = History()

### Graph Plots

In [42]:
# function to plot and save the loss data 

def plot_loss(history = None, filepath = None):
    """Plot training loss (red) against validation loss (blue) per epoch.

    Parameters
    ----------
    history : History
        Callback object exposing ``training_loss`` and ``val_loss`` lists.
    filepath : str, optional
        When given, the figure is also written to this path.
    """
    plt.figure(figsize=(6, 4))
    # The History callback stores the training curve as `training_loss`.
    plt.plot(history.training_loss, color = 'red')
    plt.plot(history.val_loss, color = 'blue')
    plt.ylabel('loss')
    plt.xlabel('epochs')
    plt.title('Graph Comparing Training and Validation Loss')
    red_patch = mpatches.Patch(color='red', label='Training Loss')
    blue_patch = mpatches.Patch(color='blue', label='Validation Loss')
    plt.legend(handles=[red_patch, blue_patch],loc = 1)
    # Lay out after the labels exist so they are taken into account.
    plt.tight_layout()
    # Save before show(): showing can release the figure, which would
    # leave an empty image on disk.
    if filepath:
        plt.savefig(filepath)
    plt.show()

# function to plot and save the accuracy data

def plot_accuracy(history, filepath = None):
    """Plot training accuracy (red) against validation accuracy (blue).

    Parameters
    ----------
    history : History
        Callback object exposing ``training_acc`` and ``val_acc`` lists.
    filepath : str, optional
        When given, the figure is also written to this path.  Defaults to
        ``None`` so the function can be called with only ``history``.
    """
    plt.figure(figsize=(6, 4))
    # The History callback stores the training curve as `training_acc`.
    plt.plot(history.training_acc, color = 'red')
    plt.plot(history.val_acc, color = 'blue')
    plt.ylabel('accuracy')
    plt.xlabel('epochs')
    plt.title('Graph Comparing Training and Validation Accuracy')
    red_patch = mpatches.Patch(color='red', label='Training Accuracy')
    blue_patch = mpatches.Patch(color='blue', label='Validation Accuracy')
    plt.legend(handles=[red_patch, blue_patch],loc = 4)
    # Adjust the layout once the labels exist so they fit the figure area.
    plt.tight_layout()
    # Save before show(): showing can release the figure, which would
    # leave an empty image on disk.
    if filepath:
        plt.savefig(filepath)
    plt.show()

The ConvNet was trained using stochastic gradient descent (SGD) with Nesterov momentum.  The momentum parameter was set to 0.9. 
Early stopping, with a patience of 10 epochs on the validation loss, gives the model enough room to learn but halts training once it has stopped improving.  The training set data was shuffled at each epoch.

### Predict function

In [45]:
def predict(X_test = None, y_test = None, model = None):
    """Evaluate ``model`` on the given data, print and return the metrics.

    ``model.evaluate(X_test, y_test)`` must yield exactly two values,
    which are treated as loss and accuracy in that order.

    Returns
    -------
    tuple
        ``(loss, accuracy)``.
    """
    metrics = model.evaluate(X_test, y_test)
    loss, accuracy = metrics
    print(loss, accuracy)
    return loss, accuracy

### Train, test and predict 

In [5]:
# load data
X_train, y_train, X_test, y_test, X_val, y_val = load_dataset()

X_train shape: (40000, 3, 32, 32)
40000 train samples
10000 test samples
40000 training labels
10000 test labels
10000 validation samples
10000 validation labels


In [1]:
# Test accuracy of each run, keyed by str(translation).
accuracy_translation_opt = {}

def train_predict(cnn = None, learning_rate = None, translation = None, 
                  zca_whitening = False, rotation = None, flipping = None, shear_range = None,
                  training_size = None, validation_size = None, filepath = None, weights_path = None,
                  featurewise_center = False, featurewise_std_normalization = False):
    """Compile ``cnn``, optionally train it, then evaluate it on the test set.

    Parameters
    ----------
    cnn : keras model
        Model to compile and train/evaluate.
    learning_rate : float
        Learning rate for the SGD optimizer.
    translation : float
        Passed as both the width and height shift range of the augmenter.
    zca_whitening : bool
        Passed to ``ImageDataGenerator``.
    rotation, flipping, shear_range
        Passed to ``ImageDataGenerator`` as ``rotation_range``,
        ``horizontal_flip`` and ``shear_range``.
    training_size, validation_size : int
        Number of leading training / validation examples to use.
        ``validation_size`` also limits how many test examples are scored.
    filepath : str, optional
        Forwarded to ``fit``; where the trained weights are saved.
    weights_path : str, optional
        When not ``None`` training is skipped.  The weights themselves are
        not loaded here, so ``cnn`` must already contain them.
    featurewise_center, featurewise_std_normalization : bool
        Passed to ``ImageDataGenerator``; both default to the previously
        hard-coded ``False``.

    Side effects
    ------------
    Stores the test accuracy in ``accuracy_translation_opt`` under
    ``str(translation)`` and, when training ran, plots the recorded curves.
    """
    # The original messages ended in a stray '%' placeholder that was never
    # substituted; format the values explicitly instead.
    print('cnn is {}'.format(cnn))
    print('learning rate is {}'.format(learning_rate))
    print('translation is {}'.format(translation))
    print('rotation range is 0 - {}'.format(rotation))
    print('flipping is {}'.format(flipping))
    print('training size is {}'.format(training_size))
    print('validation size is {}'.format(validation_size))
    print('filepath is {}'.format(filepath))
    
    # instance of Stochastic Gradient Descent optimizer
    sgd = SGD(lr = learning_rate, decay = 1e-6, momentum = 0.9, nesterov = True)
    
    # set optimizer and specify cost function
    cnn.compile(loss = 'categorical_crossentropy',
                  optimizer = sgd,
                  metrics = ['accuracy'])
    
    # configure data augmentation
    datagen = ImageDataGenerator(
            featurewise_center=featurewise_center,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=featurewise_std_normalization,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=zca_whitening,  # apply ZCA whitening
            rotation_range = rotation,  # randomly rotate images in the range (degrees, 0 to 180)
            shear_range = shear_range,
            width_shift_range = translation,  # randomly shift images horizontally (fraction of total width)
            height_shift_range = translation,  # randomly shift images vertically (fraction of total height)
            horizontal_flip = flipping,  # randomly flip images horizontally
            vertical_flip = False)  # no vertical flipping
    
    trained = weights_path is None
    if trained:
        fit(model = cnn, datagen = datagen, training_size = training_size, 
                      validation_size = validation_size, filepath=filepath)
    loss, accuracy = predict(X_test[:validation_size], y_test[:validation_size], model = cnn)
    
    key = str(translation)
    accuracy_translation_opt[key] = accuracy
    # The history callback only holds curves for a run that actually trained.
    if trained:
        plot_accuracy(history, None)
        plot_loss(history=history)
    print('Accuracy and loss with {} translation = {} and {} respectively'.format(translation, accuracy, loss))

### Fit function

In [46]:
def fit(model = None, datagen = None, training_size = None, 
                  validation_size = None, filepath = None):
    """Train ``model`` on the module-level training data.

    Uses the globals ``X_train``, ``y_train``, ``X_val``, ``y_val``,
    ``batch_size``, ``nb_epoch`` and the ``history`` callback.  Training
    stops early after 10 epochs without improvement in ``val_loss``, and
    the best weights so far are checkpointed to
    ``Model/2nd_run/checkpoint/weights.h5``.

    Parameters
    ----------
    model : keras model
        Compiled model to train.
    datagen : ImageDataGenerator, optional
        When truthy, batches are drawn from this augmenting generator;
        otherwise the raw arrays are used with per-epoch shuffling.
    training_size, validation_size : int
        Number of leading training / validation examples to use.
    filepath : str, optional
        When truthy, the final weights are saved to this path.
    """
    # Callbacks: metric recording, early stopping and best-weights checkpoint.
    stop_early = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=10,
                                               verbose=1,
                                               mode='auto')
    save_best = keras.callbacks.ModelCheckpoint(
        filepath='Model/2nd_run/checkpoint/weights.h5',
        monitor='val_loss', verbose=0, save_best_only=True, mode='auto')
    training_callbacks = [history, stop_early, save_best]

    # Slices shared by both training paths.
    train_images = X_train[:training_size]
    train_labels = y_train[:training_size]
    held_out = (X_val[:validation_size], y_val[:validation_size])

    if not datagen:
        print('No data augmentation applied')
        model.fit(train_images, train_labels,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  verbose=1,
                  callbacks=training_callbacks,
                  validation_data=held_out,
                  shuffle=True)
    else:
        print('Data augmentation applied')
        datagen.fit(train_images)
        augmented_batches = datagen.flow(train_images, train_labels,
                                         batch_size=batch_size)
        model.fit_generator(augmented_batches,
                            samples_per_epoch=training_size,
                            nb_epoch=nb_epoch,
                            callbacks=training_callbacks,
                            validation_data=held_out)

    # Persist the final weights when a destination was supplied.
    if filepath:
        print('yes')
        model.save_weights(filepath)

### Model

In [47]:
def convnet(weights_path=None):
    """Build the 16-convolution-layer network for 3 x 32 x 32 inputs.

    The network is five stacks of zero-padded 3x3 ReLU convolutions
    (2, 2, 4, 4 and 4 layers with 64, 128, 256, 512 and 512 filters), each
    followed by 2x2 max pooling, then two 1024-unit dense layers with L2
    weight regularisation and 50% dropout, and a 10-way softmax output.
    Only the first two stacks request ``he_normal`` initialisation; the
    remaining convolutions use the layer's default.

    Parameters
    ----------
    weights_path : str, optional
        When truthy, weights are loaded from this path.

    Returns
    -------
    keras model
        The (uncompiled) Sequential model; its summary is printed.
    """
    # (filters, number of conv layers, extra Convolution2D keyword arguments)
    conv_stacks = [
        (64, 2, {'init': 'he_normal'}),
        (128, 2, {'init': 'he_normal'}),
        (256, 4, {}),
        (512, 4, {}),
        (512, 4, {}),
    ]

    model = Sequential()
    is_input_layer = True
    for n_filters, depth, conv_kwargs in conv_stacks:
        for _ in range(depth):
            # Pad by one pixel so each 3x3 convolution keeps the spatial size;
            # the very first padding layer also declares the input shape.
            if is_input_layer:
                model.add(ZeroPadding2D((1,1),input_shape=(3,32,32)))
                is_input_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(n_filters, 3, 3, activation='relu', **conv_kwargs))
        # Halve the spatial resolution at the end of every stack.
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(Flatten())
    # Two regularised fully connected layers, each followed by dropout.
    for _ in range(2):
        model.add(Dense(1024, init='he_normal', W_regularizer = l2(.004), activation='relu'))
        model.add(Dropout(0.5))
    # Output layer: one unit per class.
    model.add(Dense(10, init='he_normal', activation='softmax'))

    # Restore previously saved weights when a path is supplied.
    if weights_path:
        model.load_weights(weights_path)
        print('loaded pre-trained model')

    print(model.summary())
    return model

### Train, test, predict

In [None]:
#final_weights = <Specify a filepath to save the weights here>

# instance of model (the builder defined in this notebook is `convnet`;
# the previously referenced `cuda_conv` is not defined in this file)
convNet = convnet()

# fit and predict
# Feature-wise centering / std normalization are not passed: they stay at
# train_predict's disabled setting.
train_predict(cnn=convNet,
              learning_rate= .01,
              translation=.0625,
              flipping=True,
              rotation=0,
              zca_whitening = False,
              shear_range=None,
              training_size=len(X_train),
              validation_size=len(X_val),
              filepath=None)