### Get the materials

In [None]:
# Requires the latest pip
# !pip3 install --user --upgrade pip

# Current stable release for CPU and GPU
# !pip3 install --user tensorflow
# !pip3 install --user keras

# conda install -c conda-forge keras tensorflow
# conda update -n base -c defaults conda
#!conda install -c conda-forge jupyterthemes

In [None]:
# unzip file with all images and labels
# filename = 'cifar-10-python.tar.gz'
# tar = tarfile.open(filename, 'r:gz')
# tar.extractall()
# tar.close()

In [1]:
from pathlib import Path
# Absolute path of the directory the notebook is run from; the dataset paths
# used by later cells are built relative to it.
projectpath = Path.cwd().absolute()

### Figure out what is in the batch

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tarfile
import os
import pickle

In [3]:
# define a function to unpack the data into a dictionary
def unpickle(file):
    """Load and return the object pickled in the file at path ``file``.

    The file is opened in binary mode and read with ``encoding='bytes'``,
    so 8-bit strings stored in the pickle come back as ``bytes`` objects
    (hence keys such as ``b'data'``).

    NOTE: unpickling can execute arbitrary code; only use on trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding = 'bytes')

In [4]:
# Locate the extracted dataset directory under the project path and load
# the first training batch so its structure can be inspected.
dirpath   = os.path.join(projectpath, 'cifar-10-batches-py')
batchpath = os.path.join(dirpath, 'data_batch_1')
batch     = unpickle(batchpath)

# what are the key types
# (keys are bytes objects because the batch is unpickled with encoding='bytes')
list(batch.keys())

[b'batch_label', b'labels', b'data', b'filenames']

In [5]:
# number of labels stored in this batch
len(batch[b'labels'])

10000

In [6]:
# rows = images, columns = flattened pixel values of one image
# (all channels of an image are stored in a single row)
batch[b'data'].shape 

(10000, 3072)

In [7]:
# back calculating the shape of the image
# Each row holds n_channels * img_dim * img_dim values, so the side length
# of a (square) image is sqrt(row_length / n_channels).
n_channels = 3
n_imgs = batch[b'data'].shape[0]
img_dim = int(np.sqrt(batch[b'data'].shape[1]/n_channels))

# Reshape each row to (channel, row, col) and then move the channel axis
# last, giving an array of shape (n_imgs, img_dim, img_dim, n_channels).
train_data = batch[b'data'].reshape((n_imgs, n_channels, img_dim, img_dim)).transpose(0, 2, 3, 1)

In [8]:
# normalize the images
# Divides by 255 and subtracts 0.5 (maps [0, 255] to [-0.5, 0.5];
# assumes 8-bit pixel values -- TODO confirm).
# NOTE: this cell overwrites its own input, so running it twice
# normalizes already-normalized data.
train_data = (train_data / 255) - 0.5

### Process all batches and form into dataset

In [9]:
# one hot encode the labels (could have just used keras.utils to_categorical)
def one_hot_encode(train_labels, n_classes=None):
    """Return a one-hot matrix for a 1-D sequence of integer class labels.

    Parameters
    ----------
    train_labels : sequence of int
        Class index of every sample.
    n_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        ``max(train_labels) + 1``. Pass it explicitly when some classes
        may be absent from ``train_labels`` (e.g. model predictions), so
        the result always has one column per class.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(train_labels), n_classes)`` with a 1
        in column ``train_labels[i]`` of row ``i`` and 0 elsewhere.
    """
    labels = np.asarray(train_labels, dtype=np.intp)
    # The number of rows comes from the argument itself rather than from a
    # notebook-level variable, so inputs of any length are encoded correctly.
    n_samples = labels.shape[0]
    if n_classes is None:
        n_classes = int(labels.max()) + 1 if n_samples else 0

    encoded = np.zeros((n_samples, n_classes))
    if n_samples:
        # Set one entry per row in a single vectorized assignment.
        encoded[np.arange(n_samples), labels] = 1
    return encoded

In [15]:
# data comes in vectors so reshape then normalize
def reshape_data(batchdata, n_channels=3):
    """Reshape flat image rows into channel-last images and normalize them.

    Parameters
    ----------
    batchdata : numpy.ndarray
        Array whose rows each hold one image as ``n_channels`` consecutive
        planes of ``img_dim * img_dim`` values.
    n_channels : int, optional
        Number of channel planes per row (default 3).

    Returns
    -------
    (numpy.ndarray, int)
        The images as an array of shape
        ``(n_imgs, img_dim, img_dim, n_channels)`` with every value mapped
        through ``v / 255 - 0.5``, and the inferred side length ``img_dim``.

    Raises
    ------
    ValueError
        If a row cannot be split into ``n_channels`` square planes.
    """
    n_imgs   = batchdata.shape[0]
    n_values = batchdata.shape[1]
    # Images are assumed square: side = sqrt(values per channel).
    img_dim  = int(np.sqrt(n_values / n_channels))
    if n_channels * img_dim * img_dim != n_values:
        raise ValueError(
            'cannot split rows of length %d into %d square channels'
            % (n_values, n_channels))

    # (n, C, H, W) -> (n, H, W, C)
    train_data = batchdata.reshape((n_imgs, n_channels, img_dim, img_dim)).transpose(0, 2, 3, 1)
    # normalize
    train_data = (train_data / 255) - 0.5
    return train_data, img_dim

In [16]:
# unpack the data from the batches, do a bunch of stuff, then combine them
batchpath  = os.path.join(projectpath, 'cifar-10-batches-py')

# Collect the per-batch arrays first and concatenate once at the end,
# instead of growing train_data / train_labels inside the loop.
# The loop variable is deliberately not called `set`, which would replace
# the builtin `set` for every later cell of the notebook.
batch_images = []
batch_onehot = []
for batch_idx in range(1, 6):
    dirname       = os.path.join(batchpath, 'data_batch_' + str(batch_idx))
    batch         = unpickle(dirname)
    batch_data, n = reshape_data(batch[b'data'])   # n = image side length
    batch_images.append(batch_data)
    batch_onehot.append(one_hot_encode(batch[b'labels']))

train_data   = np.concatenate(batch_images)
train_labels = np.concatenate(batch_onehot)

In [12]:
# sanity check: shape of the combined training images
# (images, rows, columns, channels)
train_data.shape

(50000, 32, 32, 3)

In [17]:
# unpack the data from test batch, manipulate
# Same pipeline as for the training batches: unpickle, reshape + normalize
# the images, one-hot encode the labels.
batchpath          = os.path.join(projectpath, 'cifar-10-batches-py')
dirname            = os.path.join(batchpath, 'test_batch')
testbatch          = unpickle(dirname)
# img_dim (image side length) is reused below as the model's input size
test_data, img_dim = reshape_data(testbatch[b'data'])        
test_labels        = one_hot_encode(testbatch[b'labels'])

In [18]:
# sanity check: one row per test image, one column per class
test_labels.shape

(10000, 10)

### Build the network

In [19]:
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPool2D, BatchNormalization, Activation
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


In [None]:
# Build, compile and train the CNN classifier, then plot the loss curves.

# --- convolutional feature extractor ---
model = Sequential()
# conv 1: 32 kernels of size 3, input is an img_dim x img_dim image with 3 channels
model.add(Conv2D(32, 
                 kernel_size = 3, 
                 activation  = 'relu', 
                 padding     = 'same', 
                 input_shape = (img_dim,
                                img_dim,
                                3)))
model.add(MaxPool2D(2))
model.add(Dropout(0.25))
# conv 2: 32 kernels of size 5
model.add(Conv2D(32,
                 kernel_size = 5, 
                 activation  = 'relu',
                 padding     = 'same'))
model.add(MaxPool2D(2))
model.add(Dropout(0.25))
# conv 3: 64 kernels of size 3; batch normalization is applied before the
# ReLU, so the activation is added as a separate layer
model.add(Conv2D(64,
                 kernel_size = 3))
model.add(BatchNormalization())
model.add(Activation('relu'))
# conv 4: 64 kernels of size 3
model.add(Conv2D(64, 
                 activation='relu',
                 kernel_size = 3))
model.add(MaxPool2D(2))

# --- dense classifier head ---
model.add(Flatten())
model.add(Dense  (64,  activation='relu'))
# same "linear -> batch norm -> ReLU" pattern as conv 3
model.add(Dense  (128))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(Dense  (64,  activation='relu'))
# 10 outputs with softmax: one probability per class
model.add(Dense  (10,  activation='softmax'))

# Stop training once the monitored quantity (Keras' default for
# EarlyStopping) has not improved for 15 epochs, and roll the model back
# to the best weights seen.
early_stopping_monitor = EarlyStopping(patience = 15,
                                       restore_best_weights = True)

# categorical cross-entropy matches the one-hot encoded labels
model.compile(optimizer = 'adam', 
              loss      = 'categorical_crossentropy', 
              metrics   = ['accuracy'])

# Hold out 20% of the training data for validation; train for at most
# 100 epochs (early stopping may end it sooner).
training = model.fit(train_data, 
                     train_labels, 
                     validation_split = 0.2, 
                     epochs = 100, 
                     batch_size = 125,
                     callbacks = [early_stopping_monitor])

#model.save('model_saved9.h5')

# quick look at training vs. validation loss per epoch
plt.plot(training.history['loss'])
plt.plot(training.history['val_loss'])
plt.show()

In [None]:
# per-epoch values recorded by model.fit; the plotting cells read the
# 'loss', 'val_loss', 'accuracy' and 'val_accuracy' entries
training.history

## Visualize Model Components

#### The code that produced the figures in the project presentation is included below

In [None]:
# Training vs. validation curves, side by side: loss (left), accuracy (right).
# Each curve is labelled and a legend is drawn so the two lines in a panel
# can be told apart; the x axis is the epoch index.
fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].plot(training.history['loss'], label='training')
axs[0].plot(training.history['val_loss'], label='validation')
axs[0].set_title('Loss')
axs[0].set_xlabel('Epoch')
axs[0].legend()
axs[1].plot(training.history['accuracy'], label='training')
axs[1].plot(training.history['val_accuracy'], label='validation')
axs[1].set_title('Accuracy')
axs[1].set_xlabel('Epoch')
axs[1].legend()

fig.subplots_adjust(top = 0.8, bottom = 0.1, hspace = 0.5, wspace = 0.5)
plt.show()
fig.savefig('loss&accuracyModel9.png')

#### Get shape of kernel and visualize

In [None]:
# First layer of the model (the first Conv2D added above).
conv1 = model.layers[0]
# get_weights() returns a list; element 0 is taken as the kernel tensor
# (presumably element 1 is the bias -- confirm against the Keras docs).
weights1 = conv1.get_weights()
kernels1 = weights1[0]
# display the shape of the kernel tensor
kernels1.shape

In [None]:
# Take the first kernel (index 0 along the last axis of the kernel tensor).
kernels1_1 = kernels1[:,:,:,0]
# NOTE: a bare expression in the middle of a cell is not displayed; only
# the last expression of a cell is.
kernels1_1
# Average over the FIRST axis of the kernel.
# NOTE(review): presumably the axes are (rows, cols, input channels); if
# the goal is a per-pixel average across colour channels this would be
# axis=-1 rather than axis=0 -- confirm intent.
kernels1_1avg = np.mean(kernels1_1, axis=0)

plt.imshow(kernels1_1avg);
plt.axis('off')

#### Define a function that performs a convolution with a kernel

In [None]:
def convolution(kernel, image):
    """Slide ``kernel`` over ``image`` and return the map of window sums.

    At every position where the kernel fits entirely inside the image (no
    padding, stride 1) the overlapping image window is multiplied
    element-wise with the kernel and summed over all axes. The kernel is
    applied as-is (it is not flipped).

    Parameters
    ----------
    kernel : numpy.ndarray
        Array whose first two axes are the kernel's rows and columns; any
        further axes (e.g. channels) must match the image's.
    image : numpy.ndarray
        Array whose first two axes are the image's rows and columns.

    Returns
    -------
    numpy.ndarray
        2-D array of shape
        ``(image_rows - kernel_rows + 1, image_cols - kernel_cols + 1)``.
        Images and kernels do not need to be square.
    """
    kernel_rows, kernel_cols = kernel.shape[0], kernel.shape[1]
    out_rows = image.shape[0] - (kernel_rows - 1)
    out_cols = image.shape[1] - (kernel_cols - 1)
    conv     = np.zeros((out_rows, out_cols))

    for ii in range(out_rows):
        for jj in range(out_cols):
            # window keeps every trailing axis (channels) of the image
            window = image[ii:ii+kernel_rows, jj:jj+kernel_cols]
            conv[ii,jj] = np.sum(window*kernel)

    return conv

#### Use the convolution function on random images to see its output

In [None]:
# Convolve one training image with the kernel selected above and show the
# image next to the resulting feature map.
image = train_data[25,:,:,:]
conv = convolution(kernels1_1, image)

fig, axs = plt.subplots(1, 2, figsize=(8, 8))
axs[0].set_title('Original Image')
# The images were normalized with (x / 255) - 0.5, i.e. they live in
# [-0.5, 0.5]. imshow expects float RGB data in [0, 1] and clips anything
# outside that range, so undo the shift for display only (the clip guards
# against floating-point round-off at the range ends).
axs[0].imshow(np.clip(image + 0.5, 0, 1));
axs[0].axis('off')

axs[1].set_title('Convolution Output')
axs[1].imshow(conv);
axs[1].axis('off')
plt.show()

#### Compare the output of 2 kernels on 2 images

In [None]:
# Visualize the kernels in RGB

conv1    = model.layers[0]
weights1 = conv1.get_weights()
kernels1 = weights1[0] # shape of 32 kernels (3, 3, 3, 32)

kernels1_1 = kernels1[:,:,:,16]
kernels1_1avg = np.mean(kernels1_1, axis=0)

kernels1_2 = kernels1[:,:,:,17]
kernels1_2avg = np.mean(kernels1_2, axis=0)

# One row per kernel, one column per slice along the kernel's third axis,
# drawn with a 6-level red / green / blue colormap respectively.
# plt.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap) is
# deprecated and no longer available in recent Matplotlib releases.
channel_cmaps = ('Reds', 'Greens', 'Blues')
kernel_rows   = ((kernels1_1, 'Kernel 1: edges?'),
                 (kernels1_2, 'Kernel 2: horizontal?'))

fig, axs = plt.subplots(2, 3, figsize=(8, 8))
for row_idx, (kernel_rgb, row_title) in enumerate(kernel_rows):
    for channel_idx, cmap_name in enumerate(channel_cmaps):
        axs[row_idx, channel_idx].imshow(kernel_rgb[:,:,channel_idx],
                                         cmap=plt.get_cmap(cmap_name, 6))
        axs[row_idx, channel_idx].axis('off')
    # the row's title sits above its middle panel
    axs[row_idx, 1].set_title(row_title)

fig.subplots_adjust(top = 0.7, bottom = 0.1, hspace = 0.1, wspace = 0.1)

In [None]:
# Visualize the output of the kernels after convolving on 2 images

image1 = train_data[1,:,:,:]
image2 = train_data[50,:,:,:]
conv1 = convolution(kernels1_1, image1)
conv2 = convolution(kernels1_1, image2)
conv3 = convolution(kernels1_2, image1)
conv4 = convolution(kernels1_2, image2)

# Row 0 shows the first kernel, row 1 the second. Within a row the panels
# are: image1, its convolution output, image2, its convolution output.
panel_rows = [[(image1, conv1), (image2, conv2)],
              [(image1, conv3), (image2, conv4)]]

fig, axs = plt.subplots(2, 4, figsize=(14, 14))
for row_idx, row_pairs in enumerate(panel_rows):
    for pair_idx, (panel_image, panel_conv) in enumerate(row_pairs):
        image_ax = axs[row_idx, 2 * pair_idx]
        conv_ax  = axs[row_idx, 2 * pair_idx + 1]

        image_ax.set_title('Original Image')
        # The images were normalized with (x / 255) - 0.5; imshow expects
        # float RGB in [0, 1] and clips values outside it, so shift the
        # data back for display only.
        image_ax.imshow(np.clip(panel_image + 0.5, 0, 1))
        image_ax.axis('off')

        conv_ax.set_title('Convolution Output')
        conv_ax.imshow(panel_conv)
        conv_ax.axis('off')

fig.subplots_adjust(top = 0.5, bottom = 0.1, hspace = 0.1, wspace = 0.1)
plt.show()
fig.savefig('conv_output.png')

In [None]:
# Print Keras' layer-by-layer summary of the model's architecture
model.summary()

### Evaluating the Model

In [None]:
# Score the trained model on the held-out test set.
# No EarlyStopping callback is passed here: early stopping acts at the end
# of training epochs, so it does nothing during evaluation, and creating
# one under the same name would overwrite the callback used for training.
score = model.evaluate(test_data, 
                       test_labels,
                       batch_size = 125)

# evaluate returns the loss followed by the metrics given to compile()
# (here: accuracy)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

#### Hand calculate accuracy

In [None]:
# Predicted class = index of the largest softmax output. This replaces
# Sequential.predict_classes, which does the same thing but is not
# available in current Keras releases.
output              = np.argmax(model.predict(test_data), axis = -1)
# One-hot encode the predictions with exactly as many columns as the true
# labels have, so the element-wise product below stays valid even if some
# class is never predicted.
predictions         = np.eye(test_labels.shape[1])[output]

# A row of (predictions * test_labels) contains a 1 only when the predicted
# class equals the true class, so the total is the number of correct
# predictions.
correct_predictions = (predictions * test_labels).sum()
test_accuracy       = correct_predictions/len(predictions)

print('Test accuracy:', test_accuracy)

### Model performance per category

In [None]:
# Per-class tallies over the test set; column i of the one-hot matrices
# corresponds to class i.
true_cat      = test_labels.sum(axis=0)                  # images that belong to each class
pred_cat      = predictions.sum(axis=0)                  # images assigned to each class
corr_pred_cat = (predictions * test_labels).sum(axis=0)  # correctly classified images per class
# fraction of each class's test images that the model classified correctly
prob_cat      = corr_pred_cat / true_cat

# class names, listed in label-index order
x = ['airplane', 'automobile', 'bird', 'cat', 'deer',
     'dog', 'frog', 'horse', 'ship', 'truck']

plt.xticks(rotation=45);
plt.bar(x, prob_cat);

## Future Directions
<font size="3.7"> 
1. Explore the output of later convolutional layers
    
2. Investigate mistakes (confusions)
   
3. Compare mistakes with those of previous networks   
</font>