# Import packages and data

In [1]:
%matplotlib inline

import numpy as np
import time
import matplotlib.pyplot as plt
import scipy.io

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


In [2]:
from keras.datasets import mnist
from keras.datasets import cifar10

# Load both benchmark datasets; each call yields (inputs, labels) pairs for the
# training and the test split.
# The data is downloaded at first execution.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# The `_c_` infix marks the CIFAR-10 counterparts of the MNIST arrays above.
(x_c_train, y_c_train), (x_c_test, y_c_test) = cifar10.load_data()

# Preprocessing and helper functions

In [3]:
def grayscale(x):
    """Collapse RGB images to a single luminance channel.

    The channels are combined with the Rec. 709 luma weights
    (0.2126 R + 0.7152 G + 0.0722 B), which sum to one, so a pure white pixel
    keeps its value. No rescaling and no sRGB gamma expansion is applied; the
    result lives on the same value range as the input.

    Parameters
    ----------
    x : array-like of shape (..., C) with C >= 3
        Images whose last axis holds the colour channels in R, G, B order
        (e.g. a batch of shape (N, H, W, 3)).

    Returns
    -------
    numpy.ndarray
        Floating point array of shape ``x.shape[:-1]``.
    """
    x = np.asarray(x)
    # The blue weight is 0.0722 (it was previously mistyped as 0.07152).
    return .2126 * x[..., 0] + .7152 * x[..., 1] + .0722 * x[..., 2]

def downsample(x):
    """Halve the resolution of an image by averaging non-overlapping 2x2 blocks.

    The first two axes of ``x`` are treated as the spatial axes; any further
    axes (e.g. channels) are carried along unchanged.

    Parameters
    ----------
    x : array-like of shape (H, W, ...)
        Image data. H and W are expected to be even; for odd sizes the four
        strided slices have different shapes and cannot be averaged properly.

    Returns
    -------
    numpy.ndarray
        Array of shape (H // 2, W // 2, ...). Integer and boolean input yields
        float64 output, floating point input keeps its dtype.
    """
    x = np.asarray(x)
    if x.dtype.kind in 'biu':
        # Summing four small-integer slices (e.g. uint8 pixels) would wrap
        # around before the division, so accumulate in floating point instead.
        x = x.astype(np.float64)
    return sum(x[i::2, j::2] for i in range(2) for j in range(2)) / 4

# Replace the CIFAR-10 colour images by their grayscale versions (train and test alike).
# NOTE: this overwrites the RGB arrays, so the cell must not be executed twice
# without reloading the data first.
x_c_train, x_c_test = [grayscale(images) for images in (x_c_train, x_c_test)]

In [4]:
# One-hot encode the class labels of both datasets.
y_train, y_test, y_c_train, y_c_test = [
    keras.utils.to_categorical(labels)
    for labels in (y_train, y_test, y_c_train, y_c_test)
]

# Scale every input array by its own maximum so that its largest value becomes 1.
x_train, x_test, x_c_train, x_c_test = [
    inputs / np.max(inputs)
    for inputs in (x_train, x_test, x_c_train, x_c_test)
]



In [5]:
def _label_to_index(label):
    """Return the class index encoded by one label entry.

    Vectors with more than one element (one-hot rows, class probabilities) are
    reduced with argmax; scalars and length-1 arrays are taken as the index itself.
    """
    label = np.asarray(label)
    if label.size > 1:
        return int(label.argmax())
    return int(label.reshape(-1)[0])


def plot_some_samples(x, y = [], yhat = [], select_from = [],
                      ncols = 6, nrows = 4, xdim = 28, ydim = 28,
                      label_mapping = range(10)):
    """Plot randomly chosen samples as grayscale images, optionally with labels.

    x holds N samples along its first axis; every sample is reshaped to
    (xdim, ydim) for display. Up to ncols x nrows distinct indices are drawn at
    random from select_from (all N samples if select_from is empty).

    Keyword arguments:
    y             -- corresponding labels, drawn in green at the lower left of each image.
    yhat          -- corresponding predicted labels, drawn in red at the lower right.
                     Entries of y / yhat may be class indices or one-hot /
                     probability vectors.
    select_from   -- list of indices from which to select the images.
    ncols, nrows  -- number of columns and rows to plot.
    xdim, ydim    -- number of pixels of the images in x- and y-direction.
    label_mapping -- map labels to digits.

    Returns the matplotlib figure.
    """
    # squeeze=False keeps the axes array two-dimensional, so the [row, column]
    # lookup below also works for a single row or a single column.
    fig, axes = plt.subplots(nrows, ncols, squeeze=False)
    for cell in axes.ravel():
        # Grid cells that end up without an image should not show empty frames.
        cell.set_axis_off()
    if len(select_from) == 0:
        select_from = range(x.shape[0])
    n_shown = min(ncols * nrows, len(select_from))
    indices = np.random.choice(select_from, size = n_shown, replace = False)
    for i, ind in enumerate(indices):
        thisax = axes[i // ncols, i % ncols]
        thisax.matshow(x[ind].reshape(xdim, ydim), cmap='gray')
        thisax.set_axis_off()
        # NOTE(review): the displayed text is (label_mapping[label] + 1) % 10,
        # i.e. shifted by one relative to the mapped label. With the default
        # mapping a sample of class 0 is annotated "1" - confirm this is intended.
        if len(y) != 0:
            j = _label_to_index(y[ind])
            thisax.text(0, 0, (label_mapping[j]+1)%10, color='green',
                        verticalalignment='top',
                        transform=thisax.transAxes)
        if len(yhat) != 0:
            k = _label_to_index(yhat[ind])
            thisax.text(1, 0, (label_mapping[k]+1)%10, color='red',
                        verticalalignment='top',
                        horizontalalignment='right',
                        transform=thisax.transAxes)
    return fig

def prepare_standardplot(title, xlabel):
    """Create the two-panel figure used for learning curves.

    The left panel is meant for the loss (logarithmic y-axis), the right one for
    the accuracy. Both share the given x-axis label; `title` becomes the figure
    title.

    Returns the figure followed by the loss axes and the accuracy axes.
    """
    fig, panels = plt.subplots(1, 2)
    loss_ax, acc_ax = panels
    fig.suptitle(title)
    loss_ax.set_yscale('log')
    panel_labels = ((loss_ax, 'categorical cross entropy'),
                    (acc_ax, 'accuracy [% correct]'))
    for panel, ylabel in panel_labels:
        panel.set_ylabel(ylabel)
        panel.set_xlabel(xlabel)
    return fig, loss_ax, acc_ax

def finalize_standardplot(fig, ax1, ax2):
    """Add legends and tidy up the layout of a two-panel standard plot.

    A legend is drawn on each axes that has at least one labelled artist.
    Afterwards the layout of `fig` is tightened and its top margin is reduced
    to 0.9 of the figure height.

    Parameters
    ----------
    fig : the figure to finalize.
    ax1, ax2 : the two axes of that figure.
    """
    for axes in (ax1, ax2):
        handles, labels = axes.get_legend_handles_labels()
        if len(labels) > 0:
            axes.legend(handles, labels)
    fig.tight_layout()
    # Adjust the figure that was passed in; the pyplot-level function would act
    # on whatever figure happens to be current instead.
    fig.subplots_adjust(top=0.9)

def plot_history(history, title):
    """Plot training and validation loss / accuracy per epoch.

    Parameters
    ----------
    history : object with a `history` dict, as returned by `model.fit`. The
        dict must contain 'loss' and 'val_loss' plus the accuracy curves under
        either 'acc' / 'val_acc' or 'accuracy' / 'val_accuracy'.
    title : figure title.

    Returns the figure.
    """
    logs = history.history
    # Older Keras releases log the metric as 'acc'; newer ones use the name
    # given at compile time ('accuracy'). Accept both spellings.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'
    fig, ax1, ax2 = prepare_standardplot(title, 'epoch')
    ax1.plot(logs['loss'], label = "training")
    ax1.plot(logs['val_loss'], label = "validation")
    ax2.plot(logs[acc_key], label = "training")
    ax2.plot(logs['val_' + acc_key], label = "validation")
    finalize_standardplot(fig, ax1, ax2)
    return fig

# Define Model(s)

In [46]:
def compilemodel(model, optimizer):
    """Compile `model` in place for classification.

    Uses categorical cross-entropy as the loss, the given optimizer, and tracks
    accuracy as the only metric.
    """
    compile_options = {
        'loss': 'categorical_crossentropy',
        'optimizer': optimizer,
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)

def convnet(input_shape = (28,28,1), num_classes = 10, optimizer = None, activation_function = 'relu', batch_norm = False):
    """Build and compile a CNN with two convolution stages and one hidden dense layer.

    Layer stack: Conv2D(32, 3x3) -> MaxPooling(2x2) -> Conv2D(64, 3x3) ->
    MaxPooling(2x2) -> Dropout(0.25) -> Flatten -> Dense(128) -> Dropout(0.5)
    -> Dense(num_classes, softmax). With `batch_norm=True` a BatchNormalization
    layer is inserted in front of every convolution and every dense layer.

    Parameters
    ----------
    input_shape : (height, width, channels) of a single input image.
    num_classes : number of output units.
    optimizer : optimizer handed to `compilemodel`; a new `Adam()` is created
        when omitted.
    activation_function : activation of the convolution and hidden dense layers.
    batch_norm : whether to insert the BatchNormalization layers.

    Returns the compiled Sequential model.
    """
    if optimizer is None:
        # An `Adam()` default in the signature is evaluated only once, so every
        # model built with the default would share one optimizer instance.
        optimizer = Adam()

    model = Sequential()
    # Only the first layer of the model needs to know the input shape.
    if batch_norm:
        model.add(keras.layers.BatchNormalization(input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation=activation_function, input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    if batch_norm:
        model.add(keras.layers.BatchNormalization())
    model.add(Conv2D(64, (3, 3), activation=activation_function))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(.25))
    model.add(Flatten())
    if batch_norm:
        model.add(keras.layers.BatchNormalization())

    model.add(Dense(128, activation=activation_function))
    model.add(Dropout(.5))
    if batch_norm:
        model.add(keras.layers.BatchNormalization())
    model.add(Dense(num_classes, activation='softmax'))
    compilemodel(model, optimizer)
    return model

def convnet1(input_shape = (28,28,1), num_classes = 10, optimizer = None, activation_function = 'relu', batch_norm = False):
    """Build and compile a CNN with three convolution stages and no hidden dense layer.

    Layer stack: three times [Conv2D(f, 3x3) -> MaxPooling(2x2)] with
    f = 32, 64, 128, followed by Dropout(0.25) -> Flatten ->
    Dense(num_classes, softmax). With `batch_norm=True` a BatchNormalization
    layer is inserted in front of every convolution and in front of the output
    layer.

    Parameters
    ----------
    input_shape : (height, width, channels) of a single input image.
    num_classes : number of output units.
    optimizer : optimizer handed to `compilemodel`; a new `Adam()` is created
        when omitted.
    activation_function : activation of the convolution layers.
    batch_norm : whether to insert the BatchNormalization layers.

    Returns the compiled Sequential model.
    """
    if optimizer is None:
        # An `Adam()` default in the signature is evaluated only once, so every
        # model built with the default would share one optimizer instance.
        optimizer = Adam()

    model = Sequential()
    for stage, n_filters in enumerate((32, 64, 128)):
        # Only the first layer(s) of the model need to know the input shape.
        shape_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        if batch_norm:
            model.add(keras.layers.BatchNormalization(**shape_kwargs))
        model.add(Conv2D(n_filters, (3, 3), activation=activation_function, **shape_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(.25))
    model.add(Flatten())
    if batch_norm:
        model.add(keras.layers.BatchNormalization())

    model.add(Dense(num_classes, activation='softmax'))
    compilemodel(model, optimizer)
    return model

def convnet2(input_shape = (28,28,1), num_classes = 10, optimizer = None, activation_function = 'relu', batch_norm = False):
    """Build and compile a CNN with three convolution stages and one hidden dense layer.

    Layer stack: three times [Conv2D(f, 3x3) -> MaxPooling(2x2)] with
    f = 32, 64, 128, followed by Dropout(0.25) -> Flatten -> Dense(128) ->
    Dropout(0.5) -> Dense(num_classes, softmax). With `batch_norm=True` a
    BatchNormalization layer is inserted in front of every convolution and
    every dense layer.

    Parameters
    ----------
    input_shape : (height, width, channels) of a single input image.
    num_classes : number of output units.
    optimizer : optimizer handed to `compilemodel`; a new `Adam()` is created
        when omitted.
    activation_function : activation of the convolution and hidden dense layers.
    batch_norm : whether to insert the BatchNormalization layers.

    Returns the compiled Sequential model.
    """
    if optimizer is None:
        # An `Adam()` default in the signature is evaluated only once, so every
        # model built with the default would share one optimizer instance.
        optimizer = Adam()

    model = Sequential()
    for stage, n_filters in enumerate((32, 64, 128)):
        # Only the first layer(s) of the model need to know the input shape.
        shape_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        if batch_norm:
            model.add(keras.layers.BatchNormalization(**shape_kwargs))
        model.add(Conv2D(n_filters, (3, 3), activation=activation_function, **shape_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(.25))
    model.add(Flatten())
    if batch_norm:
        model.add(keras.layers.BatchNormalization())

    model.add(Dense(128, activation=activation_function))
    model.add(Dropout(.5))
    if batch_norm:
        model.add(keras.layers.BatchNormalization())
    model.add(Dense(num_classes, activation='softmax'))
    compilemodel(model, optimizer)
    return model

# Run Model(s)

In [47]:
def fit_convnet(x_train, x_test, y_train, y_test, batch_size, epochs,
                activation_function = 'relu', batch_norm = False, input_shape = (28,28,1),
                optimizer = None):
    """Build a fresh `convnet` model, train it and time the training.

    Parameters
    ----------
    x_train, x_test : input arrays with one sample per entry along the first
        axis; they are reshaped to (N, height, width, channels).
    y_train, y_test : one-hot encoded labels; the number of columns of
        `y_test` determines the number of output classes.
    batch_size, epochs : passed on to `model.fit`.
    activation_function, batch_norm : passed on to `convnet`.
    input_shape : (height, width, channels) of a single image.
    optimizer : optimizer for the new model; a new `Adam()` is created when
        omitted.

    Returns
    -------
    (model, history, elapsed) : the trained model, the object returned by
    `model.fit`, and the wall-clock duration of the fit in seconds. The test
    set is used as validation data.
    """
    if optimizer is None:
        # Hand every new model its own optimizer instead of relying on the
        # default of `convnet`, so consecutive runs never share one instance.
        optimizer = Adam()
    model_conv = convnet(input_shape = input_shape, num_classes = y_test.shape[1],
                         optimizer = optimizer,
                         activation_function = activation_function, batch_norm = batch_norm)
    model_conv.summary()

    image_shape = tuple(input_shape[:3])

    def as_image_batch(samples):
        # Keep the sample count explicit so a size mismatch raises an error.
        return samples.reshape((samples.shape[0],) + image_shape)

    x_train_conv = as_image_batch(x_train)
    x_test_conv = as_image_batch(x_test)

    t = time.time()
    history_conv = model_conv.fit(x_train_conv, y_train,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  verbose=1,
                                  validation_data=(x_test_conv, y_test))
    elapsed_conv = time.time() - t
    return model_conv, history_conv, elapsed_conv

def cont_fit_convent(model_conv, x_train, x_test, y_train, y_test, batch_size, epochs, 
                     input_shape = (28,28,1)):
    """Continue training an existing model and time the additional fit.

    The inputs are reshaped to (N, height, width, channels) according to
    `input_shape`; the test set serves as validation data.

    Returns the (same) model, the object returned by `model.fit`, and the
    wall-clock duration of the fit in seconds.
    """
    height, width, channels = input_shape[0], input_shape[1], input_shape[2]

    def as_image_batch(samples):
        return samples.reshape(samples.shape[0], height, width, channels)

    train_images = as_image_batch(x_train)
    test_images = as_image_batch(x_test)
    fit_options = dict(batch_size=batch_size,
                       epochs=epochs,
                       verbose=1,
                       validation_data=(test_images, y_test))

    started = time.time()
    history_conv = model_conv.fit(train_images, y_train, **fit_options)
    elapsed_conv = time.time() - started
    return model_conv, history_conv, elapsed_conv
    

### MNIST

In [39]:
# Training configuration for the MNIST run.
batch_size, epochs = 128, 10

# Train a new network on MNIST; keeps the model, its training history and the
# elapsed training time.
model_conv, history_conv, t_elapsed_conv = fit_convnet(
    x_train, x_test, y_train, y_test,
    batch_size=batch_size, epochs=epochs)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 5, 5, 64)          0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 5, 5, 64)          0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 1600)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 128)               204928    
__________

### CIFAR10

In [None]:
# Training configuration for the CIFAR-10 run.
batch_size, epochs = 128, 20

# Train a new network on the grayscale CIFAR-10 images (32x32 pixels, one channel).
model_conv_c, history_conv_c, t_elapsed_conv_c = fit_convnet(
    x_c_train, x_c_test, y_c_train, y_c_test,
    batch_size=batch_size, epochs=epochs, input_shape=(32, 32, 1))

# Alternative: keep training the existing model instead of starting from scratch.
#model_conv_c, history_conv_c, t_elapsed_conv_c = cont_fit_convent(model_conv_c, x_c_train, x_c_test, 
#                                                                  y_c_train, y_c_test, 
#                                                                  batch_size, epochs, input_shape = (32,32,1))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 30, 30, 32)        320       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 6, 6, 64)          0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 6, 6, 64)          0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 2304)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               295040    
__________