In [1]:
import os
import datetime
import pandas as pd
import numpy as np

from keras.initializers import he_uniform
from keras.models import Sequential, load_model
from keras.optimizers import Adam, RMSprop
from keras.regularizers import l2
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Conv2D, MaxPooling2D, Flatten
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, TensorBoard

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Seed NumPy's global RNG so NumPy-based randomness (e.g. np.random.permutation) is repeatable.
# NOTE(review): only NumPy is seeded in this cell; `seed` is also reused for the Dense initializers.
seed=11
np.random.seed(seed)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## 0. Preparation

In [2]:
def sample_random_predictions(X, y):
    """Draw up to three randomly chosen samples side by side, titled with their labels.

    Parameters
    ----------
    X : array exposing ``shape[0]`` samples; each picked sample is reshaped to
        28x28 before being drawn in grayscale.
    y : sequence aligned with ``X``; ``y[i]`` is used as the title of sample ``i``.
    """
    picked = np.random.permutation(X.shape[0])[:3]

    # Subplot codes 131, 132, 133: one row, three columns, positions 1..3.
    position = 131
    for sample_idx in picked:
        image = X[sample_idx].reshape(28, 28)
        plt.subplot(position)
        plt.imshow(image, cmap=plt.cm.gray)
        plt.title(y[sample_idx])
        position += 1

In [3]:
def print_training_history(history):
    """Print the final and best accuracy stored in a training history.

    Parameters
    ----------
    history : object with a ``history`` dict mapping metric names to per-epoch
        lists (as returned by ``model.fit``).

    Notes
    -----
    The training accuracy is read from ``'acc'`` when present, otherwise from
    ``'accuracy'``; the validation key is the same name prefixed with ``val_``.
    The validation line is printed only when that key exists.

    Raises
    ------
    KeyError
        If neither ``'acc'`` nor ``'accuracy'`` is present.
    """
    logs = history.history
    # Accept both metric spellings so the helper does not depend on the Keras version.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'
    val_acc_key = 'val_' + acc_key

    acc_list = logs[acc_key]
    acc = acc_list[-1]
    best_acc_index = int(np.argmax(acc_list))
    best_acc = acc_list[best_acc_index]

    # Epochs are reported 1-based, hence the +1 on the argmax index.
    print('Accuracy: {:.4f} \tBest Accuracy: {:.4f} \t\t@ {} epoch'.format(acc, best_acc, best_acc_index+1))

    if val_acc_key in logs:
        val_acc_list = logs[val_acc_key]
        val_acc = val_acc_list[-1]
        best_val_acc_index = int(np.argmax(val_acc_list))
        best_val_acc = val_acc_list[best_val_acc_index]
        print('Dev Accuracy: {:.4f} \tBest Dev Accuracy: {:.4f} \t@ {} epoch'.format(val_acc, best_val_acc, best_val_acc_index+1))
    

In [4]:
def plot_training_history(history):
    """Plot accuracy and loss against epochs in two side-by-side panels.

    Parameters
    ----------
    history : object with a ``history`` dict mapping metric names to per-epoch
        lists (as returned by ``model.fit``).

    Notes
    -----
    Training accuracy is read from ``'acc'`` when present, otherwise from
    ``'accuracy'``. Validation curves are added only when the matching
    ``val_`` keys exist. ``'loss'`` is required.
    """
    logs = history.history
    # Accept both metric spellings so the helper does not depend on the Keras version.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'
    val_acc_key = 'val_' + acc_key

    plt.figure(figsize=(15,4))

    # Left panel: accuracy.
    plt.subplot(121)
    plt.plot(logs[acc_key], label='Training Set')
    if val_acc_key in logs:
        plt.plot(logs[val_acc_key], label='Validation Set')
    plt.title('Accuracy vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(loc='best')

    # Right panel: loss. The x axis is epochs and the y axis is loss.
    plt.subplot(122)
    plt.plot(logs['loss'], label='Training Set')
    if 'val_loss' in logs:
        plt.plot(logs['val_loss'], label='Validation Set')
    plt.title('Loss vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.show()

In [5]:
def plot_error_matrix(cm):
    """Draw a heatmap of per-row error rates from a confusion matrix.

    Each row of ``cm`` is divided by its row total and the diagonal is zeroed,
    so only the off-diagonal (misclassified) fractions remain.

    Parameters
    ----------
    cm : 2-D array-like of counts. The input is not modified.
    """
    counts = np.asarray(cm, dtype=float)
    row_sum = counts.sum(axis=1, keepdims=True)
    # Rows with a zero total would give 0/0 = NaN; leave them at 0 instead.
    norm_cm = np.divide(counts, row_sum, out=np.zeros_like(counts), where=row_sum != 0)

    np.fill_diagonal(norm_cm, 0)
    # NOTE(review): values are fractions in [0, 1] but vmax=4, so the colour scale is
    # mostly unused. Confirm whether the rates were meant to be percentages.
    sns.heatmap(norm_cm, robust=True, fmt="f", cmap='RdBu_r', vmin=0, vmax=4)

    plt.show()

## 1. Getting Data

In [6]:
# Location of the pre-built dataset archive, relative to the notebook.
payload_dir, payload_file = '../payload/', 'normal-payload.npz'
payload_path = os.path.join(payload_dir, payload_file)

# Open the .npz archive and pull out the four stored arrays.
payload_archive = np.load(payload_path)
dev_set, test_set, train_dev_set, train_set = (
    payload_archive[array_name]
    for array_name in ('dev_set', 'test_set', 'train_dev_set', 'train_set')
)

1.1 Training Set

In [7]:
# Column 0 holds the label; the remaining columns are pixel values reshaped to 28x28x1 images.
trainX, trainy = train_set[:,1:].reshape(-1,28,28,1), train_set[:,0]
# Pin the one-hot width to the network's 10 outputs instead of inferring it from the labels present.
trainY = np_utils.to_categorical(trainy, num_classes=10)

In [8]:
# Column 0 holds the label; the remaining columns are pixel values reshaped to 28x28x1 images.
train_devX, train_devy = train_dev_set[:,1:].reshape(-1,28,28,1), train_dev_set[:,0]
# Pin the one-hot width to the network's 10 outputs instead of inferring it from the labels present.
train_devY = np_utils.to_categorical(train_devy, num_classes=10)

In [9]:
# Column 0 holds the label; the remaining columns are pixel values reshaped to 28x28x1 images.
testX, testy = test_set[:,1:].reshape(-1,28,28,1), test_set[:,0]
# Pin the one-hot width to the network's 10 outputs instead of inferring it from the labels present.
testY = np_utils.to_categorical(testy, num_classes=10)

In [10]:
# Column 0 holds the label; the remaining columns are pixel values reshaped to 28x28x1 images.
devX, devy = dev_set[:,1:].reshape(-1,28,28,1), dev_set[:,0]
# Pin the one-hot width to the network's 10 outputs instead of inferring it from the labels present.
devY = np_utils.to_categorical(devy, num_classes=10)

## 4 The Network

### 4.1 Network Architecture

Regularization Parameters

In [11]:
# Rate passed to the Dropout layer that follows each 1024-unit Dense block in get_model.
dropout = .4
# L2 coefficient passed to the Dense kernel regularizers in get_model; 0 means no penalty.
lambd = 0

### 4.2 Optimization Setup

4.2.1 Hyperparameters

In [69]:
# Base learning rate: passed to Adam in get_model and scaled per epoch by lr_sched.
alpha = 1e-2
# Decay coefficient used by lr_sched.
lr_decay = 9e-3
# Mini-batch size passed to model.fit.
batch_size=128
# Loss and metric list passed to model.compile in get_model.
loss = 'categorical_crossentropy'
metrics = ['accuracy']

# Checkpoint location; the {epoch} and {val_loss} placeholders are left for ModelCheckpoint to fill.
models_dir = '../models'
modelpath = os.path.join(models_dir,'ConvNet.{epoch:02d}-{val_loss:.2f}.hdf5')

4.2.2 Callbacks

In [70]:
# Patience value handed to the EarlyStopping callback below.
patience=10

In [71]:
def lr_sched(epoch):
    """Return the learning rate for ``epoch`` (inverse-time decay).

    The result is ``alpha`` scaled by ``1 / (1 + lr_decay * epoch)``, where
    ``alpha`` and ``lr_decay`` are read from module scope at call time.
    """
    shrink = 1 / (1 + lr_decay * epoch)
    return shrink * alpha

# Per-epoch learning-rate schedule driven by lr_sched.
schd = LearningRateScheduler(lr_sched, verbose=1)

# Stop training once the monitored quantity has not improved for `patience` epochs.
early_stopping = EarlyStopping(verbose=1, patience=patience)

# Write a checkpoint whenever validation accuracy improves.
model_checkpoint = ModelCheckpoint(
    filepath=modelpath,
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# TensorBoard logger. It is constructed here but not included in `callbacks` below.
tfboard = TensorBoard(
    log_dir='./logs',
    batch_size=128,
    write_graph=True,
    write_images=True,
)

# Callbacks actually handed to model.fit.
callbacks = [schd, early_stopping, model_checkpoint]

4.2.3 The Model

In [72]:
# Checkpoint file name to resume from (e.g. 'ConvNet.13-0.03.hdf5'), or None to train from scratch.
model = None

if model:
    # Resume with the rate the schedule had reached: 1e-2 / (1 + 9e-3 * 13).
    alpha = 0.008952551477170993
    # Build the path from models_dir rather than repeating the directory literal.
    model = os.path.join(models_dir, model)

In [73]:
def get_model(path=None):
    """Return a Keras model, either restored from ``path`` or freshly built and compiled.

    Parameters
    ----------
    path : str or None
        File passed to ``load_model``. When falsy, a new network is built.

    Returns
    -------
    The loaded model, or a new compiled ``Sequential`` consisting of two
    Conv2D/MaxPooling2D stages, two Dense(1024) blocks and a 10-way softmax.

    Notes
    -----
    Building a new network reads these module-level settings: ``seed``,
    ``lambd``, ``dropout``, ``alpha``, ``loss`` and ``metrics``.
    """
    # Restore first, so no throwaway Sequential is created when resuming.
    if path:
        return load_model(path)

    model = Sequential()

    # Convolutional feature extractor on 28x28x1 inputs.
    model.add(Conv2D(64,(3,3), strides=2, padding='same', activation='relu', input_shape=(28,28,1)))
    model.add(MaxPooling2D(pool_size=(3,3)))
    model.add(Conv2D(64,(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(.2))

    model.add(Flatten())

    # Dense layers 1 and 2: identical Dense -> BatchNorm -> ReLU -> Dropout blocks.
    for _ in range(2):
        model.add(
            Dense(1024,
            bias_initializer='zeros', kernel_initializer=he_uniform(seed),
            kernel_regularizer=l2(lambd),
            )
        )
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(dropout))

    # Output layer: one softmax unit per class.
    model.add(Dense(10, activation='softmax'))

    adam_optimizer = Adam(lr=alpha)
    model.compile(optimizer=adam_optimizer, loss=loss, metrics=metrics)

    return model

In [74]:
# `model` holds the checkpoint path (or None) on the way in and the Keras model afterwards.
model = get_model(model)

4.2.4 Optimization

In [75]:
# Upper bound on training epochs; the EarlyStopping callback can end training sooner.
num_epoch=500

In [76]:
# Train on the training split, holding out a 0.1 fraction of it for validation.
# NOTE(review): devX/devY are prepared earlier but not passed here. Confirm the intended validation data.
history=model.fit(trainX, trainY, batch_size=batch_size, epochs=num_epoch, verbose=1, 
                  callbacks=callbacks, validation_split=.1)

Train on 127093 samples, validate on 14122 samples
Epoch 1/500

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.

Epoch 00001: val_acc improved from -inf to 0.95249, saving model to ../models/ConvNet.01-0.15.hdf5
Epoch 2/500

Epoch 00002: LearningRateScheduler reducing learning rate to 0.009910802775024779.

Epoch 00002: val_acc improved from 0.95249 to 0.95773, saving model to ../models/ConvNet.02-0.14.hdf5
Epoch 3/500

Epoch 00003: LearningRateScheduler reducing learning rate to 0.009823182711198428.

Epoch 00003: val_acc improved from 0.95773 to 0.96205, saving model to ../models/ConvNet.03-0.13.hdf5
Epoch 4/500

Epoch 00004: LearningRateScheduler reducing learning rate to 0.009737098344693282.

Epoch 00004: val_acc did not improve
Epoch 5/500

Epoch 00005: LearningRateScheduler reducing learning rate to 0.009652509652509652.

Epoch 00005: val_acc improved from 0.96205 to 0.96870, saving model to ../models/ConvNet.05-0.10.hdf5
Epoch 6/500

Epoch 00006: LearningRate


Epoch 00028: val_acc did not improve
Epoch 29/500

Epoch 00029: LearningRateScheduler reducing learning rate to 0.007987220447284346.

Epoch 00029: val_acc did not improve
Epoch 30/500

Epoch 00030: LearningRateScheduler reducing learning rate to 0.007930214115781127.

Epoch 00030: val_acc did not improve
Epoch 31/500

Epoch 00031: LearningRateScheduler reducing learning rate to 0.007874015748031496.

Epoch 00031: val_acc did not improve
Epoch 32/500

Epoch 00032: LearningRateScheduler reducing learning rate to 0.007818608287724786.

Epoch 00032: val_acc did not improve
Epoch 00032: early stopping


4.2.5 Best Model Yet

In [130]:
# Needs `history` from the model.fit cell; it must be run in the same kernel session first.
print_training_history(history)

NameError: name 'history' is not defined

### 4.3 Training Result

In [None]:
# Needs `history` from the model.fit cell; it must be run in the same kernel session first.
plot_training_history(history)

