In [2]:
# environment: Paperspace Quadro P4000 GPU
import numpy as np 
import pandas as pd 
import os 
import tensorflow as tf
import tensorflow.keras # run pip install keras==2.3 beforehand for compatability 
from tensorflow.keras import Input, Model 
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, AlphaDropout, MaxPooling2D, AveragePooling2D, BatchNormalization, Concatenate, Flatten, Reshape, Add, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


# Read in file and preprocess data

In [3]:
# Locations of the competition CSVs: labelled training data, unlabelled test
# data, and the submission template.
train_path, test_path, submission_path = (
    './storage/modified_mnist_dataset/train.csv',
    './storage/modified_mnist_dataset/test.csv',
    './storage/modified_mnist_dataset/submission.csv',
)

In [4]:
# Read the three CSVs (in the order train, test, submission) into DataFrames.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (train_path, test_path, submission_path)
)

In [5]:
# Convert the label columns to pandas Categorical:
#   train column 1 (digit), train column 2 (letter), test column 1 (letter).
for frame, col_idx in ((train, 1), (train, 2), (test, 1)):
    frame.iloc[:, col_idx] = pd.Categorical(frame.iloc[:, col_idx])

In [6]:
# Build model-ready arrays from the raw frames.
def _to_image_batch(frame, first_pixel_col):
    """Turn the columns from `first_pixel_col` onward into float32 images.

    The selected columns are reshaped to (-1, 28, 28, 1) and divided by 255.0.
    """
    flat_pixels = frame.iloc[:, first_pixel_col:].values
    return flat_pixels.reshape(-1, 28, 28, 1).astype(np.float32) / 255.0

# train: column 1 is the digit label, column 2 the letter, columns 3+ the pixels
x_train = _to_image_batch(train, 3)
y_train = to_categorical(np.asarray(train.iloc[:, 1].values), num_classes = 10)
train_letters = train.iloc[:, 2].values

# test: column 1 is the letter, columns 2+ the pixels
x_test = _to_image_batch(test, 2)
test_letters = test.iloc[:, 1].values

In [7]:
# Map every letter to its offset from "A" (assumes single uppercase
# characters 'A'..'Z' — TODO confirm), then one-hot encode over 26 classes.
_ord_a = ord("A")
train_letters_numeric = np.asarray([ord(letter) - _ord_a for letter in train_letters])
test_letters_numeric = np.asarray([ord(letter) - _ord_a for letter in test_letters])

train_letters_numeric = to_categorical(train_letters_numeric, num_classes = 26)
test_letters_numeric = to_categorical(test_letters_numeric, num_classes = 26)

# Sanity check: show the shapes of all prepared arrays.
x_train.shape, y_train.shape, x_test.shape, train_letters_numeric.shape, test_letters_numeric.shape

((2048, 28, 28, 1), (2048, 10), (20480, 28, 28, 1), (2048, 26), (20480, 26))

# Conduct Training

In [8]:
# Augmentation settings for training images: random rotation, horizontal and
# vertical shifts, shear and zoom. Only a training generator is defined.
augmentation_options = dict(
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [9]:
# define models to be used for experimentation 
# a basic model that obtains around 74% validation accuracy on a 9:1 train/validation split.
def base_cnn():
    """Build and compile a plain single-input CNN digit classifier.

    Architecture: two stages of (Conv2D x2 -> MaxPooling2D -> BatchNormalization)
    with 64 and 128 filters, a single 256-filter Conv2D with max pooling, then
    Flatten -> BatchNormalization -> Dense(512, relu) -> Dense(10, softmax).

    Returns:
        A Keras Model taking a (28, 28, 1) image, compiled with the Adam
        optimizer, categorical cross-entropy loss and the accuracy metric.
    """
    image_in = Input((28,28,1))

    x = image_in
    # Two double-convolution stages; each is pooled and then batch-normalized.
    for n_filters in (64, 128):
        x = Conv2D(n_filters, 3, activation = 'relu')(x)
        x = Conv2D(n_filters, 3, activation = 'relu')(x)
        x = MaxPooling2D((2,2))(x)
        x = BatchNormalization()(x)

    # Final convolution stage (no batch norm before flattening).
    x = Conv2D(256, 3, activation = 'relu')(x)
    x = MaxPooling2D((2,2))(x)

    # Classification head.
    x = Flatten()(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation = 'relu')(x)
    digit_probs = Dense(10, activation = 'softmax')(x)

    model = Model(inputs = image_in, outputs = digit_probs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

In [21]:
# uses skip connections and also adds information from both MaxPooling2D and AveragePooling2D 
def conv2d_block(input_layer, n_filters, kernel):
    """Residual-style convolution block that ends with combined pooling.

    Two same-padded ReLU convolutions are applied; the (batch-normalized)
    output of the first is added to the output of the second, the sum is
    batch-normalized, and the result is downsampled by adding a 2x2 max pool
    and a 2x2 average pool of the same tensor.

    Args:
        input_layer: tensor fed to the first convolution.
        n_filters: number of filters for both convolutions.
        kernel: kernel size for both convolutions.

    Returns:
        The element-wise sum of the max-pooled and average-pooled features.
    """
    same_conv = dict(activation = 'relu', padding = 'same')

    first = Conv2D(n_filters, kernel, **same_conv)(input_layer)
    first = BatchNormalization()(first)
    second = Conv2D(n_filters, kernel, **same_conv)(first)

    # Skip connection around the second convolution.
    merged = Add()([first, second])
    merged = BatchNormalization()(merged)

    pooled_max = MaxPooling2D((2,2))(merged)
    pooled_avg = AveragePooling2D((2,2))(merged)
    return Add()([pooled_max, pooled_avg])

def dense_block(input_layer, units):
    """Stack of three residual dense steps on top of an initial Dense layer.

    Args:
        input_layer: tensor fed to the initial Dense layer.
        units: width of every Dense layer in the block.

    Returns:
        The tensor after the initial ReLU Dense layer and three rounds of
        (BatchNormalization -> Dense(relu) -> Add with the running tensor).
    """
    running = Dense(units, activation = 'relu')(input_layer)
    for _ in range(3):
        normalized = BatchNormalization()(running)
        update = Dense(units, activation = 'relu')(normalized)
        running = Add()([running, update])
    return running
    
# obtains around 75% validation accuracy on a 9:1 train/validation split
def base_cnn_grade_2():
    """Build and compile a single-input, multi-branch CNN digit classifier.

    Two stages of parallel `conv2d_block` branches with kernel sizes 7, 5, 4
    and 3 (64 filters per branch in the first stage, 32 in the second); the
    branches of each stage are concatenated. The flattened features go through
    SELU Dense layers of 256, 128 and 64 units (lecun_normal init) and a
    10-way softmax.

    Returns:
        A Keras Model taking a (28, 28, 1) image, compiled with the Adam
        optimizer, categorical cross-entropy loss and the accuracy metric.
    """
    image_in = Input((28,28,1))

    features = image_in
    for n_filters in (64, 32):
        # Branches are created in kernel order 7, 5, 4, 3 and then merged.
        branches = [conv2d_block(features, n_filters, kernel) for kernel in (7, 5, 4, 3)]
        features = Concatenate()(branches)

    x = Flatten()(features)
    for width in (256, 128, 64):
        x = Dense(width, activation = 'selu', kernel_initializer = 'lecun_normal')(x)
    digit_probs = Dense(10, activation = 'softmax')(x)

    model = Model(inputs = image_in, outputs = digit_probs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

# obtains around 80% validation accuracy on a 9:1 train/validation split
def base_cnn_grade_3():
    """Build and compile a two-input (image + letter) multi-branch CNN.

    The image path is two stages of parallel `conv2d_block` branches with
    kernel sizes 7, 5, 4 and 3 (64 then 32 filters per branch), concatenated
    per stage. The flattened image features are concatenated with the
    26-element letter vector, then passed through SELU Dense layers of 512,
    256 and 128 units (lecun_normal init) and a 10-way softmax.

    Returns:
        A Keras Model with inputs [(28, 28, 1) image, (26,) letter vector],
        compiled with the Adam optimizer, categorical cross-entropy loss and
        the accuracy metric.
    """
    image_in = Input((28,28,1))
    letter_in = Input((26,))

    features = image_in
    for n_filters in (64, 32):
        # Branches are created in kernel order 7, 5, 4, 3 and then merged.
        branches = [conv2d_block(features, n_filters, kernel) for kernel in (7, 5, 4, 3)]
        features = Concatenate()(branches)

    x = Flatten()(features)
    # Append the letter vector to the image features.
    x = Concatenate()([x, letter_in])
    for width in (512, 256, 128):
        x = Dense(width, activation = 'selu', kernel_initializer = 'lecun_normal')(x)
    digit_probs = Dense(10, activation = 'softmax')(x)

    model = Model(inputs = [image_in, letter_in], outputs = digit_probs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

# obtains around 82% validation accuracy on a 9:1 train/validation split
# the most promising model so far, until we come up with a potentially more powerful grade 5 model 
def base_cnn_grade_4():
    """Build and compile a two-input (image + letter) three-branch CNN.

    The image path is two stages of parallel `conv2d_block` branches with
    kernel sizes 7, 5 and 3 (64 then 32 filters per branch), concatenated per
    stage. The flattened image features are concatenated with the 26-element
    letter vector, then passed through three (Dense(relu) ->
    BatchNormalization) layers of 512, 256 and 128 units, a single
    Dropout(0.4), and a 10-way softmax.

    Returns:
        A Keras Model with inputs [(28, 28, 1) image, (26,) letter vector],
        compiled with the Adam optimizer, categorical cross-entropy loss and
        the accuracy metric.
    """
    image_in = Input((28,28,1))
    letter_in = Input((26,))

    features = image_in
    for n_filters in (64, 32):
        # Branches are created in kernel order 7, 5, 3 and then merged.
        branches = [conv2d_block(features, n_filters, kernel) for kernel in (7, 5, 3)]
        features = Concatenate()(branches)

    x = Flatten()(features)
    # Append the letter vector to the image features.
    x = Concatenate()([x, letter_in])
    for width in (512, 256, 128):
        x = Dense(width, activation = 'relu')(x)
        x = BatchNormalization()(x)
    # Dropout is applied once, after the last dense/batch-norm pair.
    x = Dropout(0.4)(x)
    digit_probs = Dense(10, activation = 'softmax')(x)

    model = Model(inputs = [image_in, letter_in], outputs = digit_probs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

# baseline model presented by Dacon. 
def cnn_dacon():
    """Build and compile the two-input baseline CNN.

    Image path: two stages, each made of two (BatchNormalization -> Conv2D)
    pairs followed by 2x2 max pooling. Stage one uses 128 filters with kernel
    sizes 5 then 2; stage two uses 256 filters with kernel size 2 twice. All
    convolutions are stride 1, same-padded, ReLU. The flattened features are
    concatenated with the 26-element letter vector, then go through
    BatchNormalization -> Dense(1000, relu) -> BatchNormalization ->
    Dense(10, softmax).

    Returns:
        A Keras Model with inputs [(28, 28, 1) image, (26,) letter vector],
        compiled with the Adam optimizer, categorical cross-entropy loss and
        the accuracy metric.
    """
    image_in = Input((28,28,1))
    letter_in = Input((26,))

    x = image_in
    for n_filters, kernel_sizes in ((128, (5, 2)), (256, (2, 2))):
        for kernel_size in kernel_sizes:
            # Batch norm precedes every convolution (including the raw input).
            x = BatchNormalization()(x)
            x = Conv2D(n_filters, kernel_size=kernel_size, strides=1, padding='same', activation='relu')(x)
        x = MaxPooling2D((2, 2))(x)

    x = Flatten()(x)
    x = Concatenate()([x, letter_in])
    x = BatchNormalization()(x)
    x = Dense(1000, activation='relu')(x)

    x = BatchNormalization()(x)
    digit_probs = Dense(10, activation='softmax')(x)

    model = Model(inputs = [image_in, letter_in], outputs = digit_probs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model
    


In [27]:

# Experimentation helper: remove saved checkpoints from the fold-1 model
# directory. A file is removed when its joined path contains 'epoch'.
# NOTE(review): only ./storage/kfold1/ is cleaned here — confirm whether the
# other fold directories are meant to be left untouched.
checkpoint_dir = './storage/kfold1/'
for entry in os.listdir(checkpoint_dir):
    entry_path = os.path.join(checkpoint_dir, entry)
    if 'epoch' in entry_path:
        os.remove(entry_path)


In [28]:
# implement k-fold cv 
def k_fold(k,files):
    """Split `files` into `k` contiguous folds, in order and without shuffling.

    Each of the first k-1 folds has len(files) // k items; the last fold
    takes everything that remains, so it absorbs any remainder.

    Args:
        k: number of folds.
        files: any sliceable sequence with a length (list, string, array, ...).

    Returns:
        A list of k slices of `files`.
    """
    fold_size = len(files) // k
    starts = [idx * fold_size for idx in range(k)]
    return [
        files[start:] if idx == k - 1 else files[start:start + fold_size]
        for idx, start in enumerate(starts)
    ]

# Contiguous (unshuffled) split of images, digit labels and letter one-hots.
n_folds = 5
x_train_folds = k_fold(n_folds, x_train)
y_train_folds = k_fold(n_folds, y_train)
letter_train_folds = k_fold(n_folds, train_letters_numeric)

for t in range(n_folds):
    print("************ Fold {} training ************".format(t+1))

    # Fold t is held out as the validation set for this round.
    cur_val_x = x_train_folds[t]
    cur_val_y = y_train_folds[t]
    cur_val_letter = letter_train_folds[t]

    # All remaining folds are stacked along the sample axis to form the
    # training set (same ordering as iterating fold by fold, sample by sample).
    cur_train_x = np.concatenate(x_train_folds[:t] + x_train_folds[t+1:], axis=0)
    cur_train_y = np.concatenate(y_train_folds[:t] + y_train_folds[t+1:], axis=0)
    cur_letter = np.concatenate(letter_train_folds[:t] + letter_train_folds[t+1:], axis=0)

    # Checkpoints for this fold go to ./storage/kfold<N>/; create the directory
    # up front so saving does not depend on it already existing.
    fold_dir = './storage/' + 'kfold' + str(t+1)
    os.makedirs(fold_dir, exist_ok=True)
    model_path = fold_dir + '/epoch_{epoch:03d}_val_{val_loss:.3f}_acc_{val_accuracy:.3f}.h5'

    # All callbacks track validation accuracy: LR is multiplied by 0.8 after 3
    # epochs without improvement, only improving checkpoints are saved, and
    # training stops after 20 epochs without improvement.
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1, factor=0.8)
    checkpoint = ModelCheckpoint(filepath=model_path,monitor='val_accuracy',verbose=1,save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_accuracy',patience=20)

    # A fresh model is built for every fold.
    model = base_cnn_grade_4()

    # Train directly on the arrays, feeding both the image and the letter
    # input. train_datagen augmentation is not used here: the model takes two
    # inputs and a plain flow(x, y) generator would only supply the images.
    history = model.fit([cur_train_x,cur_letter],
                        cur_train_y,
                        batch_size = 32,
                        shuffle=True,
                        validation_data = ([cur_val_x,cur_val_letter],cur_val_y),
                        verbose = 1,
                        epochs = 200,
                        callbacks = [learning_rate_reduction, checkpoint, early_stopping])


************ Fold 1 training ************
Train on 1639 samples, validate on 409 samples
Epoch 1/200
Epoch 00001: val_accuracy improved from -inf to 0.09535, saving model to ./storage/kfold1/epoch_001_val_3.439_acc_0.095.h5
Epoch 2/200
Epoch 00002: val_accuracy did not improve from 0.09535
Epoch 3/200
Epoch 00003: val_accuracy improved from 0.09535 to 0.10758, saving model to ./storage/kfold1/epoch_003_val_11.266_acc_0.108.h5
Epoch 4/200
Epoch 00004: val_accuracy did not improve from 0.10758
Epoch 5/200
Epoch 00005: val_accuracy did not improve from 0.10758
Epoch 6/200
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.

Epoch 00006: val_accuracy did not improve from 0.10758
Epoch 7/200
Epoch 00007: val_accuracy did not improve from 0.10758
Epoch 8/200
Epoch 00008: val_accuracy improved from 0.10758 to 0.14425, saving model to ./storage/kfold1/epoch_008_val_8.176_acc_0.144.h5
Epoch 9/200
Epoch 00009: val_accuracy did not improve from 0.14425
Epoch 10/200
Epoc

In [29]:
# Best checkpoint chosen for each of the five folds.
checkpoint_paths = [
    './storage/kfold1/epoch_043_val_0.832_acc_0.817.h5',
    './storage/kfold2/epoch_024_val_0.754_acc_0.819.h5',
    './storage/kfold3/epoch_036_val_0.697_acc_0.826.h5',
    './storage/kfold4/epoch_032_val_0.696_acc_0.826.h5',
    './storage/kfold5/epoch_061_val_0.661_acc_0.833.h5',
]
model1, model2, model3, model4, model5 = [
    load_model(checkpoint_path) for checkpoint_path in checkpoint_paths
]

# Each fold model predicts class probabilities from the test images and the
# one-hot test letters.
pred1, pred2, pred3, pred4, pred5 = [
    fold_model.predict([x_test, test_letters_numeric])
    for fold_model in (model1, model2, model3, model4, model5)
]

# Ensemble by taking the unweighted mean of the five probability arrays.
pred_avg = (pred1+pred2+pred3+pred4+pred5)/5.0


In [30]:
# Predicted digit per test sample = index of the highest averaged probability.
# A single vectorized argmax over the class axis replaces the per-row loop.
result_arr = np.argmax(pred_avg, axis=1)
result_arr

array([6, 9, 6, ..., 6, 1, 0])

In [31]:
# Store the ensemble's predicted digits in the submission frame's 'digit'
# column, then preview the first rows.
submission['digit'] = result_arr
submission.head()

Unnamed: 0,id,digit
0,2049,6
1,2050,9
2,2051,6
3,2052,0
4,2053,3


In [32]:
# Write the filled-in submission to disk without the DataFrame index.
submission.to_csv('./storage/inception_5_fold_avg_no_generator.csv',index=False)