In [1]:
# environment: Paperspace Quadro P6000 GPU  
import numpy as np 
import pandas as pd 
import os 
import tensorflow as tf
import tensorflow.keras # run pip install keras==2.3 beforehand for compatability 
from tensorflow.keras import Input, Model 
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, AlphaDropout, MaxPooling2D, AveragePooling2D, BatchNormalization, Concatenate, Flatten, Reshape, Add, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import skimage
from skimage.transform import rotate
from skimage.util import random_noise
from skimage.transform import warp, AffineTransform
from skimage.transform import resize
import cv2
import random 
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.utils import shuffle # shuffle dataset before splitting into folds 

Using TensorFlow backend.


# Read file and preprocess data

In [2]:
# All competition files live in one directory; build the three paths from it.
DATA_DIR = './storage/modified_mnist_dataset'
train_path, test_path, submission_path = (
    DATA_DIR + '/' + file_name
    for file_name in ('train.csv', 'test.csv', 'submission.csv')
)

In [3]:
# Load the training set, the test set and the submission template, in that order.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (train_path, test_path, submission_path)
)

In [4]:
# Cast the label columns to categorical: columns 1 and 2 of train (read below
# as the digit and the letter) and column 1 of test (read below as the letter).
for frame, label_cols in ((train, (1, 2)), (test, (1,))):
    for col in label_cols:
        frame.iloc[:, col] = pd.Categorical(frame.iloc[:, col])

In [5]:
# Build the image tensors and labels for train and test.
# Inputs are standardized (zero mean, unit variance) rather than min-max normalized.

# --- train: pixels start at column 3 ---
x_train = train.iloc[:,3:].values.reshape(-1,28,28,1).astype(np.float32)
# Statistics come from the training pixels only; the same values are reused for
# test, i.e. we assume train and test share the same "distribution".
mu, sd = np.mean(x_train), np.std(x_train)
x_train = (x_train - mu) / sd

# Digit labels (column 1) -> one-hot over 10 classes.
y_train = to_categorical(np.asarray(train.iloc[:,1].values), num_classes = 10)
train_letters = train.iloc[:,2].values

# --- test: pixels start at column 2, letters are column 1 ---
x_test = test.iloc[:,2:].values.reshape(-1,28,28,1).astype(np.float32)
x_test = (x_test - mu) / sd
test_letters = test.iloc[:,1].values

In [6]:
def _letters_to_one_hot(letters):
    """Map letters to indices relative to "A" and one-hot encode over 26 classes."""
    indices = np.asarray([ord(ch) - ord("A") for ch in letters])
    return to_categorical(indices, num_classes = 26)

train_letters_numeric = _letters_to_one_hot(train_letters)
test_letters_numeric = _letters_to_one_hot(test_letters)

x_train.shape, y_train.shape, x_test.shape, train_letters_numeric.shape, test_letters_numeric.shape

((2048, 28, 28, 1), (2048, 10), (20480, 28, 28, 1), (2048, 26), (20480, 26))

# Augment Data

For now, we augment the data using the following methods:
- rotation 
- adding noise  
- adding gaussian blur 
- shifting image 

Please refer to [this notebook](https://github.com/iljimae0418/overlapping-digit-and-letter-mnist/blob/master/Examples%20of%20data%20augmentations.ipynb) for examples.  

We later decided to add a few more augmentations:
- modifying brightness 
- ZCA Whitening 
- random crops

Please refer to [this notebook](https://github.com/iljimae0418/overlapping-digit-and-letter-mnist/blob/master/Examples%20of%20augmentation%202%20(further%20augmentation).ipynb) for examples. 

In [7]:
# Rotate every training image by a random positive angle of 10 to 40 degrees.
x_train_rotated = np.asarray(
    [rotate(image, angle = random.randint(10,40)) for image in x_train]
)

In [8]:
# Same as above with the sign flipped: a random negative angle of -10 to -40 degrees.
x_train_rotated_2 = np.asarray(
    [rotate(image, angle = -random.randint(10,40)) for image in x_train]
)

In [9]:
# Add noise to every training image (random_noise's default mode).
# x_train is standardized, so its values are not confined to [-1, 1].
# random_noise clips its output to [-1, 1] by default when the input contains
# negative values, which would flatten the brightest/darkest pixels; clip=False
# keeps the standardized value range intact.
x_noised = np.asarray(
    [random_noise(image, clip = False) for image in x_train]
)

In [10]:
# Gaussian-blur each training image with a kernel size drawn from {3, 5, 9}.
kernel_sizes = [random.choice([3,5,9]) for _ in x_train]
x_blurred = np.asarray(
    [cv2.GaussianBlur(image, (ksize, ksize), 0)
     for image, ksize in zip(x_train, kernel_sizes)]
)
# Restore the explicit channel axis so the result lines up with x_train.
x_blurred = x_blurred.reshape(-1,28,28,1)

In [11]:
def _random_shift(image):
    """Translate `image` by a random non-zero offset of at most 2 px per axis.

    Uses warp(..., mode="wrap") for the pixels that fall outside the frame.
    """
    offsets = [-2,-1,1,2]
    # dx is drawn before dy (tuple elements are evaluated left to right).
    shift = AffineTransform(translation = (random.choice(offsets), random.choice(offsets)))
    return warp(image, shift, mode = "wrap")

x_shifted = np.asarray([_random_shift(image) for image in x_train])

In [12]:
# Scale each training image by a random factor in [1 - brightness, 1 + brightness].
brightness = 0.5
x_brightness = np.asarray(
    [image * (1.0 + random.uniform(-brightness, brightness)) for image in x_train]
)

In [13]:
# apply zca whitening
def zca_whitening(sample, epsilon = 0.1):
    """ZCA-whiten a single 2-D image and rescale the result to [0, 1].

    Rows of `sample` are treated as observations and columns as variables:
    the columns are centred, their covariance is decomposed with SVD, and the
    whitening matrix U * diag(1 / sqrt(S + epsilon)) * U^T is applied.

    Parameters
    ----------
    sample : 2-D array (e.g. a 28x28 image).
    epsilon : float, regulariser added to the singular values before the
        inverse square root (default 0.1, the value previously hard-coded).

    Returns
    -------
    Array of shape `sample.shape + (1,)` with values min-max rescaled to
    [0, 1]. A constant result (zero value range) is returned as all zeros
    instead of NaN.
    """
    centred = sample - sample.mean(axis=0)
    cov = np.cov(centred, rowvar = False)
    U, S, _ = np.linalg.svd(cov)
    whitener = U.dot(np.diag(1.0/np.sqrt(S + epsilon))).dot(U.T)
    X_ZCA = whitener.dot(centred.T).T
    lowest = X_ZCA.min()
    value_range = X_ZCA.max() - lowest
    if value_range == 0:
        # Constant image: min-max scaling would divide by zero.
        X_ZCA_rescaled = np.zeros_like(X_ZCA)
    else:
        X_ZCA_rescaled = (X_ZCA - lowest) / value_range
    # Append the channel axis; follows the input shape instead of assuming 28x28.
    return X_ZCA_rescaled.reshape(X_ZCA_rescaled.shape + (1,))

# Whiten every training image; the channel axis is dropped for the call and
# added back by zca_whitening.
x_zca_whitened = np.asarray(
    [zca_whitening(image.reshape((28,28))) for image in x_train]
)


In [14]:
# add random cropping (zooming effect)
def random_crop(img):
    """Return a random square window of `img` with side length 22, 23 or 24.

    The side length comes from `random`, the window position from `np.random`.
    The window is cut from a copy, so the caller's array is never modified.
    """
    side = random.randint(22,24) # a good balance for 28 by 28 images
    n_rows, n_cols = img.shape[:2]
    # Column offset is drawn first, then the row offset.
    left = np.random.randint(n_cols - side)
    top = np.random.randint(n_rows - side)
    return img.copy()[top:top + side, left:left + side]

# Crop each training image at random, then resize the crop back to 28x28x1.
x_random_crop = np.asarray(
    [resize(random_crop(image), (28,28,1)) for image in x_train]
)

In [15]:
# Stack the original images and all augmented variants along the sample axis.
# Labels and letters are repeated once per variant so rows stay aligned.
image_sets = (
    x_train,
    x_train_rotated,
    x_train_rotated_2,
    x_noised,
    x_blurred,
    x_shifted,
    x_brightness,
    x_zca_whitened,
    x_random_crop,
)
n_sets = len(image_sets)
x_train = np.concatenate(image_sets, axis = 0)
y_train = np.concatenate((y_train,) * n_sets, axis = 0)
train_letters_numeric = np.concatenate((train_letters_numeric,) * n_sets, axis = 0)

x_train.shape, y_train.shape, train_letters_numeric.shape


((18432, 28, 28, 1), (18432, 10), (18432, 26))

# Conduct Training

In [16]:
def conv2d_block(input_layer, n_filters, kernel):
    """Two ReLU convolutions with a skip connection, then summed max/average pooling.

    The first convolution is batch-normalized, fed through a second
    convolution, and the two are added together and batch-normalized again.
    The result is pooled with both MaxPooling2D and AveragePooling2D (2x2) and
    the two pooled tensors are summed.
    """
    conv_args = dict(activation = 'relu', padding = 'same')
    stem = Conv2D(n_filters, kernel, **conv_args)(input_layer)
    stem = BatchNormalization()(stem)
    branch = Conv2D(n_filters, kernel, **conv_args)(stem)
    merged = Add()([stem, branch])
    merged = BatchNormalization()(merged)
    pooled_max = MaxPooling2D((2,2))(merged)
    pooled_avg = AveragePooling2D((2,2))(merged)
    return Add()([pooled_max, pooled_avg])


def conv2d_block_2(input_layer, n_filters, kernel):
    """SELU variant of conv2d_block.

    Same layout (conv -> BN -> conv -> add -> BN -> max-pool + avg-pool), but
    both convolutions use 'selu' activation with 'lecun_normal' initialization.
    """
    conv_args = dict(activation = 'selu', padding = 'same', kernel_initializer='lecun_normal')
    stem = Conv2D(n_filters, kernel, **conv_args)(input_layer)
    stem = BatchNormalization()(stem)
    branch = Conv2D(n_filters, kernel, **conv_args)(stem)
    merged = Add()([stem, branch])
    merged = BatchNormalization()(merged)
    pooled_max = MaxPooling2D((2,2))(merged)
    pooled_avg = AveragePooling2D((2,2))(merged)
    return Add()([pooled_max, pooled_avg])

# Reported at around 82% on a 9:1 train/validation split. The original note
# called this "validation loss"; presumably validation accuracy was meant.
# Most promising model so far, until a potentially stronger grade 5 model exists.
def base_cnn_grade_4():
    """Build and compile the two-input (image + letter one-hot) digit classifier.

    Two stages of parallel conv2d_block branches (kernels 7, 5, 3) are
    concatenated, flattened, joined with the 26-dim letter vector, and passed
    through Dense 512/256/128 (each batch-normalized), dropout 0.4 and a
    10-way softmax. Compiled with Adam and categorical cross-entropy.
    """
    kernel_sizes = (7, 5, 3)
    image_input = Input((28,28,1))
    letter_input = Input((26,))

    branches = [conv2d_block(image_input, 64, size) for size in kernel_sizes]
    features = Concatenate()(branches)
    branches = [conv2d_block(features, 32, size) for size in kernel_sizes]
    features = Concatenate()(branches)

    head = Flatten()(features)
    head = Concatenate()([head, letter_input])
    for width in (512, 256, 128):
        head = Dense(width, activation = 'relu')(head)
        head = BatchNormalization()(head)
    head = Dropout(0.4)(head)
    head = Dense(10, activation = 'softmax')(head)

    model = Model(inputs = [image_input, letter_input], outputs = head)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

    
def base_cnn_grade_5(): 
    """Build and compile the deeper two-input (image + letter one-hot) classifier.

    Three stages of four parallel conv2d_block branches (kernels 7, 5, 3, 1;
    64, 32 and 16 filters) are concatenated and batch-normalized. The flattened
    features are joined with the 26-dim letter vector and passed through three
    Dense(1024) layers (each batch-normalized), dropout 0.25 and a 10-way
    softmax. Compiled with Adam and categorical cross-entropy.

    Returns the compiled Model taking [image, letter] inputs.
    """
    inputs = Input((28,28,1))
    letter_input = Input((26,))   
    conv1 = conv2d_block(inputs, 64, 7) 
    conv2 = conv2d_block(inputs, 64, 5) 
    conv3 = conv2d_block(inputs, 64, 3) 
    conv4 = conv2d_block(inputs, 64, 1)
    conv = Concatenate()([conv1,conv2,conv3,conv4])   
    conv = BatchNormalization()(conv) 
    conv1 = conv2d_block(conv, 32, 7)
    conv2 = conv2d_block(conv, 32, 5)
    conv3 = conv2d_block(conv, 32, 3) 
    conv4 = conv2d_block(conv, 32, 1)
    conv = Concatenate()([conv1,conv2,conv3,conv4])   
    conv = BatchNormalization()(conv) 
    conv1 = conv2d_block(conv, 16, 7)
    conv2 = conv2d_block(conv, 16, 5)
    conv3 = conv2d_block(conv, 16, 3)   
    conv4 = conv2d_block(conv, 16, 1)       
    conv = Concatenate()([conv1,conv2,conv3,conv4]) 
    conv = BatchNormalization()(conv) 
    outputs = Flatten()(conv) 
    # Previously letter_input was listed as a model input but never connected
    # to the graph, so the letter had no effect on the prediction. Join it with
    # the image features here, as base_cnn_grade_4 does.
    outputs = Concatenate()([outputs, letter_input])
    for _ in range(3): 
        outputs = Dense(1024, activation = 'relu')(outputs) 
        outputs = BatchNormalization()(outputs)  
    outputs = Dropout(0.25)(outputs) 
    outputs = Dense(10, activation = 'softmax')(outputs)
    model = Model(inputs = [inputs, letter_input], outputs = outputs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model  

In [17]:
# implement k-fold cv
def k_fold(k,files):
    """Split `files` into `k` contiguous slices, in order.

    Each of the first k-1 folds holds len(files) // k items; the last fold
    takes everything that remains, so it absorbs the remainder. Returns a list
    of k slices of `files`.
    """
    fold_size = len(files) // k
    # A stop of None makes the final slice run to the end of the sequence.
    bounds = [
        (idx * fold_size, None if idx == k - 1 else (idx + 1) * fold_size)
        for idx in range(k)
    ]
    return [files[start:stop] for start, stop in bounds]

# Shuffle images, digit labels and letters together (rows stay aligned) before
# splitting into folds.
# NOTE(review): folds are formed after augmentation and shuffling, so augmented
# variants of one original image can land in both the training and the
# validation folds; validation metrics are therefore likely optimistic.
x_train, y_train, train_letters_numeric = shuffle(x_train, y_train, train_letters_numeric)
# split data into k folds
k = 10 
x_train_folds = k_fold(k, x_train)
y_train_folds = k_fold(k, y_train) 
letter_train_folds = k_fold(k,train_letters_numeric)

for t in range(k):  
    print("************ Fold {} training ************".format(t+1)) 
    # Fold t is held out for validation; the remaining folds form the training set.
    cur_val_x = x_train_folds[t] 
    cur_val_y = y_train_folds[t] 
    cur_val_letter = letter_train_folds[t]
    train_folds_x = x_train_folds[0:t] + x_train_folds[t+1:] 
    train_folds_y = y_train_folds[0:t] + y_train_folds[t+1:]
    train_fold_letter = letter_train_folds[0:t] + letter_train_folds[t+1:]
    # Join the training folds in one call instead of appending sample by sample.
    cur_train_x = np.concatenate(train_folds_x, axis = 0)
    cur_train_y = np.concatenate(train_folds_y, axis = 0)
    cur_letter = np.concatenate(train_fold_letter, axis = 0)
    # One checkpoint directory per fold; create it up front so saving the first
    # .h5 checkpoint cannot fail on a missing directory.
    fold_dir = './storage/mnist_test_4/' + 'kfold' + str(t+1)
    os.makedirs(fold_dir, exist_ok = True)
    model_path = fold_dir + '/epoch_{epoch:03d}_val_{val_loss:.3f}_acc_{val_accuracy:.3f}.h5' 
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1, factor=0.8)
    checkpoint = ModelCheckpoint(filepath=model_path,monitor='val_accuracy',verbose=1,save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_accuracy',patience=25)
    # Possible alternative to ReduceLROnPlateau; defined but not passed to fit().
    annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)
    # A fresh model per fold, so no weights carry over between folds.
    model = base_cnn_grade_5() 
    
    history = model.fit([cur_train_x,cur_letter],
                        cur_train_y,
                       batch_size = 32,
                       shuffle = True, 
                       validation_data = ([cur_val_x,cur_val_letter],cur_val_y),
                       verbose = 1, 
                       epochs = 300,
                       callbacks = [learning_rate_reduction, checkpoint, early_stopping])


************ Fold 1 training ************
Train on 16589 samples, validate on 1843 samples
Epoch 1/300
Epoch 00001: val_accuracy improved from -inf to 0.46446, saving model to ./storage/mnist_test_4/kfold1/epoch_001_val_1.627_acc_0.464.h5
Epoch 2/300
Epoch 00002: val_accuracy improved from 0.46446 to 0.62832, saving model to ./storage/mnist_test_4/kfold1/epoch_002_val_1.337_acc_0.628.h5
Epoch 3/300
Epoch 00003: val_accuracy improved from 0.62832 to 0.70537, saving model to ./storage/mnist_test_4/kfold1/epoch_003_val_0.937_acc_0.705.h5
Epoch 4/300
Epoch 00004: val_accuracy improved from 0.70537 to 0.75909, saving model to ./storage/mnist_test_4/kfold1/epoch_004_val_0.733_acc_0.759.h5
Epoch 5/300
Epoch 00005: val_accuracy improved from 0.75909 to 0.79381, saving model to ./storage/mnist_test_4/kfold1/epoch_005_val_0.690_acc_0.794.h5
Epoch 6/300
Epoch 00006: val_accuracy did not improve from 0.79381
Epoch 7/300
Epoch 00007: val_accuracy improved from 0.79381 to 0.81063, saving model to ./

### Make predictions with the k-fold ensemble (further-augmented data, shuffled folds, standardized input)

In [13]:
# Load the selected checkpoints for folds 1-3.
model1, model2, model3 = (
    load_model(checkpoint_path)
    for checkpoint_path in (
        './storage/mnist_test_4/kfold1/epoch_066_val_0.546_acc_0.910.h5',
        './storage/mnist_test_4/kfold2/epoch_088_val_0.619_acc_0.908.h5',
        './storage/mnist_test_4/kfold3/epoch_066_val_0.603_acc_0.906.h5',
    )
)

In [None]:
# Load the selected checkpoints for folds 4-6.
model4, model5, model6 = (
    load_model(checkpoint_path)
    for checkpoint_path in (
        './storage/mnist_test_4/kfold4/epoch_058_val_0.526_acc_0.908.h5',
        './storage/mnist_test_4/kfold5/epoch_067_val_0.580_acc_0.908.h5',
        './storage/mnist_test_4/kfold6/epoch_108_val_0.604_acc_0.918.h5',
    )
)

In [None]:
# Load the selected checkpoints for folds 7-10.
model7, model8, model9, model10 = (
    load_model(checkpoint_path)
    for checkpoint_path in (
        './storage/mnist_test_4/kfold7/epoch_074_val_0.632_acc_0.915.h5',
        './storage/mnist_test_4/kfold8/epoch_081_val_0.594_acc_0.909.h5',
        './storage/mnist_test_4/kfold9/epoch_067_val_0.590_acc_0.910.h5',
        './storage/mnist_test_4/kfold10/epoch_080_val_0.637_acc_0.900.h5',
    )
)

In [8]:
# Run every fold model on the test set, then average the class probabilities.
fold_models = [model1, model2, model3, model4, model5,
               model6, model7, model8, model9, model10]
fold_preds = [fold_model.predict([x_test, test_letters_numeric]) for fold_model in fold_models]
(pred1, pred2, pred3, pred4, pred5,
 pred6, pred7, pred8, pred9, pred10) = fold_preds

# Summed left to right, then divided by the number of fold models.
pred_avg = sum(fold_preds)/10.0

In [9]:
# Predicted digit = index of the highest averaged probability in each row.
# Vectorized over all rows instead of a Python loop with one argmax per row.
result_arr = np.argmax(pred_avg, axis = 1)
result_arr

array([6, 5, 8, ..., 6, 8, 0])

In [10]:
# Write the ensemble's predicted digits into the 'digit' column of the template
# (assumes result_arr is in the same row order as submission.csv — TODO confirm).
submission['digit'] = result_arr 
submission.head() 

Unnamed: 0,id,digit
0,2049,6
1,2050,5
2,2051,8
3,2052,0
4,2053,3


In [11]:
# Save the filled-in submission; index=False keeps the DataFrame index out of the CSV.
submission.to_csv('./storage/standard_test.csv', index=False)