In [1]:
# environment: Paperspace Quadro P6000 GPU  
import numpy as np 
import pandas as pd 
import os 
import tensorflow as tf
import tensorflow.keras # run pip install keras==2.3 beforehand for compatability 
from tensorflow.keras import Input, Model 
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, AlphaDropout, MaxPooling2D, AveragePooling2D, BatchNormalization, Concatenate, Flatten, Reshape, Add, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import skimage
from skimage.transform import rotate
from skimage.util import random_noise
from skimage.transform import warp, AffineTransform
from skimage.transform import resize
import cv2
import random 
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.utils import shuffle # shuffle dataset before splitting into folds 

Using TensorFlow backend.


In [2]:
# Locations of the CSV files read by the loading cell below:
# training data, test data and the submission file.
train_path, test_path, submission_path = (
    './storage/modified_mnist_dataset/train.csv',
    './storage/modified_mnist_dataset/test.csv',
    './storage/modified_mnist_dataset/submission.csv',
)

In [3]:
# Read the three CSV files into DataFrames, in the same order as the paths
# were declared (train, test, submission).
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (train_path, test_path, submission_path)
)

In [4]:
# Convert the digit and letter columns to categorical dtype.
# Positions: train columns 1 and 2, test column 1.
#
# Why not `frame.iloc[:, pos] = pd.Categorical(...)`: on pandas >= 2.0 an
# `.iloc[:, pos] = values` assignment writes the values into the existing
# column in place, so the column keeps its original dtype and nothing is
# converted. Assigning by column label replaces the column and therefore
# really switches its dtype to `category`.
for frame, positions in ((train, (1, 2)), (test, (1,))):
    for pos in positions:
        label = frame.columns[pos]
        frame[label] = pd.Categorical(frame.iloc[:, pos])

In [5]:
# Build the model-ready arrays from the raw frames.
# Pixels are standardized (shifted by the mean, divided by the standard
# deviation) instead of being min-max normalized.

# Training images: everything from column 3 onward, reshaped to 28x28x1 float32.
x_train = train.iloc[:, 3:].values.reshape(-1, 28, 28, 1).astype(np.float32)

# The statistics are computed on the training images only and reused for the
# test images, i.e. both sets are assumed to share the same "distribution".
mu = np.mean(x_train)
sd = np.std(x_train)
x_train = (x_train - mu) / sd

# Training targets: column 1 one-hot encoded over 10 classes; column 2 kept as-is.
y_train = to_categorical(np.asarray(train.iloc[:, 1].values), num_classes=10)
train_letters = train.iloc[:, 2].values

# Test images: everything from column 2 onward, same reshape and same scaling.
x_test = test.iloc[:, 2:].values.reshape(-1, 28, 28, 1).astype(np.float32)
x_test = (x_test - mu) / sd
test_letters = test.iloc[:, 1].values

In [6]:
# Encode every letter as its offset from "A" (A -> 0, B -> 1, ...), then
# one-hot encode the offsets over 26 classes.
letter_base = ord("A")

train_letters_numeric = np.asarray([ord(ch) - letter_base for ch in train_letters])
test_letters_numeric = np.asarray([ord(ch) - letter_base for ch in test_letters])

train_letters_numeric = to_categorical(train_letters_numeric, num_classes=26)
test_letters_numeric = to_categorical(test_letters_numeric, num_classes=26)

# Shape check of every array prepared so far.
tuple(
    arr.shape
    for arr in (x_train, y_train, x_test, train_letters_numeric, test_letters_numeric)
)

((2048, 28, 28, 1), (2048, 10), (20480, 28, 28, 1), (2048, 26), (20480, 26))

# Running simple stacking ensemble (weak HAN)

In [7]:
# Load the saved meta-feature arrays (train/test, plus their "grade_4"
# counterparts) from disk, in the same order as the names on the left.
meta_x_train, meta_x_test, meta_x_train_grade_4, meta_x_test_grade_4 = (
    np.load(npy_path)
    for npy_path in (
        './storage/meta_x_train.npy',
        './storage/meta_x_test.npy',
        './storage/meta_x_train_grade_4.npy',
        './storage/meta_x_test_grade_4.npy',
    )
)

# Shape check of the four loaded arrays.
tuple(
    arr.shape
    for arr in (meta_x_train, meta_x_test, meta_x_train_grade_4, meta_x_test_grade_4)
)

((2048, 100), (20480, 100), (2048, 100), (20480, 100))

In [8]:
# Append the grade-4 meta-features as extra columns of the training matrix.
# The result is stored back under the same name, so running this cell a
# second time would append the grade-4 columns again.
meta_x_train = np.concatenate(
    (meta_x_train, meta_x_train_grade_4),
    axis=1,
)

In [9]:
# Display the shape of the training meta-feature matrix after the column-wise concatenation.
meta_x_train.shape

(2048, 200)

In [10]:
# Append the grade-4 meta-features as extra columns of the test matrix.
# The result is stored back under the same name, so running this cell a
# second time would append the grade-4 columns again.
meta_x_test = np.concatenate(
    (meta_x_test, meta_x_test_grade_4),
    axis=1,
)

In [11]:
# Display the shape of the test meta-feature matrix after the column-wise concatenation.
meta_x_test.shape

(20480, 200)

In [12]:
# Standardize the meta-features for better performance.
# A single scalar mean / standard deviation is taken over the whole training
# matrix and applied to both matrices. Note that `mu` and `sd` are reassigned
# here, replacing the pixel statistics computed earlier in the notebook.
mu = np.mean(meta_x_train)
sd = np.std(meta_x_train)

# In-place arithmetic: the loop variable refers to the same array objects as
# meta_x_train / meta_x_test, so both are modified directly.
for meta_features in (meta_x_train, meta_x_test):
    meta_features -= mu
    meta_features /= sd



In [13]:
def meta_model(input_dim=200, hidden_units=(256, 128, 64), num_classes=10):
    """Build and compile the dense meta-learner used for stacking.

    The network is a stack of ``Dense(relu) -> BatchNormalization`` pairs
    followed by a softmax output layer.

    Args:
        input_dim: Number of input features (default 200, the width of the
            concatenated meta-feature matrix).
        hidden_units: Width of each hidden Dense layer, in order
            (default ``(256, 128, 64)``).
        num_classes: Number of softmax output units (default 10).

    Returns:
        A compiled Keras ``Model`` using the Adam optimizer, MSE loss and an
        accuracy metric. Calling ``meta_model()`` with no arguments builds
        exactly the original 200 -> 256 -> 128 -> 64 -> 10 network.
    """
    inputs = Input((input_dim,))

    hidden = inputs
    for units in hidden_units:
        # Layers are created in the same order as before (Dense, then
        # BatchNormalization), so Keras assigns the same auto-generated names.
        hidden = Dense(units, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)

    outputs = Dense(num_classes, activation='softmax')(hidden)
    model = Model(inputs=inputs, outputs=outputs)

    # NOTE(review): 'mse' on a softmax output is unusual for classification;
    # 'categorical_crossentropy' is the conventional choice. Left unchanged so
    # results stay comparable with previously saved checkpoints.
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return model

In [14]:
# Train the meta-learner on the standardized meta-features.
model = meta_model()

# Checkpoint file name pattern; Keras fills in the epoch number and the
# validation loss / accuracy of that epoch.
model_path = './storage/dense_net/split/epoch_{epoch:03d}_val_{val_loss:.3f}_acc_{val_accuracy:.3f}.h5'

# Make sure the checkpoint directory exists before training starts, so the
# first checkpoint write cannot fail on a fresh environment.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Multiply the learning rate by 0.8 after 3 epochs without val_loss improvement.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.8)
# Save a checkpoint only when val_loss improves.
checkpoint = ModelCheckpoint(filepath=model_path, monitor='val_loss', verbose=1, save_best_only=True)
# Stop training after 10 epochs without val_loss improvement.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# 10% of the training data is held out for validation.
model.fit(meta_x_train, y_train, validation_split=0.1, shuffle=True, epochs=100,
          callbacks=[learning_rate_reduction, checkpoint, early_stopping])

Train on 1843 samples, validate on 205 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.00237, saving model to ./storage/dense_net/split/epoch_001_val_0.002_acc_1.000.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.00237 to 0.00029, saving model to ./storage/dense_net/split/epoch_002_val_0.000_acc_1.000.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.00029 to 0.00006, saving model to ./storage/dense_net/split/epoch_003_val_0.000_acc_1.000.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.00006 to 0.00002, saving model to ./storage/dense_net/split/epoch_004_val_0.000_acc_1.000.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.00002 to 0.00001, saving model to ./storage/dense_net/split/epoch_005_val_0.000_acc_1.000.h5
Epoch 6/100
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.

Epoch 00006: val_loss improved from 0.00001 to 0.00000, saving model to ./storage/dense_net/split/epoch_006_val_0.000_acc_1.000.h5
Epoch 7/100
Epoch 0

<tensorflow.python.keras.callbacks.History at 0x7f2dd04cb438>

In [16]:
# Load a saved checkpoint of the meta-learner and print its architecture.
# NOTE(review): the file name encodes an epoch number and validation metrics,
# so it is presumably tied to one particular training run; a fresh run may
# produce differently named files - confirm the path before re-running.
best_checkpoint_path = './storage/dense_net/split/epoch_081_val_0.000_acc_1.000.h5'
han = load_model(best_checkpoint_path)
han.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 200)]             0         
_________________________________________________________________
dense (Dense)                (None, 256)               51456     
_________________________________________________________________
batch_normalization (BatchNo (None, 256)               1024      
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256   

In [17]:
# Run the loaded meta-learner on the standardized test meta-features;
# the result holds one row of class scores per test sample.
pred = han.predict(meta_x_test)

In [18]:
# Display the shape of the prediction matrix.
pred.shape

(20480, 10)

In [19]:
# Predicted class for each sample = index of the largest score in its row.
# A single vectorized argmax over axis 1 replaces the per-row Python loop and
# keeps an integer dtype even when `pred` has no rows.
result_arr = np.argmax(pred, axis=1)
result_arr

array([6, 9, 8, ..., 6, 8, 0])

In [20]:
# Store the predicted digits in the submission frame's 'digit' column and
# preview the first rows.
submission = submission.assign(digit=result_arr)
submission.head()

Unnamed: 0,id,digit
0,2049,6
1,2050,9
2,2051,8
3,2052,0
4,2053,3


In [21]:
# Write the submission frame to disk without the index column.
output_csv_path = './storage/weak_han_2.csv'
submission.to_csv(output_csv_path, index=False)