In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM, concatenate, Dropout
from tensorflow.keras.utils import plot_model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [8]:
# Read the training table and the test table from the local data/ folder.
train, test = (pd.read_csv(csv_path) for csv_path in ('data/train.csv', 'data/test.csv'))

In [9]:
# Everything except the id/label columns is pixel data: reshape it into
# 28x28 single-channel images and divide by 255.
pixel_values = train.drop(columns=['id', 'digit', 'letter']).values
x = pixel_values.reshape(-1, 28, 28, 1) / 255

# One-hot encode the 'digit' column: one row per sample, one column per
# distinct digit value, with a 1 in the column indexed by the sample's digit.
y_data = train['digit']
y = np.zeros((len(y_data), len(y_data.unique())))
y[np.arange(len(y_data)), y_data.values] = 1

In [10]:
# Hold out 20% of the samples for validation, stratified on the one-hot labels
# so both splits keep the same class balance.
# random_state is pinned so that Restart & Run All reproduces the same split
# (and therefore comparable validation metrics) on every run.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, shuffle=True, stratify=y, random_state=42)

# Sanity-check the resulting array shapes.
for split in (x_train, x_val, y_train, y_val):
    print(split.shape)

(1638, 28, 28, 1)
(410, 28, 28, 1)
(1638, 10)
(410, 10)


In [11]:
def _random_brightness(image, low=0.75, high=1.25):
    """Scale one image by a random brightness factor, keeping values in [0, 1].

    Args:
        image: a single image array whose pixel values are already in [0, 1].
        low: smallest brightness factor that can be drawn.
        high: largest brightness factor that can be drawn.

    Returns:
        An array of the same shape, multiplied by a factor drawn uniformly
        from [low, high] and clipped back to [0, 1].
    """
    factor = np.random.uniform(low, high)
    return np.clip(image * factor, 0.0, 1.0)


# Random shift / zoom / shear / brightness augmentation.
# Brightness is applied through `preprocessing_function` rather than the
# built-in `brightness_range` option: that option round-trips every image
# through an 8-bit PIL image, which assumes 0-255 pixel values. The images fed
# to this generator are already divided by 255, so `brightness_range` would
# hand the model augmented batches on a different scale than the un-augmented
# images used at prediction time.
image_generator = ImageDataGenerator(width_shift_range=0.2, height_shift_range=0.2,
                                     zoom_range=[0.75, 1.25], shear_range=20,
                                     preprocessing_function=_random_brightness)

In [12]:
def create_model(drop=0.2, optimizer='adam', padding='valid'):
    """Build and compile the CNN digit classifier.

    The network takes 28x28x1 images, runs them through three
    Conv2D -> Dropout -> MaxPooling2D stages, flattens, then applies three
    ReLU dense layers (500, 100, 50 units; dropout after the first two) and a
    10-way softmax output.

    Args:
        drop: dropout rate shared by every Dropout layer.
        optimizer: optimizer passed to `Model.compile`.
        padding: padding mode passed to every Conv2D layer.

    Returns:
        The compiled Keras `Model` (categorical cross-entropy loss,
        accuracy metric).
    """
    inputs = Input(shape=(28, 28, 1), name='input1')

    # Convolutional feature extractor: (filters, kernel size) per stage.
    conv_stages = [(64, (3, 3)), (64, (2, 2)), (128, (2, 2))]
    features = inputs
    for stage, (filters, kernel) in enumerate(conv_stages, start=1):
        features = Conv2D(filters, kernel, activation='relu', padding=padding,
                          name='conv{}'.format(stage))(features)
        features = Dropout(drop)(features)
        features = MaxPooling2D((2, 2), name='pool{}'.format(stage))(features)
    features = Flatten(name='flat1')(features)

    # Dense head: dropout follows every hidden layer except the last one.
    dense_units = (500, 100, 50)
    hidden = features
    for level, units in enumerate(dense_units, start=1):
        hidden = Dense(units, activation='relu', name='hidden{}'.format(level))(hidden)
        if level < len(dense_units):
            hidden = Dropout(drop)(hidden)
    outputs = Dense(10, activation='softmax', name='output')(hidden)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [13]:
# Instantiate the classifier with 'same' padding so the conv layers keep spatial size.
model = create_model(drop=0.2, optimizer='adam', padding='same')

In [20]:
# Let the generator compute any data-dependent statistics from the training images.
# NOTE(review): per the Keras docs this is only required when featurewise
# centering/normalisation or ZCA whitening is enabled — confirm it is still needed.
image_generator.fit(x=x_train, augment=True)

In [21]:
# Confirm the training tensor shape before starting the fit.
print(np.shape(x_train))

(1638, 28, 28, 1)


In [22]:
# Train for 50 epochs on randomly augmented batches of the training split.
batch_size = 64

# Keras expects an integer step count; round up so that the final, partial
# batch is still part of every epoch.
steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))

# `Model.fit` accepts generators directly (`fit_generator` is deprecated).
# Validation uses the untouched hold-out arrays: augmenting them would make the
# reported metrics random and unrepresentative of un-augmented test images.
history = model.fit(image_generator.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch, epochs=50, verbose=1,
                    validation_data=(x_val, y_val))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Apply the same preprocessing as the training images: drop the non-pixel
# columns, reshape to 28x28x1 and divide by 255.
test_pixels = test.drop(columns=['id', 'letter']).values
x_test = test_pixels.reshape(-1, 28, 28, 1) / 255
x_test.shape

In [None]:
# Predicted digit = index of the largest softmax output for each test image.
prediction = model.predict(x_test).argmax(axis=1)

In [None]:
# Load the submission template and fill in the predicted digits.
submission = pd.read_csv('data/submission.csv')
# Reuse the class indices computed in the previous cell instead of running a
# second, identical inference pass over the whole test set.
submission['digit'] = prediction

In [None]:
# Write the filled-in submission to disk without the DataFrame index column.
submission.to_csv(path_or_buf='data/submission1.csv', index=False)