# MNIST - CNN on FloydHub

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
# The container mounts data at /input and collects artefacts from /output,
# mirroring the FloydHub layout, so the same absolute paths work in both places.
INPUT_ROOT, OUTPUT_ROOT = (
    os.path.abspath(root) for root in ('/input/', '/output/')
)
# Fail early if either mount point is missing.
assert os.path.exists(INPUT_ROOT)
assert os.path.exists(OUTPUT_ROOT)

In [None]:
# Load both CSV files from the input directory.
df_train, df_test = (
    pd.read_csv(os.path.join(INPUT_ROOT, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)
# Sanity-check the expected shapes: the training frame has one more column
# (the label) than the test frame.
assert df_train.shape == (42000, 785)
assert df_test.shape == (28000, 784)

## Prepare training data

In [None]:
# Pixel columns are everything after the first column; scale them to [0, 1].
# `.iloc` replaces the deprecated/removed `.ix` indexer, and dividing by a
# float keeps this a true division even under Python 2 integer semantics.
data_x = df_train.iloc[:, 1:].values / 255.0
# Labels as a flat 1-D array, one entry per training row.
data_y = df_train['label'].values
assert data_x.shape == (42000, 784)
assert data_y.shape == (42000,)

In [None]:
# One-hot encode the labels. The encoder expects a 2-D column, hence the
# reshape. `toarray()` yields a regular ndarray, whereas `todense()` would
# return a numpy.matrix, which NumPy discourages and which behaves differently
# under indexing and arithmetic.
target_scaler = OneHotEncoder()
data_y = target_scaler.fit_transform(data_y.reshape((-1, 1))).toarray()
assert data_y.shape == (42000, 10)

In [None]:
# Randomly hold out 2/7 of the labelled data; both arrays are cast to float32
# to match the dtype of the graph placeholders.
x_train, x_test, y_train, y_test = train_test_split(
    *(array.astype(np.float32) for array in (data_x, data_y)),
    train_size=5. / 7
)

## Prepare Kaggle test data

In [None]:
# Scale the unlabelled test pixels the same way as the training pixels.
# Dividing by a float guarantees true division even if this runs under
# Python 2, where int / int would floor every pixel to 0.
data_test = df_test.values / 255.0
assert data_test.shape == (28000, 784)

## Define CNN architecture

In [None]:
# --- Hyper-parameters --------------------------------------------------------
learning_rate = 0.001
training_iters = 100000  # training stops once step * batch_size reaches this
batch_size = 56
display_step = 10        # metrics are printed every `display_step` steps

dim_x_y = 28             # images are dim_x_y x dim_x_y pixels
dim_depth = 1            # one channel per pixel
n_input = dim_x_y * dim_x_y
n_classes = 10
keep_prob = 0.75         # probability of keeping a unit in the dropout layer

# --- Inputs ------------------------------------------------------------------
# Flattened images and one-hot labels; the batch dimension is left open.
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Restore the 2-D image layout expected by the convolutions (NHWC).
input_layer = tf.reshape(x, shape=[-1, dim_x_y, dim_x_y, dim_depth])

# --- Convolutional feature extractor ------------------------------------------
# Each convolution gets a ReLU: without a non-linearity two stacked
# convolutions collapse into a single linear map and the extra layer adds
# parameters but no representational power.
conv1 = tf.layers.conv2d(input_layer, filters=64, kernel_size=[3, 3],
                         padding="same", activation=tf.nn.relu)
conv2 = tf.layers.conv2d(conv1, filters=64, kernel_size=[3, 3],
                         padding="same", activation=tf.nn.relu)
max_pool1 = tf.layers.max_pooling2d(conv2, pool_size=[2, 2], strides=[2, 2])
conv3 = tf.layers.conv2d(max_pool1, filters=128, kernel_size=[3, 3],
                         padding="same", activation=tf.nn.relu)
conv4 = tf.layers.conv2d(conv3, filters=128, kernel_size=[3, 3],
                         padding="same", activation=tf.nn.relu)
max_pool2 = tf.layers.max_pooling2d(conv4, pool_size=[2, 2], strides=[2, 2])
# Two 2x2/stride-2 poolings shrink 28 -> 14 -> 7; flatten the 7x7x128 volume.
max_pool2 = tf.reshape(max_pool2, shape=[-1, 7 * 7 * 128])

# --- Classifier head -----------------------------------------------------------
dense1 = tf.layers.dense(max_pool2, units=1024, activation=tf.nn.relu)
# NOTE(review): keep_prob is a Python constant, so dropout is active on every
# run of the graph, including the accuracy read-out and the final prediction
# pass. Making this switchable needs a fed placeholder plus matching feed_dict
# changes in the training and prediction cells.
dropout = tf.nn.dropout(dense1, keep_prob=keep_prob)
# The output layer must stay linear: softmax cross-entropy expects unscaled
# logits, and a ReLU here would clamp every negative logit to zero.
logits = tf.layers.dense(dropout, units=n_classes, activation=None)

pred = tf.nn.softmax(logits)

# --- Loss and optimiser --------------------------------------------------------
# tf.losses.softmax_cross_entropy already reduces to a scalar loss.
cost = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# --- Metrics -------------------------------------------------------------------
pred_numbers = tf.argmax(pred, 1)
correct_pred = tf.equal(pred_numbers, tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## Train network

In [None]:
# Open a session on the default graph and create the checkpoint writer.
sess = tf.Session()
saver = tf.train.Saver()

# Give every graph variable its initial value before training starts.
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
print('Started optimization')
step = 1
while step * batch_size < training_iters:
    # Draw a random mini-batch without replacement. Slicing the permutation
    # *before* indexing selects exactly the same rows as shuffling the whole
    # training set and truncating, but avoids copying every row on each step.
    batch_index = np.random.permutation(x_train.shape[0])[:batch_size]
    batch_x = x_train[batch_index, :]
    batch_y = y_train[batch_index, :]

    # One optimiser update on the mini-batch.
    sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

    if step % display_step == 0:
        # Loss and accuracy are measured on the batch that was just trained
        # on, so these are training metrics, not validation metrics.
        loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
        print('Iter: {}, Minibatch loss: {:.6f}, Training accuracy: {:.5f}'
              .format(step * batch_size, loss, acc))

    step += 1

print('Optimization Finished!')

# Persist the trained variables to the output directory.
save_path = saver.save(sess, os.path.join(OUTPUT_ROOT, 'model.ckpt'))
print('Model saved in file: %s' % save_path)

In [None]:
# Predict the test set in chunks of 100 rows to bound the memory used per
# session run. Class indices are integers, so store them as int64.
result = np.zeros(data_test.shape[0], dtype=np.int64)

# Step over start offsets rather than a count of full chunks, so a trailing
# partial chunk is still predicted when the row count is not a multiple of
# 100 (slices past the end are clamped by NumPy).
for start in range(0, result.shape[0], 100):
    i_slice = slice(start, start + 100)
    prediction = sess.run(pred_numbers, feed_dict={x: data_test[i_slice]})
    result[i_slice] = prediction

In [None]:
# Build the submission table: shift the test frame's index by one to get the
# image ids, then turn that index into a regular column.
df_result = pd.DataFrame(
    result,
    index=df_test.index + 1,
    columns=['Label'],
    dtype=np.int64,
).reset_index()
df_result.columns = ['ImageId', 'Label']
df_result.head()

In [None]:
# Write the predictions to the output directory as CSV, leaving out the
# DataFrame index so only the data columns are written.
result_filepath = os.path.join(OUTPUT_ROOT, 'prediction.csv')
df_result.to_csv(path_or_buf=result_filepath, index=False)
print('Results written to {}'.format(result_filepath))