In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
from tflearn.data_utils import shuffle, to_categorical

In [2]:
# Read the Kaggle training CSV into a DataFrame; the trailing bare expression
# makes the notebook display the frame's shape.
train_csv_path = './input/train.csv'
train = pd.read_csv(train_csv_path)
train.shape

(42000, 785)

In [6]:
# Separate the Kaggle training frame into model inputs and targets.
#   X: every column except 'label', as a NumPy array
#   Y: the 'label' column, as a NumPy array
X = train.drop('label', axis=1).values
Y = train['label'].values

# Parenthesised single-argument print is valid (and prints the same text)
# on both Python 2 and Python 3, unlike the bare `print X.shape` statement.
print(X.shape)
print(Y.shape)

(42000, 784)
(42000,)


In [7]:
# Hold out 15% of the labelled data for validation.
#
# The split is derived from `train` itself rather than from the current
# X / Y, so re-running this cell always reproduces the same split instead of
# failing (or re-splitting) because X / Y were already overwritten below.
features = train.drop('label', axis=1).values
labels = train['label'].values

X, X_test, Y, Y_test = train_test_split(
    features,
    labels,
    test_size=0.15,
    random_state=42,  # fixed seed so the split is reproducible
    # Stratify on the label column by name (not by column position) to
    # preserve the initial class balance in both partitions.
    stratify=labels,
)

# str.format keeps the printed text identical on Python 2 and Python 3.
print("X shape: {}".format(X.shape))
print("X_test shape: {}".format(X_test.shape))
print("Y shape: {}".format(Y.shape))
print("Y_test shape: {}".format(Y_test.shape))

X shape: (35700, 784)
X_test shape: (6300, 784)
Y shape: (35700,)
Y_test shape: (6300,)


In [8]:
# Turn each flat 784-value row into a 28x28 single-channel image. Reshaping
# an array that already has this shape is a no-op, so this part is safe to
# re-run.
X = X.reshape([-1, 28, 28, 1])
X_test = X_test.reshape([-1, 28, 28, 1])

# One-hot encode the labels into 10 classes. Only do so while the labels
# are still a 1-D vector, so re-running the cell does not feed already
# encoded labels back into to_categorical.
if Y.ndim == 1:
    Y = to_categorical(Y, 10)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, 10)

# str.format keeps the printed text identical on Python 2 and Python 3.
print("X shape: {}".format(X.shape))
print("X_test shape: {}".format(X_test.shape))
print("Y shape: {}".format(Y.shape))
print("Y_test shape: {}".format(Y_test.shape))

X shape: (35700, 28, 28, 1)
X_test shape: (6300, 28, 28, 1)
Y shape: (35700, 10)
Y_test shape: (6300, 10)


In [10]:
# Hyper-parameter sweep: train one fresh network per
# (optimizer, learning rate, epoch count) combination.
# An earlier, coarser sweep used learning rates [0.01, 0.001] with
# n_epoch fixed at 5 over the same two optimizers.


def build_digit_cnn(optimizer, learning_rate):
    """Build the digit classifier graph and attach its training op.

    Architecture, in order:
      * input of shape [None, 28, 28, 1]
      * two conv stages (32 then 64 filters, size 3, ReLU, L2 regularizer),
        each followed by 2x max pooling and local response normalization
      * two tanh fully connected layers (128 then 256 units), each followed
        by dropout called with 0.8
      * a 10-unit softmax output

    Args:
        optimizer: optimizer name forwarded to tflearn's `regression`.
        learning_rate: learning rate forwarded to tflearn's `regression`.

    Returns:
        The `regression` layer (categorical cross-entropy loss, named
        'target') that ends the network.
    """
    net = input_data(shape=[None, 28, 28, 1], name='input')

    # Convolutional feature extractor: the stages differ only in width.
    for n_filters in (32, 64):
        net = conv_2d(net, n_filters, 3, activation='relu', regularizer="L2")
        net = max_pool_2d(net, 2)
        net = local_response_normalization(net)

    # Dense head: the stages differ only in unit count.
    for n_units in (128, 256):
        net = fully_connected(net, n_units, activation='tanh')
        net = dropout(net, 0.8)

    net = fully_connected(net, 10, activation='softmax')

    return regression(
        net,
        optimizer=optimizer,
        learning_rate=learning_rate,
        loss='categorical_crossentropy',
        name='target'
    )


for optimizer in ['SGD', 'Adam']:
    for learning_rate in [0.1, 0.0001]:
        for n_epoch in [5, 10]:
            # Label for this run, e.g. "Adam, 0.0001, n10".
            run_id = ', '.join([optimizer, str(learning_rate)]) + ", n" + str(n_epoch)

            # Clear the default graph so each configuration is built from
            # scratch rather than on top of the previous one.
            tf.reset_default_graph()
            network = build_digit_cnn(optimizer, learning_rate)

            model = tflearn.DNN(network, tensorboard_verbose=3)
            train_feed = ({'input': X}, {'target': Y})
            validation_feed = ({'input': X_test}, {'target': Y_test})
            model.fit(
                train_feed[0], train_feed[1],
                n_epoch=n_epoch,
                validation_set=validation_feed,
                snapshot_step=100,
                show_metric=True,
                run_id=run_id
            )

Training Step: 2789  | total loss: [1m[32m0.17794[0m[0m | time: 24.251s
| Adam | epoch: 005 | loss: 0.17794 - acc: 0.9578 -- iter: 35648/35700
Training Step: 2790  | total loss: [1m[32m0.17137[0m[0m | time: 25.335s
| Adam | epoch: 005 | loss: 0.17137 - acc: 0.9589 | val_loss: 0.08114 - val_acc: 0.9737 -- iter: 35700/35700
--
