# Digit Recognizer (MNIST Dataset)
## Yahao Yan

In [25]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import ShuffleSplit
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

## Preparation

In [26]:
# Read the training data: a `label` column followed by one column per pixel.
TRAIN_CSV = '../input/train.csv'
df = pd.read_csv(TRAIN_CSV)

In [27]:
# Preview the first rows to check the layout (label first, then the pixel columns).
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,pixel11,pixel12,pixel13,pixel14,pixel15,pixel16,pixel17,pixel18,pixel19,pixel20,pixel21,pixel22,pixel23,pixel24,pixel25,pixel26,pixel27,pixel28,pixel29,pixel30,pixel31,pixel32,pixel33,pixel34,pixel35,pixel36,pixel37,pixel38,...,pixel744,pixel745,pixel746,pixel747,pixel748,pixel749,pixel750,pixel751,pixel752,pixel753,pixel754,pixel755,pixel756,pixel757,pixel758,pixel759,pixel760,pixel761,pixel762,pixel763,pixel764,pixel765,pixel766,pixel767,pixel768,pixel769,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
# Extract the label column. NOTE: `pop` removes it from `df` in place, so `df`
# holds only the pixel columns afterwards and this cell cannot be re-run
# without reloading `df` first.
label = np.array(df.pop('label'))
# Encode the classes as integer ids and add a trailing axis, because
# OneHotEncoder expects a 2-D (n_samples, n_features) input.
label = LabelEncoder().fit_transform(label)[:, None]
# toarray() yields a plain ndarray; todense() would return the legacy
# np.matrix type, whose always-2-D indexing semantics are easy to trip over.
label = OneHotEncoder().fit_transform(label).toarray()
# Standardise each pixel column. The scaler is fitted on the whole dataset,
# i.e. before the train/validation split is made.
matrix = StandardScaler().fit_transform(df.values.astype(np.float32))

In [29]:
# Check the shape of the one-hot encoded labels: (samples, classes).
label.shape

(42000, 10)

In [30]:
df.shape #Note: with the label popped, 784 pixel columns remain, which can be reshaped into a 28*28 matrix

(42000, 784)

In [31]:
# Turn each flat row of 784 pixels into a 28x28 image with a single channel;
# -1 lets NumPy infer the number of images.
matrix = np.reshape(matrix, (-1, 28, 28, 1))

In [32]:
# Confirm the reshaped layout: (samples, height, width, channels).
matrix.shape

(42000, 28, 28, 1)

In [33]:
# Set up train & validation data: the last VALID rows are held out for
# validation, everything before them is used for training (no shuffling here).
VALID = 10000  # 10000 of the 42000 rows, i.e. roughly 24% of the dataset
train_rows = slice(None, -VALID)
val_rows = slice(-VALID, None)

X_train = matrix[train_rows]
X_val = matrix[val_rows]
Y_train = label[train_rows]
Y_val = label[val_rows]
print("Train set's size: ", Y_train.shape, " Validation set's size: ", Y_val.shape)

Train set's size:  (32000, 10)  Validation set's size:  (10000, 10)


## Modelling

In [34]:
patch = 5     # side length of the square convolution kernels
channels = 1  # 1 color

# Graph inputs; the leading None leaves the batch size unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, channels])  # image batch
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # one-hot labels, 10 numbers from 0 to 9

In [35]:
# Set up CNN
# Trainable parameters. Weights are drawn from a truncated normal (stddev 0.1);
# b1 starts at zero, the remaining biases start at one.
w1 = tf.Variable(tf.truncated_normal(shape=[patch, patch, channels, 32], stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=[32]))

w2 = tf.Variable(tf.truncated_normal(shape=[patch, patch, 32, 64], stddev=0.1))
b2 = tf.Variable(tf.constant(1.0, shape=[64]))

# Two stride-2 poolings shrink 28x28 -> 14x14 -> 7x7, with 64 feature maps left.
flat_size = 7 * 7 * 64
w3 = tf.Variable(tf.truncated_normal(shape=[flat_size, 1024], stddev=0.1))
b3 = tf.Variable(tf.constant(1.0, shape=[1024]))

w4 = tf.Variable(tf.truncated_normal(shape=[1024, 10], stddev=0.1))
b4 = tf.Variable(tf.constant(1.0, shape=[10]))


def _conv_pool_relu(tensor, kernel, bias):
    """Apply a stride-1 SAME convolution, 2x2 max-pooling, then bias + ReLU."""
    convolved = tf.nn.conv2d(tensor, kernel, strides=[1, 1, 1, 1], padding='SAME')
    pooled = tf.nn.max_pool(convolved, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.relu(pooled + bias)


def cnn(inputs):
    """Build the forward pass and return the unnormalised class scores (logits).

    Args:
        inputs: image batch tensor of shape (batch, 28, 28, 1).

    Returns:
        Tensor of shape (batch, 10) holding one logit per digit class.

    The function reads the module-level variables w1..w4 / b1..b4, so every
    call creates new ops that share the same parameters.
    """
    # Convolutional layers 1 and 2
    features = _conv_pool_relu(inputs, w1, b1)
    features = _conv_pool_relu(features, w2, b2)

    # Fully connected layer on the flattened feature maps
    flattened = tf.reshape(features, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, w3) + b3)

    # Output layer (softmax is applied by the caller)
    return tf.matmul(hidden, w4) + b4


prediction = tf.nn.softmax(cnn(x))

In [36]:
# Optimizing
# Loss: mean softmax cross-entropy between the one-hot labels and the logits.
# cnn(x) is called again here; it reads the same module-level weight variables
# as the graph behind `prediction`, so both share their parameters.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=cnn(x)))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# Accuracy in percent: share of samples whose most probable class matches the
# label. tf.cast replaces the deprecated tf.to_float.
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = 100*tf.reduce_mean(tf.cast(correct, tf.float32))

# One gradient-descent update of all trainable variables.
train = optimizer.minimize(cross_entropy)

In [37]:
# Run TensorFlow session
init = tf.global_variables_initializer()

# Each of the 5000 splits yields 100 random row indices of X_train, which serve
# as the mini-batch for one training step. random_state makes that batch
# sequence reproducible across runs (weight initialisation is still unseeded).
ss = ShuffleSplit(n_splits=5000, train_size=100, random_state=0)

with tf.Session() as sess:
    sess.run(init)

    # The validation feed never changes, so build it once outside the loop.
    val_feed = {x: X_val, y: Y_val}

    for step, (batch_idx, _) in enumerate(ss.split(X_train, Y_train), start=1):
        sess.run(train, feed_dict={x: X_train[batch_idx], y: Y_train[batch_idx]})

        # Report accuracy on the held-out validation set every 250 steps.
        if step % 250 == 0:
            val_accuracy = sess.run(accuracy, feed_dict=val_feed)
            print('Step %i \t Accuracy = %f'%(step, val_accuracy))

Step 250 	 Accuracy = 90.940002
Step 500 	 Accuracy = 93.690002
Step 750 	 Accuracy = 94.720001
Step 1000 	 Accuracy = 94.559998
Step 1250 	 Accuracy = 96.080002
Step 1500 	 Accuracy = 96.440002
Step 1750 	 Accuracy = 96.480003
Step 2000 	 Accuracy = 96.680000
Step 2250 	 Accuracy = 96.849998
Step 2500 	 Accuracy = 96.900002
Step 2750 	 Accuracy = 97.110001
Step 3000 	 Accuracy = 97.220001
Step 3250 	 Accuracy = 97.250000
Step 3500 	 Accuracy = 97.199997
Step 3750 	 Accuracy = 97.180000
Step 4000 	 Accuracy = 97.439995
Step 4250 	 Accuracy = 97.259995
Step 4500 	 Accuracy = 97.439995
Step 4750 	 Accuracy = 97.500000
Step 5000 	 Accuracy = 97.349998
