In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# dataset params
LABELS = 10
WIDTH = 28
CHANNELS = 1

VALID = 10000

# network
STEPS = 1000
BATCH = 100
PATCH = 5
DEPTH = 9
HIDDEN = 100

# learning rate
LR = 0.001

In [3]:
# import training data
df_train = pd.read_csv("./train.csv")
df_test = pd.read_csv("./test.csv")

In [4]:
X_train = df_train.values[:,1:]
Y_train = df_train.values[:,0]
test = df_test.values[:]

Data Preprocessing

In [5]:
# normalization
X_train = X_train/255.0
test = test/255.0

In [6]:
# reshape
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
test = test.reshape(test.shape[0], 28, 28, 1)

In [7]:
# label encoding
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [8]:
Y_train = LabelEncoder().fit_transform(Y_train)[:,None]

In [9]:
Y_train = OneHotEncoder().fit_transform(Y_train).todense() # todense means to convert to a matrix

In [10]:
# split training and validation set
random_seed = 1
from sklearn.model_selection import train_test_split
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=random_seed)

In [11]:
print("Train data shape: " + str(X_train.shape))
print("Validation data shape: " + str(X_val.shape))

Train data shape: (33600, 28, 28, 1)
Validation data shape: (8400, 28, 28, 1)


In [12]:
tf_train = tf.placeholder(tf.float32, shape=(None, WIDTH, WIDTH, CHANNELS))
tf_labels = tf.placeholder(tf.float32, shape=(None, LABELS)) 

In [13]:
def weights(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

In [14]:
def bias(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

In [15]:
def conv2d(x, W):
    return tf.nn.conv2d(input=x, filter=W, strides=[1,1,1,1], padding='SAME')

In [16]:
def max_pool(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

In [17]:
def lenet(data):
    # 1st layer: conv+relu+max_pool
    w1 = weights([5,5,1,20])
    b1 = bias([20])
    h1 = tf.nn.relu(conv2d(data, w1) + b1)
    pool1 = max_pool(h1)
    
    # 2nd layer: conv+relu+max_pool
    w2 = weights([5,5,20,50])
    b2 = bias([50])
    h2 = tf.nn.relu(conv2d(pool1, w2) + b2)
    pool2 = max_pool(h2)
    flat = tf.reshape(pool2, [-1, 7*7*50])
    
    # full connection
    w3 = weights([7*7*50, 120])
    b3 = bias([120])
    h3 = tf.nn.relu(tf.matmul(flat, w3) + b3)
    
    w4 = weights([120, 84])
    b4 = bias([84])
    h4 = tf.nn.relu(tf.matmul(h3, w4) + b4)
    
    w5 = weights([84, 10])
    b5 = bias([10])
    return tf.nn.softmax(tf.matmul(h4, w5) + b5)

pred = lenet(tf_train)

In [21]:
# using the categorical cross entropy loss for training the model
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=lenet(tf_train), labels=tf_labels))
correction = tf.equal(tf.argmax(pred, 1), tf.argmax(tf_labels, 1))
accuracy = tf.reduce_mean(tf.cast(correction, "float"))

optimizer = tf.train.RMSPropOptimizer(LR)
train_step = optimizer.minimize(loss)

In [22]:
# starting train
init = tf.global_variables_initializer()
session = tf.Session()
session.run(init)

from sklearn.model_selection import ShuffleSplit
#ss = ShuffleSplit(n_splits=STEPS, train_size=BATCH)
ss = ShuffleSplit(n_splits=5000, train_size=BATCH)
ss.get_n_splits(X_train, Y_train)
history = [(0, np.nan, 10)]

print("Start Train")



for step, (idx, _) in enumerate(ss.split(X_train, Y_train), start=1):
    fd = {tf_train: X_train[idx], tf_labels: Y_train[idx]}
    session.run(train_step, feed_dict=fd)
    if step%500 == 0:
        print("Step: %d" % step)
        fd = {tf_train: X_val, tf_labels: Y_val}
        valid_loss, valid_acc = session.run([loss, accuracy], feed_dict=fd)
        history.append((step, valid_loss, valid_acc))
        print("step %d, valid loss: %f, valid accuracy: %f" %(step, valid_loss, valid_acc))

Start Train
Step: 500
step 500, valid loss: 1.513025, valid accuracy: 0.045714
Step: 1000
step 1000, valid loss: 1.490313, valid accuracy: 0.045714
Step: 1500
step 1500, valid loss: 1.477945, valid accuracy: 0.045714
Step: 2000
step 2000, valid loss: 1.478392, valid accuracy: 0.045714
Step: 2500
step 2500, valid loss: 1.479476, valid accuracy: 0.045714
Step: 3000
step 3000, valid loss: 1.475123, valid accuracy: 0.045714
Step: 3500
step 3500, valid loss: 1.474865, valid accuracy: 0.045714
Step: 4000
step 4000, valid loss: 1.476482, valid accuracy: 0.045714
Step: 4500
step 4500, valid loss: 1.476001, valid accuracy: 0.045714
Step: 5000
step 5000, valid loss: 1.478578, valid accuracy: 0.045714
