In [116]:
# This model is heavily based on the MNIST tutorial
# provided by TensorFlow, applied here to the MNIST
# dataset from the Kaggle competition.

import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import cross_validation
from sklearn import datasets

In [229]:
# Read the training CSV into a DataFrame, using the first row as the header
# and parsing every column as float64.
# TODO: a cross-validation set is still to be added.
data = pd.read_csv(
    '../dataset/train.csv',
    header=0,
    dtype=np.float64,
)

In [230]:
# Sanity check: dimensions of the loaded frame as (rows, columns).
data.shape

(42000, 785)

In [231]:
# Preview the first three rows to see the column layout.
data.head(3)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [232]:
# Split the frame into features and target: every column from position 1
# onward (pixel0 ... pixel783) becomes X, and the 'label' column becomes y.
# The slice is positional, so it relies on 'label' being the first column.
X_data = data.iloc[:, 1:].values
y_data = data.loc[:, 'label'].values

In [263]:
# K-fold cross-validation split (7 folds).
# This notebook only works with the last fold, so take that split directly
# instead of slicing the full dataset once per fold and throwing away all
# but the final result.
# For a later *.py version, the training can be moved into a loop over folds.
k_fold = cross_validation.KFold(n=len(y_data), n_folds=7)
train, test = list(k_fold)[-1]  # index arrays of the final fold
train_x, test_x = X_data[train], X_data[test]
train_y, test_y = y_data[train], y_data[test]

In [264]:
# Turn integer class labels (e.g. [1, 3, 4, ...]) into one-hot vectors:
# the position given by the label holds 1.0 and every other position is 0.0.
def make_hot(lables, num_classes=10):
    """One-hot encode a sequence of class labels.

    Args:
        lables: iterable of labels; each one is converted with ``int()``.
        num_classes: length of every one-hot row (defaults to 10 digits).

    Returns:
        A list with one row per label; each row is a list of ``num_classes``
        floats containing a single 1.0 at the label's index.

    Raises:
        IndexError: if a label falls outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because plain list indexing would
            silently wrap them around to the end of the row.
    """
    result = []
    for label in lables:
        index = int(label)
        if not 0 <= index < num_classes:
            raise IndexError(
                "label %r is outside the range [0, %d)" % (label, num_classes))
        row = [0.0] * num_classes
        row[index] = 1.0
        result.append(row)
    return result

In [266]:
# One-hot encode both label sets, e.g. labels [1, 3, ...] become
# [0, 1, 0, 0, ...], [0, 0, 0, 1, ...], ...
# NOTE: the encoded rows are bound back to train_y / test_y, so running this
# cell a second time would pass already-encoded rows into make_hot.
train_y, test_y = make_hot(train_y), make_hot(test_y)

In [267]:
# Eyeball the first ten encoded training labels.
train_y[:10]

[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

In [270]:
def normalize(myList, scale=255.0):
    """Scale pixel intensities by dividing every value by ``scale``.

    With the default ``scale`` of 255, intensities in 0..255 map to 0..1.

    The input is never modified. The previous implementation wrote the scaled
    rows back into ``myList``, which changed the caller's data and, for an
    integer ndarray, truncated every result to 0 or 1 on assignment.

    Args:
        myList: 2-D array-like (rows of pixel values).
        scale: divisor applied to every value (defaults to 255.0).

    Returns:
        A new float64 ``numpy.ndarray`` of the same shape holding the scaled
        values.
    """
    # A single vectorized division replaces the per-pixel Python loop.
    return np.asarray(myList, dtype=np.float64) / scale

In [271]:
# Scale the pixel values of both splits.
# NOTE: the results are bound back to the same names, so running this cell
# again would scale the already-scaled values a second time.
train_x, test_x = normalize(train_x), normalize(test_x)

In [273]:
# Softmax regression: a network with a single layer, of the form
# y = softmax(Wx + b), where W is the weight matrix and b is the bias.

# Placeholder for the input batch: any number of rows, 784 values per row.
x = tf.placeholder(tf.float32, shape=[None, 784])

# Model parameters are Variables, both initialized to zeros.
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))

In [274]:
# The model itself: softmax applied to the affine map x * W + b.
y = tf.nn.softmax(tf.add(tf.matmul(x, W), b))

In [275]:
# Training step: define the cost that tells TensorFlow what a good model is.
# Cross-entropy between the true labels y_ and the predictions y is used.
y_ = tf.placeholder(tf.float32, [None, 10])
# Clip the predicted probabilities away from 0 before taking the log.
# Without the clip, a saturated softmax output gives log(0) = -inf, and
# 0 * -inf is NaN, which then propagates into W and b on the next update.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# Backpropagation with the selected optimizer algorithm.
# There are a variety of optimizers to choose from.
# NOTE: the loss is summed (not averaged) over the batch, so the gradient
# magnitude grows with the batch size. GradientDescentOptimizer was observed
# to fail to converge when the batch size exceeds 200.
# Alternatives are AdagradOptimizer, AdamOptimizer, ...
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

In [276]:
# Launch the model: build the op that initializes all variables.
init = tf.initialize_all_variables()

# No computation happens until a session exists and is asked to run an op;
# running `init` here executes the variable-initialization op.
sess = tf.Session()
sess.run(init)

In [283]:
# Mini-batch (stochastic) fitting: one pass over the training data in
# consecutive slices of `batch_size` rows.
# A visualization would help in finding the best learning rate.
batch_size = 100
# Floor division keeps loopSize an int on both Python 2 and Python 3;
# with true division, range() raises TypeError on Python 3.
# Rows left over after the last full batch are not used.
loopSize = len(train_x) // batch_size
for i in range(loopSize):
    start = i * batch_size
    stop = start + batch_size
    batch_xs, batch_ys = train_x[start:stop], train_y[start:stop]
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

In [286]:
# correct_prediction is a vector of booleans, e.g. [True, True, False, ...]:
# True where the predicted class (arg-max of y along axis 1) matches the true
# class (arg-max of y_ along axis 1), False otherwise.
# tf.argmax is used on both sides; tf.arg_max is a deprecated alias of it.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Casting the booleans to floats (1.0 / 0.0) and averaging gives the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [287]:
# Evaluate the accuracy op on the held-out test split and print the result.
test_feed = {x: test_x, y_: test_y}
print(sess.run(accuracy, feed_dict=test_feed))

0.911167
