In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import math
#tqdm for the progress bar
from tqdm import tqdm
print("All modules imported successfully.")

All modules imported successfully.


All of the files can be found on Kaggle.

https://www.kaggle.com/c/digit-recognizer/data

In [2]:
#load in data from kaggle
train_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/train.csv")
validation_X=train_X.loc[40000:]
train_X=train_X.loc[:39999]
train_y=train_X['label']
validation_y=validation_X['label']
del train_X['label']
del validation_X['label']
test_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/test.csv")

In [3]:
#convert to numpy arrays and normalize values between 0 and 1
#normalizing allows the network to train better and converge faster
train_X=np.array(train_X)/255
train_y=np.array(train_y)
validation_X=np.array(validation_X)/255
validation_y=np.array(validation_y)
print(train_X.shape, train_y.shape, validation_X.shape, validation_y.shape)
#test data
test_X=np.array(test_X).astype(dtype='float32')/255

(40000, 784) (40000,) (2000, 784) (2000,)


In [4]:
#convert to one-hot array
train_y=np.array(pd.get_dummies(train_y))
validation_y=np.array(pd.get_dummies(validation_y))
print(train_y.shape, validation_y.shape)

(40000, 10) (2000, 10)


In [5]:
#make sure everything is a float32
tf.cast(train_X, tf.float32)
tf.cast(train_y, tf.float32)
tf.cast(validation_X, tf.float32)
tf.cast(validation_y, tf.float32)

<tf.Tensor 'Cast_3:0' shape=(2000, 10) dtype=float32>

In [6]:
#setting up placeholders where data will be passed into  later
features=tf.placeholder(tf.float32, shape=[None, 784])
labels=tf.placeholder(tf.float32)

In [7]:
#set some parameters
batch_size=128

nodes_hl1=1000
nodes_hl2=500
nodes_hl3=100

output_size=10

num_epochs=50

A website showing different weight initializations:
https://intoli.com/blog/neural-network-initialization/

In [8]:
#create variables(weights and biases) Uses standard deviation of sqrt(2/nodes) which is a good starting point.

weights_input_hl1=tf.get_variable('weights_input_hl1', dtype=tf.float32, 
  initializer=tf.truncated_normal([784, nodes_hl1], dtype=tf.float32, stddev=np.sqrt(2/784)))
biases_hl1=tf.get_variable('biases_hl1', [nodes_hl1], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl1_hl2=tf.get_variable('weights_hl1_hl2', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl1, nodes_hl2], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl1)))
biases_hl2=tf.get_variable('biases_hl2', [nodes_hl2], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl2_hl3=tf.get_variable('weights_hl2_hl3', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl2, nodes_hl3], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl2)))
biases_hl3=tf.get_variable('biases_hl3', [nodes_hl3], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl3_output=tf.get_variable('weights_hl3_output', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl3, output_size], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl3)))

In [9]:
#create saver, max_to_keep is maximum checkpoint files kept
saver=tf.train.Saver(max_to_keep=1)

In [10]:
#dropout rate, each time it is trained, ~20% of neurons will be killed in each layer, it helps prevent overfitting
train_keep=0.8
keep_amt=train_keep

#training pass
#elu=exponential linear unit, generally performs better than relu

def forward_pass(x, keep_amt):
    dropout_rate=tf.constant(keep_amt)
    l1=tf.add(tf.matmul(x, weights_input_hl1), biases_hl1)
    l1=tf.nn.elu(l1)
    l1=tf.nn.dropout(l1, dropout_rate)
    l2=tf.add(tf.matmul(l1, weights_hl1_hl2), biases_hl2)
    l2=tf.nn.elu(l2)
    l2=tf.nn.dropout(l2, dropout_rate)
    l3=tf.add(tf.matmul(l2, weights_hl2_hl3), biases_hl3)
    l3=tf.nn.elu(l3)
    l3=tf.nn.dropout(l3, dropout_rate)
    output_layer=tf.matmul(l3, weights_hl3_output)
    return output_layer

In [11]:
#cost and gradient descent
#tf.reduce_mean=np.mean and tf.reduce_sum=np.sum
logits=forward_pass(features,keep_amt)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer().minimize(cost)

#accuracy
#argmax takes the maximum value in each vector and sets it to 1, all others are set to 0
output=tf.nn.softmax(logits)
accuracy=tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(labels, 1)),tf.float32))

#used later for predicting the test data
prediction=tf.argmax(tf.nn.softmax(logits=output), 1)

In [12]:
with tf.Session() as sess:
    #initialize variables
    sess.run(tf.global_variables_initializer())
    #restore weights if file found
    try:
        saver.restore(sess, "/tmp/model.ckpt")
        print("Model restored.")
    except:
        print("No save file found.")

    
    batch_count = int(math.ceil(len(train_X)/batch_size))

    for epoch in range(num_epochs):
        
        # Progress bar
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch+1, num_epochs), unit='batches')
        train_loss=0.0
        # The training cycle
        keep_amt=train_keep
        for batch_i in batches_pbar:
            # Get a batch of training features and labels
            batch_start = batch_i*batch_size
            batch_features = train_X[batch_start:batch_start + batch_size]
            batch_labels = train_y[batch_start:batch_start + batch_size]
            #train
            _, c = sess.run([optimizer, cost], feed_dict={features: batch_features, labels: batch_labels})
            train_loss+=c
        #set keep amount to 100% for testing
        keep_amt=1.0    
        print("Training Loss = {}, Validation Accuracy = {}"
              .format(train_loss, sess.run(accuracy, feed_dict={features: validation_X, labels: validation_y})))
        
        #save model after every 5 epochs
        #uncomment these lines if you want to save the model
        #WARNING: May take up a lot of disk space depending on how many hidden layers/nodes per hidden layer you have
        #if epoch%5==0:
        #    save_path = saver.save(sess, "/tmp/model.ckpt")
        #    print("Model saved in file: {}".format(save_path))
    print("Training Finished!")
    keep_amt=1.0
    predictions=sess.run(prediction, feed_dict={features: test_X})


INFO:tensorflow:Restoring parameters from /tmp/model.ckpt
No save file found.


Epoch  1/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.25batches/s]


Training Loss = 102.32129826396704, Validation Accuracy = 0.9275000095367432


Epoch  2/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.74batches/s]


Training Loss = 54.22416315227747, Validation Accuracy = 0.9440000057220459


Epoch  3/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 32.46batches/s]


Training Loss = 41.25215024873614, Validation Accuracy = 0.9555000066757202


Epoch  4/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.55batches/s]


Training Loss = 33.848864185623825, Validation Accuracy = 0.9474999904632568


Epoch  5/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.59batches/s]


Training Loss = 29.5208446579054, Validation Accuracy = 0.9589999914169312


Epoch  6/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 32.30batches/s]


Training Loss = 25.019415774848312, Validation Accuracy = 0.9589999914169312


Epoch  7/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 31.73batches/s]


Training Loss = 22.295615491922945, Validation Accuracy = 0.9624999761581421


Epoch  8/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.91batches/s]


Training Loss = 22.21392764803022, Validation Accuracy = 0.9635000228881836


Epoch  9/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.36batches/s]


Training Loss = 18.094218654790893, Validation Accuracy = 0.9620000123977661


Epoch 10/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.05batches/s]


Training Loss = 17.90799182606861, Validation Accuracy = 0.9620000123977661


Epoch 11/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.17batches/s]


Training Loss = 16.045512702781707, Validation Accuracy = 0.9639999866485596


Epoch 12/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.86batches/s]


Training Loss = 14.959381462307647, Validation Accuracy = 0.9570000171661377


Epoch 13/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.53batches/s]


Training Loss = 15.182976490061264, Validation Accuracy = 0.9635000228881836


Epoch 14/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.61batches/s]


Training Loss = 12.728685961803421, Validation Accuracy = 0.9674999713897705


Epoch 15/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.48batches/s]


Training Loss = 13.99771769263316, Validation Accuracy = 0.9710000157356262


Epoch 16/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.71batches/s]


Training Loss = 13.209135476965457, Validation Accuracy = 0.9649999737739563


Epoch 17/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.47batches/s]


Training Loss = 12.621745192445815, Validation Accuracy = 0.9704999923706055


Epoch 18/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.42batches/s]


Training Loss = 10.602271305571776, Validation Accuracy = 0.9664999842643738


Epoch 19/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.97batches/s]


Training Loss = 10.69824275074643, Validation Accuracy = 0.9725000262260437


Epoch 20/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.46batches/s]


Training Loss = 10.403348150255624, Validation Accuracy = 0.9624999761581421


Epoch 21/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.50batches/s]


Training Loss = 10.570713086402975, Validation Accuracy = 0.9700000286102295


Epoch 22/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.08batches/s]


Training Loss = 9.359602044190979, Validation Accuracy = 0.9710000157356262


Epoch 23/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.81batches/s]


Training Loss = 10.02218270796584, Validation Accuracy = 0.965499997138977


Epoch 24/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.44batches/s]


Training Loss = 8.596742068191816, Validation Accuracy = 0.9649999737739563


Epoch 25/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.30batches/s]


Training Loss = 10.0691267666989, Validation Accuracy = 0.965499997138977


Epoch 26/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.39batches/s]


Training Loss = 8.44668627930514, Validation Accuracy = 0.9710000157356262


Epoch 27/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.22batches/s]


Training Loss = 9.214886619040044, Validation Accuracy = 0.9674999713897705


Epoch 28/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.80batches/s]


Training Loss = 9.634363563818624, Validation Accuracy = 0.9700000286102295


Epoch 29/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.67batches/s]


Training Loss = 8.083131894120015, Validation Accuracy = 0.9710000157356262


Epoch 30/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.62batches/s]


Training Loss = 7.095757319708355, Validation Accuracy = 0.9695000052452087


Epoch 31/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.91batches/s]


Training Loss = 7.898029955453239, Validation Accuracy = 0.9700000286102295


Epoch 32/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.01batches/s]


Training Loss = 7.6670078809256665, Validation Accuracy = 0.9679999947547913


Epoch 33/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:10<00:00, 30.78batches/s]


Training Loss = 7.488102185205207, Validation Accuracy = 0.9704999923706055


Epoch 34/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.22batches/s]


Training Loss = 8.605418314487906, Validation Accuracy = 0.9710000157356262


Epoch 35/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 32.63batches/s]


Training Loss = 8.715980708002462, Validation Accuracy = 0.9635000228881836


Epoch 36/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.14batches/s]


Training Loss = 7.642302871812717, Validation Accuracy = 0.9725000262260437


Epoch 37/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.73batches/s]


Training Loss = 7.797874154523015, Validation Accuracy = 0.9700000286102295


Epoch 38/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.48batches/s]


Training Loss = 6.953686293723877, Validation Accuracy = 0.972000002861023


Epoch 39/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.97batches/s]


Training Loss = 7.360705634539045, Validation Accuracy = 0.9704999923706055


Epoch 40/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.87batches/s]


Training Loss = 5.854027967507136, Validation Accuracy = 0.972000002861023


Epoch 41/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.09batches/s]


Training Loss = 7.177585973883197, Validation Accuracy = 0.9674999713897705


Epoch 42/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.75batches/s]


Training Loss = 6.703018018037255, Validation Accuracy = 0.9710000157356262


Epoch 43/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.34batches/s]


Training Loss = 5.112073671520193, Validation Accuracy = 0.9695000052452087


Epoch 44/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:10<00:00, 31.02batches/s]


Training Loss = 5.864365268060283, Validation Accuracy = 0.9754999876022339


Epoch 45/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 32.35batches/s]


Training Loss = 6.625342379527865, Validation Accuracy = 0.9695000052452087


Epoch 46/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 31.35batches/s]


Training Loss = 7.681130375916837, Validation Accuracy = 0.9735000133514404


Epoch 47/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.13batches/s]


Training Loss = 7.641642359747493, Validation Accuracy = 0.9714999794960022


Epoch 48/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.47batches/s]


Training Loss = 6.097299263426976, Validation Accuracy = 0.972000002861023


Epoch 49/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.56batches/s]


Training Loss = 5.975751123722148, Validation Accuracy = 0.9745000004768372


Epoch 50/50: 100%|██████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.11batches/s]


Training Loss = 5.133170308780791, Validation Accuracy = 0.9710000157356262
Training Finished!


In [13]:
predictions

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [14]:
#use replace the 0s in the sample submission file with the outputs from the neural net
submission=pd.read_csv("C:/Users/Eric Zhou/Downloads/sample_submission.csv")
for x in range(len(predictions)):
    submission['Label'][x]+=predictions[x]

In [15]:
print(submission)

       ImageId  Label
0            1      2
1            2      0
2            3      9
3            4      0
4            5      3
5            6      7
6            7      0
7            8      3
8            9      0
9           10      3
10          11      5
11          12      7
12          13      4
13          14      0
14          15      4
15          16      3
16          17      3
17          18      1
18          19      9
19          20      0
20          21      9
21          22      1
22          23      1
23          24      5
24          25      7
25          26      4
26          27      2
27          28      7
28          29      4
29          30      7
...        ...    ...
27970    27971      5
27971    27972      0
27972    27973      4
27973    27974      8
27974    27975      0
27975    27976      3
27976    27977      6
27977    27978      0
27978    27979      1
27979    27980      9
27980    27981      3
27981    27982      1
27982    27983      1
27983    2

In [16]:
#index=False gets rid of the double numbers
submission.to_csv("C:/Users/Eric Zhou/Downloads/submission.csv", index=False)