In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import math
#tqdm for the progress bar
from tqdm import tqdm
print("All modules imported successfully.")

All modules imported successfully.


All of the files can be found on Kaggle.

https://www.kaggle.com/c/digit-recognizer/data

In [2]:
#load in data from kaggle
train_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/train.csv")
validation_X=train_X.loc[40000:]
train_X=train_X.loc[:39999]
train_y=train_X['label']
validation_y=validation_X['label']
del train_X['label']
del validation_X['label']
test_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/test.csv")

In [3]:
#convert to numpy arrays and normalize values between 0 and 1
#normalizing allows the network to train better and converge faster
train_X=np.array(train_X)/255
train_y=np.array(train_y)
validation_X=np.array(validation_X)/255
validation_y=np.array(validation_y)
print(train_X.shape, train_y.shape, validation_X.shape, validation_y.shape)
#test data
test_X=np.array(test_X).astype(dtype='float32')/255

(40000, 784) (40000,) (2000, 784) (2000,)


In [4]:
#convert to one-hot array
train_y=np.array(pd.get_dummies(train_y))
validation_y=np.array(pd.get_dummies(validation_y))
print(train_y.shape, validation_y.shape)

(40000, 10) (2000, 10)


In [5]:
#make sure everything is a float32
tf.cast(train_X, tf.float32)
tf.cast(train_y, tf.float32)
tf.cast(validation_X, tf.float32)
tf.cast(validation_y, tf.float32)

<tf.Tensor 'Cast_3:0' shape=(2000, 10) dtype=float32>

In [6]:
#setting up placeholders where data will be passed into  later
features=tf.placeholder(tf.float32, shape=[None, 784])
labels=tf.placeholder(tf.float32)

In [7]:
#set some parameters
batch_size=128

nodes_hl1=1000
nodes_hl2=500
nodes_hl3=100

output_size=10

num_epochs=200

A website showing different weight initializations:
https://intoli.com/blog/neural-network-initialization/

In [8]:
#create variables(weights and biases) Uses standard deviation of sqrt(2/nodes) which is a good starting point.

weights_input_hl1=tf.get_variable('weights_input_hl1', dtype=tf.float32, 
  initializer=tf.truncated_normal([784, nodes_hl1], dtype=tf.float32, stddev=np.sqrt(2/784)))
biases_hl1=tf.get_variable('biases_hl1', [nodes_hl1], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl1_hl2=tf.get_variable('weights_hl1_hl2', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl1, nodes_hl2], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl1)))
biases_hl2=tf.get_variable('biases_hl2', [nodes_hl2], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl2_hl3=tf.get_variable('weights_hl2_hl3', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl2, nodes_hl3], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl2)))
biases_hl3=tf.get_variable('biases_hl3', [nodes_hl3], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl3_output=tf.get_variable('weights_hl3_output', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl3, output_size], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl3)))

In [9]:
#create saver, max_to_keep is maximum checkpoint files kept
saver=tf.train.Saver(max_to_keep=1)

In [10]:
#dropout rate, each time it is trained, ~20% of neurons will be killed in each layer, it helps prevent overfitting
train_keep=0.8
keep_amt=train_keep

#training pass
#elu=exponential linear unit, generally performs better than relu

def forward_pass(x, keep_amt):
    dropout_rate=tf.constant(keep_amt)
    l1=tf.add(tf.matmul(x, weights_input_hl1), biases_hl1)
    l1=tf.nn.elu(l1)
    l1=tf.nn.dropout(l1, dropout_rate)
    l2=tf.add(tf.matmul(l1, weights_hl1_hl2), biases_hl2)
    l2=tf.nn.elu(l2)
    l2=tf.nn.dropout(l2, dropout_rate)
    l3=tf.add(tf.matmul(l2, weights_hl2_hl3), biases_hl3)
    l3=tf.nn.elu(l3)
    l3=tf.nn.dropout(l3, dropout_rate)
    output_layer=tf.matmul(l3, weights_hl3_output)
    return output_layer

In [11]:
#cost and gradient descent
#tf.reduce_mean=np.mean and tf.reduce_sum=np.sum
logits=forward_pass(features,keep_amt)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(cost)

#accuracy
#argmax takes the maximum value in each vector and sets it to 1, all others are set to 0
output=tf.nn.softmax(logits)
accuracy=tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(labels, 1)),tf.float32))

#used later for predicting the test data
prediction=tf.argmax(tf.nn.softmax(logits=output), 1)

In [12]:
with tf.Session() as sess:
    #initialize variables
    sess.run(tf.global_variables_initializer())
    #restore weights if file found
    try:
        saver.restore(sess, "/tmp/model.ckpt")
        print("Model restored.")
    except:
        print("No save file found.")

    
    batch_count = int(math.ceil(len(train_X)/batch_size))
    best_val_acc=0
    last_improve_epoch=0
    for epoch in range(num_epochs):
        
        # Progress bar
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch+1, num_epochs), unit='batches')
        train_loss=0.0
        # The training cycle
        keep_amt=train_keep
        for batch_i in batches_pbar:
            # Get a batch of training features and labels
            batch_start = batch_i*batch_size
            batch_features = train_X[batch_start:batch_start + batch_size]
            batch_labels = train_y[batch_start:batch_start + batch_size]
            #train
            _, c = sess.run([optimizer, cost], feed_dict={features: batch_features, labels: batch_labels})
            train_loss+=c
        #set keep amount to 100% for testing
        keep_amt=1.0    
        validation_accuracy=sess.run(accuracy, feed_dict={features: validation_X, labels: validation_y})
        print('Training Loss = {}, Validation Accuracy = {}'.format(train_loss, validation_accuracy))

        #save model if validation is at a new best and do not save for first 5 epochs
        if validation_accuracy>best_val_acc and epoch>5:
            save_path = saver.save(sess, "/tmp/model.ckpt")
            print("Model saved in file: {}".format(save_path))
            print("Accuracy improved from {} to {}".format(best_val_acc, validation_accuracy))
            best_val_acc=validation_accuracy
            last_improve_epoch=epoch
        elif validation_accuracy>best_val_acc and epoch<=5:
            print("Accuracy has improved from {} to {}".format(best_val_acc, validation_accuracy))
            best_val_acc=validation_accuracy
            last_improve_epoch=epoch
        #stop training if validation loss hasn't improved for 10 epochs 
        if epoch>=last_improve_epoch+10:
            print("Model has not improved for 10 epochs. Training has been stopped.")
            print("Best validation accuracy: {}".format(best_val_acc))
            break;
    print("Training Finished!")
    
    #load in best model
    try:
        saver.restore(sess, "/tmp/model.ckpt")
        print("Best Model restored.")
    except:
        print("No save file found. Prediction will use current weights which may not be the best.")

    keep_amt=1.0
    predictions=sess.run(prediction, feed_dict={features: test_X})


INFO:tensorflow:Restoring parameters from /tmp/model.ckpt
No save file found.


Epoch  1/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.19batches/s]


Training Loss = 244.25763057172298, Validation Accuracy = 0.9079999923706055
Accuracy has improved from 0 to 0.9079999923706055


Epoch  2/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.53batches/s]


Training Loss = 69.75137526914477, Validation Accuracy = 0.9369999766349792
Accuracy has improved from 0.9079999923706055 to 0.9369999766349792


Epoch  3/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.25batches/s]


Training Loss = 50.48773854225874, Validation Accuracy = 0.9430000185966492
Accuracy has improved from 0.9369999766349792 to 0.9430000185966492


Epoch  4/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.81batches/s]


Training Loss = 41.12426498904824, Validation Accuracy = 0.949999988079071
Accuracy has improved from 0.9430000185966492 to 0.949999988079071


Epoch  5/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.61batches/s]


Training Loss = 34.181536921299994, Validation Accuracy = 0.9514999985694885
Accuracy has improved from 0.949999988079071 to 0.9514999985694885


Epoch  6/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.23batches/s]


Training Loss = 29.91188729647547, Validation Accuracy = 0.9620000123977661


Epoch  7/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.40batches/s]


Training Loss = 27.009469925425947, Validation Accuracy = 0.9580000042915344
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9514999985694885 to 0.9580000042915344


Epoch  8/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.45batches/s]


Training Loss = 24.469233253970742, Validation Accuracy = 0.9570000171661377


Epoch  9/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.43batches/s]


Training Loss = 23.02157904370688, Validation Accuracy = 0.965499997138977
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9580000042915344 to 0.965499997138977


Epoch 10/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.85batches/s]


Training Loss = 20.845310770440847, Validation Accuracy = 0.9624999761581421


Epoch 11/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.72batches/s]


Training Loss = 20.3659028619295, Validation Accuracy = 0.9545000195503235


Epoch 12/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.89batches/s]


Training Loss = 18.649469145806506, Validation Accuracy = 0.9610000252723694


Epoch 13/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.13batches/s]


Training Loss = 18.071291252039373, Validation Accuracy = 0.9580000042915344


Epoch 14/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.34batches/s]


Training Loss = 16.94214643945452, Validation Accuracy = 0.9635000228881836


Epoch 15/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.91batches/s]


Training Loss = 16.6372059189016, Validation Accuracy = 0.9670000076293945
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.965499997138977 to 0.9670000076293945


Epoch 16/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.04batches/s]


Training Loss = 15.327295228024013, Validation Accuracy = 0.9559999704360962


Epoch 17/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.22batches/s]


Training Loss = 15.090687828196678, Validation Accuracy = 0.9645000100135803


Epoch 18/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.81batches/s]


Training Loss = 15.6552419764339, Validation Accuracy = 0.9700000286102295
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9670000076293945 to 0.9700000286102295


Epoch 19/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.20batches/s]


Training Loss = 14.194194764539134, Validation Accuracy = 0.9635000228881836


Epoch 20/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.12batches/s]


Training Loss = 14.070386057741416, Validation Accuracy = 0.9704999923706055
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9700000286102295 to 0.9704999923706055


Epoch 21/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.54batches/s]


Training Loss = 13.98744220478693, Validation Accuracy = 0.9674999713897705


Epoch 22/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.32batches/s]


Training Loss = 12.28431231185823, Validation Accuracy = 0.968500018119812


Epoch 23/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.60batches/s]


Training Loss = 12.884486720222412, Validation Accuracy = 0.9674999713897705


Epoch 24/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.81batches/s]


Training Loss = 13.004466517111723, Validation Accuracy = 0.9670000076293945


Epoch 25/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.85batches/s]


Training Loss = 11.885090636409586, Validation Accuracy = 0.9660000205039978


Epoch 26/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.26batches/s]


Training Loss = 13.320912815564952, Validation Accuracy = 0.968999981880188


Epoch 27/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.43batches/s]


Training Loss = 14.058292384645029, Validation Accuracy = 0.965499997138977


Epoch 28/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.76batches/s]


Training Loss = 12.5332677730039, Validation Accuracy = 0.972000002861023
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9704999923706055 to 0.972000002861023


Epoch 29/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.61batches/s]


Training Loss = 12.851216088600722, Validation Accuracy = 0.965499997138977


Epoch 30/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.72batches/s]


Training Loss = 11.790719217737205, Validation Accuracy = 0.9760000109672546
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.972000002861023 to 0.9760000109672546


Epoch 31/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.74batches/s]


Training Loss = 10.484310240841296, Validation Accuracy = 0.9700000286102295


Epoch 32/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.55batches/s]


Training Loss = 11.421623485075543, Validation Accuracy = 0.968999981880188


Epoch 33/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.77batches/s]


Training Loss = 12.524019024840527, Validation Accuracy = 0.968500018119812


Epoch 34/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 38.76batches/s]


Training Loss = 10.539986013336602, Validation Accuracy = 0.9670000076293945


Epoch 35/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.83batches/s]


Training Loss = 11.411063515219212, Validation Accuracy = 0.9725000262260437


Epoch 36/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 33.32batches/s]


Training Loss = 11.770510603952062, Validation Accuracy = 0.9629999995231628


Epoch 37/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.69batches/s]


Training Loss = 11.817261378218973, Validation Accuracy = 0.9704999923706055


Epoch 38/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.24batches/s]


Training Loss = 11.547929756956137, Validation Accuracy = 0.968500018119812


Epoch 39/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 37.20batches/s]


Training Loss = 11.393507323273298, Validation Accuracy = 0.9660000205039978


Epoch 40/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.41batches/s]


Training Loss = 12.354783843316909, Validation Accuracy = 0.9754999876022339
Model has not improved for 10 epochs. Training has been stopped.
Best validation accuracy: 0.9760000109672546
Training Finished!
INFO:tensorflow:Restoring parameters from /tmp/model.ckpt
Best Model restored.


In [13]:
predictions

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [14]:
#use replace the 0s in the sample submission file with the outputs from the neural net
submission=pd.read_csv("C:/Users/Eric Zhou/Downloads/sample_submission.csv")
for x in range(len(predictions)):
    submission['Label'][x]+=predictions[x]

In [15]:
print(submission)

       ImageId  Label
0            1      2
1            2      0
2            3      9
3            4      9
4            5      3
5            6      7
6            7      0
7            8      3
8            9      0
9           10      3
10          11      5
11          12      7
12          13      4
13          14      0
14          15      4
15          16      3
16          17      3
17          18      1
18          19      9
19          20      0
20          21      9
21          22      1
22          23      1
23          24      5
24          25      7
25          26      4
26          27      2
27          28      7
28          29      4
29          30      7
...        ...    ...
27970    27971      5
27971    27972      0
27972    27973      4
27973    27974      8
27974    27975      0
27975    27976      3
27976    27977      6
27977    27978      0
27978    27979      1
27979    27980      9
27980    27981      3
27981    27982      1
27982    27983      1
27983    2

In [16]:
#index=False gets rid of the double numbers
submission.to_csv("C:/Users/Eric Zhou/Downloads/submission.csv", index=False)