In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import math
#tqdm for the progress bar
from tqdm import tqdm
print("All modules imported successfully.")

All modules imported successfully.


All of the files can be found on Kaggle.

https://www.kaggle.com/c/digit-recognizer/data

In [2]:
#load in data from kaggle
train_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/train.csv")
validation_X=train_X.loc[40000:]
train_X=train_X.loc[:39999]
train_y=train_X['label']
validation_y=validation_X['label']
del train_X['label']
del validation_X['label']
test_X=pd.read_csv("C:/Users/Eric Zhou/Downloads/test.csv")

In [3]:
#convert to numpy arrays and normalize values between 0 and 1
#normalizing allows the network to train better and converge faster
train_X=np.array(train_X)/255
train_y=np.array(train_y)
validation_X=np.array(validation_X)/255
validation_y=np.array(validation_y)
print(train_X.shape, train_y.shape, validation_X.shape, validation_y.shape)
#test data
test_X=np.array(test_X).astype(dtype='float32')/255

(40000, 784) (40000,) (2000, 784) (2000,)


In [4]:
#convert to one-hot array
train_y=np.array(pd.get_dummies(train_y))
validation_y=np.array(pd.get_dummies(validation_y))
print(train_y.shape, validation_y.shape)

(40000, 10) (2000, 10)


In [5]:
#make sure everything is a float32
tf.cast(train_X, tf.float32)
tf.cast(train_y, tf.float32)
tf.cast(validation_X, tf.float32)
tf.cast(validation_y, tf.float32)

<tf.Tensor 'Cast_3:0' shape=(2000, 10) dtype=float32>

In [6]:
#setting up placeholders where data will be passed into  later
features=tf.placeholder(tf.float32, shape=[None, 784])
labels=tf.placeholder(tf.float32)

In [7]:
#set some parameters
batch_size=128

nodes_hl1=1000
nodes_hl2=500
nodes_hl3=100

output_size=10

num_epochs=200

A website showing different weight initializations:
https://intoli.com/blog/neural-network-initialization/

In [8]:
#create variables(weights and biases) Uses standard deviation of sqrt(2/nodes) which is a good starting point.

weights_input_hl1=tf.get_variable('weights_input_hl1', dtype=tf.float32, 
  initializer=tf.truncated_normal([784, nodes_hl1], dtype=tf.float32, stddev=np.sqrt(2/784)))
biases_hl1=tf.get_variable('biases_hl1', [nodes_hl1], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl1_hl2=tf.get_variable('weights_hl1_hl2', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl1, nodes_hl2], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl1)))
biases_hl2=tf.get_variable('biases_hl2', [nodes_hl2], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl2_hl3=tf.get_variable('weights_hl2_hl3', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl2, nodes_hl3], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl2)))
biases_hl3=tf.get_variable('biases_hl3', [nodes_hl3], dtype=tf.float32, 
  initializer=tf.zeros_initializer)

weights_hl3_output=tf.get_variable('weights_hl3_output', dtype=tf.float32, 
  initializer=tf.truncated_normal([nodes_hl3, output_size], dtype=tf.float32, stddev=np.sqrt(2/nodes_hl3)))

In [9]:
#create saver, max_to_keep is maximum checkpoint files kept
saver=tf.train.Saver(max_to_keep=1)

In [10]:
#dropout rate, each time it is trained, ~20% of neurons will be killed in each layer, it helps prevent overfitting
train_keep=0.8
keep_amt=train_keep

#training pass
#elu=exponential linear unit, generally performs better than relu

def forward_pass(x, keep_amt):
    dropout_rate=tf.constant(keep_amt)
    l1=tf.add(tf.matmul(x, weights_input_hl1), biases_hl1)
    l1=tf.nn.elu(l1)
    l1=tf.nn.dropout(l1, dropout_rate)
    l2=tf.add(tf.matmul(l1, weights_hl1_hl2), biases_hl2)
    l2=tf.nn.elu(l2)
    l2=tf.nn.dropout(l2, dropout_rate)
    l3=tf.add(tf.matmul(l2, weights_hl2_hl3), biases_hl3)
    l3=tf.nn.elu(l3)
    l3=tf.nn.dropout(l3, dropout_rate)
    output_layer=tf.matmul(l3, weights_hl3_output)
    return output_layer

In [11]:
#cost and gradient descent
#tf.reduce_mean=np.mean and tf.reduce_sum=np.sum
lr=1e-3
learning_rate=tf.placeholder(tf.float32, shape=[])

logits=forward_pass(features,keep_amt)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

#accuracy
#argmax takes the maximum value in each vector and sets it to 1, all others are set to 0
output=tf.nn.softmax(logits)
accuracy=tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(labels, 1)),tf.float32))

#used later for predicting the test data
prediction=tf.argmax(tf.nn.softmax(logits=output), 1)

In [12]:
import time
before_time=time.time()

with tf.Session() as sess:
    #initialize variables
    sess.run(tf.global_variables_initializer())
    #restore weights if file found
    try:
        saver.restore(sess, "/tmp/model.ckpt")
        print("Model restored.")
    except:
        print("No save file found.")

    
    batch_count = int(math.ceil(len(train_X)/batch_size))
    best_val_acc=0
    last_improve_epoch=0
    for epoch in range(num_epochs):
        #shuffle data
        state=np.random.get_state()
        np.random.shuffle(train_X)
        np.random.set_state(state)
        np.random.shuffle(train_y)
        # Progress bar
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch+1, num_epochs), unit='batches')
        train_loss=0.0
        # The training cycle
        keep_amt=train_keep
        for batch_i in batches_pbar:
            # Get a batch of training features and labels
            batch_start = batch_i*batch_size
            batch_features = train_X[batch_start:batch_start + batch_size]
            batch_labels = train_y[batch_start:batch_start + batch_size]
            #train
            _, c = sess.run([optimizer, cost], feed_dict={features: batch_features, labels: batch_labels, learning_rate:lr})
            train_loss+=c
        #set keep amount to 100% for testing
        keep_amt=1.0    
        validation_accuracy=sess.run(accuracy, feed_dict={features: validation_X, labels: validation_y})
        print('Training Loss = {}, Validation Accuracy = {}'.format(train_loss, validation_accuracy))

        #save model if validation is at a new best and do not save for first 5 epochs
        if validation_accuracy>best_val_acc:
            save_path = saver.save(sess, "/tmp/model.ckpt")
            print("Model saved in file: {}".format(save_path))
            print("Accuracy improved from {} to {}".format(best_val_acc, validation_accuracy))
            best_val_acc=validation_accuracy
            last_improve_epoch=epoch
        #if model hasn't improved for 5 epochs step down learning rate
        elif (epoch-last_improve_epoch)%5==0:
            lr/=10
            print("Learning rate decreased.")
        #stop training if validation loss hasn't improved for 10 epochs 
        if epoch>=last_improve_epoch+10:
            print("Model has not improved for 10 epochs. Training has been stopped.")
            print("Best validation accuracy: {}".format(best_val_acc))
            break;
    print("Training Finished! It took {} minutes.".format(np.round((time.time()-before_time)/60,2)))
    
    #load in best model
    try:
        saver.restore(sess, "/tmp/model.ckpt")
        print("Best Model restored.")
    except:
        print("No save file found. Prediction will use current weights which may not be the best.")

    keep_amt=1.0
    predictions=np.array([])
    batches_test = int(math.ceil(len(test_X)/batch_size))
    progress_bar = tqdm(range(batches_test), desc='Generating Predictions', unit='batches')
    for batch_i in progress_bar:
        # Get a batch of test features and labels
        batch_start = batch_i*batch_size
        batch_features = test_X[batch_start:batch_start + batch_size]
        predictions=np.append(predictions, sess.run(prediction, feed_dict={features: batch_features}))
    print(predictions.shape)


INFO:tensorflow:Restoring parameters from /tmp/model.ckpt
No save file found.


Epoch  1/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.00batches/s]


Training Loss = 102.3697184920311, Validation Accuracy = 0.9319999814033508
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0 to 0.9319999814033508


Epoch  2/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.49batches/s]


Training Loss = 53.88555122539401, Validation Accuracy = 0.9459999799728394
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9319999814033508 to 0.9459999799728394


Epoch  3/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.17batches/s]


Training Loss = 41.96949165314436, Validation Accuracy = 0.9599999785423279
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9459999799728394 to 0.9599999785423279


Epoch  4/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.19batches/s]


Training Loss = 34.636224480345845, Validation Accuracy = 0.9574999809265137


Epoch  5/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.10batches/s]


Training Loss = 28.119031809270382, Validation Accuracy = 0.9434999823570251


Epoch  6/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.57batches/s]


Training Loss = 25.290360514074564, Validation Accuracy = 0.9629999995231628
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9599999785423279 to 0.9629999995231628


Epoch  7/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.38batches/s]


Training Loss = 24.09494905732572, Validation Accuracy = 0.9585000276565552


Epoch  8/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.27batches/s]


Training Loss = 20.123904526233673, Validation Accuracy = 0.9664999842643738
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9629999995231628 to 0.9664999842643738


Epoch  9/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.98batches/s]


Training Loss = 19.799405670259148, Validation Accuracy = 0.9639999866485596


Epoch 10/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.13batches/s]


Training Loss = 17.159060012549162, Validation Accuracy = 0.9674999713897705
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9664999842643738 to 0.9674999713897705


Epoch 11/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.53batches/s]


Training Loss = 16.082715121563524, Validation Accuracy = 0.9729999899864197
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9674999713897705 to 0.9729999899864197


Epoch 12/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 34.98batches/s]


Training Loss = 14.706055641639978, Validation Accuracy = 0.9700000286102295


Epoch 13/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 32.78batches/s]


Training Loss = 13.086639221408404, Validation Accuracy = 0.9605000019073486


Epoch 14/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.66batches/s]


Training Loss = 14.270017648465, Validation Accuracy = 0.9645000100135803


Epoch 15/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.08batches/s]


Training Loss = 13.228970643191133, Validation Accuracy = 0.9649999737739563


Epoch 16/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.36batches/s]


Training Loss = 13.55996012617834, Validation Accuracy = 0.9614999890327454


Epoch 17/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.47batches/s]


Training Loss = 6.082643291156273, Validation Accuracy = 0.9804999828338623
Model saved in file: /tmp/model.ckpt
Accuracy improved from 0.9729999899864197 to 0.9804999828338623


Epoch 18/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.43batches/s]


Training Loss = 4.013365510996664, Validation Accuracy = 0.9764999747276306


Epoch 19/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.46batches/s]


Training Loss = 3.4345639409439173, Validation Accuracy = 0.9754999876022339


Epoch 20/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.20batches/s]


Training Loss = 3.0340399424894713, Validation Accuracy = 0.9750000238418579


Epoch 21/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.66batches/s]


Training Loss = 2.4906575869536027, Validation Accuracy = 0.9800000190734863


Epoch 22/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.17batches/s]


Training Loss = 2.532156542278244, Validation Accuracy = 0.9785000085830688


Epoch 23/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:09<00:00, 34.08batches/s]


Training Loss = 2.4492944907105993, Validation Accuracy = 0.9794999957084656


Epoch 24/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.64batches/s]


Training Loss = 2.1419501418859, Validation Accuracy = 0.9789999723434448


Epoch 25/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 36.04batches/s]


Training Loss = 2.0511268072295934, Validation Accuracy = 0.9764999747276306


Epoch 26/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.30batches/s]


Training Loss = 1.863662065457902, Validation Accuracy = 0.9800000190734863


Epoch 27/200: 100%|█████████████████████████████████████████████████████████████| 313/313 [00:08<00:00, 35.66batches/s]


Training Loss = 2.1856867607857566, Validation Accuracy = 0.9775000214576721
Model has not improved for 10 epochs. Training has been stopped.
Best validation accuracy: 0.9804999828338623
Training Finished! It took 4.79 minutes.
INFO:tensorflow:Restoring parameters from /tmp/model.ckpt
Best Model restored.


Generating Predictions: 100%|███████████████████████████████████████████████████| 219/219 [00:02<00:00, 91.58batches/s]


(28000,)


In [13]:
predictions

array([ 2.,  0.,  9., ...,  3.,  9.,  2.])

In [14]:
#use replace the 0s in the sample submission file with the outputs from the neural net
submission=pd.read_csv("~/Downloads/sample_submission.csv")
for x in range(len(predictions)):
    submission['Label'][x]+=predictions[x]

In [15]:
print(submission)

       ImageId  Label
0            1      2
1            2      0
2            3      9
3            4      9
4            5      3
5            6      7
6            7      0
7            8      3
8            9      0
9           10      3
10          11      5
11          12      7
12          13      4
13          14      0
14          15      4
15          16      3
16          17      3
17          18      1
18          19      9
19          20      0
20          21      9
21          22      1
22          23      1
23          24      5
24          25      7
25          26      4
26          27      2
27          28      7
28          29      4
29          30      7
...        ...    ...
27970    27971      5
27971    27972      0
27972    27973      4
27973    27974      8
27974    27975      0
27975    27976      3
27976    27977      6
27977    27978      0
27978    27979      1
27979    27980      9
27980    27981      3
27981    27982      1
27982    27983      1
27983    2

In [16]:
#index=False gets rid of the double numbers
submission.to_csv("~/Downloads/submission.csv", index=False)