# MNIST tf sequential model trained on AWS

MNIST data was used to train two Keras `Sequential` neural networks (TensorFlow backend) that differ in the number of nodes per hidden layer; each network has only two hidden layers.
- Accuracy was around 0.95 on the hold-out data set
- Possible optimizations:
  - Deeper network
  - Increase nodes
  - Use alternative optimizer instead of 'adam'

In [None]:
from pathlib import Path
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.callbacks import EarlyStopping
from keras import backend as bk
from keras.models import save_model, load_model


In [2]:
# Check that a GPU is visible to the Keras TensorFlow backend on this EC2
# host (expected, since the instance is a GPU instance).
# NOTE(review): `_get_available_gpus` is underscore-prefixed, i.e. a private
# backend helper; it may be missing in other Keras versions — confirm before
# upgrading.
bk.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [7]:
# Show the current working directory; the project directories are derived from it.
Path.cwd()

PosixPath('/home/ubuntu/notebooks')

In [8]:
# Project layout: the project root is the parent of the working directory,
# and the input CSV files are read from its `data` sub-folder.
base_dir = Path.cwd().parents[0]
data_dir = base_dir.joinpath('data')

In [9]:
# Load the training features. The CSV is read with no header row, so the
# columns are labelled by position.
train_data_path = data_dir / 'train_data_std.csv'
train_data = pd.read_csv(train_data_path, header=None)
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,774,775,776,777,778,779,780,781,782,783
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Load the training labels (read without a header row) and check their shape.
labels_path = data_dir / 'train_labels.csv'
labels = pd.read_csv(labels_path, header=None)
labels.shape

(42000, 1)

In [25]:
# One-hot encode the labels: column 0 is converted to a categorical dtype so
# that get_dummies expands it into one indicator column per category.
label_categories = labels[0].astype('category')
labels[0] = label_categories
labels_dummy = pd.get_dummies(labels)
labels_dummy

Unnamed: 0,0_0,0_1,0_2,0_3,0_4,0_5,0_6,0_7,0_8,0_9
0,0,1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0
5,1,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,1,0,0
7,0,0,0,1,0,0,0,0,0,0
8,0,0,0,0,0,1,0,0,0,0
9,0,0,0,1,0,0,0,0,0,0


In [26]:
# Baseline network: 784 inputs -> two hidden ReLU layers of 25 units each
# -> 10-way softmax output.
base = Sequential([
    Dense(25, activation='relu', input_shape=(784,)),
    Dense(25, activation='relu'),
    Dense(10, activation='softmax'),
])

In [27]:
# Configure training for the baseline model: Adam optimizer, categorical
# cross-entropy loss (the targets are one-hot encoded), accuracy as the metric.
base.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [29]:
# Train the baseline model for up to 20 epochs, holding out 25% of the rows
# for validation. The EarlyStopping callback (patience of 2 epochs, default
# monitored quantity) ends training early when validation stops improving.
stopping = EarlyStopping(patience=2)
base.fit(
    train_data,
    labels_dummy,
    epochs=20,
    validation_split=0.25,
    callbacks=[stopping],
)

Train on 31500 samples, validate on 10500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


<keras.callbacks.callbacks.History at 0x7f54341019b0>

In [32]:
# Persist the trained baseline model as an .h5 file in the project root.
base_model_path = base_dir / 'base_model.h5'
base.save(base_model_path)

In [33]:
# Larger variant of the baseline: same layout (two hidden ReLU layers and a
# 10-way softmax output) but wider hidden layers of 50 and 100 units.
base_plus = Sequential([
    Dense(50, activation='relu', input_shape=(784,)),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
])

In [34]:
# Configure training for the wider model with the same settings as the
# baseline: Adam optimizer, categorical cross-entropy loss, accuracy metric.
base_plus.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [35]:
# Train the wider model with the same settings as the baseline run (up to 20
# epochs, 25% of rows held out for validation). The `stopping` early-stopping
# callback created for the baseline fit is passed again here.
base_plus.fit(
    train_data,
    labels_dummy,
    epochs=20,
    validation_split=0.25,
    callbacks=[stopping],
)

Train on 31500 samples, validate on 10500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


<keras.callbacks.callbacks.History at 0x7f54143520f0>

In [36]:
# Persist the trained wider model (`base_plus`) as an .h5 file in the project root.
# The object saved here must be `base_plus`: calling `base.save` would write
# the baseline model under the base_plus file name and leave the wider model
# unsaved.
base_plus.save(base_dir/'base_plus_model.h5')