In [3]:
# Author: Dominic Drury
# Example from textbook, Deep Learning with Keras
#SNHU CS-370

# epoch of 20, 2 hidden layers

from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import RMSprop, Adam 
from keras.utils import np_utils 
np.random.seed(1671)  # for reproducibility (seeds NumPy's global RNG)

# --- network and training hyperparameters ---
NB_EPOCH = 20             # passed to fit() as epochs
BATCH_SIZE = 128          # passed to fit() as batch_size
VERBOSE = 1               # passed to fit() and evaluate() as verbose
NB_CLASSES = 10           # number of outputs = number of digits
OPTIMIZER = Adam()        # Adam optimizer, explained in chapter 1 of Deep Learning with Keras
N_HIDDEN = 128            # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2    # how much TRAIN is reserved for VALIDATION
DROPOUT = 0.3             # rate given to each Dropout layer
RESHAPED = 784            # 28 x 28 pixels flattened into one row per image

# --- data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a RESHAPED-long row. Passing -1 lets NumPy infer the
# number of samples from the loaded array instead of hard-coding 60000 / 10000.
# Then cast to float32 and normalize by dividing by 255.
X_train = X_train.reshape(-1, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(-1, RESHAPED).astype('float32') / 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model ---
# two hidden layers of N_HIDDEN units each (ReLU followed by dropout)
# NB_CLASSES outputs
# final stage is softmax
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# compiling the model
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# training the model; VALIDATION_SPLIT of the training data is held out for validation
history = model.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# evaluating the model on the test set; score is [loss, accuracy]
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)

In [2]:
# Changes made
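# Increased the batch size to 512 (4x the original 128) and saw a decrease in accuracy. A larger batch gives a more
# accurate estimate of the error gradient but fewer weight updates per epoch, so learning progresses more slowly over
# the same number of epochs; the run time was slightly faster.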
# 
# Information I found on the effect of batch size on accuracy
# https://stackoverflow.com/questions/55485837/why-does-different-batch-sizes-give-different-accuracy-in-keras

from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import RMSprop, Adam 
from keras.utils import np_utils 
np.random.seed(1671)  # for reproducibility (seeds NumPy's global RNG)

# --- network and training hyperparameters ---
NB_EPOCH = 20             # passed to fit() as epochs
BATCH_SIZE = 512          # passed to fit() as batch_size (the value varied in this experiment)
VERBOSE = 1               # passed to fit() and evaluate() as verbose
NB_CLASSES = 10           # number of outputs = number of digits
OPTIMIZER = Adam()        # Adam optimizer, explained in chapter 1 of Deep Learning with Keras
N_HIDDEN = 128            # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2    # how much TRAIN is reserved for VALIDATION
DROPOUT = 0.3             # rate given to each Dropout layer
RESHAPED = 784            # 28 x 28 pixels flattened into one row per image

# --- data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a RESHAPED-long row. Passing -1 lets NumPy infer the
# number of samples from the loaded array instead of hard-coding 60000 / 10000.
# Then cast to float32 and normalize by dividing by 255.
X_train = X_train.reshape(-1, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(-1, RESHAPED).astype('float32') / 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model ---
# two hidden layers of N_HIDDEN units each (ReLU followed by dropout)
# NB_CLASSES outputs
# final stage is softmax
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# compiling the model
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# training the model; VALIDATION_SPLIT of the training data is held out for validation
history = model.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# evaluating the model on the test set; score is [loss, accuracy]
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
___________

In [4]:
# Changes made
# Decreased the batch size to 32 (1/4 of the original 128) and saw a decrease in accuracy. A smaller batch gives a
# quicker but noisier (less accurate) estimate of the error gradient, and the run time was significantly higher.
# 
# Information I found on the effect of batch size on accuracy
# https://stackoverflow.com/questions/55485837/why-does-different-batch-sizes-give-different-accuracy-in-keras

from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import RMSprop, Adam 
from keras.utils import np_utils 
np.random.seed(1671)  # for reproducibility (seeds NumPy's global RNG)

# --- network and training hyperparameters ---
NB_EPOCH = 20             # passed to fit() as epochs
BATCH_SIZE = 32           # passed to fit() as batch_size (the value varied in this experiment)
VERBOSE = 1               # passed to fit() and evaluate() as verbose
NB_CLASSES = 10           # number of outputs = number of digits
OPTIMIZER = Adam()        # Adam optimizer, explained in chapter 1 of Deep Learning with Keras
N_HIDDEN = 128            # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2    # how much TRAIN is reserved for VALIDATION
DROPOUT = 0.3             # rate given to each Dropout layer
RESHAPED = 784            # 28 x 28 pixels flattened into one row per image

# --- data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a RESHAPED-long row. Passing -1 lets NumPy infer the
# number of samples from the loaded array instead of hard-coding 60000 / 10000.
# Then cast to float32 and normalize by dividing by 255.
X_train = X_train.reshape(-1, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(-1, RESHAPED).astype('float32') / 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model ---
# two hidden layers of N_HIDDEN units each (ReLU followed by dropout)
# NB_CLASSES outputs
# final stage is softmax
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# compiling the model
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# training the model; VALIDATION_SPLIT of the training data is held out for validation
history = model.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# evaluating the model on the test set; score is [loss, accuracy]
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_7 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)