In [None]:
import numpy as np
import matplotlib.pyplot as plt

# suppress unnecessary warnings from TensorFlow's native logging
# (set here, ahead of the tensorflow imports in the cells below)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
##
## MNIST happens to be preloaded with Keras
##

# load mnist: a training split and a testing split, each as (images, labels)
from tensorflow.keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# display the first nine training digits in a 3x3 grid, each titled with its label
fig = plt.figure()
first_nine = zip(train_images[:9], train_labels[:9])
for panel, (digit_image, digit_label) in enumerate(first_nine, start=1):
    plt.subplot(3, 3, panel)
    plt.imshow(digit_image, cmap='gray', interpolation='none')
    plt.title("Digit: {}".format(digit_label))
    # hide the axis ticks - pixel coordinates are not informative here
    plt.xticks([])
    plt.yticks([])
plt.show()

# image shape (size of the second axis of the training image array)
sz = train_images.shape[1]

In [None]:
##
## preprocess training and testing patterns
##
## this is a common first step for training any neural network
##

def _flatten_and_scale(images):
    """Flatten each image in a 3-D stack into one row and rescale by 1/255.

    images : array indexed as (pattern, row, column)
    returns: float32 array of shape (patterns, rows * columns)
    """
    n_patterns, n_rows, n_cols = images.shape
    flat = images.reshape((n_patterns, n_rows * n_cols))
    return flat.astype('float32') / 255

# check out dimensions and element types of the raw mnist data
print('*** Original Training and Testing Patterns ***')
print('Training images shape: ', train_images.shape)
print('Training images type:  ', type(train_images[0, 0, 0]))
print('Testing images shape:  ', test_images.shape)
print('Testing images type:   ', type(test_images[0, 0, 0]))
print()

# reshape each image into a vector and convert the pixel values to scaled float32
train_images_vec = _flatten_and_scale(train_images)
test_images_vec = _flatten_and_scale(test_images)

# display new input dimensions/type
print('*** Reformatted Training and Testing Patterns ***')
print('Training images shape: ', train_images_vec.shape)
print('Training images type:  ', type(train_images_vec[0, 0]))
print('Testing images shape:  ', test_images_vec.shape)
print('Testing images type:   ', type(test_images_vec[0, 0]))

In [None]:
##
## MNIST labels are numeric - want to reformat as one-hot coded vectors
##

from tensorflow.keras.utils import to_categorical

# also need to categorically encode the labels
print('First 5 training labels as labels:\n', train_labels[:5])

# Fix the one-hot width explicitly instead of letting to_categorical infer it
# separately for each split: inferred widths depend on the largest label present,
# so a split that happened to lack the highest class would come out narrower
# and no longer line up with the network's output layer.
num_classes = int(max(train_labels.max(), test_labels.max())) + 1

train_labels_onehot = to_categorical(train_labels, num_classes=num_classes)
test_labels_onehot = to_categorical(test_labels, num_classes=num_classes)
print('First 5 training labels as one-hot encoded vectors:\n', train_labels_onehot[:5])

## start with a simple one-layer neural network in Keras

In [None]:
#########################################################################################
##
## define and train neural network in Keras
##

# import tools for basic keras networks
from tensorflow.keras import layers, models

# number of input nodes (nin = 784): one per element of a flattened image
nin = train_images_vec.shape[-1]

# number of output nodes (nout = 10): one per column of the one-hot labels
nout = train_labels_onehot.shape[-1]

# architecture of the simple neural network model
# input layer  : 28*28 = 784 (nin) input nodes
# output layer : 10 (nout) sigmoid output nodes

# Sequential is a basic stack of layers (most basic type of neural network)
# https://keras.io/getting-started/sequential-model-guide/
#
# start from a blank Sequential network
network = models.Sequential()

# network.add() adds the first (and only) layer to the network (the output layer)
#
# layers.Dense() is a densely connected layer
#   - with nout units
#   - with a sigmoidal activation function
#   - that receives input from an input layer with a specific shape
#
network.add(
    layers.Dense(
        units=nout,
        activation='sigmoid',
        input_shape=(nin,),
    )
)

# what happens if nin isn't included on first layer?
#network.add(layers.Dense(nout,
#                         activation='sigmoid'))

# with multi-layer networks, we will have additional network.add() calls

In [None]:
# print a model summary
# (summary() prints the table itself and returns None, so it is called directly;
#  wrapping it in print() would add a stray "None" line after the table)
network.summary()

In [None]:
# print more info about the network

# one line per layer: its name plus the shapes of its input and output tensors
print('*** Basic Network Structure ***')
structure_row = 'layer name : {} | input shape : {} | output shape : {}'
for lyr in network.layers:
    print(structure_row.format(lyr.name, lyr.input.shape, lyr.output.shape))
print()

# full configuration dictionary of every layer
print('*** Detailed Network Layer Information ***')
for lyr in network.layers:
    print(lyr.get_config())
print()

In [None]:
# note that before training, weights are initialized to small random values
# (get_weights()[0] is the first array in the layer's weight list)
first_layer_W = network.layers[0].get_weights()[0]
print('W shape : {}'.format(first_layer_W.shape))
print('weight (W) initial values:')
print(first_layer_W)

In [None]:
# compile network
#
# configures the network for training, specifying optimizer and loss function
#
# optimizer='sgd'             : stochastic gradient descent (simplest, not the smartest)
# loss='mean_squared_error'   : uses MSE (MSE = 1/N * SSE)
# metrics=['accuracy', 'mse'] : extra quantities printed when verbose=True
network.compile(
    optimizer='sgd',
    loss='mean_squared_error',
    metrics=['accuracy', 'mse'],
)

# if metrics is left off, only loss and val_loss are saved
# network.compile(optimizer='sgd',
#                 loss='mean_squared_error')

In [None]:
# note that before training, weights are initialized to small random values
# (printed again after compile - no training has been run yet at this point)
first_layer_W = network.layers[0].get_weights()[0]
print('W shape : {}'.format(first_layer_W.shape))
print('weight (W) initial values:')
print(first_layer_W)

In [None]:
# now train the network
#
# training requires training patterns (train_images_vec) and teachers (train_labels_onehot)
#
# also sets the number of training epochs, the validation split (described later),
# and the batch_size
#
# set verbose=True to see training unfold
history = network.fit(
    x=train_images_vec,
    y=train_labels_onehot,
    batch_size=128,
    epochs=20,
    validation_split=.1,
    verbose=True,
)
print('Done training!')

# if this cell is run again, it will do more training on the same network

In [None]:
# history.history is a Python dictionary holding, per epoch: loss (training loss),
# accuracy, mse, val_loss (validation loss), val_accuracy (validation accuracy),
# and val_mse
training_log = history.history
print(training_log)

In [None]:
# access one of the elements of the dictionary by name (in this case training accuracy)
training_accuracy = history.history['accuracy']
print(training_accuracy)

In [None]:
# call network.evaluate() if you have test patterns and test answers and want to know performance
test_results = network.evaluate(
    x=test_images_vec,
    y=test_labels_onehot,
    verbose=False,
)

# metric names first, then the matching values
# loss is MSE
# accuracy is proportion correct
print(network.metrics_names)
print(test_results)

In [None]:
# call network.predict() if you have test patterns and want to get predicted outputs
out = network.predict(x=test_images_vec)

out_dims = out.shape
print('dimensions of out : {}'.format(out_dims))

## multi-layer neural network in Keras

In [None]:
from tensorflow.keras import regularizers

# number of input nodes (nin = 784)
nin = train_images_vec.shape[-1]

# each of the two hidden layers gets nhid hidden nodes
nhid = 100

# number of output nodes (nout = 10)
nout = train_labels_onehot.shape[-1]

# L2 penalty factor applied to the weights (kernel) of every layer below
L2_PENALTY = 0.01

# architecture of the multi-layer neural network model
# input layer    : 28*28 = 784 (nin) input nodes
# hidden layer 1 : 100 (nhid) relu hidden nodes
# hidden layer 2 : 100 (nhid) relu hidden nodes
# output layer   : 10 (nout) softmax output nodes

# start from a blank Sequential network
network2 = models.Sequential()

# add layers to the initialized network
#
# hidden layer (input->hidden) - using relu because of its nice mathematical properties
network2.add(
    layers.Dense(
        units=nhid,
        activation='relu',
        kernel_regularizer=regularizers.l2(L2_PENALTY),
        input_shape=(nin,),
    )
)

# hidden layer (hidden->hidden) - using relu because of its nice mathematical properties
network2.add(
    layers.Dense(
        units=nhid,
        activation='relu',
        kernel_regularizer=regularizers.l2(L2_PENALTY),
    )
)

# output layer (hidden->output) - using softmax as per discussion in class
network2.add(
    layers.Dense(
        units=nout,
        activation='softmax',
        kernel_regularizer=regularizers.l2(L2_PENALTY),
    )
)

# regularizers (L1 or L2) can potentially help with over-fitting

In [None]:
# print a model summary
# (summary() prints the table itself and returns None, so it is called directly;
#  wrapping it in print() would add a stray "None" line after the table)
network2.summary()

# print more info about the network

# one line per layer: its name plus the shapes of its input and output tensors
print('*** Basic Network Structure ***')
for layer in network2.layers:
    print('layer name : {} | input shape : {} | output shape : {}'.format(layer.name, layer.input.shape, layer.output.shape))
print()

# full configuration dictionary of every layer, separated by blank lines
print('*** Detailed Network Layer Information ***')
for layer in network2.layers:
    print(layer.get_config())
    print()

In [None]:
# compile network
#
# optimizer : 'adam' (extension of stochastic gradient descent)
# loss      : categorical cross entropy, paired with the softmax output activation
# metrics   : accuracy
network2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# now train the network
#
# validation_split=.1 sets aside a fraction of the training patterns for validation;
# per the Keras Model.fit documentation that held-out data is not trained on and is
# only used to report val_loss / val_accuracy at the end of each epoch
history = network2.fit(
    x=train_images_vec,
    y=train_labels_onehot,
    batch_size=128,
    epochs=20,
    validation_split=.1,
    verbose=True,
)
print('Done training!')