# Build Neural Network

<font color='steelblue'>

## MNIST Digit Recognition
  <br>  
<font size = 4>
- From the Keras datasets, import the MNIST Digits data.<br>
- The dataset contains images of the digits 0 to 9, and each image
    has an associated label.<br>
</font>
</font>

<font color = 'grey'>
<font size = 3>
    
### The following steps are covered:
1. Check the version of TensorFlow and Keras
2. Load training and test data including labels
3. Normalize the images
4. Plot a few images after normalization
5. Create a Neural Network and build a model
6. Train the model on the training dataset
7. Evaluate the accuracy of the model using test dataset
8. Plot the accuracy and loss for the model
9. Introduction to tensorflow `"Callbacks"`    
</font>
</font>

In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
#warnings.filterwarnings(action='once')

In [None]:
# Sanity check that TensorFlow imports correctly: display the TensorFlow
# version together with the version of the Keras exposed as tf.keras
tf.__version__, tf.keras.__version__

## Locate the dataset

In [None]:
digits_mnist = keras.datasets.mnist

In [None]:
# load_data() is unpacked as two (images, labels) pairs: the training
# split first, then the test split
(train_images, train_labels), (test_images, test_labels) = \
                                digits_mnist.load_data()

## Explore Data

In [None]:
# Report the dimensions of the training images and of their labels
train_image_shape, train_label_shape = train_images.shape, train_labels.shape
print("Size: train images {}, train labels {}".format(train_image_shape,
                                                      train_label_shape))

In [None]:
# Report the dimensions of the test images and of their labels
test_image_shape, test_label_shape = test_images.shape, test_labels.shape
print("Size: test images {}, test labels {}".format(test_image_shape,
                                                    test_label_shape))

In [None]:
# look at first 10 labels in training set
train_labels[:10]

In [None]:
# Show one raw training image; the colorbar makes the pixel-value range visible
plt.figure()   # must be called: a bare `plt.figure` only names the function
plt.imshow(train_images[5], cmap=plt.cm.binary)
plt.colorbar()
plt.grid(False)
plt.show()

In [None]:
train_images[5]

## Normalize the images

In [None]:
# Scale the pixel values into the range 0 to 1 by dividing by 255
# (min-max normalization); training and test images get the same scaling.
# NOTE: re-running this cell divides the already scaled values again.
train_images, test_images = train_images / 255.0, test_images / 255.0

In [None]:
train_images[5]

In [None]:
print(type(train_images))

## Plot a few normalized images

In [None]:
# Show the first 25 training images in grayscale on a 5 x 5 grid,
# each captioned with its label

plt.figure(figsize=(10, 10))
first_images, first_labels = train_images[:25], train_labels[:25]
for slot, (image, label) in enumerate(zip(first_images, first_labels), start=1):
    plt.subplot(5, 5, slot)       # subplot positions are 1-based, 5 per row
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.xlabel(label)

# Create Neural Network

In [None]:
# Create Neural Network
# Build the network one layer at a time with Sequential.add()

model = keras.models.Sequential()
# Input: 28 x 28 images, flattened so the Dense layers receive a vector
model.add(keras.layers.Flatten(input_shape=[28, 28]))
# Hidden fully connected layer: 128 units, ReLU activation
model.add(keras.layers.Dense(128, activation="relu"))
# Output layer: 10 units with softmax activation
model.add(keras.layers.Dense(10, activation="softmax"))

In [None]:
# Alternative way to instantiate and create a Sequential() neural network

def makeModel(modelName = ""):
    '''
    Build the (uncompiled) digit classifier as a keras.Sequential model.

    modelName: name passed to the Sequential constructor
    returns:   the new model; the caller is expected to compile it
    '''
    layer_stack = [
        # 28 x 28 input image, flattened for the Dense layers
        keras.layers.Flatten(input_shape=(28, 28)),
        # hidden layer
        keras.layers.Dense(128, activation='relu', name="FirstLayer"),
        # output layer: 10 units with softmax
        keras.layers.Dense(10, activation='softmax', name="Output"),
    ]
    return keras.Sequential(layer_stack, name=modelName)

In [None]:
# Seed TensorFlow's random number generator before the model is built
tf.random.set_seed(2345)

# Compile the model with chosen parameters
# (this rebinds `model` to a fresh network created by makeModel)

model = makeModel("FirstModel")
model.compile(optimizer='adam', # string shortcut; alternative: keras.optimizers.Adam(learning_rate=0.001)
              loss='sparse_categorical_crossentropy',    # labels are the discrete numbers 0 - 9
              metrics=['accuracy'])                      # tracked in addition to the loss

In [None]:
model.summary()

# Train the neural network

In [None]:
%%time
tf.random.set_seed(2345)

# Train the model for 15 epochs and include a validation set
# (validation_split=0.1: 10% of the training data passed to fit()).
# Capturing the returned history enables you to plot the change in 
# error/loss and accuracy over time

history = model.fit(train_images, train_labels, validation_split=0.1, 
                    epochs=15, verbose = 1)

In [None]:
# Names of the metrics recorded during training, read from the History
# object itself: plot_graphs() looks up each name (and its 'val_' twin) in
# history.history, so taking the names from the logged keys guarantees
# that those lookups succeed.
metrics_names = [key for key in history.history if not key.startswith('val_')]
metrics_names

## Evaluate the accuracy of the model

- Evaluate to see the accuracy and loss
- Plot the metrics

In [None]:
# Use the test images to evaluate the model on a set of unseen images.
# The result is unpacked as (loss, accuracy); accuracy is the metric
# requested when the model was compiled.

test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Test Loss: ", test_loss)
print("Test accuracy: ", test_acc)

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, string1, string2):
    '''
    Plot two training metrics, each together with its validation counterpart.

    history: object whose `history` attribute maps metric names to the
             values to plot (here: the return value of model.fit)
    string1: metric drawn in the upper panel
    string2: metric drawn in the lower panel

    Both `name` and 'val_' + `name` must be keys of history.history.
    '''
    # one figure, two stacked panels with independent axes
    plt.subplots(2, 1, sharex=False, sharey=False, figsize = (8,6))

    for position, metric in ((211, string1), (212, string2)):
        validation_metric = 'val_' + metric
        plt.subplot(position)          # make this panel the current axes
        plt.plot(history.history[metric])
        plt.plot(history.history[validation_metric])
        plt.ylabel(metric)
        plt.legend([metric, validation_metric])

    # the lower panel is still current here; only it gets the x-axis label
    plt.xlabel('Epochs')

In [None]:
plot_graphs(history, metrics_names[0], metrics_names[1])

## Make predictions on test data

- Make predictions on the test images
- Plot correct and incorrect predictions
- Create Confusion Matrix

In [None]:
# Run the trained model on every test image and keep the raw model output
preds = model.predict(test_images)

In [None]:
# preds currently has the probability for each of the 10000 test images
preds[:5]

In [None]:
# Collapse each row of model output to the index of its largest value,
# i.e. the predicted digit for that image.
# NOTE: this overwrites `preds`, so the cell is meant to run once per predict().
preds = preds.argmax(axis=1)

In [None]:
preds[:5]

In [None]:
# Show the first 10 test images, each titled with the predicted digit
fig, axes = plt.subplots(ncols=10, sharex=False,
                         sharey=True, figsize=(20, 4))
# zip stops after the 10 axes, so only the first 10 images are drawn
for ax, image, predicted in zip(axes, test_images, preds):
    ax.set_title(predicted)
    ax.imshow(image, cmap='gray')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

In [None]:
# Count the test images whose predicted digit equals the true label
correct_mask = preds == test_labels
print("Number of correct predictions: {}".format(len(preds[correct_mask])))

In [None]:
# Count the test images whose predicted digit differs from the true label
incorrect_mask = preds != test_labels
print("Number of incorrect predictions: {}".format(len(preds[incorrect_mask])))

In [None]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate the true labels against the predicted digits
cm = confusion_matrix(y_true=test_labels, y_pred=preds)

In [None]:
# Draw the confusion matrix as an annotated heatmap
import seaborn as sn
plt.figure(figsize = (8,6))
# annot=True writes the value into every cell; fmt sets its number format
sn.heatmap(cm, annot=True, cmap=plt.cm.Blues, fmt = 'g')
plt.title('Confusion Matrix')
# cm was built as confusion_matrix(test_labels, preds):
# the y-axis is labelled as the true labels, the x-axis as the predictions
plt.ylabel('True label')
plt.xlabel('Predicted label');

### plot the first 10 incorrect predictions

In [None]:
# Show up to 10 misclassified test images, each titled with the
# (incorrect) predicted digit
fig, axes = plt.subplots(ncols=10, sharex=False,
                         sharey=True, figsize=(20, 4))
# Positions where prediction and label disagree, taken from the arrays
# themselves rather than assuming the test set holds exactly 10000 images;
# keep at most one index per available subplot
wrong_indices = np.flatnonzero(preds != test_labels)[:len(axes)]
for ax, i in zip(axes, wrong_indices):
    ax.set_title(preds[i])
    ax.imshow(test_images[i], cmap='gray')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

## Check Points

**How long should the model be trained?**

- It often depends on the problem and the data: sometimes the model trains very fast, other times it takes a while
- The validation loss and validation accuracy also keep bouncing from epoch to epoch
- This is where checkpoints come in, and there are many different types
- Here we look at the ModelCheckpoint callback, which saves the best model from all the epochs used

<font size = "3">
    
[Check callbacks documentation](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback)
    
</font>

In [None]:
import os

# Create a function to implement a ModelCheckpoint callback with 
# a specific filename

# define folder to store model
folderName = "ModelExperiments"

def create_model_checkpoint(model_name, save_path = folderName):
    '''
    Build a ModelCheckpoint callback that saves to <save_path>/<model_name>.

    model_name: last component of the path the model is saved under
    save_path:  parent folder; defaults to the value folderName had when
                this function was defined
    returns:    a tf.keras.callbacks.ModelCheckpoint instance
    '''
    checkpoint_path = os.path.join(save_path, model_name)
    return tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_best_only=True,    # save only the best model to file
        verbose=1)              # only output a limited amount of text

In [None]:
# create model that will use the check point
# (same seed, architecture and compile settings as the first model;
# the name given here is later used as the checkpoint name)

tf.random.set_seed(2345)

model1 = makeModel("WithCheckpoint")
model1.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
model1.summary()

In [None]:
%%time
tf.random.set_seed(2345)

# train the model and provide a check point;
# the checkpoint is stored under the model's own name inside folderName
history1 = model1.fit(train_images, train_labels, validation_split=0.1, 
                      epochs=15, verbose = 1,
                      callbacks = [create_model_checkpoint(model_name = model1.name)])

In [None]:
# plot the training and validation metrics of the checkpointed run;
# history1 is the full record returned by fit(), not just the saved best model
plot_graphs(history1, metrics_names[0], metrics_names[1])

In [None]:
# Evaluate model1 as it stands after training, i.e. before the saved
# checkpoint is loaded back in
test_loss, test_acc = model1.evaluate(test_images, test_labels)
print("Test Loss: ", test_loss)
print("Test accuracy: ", test_acc)

In [None]:
# Load the checkpointed model back from disk, replacing the in-memory model1.
# The path is built with os.path.join, the same way
# create_model_checkpoint() built it when saving.
name = os.path.join(folderName, model1.name)
model1 = tf.keras.models.load_model(name)

In [None]:
# Evaluate again, now with model1 bound to the model reloaded from the checkpoint
test_loss, test_acc = model1.evaluate(test_images, test_labels)
print("Test Loss: ", test_loss)
print("Test accuracy: ", test_acc)

<span style="font-family:Comic sans MS; font-size:1.4em;">
<font color='tomato'>
    <h2>Practice</h2>
    <h3>Try out different parameters and see how model accuracy changes</h3>
    <ol>
        <li>Don't normalize the pixel values, see what happens</li>
        <li>Play with different epoch values (10, 20, ...)</li>
        <li>Modify the number of neurons in the Dense (hidden) layer following the Flatten layer. Try numbers as low as 10 and as high as 512 and note the effect on accuracy and training time</li>
        <li>Add an additional Dense (hidden) layer before the final Dense (output) layer, and experiment with different numbers of neurons in the layer</li>
        <li>Modify the learning rate and observe the impact it has on the training time and the accuracy of the model</li>    
    </ol>
</font>
</span>