In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical

# Note: you may see warnings about CUDA and GPU setup when importing TensorFlow.
# You can ignore these for now.
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def get_mnist_data(subset=True):
    """
    Load MNIST and carve it into train, validation and test partitions.

    The portion that mnist.load_data() returns as training data is split by
    position: its leading rows become the training partition and the rows
    immediately after them become the validation partition. The test
    partition is the leading rows of the portion load_data() returns as test.

    Partition sizes (train / validation / test):
        subset truthy: 5000 / 1000 / 1000
        subset falsy:  48000 / 12000 / 10000

    When subset=True only a subset of the dataset is returned. This is
    especially important if you are on datahub and only have 1-2GB of memory.

    Returns:
        The tuple (x_train, y_train, x_val, y_val, x_test, y_test).
    """
    # (train, validation, test) row counts for the requested mode.
    n_train, n_val, n_test = (5000, 1000, 1000) if subset else (48000, 12000, 10000)

    (pool_images, pool_labels), (test_images, test_labels) = mnist.load_data()

    # Row selectors; validation starts exactly where training stops.
    train_rows = slice(None, n_train)
    val_rows = slice(n_train, n_train + n_val)
    test_rows = slice(None, n_test)

    return (
        pool_images[train_rows, :, :],
        pool_labels[train_rows],
        pool_images[val_rows, :, :],
        pool_labels[val_rows],
        test_images[test_rows],
        test_labels[test_rows],
    )

# Load the six MNIST partitions.
# subset=True keeps the small subset; pass subset=False if you want the full dataset.
# Note that the full dataset will require 2+ GB of memory and will make training take longer.

(x_train, y_train,
 x_val, y_val,
 x_test, y_test) = get_mnist_data(subset=True)

def transform_data(xdata, ydata, num_classes=None):
    """
    Prepare image and label partitions for a dense network.

    Image partitions (xdata):
        1. Every example is flattened to a single dimension
           (28x28 images become vectors of length 784).
        2. Values are cast to float32 and divided by 255, which maps
           0-255 pixel values into [0, 1].
    Label partitions (ydata):
        - One-hot encoded with to_categorical, using the SAME number of
          columns for every partition.

    Args:
        xdata: image partitions ordered [train, val, test]; each is an array
            whose first axis indexes examples.
        ydata: label partitions ordered [train, val, test]; each holds
            integer class ids.
        num_classes: number of one-hot columns. When None (the default) it is
            the largest label found across all label partitions, plus one.

    Returns:
        The tuple (x_train, y_train, x_val, y_val, x_test, y_test).
    """
    ydata = list(ydata)  # iterated twice below (width inference, then encoding)

    if num_classes is None:
        # Left to itself, to_categorical infers the width from the single
        # partition it is handed, so a partition that happens to lack the
        # highest class would be encoded narrower than the others and would
        # no longer line up with the network's output layer.
        num_classes = max(int(np.max(labels)) for labels in ydata) + 1

    x = {}
    for name, partition in zip(["x_train", "x_val", "x_test"], xdata):
        # Per-example size is computed explicitly (rather than reshape(-1))
        # so that zero-row partitions still reshape cleanly.
        n_features = int(np.prod(partition.shape[1:]))
        flattened = partition.reshape((partition.shape[0], n_features))
        x[name] = flattened.astype('float32') / 255

    y = {
        name: to_categorical(partition, num_classes=num_classes)
        for name, partition in zip(["y_train", "y_val", "y_test"], ydata)
    }

    return x['x_train'], y['y_train'], x['x_val'], y['y_val'], x['x_test'], y['y_test']


(
    x_train_trans, y_train_trans,
    x_val_trans, y_val_trans,
    x_test_trans, y_test_trans,
) = transform_data(
    [x_train, x_val, x_test],
    [y_train, y_val, y_test],
)

## Challenge 1: Understanding the Input Data

1. Why do we split our data into train/validation/test sets?
2. What is the shape of our input data partitions?
3. What is the type of the data?

**BONUS:**

4. What is the distribution of the target classes within the data? Is it balanced?

In [None]:
# 1.1 Why do we split our data into train, validation, and test sets?

# We need to train our model and avoid overfitting.

# We use the training set to fit the model.
# We use the validation set to tune the hyperparameters of our model.
# We use the holdout test set to determine our final performance (generalization). 

In [None]:
# 1.2 What is the shape of our input data partitions?

# The training set contains 5000 examples
# The validation set contains 1000 examples 
# The test set contains 1000 examples
# The x arrays are 3-dimensional: (number of examples, 28, 28)
# The y arrays are 1-dimensional: (number of examples,)

In [None]:
# 1.3 What is the type of the data?
# Every partition is a numpy array.
list(map(type, (x_train, y_train, x_val, y_val, x_test, y_test)))

In [None]:
# BONUS 1.4

# Simply plot the histogram of each y in a different cell, for example
# plt.hist(y_train)
# plt.title('Train Class Distribution');
# repeat for y_val and y_test
 
# or make subplots
def plot_target_distributions(targets, titles):
    """
    Plot one histogram of target classes per partition, stacked vertically.

    Args:
        targets: sequence of label collections, one per partition.
        titles: sequence of partition names, paired with `targets` by
            position; each subplot is titled "<title> Class Distribution".

    Returns:
        Whatever plt.show() returns. When there are no (target, title)
        pairs, nothing is drawn and None is returned.
    """
    pairs = list(zip(targets, titles))
    if not pairs:
        return None

    n_plots = len(pairs)

    # One row per partition instead of a fixed three, so no axes are left
    # blank and no targets are silently dropped. The height scales so that
    # three rows keep the 10x10 figure size. squeeze=False keeps `axes`
    # two-dimensional even when there is a single row.
    fig, axes = plt.subplots(n_plots, 1, figsize=(10, 10 * n_plots / 3), squeeze=False)

    for ax, (target, title) in zip(axes[:, 0], pairs):
        ax.hist(target)
        ax.set_title(f"{title} Class Distribution")

    return plt.show()

plot_target_distributions(
    targets=[y_train, y_val, y_test],
    titles=["Train", "Validation", "Test"],
)
    

## Challenge 2: Build your own neural network

1. Build and compile your own neural network in an object called `my_network`. Feel free to choose your own:
    - Architecture
    - Activation Function
    - Epochs
    
2. Train your model, saving the results to an object called `history_my_network`.

In [None]:
# 2.1
# Example network: three hidden Dense layers of 512 ReLU units, each followed
# by Dropout(0.3), and a 10-unit softmax output layer.

my_network = Sequential([
    Dense(512, activation="relu", input_shape=(28*28,)),
    Dropout(0.3),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(10, activation="softmax"),
])

my_network.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# 2.2
# Fit on the transformed training partition for 20 epochs in batches of 128,
# passing the transformed validation partition as validation data.
history_my_network = my_network.fit(
    x=x_train_trans,
    y=y_train_trans,
    batch_size=128,
    epochs=20,
    validation_data=(x_val_trans, y_val_trans),
)


## Challenge 3: Evaluate your own model

Use your own model from challenge 2 to evaluate its general performance.

1. Visualize the training and validation accuracy over each epoch.
2. Print the accuracy of your model on the test set.

In [None]:
def plot_epoch_accuracy(history_dict):
    """
    Draw per-epoch training and validation accuracy curves on one figure.

    history_dict must map 'accuracy' and 'val_accuracy' to sequences holding
    one value per epoch. Epochs are numbered from 1 on the x axis.

    Returns the result of plt.show().
    """
    # Read both series up front so a missing key fails before anything is drawn.
    train_curve = history_dict['accuracy']
    val_curve = history_dict['val_accuracy']
    epoch_numbers = range(1, 1 + len(train_curve))

    # (values, line styling) for each curve, in drawing order.
    curves = (
        (train_curve, {'color': 'navy', 'alpha': 0.8, 'label': 'Training Accuracy'}),
        (val_curve, {'color': 'green', 'label': 'Validation Accuracy'}),
    )
    for values, line_style in curves:
        plt.plot(epoch_numbers, values, **line_style)

    plt.title('Training and validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    return plt.show()

def get_model_accuracy(model, x_test, y_test):
    """
    Score a model on held-out data and report its accuracy as a percentage.

    Calls model.evaluate(x_test, y_test, verbose=0) and treats the entry at
    index 1 of the result as the accuracy fraction (this assumes accuracy is
    the first metric the model was compiled with, as in this notebook).

    Returns:
        The accuracy multiplied by 100 and rounded to one decimal place.
    """
    evaluation = model.evaluate(x_test, y_test, verbose=0)
    accuracy_fraction = evaluation[1]
    return round(accuracy_fraction * 100, 1)

In [None]:
# 3.1 Training vs. validation accuracy over each epoch
plot_epoch_accuracy(history_dict=history_my_network.history)

In [None]:
# 3.2 Accuracy (in percent) on the test set
get_model_accuracy(model=my_network, x_test=x_test_trans, y_test=y_test_trans)