# Task 1 - Setup and Data Preparation
For this task, you will:
- Import necessary packages for executing the code
- Install the EMNIST package
- Load the EMNIST (letters) data and study its basic features such as its shape
- Convert the pixel gray levels of the images into the range [0,1]
- One-hot encode the class labels in the data
- Flatten the image data into 1-D arrays

In [None]:
# Import 'numpy' and 'pandas' to work with numbers and dataframes
import numpy as np
import pandas as pd

# Import 'pyplot' from 'matplotlib' and 'seaborn' for visualizations
from matplotlib import pyplot as plt
import seaborn as sns

# Import methods for building neural networks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Import 'GridSearchCV' for hyperparameter tuning
from sklearn.model_selection import GridSearchCV

# Import 'KerasClassifier' from 'keras' for connecting neural networks with 'sklearn' and 'GridSearchCV'
from keras.wrappers.scikit_learn import KerasClassifier

# Import method to suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Install the EMNIST package
# Note: If you haven't already installed the EMNIST package, run the following code to do so
# !pip install emnist

In [None]:
# Load the 'letters' split of EMNIST as (images, labels) pairs for training and testing
# Note: The EMNIST data size is about 536 MB, so the download may take a couple of minutes
from emnist import extract_test_samples, extract_training_samples

emnist_split = 'letters'
X_train, y_train = extract_training_samples(emnist_split)
X_test, y_test = extract_test_samples(emnist_split)

In [None]:
# Report the dimensions of the image arrays
# Expected: train (124800, 28, 28) and test (20800, 28, 28), i.e. one 28x28 pixel grid per image
shape_report = (
    ('Train data shape: ', X_train.shape),
    ('Test data shape: ', X_test.shape),
)
for caption, array_shape in shape_report:
    print(caption, array_shape)

In [None]:
# Count how many training samples carry each class label
train_classes, train_counts = np.unique(y_train, return_counts = True)
train_frequencies = dict(zip(train_classes, train_counts))
print('Train labels: ', train_frequencies)

# Count how many testing samples carry each class label
test_classes, test_counts = np.unique(y_test, return_counts = True)
test_frequencies = dict(zip(test_classes, test_counts))
print('Test labels: ', test_frequencies)

print('\n')
print('Note that the labels 1, 2, 3, ..., 26 represent the 26 letters of the English alphabet.')

In [None]:
# Draw 25 random training images (indices are sampled independently, so repeats are possible)
# and show them on a 5x5 grid without axes
grid_side = 5
indices = np.random.randint(0, X_train.shape[0], size = grid_side * grid_side)

images, labels = X_train[indices], y_train[indices]

plt.figure(figsize = (5, 5))

# Subplot positions are 1-based, hence 'start = 1'
for position, image in enumerate(images, start = 1):
    plt.subplot(grid_side, grid_side, position)
    plt.imshow(image, cmap = 'gray')
    plt.axis('off')

plt.show()
plt.close('all')

In [None]:
# Rescale the pixel gray levels from [0, 255] to [0, 1], storing them as 32-bit floats
max_gray_level = 255
X_train, X_test = (pixels.astype('float32') / max_gray_level for pixels in (X_train, X_test))

In [None]:
# View a few randomly chosen training data images, each titled with its class label
indices = np.random.randint(0, X_train.shape[0], size = 10)
plt.figure(figsize = (16, 8))

# Subplot positions are 1-based, hence 'start = 1'
for indexcount, data_index in enumerate(indices, start = 1):
    plt.subplot(1, 10, indexcount)
    plt.imshow(X_train[data_index], cmap = 'gray')
    plt.title(str(y_train[data_index]))

# Adjust the subplot spacing BEFORE rendering - calling 'tight_layout()' after 'show()' has no effect on the figure that was displayed
plt.tight_layout()
plt.show()
plt.close('all')

In [None]:
# View a few randomly chosen testing data images, each titled with its class label
indices = np.random.randint(0, X_test.shape[0], size = 10)
plt.figure(figsize = (16, 8))

# Subplot positions are 1-based, hence 'start = 1'
for indexcount, data_index in enumerate(indices, start = 1):
    plt.subplot(1, 10, indexcount)
    plt.imshow(X_test[data_index], cmap = 'gray')
    plt.title(str(y_test[data_index]))

# Adjust the subplot spacing BEFORE rendering - calling 'tight_layout()' after 'show()' has no effect on the figure that was displayed
plt.tight_layout()
plt.show()
plt.close('all')

In [None]:
# One-hot encode the class labels with the 'to_categorical()' function
num_classes = 26

# Note: The labels are shifted down by 1 first so that the class indices start at 0 and end at 25
zero_based_train = y_train - 1
zero_based_test = y_test - 1

y_train = to_categorical(zero_based_train, num_classes)
y_test = to_categorical(zero_based_test, num_classes)

In [None]:
# View the first two training data images, each titled with its (now one-hot encoded) label vector
plt.figure(figsize = (14, 4))

for data_index in range(2):
    plt.subplot(1, 2, data_index + 1)
    plt.imshow(X_train[data_index], cmap = 'gray')
    plt.title(str(y_train[data_index]))

# Adjust the subplot spacing BEFORE rendering - calling 'tight_layout()' after 'show()' has no effect on the figure that was displayed
plt.tight_layout()
plt.show()
plt.close('all')

In [None]:
# View the first two testing data images, each titled with its (now one-hot encoded) label vector
plt.figure(figsize = (14, 4))

for data_index in range(2):
    plt.subplot(1, 2, data_index + 1)
    plt.imshow(X_test[data_index], cmap = 'gray')
    plt.title(str(y_test[data_index]))

# Adjust the subplot spacing BEFORE rendering - calling 'tight_layout()' after 'show()' has no effect on the figure that was displayed
plt.tight_layout()
plt.show()
plt.close('all')

In [None]:
# Flatten every 28x28 image into a single row of 784 values
image_vector_size = 28 * 28

n_train_images, n_test_images = X_train.shape[0], X_test.shape[0]

X_train = X_train.reshape(n_train_images, image_vector_size)
X_test = X_test.reshape(n_test_images, image_vector_size)

# Task 2 - FCFNN (Fully Connected Feedforward Neural Network)
For this task, you will perform the following steps:
- Build a simple neural network (1 hidden layer and 2 neurons in the hidden layer) using *keras* and train it on the training data
- Check its performance on the testing data

In [None]:
# Create a neural network with 1 hidden layer and 2 neurons in the hidden layer and train it on the training data

# Declare an instance of an artificial neural network model using the 'Sequential()' method
nn1 = Sequential()

# Add the first hidden layer using the 'add()' and 'Dense()' methods
# Note: Set the 'units' parameter to 2 - This specifies the number of neurons in the hidden layer
# Note: Set the 'input_shape' parameter to (image_vector_size, ) - This specifies the number of input features for each record
# Note: Set the 'activation' parameter to 'sigmoid' - This specifies the type of activation function to use for the neurons in this layer
nn1.add(Dense(units = 2,
              input_shape = (image_vector_size, ),
              activation = 'sigmoid'))

# Add the output layer using the 'add()' and 'Dense()' methods
# Note: Set the 'units' parameter to 'num_classes' - Multiclass classification with 26 classes requires 26 output neurons
# Note: Set the 'activation' parameter to 'softmax' - The softmax activation function is commonly used for output layer neurons in multiclass classification tasks
nn1.add(Dense(units = num_classes,
              activation = 'softmax'))

# Compile the model using the 'compile()' method
# Note: Set the 'loss' parameter to 'categorical_crossentropy' - The categorical crossentropy loss function is commonly used for multiclass classification tasks
# Note: Set the 'metrics' parameter to ['accuracy'] - This records the accuracy of the model along with the loss during training
# Note: 'metrics' is passed as a list, which is the documented form; newer Keras releases reject a bare string
# Note: No 'optimizer' is passed, so the default optimizer of 'compile()' is used
nn1.compile(loss = 'categorical_crossentropy',
            metrics = ['accuracy'])

# Train the model on the training data using the 'fit()' method
# Note: Set the 'validation_split' parameter to 0.2 - This sets aside 20% of the training data as validation data
# Note: Set the 'epochs' parameter to 200 - This specifies the number of complete passes over the training data
# Note: Set the 'batch_size' to 'X_train.shape[0]' - This is at least as large as the data left after the validation split,
#       so every epoch performs one full-batch parameter update instead of many updates with the default batch size of 32
nn1.summary()
print('\n')
nn1.fit(X_train, y_train, batch_size = X_train.shape[0], validation_split = 0.2, epochs = 200)

In [None]:
# Retrain the model from scratch and this time, save its training history

# Declare a fresh instance of an artificial neural network model using the 'Sequential()' method
# Note: Rebinding 'nn1' discards the previously trained model, so training restarts from new initial weights
nn1 = Sequential()

# Add the first hidden layer using the 'add()' and 'Dense()' methods
# Note: Set the 'units' parameter to 2
# Note: Set the 'input_shape' parameter to (image_vector_size, )
# Note: Set the 'activation' parameter to 'sigmoid'
nn1.add(Dense(units = 2,
              input_shape = (image_vector_size, ),
              activation = 'sigmoid'))

# Add the output layer using the 'add()' and 'Dense()' methods
# Note: Set the 'units' parameter to 'num_classes'
# Note: Set the 'activation' parameter to 'softmax'
nn1.add(Dense(units = num_classes,
              activation = 'softmax'))

# Compile the model using the 'compile()' method
# Note: Set the 'loss' parameter to 'categorical_crossentropy'
# Note: Set the 'metrics' parameter to ['accuracy'] - passed as a list, which is the documented form; newer Keras releases reject a bare string
nn1.compile(loss = 'categorical_crossentropy',
            metrics = ['accuracy'])

# Capture the training history of the model using the 'fit()' method
# Note: Set the 'validation_split' parameter to 0.2
# Note: Set the 'epochs' parameter to 200
# Note: Set the 'batch_size' to 'X_train.shape[0]'
# Note: The object returned by 'fit()' exposes the per-epoch metrics through its 'history' and 'epoch' attributes
nn1.summary()
print('\n')
nn1_history = nn1.fit(X_train, y_train, batch_size = X_train.shape[0], validation_split = 0.2, epochs = 200)

In [None]:
# Tabulate the per-epoch training history, numbering the epochs from 1 instead of 0
hist = pd.DataFrame(nn1_history.history)
hist['epoch'] = [epoch_number + 1 for epoch_number in nn1_history.epoch]

# Display the table indexed by epoch; the result is not assigned, so 'hist' keeps 'epoch' as a regular column
hist.set_index('epoch')

In [None]:
# Plot the training and validation accuracy curves against the epoch number
plt.figure(figsize = (14, 4))

# One (history column, line color, legend label) triple per curve
accuracy_curves = (('accuracy', 'red', 'Training'),
                   ('val_accuracy', 'blue', 'Validation'))

for metric_column, line_color, legend_label in accuracy_curves:
    sns.lineplot(data = hist, x = 'epoch', y = metric_column, color = line_color, label = legend_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy as a Function of Epoch');

In [None]:
# Score the trained model on the held-out testing data with the 'evaluate()' method
performance_test = nn1.evaluate(X_test, y_test)

# The first entry is reported as the loss and the second as the accuracy
test_loss, test_accuracy = performance_test[0], performance_test[1]

print('The loss value of the model on the test data is {}'.format(test_loss))
print('The accuracy of the model on the test data is {}'.format(test_accuracy))

# Task 3 - FCFNN Hyperparameter Tuning: Number of Neurons
For this task, you will perform the following steps:
- Build a neural network (1 hidden layer and tuned for number of neurons) using *keras* and train it on the training data
- View the impact of number of neurons on the accuracy of the model

In [None]:
# Declare a range of number of neurons per hidden layer to tune for
n_neurons_list = [2, 8, 32, 128]

# Create and train a neural network model for each value of number of neurons per hidden layer
# Note: 'hist' collects one per-epoch history data frame per model, in the same order as 'n_neurons_list'
# Note: 'performance_rows' collects one summary row per model; the data frame is built once after the loop
hist = []
performance_rows = []

for indexcount, n_neu in enumerate(n_neurons_list):

    # Declare an instance of an artificial neural network model using the 'Sequential()' method
    nn = Sequential()

    # Add the first hidden layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 'n_neu' - This specifies the current number of neurons per hidden layer
    # Note: Set the 'input_shape' parameter to (image_vector_size, )
    # Note: Set the 'activation' parameter to 'sigmoid'
    nn.add(Dense(units = n_neu,
                 input_shape = (image_vector_size, ),
                 activation = 'sigmoid'))

    # Add the output layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 'num_classes'
    # Note: Set the 'activation' parameter to 'softmax'
    nn.add(Dense(units = num_classes,
                 activation = 'softmax'))

    # Compile the model using the 'compile()' method
    # Note: Set the 'loss' parameter to 'categorical_crossentropy'
    # Note: Set the 'metrics' parameter to ['accuracy'] - passed as a list, which is the documented form; newer Keras releases reject a bare string
    nn.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

    # Capture the training history of the model using the 'fit()' method
    # Note: Set the 'validation_split' parameter to 0.2
    # Note: Set the 'epochs' parameter to 200
    # Note: Set the 'batch_size' to 'X_train.shape[0]'
    print('\n Training and validation for {} neurons - START \n'.format(n_neu))
    nn.summary()
    print('\n')
    nn_history = nn.fit(X_train, y_train, batch_size = X_train.shape[0], validation_split = 0.2, epochs = 200)
    print('\n Training and validation for {} neurons - END \n'.format(n_neu))

    epoch_history = pd.DataFrame(nn_history.history)
    epoch_history['epoch'] = nn_history.epoch
    hist.append(epoch_history)

    # Summarize the model by its accuracies after the final epoch
    # Note: '.iloc[-1]' picks the last recorded epoch, so this stays correct if the 'epochs' value is changed
    performance_rows.append({'Number of Neurons': n_neu,
                             'Train Accuracy': epoch_history['accuracy'].iloc[-1],
                             'Validation Accuracy': epoch_history['val_accuracy'].iloc[-1]})

# One row per model, indexed 0, 1, 2, ... in the order of 'n_neurons_list'
performance_df = pd.DataFrame(performance_rows)

In [None]:
# Plot the final training and validation accuracies against the number of neurons
plt.figure(figsize = (14, 4))

# One (summary column, line color, legend label) triple per curve
accuracy_curves = (('Train Accuracy', 'red', 'Training'),
                   ('Validation Accuracy', 'blue', 'Validation'))

for accuracy_column, line_color, legend_label in accuracy_curves:
    sns.lineplot(data = performance_df, x = 'Number of Neurons', y = accuracy_column, color = line_color, label = legend_label)

plt.xlabel('Number of Neurons')
plt.ylabel('Accuracy')
plt.title('Accuracy as a Function of Number of Neurons');

In [None]:
# Overlay the training accuracy curves (accuracy against epoch), one curve per number of neurons
plt.figure(figsize = (14, 4))
colorlist = ['purple', 'green', 'blue', 'red']

# 'hist' and 'colorlist' are indexed in the same order as 'n_neurons_list'
for indexcount, n_neu in enumerate(n_neurons_list):
    curve_label = str(n_neu) + ' neurons'
    sns.lineplot(data = hist[indexcount], x = 'epoch', y = 'accuracy',
                 color = colorlist[indexcount], label = curve_label)

plt.xlabel('Epoch')
plt.ylabel('Training Accuracy')
plt.title('Training Accuracy as a Function of Epoch for Different Values of Number of Neurons');

In [None]:
# Overlay the validation accuracy curves (accuracy against epoch), one curve per number of neurons
plt.figure(figsize = (14, 4))
colorlist = ['purple', 'green', 'blue', 'red']

# 'hist' and 'colorlist' are indexed in the same order as 'n_neurons_list'
for indexcount, n_neu in enumerate(n_neurons_list):
    curve_label = str(n_neu) + ' neurons'
    sns.lineplot(data = hist[indexcount], x = 'epoch', y = 'val_accuracy',
                 color = colorlist[indexcount], label = curve_label)

plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy as a Function of Epoch for Different Values of Number of Neurons');

# Task 4 - FCFNN Hyperparameter Tuning: Number of Hidden Layers
For this task, you will perform the following steps:
- Build a neural network (2 neurons per hidden layer and tuned for the number of hidden layers) using *keras* and train it on the training data
- View the impact of number of hidden layers on the accuracy of the model

In [None]:
# Declare a range of number of hidden layers to tune for
n_hidden_list = [1, 2, 3, 4]

# Create and train a neural network model for each value of number of hidden layers
# Note: 'hist' collects one per-epoch history data frame per model, in the same order as 'n_hidden_list'
# Note: 'performance_rows' collects one summary row per model; the data frame is built once after the loop
hist = []
performance_rows = []

for indexcount, n_hid in enumerate(n_hidden_list):

    # Declare an instance of an artificial neural network model using the 'Sequential()' method
    nn = Sequential()

    # Add the first hidden layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 2
    # Note: Set the 'input_shape' parameter to (image_vector_size, )
    # Note: Set the 'activation' parameter to 'sigmoid'
    nn.add(Dense(units = 2,
                 input_shape = (image_vector_size, ),
                 activation = 'sigmoid'))

    # Add the remaining 'n_hid - 1' hidden layers using the 'add()' and 'Dense()' methods and a 'for' loop
    # Note: Set the 'units' parameter to 2
    # Note: Set the 'activation' parameter to 'sigmoid'
    # Note: The 'input_shape' parameter is derived from the previous layer automatically
    for _ in range(n_hid - 1):
        nn.add(Dense(units = 2,
                     activation = 'sigmoid'))

    # Add the output layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 'num_classes'
    # Note: Set the 'activation' parameter to 'softmax'
    nn.add(Dense(units = num_classes,
                 activation = 'softmax'))

    # Compile the model using the 'compile()' method
    # Note: Set the 'loss' parameter to 'categorical_crossentropy'
    # Note: Set the 'metrics' parameter to ['accuracy'] - passed as a list, which is the documented form; newer Keras releases reject a bare string
    nn.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

    # Capture the training history of the model using the 'fit()' method
    # Note: Set the 'validation_split' parameter to 0.2
    # Note: Set the 'epochs' parameter to 200
    # Note: Set the 'batch_size' to 'X_train.shape[0]'
    print('\n Training and validation for {} hidden layers - START \n'.format(n_hid))
    nn.summary()
    print('\n')
    nn_history = nn.fit(X_train, y_train, batch_size = X_train.shape[0], validation_split = 0.2, epochs = 200)
    print('\n Training and validation for {} hidden layers - END \n'.format(n_hid))

    epoch_history = pd.DataFrame(nn_history.history)
    epoch_history['epoch'] = nn_history.epoch
    hist.append(epoch_history)

    # Summarize the model by its accuracies after the final epoch
    # Note: '.iloc[-1]' picks the last recorded epoch, so this stays correct if the 'epochs' value is changed
    performance_rows.append({'Number of Hidden Layers': n_hid,
                             'Train Accuracy': epoch_history['accuracy'].iloc[-1],
                             'Validation Accuracy': epoch_history['val_accuracy'].iloc[-1]})

# One row per model, indexed 0, 1, 2, ... in the order of 'n_hidden_list'
performance_df = pd.DataFrame(performance_rows)

In [None]:
# Plot the final training and validation accuracies against the number of hidden layers
plt.figure(figsize = (14, 4))

# One (summary column, line color, legend label) triple per curve
accuracy_curves = (('Train Accuracy', 'red', 'Training'),
                   ('Validation Accuracy', 'blue', 'Validation'))

for accuracy_column, line_color, legend_label in accuracy_curves:
    sns.lineplot(data = performance_df, x = 'Number of Hidden Layers', y = accuracy_column, color = line_color, label = legend_label)

plt.xlabel('Number of Hidden Layers')
plt.ylabel('Accuracy')
plt.title('Accuracy as a Function of Number of Hidden Layers');

In [None]:
# Overlay the training accuracy curves (accuracy against epoch), one curve per number of hidden layers
plt.figure(figsize = (14, 4))
colorlist = ['purple', 'green', 'blue', 'red']

# 'hist' and 'colorlist' are indexed in the same order as 'n_hidden_list'
for indexcount, n_hid in enumerate(n_hidden_list):
    # Use the singular form in the legend for a single hidden layer
    currentlabel = '1 hidden layer' if n_hid == 1 else str(n_hid) + ' hidden layers'
    sns.lineplot(data = hist[indexcount], x = 'epoch', y = 'accuracy',
                 color = colorlist[indexcount], label = currentlabel)

plt.xlabel('Epoch')
plt.ylabel('Training Accuracy')
plt.title('Training Accuracy as a Function of Epoch for Different Values of Number of Hidden Layers');

In [None]:
# Overlay the validation accuracy curves (accuracy against epoch), one curve per number of hidden layers
plt.figure(figsize = (14, 4))
colorlist = ['purple', 'green', 'blue', 'red']

# 'hist' and 'colorlist' are indexed in the same order as 'n_hidden_list'
for indexcount, n_hid in enumerate(n_hidden_list):
    # Use the singular form in the legend for a single hidden layer
    currentlabel = '1 hidden layer' if n_hid == 1 else str(n_hid) + ' hidden layers'
    sns.lineplot(data = hist[indexcount], x = 'epoch', y = 'val_accuracy',
                 color = colorlist[indexcount], label = currentlabel)

plt.xlabel('Epoch')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy as a Function of Epoch for Different Values of Number of Hidden Layers');

# Task 5 - FCFNN Hyperparameter Tuning: Combinations of Hyperparameters
For this task, you will perform the following steps:
- Build a neural network (tuned for both the number of hidden layers and the number of neurons per hidden layer) using *keras* and train it on the training data
- Test its performance on the testing data

In [None]:
# Define a function to create a neural network model
# Note: Declare the number of hidden layers and the number of neurons per hidden layer as variable parameters of the function
def create_nn(n_hidden = 1, n_neurons = 2):
    """Build and compile a fully connected network for the flattened images.

    The model reads the notebook-level names 'image_vector_size' (number of
    input features) and 'num_classes' (number of output neurons).

    Parameters:
        n_hidden: Number of hidden layers. The first hidden layer is always
            added, so values below 1 still produce one hidden layer.
        n_neurons: Number of neurons in every hidden layer.

    Returns:
        The compiled 'Sequential' model; it has not been trained.
    """

    # Declare an instance of an artificial neural network model using the 'Sequential()' method
    nn = Sequential()

    # Add the first hidden layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 'n_neurons' - This specifies the selected number of neurons per hidden layer
    # Note: Set the 'input_shape' parameter to (image_vector_size, )
    # Note: Set the 'activation' parameter to 'sigmoid'
    nn.add(Dense(units = n_neurons,
                 input_shape = (image_vector_size, ),
                 activation = 'sigmoid'))

    # Add the remaining 'n_hidden - 1' hidden layers using the 'add()' and 'Dense()' methods and a 'for' loop
    # Note: Set the 'units' parameter to 'n_neurons' - This specifies the selected number of neurons per hidden layer
    # Note: Set the 'activation' parameter to 'sigmoid'
    # Note: The 'input_shape' parameter is derived from the previous layer automatically
    for _ in range(n_hidden - 1):
        nn.add(Dense(units = n_neurons,
                     activation = 'sigmoid'))

    # Add the output layer using the 'add()' and 'Dense()' methods
    # Note: Set the 'units' parameter to 'num_classes'
    # Note: Set the 'activation' parameter to 'softmax'
    nn.add(Dense(units = num_classes,
                 activation = 'softmax'))

    # Compile the model using the 'compile()' method
    # Note: Set the 'loss' parameter to 'categorical_crossentropy'
    # Note: Set the 'metrics' parameter to ['accuracy'] - passed as a list, which is the documented form; newer Keras releases reject a bare string
    nn.compile(loss = 'categorical_crossentropy',
               metrics = ['accuracy'])

    return nn

In [None]:
# Wrap the 'create_nn' function in a 'KerasClassifier()' object so that 'GridSearchCV()' can use it as an estimator
# Note: The 'build_fn' parameter names the function that builds a new model
base_grid_model = KerasClassifier(build_fn = create_nn)

# Candidate values for 'n_hidden' and 'n_neurons'; the keys match the parameter names of 'create_nn'
# Note: 2 values for each parameter give 4 combinations to search over
parameters_grid = {'n_hidden': [1, 2], 'n_neurons': [32, 128]}

# Set up the grid search using the 'GridSearchCV()' method
# Note: The first two arguments are the estimator and the parameter grid to search over
# Note: Set the 'cv' parameter to 2 - This specifies the number of folds in the cross-validation process
# Note: Set the 'verbose' parameter to 4 - This helps show more relevant information during training
grid = GridSearchCV(base_grid_model, parameters_grid, cv = 2, verbose = 4)

# Fit the grid on the training data using the 'fit()' method
# Note: The 'epochs' (200) and 'batch_size' ('X_train.shape[0]') arguments are given to 'fit()' as extra keyword arguments
# Note: The 'validation_split' parameter isn't particularly required since cross-validation is already in place
grid_model = grid.fit(X_train, y_train, epochs = 200, batch_size = X_train.shape[0])

# Read off the optimal values of 'n_hidden' and 'n_neurons' together with the best score found by the search
best_params = grid_model.best_params_
best_n_hidden, best_n_neurons = best_params['n_hidden'], best_params['n_neurons']
best_accuracy = grid_model.best_score_

print('The optimal value of number of hidden layers is', best_n_hidden)
print('The optimal value of number of neurons per hidden layer is', best_n_neurons)
print('The accuracy of the model with these optimal parameters is ', best_accuracy)

In [None]:
# Build a new model with the optimal hyperparameters, train it and keep its per-epoch history

# 'create_nn' takes the number of hidden layers first and the number of neurons per hidden layer second
nn2 = create_nn(best_n_hidden, best_n_neurons)

nn2.summary()
print('\n')

# Capture the training history of the model using the 'fit()' method
# Note: 20% of the training data is set aside for validation, training runs for 200 epochs,
#       and the batch size is set to 'X_train.shape[0]'
nn2_history = nn2.fit(X_train, y_train,
                      epochs = 200,
                      validation_split = 0.2,
                      batch_size = X_train.shape[0])

# Tabulate the history with the epoch number appended as the last column
hist = pd.DataFrame(nn2_history.history).assign(epoch = nn2_history.epoch)

In [None]:
# Plot the training and validation accuracy curves against the epoch number
plt.figure(figsize = (14, 4))

# One (history column, line color, legend label) triple per curve
accuracy_curves = (('accuracy', 'red', 'Training'),
                   ('val_accuracy', 'blue', 'Validation'))

for metric_column, line_color, legend_label in accuracy_curves:
    sns.lineplot(data = hist, x = 'epoch', y = metric_column, color = line_color, label = legend_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy as a Function of Epoch');

In [None]:
# Score the tuned model on the held-out testing data with the 'evaluate()' method
performance_test = nn2.evaluate(X_test, y_test)

# The first entry is reported as the loss and the second as the accuracy
test_loss, test_accuracy = performance_test[0], performance_test[1]

print('The loss value of the model on the test data is {}'.format(test_loss))
print('The accuracy of the model on the test data is {}'.format(test_accuracy))