<a href="https://colab.research.google.com/github/ihedges9/Python-Projects/blob/main/dnn_mnist_HW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Homework - DNN

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

## Preprocessing

In [40]:
# the data, shuffled and split between train and test sets
(X_train_pre, y_train_pre), (X_test_pre, y_test_pre) = mnist.load_data()

In [41]:
input_dim = 784 # 28*28
X_train = X_train_pre.reshape(X_train_pre.shape[0], input_dim)
X_test = X_test_pre.reshape(X_test_pre.shape[0], input_dim)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.
X_test /= 255.

print('training data shape:', X_train.shape)
print('{} train samples, {} test samples'.format(X_train.shape[0], X_test.shape[0]))

training data shape: (60000, 784)
60000 train samples, 10000 test samples


In [45]:
num_classes = 10 # 0-9
y_train = keras.utils.to_categorical(y_train_pre, num_classes)
y_test = keras.utils.to_categorical(y_test_pre, num_classes)

In [46]:
validation_split = 0.166667
index = np.random.permutation(X_train.shape[0])

train_idx = int(X_train.shape[0]*validation_split)

X_val, X_train = X_train[index[:train_idx]], X_train[index[train_idx:]]
y_val, y_train = y_train[index[:train_idx]], y_train[index[train_idx:]]


## The models

### Build the model **1**

In [13]:
model = keras.Sequential()
model.add(layers.Input(shape=(784,)))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='elu'))
model.add(layers.Dense(128, activation='selu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.RMSprop(0.005),
              metrics=['accuracy'])

In [14]:
model.summary()

In [15]:
EPOCHS = 25

history = model.fit(X_train, y_train,
                    batch_size=128, epochs=EPOCHS,
                    validation_data=(X_val, y_val))

Epoch 1/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.7796 - loss: 1.3099 - val_accuracy: 0.9177 - val_loss: 0.2898
Epoch 2/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.9521 - loss: 0.1661 - val_accuracy: 0.9442 - val_loss: 0.2095
Epoch 3/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9675 - loss: 0.1130 - val_accuracy: 0.9621 - val_loss: 0.1369
Epoch 4/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 17ms/step - accuracy: 0.9751 - loss: 0.0858 - val_accuracy: 0.9618 - val_loss: 0.1513
Epoch 5/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.9804 - loss: 0.0697 - val_accuracy: 0.9708 - val_loss: 0.1383
Epoch 6/25
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 22ms/step - accuracy: 0.9830 - loss: 0.0604 - val_accuracy: 0.9633 - val_loss: 0.1747
Epoch 7/25
[1m326/

In [16]:
_, acc = model.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 97.71%


### Build the model **2**

In [17]:
model2 = keras.Sequential()
model2.add(layers.Input(shape=(784,)))
model2.add(layers.Dense(512, activation='sigmoid'))
model2.add(layers.Dense(128, activation='sigmoid'))
model2.add(layers.Dense(10, activation='sigmoid'))
model2.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(0.001),
              metrics=['accuracy'])

In [18]:
model2.summary()

In [19]:
EPOCHS = 50

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

history = model2.fit(X_train, y_train,
                    batch_size=128, epochs=EPOCHS,
                    validation_data=(X_val, y_val),
                    callbacks = [early_stop])

Epoch 1/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.7054 - loss: 1.1388 - val_accuracy: 0.9044 - val_loss: 0.3312
Epoch 2/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 20ms/step - accuracy: 0.9211 - loss: 0.2745 - val_accuracy: 0.9225 - val_loss: 0.2582
Epoch 3/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9392 - loss: 0.2093 - val_accuracy: 0.9380 - val_loss: 0.2098
Epoch 4/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.9541 - loss: 0.1592 - val_accuracy: 0.9473 - val_loss: 0.1789
Epoch 5/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.9616 - loss: 0.1335 - val_accuracy: 0.9521 - val_loss: 0.1600
Epoch 6/50
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 13ms/step - accuracy: 0.9676 - loss: 0.1093 - val_accuracy: 0.9558 - val_loss: 0.1384
Epoch 7/50
[1m326/326

In [20]:
_, acc = model2.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 97.76%


### Build the model **3**

In [55]:
model3 = keras.Sequential()
model3.add(layers.Input(shape=(784,)))
model3.add(layers.Dense(128, activation='softmax'))
model3.add(layers.Dense(64, activation='softmax'))
model3.add(layers.Dense(32, activation='softmax'))
model3.add(layers.Dense(10, activation='softmax'))
model3.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(0.005),
              metrics=['accuracy'])

In [56]:
model3.summary()

In [57]:
EPOCHS = 25

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

history = model3.fit(X_train, y_train,
                    batch_size=128, epochs=EPOCHS,
                    validation_data=(X_val, y_val),
                    callbacks = [early_stop])

Epoch 1/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.1369 - loss: 2.2144 - val_accuracy: 0.3094 - val_loss: 1.5947
Epoch 2/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.3354 - loss: 1.5203 - val_accuracy: 0.4066 - val_loss: 1.1948
Epoch 3/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.4256 - loss: 1.1689 - val_accuracy: 0.5000 - val_loss: 1.1072
Epoch 4/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.5300 - loss: 1.0796 - val_accuracy: 0.6938 - val_loss: 0.8713
Epoch 5/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7159 - loss: 0.8093 - val_accuracy: 0.7618 - val_loss: 0.7017
Epoch 6/25
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7695 - loss: 0.6434 - val_accuracy: 0.7910 - val_loss: 0.6056
Epoch 7/25
[1m391/391[0m 

In [58]:
_, acc = model3.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 94.85%


### Recommendations ###

Model 2 performed the best, but had a very marginal difference in accuracy with model 1 (difference of 0.05%). Model 3 performed worse, but model 3 originally used a 0.001 learning rate and only reached around 50% accuracy so increasing the learning rate can definitely improve the results.