<a href="https://colab.research.google.com/github/dimou-gk/Deep-dive-ML/blob/main/Deep_Neural_Networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import pandas as pd
from pandas import DataFrame
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
import matplotlib.pyplot as plt
import numpy as np

# 2-4. Load Dataset & Preprocessing

In [None]:
# Fetch MNIST and unpack the (images, labels) pair of each split.
_train_split, _test_split = mnist.load_data()
x_train, y_train = _train_split
x_test, y_test = _test_split

In [None]:
def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    The global minimum maps to 0 and the global maximum to 1.

    Args:
        data: Array-like of numeric values.

    Returns:
        A NumPy array with the same shape as ``data``. Integer and boolean
        inputs are returned as float64; floating inputs keep their dtype.
        If every element is equal, an all-zero array is returned instead of
        dividing by zero.
    """
    values = np.asarray(data)
    if values.dtype.kind in 'biu':
        # Promote before subtracting: integer arithmetic in the input dtype
        # can wrap around (e.g. int8 100 - (-100)) and corrupt the result.
        values = values.astype(np.float64)

    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        # Constant input: avoid 0/0 producing NaNs.
        return np.zeros_like(values)
    return (values - lowest) / span

# Rescale the train and test images independently with the min-max helper.
x_train_normalized, x_test_normalized = (
    normalize(images) for images in (x_train, x_test)
)

In [None]:
# Turn the integer class labels of both splits into one-hot vectors of length 10.
y_train_one_hot, y_test_one_hot = (
    tf.one_hot(indices=labels, depth=10) for labels in (y_train, y_test)
)

In [None]:
import random

# Default seed used when set_seeds() is called without an argument.
SEED = 0


def set_seeds(seed=SEED):
    """Seed Python's, TensorFlow's and NumPy's random number generators.

    Args:
        seed: Integer seed passed to all three generators (defaults to SEED).
    """
    # Same order as before: Python stdlib, then TensorFlow, then NumPy.
    for seeder in (random.seed, tf.random.set_seed, np.random.seed):
        seeder(seed)

# 5. Creation of MLP Neural Network

In [None]:
# Hyperparameters of the fully connected classifier.
input_shape = (784,)      # one flattened 28x28 image per sample
num_hidden_layers = 2
units_per_layer = 128
activation = 'relu'
num_classes = 10

# Build the network layer by layer: input -> hidden Dense stack -> softmax output.
mlp = tf.keras.Sequential(name='my-mlp')
mlp.add(tf.keras.layers.Input(input_shape, name='input_layer'))

for layer_number in range(1, num_hidden_layers + 1):
    hidden = tf.keras.layers.Dense(
        units=units_per_layer,
        activation=activation,
        name=f'hidden_layer_{layer_number}',
    )
    mlp.add(hidden)

output = tf.keras.layers.Dense(units=num_classes, activation='softmax', name='output_layer')
mlp.add(output)

mlp.summary()

Our MLP neural network has the following architecture. First we define an input layer that accepts flattened 784-dimensional vectors. Then we add two fully connected hidden layers, each with 128 units and ReLU as the activation function. Finally we add a fully connected output layer with softmax as the activation function, which returns the probability of the input belonging to each of our 10 classes (the probabilities sum to 1).

# 6. Creation of CNN

In [None]:
# Hyperparameters of the convolutional classifier.
num_classes = 10
num_conv_layers = 2
num_filters = 64
kernel_size = (5, 5)
strides = (1, 1)
mlp_num_hidden_layers = 2
mlp_units_per_layer = 128
activation = 'relu'
dropout_rate = 0.4

# Add an explicit channel axis so every image has shape (28, 28, 1), as Conv2D expects.
x_train_cnn = np.reshape(x_train_normalized, (x_train_normalized.shape[0], 28, 28, 1))
x_test_cnn = np.reshape(x_test_normalized, (x_test_normalized.shape[0], 28, 28, 1))

cnn = tf.keras.Sequential(name='my-cnn')
cnn.add(tf.keras.layers.Input((28, 28, 1), name='input_layer'))

# CNN: each stage is Conv2D -> Dropout -> MaxPooling2D.
for i in range(num_conv_layers):
    cnn.add(
        tf.keras.layers.Conv2D(
            filters=num_filters,
            kernel_size=kernel_size,
            strides=strides,
            activation=activation,
            name=f'conv_layer_{i+1}'
        )
    )
    # Use the configured rate; it was previously hard-coded to 0.4, so
    # changing dropout_rate above had no effect on the model.
    cnn.add(tf.keras.layers.Dropout(rate=dropout_rate, name=f'dropout_{i+1}'))
    cnn.add(tf.keras.layers.MaxPooling2D(name=f'maxpool_{i+1}'))

# Flattening: collapse the feature maps into one vector per sample.
cnn.add(tf.keras.layers.Flatten())

# MLP head: Dense hidden layers followed by a softmax classifier.
for i in range(mlp_num_hidden_layers):
    cnn.add(tf.keras.layers.Dense(units=mlp_units_per_layer, activation=activation, name=f'hidden_layer_{i+1}'))
cnn.add(tf.keras.layers.Dense(units=num_classes, activation='softmax', name='output_layer'))
cnn.summary()

This model combines a CNN with an MLP.

* Input Layer: It takes grayscale images of size 28x28x1.
* CNN Block: It has 2 convolutional layers with 64 filters of size (5x5), ReLU activation, followed by Dropout (0.4) and MaxPooling2D layers for each convolutional layer.
* Flattening: The output of the CNN is flattened into a 1D vector.
* MLP Block: It has 2 Dense (fully connected) layers with 128 units each and ReLU activation.
* Output Layer: A final Dense layer with softmax activation outputs predictions for 10 classes.

# Training & Testing

In [None]:
# Training configuration shared by the training cells.
# The Adam optimizer is created inside each compile() call rather than here.
epochs = 50
batch_size = 32
metrics = ['accuracy']
loss = tf.keras.losses.CategoricalCrossentropy()

In [None]:
# Flatten every 28x28 image into a 784-vector for the MLP.
# The flattened arrays are built from the *normalized* pixels: reshaping the raw
# x_train / x_test here fed unscaled 0-255 values to the network while keeping
# the "*_normalized" names. Re-normalizing from the raw arrays also keeps this
# cell correct when it is re-run.
x_train_normalized = normalize(x_train).reshape(-1, 28 * 28)
x_test_normalized = normalize(x_test).reshape(-1, 28 * 28)

# Training batches are shuffled; evaluation batches keep their original order.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train_normalized, y_train_one_hot))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

test_dataset = tf.data.Dataset.from_tensor_slices((x_test_normalized, y_test_one_hot))
test_dataset = test_dataset.batch(batch_size)

# Device string reported by TensorFlow for the GPU.
# NOTE(review): presumably an empty string when no GPU is present - confirm the
# tf.device() fallback behaves as intended on CPU-only runtimes.
device_name = tf.test.gpu_device_name()
with tf.device(device_name):
    mlp.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(1e-3),
                metrics=['accuracy'],
                )

    # The test split doubles as validation data for the per-epoch metrics.
    history = mlp.fit(x=train_dataset, epochs=epochs, validation_data=test_dataset, verbose=1)

Epoch 1/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8344 - loss: 4.6852 - val_accuracy: 0.9231 - val_loss: 0.4588
Epoch 2/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9244 - loss: 0.3857 - val_accuracy: 0.9284 - val_loss: 0.3119
Epoch 3/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9440 - loss: 0.2255 - val_accuracy: 0.9397 - val_loss: 0.2519
Epoch 4/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9543 - loss: 0.1694 - val_accuracy: 0.9476 - val_loss: 0.1983
Epoch 5/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9604 - loss: 0.1412 - val_accuracy: 0.9462 - val_loss: 0.1958
Epoch 6/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9649 - loss: 0.1290 - val_accuracy: 0.9539 - val_loss: 0.1917
Epoch 7/50
[1m1

In [None]:
# Input pipelines for the CNN: image tensors with a channel axis plus one-hot labels.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train_cnn, y_train_one_hot))
    .shuffle(buffer_size=1024)
    .batch(batch_size)
)

test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test_cnn, y_test_one_hot))
    .batch(batch_size)
)

# Compile and fit on the device selected earlier (device_name).
with tf.device(device_name):
    cnn.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss=loss,
        metrics=['accuracy'],
    )

    # Note: this rebinds `history`, replacing whatever it held before.
    history = cnn.fit(
        x=train_dataset,
        validation_data=test_dataset,
        epochs=epochs,
        verbose=1,
    )

Epoch 1/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9068 - loss: 0.2852 - val_accuracy: 0.9843 - val_loss: 0.0970
Epoch 2/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9832 - loss: 0.0570 - val_accuracy: 0.9877 - val_loss: 0.0674
Epoch 3/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9869 - loss: 0.0446 - val_accuracy: 0.9881 - val_loss: 0.0523
Epoch 4/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9892 - loss: 0.0353 - val_accuracy: 0.9911 - val_loss: 0.0406
Epoch 5/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9902 - loss: 0.0306 - val_accuracy: 0.9919 - val_loss: 0.0418
Epoch 6/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9920 - loss: 0.0268 - val_accuracy: 0.9908 - val_loss: 0.0333
Epoch 7/50
[1

Our CNN model's performance is near perfect, but we could try to improve it further by using a different form of pooling or by adjusting the learning rate with a scheduler. Finally, we could add residual connections.