# 3. Building network and training

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
import datetime

In [None]:
# Ask TensorFlow which GPU devices it can see, then report the count and the
# device list itself.
physical_devices = tf.config.list_physical_devices('GPU')
gpu_count = len(physical_devices)
print("Num GPUs:", gpu_count)
print(physical_devices)

In [None]:
import boto3
from boto3 import session
import os

# Pull the S3 connection settings from the environment; a variable that is
# not set comes back as None.
key_id, secret_key, bucket_name, s3_endpoint = (
    os.environ.get(var_name)
    for var_name in (
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_S3_BUCKET",
        "AWS_S3_ENDPOINT",
    )
)

# Client used for all S3 calls in this notebook.
s3_client = boto3.client(
    "s3",
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
    endpoint_url=s3_endpoint,
)

In [None]:
# Read the saved arrays from /tmp/ml-pipeline/data/. The files are loaded in
# the same order as the target names on the left-hand side.
X_train, y_train, X_val, y_val, X_test = (
    np.load(npy_path)
    for npy_path in (
        '/tmp/ml-pipeline/data/X_train.npy',
        '/tmp/ml-pipeline/data/y_train.npy',
        '/tmp/ml-pipeline/data/X_val.npy',
        '/tmp/ml-pipeline/data/y_val.npy',
        '/tmp/ml-pipeline/data/X_test.npy',
    )
)

> <div class="alert alert-block alert-info">
<b>The ConvBlocks in this network follow the LeNet design as shown above, with a few additions, including batch normalization and dropout, to keep the network training efficiently and to reduce the chance of overfitting.</b>
    <br>
    <b>You can find more on LeNet here: https://en.wikipedia.org/wiki/LeNet</b></div>

![network_architecture.png](attachment:b38057f5-39db-4ff3-a980-72966a741b96.png)

> <div class="alert alert-block alert-info">
<b>The above figure shows the best parameter combination I found after experimenting with a few variations. One thing to keep in mind when experimenting with CNNs is that increasing the number of filters from one convolutional layer to the next tends to work best.</b>
</div>

###  2.6 Custom f1, recall and precision metrics

> <div class="alert alert-block alert-info">
<b>We will now create some custom metrics to help us evaluate model performance. F1-score, recall and precision are not pre-defined metrics available in Keras, which is why we have to create them ourselves.</b></div>

> <div class="alert alert-block alert-info">
<b>Using the Keras backend, we have now defined the following metric functions: precision, recall and f1. (Epsilon is used to avoid the infinite-value error caused by division by zero.)</b></div>

### 3.1 Building network

In [None]:
def build_model(input_shape=(28, 28, 1), num_classes=10, learning_rate=0.0001):
    """Build and compile the LeNet-style CNN classifier.

    The network consists of two Conv2D -> MaxPool2D -> BatchNormalization ->
    Dropout blocks (32 then 64 filters, 5x5 kernels, 'SAME' padding), followed
    by a 256-unit dense layer with dropout and a softmax output layer. It is
    compiled with categorical cross-entropy, the Adam optimizer and the
    'accuracy' metric.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
            Defaults to (28, 28, 1), the value that used to be hard-coded.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.
        learning_rate: Learning rate handed to the Adam optimizer.
            Defaults to 0.0001.

    Returns:
        A tuple ``(model, inp, output)``: the compiled ``keras.Model``, its
        input tensor and its output tensor.
    """
    inp = keras.Input(shape=input_shape, name="input_1")

    # First convolutional block: 32 filters.
    x = keras.layers.Conv2D(filters=32, kernel_size=(5,5), strides=(1,1),padding='SAME',
                              activation='relu')(inp)
    x = keras.layers.MaxPool2D(pool_size=(2,2))(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(0.25)(x)

    # Second convolutional block: 64 filters.
    x = keras.layers.Conv2D(filters=64, kernel_size=(5,5), padding='SAME', activation='relu')(x)
    x = keras.layers.MaxPool2D(pool_size=(2,2))(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(0.25)(x)

    # Classifier head.
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(256, activation='relu')(x)
    x = keras.layers.Dropout(0.5)(x)
    output = keras.layers.Dense(num_classes, activation='softmax')(x)

    model = keras.Model(inputs=inp, outputs=output)

    # Per the Keras documentation, CategoricalCrossentropy expects one-hot
    # encoded labels.
    model.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer=keras.optimizers.Adam(learning_rate=learning_rate), metrics=['accuracy'])

    return model, inp, output

> <div class="alert alert-block alert-info">
    <b>The model is built and compiled using the categorical cross-entropy loss and the Adam optimizer.</b>
</div>

In [None]:
# Create the compiled network; build_model also returns its input and output tensors.
model, inp, out = build_model()
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
# Render a diagram of the model graph. plot_model is reached through the
# tensorflow.keras alias imported at the top of the notebook, so the model and
# the plotting utility come from the same Keras installation. The private
# keras.utils.vis_utils module path is not available in newer Keras releases.
plot_model = keras.utils.plot_model
plot_model(model)

### 3.2 Training network

> <div class="alert alert-block alert-info">
    <b>Ok, we've built the network. Now, let's train it.</b>
</div>

In [None]:
# Mini-batch size passed to model.fit in the training cell.
batch_size=32

In [None]:
# Both callbacks watch the validation loss. With epochs=1 neither patience
# window (10 and 3 epochs) can elapse, so they only take effect once the
# epoch count is raised.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    min_delta=0.005,
    restore_best_weights=True,
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=3)

# Train on the training split and evaluate on the validation split each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=1,
    batch_size=batch_size,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:
# Make sure the model output directory exists; exist_ok=True keeps re-runs from failing.
os.makedirs('/tmp/ml-pipeline/model/', exist_ok=True)

In [None]:
# Save the trained model to disk; the tf2onnx conversion step reads it back from this path.
model.save("/tmp/ml-pipeline/model/saved_model")

In [None]:
# Convert the saved model to ONNX (opset 13) with the tf2onnx command-line tool.
!python -m tf2onnx.convert --saved-model /tmp/ml-pipeline/model/saved_model --output /tmp/ml-pipeline/model/model.onnx --opset 13

In [None]:
import boto3
import os

# Read the S3 connection settings from the environment again (unset variables
# yield None) and build the client used for the model upload.
key_id, secret_key, bucket_name, s3_endpoint = (
    os.environ.get(var_name)
    for var_name in (
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_S3_BUCKET",
        "AWS_S3_ENDPOINT",
    )
)

s3_client = boto3.client(
    "s3",
    aws_access_key_id=key_id,
    aws_secret_access_key=secret_key,
    endpoint_url=s3_endpoint,
)

In [None]:
# Upload the converted ONNX file to the bucket named by AWS_S3_BUCKET, under the key onnx/model-v2.onnx.
s3_client.upload_file("/tmp/ml-pipeline/model/model.onnx", bucket_name, "onnx/model-v2.onnx")

In [None]:
# Training vs. validation loss per epoch. Markers are drawn because a
# single-epoch run yields one point per series, which a marker-less line plot
# would render as an empty chart. Labels are attached to each series so the
# legend cannot drift out of order.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], marker='o', label='Loss')
ax.plot(history.history['val_loss'], marker='o', label='Val loss')
ax.set_title('Model loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend();

In [None]:
# Training vs. validation accuracy per epoch. Markers are drawn because a
# single-epoch run yields one point per series, which a marker-less line plot
# would render as an empty chart. Labels are attached to each series so the
# legend cannot drift out of order.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], marker='o', label='Accuracy')
ax.plot(history.history['val_accuracy'], marker='o', label='Val accuracy')
ax.set_title('Model accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend();