# Deliverable #4 - VGG16

This notebook is dedicated solely to fine-tuning **`VGG16`**.

See the [index notebook](index.ipynb) for instructions and for the list of other derivative notebooks.

## Imports

All needed libraries will be imported here.

Unless conditional, all imports must be done in this section to prevent workspace cluttering. Imports are sorted in ascending alphabetical order, from "a" to "z".

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import backend as K, mixed_precision
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, Flatten, Input, Resizing
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l1, l2, l1_l2
from typing import Union

import gc
import os
import random
import sys
import tensorflow as tf
import time

sys.path.append("..")
from helpers import *

%matplotlib inline

mixed_precision.set_global_policy('mixed_float16')

## Data and Variable

Sets all the global data and variables here.

Global variables will be defined and instantiated in this section, preventing a confusing clutter down the line and allowing readability when revisions are needed.

### Instantiations

Instantiations of variables will be done here, preventing mixture of variable preview and definition.

In [None]:

data = {
    "train": {
        # Sorted so the batch order does not depend on the arbitrary order returned by `os.listdir`.
        "raw": sorted(
            os.path.join('data', file)
            for file in os.listdir("data")
            if file.startswith('data_batch_')
        ),
        "loaded": {},
        "processed": None
    },
    "test": {
        "raw": os.path.join('data', 'test_batch'),
        "loaded": {},
        "processed": None
    },
    "meta": os.path.join('data', 'batches.meta')
}
"""
A dictionary to hold the data for the CIFAR-10 dataset.
The dictionary contains the following keys:

- train: A dictionary containing the training data. It has three keys:
    - raw: A sorted list of file paths for the training data files.
    - loaded: A dictionary to hold the loaded training data, keyed by file path.
    - processed: Holds the processed training data; `None` until pre-processing runs.
- test: A dictionary containing the test data. It has three keys:
    - raw: The file path for the test data file.
    - loaded: A dictionary to hold the loaded test data.
    - processed: Holds the processed test data; `None` until pre-processing runs.
- meta: The file path for the metadata file.

:var data: dict
"""

callbacks = [
    EarlyStopping(
        monitor = 'val_loss',
        patience = 10,
        verbose = 2,
        restore_best_weights = True
    ),
    ReduceLROnPlateau(
        monitor = 'val_loss',
        factor = 0.5,
        patience = 5,
        verbose = 2
    )
]
"""
A list of callbacks for the model training. Currently, it contains
the following callbacks:
- EarlyStopping: Stops training when `val_loss` has not improved for 10 epochs and restores the best weights.
- ReduceLROnPlateau: Halves the learning rate when `val_loss` has not improved for 5 epochs.
"""

configs = {
	'baseModel': VGG16,
	'trainBase': 'block5_conv1',
	'poolingLayer': Flatten,
	'dropoutRate': 0.4,
	'denseUnits': 256,
	'useBatchNorm': True,
}
"""
Defines the architecture settings used when building the model.
The dictionary contains the following:
- `baseModel`: The pre-trained application class used as the convolutional base.
- `trainBase`: The name of the first base layer to be made trainable.
- `poolingLayer`: The layer class applied to the base model output.
- `dropoutRate`: The dropout rate used after the hidden dense layer.
- `denseUnits`: The number of units in the hidden dense layer.
- `useBatchNorm`: Whether batch normalization layers are added.
"""

regularizers = {
	'none': None,
	'l1': l1(1e-5),
	'l2': l2(1e-4),
	'l1_l2': l1_l2(1e-5, 1e-4)
}
"""
Defines the regularizers to be used in the model.
The dictionary contains the following:
- `none`: No regularization.
- `l1`: L1 regularization with a strength of 1e-5.
- `l2`: L2 regularization with a strength of 1e-4.
- `l1_l2`: L1 and L2 regularization with strengths of 1e-5 and 1e-4, respectively.
"""

# Derived from `regularizers` so both dictionaries always share the same variant names.
trainDurations = {name: 0 for name in regularizers}
"""
Holds the duration, in seconds, of the entire `fit()` process of each
regularization variant.
"""

batchSize = 24
"""
Defines the batch size for the training, validation, and test data.
"""

None

In [None]:

def getMetrics(model: Model, dataset: Union[tf.data.Dataset, ImageDataGenerator], logPerBatch = False, yTrue = None) -> tuple:
	"""
	Calculates the accuracy `(avg)` and accuracy range `(min, max)`
	for the given model and dataset.

	Also returns the true labels and predicted labels. The metrics are all floats
	that represent the score in decimal and not percentage.

	Two modes are supported:

	- `logPerBatch = False`: the whole dataset is predicted in one call and
	  `avg`, `min` and `max` are all equal to the overall accuracy. When `yTrue`
	  is not provided, the labels are collected with one extra pass over the
	  dataset, which takes longer. The labels (provided or collected) must be in
	  the same order as the predictions, so the dataset must not be shuffled.
	- `logPerBatch = True`: every batch is predicted and scored on its own;
	  `avg` is the mean of the per-batch accuracies and `min` / `max` are the
	  lowest / highest per-batch accuracies. The labels of each batch are taken
	  from the batch itself, so they always line up with the predictions and
	  the `yTrue` argument is not used.

	A provided `yTrue` is never modified.

	**NOTE**: Using `logPerBatch` will log the metrics per batch and will slow the process down.

	:param model: The model to use for prediction.
	:type model: tensorflow.keras.models.Model

	:param dataset: The dataset to calculate the metrics for. Must support `len()` (number of batches) and yield `(x, y)` batches.
	:type dataset: Union[tensorflow.data.Dataset, ImageDataGenerator]

	:param logPerBatch: Whether to log the metrics per batch or not. Default is False.
	:type logPerBatch: bool

	:param yTrue: The true labels of the dataset. Optional.
	:type yTrue: list

	:return: A tuple containing metrics, the true labels, and the predicted labels; wherein the metrics is also a tuple containing the `(avg, min, max)` values.
	:rtype: tuple(tuple, list, list)

	:raises ValueError: When the number of labels differs from the number of predictions.
	"""
	from itertools import islice

	batchCount = len(dataset)

	def onePass():
		"""Yields at most `len(dataset)` batches so iterators that loop forever still terminate."""
		return islice(iter(dataset), batchCount)

	print(f"Calculating metrics for {model.name}...")
	print(f"Dataset size: {batchCount}")
	print("Predicting...")

	labels = []
	yPred = []

	if logPerBatch:
		batchScores = []
		for x, y in onePass():
			y = np.asarray(y).reshape(-1)
			classes = np.argmax(model.predict(x, verbose = 0), axis = 1)

			labels.extend(y.tolist())
			yPred.extend(classes.tolist())

			print(f"Classes: {classes}")
			print(f"Labels: {y}")

			# Score every batch; an empty batch has no accuracy to contribute
			if len(y) > 0:
				batchScores.append(float(np.mean(classes == y)))
	else:
		probs = model.predict(dataset, verbose = 0)
		print(f"Probs: {probs}")
		yPred = np.argmax(probs, axis = 1).tolist()

		if yTrue is None:
			for _, y in onePass():
				labels.extend(np.asarray(y).reshape(-1).tolist())
		else:
			# Work on a copy so the caller's labels are left untouched
			labels = np.asarray(yTrue).reshape(-1).tolist()

	if len(labels) != len(yPred):
		raise ValueError(f"Got {len(labels)} labels for {len(yPred)} predictions; they must describe the same samples in the same order.")

	total = len(labels)
	correct = int(np.sum(np.asarray(labels) == np.asarray(yPred)))
	accuracy = correct / total if total else 0.0

	if logPerBatch and batchScores:
		avg = sum(batchScores) / len(batchScores)
		lowest = min(batchScores)
		highest = max(batchScores)
	else:
		avg = lowest = highest = accuracy

	print(f"Formula: {correct} / {total}")
	print(f"Accuracy: {accuracy * 100:.2f}%")
	print(f"Of {total} images, {correct} were correct while {total - correct} were incorrect.")

	return (avg, lowest, highest), labels, yPred

def plotModelHistory(modelName, history, accuracy, yTrue, yPred) -> None:
	"""
	Draws the train/validation accuracy curves of a training run and saves
	the figure under `outputs/accuracy/<modelName>/`.

	The title carries a status derived from `accuracy`: "Underfitted" below
	0.5, "Overfitted" above 0.9, and "Just Right" otherwise.

	:param modelName: The name of the model.
	:type modelName: str

	:param history: The training history of the model; its `history` dict must hold `accuracy` and `val_accuracy`.
	:type history: tensorflow.keras.callbacks.History

	:param accuracy: The accuracy of the model in decimal form (not percentage).
	:type accuracy: float

	:param yTrue: The true labels of the dataset.
	:type yTrue: list

	:param yPred: The predicted labels of the dataset.
	:type yPred: list
	"""
	if accuracy < 0.5:
		status = "Underfitted"
	elif accuracy > 0.9:
		status = "Overfitted"
	else:
		status = "Just Right"

	# Cross-check: recompute the score from the labels that feed the confusion matrix
	matches = np.sum(np.array(yTrue) == np.array(yPred))
	sampleCount = len(yPred)
	matchRatio = matches / sampleCount

	accuracy = accuracy * 100
	# Microsecond timestamp keeps the file names of repeated runs apart
	unixTime = int(datetime.datetime.now().timestamp() * 1e6)

	print(f"Accuracy: {accuracy:.2f}%")
	print(f"Using `forCM`: {matches} / {sampleCount} = {matchRatio * 100:.2f}%")

	outputDir = f"outputs/accuracy/{modelName}"
	if not os.path.exists(outputDir):
		os.makedirs(outputDir)

	curves = (
		('accuracy', 'blue', 'train'),
		('val_accuracy', 'red', 'val'),
	)

	plt.figure(figsize = (10, 6))
	for metricName, lineColor, legendLabel in curves:
		plt.plot(history.history[metricName], color = lineColor, label = legendLabel)
	plt.legend()
	plt.grid()
	plt.title(f'Accuracy ({modelName})\nStatus: {status} ({accuracy:.2f}%)')
	plt.xlabel('Epochs')
	plt.ylabel('Accuracy')
	plt.savefig(f"{outputDir}/{unixTime} - {accuracy:.2f}%.png")

def plotConfusionMatrix(yTrue, yPred, modelName, accuracy) -> None:
	"""
	Draws the confusion matrix of the predictions as a heatmap and saves the
	figure under `outputs/confusion_matrix/<modelName>/`.

	:param yTrue: The true labels of the dataset.
	:type yTrue: list

	:param yPred: The predicted labels of the dataset.
	:type yPred: list

	:param modelName: The name of the model.
	:type modelName: str

	:param accuracy: The accuracy of the model in decimal form (not percentage).
	:type accuracy: float
	"""
	percent = accuracy * 100
	# Microsecond timestamp keeps the file names of repeated runs apart
	stamp = int(datetime.datetime.now().timestamp() * 1e6)
	matrix = confusion_matrix(yTrue, yPred)

	plt.figure(figsize = (10, 8))
	sns.heatmap(matrix, annot = True, fmt = 'd', cmap = 'Blues')

	plt.title(f'Confusion Matrix - {modelName}\nAccuracy: {percent:.2f}%')
	plt.xlabel('Predicted Label')
	plt.ylabel('True Label')

	targetDir = f"outputs/confusion_matrix/{modelName}"
	if not os.path.exists(targetDir):
		os.makedirs(targetDir)

	plt.savefig(f"{targetDir}/{stamp} - {percent:.2f}%.png")

def plotModel(model, accuracy, modelName = None) -> None:
	"""
	Plots the model architecture and saves it as an image in `outputs/`.

	:param model: The model to plot.
	:type model: tensorflow.keras.models.Model

	:param accuracy: The accuracy of the model in decimal form (not percentage).
	:type accuracy: float

	:param modelName: The name of the model. Optional; defaults to the model's own name.
	:type modelName: str
	"""
	if modelName is None:
		# Model instances expose `name`; `__name__` only exists on classes and functions
		modelName = model.name

	# Microsecond timestamp keeps the file names of repeated runs apart
	unixTime = int(datetime.datetime.now().timestamp() * 1e6)

	# The target directory may not exist yet when this is the first plot written
	os.makedirs("outputs", exist_ok = True)

	# `accuracy` is a decimal, so convert it before labelling it as a percentage
	plot_model(model, to_file = f"outputs/{modelName}_model_{unixTime} - {accuracy * 100:.2f}%.png", show_shapes = True, show_layer_names = True)


### Previews

In [None]:
import tensorflow as tf

# Query the visible GPUs once, report how many there are, then display them
gpuDevices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpuDevices))
gpuDevices

In [None]:
# Show where the raw training, test, and metadata files are expected to be
print(
    f"Training data files: {data['train']['raw']}",
    f"Testing data file: {data['test']['raw']}",
    f"Meta data file: {data['meta']}",
    sep = "\n",
)

In [None]:
# Load every training batch, print a short summary of it, and keep it keyed by its file path.
# NOTE(review): `unpickle` comes from `helpers`; presumably it wraps `pickle.load`, so only feed it trusted files.
for file in data['train']["raw"]:
    print(f"Unpickling {file}...")
    batch = unpickle(file)
    print(f"Unpickled {file} with keys: {batch.keys()}")

    rows = batch[b'data']
    lastIndex = len(rows) - 1

    print(f"Batch shape: {rows.shape}")
    print(f"Labels shape: {len(batch[b'labels'])}")
    print(f"Batch size: {len(rows)}")
    print(f"Batch content size: {len(rows[random.randint(0, lastIndex)])}")
    print(f"Peek in the batch: {rows}")
    print(f"Peek in the batch content: {rows[random.randint(0, lastIndex)]}")
    print("==============================================")

    data['train']["loaded"][file] = batch

In [None]:
# Load the test batch, print a short summary of it, and keep it as the loaded test data.
# NOTE(review): `unpickle` comes from `helpers`; presumably it wraps `pickle.load`, so only feed it trusted files.
testFile = data['test']['raw']

print(f"Unpickling {testFile}...")
batch = unpickle(testFile)
print(f"Unpickled {testFile} with keys: {batch.keys()}")

rows = batch[b'data']
lastIndex = len(rows) - 1

print(f"Batch shape: {rows.shape}")
print(f"Labels shape: {len(batch[b'labels'])}")
print(f"Batch size: {len(rows)}")
print(f"Batch content size: {len(rows[random.randint(0, lastIndex)])}")
print(f"Peek in the batch: {rows}")
print(f"Peek in the batch content: {rows[random.randint(0, lastIndex)]}")
print("==============================================")

data['test']["loaded"] = batch

In [None]:
# `data['meta']` starts out as the path of the metadata file and is replaced by
# its loaded content. Only load while it is still a path, so re-running this
# cell does not pass the already-loaded content back into `unpickle`.
if isinstance(data['meta'], str):
    data['meta'] = unpickle(data['meta'])
data['meta']

In [None]:
# Preview a random training batch. The key is built with `os.path.join`, the
# same way the file paths were created, so the lookup works on every platform
# instead of only where the path separator is a backslash.
data['train']['loaded'][os.path.join('data', f'data_batch_{random.randint(1, 5)}')]

In [None]:
# Preview the class names stored in the metadata; they are indexed by the numeric label
data['meta'][b'label_names']

In [None]:
# Show a random image from a random training batch together with its label.
# The key is built with `os.path.join`, the same way the file paths were
# created, so the lookup works on every platform.
target = data['train']['loaded'][os.path.join('data', f'data_batch_{random.randint(1, 5)}')]
targetIndex = random.randint(0, len(target[b'data']) - 1)
label = target[b'labels'][targetIndex]
label = f"{data['meta'][b'label_names'][label].decode('UTF-8')} ({label})"

showImg(
    # Each row is reshaped to (3, 32, 32) and moved to channels-last for display
    target[b'data'][targetIndex].reshape(3, 32, 32).transpose(1, 2, 0),
    f"Label: {label}",
)

In [None]:
# Preview the raw image rows of the loaded test batch
data['test']['loaded'][b'data']

In [None]:
# Show a random image from the test batch together with its label
target = data['test']['loaded']
targetIndex = random.randint(0, len(target[b'data']) - 1)

classId = target[b'labels'][targetIndex]
className = data['meta'][b'label_names'][classId].decode('UTF-8')
label = f"{className} ({classId})"

# Each row is reshaped to (3, 32, 32) and moved to channels-last for display
pixels = target[b'data'][targetIndex].reshape(3, 32, 32).transpose(1, 2, 0)
showImg(pixels, f"Label: {label}")

## Process

Here begins the process which includes data splitting and pre-processing.

### Pre Processing

In [None]:
data['train']['processed'] = {
    'combined': {
        'data': None,
        'labels': None,
        'generator': None
    },
    'validation': {
        'data': None,
        'labels': None,
        'generator': None
    },
}

data['test']['processed'] = {
    'generator': None
}

# Splitting the data into training and validation sets
x = []
y = []
for file in data['train']['loaded']:
    x.append(data['train']['loaded'][file][b'data'])
    y.append(data['train']['loaded'][file][b'labels'])
x_train, x_val, y_train, y_val = train_test_split(
    np.concatenate(x),
    np.concatenate(y),
    test_size = 0.2
)

# Reshaping the data to match the input shape of the model - Uses 244 since the model is VGG16
targetShape = 224
targetSize = (targetShape, targetShape)
    
#######################
### VALIDATION DATA ###
#######################
print(f"Validation data shape: {x_val.shape}")
data['train']['processed']['validation']['data'] = x_val
data['train']['processed']['validation']['labels'] = y_val
# data['train']['processed']['validation']['generator'] = makeDataset(
# 	data['train']['processed']['validation']['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
# 	data['train']['processed']['validation']['labels'],
# 	targetSize,
# 	# batchSize = batchSize
# )

data['train']['processed']['validation']['generator'] = (ImageDataGenerator(
    rescale = 1./255,
)).flow(
    data['train']['processed']['validation']['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
    data['train']['processed']['validation']['labels'],
    batch_size = batchSize,
)

##################
### TRAIN DATA ###
##################
print(f"Training data shape: {x_train.shape}")
data['train']['processed']['combined']['data'] = x_train
data['train']['processed']['combined']['labels'] = y_train
# data['train']['processed']['combined']['generator'] = makeDataset(
# 	data['train']['processed']['combined']['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
# 	data['train']['processed']['combined']['labels'],
# 	targetSize,
# 	True,
# 	# batchSize = batchSize
# )

data['train']['processed']['combined']['generator'] = (ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
)).flow(
    data['train']['processed']['combined']['data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
    data['train']['processed']['combined']['labels'],
    batch_size = batchSize,
)

### -------------
###
### -------------

#################
### TEST DATA ###
#################
print(f"Test data shape: {data['test']['loaded'][b'data'].shape}")
# data['test']['processed']['generator'] = makeDataset(
# 	data['test']['loaded'][b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
# 	data['test']['loaded'][b'labels'],
# 	targetSize,
# 	# batchSize = batchSize
# )
data['test']['processed']['labels'] = data['test']['loaded'][b'labels']
data['test']['processed']['generator'] = (ImageDataGenerator(
    rescale = 1./255,
)).flow(
    data['test']['loaded'][b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1),
    data['test']['loaded'][b'labels'],
    batch_size = batchSize,
)

%run memoryDashboard.py

# Free up some memory
del data['train']['raw']
del data['test']['raw']

del data['train']['loaded']
del data['test']['loaded']

sizes = {
	'spe': len(data['train']['processed']['combined']['data']),
	'vs': len(data['train']['processed']['validation']['data']),
}

del data['train']['processed']['combined']['data']
del data['train']['processed']['validation']['data']

nD = data

del data
data = nD
del nD

K.clear_session()
gc.collect()

%run memoryDashboard.py

### Creating the Model

Includes the compilation and fitting after using 10 configuration samples to learn how they affect the model.

#### Building

In [None]:
print('\n\n-------------------')
print('-- CONFIGURATION --')
print('-------------------\n\n')

print(f'Base Model: {configs["baseModel"].__name__}')
print(f'Pooling Layer: {configs["poolingLayer"].__name__}')
print(f'Train Base: {configs["trainBase"]}')
print(f'Dropout Rate: {configs["dropoutRate"]}')
print(f'Dense Units: {configs["denseUnits"]}')
print(f'Use Batch Norm: {configs["useBatchNorm"]}')

%run memoryDashboard.py

print('\n\n--------------------')
print('-- BUILDING MODEL --')
print('--------------------\n\n')

# Applies the resizing layer to the input shape; 224 since the model is VGG16
inputTensor = Input(shape = (32, 32, 3))
resized = Resizing(224, 224)(inputTensor)
print("Resizing Layer Shape: ", resized.shape)

# Creates the base model using the specified base model and input tensor
baseModel = VGG16(
    weights = 'imagenet',
    include_top = False,
    input_tensor = resized
)

baseModel.summary()

# Unfreezes the base model layers for training
# baseModel.trainable = configs['trainBase'](baseModel, 'block5_conv1')
trainableFlag = False
for layer in baseModel.layers:
	if layer.name == 'block5_conv1':
		trainableFlag = True
	print(f"Layer: {layer.name} - Trainable: {layer.trainable} => {trainableFlag}")
	layer.trainable = trainableFlag

print("")
print(f"{configs['baseModel'].__name__} Input: {baseModel.input}")
print(f"{configs['baseModel'].__name__} Output: {baseModel.output}")

# Uncomment if `input_tensor` is not used
# baseModel = baseModel(resized)

# Applies the pooling layer to the base model output
x = Flatten()(baseModel.output)
# x = Flatten()(baseModel)
print(f"Pooling Layer Shape: {x.shape}")

# Batch Normalization Layer
if configs['useBatchNorm']:
	x = BatchNormalization()(x)
	print(f"Batch Normalization Shape: {x.shape}")

# Move `x` to `model`
model = x

# Change `x` to `dict`
x = {
	'none': None,
	'l1': None,
	'l2': None,
	'l1_l2': None,
}

# Holds the final model to be compiled, trained, and evaluated
models = {
	'none': None,
	'l1': None,
	'l2': None,
	'l1_l2': None,
}

# Adds the batch normalization, dropout, and output layers
def addTailLayers(model, useBatchNorm, dropoutRate = 0) -> Model:
	"""
	Adds the last layers to the model.

	:param model: The model to add the layers to.
	:type model: Model

	:param useBatchNorm: Whether to use batch normalization or not.
	:type useBatchNorm: bool

	:param dropoutRate: The dropout rate to use. Default is 0.
	:type dropoutRate: float

	:return: The model with the added layers.
	:rtype: Model
	"""
	if useBatchNorm:
		model = BatchNormalization()(model)
		print(f"Batch Normalization Shape: {model.shape}")

	if dropoutRate > 0:
		model = Dropout(configs['dropoutRate'])(model)
		print(f"Dropout Layer Shape: {model.shape}")
	return model

None

#### Branching

In this section, the model is branched so that different kernel regularizers can be compared. It branches off into several models, each using a different kernel regularizer:

- `None`
- `l1`
- `l2`
- `l1_l2`

In [None]:
# Builds one classification head per kernel regularizer on top of the shared trunk (`model`).
# NOTE(review): every variant is wired to the same `inputTensor` and the same trunk tensor,
# so the layers below the heads are shared between all four models. Training one variant
# therefore also changes the trainable trunk layers used by the others. Confirm this is intended.
for kr in ('none', 'l1', 'l2', 'l1_l2'):
	# Banner line announcing the variant
	bar = '=' * ((len("=====================================================") - int((len(kr) + 2) / 2) - 2) // 2)
	txt = f"{bar} {kr.upper()} {bar}"
	print(txt)

	penalty = regularizers[kr]

	# Hidden Layer 1 - 256 units
	branch = Dense(configs['denseUnits'], activation = "relu", kernel_regularizer = penalty)(model)
	print(f"Hidden Layer Shape: {branch.shape}")

	branch = addTailLayers(branch, configs['useBatchNorm'], configs['dropoutRate'])

	# Output Layer - 10 units (for 10 classes); explicitly float32 although the global policy is mixed_float16
	branch = Dense(10, activation = "softmax", kernel_regularizer = penalty, dtype = 'float32')(branch)
	print(f"Output Layer Shape: {branch.shape}")

	x[kr] = branch
	models[kr] = Model(inputs = inputTensor, outputs = branch)
	print(f"{'=' * len(txt)}\n\n")

#### Compiling

In [None]:
# Compiles every variant with the same settings; each one gets its own optimizer instance
for key, variant in models.items():
	print(f"Compiling Model Variant: {key.upper()}")

	variantOptimizer = Adam(learning_rate = 0.001)
	variant.compile(
		optimizer = variantOptimizer,
		# The labels are plain integers, hence the sparse loss
		loss = "sparse_categorical_crossentropy",
		metrics = ["accuracy"]
	)

#### Fitting

In [None]:
print(f"Batch Size: {batchSize}")

# Steps per Epoch (SPE) and Validation Steps (VS): the number of full batches
# that fit in the training and validation samples, respectively
SPE, VS = (sizes[name] // batchSize for name in ('spe', 'vs'))

print(f"Potential SPE: {SPE}")
print(f"Potential VS: {VS}")
print("")

# Uncomment to cap the steps for a quicker run
# SPE = 300
# VS = 150

In [None]:
keys = list(models.keys())
for key in keys:
	print(f"Training Model Regularization Variant: {key.upper()}\n")

	%run memoryDashboard.py
	start = time.time()
	history = models[key].fit(
		data['train']['processed']['combined']['generator'],
		steps_per_epoch = SPE,
		epochs = 100,
		validation_data = data['train']['processed']['validation']['generator'],
		validation_steps = VS,
		verbose = 2,
		callbacks = callbacks,
	)
	end = time.time()
	print("")
	%run memoryDashboard.py

	trainDuration = end - start

	trainDurations[key] = trainDuration
	print(f"Training Duration ({key.upper()}): {trainDuration} seconds / {trainDuration / 60} minutes\n")

	# Get the metrics for the model
	metrics, yTrue, yPred = getMetrics(
		models[key],
		data['test']['processed']['generator'],
		False,
		yTrue = data['test']['processed']['labels'],
	)
	avg, min, max = metrics

	# Model History
	plotModelHistory(
		key.upper(),
		history,
		avg,
		yTrue,
		yPred
	)

	# Confusion Matrix
	plotConfusionMatrix(
		yTrue,
		yPred,
		key.upper(),
		avg
	)

	# Show model architecture
	plotModel(
		models[key],
		avg,
		key.upper()
	)

	# Free up some memory
	del history
	del yTrue
	del yPred
	del metrics
	del avg
	del min
	del max
	del models[key]

	K.clear_session()
	gc.collect()

print(f"\n\nTotal Training Duration: {sum(trainDurations.values())} seconds / {sum(trainDurations.values()) / 60} minutes")