Name : Hazem Bin Ryaz Patel (2200550)
Class : DAAA/2B/07 


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import visualkeras
import keras
import numpy as np
import seaborn as sns
from keras.layers import (
    AveragePooling2D,
    ZeroPadding2D,
    BatchNormalization,
    Activation,
    MaxPool2D,
    Add,
)
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Normalization, Dense, Conv2D, Dropout, BatchNormalization, ReLU
from keras.models import Sequential
from keras.models import Model
from keras.optimizers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils.vis_utils import plot_model
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Seed the random number generators used in this notebook so that runs are
# repeatable.  NumPy drives the random choice of images that get plotted;
# TensorFlow drives the random flips, dataset shuffling and weight
# initialisation, so seeding NumPy alone is not enough.
seed_r = 42
np.random.seed(seed_r)
tf.random.set_seed(seed_r)

## EDA

### Loading in data

In [None]:
# Root folder of the dataset; the three splits are read from its "train",
# "validation" and "test" sub-folders.
data_dir = "./Dataset for CA1 part A"

# Loading parameters shared by every split.
batch_size = 32
img_height = 224
img_width = 224

_loader_options = dict(
    seed=seed_r,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + "/train", **_loader_options
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + "/validation", **_loader_options
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + "/test", **_loader_options
)

### Visualizing the data

In [None]:
# Class names as exposed by the training dataset object; they are used for
# plot titles and printed summaries throughout the notebook.
class_names = train_ds.class_names
# Report how many classes there are.
print(len(class_names))

In [None]:
# Materialise the training set in memory as NumPy arrays.  Images and labels
# are taken from the same pass over the dataset so that they stay aligned.
train_batches = [
    (batch_images.numpy(), batch_labels.numpy())
    for batch_images, batch_labels in train_ds
]

x_train = np.array(
    [image for batch_images, _ in train_batches for image in batch_images]
)
y_train = np.array(
    [label for _, batch_labels in train_batches for label in batch_labels]
)

In [None]:
# Show one randomly picked training image per class in a 3 x 5 grid.
fig, ax = plt.subplots(3, 5, figsize=(8, 5), tight_layout=True)

for class_idx, axis in enumerate(ax.ravel()):
    # All training images that belong to the current class.
    class_images = x_train[y_train == class_idx]
    pick = np.random.randint(0, len(class_images))

    axis.axis("off")
    # Pixel values are cast to uint8 before being handed to imshow.
    axis.imshow(class_images[pick].astype("uint8"), cmap="Greys")
    axis.set_title(class_names[class_idx])

plt.show()

### Checking for mislabelled data

In [None]:
# Visual sanity check for mislabelled data: one row per class, each row
# showing randomly chosen training images of that class.
n_rows, n_cols = len(class_names), 10
fig, ax = plt.subplots(n_rows, n_cols, figsize=(15, 20), squeeze=False)

# Hide the axes of every cell up front so that cells left empty (a class with
# fewer images than columns) do not show an empty frame.
for axis in ax.ravel():
    axis.axis("off")

for i, label in enumerate(class_names):
    class_images = x_train[y_train == i]
    # Draw exactly as many distinct images as will be displayed, and never
    # more than the class contains (sampling without replacement would
    # otherwise raise).
    n_show = min(n_cols, class_images.shape[0])
    random_index = np.random.choice(class_images.shape[0], n_show, replace=False)
    images = class_images[random_index].astype("uint8")
    for j, image in enumerate(images):
        subplot = ax[i, j]
        subplot.imshow(image, cmap="Greys")
        subplot.set_title(label, fontsize=8)

plt.show()

Conclusion: No mislabelled images were found, so none of the data needs to be re-labelled.

### Image Averaging

In [None]:
# Plot the mean image of every class (pixel-wise mean divided by 255).
fig, ax = plt.subplots(3, 5, figsize=(20, 10))

for idx, subplot in enumerate(ax.ravel()):
    class_mask = np.squeeze(y_train == idx)
    avg_image = x_train[class_mask].mean(axis=0) / 255
    subplot.set_title(f"{class_names[idx]}")
    subplot.axis("off")
    subplot.imshow(avg_image, cmap="Greys")

While some class averages show discernible colors, most appear as a blob of vibrant green hues.

### Distribution of Classes

In [None]:
# Count the training images per class, print the counts and draw them as a
# horizontal bar chart.
labels, counts = np.unique(y_train, return_counts=True)
for class_id, n_images in zip(labels.tolist(), counts.tolist()):
    print(f"{class_names[class_id]}: {n_images}")

plt.barh(y=labels, width=counts, tick_label=class_names)
plt.show()

## Pre-Processing 

### Oversampling with the use of Data Augmentations
An important issue to address is the class imbalance in the data. I have chosen to oversample the under-represented classes by adding augmented copies of their images, which reduces the disparity.

In [None]:
# Map every individual (unbatched) training image to its label.  The key is
# the reference object returned by ``Tensor.ref()``; ``.deref()`` on a key
# gives the image tensor back.
train_dict = {
    image.ref(): image_label for image, image_label in train_ds.unbatch()
}


def data_augmentation(data):
    """Return a randomly augmented copy of every image in ``data``.

    Each image is randomly flipped left/right, passed through a random crop
    of size 224 x 224 x 3 and reshaped to ``(224, 224, 3)``.

    Args:
        data: Iterable of images.

    Returns:
        NumPy array holding the augmented images in input order.
    """

    def _augment(single_image):
        flipped = tf.image.random_flip_left_right(single_image)
        cropped = tf.image.random_crop(flipped, size=(224, 224, 3))
        return tf.reshape(cropped, (224, 224, 3))

    return np.array([_augment(single_image) for single_image in data])

def augment_undersampled_vegs(img_labels, X_train, y_train):
    """Oversample the given classes by appending augmented copies of their images.

    Args:
        img_labels: Sequence of class indices whose images should be augmented.
        X_train: Array of training images, indexed along the first axis.
        y_train: Array of integer class labels aligned with ``X_train``.

    Returns:
        Tuple ``(X, y)``: the inputs with one augmented copy of every image
        of the requested classes appended.  The inputs are returned unchanged
        when no class is requested or no image matches.
    """
    if len(img_labels) == 0:
        return X_train, y_train

    # Select straight from the arrays that were passed in, one class at a
    # time, so the function depends only on its arguments.
    masks = [y_train == veg_type for veg_type in img_labels]
    undersampled_vegs = np.concatenate([X_train[mask] for mask in masks], axis=0)
    undersampled_labels = np.concatenate([y_train[mask] for mask in masks], axis=0)

    if len(undersampled_vegs) == 0:
        return X_train, y_train

    # Augment once, after all classes have been gathered; doing it inside the
    # per-class loop would redo the work on the growing set every iteration.
    veg_train_aug = data_augmentation(undersampled_vegs)

    print(veg_train_aug.shape)
    print(undersampled_labels.shape)

    X_train = np.concatenate((X_train, veg_train_aug), axis=0)
    y_train = np.concatenate((y_train, undersampled_labels), axis=0)
    return X_train, y_train

# Class indices to oversample.
# NOTE(review): presumably the classes with the fewest training images in the
# distribution plot above — confirm against the printed counts.
veg_types = [2, 5, 6, 7, 10, 11, 13]
# Build the augmented training arrays from the in-memory training set.
X_train_aug, y_train_aug = augment_undersampled_vegs(veg_types, x_train, y_train)

In [None]:
# Turn the augmented arrays back into a shuffled, batched tf.data pipeline.
# The dataset is built under the CPU device scope.
with tf.device("/device:CPU:0"):
    train_ds_rebatch = (
        tf.data.Dataset.from_tensor_slices((X_train_aug, y_train_aug))
        .shuffle(buffer_size=len(X_train_aug))  # buffer spans the whole set
        .batch(32)
    )

In [None]:
def process(ds):
    """Convert a dataset of RGB images to 128 x 128 grayscale images.

    Args:
        ds: Dataset yielding ``(images, labels)`` pairs.

    Returns:
        Dataset yielding the grayscale, resized images with unchanged labels.
    """

    def _to_small_grayscale(images, labels):
        gray = tf.image.rgb_to_grayscale(images)
        return tf.image.resize(gray, (128, 128)), labels

    return ds.map(_to_small_grayscale)

# Apply the same grayscale + resize preprocessing to every split: the
# original training set, the augmented training set, validation and test.
(
    train_non_aug_ds_128,
    train_ds_128,
    val_ds_128,
    test_ds_128,
) = (process(split) for split in (train_ds, train_ds_rebatch, val_ds, test_ds))

## EDA after pre-processing
How do the images look now?

In [None]:
# Materialise the preprocessed (augmented) training set as NumPy arrays.
# Images and labels come from the same pass so that they stay aligned.
train_batches_128 = [
    (batch_images.numpy(), batch_labels.numpy())
    for batch_images, batch_labels in train_ds_128
]

x_train_128 = np.array(
    [image for batch_images, _ in train_batches_128 for image in batch_images]
)
y_train_128 = np.array(
    [label for _, batch_labels in train_batches_128 for label in batch_labels]
)

In [None]:
# Class counts after augmentation: print them and draw a horizontal bar chart.
labels, counts = np.unique(y_train_128, return_counts=True)
for class_id, n_images in zip(labels.tolist(), counts.tolist()):
    print(f"{class_names[class_id]}: {n_images}")

plt.barh(y=labels, width=counts, tick_label=class_names)
plt.show()

In [None]:
# Mean image of every class after preprocessing (pixel-wise mean / 255).
fig, ax = plt.subplots(3, 5, figsize=(8, 5), tight_layout=True)

for idx, subplot in enumerate(ax.ravel()):
    class_mask = np.squeeze(y_train_128 == idx)
    avg_image = x_train_128[class_mask].mean(axis=0) / 255
    subplot.set_title(f"{class_names[idx]}")
    subplot.axis("off")
    subplot.imshow(avg_image, cmap="Greys")

In [None]:
# Show one randomly picked preprocessed image per class in a 3 x 5 grid.
fig, ax = plt.subplots(3, 5, figsize=(8, 5), tight_layout=True)

for class_idx, axis in enumerate(ax.ravel()):
    # All preprocessed training images that belong to the current class.
    class_images = x_train_128[y_train_128 == class_idx]
    pick = np.random.randint(0, len(class_images))

    axis.axis("off")
    axis.imshow(class_images[pick], cmap="Greys")
    axis.set_title(class_names[class_idx])

plt.show()

## Training for CNN 128
Modelling questions to consider:
- Should the model be trained with or without the augmented data?
- Does `class_weight_dict` help on top of the augmentation, or does it make little difference?


### Base Model


In [None]:
# "balanced" class weights computed from the labels of the preprocessed
# training set, stored as a {position: weight} dictionary for ``fit``.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train_128),
    y=y_train_128,
)
class_weight_dict = {index: weight for index, weight in enumerate(class_weights)}

In [None]:
def conv2d_block(layer_number):
    """Build the Conv2D + BatchNormalization block for a given depth.

    Args:
        layer_number: Zero-based position of the block in the network.

    Returns:
        A ``Sequential`` holding one same-padded ReLU convolution followed
        by a batch-normalisation layer.
    """
    # The filter count doubles with depth: 32, 64, 128, ... for 0, 1, 2, ...
    conv_filter1 = 2 ** (5 + layer_number)

    # The kernel shrinks with depth: 7x7, then 5x5, then 3x3.  The choice has
    # to be made on the caller's layer index; testing the value after the +5
    # filter offset would never match 0 or 1 and always yield 3x3.
    if layer_number == 0:
        ks = (7, 7)
    elif layer_number == 1:
        ks = (5, 5)
    else:
        ks = (3, 3)

    return Sequential(
        [
            Conv2D(conv_filter1, ks, padding="same", activation="relu"),
            BatchNormalization(),
        ]
    )


In [None]:
def base_model_128():
    """Build and compile the baseline CNN.

    Four convolution blocks are stacked; after every second block a 2 x 2
    max-pooling layer and a dropout layer (rate 0.2) are added.  The
    classifier head is Flatten -> Dense(256, relu) -> Dense(15, softmax).

    Returns:
        The model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    for block_index in range(4):
        model.add(conv2d_block(block_index))
        if block_index % 2 == 1:
            model.add(MaxPooling2D(2, 2))
            model.add(Dropout(0.2))

    head_layers = (
        Flatten(),
        Dense(256, activation="relu"),
        Dense(15, activation="softmax"),
    )
    for head_layer in head_layers:
        model.add(head_layer)

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return model

# Train the baseline model with class weighting, stopping early when the
# validation loss stalls and checkpointing the best epoch to disk.
model_128 = base_model_128()
history_128 = model_128.fit(
    # ``fit`` ignores ``batch_size`` when the input is a tf.data.Dataset, so
    # the intended batch size of 64 is applied by regrouping the dataset.
    train_ds_128.unbatch().batch(64),
    validation_data=val_ds_128,
    epochs=30,
    class_weight=class_weight_dict,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min'),
        ModelCheckpoint('model_128.h5', monitor='val_loss', save_best_only=True, verbose=1),
    ],
)
# Report the loss and accuracy on the held-out test split.
model_128.evaluate(test_ds_128)
model_128.summary()
# Kept as the last expression so the notebook renders the architecture diagram.
plot_model(model_128, show_shapes=True, show_layer_names=True)

In [None]:
# Accuracy per epoch.  The second curve is the validation accuracy recorded
# during training, so it is labelled "validation" rather than "test".
plt.plot(history_128.history["accuracy"])
plt.plot(history_128.history["val_accuracy"])
plt.title("model accuracy")
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train", "validation"], loc="upper left")
plt.show()

# Loss per epoch, with the same curve labelling.
plt.plot(history_128.history["loss"])
plt.plot(history_128.history["val_loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "validation"], loc="upper left")
plt.show()

### Does Augmentation / Class Weights help the model

In [None]:
def tune_128_model(batchsize, dropout, dense):
    """Train and evaluate one hyper-parameter configuration of the 128x128 CNN.

    Args:
        batchsize: Number of images per training batch.
        dropout: Rate used by both Dropout layers.
        dense: Number of units in the first fully connected layer.

    Returns:
        Tuple ``(history, accuracy, test_error)``: the Keras training
        history, the accuracy on ``test_ds_128`` and the test error in
        percent (``100 - accuracy * 100``).
    """
    model = Sequential()
    model.add(
        Conv2D(64, 7, input_shape=(128, 128, 1), padding="same", activation="relu")
    )

    model.add(MaxPooling2D(2, 2))
    model.add(BatchNormalization())
    model.add(Conv2D(128, 5, padding="same", activation="relu"))

    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(dropout))

    model.add(Conv2D(64, 3, padding="same", activation="relu"))
    model.add(MaxPooling2D(2, 2))

    model.add(Conv2D(32, 3, padding="same", activation="relu"))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))

    # Flatten the feature map
    model.add(Flatten())

    # Add the fully connected layers
    model.add(Dense(dense, activation="relu"))
    model.add(Dense(256, activation="relu"))
    model.add(Dense(15, activation="softmax"))

    # Compile the model with its optimizer, loss, and metrics
    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )

    # ``fit`` ignores ``batch_size`` (and ``shuffle``) when the input is a
    # tf.data.Dataset, so the requested batch size is applied by regrouping
    # the dataset; otherwise every configuration would train on the same
    # batches regardless of ``batchsize``.
    train_batches = train_ds_128.unbatch().batch(batchsize)
    history = model.fit(
        train_batches,
        validation_data=val_ds_128,
        epochs=20,
        class_weight=class_weight_dict,
        callbacks=[
            EarlyStopping(
                monitor="val_loss",
                patience=5,
                verbose=1,
                mode="min",
                restore_best_weights=True,
            )
        ],
    )

    # Evaluate model on unseen data; scores is [loss, accuracy]
    scores = model.evaluate(test_ds_128)
    testError = 100 - scores[1] * 100
    return history, scores[1], testError

In [None]:
def grid_search(batch_sizes, dropouts, dense_sizes):
    """Train one model for every combination of the given hyper-parameters.

    Args:
        batch_sizes: Batch sizes to try.
        dropouts: Dropout rates to try.
        dense_sizes: Sizes of the first dense layer to try.

    Returns:
        List with one dictionary per combination, holding the keys
        ``batch_size``, ``dropout``, ``dense_size``, ``accuracy`` and
        ``test_error``.
    """

    def _run(batch_size, dropout, dense_size):
        # The training history is returned too but is not part of the record.
        _, accuracy, test_error = tune_128_model(batch_size, dropout, dense_size)
        return {
            "batch_size": batch_size,
            "dropout": dropout,
            "dense_size": dense_size,
            "accuracy": accuracy,
            "test_error": test_error,
        }

    return [
        _run(batch_size, dropout, dense_size)
        for batch_size in batch_sizes
        for dropout in dropouts
        for dense_size in dense_sizes
    ]


# Define the hyperparameters to test
# (grid_search tries every combination: 2 x 3 x 3 = 18 training runs)
batch_sizes = [64, 128]
dropouts = [0.1, 0.2, 0.5]
dense_sizes = [256, 512, 1024]


# Run the grid search; results holds one record per combination
results = grid_search(batch_sizes, dropouts, dense_sizes)

In [None]:
# Pick the grid-search configuration with the highest accuracy.  A score has
# to beat the current best strictly, so ties keep the earliest configuration
# and nothing is selected unless some accuracy is above 0.
best_score, best_params = 0, None
param_names = ("batch_size", "dropout", "dense_size")

for candidate in results:
    if not candidate["accuracy"] > best_score:
        continue
    best_score = candidate["accuracy"]
    best_params = {name: candidate[name] for name in param_names}

# Report the winner
print('Best params:', best_params)
print('Best accuracy:', best_score)

Tune the other dense layer

In [None]:
def finalmodel():
    """Build and compile the final CNN for 128 x 128 single-channel images.

    Returns:
        The model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric; it ends in a 15-unit
        softmax layer.
    """
    model = Sequential(
        [
            # Convolutional feature extractor
            Conv2D(
                64, 7, input_shape=(128, 128, 1), padding="same", activation="relu"
            ),
            MaxPooling2D(2, 2),
            BatchNormalization(),
            Conv2D(128, 5, padding="same", activation="relu"),
            MaxPooling2D(2, 2),
            Dropout(0.1),
            Conv2D(64, 3, padding="same", activation="relu"),
            MaxPooling2D(2, 2),
            Conv2D(32, 3, padding="same", activation="relu"),
            BatchNormalization(),
            Dropout(0.1),
            # Classifier head
            Flatten(),
            Dense(256, activation="relu"),
            Dense(256, activation="relu"),
            Dense(15, activation="softmax"),
        ]
    )

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return model


# Train the final architecture, stopping early when the validation loss
# stalls and checkpointing the best epoch to disk.
model_128_final = finalmodel()
history_128_final = model_128_final.fit(
    # ``fit`` ignores ``batch_size`` when the input is a tf.data.Dataset, so
    # the intended batch size of 128 is applied by regrouping the dataset.
    train_ds_128.unbatch().batch(128),
    validation_data=val_ds_128,
    epochs=30,
    class_weight=class_weight_dict,
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=5, verbose=1, mode="min"),
        ModelCheckpoint(
            "model_128_final.h5", monitor="val_loss", save_best_only=True, verbose=1
        ),
    ],
)

## Load in model from .h5 file

In [None]:
# NOTE(review): load_model is imported here but not used in this cell.
from tensorflow.keras.models import load_model

# Create a new instance of the model; load_weights below fills this network
# with the stored weights.
model = finalmodel()

# Load the weights from the .h5 file
# NOTE(review): "model_128_final_2.h5" is not written by any cell visible in
# this notebook (the visible cells save "model_128.h5", "model_128_final.h5"
# and "model_128_final_3.h5") — confirm this is the intended checkpoint.
model.load_weights("model_128_final_2.h5")


# Loss and accuracy of the restored model on the test split.
test_loss, test_accuracy = model.evaluate(test_ds_128)

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

In [None]:
# Retrain the final architecture several times and keep the run whose saved
# weights reach the lowest validation loss.
best_loss = float("inf")
best_model = None
n_runs = 12  # Change this to the number of times you want to retrain the model

for i in range(n_runs):
    print(f"Training run {i+1}")
    # Create a new instance of the model
    model_128_final = finalmodel()

    # Train the model
    history_128_final = model_128_final.fit(
        # ``fit`` ignores ``batch_size`` when the input is a tf.data.Dataset,
        # so the intended batch size of 128 is applied by regrouping.
        train_ds_128.unbatch().batch(128),
        validation_data=val_ds_128,
        epochs=30,
        class_weight=class_weight_dict,
        callbacks=[
            EarlyStopping(
                monitor="val_loss",
                patience=5,
                verbose=1,
                mode="min",
                # Roll back to the best epoch instead of keeping the weights
                # of the last (worse) epochs seen before stopping.
                restore_best_weights=True,
            )
        ],
    )

    # Score the weights the model actually holds now, so that the loss used
    # for ranking always belongs to the model that would be saved.
    # evaluate() returns [loss, accuracy]; index 0 is the validation loss.
    current_loss = model_128_final.evaluate(val_ds_128, verbose=0)[0]
    print(f"Current loss: {current_loss}")

    # If the current loss is better than the best loss seen so far, update
    # the best loss and save the model
    if current_loss < best_loss:
        best_loss = current_loss
        best_model = model_128_final
        best_model.save("model_128_final_3.h5")
        print(f"New best loss: {best_loss}")

print(f"Best loss: {best_loss}")