## 🌐 Connect to Google Drive

In [1]:
from google.colab import drive

# Mount Google Drive at /gdrive so the project folder is reachable from the runtime
drive.mount('/gdrive')
# Make the homework folder the working directory: later cells open their files
# (e.g. 'training_set.npz') through relative paths. The second %cd is relative
# to the directory selected by the first one.
%cd /gdrive/My Drive/
%cd [2024-2025] AN2DL Homework 1/

Mounted at /gdrive
/gdrive/My Drive
/gdrive/My Drive/[2024-2025] AN2DL Homework 1


## 🛠 Fix Codabench Dependencies

In [2]:
# Creates a file in which we specify the versions of the libraries we want
%%writefile requirements.txt
tensorflow==2.17.0
keras==3.4.1

Overwriting requirements.txt


In [3]:
!pip install -r requirements.txt

Collecting tensorflow==2.17.0 (from -r requirements.txt (line 1))
  Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting keras==3.4.1 (from -r requirements.txt (line 2))
  Downloading keras-3.4.1-py3-none-any.whl.metadata (5.8 kB)
Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (601.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.3/601.3 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras-3.4.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m53.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras, tensorflow
  Attempting uninstall: keras
    Found existing installation: keras 3.5.0
    Uninstalling keras-3.5.0:
      Successfully uninstalled keras-3.5.0
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.17.1
    Uninstalling tenso

## ⚙️ Import Libraries

In [4]:
import logging
import warnings

import numpy as np

import tensorflow as tf
import keras as tfk
from keras import layers as tfkl

# Import other libraries
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Set seeds for Python's `random`, NumPy and TensorFlow in one call.
# (Seeding only NumPy and TensorFlow leaves Python's own RNG unseeded.)
seed = 29
tfk.utils.set_random_seed(seed)

# Reduce TensorFlow verbosity
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

## ⏳ Load and Clean the Data

In [5]:
# Load the dataset. The .npz archive is opened through a context manager so
# its file handle is released as soon as both arrays have been read into memory.
with np.load('training_set.npz') as data:
    # Put images on X and labels on y
    X = data['images']
    y = data['labels']

# Every image needs exactly one label before anything is sliced or split
assert len(X) == len(y), "images and labels have different lengths"

print("Initial X shape: ", X.shape)
print("Initial y shape: ", y.shape)

# Delete outliers from the dataset: only samples 0..delete_index are kept,
# everything stored after that position is dropped
delete_index = 11958
X = X[:delete_index + 1]
y = y[:delete_index + 1]

print("Final X shape: ", X.shape)
print("Final y shape: ", y.shape)

Initial X shape:  (13759, 96, 96, 3)
Initial y shape:  (13759, 1)
Final X shape:  (11959, 96, 96, 3)
Final y shape:  (11959, 1)


## 🚆 Split into train, validation and test sets

In [None]:
# Step 1: hold out 10% of the whole dataset as the test set, keeping the
# class proportions of y in both parts
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=seed
)

# Step 2: from the remaining samples take a validation set with exactly as
# many samples as the test set; what is left is the training set
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=X_test.shape[0],
    stratify=y_train_val,
    random_state=seed,
)

# One-hot encode the labels of all three splits (8 classes)
y_train, y_val, y_test = (
    tfk.utils.to_categorical(labels, 8) for labels in (y_train, y_val, y_test)
)

# Report the shapes of the resulting sets
print('Training set shape:\t', X_train.shape, y_train.shape)
print('Validation set shape:\t', X_val.shape, y_val.shape)
print('Test set shape:\t\t', X_test.shape, y_test.shape)

Training set shape:	 (9567, 96, 96, 3) (9567, 8)
Validation set shape:	 (1196, 96, 96, 3) (1196, 8)
Test set shape:		 (1196, 96, 96, 3) (1196, 8)


## 🧮 Define Network Parameters

In [7]:
# --- Shapes taken from the prepared data ---
# Per-sample input shape (the leading sample axis is dropped)
input_shape = X_train.shape[1:]
# Width of the one-hot label vectors, i.e. the number of output units
output_shape = y_train.shape[-1]

# --- Training hyperparameters ---
epochs = 100          # number of training epochs requested from fit()
batch_size = 16       # samples passed to the network at each training step
learning_rate = 1e-5  # step size used when updating the model's weights
l2_lambda = 1e-5      # lambda of the L2 regularization

## 🔨 Import and tune the Model

In [None]:
# Import and initialize MobileNetV3Small with ImageNet weights and without its
# classification head. `classes` and `classifier_activation` are not passed:
# they only configure the top, which include_top=False removes.
# The backbone gets its own name so it is not shadowed by the final model.
base_model = tfk.applications.MobileNetV3Small(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
    pooling='avg',
    include_preprocessing=True
)

# Freeze all layers to use the backbone solely as a feature extractor
base_model.trainable = False

# Initialize regularizer (shared by the dense layers of the head)
regularizer = tfk.regularizers.L2(l2_lambda)

# Create input layer
inputs = tfkl.Input(shape=input_shape)

# Connect the backbone with the inputs; training=False keeps it in inference mode
x = base_model(inputs, training=False)

# Classification head: two Dense -> BatchNormalization -> Dropout blocks
x = tfkl.Dense(1024, activation='relu', kernel_regularizer=regularizer)(x)
x = tfkl.BatchNormalization()(x)
x = tfkl.Dropout(0.5)(x)
x = tfkl.Dense(1024, activation='relu', kernel_regularizer=regularizer)(x)
x = tfkl.BatchNormalization()(x)
x = tfkl.Dropout(0.5)(x)

# NOTE(review): this Dropout directly follows the Dropout(0.5) above, so the two
# are applied back to back during training - confirm this stacking is intended.
x = tfkl.Dropout(rate=0.3)(x)

# Output layer: one softmax unit per class
outputs = tfkl.Dense(units=output_shape, activation='softmax', dtype='float32')(x)

# Connect input and output
model = tfk.Model(inputs=inputs, outputs=outputs)

# Compile the model
loss = tfk.losses.CategoricalCrossentropy()
optimizer = tfk.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
metrics = ['accuracy']
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

  return MobileNetV3(


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_small_224_1.0_float_no_top_v2.h5
[1m4334752/4334752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


## 🧠 Train the Model for Transfer Learning

In [9]:
# Early stopping: give up after 5 epochs without a better validation accuracy
# and put the best weights seen so far back into the model
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=5, restore_best_weights=True
)

# Learning-rate schedule: halve the learning rate after 5 epochs without a
# better validation loss, never going below 1e-6
lr_scheduler = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)

# Both callbacks are handed to fit() as a single list
callbacks = [early_stopping, lr_scheduler]

In [10]:
# Fit the model on the training split and keep only the per-epoch metrics
# dictionary of the returned History object
tl_history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
).history

print('Training finished.')

# Highest validation accuracy reached in any epoch, as a percentage with two decimals
tl_final_val_accuracy = round(100 * max(tl_history['val_accuracy']), 2)
print(f'Final validation accuracy: {tl_final_val_accuracy}%')

# Persist the model; the accuracy is embedded in the file name
tl_model_filename = 'MobileNetV3Small' + str(tl_final_val_accuracy) + '.keras'
model.save(tl_model_filename)

# Drop the in-memory model; the fine-tuning step reloads it from the saved file
del model

Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 32ms/step - accuracy: 0.1832 - loss: 3.3659 - val_accuracy: 0.6789 - val_loss: 0.9908 - learning_rate: 1.0000e-05
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.4251 - loss: 2.0162 - val_accuracy: 0.7559 - val_loss: 0.7522 - learning_rate: 1.0000e-05
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.5326 - loss: 1.6274 - val_accuracy: 0.7918 - val_loss: 0.6501 - learning_rate: 1.0000e-05
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.5891 - loss: 1.3981 - val_accuracy: 0.8110 - val_loss: 0.5964 - learning_rate: 1.0000e-05
Epoch 5/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.6261 - loss: 1.2525 - val_accuracy: 0.8211 - val_loss: 0.5605 - learning_rate: 1.0000e-05
Epoch 6/100
[1m598/598[0m [32m━━━━━━━━━

## 🔧 Fine Tuning

In [11]:
# Re-load the model after transfer learning
ft_model = tfk.models.load_model(tl_model_filename)

# Look up the nested MobileNetV3Small backbone once instead of on every access
backbone = ft_model.get_layer('MobileNetV3Small')

# Mark the backbone itself as trainable; the per-layer flags are set right below
backbone.trainable = True

# Enable training only for Conv2D and DepthwiseConv2D layers; every other layer
# type is frozen. The layer classes come from the same `keras` package the model
# was built and loaded with, so the isinstance check cannot miss because of a
# different Keras implementation behind `tf.keras`.
conv_layer_types = (tfkl.Conv2D, tfkl.DepthwiseConv2D)
for layer in backbone.layers:
    layer.trainable = isinstance(layer, conv_layer_types)

In [12]:
# Set the number of layers to freeze
N = 200

ft_backbone_layers = ft_model.get_layer('MobileNetV3Small').layers

# Set the first N layers as non-trainable
for layer in ft_backbone_layers[:N]:
    layer.trainable = False

# Report what is left to fine-tune. Slicing never fails, so an N that is greater
# than or equal to the number of backbone layers would silently freeze the whole
# backbone and "fine-tuning" would only keep training the classification head.
# NOTE(review): the backbone's layer count depends on the Keras version - check
# the printed total against N.
n_trainable_layers = sum(bool(layer.trainable) for layer in ft_backbone_layers)
print(f'Backbone layers: {len(ft_backbone_layers)} total, {n_trainable_layers} trainable')
if n_trainable_layers == 0:
    warnings.warn(
        f'N={N} freezes every trainable layer of the backbone '
        f'({len(ft_backbone_layers)} layers): no backbone weights will be '
        'fine-tuned. Lower N to unfreeze the last layers.'
    )

# Compile the model (required after changing the trainable flags)
loss = tfk.losses.CategoricalCrossentropy()
optimizer = tfk.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
metrics = ['accuracy']
ft_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

## 🧠 Train Fine-Tuned Model

In [13]:
# Fit the re-compiled model with the same data, batch size, epoch budget and
# callbacks as before; only the per-epoch metrics dictionary is kept
ft_history = ft_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
).history

print('Training finished.')

# Highest validation accuracy reached in any epoch, as a percentage with two decimals
ft_final_val_accuracy = round(100 * max(ft_history['val_accuracy']), 2)
print(f'Final validation accuracy: {ft_final_val_accuracy}%')

# Persist the model under the file name that model.py loads
ft_model.save('weights.keras')

# Drop the in-memory model; the verification step reloads it from disk
del ft_model

Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 23ms/step - accuracy: 0.8578 - loss: 0.4443 - val_accuracy: 0.9231 - val_loss: 0.2728 - learning_rate: 1.0000e-05
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.8583 - loss: 0.4513 - val_accuracy: 0.9231 - val_loss: 0.2717 - learning_rate: 1.0000e-05
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.8681 - loss: 0.4330 - val_accuracy: 0.9247 - val_loss: 0.2721 - learning_rate: 1.0000e-05
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.8588 - loss: 0.4565 - val_accuracy: 0.9247 - val_loss: 0.2716 - learning_rate: 1.0000e-05
Epoch 5/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.8664 - loss: 0.4323 - val_accuracy: 0.9256 - val_loss: 0.2704 - learning_rate: 1.0000e-05
Epoch 6/100
[1m598/598[0m [32m━━━━━━━━━━

## ✅ Verify that the weights work as intended

In [14]:
# Reload the saved model from disk, exactly as the submission will
model = tfk.models.load_model('weights.keras')

# Model outputs for the test set first, then for the validation set
y_pred_test = model.predict(X_test)
y_pred_val = model.predict(X_val)

# Reduce predictions and one-hot targets to class indices
y_pred_test_classes = y_pred_test.argmax(axis=1)
y_pred_val_classes = y_pred_val.argmax(axis=1)
y_test_classes = y_test.argmax(axis=1)
y_val_classes = y_val.argmax(axis=1)

# Accuracy = fraction of samples whose predicted class equals the true class
test_accuracy = np.mean(y_test_classes == y_pred_test_classes)
val_accuracy = np.mean(y_val_classes == y_pred_val_classes)

print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 91ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Validation Accuracy: 0.9264
Test Accuracy: 0.9105


## 📊 Create the model.py

In [15]:
%%writefile model.py
import numpy as np

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl


class Model:
    """Wrapper exposing the saved network through a single `predict` method."""

    def __init__(self):
        # The network is read from 'weights.keras', resolved against the
        # current working directory
        self.neural_network = tfk.models.load_model('weights.keras')

    def predict(self, X):
        """Run the network on X.

        A 2-D network output (one score vector per sample) is reduced to the
        index of the highest score in each row; an output of any other rank is
        returned unchanged.
        """
        outputs = self.neural_network.predict(X)
        return np.argmax(outputs, axis=1) if outputs.ndim == 2 else outputs

Overwriting model.py


## 📁 Export the ZIP file

In [16]:
# Set filename for the zip file
from datetime import datetime
filename = f'submission_{datetime.now().strftime("%y%m%d_%H%M%S")}.zip'

# Create a zip file with the provided filename, containing model and weights
!zip {filename} model.py weights.keras

  adding: model.py (deflated 48%)
  adding: weights.keras (deflated 9%)
