# Artificial Neural Networks and Deep Learning

---

## Homework 1


## 🌐 Connect Colab to Google Drive

In [1]:
from google.colab import drive
# Flush pending writes and unmount Drive; per the recorded output this is a
# no-op when Drive is not mounted yet.
drive.flush_and_unmount()

# Mount Google Drive under /gdrive (force_remount=True is passed explicitly).
drive.mount('/gdrive', force_remount=True)
# Switch the working directory to the homework folder on Drive.
%cd /gdrive/My Drive/[2024-2025] AN2DL/Homework 1

Drive not mounted, so nothing to flush and unmount.
Mounted at /gdrive
/gdrive/My Drive/[2024-2025] AN2DL/Homework 1


## ⚙️ Import Libraries

In [2]:
!pip install keras-cv --upgrade

Collecting keras-cv
  Downloading keras_cv-0.9.0-py3-none-any.whl.metadata (12 kB)
Collecting keras-core (from keras-cv)
  Downloading keras_core-0.1.7-py3-none-any.whl.metadata (4.3 kB)
Downloading keras_cv-0.9.0-py3-none-any.whl (650 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m650.7/650.7 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-core, keras-cv
Successfully installed keras-core-0.1.7 keras-cv-0.9.0


In [12]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras import models as tfkm

from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#extra imports
import math
from sklearn.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image

from keras_cv.layers import RandAugment

import random

# Single seed shared by every random number generator used in this notebook.
seed = 42

# Seed Python's `random` (used by `random.sample` further down), NumPy and
# TensorFlow so that repeated runs draw the same random numbers.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Support Functions


### Preprocess for ResNet

In [13]:
from tensorflow.keras.applications.resnet50 import preprocess_input

def preprocess_images_in_batches_resnet(images, batch_size=1024):
    """
    Preprocess images for ResNet in batches.

    Each batch is cast to float32 and passed through `preprocess_input`
    (imported from tensorflow.keras.applications.resnet50); the results are
    written into one preallocated float32 output array.

    Parameters:
    - images: NumPy array of shape (num_images, height, width, channels).
    - batch_size: Number of images to process in each batch (must be >= 1).

    Returns:
    - preprocessed_images: float32 NumPy array with the same shape as images.

    Raises:
    - ValueError: if batch_size is smaller than 1.
    """
    # A batch size of 0 used to fail with ZeroDivisionError and a negative one
    # skipped the loop and returned the uninitialised output buffer, so both
    # are rejected up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    images = np.asarray(images)

    # Output buffer; every slot is overwritten by the loop below.
    preprocessed_images = np.empty_like(images, dtype=np.float32)

    for start in range(0, len(images), batch_size):
        end = min(start + batch_size, len(images))

        # astype() returns a fresh float32 copy, so the caller's array is
        # never modified by preprocess_input.
        batch = images[start:end].astype('float32')
        preprocessed_images[start:end] = preprocess_input(batch)

    return preprocessed_images

### Add flipped version of minority class

In [14]:
def get_flipped_minority_class(images, labels, minority_label=1):
    """
    Return horizontally flipped copies of every image of the minority class.

    Parameters:
    - images: array of shape (num_images, height, width, ...).
    - labels: array holding one label per image, shape (num_images,) or
      (num_images, 1).
    - minority_label: label value whose images are flipped (default 1).

    Returns:
    - flipped_images: array of shape (num_selected, height, width, ...) with
      the same dtype as images; it keeps the image dimensions even when no
      image matches, so it can always be concatenated with `images`.
    - flipped_labels: the labels of the selected images, in the same layout as
      `labels`.
    """
    images = np.asarray(images)
    labels = np.asarray(labels)

    # One boolean per image, whether labels are stored as (N,) or (N, 1).
    is_minority = (labels == minority_label).reshape(-1)

    # Axis 2 of the batch is the width axis, i.e. what np.fliplr reverses on a
    # single (height, width, ...) image. np.flip returns a view, so make the
    # result a contiguous array of its own.
    flipped_images = np.ascontiguousarray(np.flip(images[is_minority], axis=2))
    flipped_labels = labels[is_minority]
    return flipped_images, flipped_labels

# Data Exploration

### Print data info

In [6]:
def print_data_info(data):
    """
    Print a summary of a loaded .npz archive.

    Lists the archive keys, the shape and dtype of every stored array, and how
    many entries of the 'labels' array carry each distinct value.
    """
    keys = data.files
    print("Keys in the .npz file:", keys)

    for key in keys:
        array = data[key]
        print(f"Array '{key}' - shape: {array.shape}, dtype: {array.dtype}")

    print("Number of label types, and count of elements in each label type")
    values, occurrences = np.unique(data['labels'], return_counts=True)
    print(dict(zip(values, occurrences)))

### Plot bar charts

In [7]:
def plot_bar_chart(labels, label_counts):
    """Draw a bar chart with one bar per label (height = its count) and show it."""
    axes = plt.gca()
    axes.bar(labels, label_counts)
    axes.set_xlabel('Label')
    axes.set_ylabel('Count')
    axes.set_title('Label Distribution')
    plt.show()

Note that the data is unbalanced. Possible solutions: image augmentation (e.g. rotating every image by π/2, π and 3π/2) and weighting the classes during training.


### Display a random subsample of images




In [15]:
def visualize_samples(images, labels, num_samples=5):
    """
    Show a grid with randomly chosen example images of every class.

    Parameters:
    - images: array of images, indexed along its first axis.
    - labels: array with one label per image.
    - num_samples: maximum number of images drawn per class; classes with
      fewer images simply fill fewer cells of their row.

    The grid has one row per distinct label and `num_samples` columns.
    """
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    plt.figure(figsize=(15, 15))
    for row, label in enumerate(unique_labels):
        idx = np.where(labels == label)[0]
        # Sampling without replacement raises when asked for more items than
        # are available, so never request more than the class contains.
        n_pick = min(num_samples, len(idx))
        selected_images = np.random.choice(idx, n_pick, replace=False)
        for col, img_idx in enumerate(selected_images):
            plt.subplot(len(unique_labels), num_samples, row * num_samples + col + 1)
            plt.imshow(images[img_idx])
            plt.axis('off')
            plt.title(f"Class {label}")
    plt.show()


source: https://machinelearningmastery.com/best-practices-for-preparing-and-augmenting-image-data-for-convolutional-neural-networks/

### Print random pixel values

In [9]:
import random

def print_pixel_values(images, num_images=5):
    """
    Prints pixel values for a specified number of random images in the dataset.

    Parameters:
        images (np.array): Array of images, assumed to be in the shape (num_images, height, width, channels).
        num_images (int): Number of random images to inspect. Values larger than
            the dataset size are reduced to the dataset size.
    """
    total = images.shape[0]

    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the request to the number of available images.
    num_images = max(0, min(num_images, total))
    indices = random.sample(range(total), num_images)

    # Loop through selected indices and print pixel values for each image
    for i, idx in enumerate(indices):
        image = images[idx]
        print(f"\nImage {i + 1} (Index {idx}):")
        print("Pixel values (sample):")
        print(image[:5, :5, :])  # Print a 5x5 patch of pixels for brevity
        print(f"Min pixel value: {image.min()}")
        print(f"Max pixel value: {image.max()}\n")

# 🛠️ Train and Save the Model

### Data preprocessing

### Getting data

In [16]:
from pathlib import Path

# Folder on the mounted Drive that holds the prepared train/validation arrays.
DATA_DIR = Path('/gdrive/My Drive/[2024-2025] AN2DL/Homework 1')

train_data_path = str(DATA_DIR / 'X_train_MAIN.npy')
train_labels_path = str(DATA_DIR / 'y_train_MAIN.npy')
val_data_path = str(DATA_DIR / 'X_val_MAIN.npy')
val_labels_path = str(DATA_DIR / 'y_val_MAIN.npy')

# Fail early with a single message that lists every missing file, instead of
# discovering them one at a time while loading.
missing_files = [
    path
    for path in (val_data_path, val_labels_path, train_data_path, train_labels_path)
    if not Path(path).exists()
]
if missing_files:
    raise FileNotFoundError(f"Missing dataset file(s): {missing_files}")

# Load the datasets
X_val = np.load(val_data_path)
y_val = np.load(val_labels_path)
X_train = np.load(train_data_path)
y_train = np.load(train_labels_path)

# Verify loaded data
print("Datasets loaded successfully!")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"y_val shape: {y_val.shape}")

FileNotFoundError: [Errno 2] No such file or directory: '/gdrive/My Drive/[2024-2025] AN2DL/Homework 1/X_val_MAIN.npy'

In [None]:
# Count how many training samples carry each distinct label value and print
# the result as a {label: count} mapping.
unique_labels, label_counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique_labels, label_counts)))

### Adding flipped versions of minority

In [None]:
# Build horizontally flipped copies of the class-1 training images.
flipped_images, flipped_labels = get_flipped_minority_class(X_train, y_train)

# Quick look at the first two labels of the flipped set.
print(flipped_labels[:2])

# Label distribution before the flipped images are added.
unique_labels, label_counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique_labels, label_counts)))

# Append the flipped samples to the training set.
# NOTE: X_train / y_train are rebound in place, so running this cell again
# appends flipped copies a second time.
X_train = np.concatenate((X_train, flipped_images))
y_train = np.concatenate((y_train, flipped_labels))
del flipped_images, flipped_labels

# Label distribution after the flipped images were added.
unique_labels, label_counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique_labels, label_counts)))

### Get class weights

In [None]:
# Count the samples of every class that actually occurs in the training labels.
# Deriving the classes from the data (instead of hard-coding 0 and 1) avoids a
# division by zero for an absent class; plain Python ints keep the dictionaries
# free of numpy scalar types.
present_classes, samples_per_class = np.unique(y_train, return_counts=True)
class_counts = {int(label): int(count) for label, count in zip(present_classes, samples_per_class)}

print(f"class counts: {class_counts}")

# Get the unique classes and their counts
classes = np.array(list(class_counts.keys()))
counts = np.array(list(class_counts.values()))

# Calculate class weights: total_samples / (number_of_classes * class_count),
# so classes with fewer samples receive proportionally larger weights.
total_samples = int(counts.sum())
class_weights = {
    label: total_samples / (len(class_counts) * count)
    for label, count in class_counts.items()
}
print(f"class weights: {class_weights}")

### Preprocess

In [None]:
from tensorflow.keras.utils import to_categorical

# NOTE: this cell rebinds X_train, X_val, y_train and y_val in place, so it is
# not idempotent: running it a second time would preprocess the images and
# one-hot encode the labels again.
print(f"X_train shape before preprocess: {X_train.shape}")
print(f"y_train shape before preprocess: {y_train.shape}")


# Label distribution of the (still integer-encoded) training labels.
unique_labels, label_counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique_labels, label_counts)))

# Apply the ResNet input preprocessing to both splits (returns float32 arrays).
X_train = preprocess_images_in_batches_resnet(X_train)
X_val = preprocess_images_in_batches_resnet(X_val)

# One-hot encode the labels. to_categorical is called without num_classes here,
# so the width of each matrix is derived separately from each label array.
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

print(f"X_train shape after preprocess: {X_train.shape}")
print(f"y_train shape after preprocess: {y_train.shape}")

# Per-class sample counts: column sums of the one-hot label matrix.
# class_counts is rebound to a NumPy array here.
class_counts = np.sum(y_train, axis=0)
print(f"class counts: {dict(enumerate(class_counts))}")

### Getting variables

In [None]:
# Image dimensions, read directly from the shape of the training array.
HEIGHT, WIDTH, CHANNELS = X_train.shape[1:4]

# The preprocessing cell one-hot encodes y_train, so the number of classes is
# the width of the label matrix. np.unique on one-hot labels only ever sees the
# values {0, 1} and therefore reports 2 no matter how many classes exist.
# Fall back to counting distinct values when the labels are still class ids.
if y_train.ndim > 1 and y_train.shape[1] > 1:
    NUM_CLASSES = y_train.shape[1]
else:
    NUM_CLASSES = len(np.unique(y_train))

# Number of training epochs
epochs = 15

# Batch size for training
batch_size = 64

# Learning rate: step size for updating the model's weights
learning_rate = 0.001

# Print the defined parameters
print("Epochs:", epochs)
print("Batch Size:", batch_size)
print("Learning Rate:", learning_rate)
print("Image height:", HEIGHT)
print("Image width:", WIDTH)
print("Image channels:", CHANNELS)
print("Number of classes:", NUM_CLASSES)

### Defining *weights*

### Model definition

In [None]:
from tensorflow.keras.applications import ResNet50

def build_model(
    input_shape=(HEIGHT, WIDTH, CHANNELS),
    output_shape=NUM_CLASSES,
    learning_rate=learning_rate,
    seed=seed
):
    """
    Assemble and compile a classifier made of a frozen ResNet-50 plus a dense head.

    Parameters:
    - input_shape: shape of one input image, (height, width, channels).
    - output_shape: number of units of the softmax output layer.
    - learning_rate: learning rate handed to the Adam optimizer.
    - seed: value passed to tf.random.set_seed and to the dropout layer.

    Returns:
    - The compiled Keras model, named 'class_1_detector'.
    """
    tf.random.set_seed(seed)

    # ImageNet-initialised ResNet-50 without its classification top; its
    # weights are frozen so only the new head is trained.
    backbone = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )
    backbone.trainable = False

    # The backbone is called with training=False.
    inputs = tfkl.Input(shape=input_shape, name='Input')
    features = backbone(inputs, training=False)

    # Classification head, applied in the listed order:
    # pooling -> dense(128) -> dropout -> dense(64) -> softmax output.
    head_layers = [
        tfkl.GlobalAveragePooling2D(name='global_avg_pool'),
        tfkl.Dense(128, activation='relu', name='dense_1'),
        tfkl.Dropout(0.3, seed=seed, name='dropout_1'),
        tfkl.Dense(64, activation='relu', name='dense_2'),
        tfkl.Dense(units=output_shape, activation='softmax', name='output'),
    ]
    for head_layer in head_layers:
        features = head_layer(features)

    model = tf.keras.Model(inputs=inputs, outputs=features, name='class_1_detector')

    # Categorical cross-entropy matches the one-hot encoded labels.
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['accuracy', 'precision', 'recall']
    )

    return model

### Building the model

In [None]:
# Build the model with build_model's default arguments (input shape, number of
# classes, learning rate and seed defined in the cells above).
model = build_model()

# Display a summary of the model architecture, expanding the nested ResNet-50
# and showing which layers are trainable.
model.summary(expand_nested=True, show_trainable=True)

# Plot the model architecture
#tfk.utils.plot_model(model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

### Training the model

In [None]:
# Epochs without validation-accuracy improvement tolerated before stopping
patience = 10

# Stop on stalled validation accuracy and roll back to the best weights seen
early_stopping_options = dict(
    monitor='val_accuracy',
    mode='max',
    patience=patience,
    restore_best_weights=True,
)
early_stopping = tfk.callbacks.EarlyStopping(**early_stopping_options)

# Halve the learning rate after 3 epochs without validation-loss improvement,
# never going below 1e-6
reduce_lr_options = dict(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)
reduce_lr = tfk.callbacks.ReduceLROnPlateau(**reduce_lr_options)

# Callbacks handed to model.fit
callbacks = [early_stopping, reduce_lr]

In [None]:
# Train the model with the early-stopping and learning-rate callbacks, weighting
# the loss per class with `class_weights`. Only the history dict is kept.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    class_weight=class_weights
).history

# Report the best validation accuracy reached in any epoch, as a percentage
final_val_accuracy = round(max(history['val_accuracy'])* 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model under a fixed filename (the accuracy is not part of
# the name)
model_filename = 'class7.keras'
model.save(model_filename)

# Delete the model to free up resources; it has to be reloaded from
# `model_filename` before it can be used again
del model

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Reload the model saved by the training cell so that this cell does not depend
# on `model` still being in memory.
model = tfk.models.load_model(model_filename)

# The network outputs one probability per class and y_val is one-hot encoded;
# turn both into class indices, which is what the sklearn metrics compare.
y_val_true_classes = np.argmax(y_val, axis=1)
y_val_pred_classes = np.argmax(model.predict(X_val, verbose=0), axis=1)

# Calculate metrics
accuracy = accuracy_score(y_val_true_classes, y_val_pred_classes)
precision = precision_score(y_val_true_classes, y_val_pred_classes, average='weighted')  # Use weighted average for multi-class
recall = recall_score(y_val_true_classes, y_val_pred_classes, average='weighted')
f1 = f1_score(y_val_true_classes, y_val_pred_classes, average='weighted')

# Print the results
print(f'Validation Accuracy: {accuracy}')
print(f'Validation Precision: {precision}')
print(f'Validation Recall: {recall}')
print(f'Validation F1-Score: {f1}')

In [None]:
# One figure per metric: (history key, training label, validation label, title)
curve_specs = [
    ('loss', 'Training loss', 'Validation loss', 'Loss'),
    ('accuracy', 'Training accuracy', 'Validation accuracy', 'Accuracy'),
]

for metric_key, train_label, val_label, figure_title in curve_specs:
    # Training and validation curves of the same metric share one figure
    plt.figure(figsize=(15, 2))
    plt.plot(history[metric_key], label=train_label, alpha=.8)
    plt.plot(history['val_' + metric_key], label=val_label, alpha=.8)
    plt.title(figure_title)
    plt.legend()
    plt.grid(alpha=.3)

plt.show()

In [None]:
from sklearn.metrics import classification_report

# Reload the model saved by the training cell so that this cell does not depend
# on `model` still being in memory.
model = tfk.models.load_model(model_filename)

# Class probabilities, one column per class.
y_pred = model.predict(X_val)

# Compare class indices: argmax of the predicted probabilities against argmax
# of the one-hot validation labels (thresholding each column at 0.5 would
# produce a multilabel report instead of a per-class one).
y_true_classes = np.argmax(y_val, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# labels=[0, 1] keeps both rows in the report even if one class is absent.
print(classification_report(
    y_true_classes,
    y_pred_classes,
    labels=[0, 1],
    target_names=["negative", "positive"],
))


### Activation visualisation (not sure what this is; apparently leftover code)

In [None]:
def extract_activations(model, X, num_images):
    """
    Compute two sets of intermediate activations for the first `num_images` inputs.

    Only the layers listed directly in `model.layers` are inspected.

    Returns:
    - first_activations: output of the layers up to and including the first
      Conv2D layer.
    - second_activations: output of the layers up to and including the first
      MaxPooling2D / AveragePooling2D layer that follows that convolution.

    Raises:
    - ValueError: if no Conv2D layer is found, or no 2D pooling layer follows it.
    """
    # Position of the first convolution (None when there is none)
    conv_position = next(
        (pos for pos, candidate in enumerate(model.layers)
         if isinstance(candidate, tfk.layers.Conv2D)),
        None,
    )
    if conv_position is None:
        raise ValueError("The model does not contain a 2D convolution")

    # Sub-network ending at the first convolution
    conv_stack = tfk.Sequential(model.layers[:conv_position + 1])
    first_activations = conv_stack(X[:num_images])

    # Position of the first pooling layer located after that convolution
    pooling_types = (tfk.layers.MaxPooling2D, tfk.layers.AveragePooling2D)
    pool_position = next(
        (pos for pos in range(conv_position + 1, len(model.layers))
         if isinstance(model.layers[pos], pooling_types)),
        None,
    )
    if pool_position is None:
        raise ValueError("The model does not contain a 2D pooling operation after the first convolution")

    # Sub-network ending at that pooling layer
    pooled_stack = tfk.Sequential(model.layers[:pool_position + 1])
    second_activations = pooled_stack(X[:num_images])

    return first_activations, second_activations

def find_last_conv_layer(model):
    """
    Return the name of the last Conv2D layer listed in `model.layers`.

    Raises:
    - ValueError: if `model.layers` contains no Conv2D layer.
    """
    conv_names = [
        candidate.name
        for candidate in model.layers
        if isinstance(candidate, tfk.layers.Conv2D)
    ]
    if not conv_names:
        raise ValueError("No Conv2D layer found in the model.")
    return conv_names[-1]

def visualize(model, X, y, unique_labels, num_images=50, display_activations=True):
    """
    Show one random sample with its prediction and, optionally, its activations.

    Parameters:
    - model: Keras model; its top-level layers must contain a Conv2D followed by
      a 2D pooling layer (required by extract_activations).
    - X: array of input images.
    - y: one-hot encoded labels matching X.
    - unique_labels: class names/values, indexed by class id.
    - num_images: the sample is drawn among the first `num_images` inputs
      (reduced to len(X) when X is smaller).
    - display_activations: when True, also plot the first 8 channels of the
      first convolution output and the first 16 channels of the first pooling
      output.
    """
    # `gridspec` is not imported at notebook level, so import it here.
    from matplotlib import gridspec

    # Never index past the end of X when it holds fewer than num_images samples
    num_images = min(num_images, len(X))

    # Extract activations from the model
    first_activations, second_activations = extract_activations(model, X, num_images)

    # Select a random image for prediction and visualisation
    image = np.random.randint(0, num_images)
    predictions = model.predict(np.expand_dims(X[image], axis=0), verbose=0)
    class_probabilities = np.squeeze(predictions, axis=0)
    class_int = np.argmax(predictions[0])
    class_str = unique_labels[class_int]

    # Create figure layout for displaying the image and predictions
    fig = plt.figure(constrained_layout=True, figsize=(16, 4))
    gs = gridspec.GridSpec(1, 2, figure=fig, width_ratios=[1.5, 1.5], wspace=0)

    # Display the selected image with the true class
    ax1 = fig.add_subplot(gs[0])
    ax1.set_title(f"True class: {unique_labels[np.argmax(y[image])]}", loc='left')
    if X[image].shape[-1] == 1:
        # Single-channel images are drawn with a colormap
        ax1.imshow(np.squeeze(X[image]), cmap='bone', vmin=0., vmax=1.)
    else:
        ax1.imshow(np.squeeze(X[image]), vmin=0., vmax=1.)
    ax1.axis('off')

    # Display the prediction bar
    ax2 = fig.add_subplot(gs[1])
    ax2.barh(unique_labels, class_probabilities, color=plt.get_cmap('tab10').colors)
    ax2.set_title(f"Predicted class: {class_str} (Confidence: {max(class_probabilities):.2f})", loc='left')
    ax2.grid(alpha=0.3)
    ax2.set_xlim(0.0, 1.0)

    plt.show()

    # Display activations if required
    if display_activations:
        # Visualise the activations from the first convolutional layer
        fig, axes = plt.subplots(1, 8, figsize=(16, 14))
        for i in range(8):
            ax = axes[i]
            ax.imshow(first_activations[image, :, :, i], cmap='bone', vmin=0., vmax=1.)
            ax.axis('off')
            if i == 0:
                ax.set_title('First convolution activations', loc='left')
        plt.tight_layout()
        plt.show()

        # Visualise the activations from the first pooling layer
        fig, axes = plt.subplots(2, 8, figsize=(16, 5))
        for i in range(16):
            ax = axes[i // 8, i % 8]
            ax.imshow(second_activations[image, :, :, i], cmap='bone', vmin=0., vmax=1.)
            ax.axis('off')
            if i == 0:
                # These are the outputs of the first pooling layer
                ax.set_title('First pooling activations', loc='left')
        plt.tight_layout()
        plt.show()

# Example usage
# NOTE(review): X_test and y_test are not defined in any cell visible in this
# notebook, and `model` is deleted at the end of the training cell — confirm
# which model and data this call is meant to use before running it.
visualize(model, X_test, y_test, unique_labels, display_activations=True)

In [None]:
# Predict labels for the entire test set
# NOTE(review): X_test is not defined in any cell visible in this notebook —
# confirm where the test split is supposed to come from.
predictions = model.predict(X_test, verbose=0)

# Display the shape of the predictions
print("Predictions Shape:", predictions.shape)

In [None]:
# Load the saved model
# NOTE(review): no visible cell writes 'CIFAR10_CNN_69.92.keras' (the training
# cell saves 'class7.keras') — presumably a leftover filename; verify.
model = tfk.models.load_model('CIFAR10_CNN_69.92.keras')

# Display a summary of the model architecture
model.summary(expand_nested=True, show_trainable=True)

# Plot the model architecture
tfk.utils.plot_model(model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

### Old Model

In [None]:
# Earlier baseline: a small CNN with three Conv2D/MaxPooling2D stages followed
# by a dense classifier, trained from scratch.
model = tfkm.Sequential([
    tfkl.InputLayer(input_shape=(HEIGHT, WIDTH, CHANNELS)),  # Input shape
    tfkl.Conv2D(32, (3, 3), activation='relu'),  # First convolutional layer
    tfkl.MaxPooling2D((2, 2)),  # Pooling layer

    tfkl.Conv2D(64, (3, 3), activation='relu'),  # Second convolutional layer
    tfkl.MaxPooling2D((2, 2)),  # Pooling layer

    tfkl.Conv2D(128, (3, 3), activation='relu'),  # Third convolutional layer
    tfkl.MaxPooling2D((2, 2)),  # Pooling layer

    tfkl.Flatten(),  # Flatten the output of the convolutional layers
    tfkl.Dense(128, activation='relu'),  # Fully connected layer

    tfkl.Dense(NUM_CLASSES, activation='softmax'),  # Output layer (softmax for multi-class classification)
  ])

# Compile the model
# NOTE(review): 'sparse_categorical_crossentropy' is used here while the
# preprocessing cell one-hot encodes y_train — confirm which label encoding
# this cell was written for.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# NOTE(review): `train_dataset` is not defined in any visible cell.
model.fit(train_dataset, epochs=10)

# This is the only visible cell that writes 'weights.keras'.
model.save('weights.keras')

# Testing the model on the test set
# NOTE(review): X_test and y_test are not defined in any visible cell.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Batch the test samples and prefetch batches
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

del model

# 📊 Prepare Your Submission

To prepare your submission, create a `.zip` file that includes all the necessary code to run your model. It **must** include a `model.py` file with the following class:

```python
# file: model.py
class Model:
    def __init__(self):
        """Initialize the internal state of the model."""
        self.neural_network = tf.keras.models.load_model('weights.keras')



    def predict(self, X):
        """Return a numpy array with the labels corresponding to the input X."""
```

The next cell shows an example implementation of the `model.py` file, which includes loading model weights from the `weights.keras` file and conducting predictions on provided input data. The `.zip` file is created and downloaded in the last notebook cell.

❗ Feel free to modify the method implementations to better fit your specific requirements, but please ensure that the class name and method interfaces remain unchanged.

In [None]:
%%writefile model.py
import numpy as np

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl

import cv2

class Model:
    """Submission wrapper: loads 'weights.keras' and predicts class indices."""

    def __init__(self):
        """
        Initialize the internal state of the model. Note that the __init__
        method cannot accept any arguments.

        Loads the trained network from 'weights.keras' in the current
        working directory.
        """
        self.neural_network = tfk.models.load_model('weights.keras')

    def make_background_white(self, image, lower_purple=(105, 25, 25), upper_purple=(165, 255, 255),
                              lower_red=(165, 70, 130), upper_red=(210,150,150)):
        """
        Return a copy of `image` in which every pixel outside the given purple
        and red HSV ranges is replaced with white.

        Parameters:
        - image: uint8 RGB image as a NumPy array (a tf.Tensor is converted first).
        - lower_purple / upper_purple: inclusive HSV bounds of the purple range.
        - lower_red / upper_red: inclusive HSV bounds of the red range.

        Returns:
        - uint8 array with the same shape as `image`.

        NOTE(review): OpenCV stores 8-bit hue in the range 0-179, so the hue
        bound 210 in `upper_red` can presumably never be reached — confirm the
        intended range.
        """
        # Ensure image is numpy array with correct type
        if isinstance(image, tf.Tensor):
            image = image.numpy()
        assert isinstance(image, np.ndarray), f"Expected numpy array, got {type(image)}"
        assert image.dtype == np.uint8, f"Expected uint8 data type, got {image.dtype}"

        # Convert RGB to HSV
        hsv_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

        # Pixels falling in either colour range are kept
        purple_mask = cv2.inRange(hsv_image, lower_purple, upper_purple)
        red_mask = cv2.inRange(hsv_image, lower_red, upper_red)
        combined_mask = cv2.bitwise_or(purple_mask, red_mask)

        # Everything outside the mask becomes white (255 in every channel)
        result_image = image.copy()
        result_image[combined_mask == 0] = 255

        return result_image

    def predict(self, X):
        """
        Predict the labels corresponding to the input X. Note that X is a numpy
        array of shape (n_samples, 96, 96, 3) and the output should be a numpy
        array of shape (n_samples,). Therefore, outputs must not be one-hot
        encoded.

        Every image gets a white background (see make_background_white) and is
        rescaled to [0, 1] before being passed to the network; 2-D network
        outputs are reduced to class indices with argmax.

        NOTE(review): the training cells of the notebook feed the network data
        processed with the ResNet `preprocess_input`, without background
        whitening — confirm that 'weights.keras' was trained with the same
        preprocessing that is applied here.
        """
        X = np.asarray(X)

        # Nothing to predict: return an empty label vector instead of handing
        # an empty, shapeless array to the network.
        if len(X) == 0:
            return np.empty((0,), dtype=np.int64)

        # Making background white
        X = np.array([self.make_background_white(image) for image in X])

        # Normalizing the images to [0, 1]
        X = X / 255.0

        preds = self.neural_network.predict(X)
        if len(preds.shape) == 2:
            # One score per class -> index of the highest score
            preds = np.argmax(preds, axis=1)
        return preds

In [None]:
from datetime import datetime
# Archive name carrying the current timestamp (yymmdd_HHMMSS)
filename = f'submission_{datetime.now().strftime("%y%m%d_%H%M%S")}.zip'

# Add files to the zip command if needed
# NOTE(review): the training cell saves the ResNet model as 'class7.keras';
# 'weights.keras' is only written by the "Old Model" cell — confirm that the
# intended weights file is the one being zipped.
!zip {filename} model.py weights.keras

from google.colab import files
# Download the archive through the browser
files.download(filename)