# Set up the Kaggle environment and import libraries

In [1]:
# Note: the notebook is configured to be executed on Kaggle, as Colab doesn't
# have enough RAM to support the execution. In Kaggle, create a dataset containing
# the preprocessed data and import it in the input section.

import numpy as np
import pandas as pd

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [3]:
# Fix randomness and hide warnings
# We use the usual imports and setup.

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np

import logging
import random

# Single seed shared by the random number generators used in the notebook.
# It has to be defined here: the TensorFlow setup cell further down passes
# `seed` to tf.random.set_seed and would fail with a NameError otherwise.
seed = 42

# Seed Python's and NumPy's global generators.
random.seed(seed)
np.random.seed(seed)

In [None]:
# Pin TensorFlow to 2.14.0. This is needed to save the model; new versions of
# tensorflow cause issues when saving larger models.
# Run this cell before the cell that imports tensorflow.

!pip install -U tensorflow==2.14.0

In [None]:
# Import tensorflow
import tensorflow as tf

from tensorflow import keras as tfk
from keras import layers as tfkl
# Keep TensorFlow quiet: only errors are reported by its loggers, and
# autograph verbosity is set to 0.
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.autograph.set_verbosity(0)

# Seed TensorFlow through both the current and the compat.v1 API.
# `seed` must already be defined by an earlier cell.
for set_tf_seed in (tf.random.set_seed, tf.compat.v1.set_random_seed):
    set_tf_seed(seed)

# Show which TensorFlow version is actually loaded.
print(tf.__version__)

# Import other libraries
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns
from PIL import Image
import pandas as pd

# Load the preprocessed dataset

In [None]:
# The dataset is already without duplicates, outliers,
# and SMOTE has been applied. See "Dateset preprocessing" notebook.
#
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code. Keep it only for archives we produced ourselves.
#
# The archive is opened in a `with` block so that its file handle is closed
# once both arrays have been copied into memory.
with np.load('/kaggle/input/smoted-dataset/smoted_dataset.npz', allow_pickle=True) as data:
    X = np.array(data['arr_0'])  # images
    y = np.array(data['arr_1'])  # labels ('healthy' / 'unhealthy')

## One-hot encoding

In [None]:
# Replace each string label in y with its integer class id, in place.
label_ids = {'healthy': 0, 'unhealthy': 1}
for label_name, label_id in label_ids.items():
    y[y == label_name] = label_id

# One-hot encode, using as many columns as there are distinct values in y.
n_classes = len(np.unique(y))
y = tfk.utils.to_categorical(y, n_classes)

# Split the dataset

In [None]:
# Hold out 30% of the data as validation set, stratifying on the class index
# recovered from the one-hot labels. No test split is created here: the test
# set used is the one in codalab.
class_index = np.argmax(y, axis=1)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=.30,
    stratify=class_index,
)

# Report the shapes of the two resulting datasets
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

# Augmentation

## Augmenting the training set with CutMix and MixUp

In [None]:
# We use the Keras CV library for the MixUp and CutMix.
# Note that this augmentation has to be done differently over the
# training set and the validation set resulting from the split;
# therefore it cannot be performed together with the preprocessing.
# NOTE(review): keras-cv is installed without a version pin, so the installed
# version can change between runs. Consider pinning one that is known to work
# with the pinned tensorflow==2.14.0.

!pip install keras-cv

In [None]:
import keras_cv as kcv

In [None]:
# MixUp is run twice over the whole training set, with the same layer, so
# that there are more new samples to add to the training set.
mixup = kcv.layers.MixUp(alpha=0.2)
mixup_result_1, mixup_result_2 = (
    mixup({'images': X_train, 'labels': y_train}) for _ in range(2)
)

In [None]:
# Show one mixed-up image together with its label. The next cell shows the
# sample at the same index from the second MixUp call, to check that the two
# calls yield different results.
i = 0
first_label = mixup_result_1["labels"][i]
first_image = mixup_result_1["images"][i]
print(first_label)
plt.imshow(first_image)

# Number of samples produced by the first MixUp call
print(len(mixup_result_1["images"]))

In [None]:
# Same sample index `i` as in the previous cell, taken from the second MixUp call.
second_label = mixup_result_2["labels"][i]
second_image = mixup_result_2["images"][i]
print(second_label)
plt.imshow(second_image)

# Number of samples produced by the second MixUp call
print(len(mixup_result_2["images"]))

In [None]:
# We keep only the results where the convex combination of input images and
# labels uses a parameter strictly between 0 and 1, so as to avoid unnecessary
# duplicates of the original non-augmented training set.

def _strictly_mixed(result):
    """Return the (images, labels) of a MixUp result that are real mixtures.

    A sample is kept when the first component of its label lies strictly
    between 0 and 1. The selection is done with one boolean mask over the
    whole batch instead of a Python loop over every sample.

    Args:
      result: dict with "images" and "labels" entries, as returned by the
        MixUp layer.

    Returns:
      Tuple (images, labels) of NumPy arrays holding only the kept samples.
      If no sample is kept, both arrays have length 0 along the first axis but
      keep their other dimensions, so they can still be concatenated.
    """
    images = np.asarray(result["images"])
    labels = np.asarray(result["labels"])
    first_component = labels[:, 0]
    keep = (first_component > 0.0) & (first_component < 1.0)
    return images[keep], labels[keep]


# Filter both MixUp results with the same helper and stack what is left.
# X_mu and y_mu are NumPy arrays; len() and np.concatenate work on them as before.
_mixed_parts = [_strictly_mixed(result) for result in (mixup_result_1, mixup_result_2)]
X_mu = np.concatenate([images for images, _ in _mixed_parts], axis=0)
y_mu = np.concatenate([labels for _, labels in _mixed_parts], axis=0)

In [None]:
# Number of images (and of matching labels) that MixUp adds to the training set.
for kept in (X_mu, y_mu):
    print(len(kept))

In [None]:
# Repeat the same operations using CutMix augmentation.
# Here there is no need to remove anything as, by library implementation, no
# result from CutMix is ever equal to one of the starting images.
# In the end we will have more CutMix images than MixUp ones, as different
# papers have shown that CutMix allows for a greater performance improvement
# than MixUp.

# Two independent CutMix layers, each applied once to the whole training set.
cutmix_result, cutmix_result_2 = (
    kcv.layers.CutMix()({'images': X_train, 'labels': y_train}) for _ in range(2)
)

In [None]:
# Show one CutMix sample with its label, then the number of samples produced.
i = 10
cutmix_label = cutmix_result["labels"][i]
cutmix_image = cutmix_result["images"][i]
print(cutmix_label)
plt.imshow(cutmix_image)

print(len(cutmix_result["images"]))

In [None]:
# Append the retained MixUp samples to the original training set from the
# split. Concatenation is along the first (sample) axis, which is the default.
# Note that re-running this cell appends the samples again.
X_train = np.concatenate([X_train, X_mu])
y_train = np.concatenate([y_train, y_mu])

In [None]:
# Append both CutMix results to the training set in one step each, keeping
# the order: current training set, first CutMix result, second CutMix result.
X_train = np.concatenate(
    (X_train, cutmix_result["images"], cutmix_result_2["images"]), axis=0
)
y_train = np.concatenate(
    (y_train, cutmix_result["labels"], cutmix_result_2["labels"]), axis=0
)

# Shape of the final, augmented training set
print(X_train.shape)

## Augmenting validation set

In [None]:
# The validation set is meant to approximate the test set distribution;
# as we don't know what the test data looks like, we apply some random
# augmentation to the validation images.
rand_augment = kcv.layers.RandAugment(value_range=(0,1), magnitude=0.3)
X_val_aug = rand_augment(X_val)

In [None]:
# Pick one validation sample and show it before augmentation.
# The next cell shows the augmented version at the same index `i`.
i = 1000
val_sample = X_val[i]
plt.imshow(val_sample)

In [None]:
# Effect of the random augmentation on the sample at index `i`.
val_sample_aug = X_val_aug[i]
plt.imshow(val_sample_aug)

In [None]:
# Shapes of the validation images before and after augmentation.
for val_images in (X_val, X_val_aug):
    print(val_images.shape)

In [None]:
# This will be the new validation data, by means of which we will
# compute the validation error and approximate the test one.
# From here on X_val refers to the augmented images; the un-augmented
# validation images are no longer reachable through this name.

X_val = X_val_aug

# Define batch size and other learning hyperparameters

In [None]:
# Batch size and number of epochs, plus the input and output shapes, which are
# read from the training arrays (no learning rate is set here).
batch_size, epochs = 20, 200
input_shape, output_shape = X_train.shape[1:], y_train.shape[-1]

# Show the chosen values
print(f"Batch Size: {batch_size}, Epochs: {epochs}")
print(f"Input Shape: {input_shape}, Output Shape: {output_shape}")

# Transfer Learning (with weighted loss)

In [None]:
# Download the FEN we will use: ConvNeXtXLarge with ImageNet weights, without
# its classification top and with global average pooling on the output.
fen_options = dict(
    weights="imagenet",
    input_shape=(96,96,3),
    include_top=False,
    include_preprocessing=True,
    pooling='avg',
)
fen = tfk.applications.ConvNeXtXLarge(**fen_options)

# Freeze the whole FEN
fen.trainable = False

In [None]:
# This method builds the model with given hyperparameters as inputs. The
# rationale behind the model is inside the report.

def build_model(fen, dropout_par, l2_par, input_shape=input_shape, output_shape=output_shape, pixel_scale=255.0):
  """Build and compile the classifier: augmentation -> FEN -> two FC blocks -> softmax.

  Args:
    fen: feature extraction network, called on the augmented (and rescaled)
      images.
    dropout_par: rate used by the three Dropout layers.
    l2_par: L2 factor of the kernel regularizer of the two hidden Dense layers.
    input_shape: shape of one input image.
    output_shape: number of units of the softmax output layer.
    pixel_scale: factor the images are multiplied by right before the FEN.
      The augmentation layers here treat pixels as lying in [0, 1]
      (RandomBrightness is given value_range=(0,1)), while a ConvNeXt built
      with include_preprocessing=True expects pixels in [0, 255]; the default
      of 255.0 bridges the two. Pass 1.0 to feed the FEN unscaled images.

  Returns:
    The compiled model (categorical crossentropy loss, AdamW optimizer,
    accuracy metric).
  """

  # Preprocessing
  inputs = tfkl.Input(shape=input_shape, name='Input')

  # Layer performing some geometric operations, that resemble that of the RandAugment
  # layer (which was too computationally demanding to be used inside the network).
  x = tf.keras.Sequential([
      tfkl.RandomFlip(mode='horizontal_and_vertical'),
      tfkl.RandomBrightness(0.2, value_range=(0,1)),
      tfkl.RandomTranslation(0.2,0.2),
      tfkl.RandomZoom(0.2),
      tfkl.RandomRotation(0.2),
      tfkl.RandomContrast(0.2)
    ], name='augment') (inputs)

  # Bring the pixels to the range the FEN's built-in preprocessing expects.
  # Done after the augmentation, which works on the [0, 1] range.
  if pixel_scale != 1.0:
    x = tfkl.Rescaling(pixel_scale, name='rescale')(x)

  # FEN
  x = fen(x)
  x = tfkl.Dropout(dropout_par, name='dropout_0')(x)

  # FC
  # First FC block
  x = tfkl.Dense(units=1024, kernel_regularizer=tf.keras.regularizers.l2(l2_par), kernel_initializer=tfk.initializers.HeUniform())(x)
  x = tfkl.BatchNormalization(name='batch_norm_1')(x)
  x = tfkl.Activation('relu', name='act_1')(x)
  x = tfkl.Dropout(dropout_par, name='dropout_1')(x)

  # Second FC block
  x = tfkl.Dense(units=1024, kernel_regularizer=tf.keras.regularizers.l2(l2_par), kernel_initializer=tfk.initializers.HeUniform())(x)
  x = tfkl.BatchNormalization(name='batch_norm_2')(x)
  x = tfkl.Activation('relu', name='act_2')(x)
  x = tfkl.Dropout(dropout_par, name='dropout_2')(x)

  outputs = tfkl.Dense(output_shape, name='out', activation='softmax')(x)

  model = tfk.Model(inputs=inputs, outputs=outputs, name='model')
  model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.AdamW(), metrics=['accuracy'])

  return model

In [None]:
# Build the transfer-learning model: dropout rate 0.7, L2 factor 0.02,
# 96x96 RGB inputs and two output classes.
tl_model = build_model(
    fen,
    dropout_par=0.7,
    l2_par=0.02,
    input_shape=(96,96,3),
    output_shape=2,
)

tl_model.summary()

In [None]:
# Draw the model graph; show_shapes=True adds the tensor shapes to each layer.
tfk.utils.plot_model(tl_model, show_shapes=True)

In [None]:
# We give more weight to unhealthy images.
# Column 0 of y_train is the 'healthy' component of the label and column 1 the
# 'unhealthy' one, so each sample weight is a blend of the two class weights
# according to its (possibly mixed) label.
HEALTHY_WEIGHT = 1.0
UNHEALTHY_WEIGHT = 1.3

# Computed for all samples at once instead of looping over every row;
# float64 matches the dtype of the array the loop used to fill.
_y_train_f64 = np.asarray(y_train, dtype=np.float64)
weights = _y_train_f64[:, 0] * HEALTHY_WEIGHT + _y_train_f64[:, 1] * UNHEALTHY_WEIGHT

In [None]:
# Callbacks for transfer learning, both driven by the validation accuracy:
# stop after 20 epochs without improvement (restoring the best weights), and
# multiply the learning rate by 0.2 after 5 epochs without improvement.
tl_early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=20, restore_best_weights=True
)
tl_reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', mode='max', patience=5, factor=0.2, min_lr=1e-8
)

# Train the model with the per-sample weights and keep only the history dict
tl_fit = tl_model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (X_val, y_val),
    sample_weight = weights,
    callbacks = [tl_early_stopping, tl_reduce_lr],
)
tl_history = tl_fit.history

## Results

In [None]:
# Plot the transfer learning training history: one figure per metric, with the
# training curve dashed and faint and the validation curve solid.
tl_color = '#4D61E2'
for metric, title in (('loss', 'Categorical Crossentropy'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15,5))
    plt.plot(tl_history[metric], alpha=.3, color=tl_color, linestyle='--')
    plt.plot(tl_history['val_' + metric], label='Transfer Learning', alpha=.8, color=tl_color)
    plt.legend(loc='upper left')
    plt.title(title)
    plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the model obtained after transfer learning to the Kaggle working directory.
tl_model.save('/kaggle/working/tl_convex')
# Do not delete tl_model here: the fine-tuning section reuses it as ft_model.
# del tl_model

In [None]:
# These instructions are not needed in Kaggle, but can be useful
# in Colab, where we often exceed the RAM, so it's useful to save
# the intermediate model and its data, delete them and re-upload.

#del tl_model

In [None]:
#np.savez_compressed('training', X_train, y_train)
#np.savez_compressed('validation', X_val, y_val)

In [None]:
#del X_train
#del y_train
#del X_val
#del y_val


# Fine tuning

## Training

In [None]:
#data = np.load('training_bagg_1.npz', allow_pickle=True)

#X_train = np.array(data['arr_0'])
#y_train = np.array(data['arr_1'])

In [None]:
#data = np.load('validation_bagg_1.npz', allow_pickle=True)

#X_val = np.array(data['arr_0'])
#y_val = np.array(data['arr_1'])

In [None]:
# Re-load the model after transfer learning, if needed (from the path it was
# saved to above).
# ft_model = tfk.models.load_model('/kaggle/working/tl_convex')

# ft_model is the same object as tl_model, not a copy: every change made to
# ft_model below is also visible through tl_model.
ft_model = tl_model

In [None]:
# Make the whole ConvNeXt backbone trainable, then list every one of its
# layers with its index, name and trainable flag.
backbone = ft_model.get_layer('convnext_xlarge')
backbone.trainable = True
for i, layer in enumerate(backbone.layers):
   print(i, layer.name, layer.trainable)

In [None]:
# Freeze the first N layers of the backbone; the layers from index N onwards
# keep the trainable flag they already have.
N = 204
convnext = ft_model.get_layer('convnext_xlarge')
for frozen_layer in convnext.layers[:N]:
  frozen_layer.trainable = False

# List the layers again to check the result, then show the model summary
for i, layer in enumerate(convnext.layers):
   print(i, layer.name, layer.trainable)
ft_model.summary()

In [None]:
# Compile the model again, now that the set of trainable layers has changed.
# NOTE(review): AdamW() is created with its default settings, so fine-tuning
# starts from the optimizer's default learning rate. Confirm this is intended.
ft_loss = tfk.losses.CategoricalCrossentropy()
ft_optimizer = tfk.optimizers.AdamW()
ft_model.compile(loss=ft_loss, optimizer=ft_optimizer, metrics='accuracy')

In [None]:
# Callbacks for fine-tuning. Notice we use a lower patience than in transfer
# learning: 10 epochs for early stopping and 3 for the learning-rate
# reduction (factor 0.1), both monitoring the validation accuracy.
ft_early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True
)
ft_reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', mode='max', patience=3, factor=0.1, min_lr=1e-8
)

# Fine-tune the model with the same data and sample weights, keeping only the
# history dict
ft_fit = ft_model.fit(
    x = X_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    sample_weight = weights,
    validation_data = (X_val, y_val),
    callbacks = [ft_early_stopping, ft_reduce_lr],
)
ft_history = ft_fit.history

## Results

In [None]:
# Plot the fine-tuned training history: one figure per metric, with the
# training curve dashed and faint and the validation curve solid.
# The loss figure is titled 'Categorical Crossentropy' because that is the
# loss the model is compiled with (it used to say 'Binary Crossentropy').
ft_color = '#408537'
for metric, title in (('loss', 'Categorical Crossentropy'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15,5))
    plt.plot(ft_history[metric], alpha=.3, color=ft_color, linestyle='--')
    plt.plot(ft_history['val_' + metric], label='Fine Tuning', alpha=.8, color=ft_color)
    plt.legend(loc='upper left')
    plt.title(title)
    plt.grid(alpha=.3)

plt.show()

In [None]:
# Save the fine-tuned model to the Kaggle working directory.
ft_model.save('/kaggle/working/ft_convex')
# del ft_model