In [1]:
!pip install tensorflow==2.14
!pip install keras-cv

Collecting tensorflow==2.14
  Downloading tensorflow-2.14.0-cp39-cp39-macosx_10_15_x86_64.whl.metadata (3.9 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow==2.14)
  Downloading flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)
Collecting tensorboard<2.15,>=2.14 (from tensorflow==2.14)
  Downloading tensorboard-2.14.1-py3-none-any.whl.metadata (1.7 kB)
Collecting tensorflow-estimator<2.15,>=2.14.0 (from tensorflow==2.14)
  Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras<2.15,>=2.14.0 (from tensorflow==2.14)
  Downloading keras-2.14.0-py3-none-any.whl.metadata (2.4 kB)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow==2.14)
  Downloading grpcio-1.59.3-cp39-cp39-macosx_10_10_universal2.whl.metadata (4.0 kB)
Collecting google-auth-oauthlib<1.1,>=0.5 (from tensorboard<2.15,>=2.14->tensorflow==2.14)
  Downloading google_auth_oauthlib-1.0.0-py2.py3-none-any.whl (18 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensor

## Imports

In [2]:
#General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from shutil import copyfile, make_archive, unpack_archive #for saving the submission
import sklearn as scikit_learn
from datetime import datetime

#For random seed
import random

import tensorflow as tf
import keras_cv
#Short aliases for the Keras namespace and its layers module
tfk = tf.keras
tfkl = tfk.layers

#Show which TensorFlow version is running
print(tf.__version__)

2023-11-18 17:09:51.696145: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-18 17:09:51.696216: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-18 17:09:51.696280: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Using TensorFlow backend
2.14.0


In [3]:
#Root folder of the images; the loaders below read one subfolder per class from it
DATA_DIR = "training_data_final"

## Setting general simulation parameters

In [4]:
#Images are loaded as 96x96 RGB
IMG_SIZE = (96, 96)
IMG_SHAPE = IMG_SIZE + (3,)

#Binary problem: healthy vs unhealthy
N_CLASSES = 2

#Fraction of the data held out for validation (80-20 train-validation split)
VALIDATION_SPLIT = 0.2

#Number of images per batch
BATCH_SIZE = 64

#Fix every random seed for reproducibility
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

## Instantiating dataset generators

In [5]:
#Arguments shared by the two loaders: using the same seed and validation_split in both
#calls is what keeps the training and validation subsets consistent with each other
split_kwargs = dict(
    labels='inferred',
    label_mode='binary',
    class_names=None,
    color_mode='rgb',
    image_size=IMG_SIZE,
    shuffle=True,
    seed=seed,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

#Training subset
training_set = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset='training', **split_kwargs
)

#Validation subset
validation_set = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR, subset='validation', **split_kwargs
)

Found 5004 files belonging to 2 classes.
Using 4004 files for training.
Found 5004 files belonging to 2 classes.
Using 1000 files for validation.


#### Total number of images: 5004.
#### Training images: 4004.
#### Validation images: 1000.

## Introduction of class weights

#### We introduce class weights, which will be used during training to compensate for the imbalance between the two classes

In [8]:
#Generator used here only to read the labels of the images stored in the per-class folders
datagen_for_class_weights = tf.keras.preprocessing.image.ImageDataGenerator()
my_data = datagen_for_class_weights.flow_from_directory(DATA_DIR)

#Distinct class ids together with their number of occurrences
unique = np.unique(my_data.classes, return_counts=True)

#'balanced' weights from scikit_learn: inversely proportional to the number of elements per class
myclass_weights = scikit_learn.utils.compute_class_weight(
    class_weight='balanced',
    classes=unique[0],
    y=my_data.classes,
)

#Map each class id to its weight
class_weights_dict = {
    class_id: weight
    for class_id, weight in enumerate(myclass_weights.flatten())
}

print(class_weights_dict)

Found 5004 images belonging to 2 classes.
{0: 0.8068365043534343, 1: 1.3147661586967945}


#### Based on our approach, the class weights are approximately 0: 0.807, 1: 1.315

### Introduce mixed precision policy to optimize memory consumption

``keras.mixed_precision`` allows different floating-point precisions (in our case float16 and float32) to be used adaptively during training, in a way that optimizes memory use.

In [9]:
#Switch the global Keras policy to mixed float16/float32 precision
tf.keras.mixed_precision.set_global_policy('mixed_float16')

#Read the policy back to confirm that the switch took effect
active_policy_name = tf.keras.mixed_precision.global_policy().name
print("Mixed precision is enabled." if active_policy_name == 'mixed_float16'
      else "Mixed precision is not enabled.")

Mixed precision is enabled.


# Introduce autotune for the training and validation set cache

Here we introduce `tf.data.AUTOTUNE`, which lets `tf.data` choose the prefetch buffer size dynamically at runtime instead of hard-coding it. Shuffling the elements of the training set with a buffer size of 1000 is a separate step of the same pipeline, described below.

More in detail, `training_set.cache()` is used to cache the elements of the dataset. This step is done to improve the performance of data loading by keeping the data in memory. The subsequent `shuffle(1000)` call shuffles the elements of the dataset with a buffer size of 1000, so that they are presented in a different order at each epoch (the dataset is already batched, so the shuffled elements are batches). The same caching and prefetching is applied to `validation_set`, which is not shuffled.

In [10]:
#Let tf.data pick the prefetch buffer size at runtime
AUTOTUNE = tf.data.AUTOTUNE

#Training pipeline: cache, shuffle with a buffer of 1000 elements, then prefetch
training_set = (
    training_set
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

#Validation pipeline: cache and prefetch only, no shuffling
validation_set = (
    validation_set
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Data augmentation

To improve the generalizability of our results, we rely on a data augmentation strategy. 

We introduce a series of basic transformations. For more information on the transformations we decided to apply, please refer to the `augmentation_study.ipynb` notebook.

In [11]:
#Random transformations, applied in this order
augmentation_layers = [
    #Random flip along both axes
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    #Random brightness shift of up to 10% of the intensity
    tf.keras.layers.RandomBrightness(0.1),
    #Random shift of up to 5% of the height and of the width;
    #the area left empty by the shift is filled by reflecting the image
    tf.keras.layers.RandomTranslation(
        height_factor=0.05,
        width_factor=0.05,
        fill_mode='reflect'
    ),
    #Random rotation of +-45 degrees (0.125 of a full turn)
    tf.keras.layers.RandomRotation(0.125),
]

data_augmentation = tf.keras.Sequential(augmentation_layers, name='data_augmentation')

# Initial model with ConvNeXtXLarge

## Here we start by setting the hyperparameters specific to the model

In [57]:
#Upper bound on the number of training epochs; early stopping is expected to end training sooner
model_MAX_EPOCHS = 200

#Patience, in epochs, used by the early stopping callback
model_EARLY_STOPPING_PATIENCE = 20

#Binary crossentropy loss, since this is a binary task
model_LOSS = tf.keras.losses.BinaryCrossentropy()

#Adam optimizer with a 1e-3 learning rate
model_LEARNING_RATE = 1e-3
model_OPTIMIZER = tf.keras.optimizers.Adam(model_LEARNING_RATE)




#### Based on the benchmark described in `Transfer-learning-study.ipynb`, we use the ConvNeXtXLarge supernet, initialized with the imagenet weights.

In [58]:
#ConvNeXtXLarge backbone with ImageNet weights and without its classification top
model_convnetxlarge = tf.keras.applications.convnext.ConvNeXtXLarge(
    include_top=False,
    weights='imagenet',
    input_shape=IMG_SHAPE,
)

#Keep the backbone frozen in this phase so that its pretrained weights are not updated
model_convnetxlarge.trainable = False

We used anti-overfitting techniques such as dropout and kernel regularization (the `L1L2` regularizer, here with only its L1 coefficient set).

In [59]:
#Full classifier: augmentation -> preprocessing -> frozen backbone -> dense head
model = tf.keras.Sequential([

    tf.keras.Input(shape=IMG_SHAPE, name="input_layer"),

    #Adding the augmentation
    data_augmentation,

    #We introduce input preprocessing
    tf.keras.layers.Lambda(tf.keras.applications.convnext.preprocess_input,
                           name='convnetxlarge_preprocessing'),

    #Introducing our pretrained model with frozen weights
    model_convnetxlarge,

    #We introduce a flatten layer and a dropout layer from the FEN to the classification layers
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.4),

    #First hidden dense layer. L1L2 is given only its first coefficient (l1),
    #so the penalty that is actually applied is a pure L1 penalty
    tf.keras.layers.Dense(1024, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-3),
                          kernel_initializer=tfk.initializers.HeUniform(seed)),

    #No dropout between the two hidden dense layers: it performs better during fine tuning without it

    #Second hidden dense layer, same regularization and initialization as the first one
    tf.keras.layers.Dense(512, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L1L2(l1=1e-3),
                          kernel_initializer=tfk.initializers.HeUniform(seed)),
    tf.keras.layers.Dropout(0.3),

    #We include a final dense layer with just one neuron which is a sigmoid.
    #It is pinned to float32 so that, under the global mixed_float16 policy, the
    #probabilities fed to the loss are not computed in float16
    tf.keras.layers.Dense(1,
                          activation='sigmoid',
                          dtype='float32',
                          kernel_initializer=tf.keras.initializers.GlorotUniform(seed),
                          name='output_layer')
],
                          name = "ConvNeXtXLarge")

In [60]:
#Compile for the frozen-backbone phase; metrics is passed as a list, like in the
#compile calls of the fine tuning phase
model.compile(
    optimizer=model_OPTIMIZER,
    loss=model_LOSS,
    metrics=['accuracy']
)
model.summary()

Model: "ConvNeXtXLarge"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data_augmentation (Sequent  (None, 96, 96, 3)         0         
 ial)                                                            
                                                                 
 convnetxlarge_preprocessin  (None, 96, 96, 3)         0         
 g (Lambda)                                                      
                                                                 
 convnext_xlarge (Functiona  (None, 3, 3, 2048)        348147968 
 l)                                                              
                                                                 
 flatten_11 (Flatten)        (None, 18432)             0         
                                                                 
 dropout_22 (Dropout)        (None, 18432)             0         
                                                    

## Training without fine tuning, with FEN layers frozen

In [None]:
#Stop when the validation accuracy has not improved for model_EARLY_STOPPING_PATIENCE
#epochs, and restore the weights of the best epoch
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                        mode='auto',
                                                        restore_best_weights=True,
                                                        patience=model_EARLY_STOPPING_PATIENCE)

#Train the head on top of the frozen backbone. The epoch limit is model_MAX_EPOCHS
#(the temporary debugging value of 3 has been removed); early stopping ends training sooner
model_history = model.fit(
    training_set,
    validation_data = validation_set,
    epochs = model_MAX_EPOCHS,
    class_weight = class_weights_dict,
    callbacks = [early_stopping]
)

# Fine tuning

### After this step, we move to the fine tuning, to modify the pretrained weights of ConvNeXtXLarge

For this step, while we keep the binary crossentropy loss, we reduce the optimizer fine tuning learning rate

In [68]:
#The loss stays binary crossentropy in the fine tuning phase
fine_tuning_LOSS = tf.keras.losses.BinaryCrossentropy()

#Adam again, with a much smaller learning rate than in the frozen phase
fine_tuning_LEARNING_RATE = 5.2e-5
fine_tuning_OPTIMIZER = tf.keras.optimizers.Adam(fine_tuning_LEARNING_RATE)

#Here we basically unfreeze all the layers at a time,
#as this will unfreeze everything if this number is greater than the number of layers
fine_tuning_UNFREEZE = 1000

#Epoch budget and early stopping patience of the fine tuning phase.
#The later fine tuning cells read fine_tuning_MAX_EPOCHS, so it is defined here
fine_tuning_MAX_EPOCHS = 200
fine_tuning_EARLY_STOPPING_PATIENCE = 30

In [69]:
#fine_tuning_model is a second name for the very same object as `model` (not a copy),
#so it already holds the current weights of `model` and no weight transfer is needed
fine_tuning_model = model

#Compile with the fine tuning optimizer and loss
fine_tuning_model.compile(
    optimizer=fine_tuning_OPTIMIZER,
    loss=fine_tuning_LOSS,
    metrics=['accuracy']
)

In [70]:
#We print the model summary (layers, output shapes and parameter counts)
fine_tuning_model.summary()

Model: "ConvNeXtXLarge"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data_augmentation (Sequent  (None, 96, 96, 3)         0         
 ial)                                                            
                                                                 
 convnetxlarge_preprocessin  (None, 96, 96, 3)         0         
 g (Lambda)                                                      
                                                                 
 convnext_xlarge (Functiona  (None, 3, 3, 2048)        348147968 
 l)                                                              
                                                                 
 flatten_11 (Flatten)        (None, 18432)             0         
                                                                 
 dropout_22 (Dropout)        (None, 18432)             0         
                                                    

### Unfreeze layers

Here we unfreeze the whole model, with the exception of batch normalization layers.

In [71]:
#Pretrained backbone, looked up by name inside the fine tuning model
backbone = fine_tuning_model.get_layer(model_convnetxlarge.name)

#We start by setting the backbone as trainable
backbone.trainable = True

unfreezed = 0

#Go through the backbone from its last layer to its first one: a layer is unfrozen
#while the fine_tuning_UNFREEZE budget is not used up and the layer is not a
#batch normalization one; every other layer is frozen
for layer in reversed(backbone.layers):
    can_unfreeze = (
        unfreezed < fine_tuning_UNFREEZE
        and not isinstance(layer, tf.keras.layers.BatchNormalization)
    )
    layer.trainable = can_unfreeze
    if can_unfreeze:
        unfreezed += 1
  

In [72]:
#Print the summary again after the trainable flags of the backbone layers were changed
fine_tuning_model.summary()

Model: "ConvNeXtXLarge"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data_augmentation (Sequent  (None, 96, 96, 3)         0         
 ial)                                                            
                                                                 
 convnetxlarge_preprocessin  (None, 96, 96, 3)         0         
 g (Lambda)                                                      
                                                                 
 convnext_xlarge (Functiona  (None, 3, 3, 2048)        348147968 
 l)                                                              
                                                                 
 flatten_11 (Flatten)        (None, 18432)             0         
                                                                 
 dropout_22 (Dropout)        (None, 18432)             0         
                                                    

In [74]:
#Fresh Adam optimizer for the fine tuning phase
fine_tuning_LEARNING_RATE = 5.2e-5
fine_tuning_OPTIMIZER = tf.keras.optimizers.Adam(fine_tuning_LEARNING_RATE)

#Compile again now that the trainable flags of the backbone have been changed
fine_tuning_model.compile(
    loss=fine_tuning_LOSS,
    optimizer=fine_tuning_OPTIMIZER,
    metrics=['accuracy'],
)

In [75]:
#Print the summary once more after the model was compiled again
fine_tuning_model.summary()

Model: "ConvNeXtXLarge"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data_augmentation (Sequent  (None, 96, 96, 3)         0         
 ial)                                                            
                                                                 
 convnetxlarge_preprocessin  (None, 96, 96, 3)         0         
 g (Lambda)                                                      
                                                                 
 convnext_xlarge (Functiona  (None, 3, 3, 2048)        348147968 
 l)                                                              
                                                                 
 flatten_11 (Flatten)        (None, 18432)             0         
                                                                 
 dropout_22 (Dropout)        (None, 18432)             0         
                                                    

In [76]:
#Index of the last fine tuning epoch: the best epoch of the frozen phase
#plus the fine tuning epoch budget
fine_tuning_total_epochs = early_stopping.best_epoch + fine_tuning_MAX_EPOCHS

#Early stopping for the fine tuning phase too: monitor the validation accuracy
#and restore the weights of the best epoch
fine_tuning_early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='auto',
    restore_best_weights=True,
    patience=fine_tuning_EARLY_STOPPING_PATIENCE,
)

In [None]:
#Fine tune starting from the best epoch of the frozen phase. When initial_epoch is set,
#Keras reads `epochs` as the index of the final epoch and not as a number of epochs to
#run, so the end point is fine_tuning_total_epochs (best epoch + fine tuning budget)
fine_tuning_model_history = fine_tuning_model.fit(
    training_set,
    validation_data=validation_set,
    class_weight=class_weights_dict,
    initial_epoch=early_stopping.best_epoch,
    epochs=fine_tuning_total_epochs,
    callbacks=[fine_tuning_early_stopping]
)

## Saving the model

In [None]:
import shutil

#Save the fine tuned model under this name, then zip the saved content
#into an archive with the same base name
submission_name = 'SubmissionModel'
fine_tuning_model.save(submission_name)
shutil.make_archive(submission_name, 'zip', submission_name)

In [None]:
from IPython.display import FileLink
#Link to the zip archive written by the saving cell ('ft_model.keras' is never created in this notebook)
FileLink(r'SubmissionModel.zip')