# Code for competition

In [None]:
# Code to prepare Kaggle's environment
# Install a pinned cuDNN 8.6 build for CUDA 11.8 (apt is allowed to change held packages to do so)
!apt -y install --allow-change-held-packages libcudnn8=8.6.0.163-1+cuda11.8

# Swap the installed TensorFlow (if any) for the pinned 2.9.1 release used by this notebook
!pip uninstall -y tensorflow
!pip install tensorflow==2.9.1

# See available GPUs
!nvidia-smi

In [None]:
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import shutil
from collections import Counter
import random
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight
from PIL import Image
import re
import time
from IPython.display import FileLink

# Short aliases used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)
print(tf.config.list_physical_devices())

# Enable feature for memory occupation growth control
# (must run before the GPUs are used, hence before the strategy below is created)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for dev in physical_devices:
    tf.config.experimental.set_memory_growth(dev, True)
    
# Enable mixed precision (DO NOT USE WITH EFFICIENT-NET)
#mixed_precision.set_global_policy('mixed_float16')

# Enable distributed training (models must then be built inside `strategy.scope()`)
strategy = tf.distribute.MirroredStrategy()

# Random seed for reproducibility: seeds Python's `random`, NumPy and TensorFlow
seed = 42
random.seed(seed)
# NOTE(review): PYTHONHASHSEED is presumably read only at interpreter start-up, so setting it
# here may affect child processes only — confirm
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

### Metadata

In [None]:
# Names of the 8 target classes ("Species1" ... "Species8"), in label order
classes = [f"Species{number}" for number in range(1, 9)]

# Shape of the images fed to the network and its spatial part (height, width)
input_shape = (96, 96, 3)
input_size = input_shape[:2]

# 64 samples per replica of the distributed strategy
replicas = strategy.num_replicas_in_sync
batch_size = 64 * replicas
epochs = 400

# Used to compute the new dimensions for the Resizing layer
inflation_coeff = 2.5

print(batch_size)

### Prepare the environment

In [None]:
# Copy the dataset in the current directory (useful only for Kaggle's environment)
# The copy is skipped when the local folder is already there
path = os.getcwd()
local_dataset = path + '/training_data_final'
dataset_missing = not os.path.exists(local_dataset)
if dataset_missing:
    shutil.copytree('../input/training-data-final2/training_data_final', local_dataset)
print(os.listdir(os.getcwd()))

In [None]:
# Decide the portion of data to be used as training set, remaining one will be used as validation set
train_split = 0.8

path = os.getcwd()
# The split is materialised only once: if either folder already exists it is left untouched
if not os.path.exists(path+'/training') and not os.path.exists(path+'/validation'):
    # Destination paths
    dest_train = path + '/training'
    dest_valid = path + '/validation'
    os.mkdir(dest_train)
    os.mkdir(dest_valid)

    # Source path
    source = path + '/training_data_final'

    # Listings are sorted because os.listdir returns entries in arbitrary order: without it the
    # seeded shuffle below would not produce the same split on every machine
    for folder in sorted(os.listdir(source)):
        os.makedirs(dest_train + '/' + folder, exist_ok=True)
        os.makedirs(dest_valid + '/' + folder, exist_ok=True)

        class_source = source + '/' + folder                      # Path of the class
        files = sorted(os.listdir(class_source))                  # List of files for the class
        random.shuffle(files)                                     # Split is performed randomly

        # Explicit cut index: the first `n_train` shuffled files go to training, all the others to
        # validation. This also works when a class gets no training file at all (n_train == 0),
        # a case in which reusing the index of the training loop would fail or reuse a stale value
        n_train = int(len(files) * train_split)

        for name in files[:n_train]:
            shutil.copy(class_source + '/' + name, dest_train + '/' + folder + '/' + name)

        for name in files[n_train:]:
            shutil.copy(class_source + '/' + name, dest_valid + '/' + folder + '/' + name)

### Preprocessing function

Allows us to simply define a pipeline of preprocessing transformations

In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input

def preprocessing(image):
    """Apply the notebook's preprocessing pipeline to one image (or batch of images).

    This is the single hook used both by the data generators and by the cells that load
    images manually, so any extra transformation added here is applied everywhere.
    At the moment it only delegates to the EfficientNet `preprocess_input`.

    NOTE(review): Keras documents the EfficientNet `preprocess_input` as a pass-through
    (rescaling is part of the model) — confirm for the pinned TensorFlow version.

    Args:
        image: image data accepted by `preprocess_input`.

    Returns:
        Whatever `preprocess_input` returns for `image`.
    """
    # Alternative experiment kept for reference:
    #return tf.image.adjust_saturation(image, 3)
    prepared = preprocess_input(image)
    return prepared

### Prepare the training set for standardization

In [None]:
samples = []
targets = []

# Destination path 
dest_train = os.getcwd() + '/training'

for folder in os.listdir(dest_train):
    dest_class = dest_train + '/' + folder                                   # Local destination path
    # Getting the right label directly from the folder's name: the digits in it, minus 1
    # (raw string: "\D" in a normal string is an invalid escape sequence)
    i = int(re.sub(r"\D", "", folder)) - 1
    label = tfk.utils.to_categorical(i, len(classes))                        # One-hot encoding, shared by the whole class

    # Apply the preprocess on every image of the same class and keep it in memory
    for img in os.listdir(dest_class):
        # The context manager closes the file right away; convert() returns an independent in-memory image
        with Image.open(dest_class + '/' + img) as raw:
            temp = raw.convert('RGB')
        image = preprocessing(np.array(temp))
        samples.append(image)
        targets.append(label)
X_train = np.array(samples)
y_train = np.array(targets, dtype=np.uint8)
del samples
del targets
print(X_train.shape, X_train.dtype, sep=", ")
print(y_train.shape, y_train.dtype, sep=", ")

# Compute the class weights in order to balance loss during training
y_numeric = np.argmax(y_train, axis=-1)                                      # One-hot targets back to class indices
labels = np.unique(y_numeric)
weights = class_weight.compute_class_weight('balanced', classes=labels, y=y_numeric)
# Key every weight by its real class index: enumerating the weights would shift the keys
# whenever a class has no sample in the training folder
class_weights = {int(label): float(weight) for label, weight in zip(labels, weights)}
print(class_weights)

### Static augmentation (only on training set)

In [None]:
# If "True" the training set is statically expanded into the 'training_aug' folder, which is then used for training; if "False" the normal training set is used
static_aug = False
# If "True" (and static augmentation is being used) every class is expanded by the same factor, otherwise a different expansion can be specified for each one of them
balanced = False

base_dir = os.getcwd()
aug_dir = base_dir + '/training_aug'

# If augmented samples are already present they're NOT overwritten
if static_aug and not os.path.exists(aug_dir):
    old_train = base_dir + '/training'
    dest_train = aug_dir
    shutil.copytree(old_train, dest_train)

    # 1 unit of expansion always corresponds to the size of the class with the largest amount of samples
    # NOTE(review): 537 is hard-coded; presumably the size of the largest class before the split — confirm
    desired_amount = int(537 * train_split)

    # Expansion factor of every class, indexed by label
    if balanced:
        class_expansion = [3, 3, 3, 3, 3, 3, 3, 3]
    else:
        class_expansion = [6, 1, 1, 1, 1, 4, 1, 4]

    # NOTE(review): the generator is created without any transformation, so the produced images
    # look like resized copies of the originals (plain oversampling) — confirm this is intended
    static_gen = ImageDataGenerator()

    for folder in os.listdir(dest_train):
        dest_path = dest_train + '/' + folder
        label = int(re.sub(r"\D", "", folder)) - 1

        # Never negative: classes already larger than their target simply get no new image
        to_produce = max(0, (class_expansion[label] * desired_amount) - len(os.listdir(dest_path)))

        static_gen_data = static_gen.flow_from_directory(dest_train,
                                                        batch_size=1,
                                                        target_size=input_size,
                                                        classes=[folder],
                                                        class_mode='categorical',      # Targets are directly converted into one-hot vectors
                                                        shuffle=False,
                                                        seed=seed)

        print(f'Computing {to_produce} augmented images for target "{folder}"')
        # Images are written through their full path, so the working directory is never changed
        # (and cannot be left in the wrong place if something fails half-way)
        for i in range(to_produce):
            batch_images = next(static_gen_data)[0]
            Image.fromarray(np.squeeze(batch_images).astype(np.uint8)).save(os.path.join(dest_path, f'aug{i:05}.jpg'))

print('\n' + os.getcwd())

### Online augmentation
Let's create the generators we'll need...

In [None]:
# Maximum horizontal/vertical translation
# NOTE(review): Keras presumably interprets an integer shift range as a number of pixels — confirm
shift = 30

# Training generator: random geometric/brightness transformations followed by `preprocessing`
train_data_gen = ImageDataGenerator(rotation_range=90,
                                    width_shift_range=shift,
                                    height_shift_range=shift,
                                    horizontal_flip=True,
                                    brightness_range=[0.8, 1.1],
                                    #channel_shift_range=150,
                                    zoom_range=[0.7, 1.3],
                                    shear_range=30,
                                    fill_mode='nearest',
                                    preprocessing_function=preprocessing
                                    #featurewise_std_normalization=True,
                                    #featurewise_center=True 
                                    #rescale=1./255
                                    )

# Validation generator: no augmentation, only the same `preprocessing` used for training
valid_data_gen = ImageDataGenerator(preprocessing_function=preprocessing
                                    #featurewise_std_normalization=True,
                                    #featurewise_center=True 
                                    #rescale=1./255
                                    )

# Fit the standardization values (only needed if the featurewise_* options above are enabled)
#train_data_gen.fit(X_train)
#valid_data_gen.fit(X_train)

... using flow_from_directory

In [None]:
# Setting the right paths based on whether we intend to use static augmentation or not
path = os.getcwd()
if static_aug:
    training_dir = path + '/training_aug'
else:
    training_dir = path + '/training'
validation_dir = path + '/validation'

train_gen = train_data_gen.flow_from_directory(training_dir,
                                               batch_size=batch_size,
                                               target_size=input_size,
                                               classes=classes,
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=seed)  

valid_gen = valid_data_gen.flow_from_directory(validation_dir,
                                               batch_size=batch_size, 
                                               target_size=input_size,
                                               classes=classes,
                                               class_mode='categorical',
                                               shuffle=False,
                                               seed=seed)

# Disable AutoShard
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF

# Structure of one generated batch: (images as float16, one-hot targets as uint8).
# `output_signature` replaces the deprecated `output_types`/`output_shapes` pair
batch_signature = (tf.TensorSpec(shape=(None, input_shape[0], input_shape[1], input_shape[2]), dtype=tf.float16),
                   tf.TensorSpec(shape=(None, len(classes)), dtype=tf.uint8))

def make_dataset(directory_iterator):
    """Wrap a Keras directory iterator into a repeating tf.data.Dataset with auto-sharding disabled."""
    dataset = tf.data.Dataset.from_generator(lambda: directory_iterator,
                                             output_signature=batch_signature)
    dataset = dataset.with_options(options)
    return dataset.repeat()

# Create Datasets objects
train_dataset = make_dataset(train_gen)
valid_dataset = make_dataset(valid_gen)

### Prepare the validation set for evaluation purposes

In [None]:
samples = []
targets = []

# Populate values needed for standardization
#mean = train_data_gen.mean
#std = train_data_gen.std
#print(mean, std)

dest_valid = os.getcwd() + '/validation'

# NOTE(review): images are loaded at their native size (no resize to `input_size`, unlike the
# generators) — this assumes the files already match the model input; confirm
for folder in os.listdir(dest_valid):
    dest_class = dest_valid + '/' + folder
    # Label = digits found in the folder's name, minus 1 (raw string avoids the invalid "\D" escape)
    i = int(re.sub(r"\D", "", folder)) - 1
    label = tfk.utils.to_categorical(i, len(classes))                        # One-hot encoding, shared by the whole class
    for img in os.listdir(dest_class):
        # The context manager closes the file right away; convert() returns an independent in-memory image
        with Image.open(dest_class + '/' + img) as raw:
            temp = raw.convert('RGB')
        #image = preprocessing((np.array(temp) - mean) / std)  # IF WE USE STANDARDIZATION USE THIS LINE INSTEAD OF THE ONE BELOW
        image = preprocessing(np.array(temp))
        samples.append(image)
        targets.append(label)

X_val = np.array(samples, dtype=np.float16)
y_val = np.array(targets, dtype=np.uint8)
del samples
del targets
print(X_val.shape, X_val.dtype, sep=", ")
print(y_val.shape, y_val.dtype, sep=", ")

### Models definition functions

In [None]:
def build_tl_model(input_shape):
    """Build and compile the transfer-learning model (EfficientNetB2 backbone + softmax head).

    Pipeline: input -> bicubic Resizing (each spatial side multiplied by `inflation_coeff`)
    -> EfficientNetB2 without top, initialised with ImageNet weights -> global average pooling
    -> Dense softmax layer with one unit per entry of `classes`.

    Args:
        input_shape: (height, width, channels) of the images fed to the model.

    Returns:
        The compiled Keras model, named 'tl_model' (categorical cross-entropy, Adam 5e-4, accuracy).
    """
    tf.random.set_seed(seed)

    # Spatial size seen by the backbone once the input has been resized
    resized_h, resized_w = (int(inflation_coeff * side) for side in input_shape[:2])

    # Pre-trained feature extractor
    backbone = tfk.applications.EfficientNetB2(
        include_top=False,
        weights="imagenet",
        input_shape=(resized_h, resized_w, 3))

    # Functional graph, from the raw image to the class probabilities
    inputs = tfkl.Input(shape=input_shape, name='input_layer')
    upscaled = tfkl.Resizing(resized_h, resized_w, interpolation="bicubic", name='resizing')(inputs)
    features = backbone(upscaled)
    pooled = tfkl.GlobalAveragePooling2D(name='gap')(features)

    softmax_head = tfkl.Dense(
        units=len(classes),
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='output_layer')
    outputs = softmax_head(pooled)

    tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='tl_model')
    tl_model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(5e-4),
        metrics='accuracy')

    return tl_model

### Build the model

In [None]:
# Using the distributed training strategy: the model (and therefore its variables and optimizer)
# is created inside the strategy scope
with strategy.scope():
    model = build_tl_model(input_shape)
    model.summary()

### Train the model

In [None]:
# Used to control how many passes of the dataset we intend to perform
train_mul = 1

# Define personalized behaviour of the learning rate scheduler
decay_rate = 5                                                         # Patience in early stopping should be set to: changes_to_see * decay_rate + 1
min_lr = 2e-5

def scheduler(epoch, lr):
    """Learning-rate schedule: every `decay_rate` epochs multiply `lr` by exp(-0.1), never going below `min_lr`.

    The decay is applied on the epochs where `epoch % decay_rate == decay_rate - 1`;
    on every other epoch the current learning rate is returned unchanged.
    """
    on_decay_epoch = (epoch % decay_rate) == (decay_rate - 1)
    if not on_decay_epoch:
        return lr
    decayed = lr * tf.math.exp(-0.1)
    return max(decayed, min_lr)

# Define personalized callback to see the elapsed time (useful only in Kaggle since there's no time statistics)
start = time.time()

class ElapsedTimeCallback(tfk.callbacks.Callback):
    """Print the wall-clock time elapsed since `start` every time an evaluation/validation pass ends."""

    def on_test_end(self, logs=None):
        # Keras calls this hook with the logs dict only: the signature must not declare an `epoch` argument
        el = time.time() - start
        print(f'\nElapsed time: {int(el // 60)} minutes {(el % 60):.3f} seconds')

# Model fit
# Relies on objects created by earlier cells: train/valid datasets and generators, class_weights,
# scheduler and ElapsedTimeCallback. Only the `.history` dict of the returned History object is kept.
history = model.fit(x=train_dataset,
                    epochs=epochs,                                     # Upper bound only: both datasets repeat forever and EarlyStopping below may end training sooner
                    steps_per_epoch=int(len(train_gen) * train_mul),
                    validation_data=valid_dataset,
                    validation_steps=len(valid_gen),
                    class_weight=class_weights,
                    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=21, restore_best_weights=True),
                                 tfk.callbacks.LearningRateScheduler(scheduler),
                                 #tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_lr=1e-6),
                                 ElapsedTimeCallback()]
).history

### Plot training history

In [None]:
# One figure per tracked metric: (history key, figure title, legend position, alpha of the training curve)
curve_specs = [
    ('loss', 'Categorical Crossentropy', 'upper left', .3),
    ('accuracy', 'Accuracy', 'upper right', .8),
]

for metric, title, legend_loc, train_alpha in curve_specs:
    plt.figure(figsize=(15,5))
    # Dashed line = training curve, solid line = validation curve
    plt.plot(history[metric], label='Std training', alpha=train_alpha, color='#ff7f0e', linestyle='--')
    plt.plot(history['val_' + metric], label='Std validation', alpha=.8, color='#ff7f0e')
    plt.legend(loc=legend_loc)
    plt.title(title)
    plt.grid(alpha=.3)

plt.show()

# The history is not needed anymore
del history

### Optional: restore previous model (if available)

In [None]:
# Optional step: when '../input/bestmodel2' exists, the model saved there is loaded and REPLACES
# the `model` currently in memory, so the following cells evaluate and save the restored one
path = os.getcwd()
if os.path.exists('../input/bestmodel2'):
    model = tfk.models.load_model('../input/bestmodel2')

### Plot the confusion matrix (evaluated on the validation set)

In [None]:
predictions = model.predict(X_val)

# Class indices of the ground truth and of the most probable prediction, computed once
y_true = np.argmax(y_val, axis=-1)
y_pred = np.argmax(predictions, axis=-1)

# Passing every label index keeps the matrix len(classes) x len(classes) even if a class never
# shows up in the validation set or in the predictions, so cells always match the tick labels
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(classes)))

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average=None)          # One score per class
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# The matrix is transposed so that true labels run along the x axis and predictions along the y axis
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, xticklabels=classes, yticklabels=classes, annot=True, fmt="d")
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

### Save the model

In [None]:
# Drop the export left by a previous run (if any) before saving again
path = os.getcwd()
stale_export = path + r'/best_model'
if os.path.exists(stale_export):
    shutil.rmtree(stale_export)

# The optimizer state is not stored in the export; the in-memory model is released afterwards
model.save('best_model', include_optimizer=False)
del model

# Create FileLink to easily download developed models also from Kaggle's interactive sessions
shutil.make_archive('best_model', 'zip', 'best_model')
FileLink(r'best_model.zip')

### Improve the classifier module of the previously trained supernet

In [None]:
def build_classifier_model(input_shape):
    """Build and compile the second-stage model: saved EfficientNetB2 backbone + a deeper classifier.

    The backbone weights are copied from the 'efficientnetb2' layer of the model saved in the
    'best_model' folder and the backbone is frozen, so it only acts as a feature extractor.
    On top of it: global average pooling, Dense(2048, relu), Dropout(0.5), Dense(1024, relu),
    Dropout(0.5) and a softmax layer with one unit per entry of `classes`.

    Args:
        input_shape: (height, width, channels) of the images fed to the model.

    Returns:
        The compiled Keras model, named 'classifier_model' (categorical cross-entropy, Adam 1e-3, accuracy).
    """
    tf.random.set_seed(seed)

    # Spatial size seen by the backbone once the input has been resized
    resized_h, resized_w = (int(inflation_coeff * side) for side in input_shape[:2])

    backbone = tfk.applications.EfficientNetB2(
        include_top=False,
        weights="imagenet",
        input_shape=(resized_h, resized_w, 3))

    # Recover previous weights
    previous_model = tfk.models.load_model('best_model')
    backbone.set_weights(previous_model.get_layer('efficientnetb2').get_weights())

    # Use the supernet only as feature extractor (set to "True" for fine tuning)
    backbone.trainable = False
    # Every layer except the last 30 is explicitly frozen (leave these "False" also when fine tuning)
    for frozen_layer in backbone.layers[:-30]:
        frozen_layer.trainable = False

    inputs = tfkl.Input(shape=input_shape, name='input_layer')
    upscaled = tfkl.Resizing(resized_h, resized_w, interpolation="bicubic", name='resizing')(inputs)
    features = backbone(upscaled)
    pooled = tfkl.GlobalAveragePooling2D(name='gap')(features)

    # First hidden block of the classifier
    hidden = tfkl.Dense(
        units=2048,
        activation='relu',
        kernel_initializer=tfk.initializers.HeUniform(seed),
        name='classifier1')(pooled)
    hidden = tfkl.Dropout(0.5, seed=seed, name='dropout1')(hidden)

    # Second hidden block of the classifier
    hidden = tfkl.Dense(
        units=1024,
        activation='relu',
        kernel_initializer=tfk.initializers.HeUniform(seed),
        name='classifier2')(hidden)
    hidden = tfkl.Dropout(0.5, seed=seed, name='dropout2')(hidden)

    outputs = tfkl.Dense(
        units=len(classes),
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='output_layer')(hidden)

    classifier = tfk.Model(inputs=inputs, outputs=outputs, name='classifier_model')
    classifier.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(1e-3),
        metrics='accuracy')

    return classifier

In [None]:
# Build the second-stage model inside the distributed strategy scope
# (build_classifier_model reads the model previously saved in the 'best_model' folder)
with strategy.scope():
    classifier_model = build_classifier_model(input_shape)
    classifier_model.summary()

In [None]:
# Repeated code needed to be able to perform only this training without having to execute the previous one before
train_mul = 1

decay_rate = 5
min_lr = 2e-5

def scheduler(epoch, lr):
    """Learning-rate schedule: every `decay_rate` epochs multiply `lr` by exp(-0.1), never going below `min_lr`.

    The decay is applied on the epochs where `epoch % decay_rate == decay_rate - 1`;
    on every other epoch the current learning rate is returned unchanged.
    """
    on_decay_epoch = (epoch % decay_rate) == (decay_rate - 1)
    if not on_decay_epoch:
        return lr
    decayed = lr * tf.math.exp(-0.1)
    return max(decayed, min_lr)

# Reference instant for the elapsed-time messages printed during this training
start = time.time()

class ElapsedTimeCallback(tfk.callbacks.Callback):
    """Print the wall-clock time elapsed since `start` every time an evaluation/validation pass ends."""

    def on_test_end(self, logs=None):
        # Keras calls this hook with the logs dict only: the signature must not declare an `epoch` argument
        el = time.time() - start
        print(f'\nElapsed time: {int(el // 60)} minutes {(el % 60):.3f} seconds')

# Train the second-stage model on the same datasets and class weights used for the first training.
# `epochs` is only an upper bound: EarlyStopping below may end training sooner.
# Only the `.history` dict of the returned History object is kept.
classifier_history = classifier_model.fit(x=train_dataset,
                                          epochs=epochs,                                  
                                          steps_per_epoch=int(len(train_gen) * train_mul),
                                          validation_data=valid_dataset,
                                          validation_steps=len(valid_gen),
                                          class_weight=class_weights,
                                          callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=26, restore_best_weights=True),
                                                       tfk.callbacks.LearningRateScheduler(scheduler),
                                                       #tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_lr=1e-6),
                                                       ElapsedTimeCallback()]
).history

### Plot new training history

In [None]:
history = classifier_history

# One figure per tracked metric: (history key, figure title, legend position, alpha of the training curve)
curve_specs = [
    ('loss', 'Categorical Crossentropy', 'upper left', .3),
    ('accuracy', 'Accuracy', 'upper right', .8),
]

for metric, title, legend_loc, train_alpha in curve_specs:
    plt.figure(figsize=(15,5))
    # Dashed line = training curve, solid line = validation curve
    plt.plot(history[metric], label='Std training', alpha=train_alpha, color='#ff7f0e', linestyle='--')
    plt.plot(history['val_' + metric], label='Std validation', alpha=.8, color='#ff7f0e')
    plt.legend(loc=legend_loc)
    plt.title(title)
    plt.grid(alpha=.3)

plt.show()

# Neither name is needed anymore
del history
del classifier_history

### Plot the new confusion matrix

In [None]:
predictions = classifier_model.predict(X_val)

# Class indices of the ground truth and of the most probable prediction, computed once
y_true = np.argmax(y_val, axis=-1)
y_pred = np.argmax(predictions, axis=-1)

# Passing every label index keeps the matrix len(classes) x len(classes) even if a class never
# shows up in the validation set or in the predictions, so cells always match the tick labels
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(classes)))

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average=None)          # One score per class
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# The matrix is transposed so that true labels run along the x axis and predictions along the y axis
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, xticklabels=classes, yticklabels=classes, annot=True, fmt="d")
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

### Save the new model

In [None]:
# Drop the export left by a previous run (if any) before saving again
path = os.getcwd()
stale_export = path + r'/best_model_improved'
if os.path.exists(stale_export):
    shutil.rmtree(stale_export)

# The optimizer state is not stored in the export; the in-memory model is released afterwards
classifier_model.save('best_model_improved', include_optimizer=False)
del classifier_model

# Zip the export and expose a link to download it
shutil.make_archive('best_model_improved', 'zip', 'best_model_improved')
FileLink(r'best_model_improved.zip')