## Train the Convolutional Neural Network ##

This notebook finetunes an EfficientNet neural network on an array of Kaggle GPUs.

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt


from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras.applications import EfficientNetV2M
from tensorflow.keras.applications import EfficientNetV2L

from PIL import Image, ImageFilter
import os
import glob

from tqdm import tqdm



In [2]:
#Get GPU list, set strategy
gpus = tf.config.list_logical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)
    
strategy = tf.distribute.MirroredStrategy(gpus)
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

Name: /device:GPU:0   Type: GPU
Name: /device:GPU:1   Type: GPU
Number of devices: 2


In [3]:
DATA_DIR = '/kaggle/input/UBC-OCEAN/'
os.listdir(DATA_DIR)

['updated_image_ids.json',
 'test_thumbnails',
 'sample_submission.csv',
 'train_images',
 'train_thumbnails',
 'train.csv',
 'test.csv',
 'test_images']

In [4]:
# Open the training dataframe and display the initial dataframe
DATA_DIR = '/kaggle/input/UBC-OCEAN/'

train_images=os.listdir(DATA_DIR+'train_images')
train_thumbnails=os.listdir(DATA_DIR+'train_thumbnails')

In [5]:
train_df=pd.read_csv(DATA_DIR+'train.csv')
train_df.tail()

Unnamed: 0,image_id,label,image_width,image_height,is_tma
533,65022,LGSC,53355,46675,False
534,65094,MC,55042,45080,False
535,65300,HGSC,75860,27503,False
536,65371,HGSC,42551,41800,False
537,65533,HGSC,45190,33980,False


In [6]:
#Output dimension
num_labels=train_df['label'].unique().shape[0]
num_labels

5

In [9]:
#Load image dataset from directories set up in Balance notebook
ws=256
target_size=(ws,ws)
#train_path='/kaggle/input/imageprep-06/ImagesTrain/'
train_path='/kaggle/input/balance-07/ImagesTrain/'

train_data = tf.keras.utils.image_dataset_from_directory(train_path,
                                                        image_size=target_size,
                                                        seed=41,
                                                        batch_size=64)

Found 64000 files belonging to 5 classes.


In [10]:
#Load image dataset from directories set up in Balance notebook
val_path='/kaggle/input/balance-07/ImagesVal/'
val_data = tf.keras.utils.image_dataset_from_directory(val_path,
                                                        image_size=target_size,
                                                        seed=41,
                                                        batch_size=64)

Found 16000 files belonging to 5 classes.


In [11]:
# Memory issues when using this
# AUTOTUNE = tf.data.AUTOTUNE

# train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)

In [12]:
#leaky activation function
leaky=keras.layers.LeakyReLU(alpha=0.1)

#learning rate decay
def lr_exp_decay(epoch, lr):
    k = 0.05
    lr=1.0 * lr * np.exp(-k*epoch)
    
    return lr

In [13]:
#Model checkpoints - Early Stopping, Learning Rate Decay, Save Best Checkpoint
mdlcheckpoint_cb = keras.callbacks.ModelCheckpoint("model.h5", monitor="val_accuracy", save_best_only=True)
earlystopping_cb = keras.callbacks.EarlyStopping(patience=12, restore_best_weights=True)
learningrate_cb = keras.callbacks.LearningRateScheduler(lr_exp_decay, verbose=1)

In [14]:
#Function to import EfficientNet, define output layer
def build_model():
    #Input Shape
    input_shape=(ws,ws,3)

    #EfficientNetV2
    conv_base = EfficientNetV2M(weights='imagenet', include_top=False, input_shape=input_shape)
#     conv_base.trainable=False

    opt=tf.keras.optimizers.Adam(learning_rate=0.001)
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

    #Augmentation layers - flip and rotate
    data_augmentation = tf.keras.Sequential([
        #tf.keras.layers.RandomFlip("horizontal_and_vertical"),
        #tf.keras.layers.RandomRotation(0.2),
        tf.keras.layers.RandomBrightness([-0.5,0.5]),
        tf.keras.layers.RandomContrast(0.2),
    ])

    model = tf.keras.Sequential()
    model.add(data_augmentation)
    model.add(tf.keras.layers.Rescaling(1./255, input_shape=input_shape))
    model.add(conv_base)
    model.add(tf.keras.layers.GlobalMaxPooling2D(name="gap"))

    
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    #avoid overfitting
    model.add(tf.keras.layers.Dropout(0.2))

    model.add(tf.keras.layers.Dense(num_labels, activation="softmax"))

    model.compile(optimizer=tf.keras.optimizers.Adam(), 
              loss=loss,
              metrics=['accuracy'])
    
    return model

In [15]:
#Built model
with strategy.scope():
    model=build_model()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-m_notop.h5


In [16]:
#Train model, epochs chosen to fit within 12 hour Notebook training limit
history = model.fit(train_data, epochs=30, 
                    validation_data=val_data,
                    callbacks=[earlystopping_cb, mdlcheckpoint_cb, learningrate_cb])



Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/30


2023-12-31 13:42:05.769538: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_1/efficientnetv2-m/block1b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer



Epoch 2: LearningRateScheduler setting learning rate to 0.0009512294696816873.
Epoch 2/30

Epoch 3: LearningRateScheduler setting learning rate to 0.0008607080250436449.
Epoch 3/30

Epoch 4: LearningRateScheduler setting learning rate to 0.0007408182487071516.
Epoch 4/30

Epoch 5: LearningRateScheduler setting learning rate to 0.0006065307016295101.
Epoch 5/30

Epoch 6: LearningRateScheduler setting learning rate to 0.000472366580395985.
Epoch 6/30

Epoch 7: LearningRateScheduler setting learning rate to 0.0003499377670932674.
Epoch 7/30

Epoch 8: LearningRateScheduler setting learning rate to 0.0002465969741029308.
Epoch 8/30

Epoch 9: LearningRateScheduler setting learning rate to 0.00016529890454317035.
Epoch 9/30

Epoch 10: LearningRateScheduler setting learning rate to 0.00010539923287902251.
Epoch 10/30

Epoch 11: LearningRateScheduler setting learning rate to 6.392786508783188e-05.
Epoch 11/30

Epoch 12: LearningRateScheduler setting learning rate to 3.6883170163630086e-05.
Epo

In [17]:
#restore checkpoint weights
model.load_weights("model.h5")
model.save('OCEANs_train072.h5')