# Classification with EfficientNetV2

* Original Google Repo: https://github.com/google/automl/tree/master/efficientnetv2
* Paper published 2021

In [1]:
# Standard library plus the core numerical / deep-learning stack.
import math, re, os
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
# Print the library versions so the recorded outputs can be tied to an environment.
print(tf.__version__)
print(tfa.__version__)

# Dataset loader (presumably a project-local module — TODO confirm), TF Hub,
# and the table / plotting helpers used later in the notebook.
from flowerclass_read_tf_ds import get_datasets
import tensorflow_hub as hub
import pandas as pd
# NOTE(review): math is already imported in the first import line above.
import math
import plotly_express as px

2.6.2
0.14.0
Tensorflow version 2.6.2


In [2]:
# Show the name of the GPU device TensorFlow can use, confirming that a GPU is visible.
tf.test.gpu_device_name()

2022-03-05 13:16:09.038937: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-05 13:16:09.089163: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:09.090191: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:09.090862: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

'/device:GPU:0'

# I. Data Loading

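* The TF Hub EfficientNetV2 models expect a fixed input resolution (480x480 for the large variant: https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2). That resolution ran out of GPU memory in the tests recorded in Section III, so 224x224 images are used below.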

In [3]:
# Side length (pixels) of the square images; used for the dataset IMAGE_SIZE and the model input shape.
image_size = 224
# Images per batch; also used to derive the number of steps per epoch.
batch_size = 64

In [4]:
# Load the training, validation and test datasets at the configured batch size
# and square image size (RESIZE=None, tpu=False).
input_resolution = (image_size, image_size)
ds_train, ds_valid, ds_test = get_datasets(
    BATCH_SIZE=batch_size,
    IMAGE_SIZE=input_resolution,
    RESIZE=None,
    tpu=False,
)

2022-03-05 13:16:11.296970: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:11.297838: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:11.298495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:11.299418: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 13:16:11.300093: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


# II. Model Setup: EfficientNetV2

In [5]:
# TF Hub handle of the pretrained EfficientNetV2 feature-vector model used as the backbone.
# The large (l) and medium (m) variants are kept commented out; the small (s) variant is active.
#effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2"
#effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2"
effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2"

In [6]:
# Inspection only: display the class that is used below to wrap the TF Hub model as a Keras layer.
hub.KerasLayer

tensorflow_hub.keras_layer.KerasLayer

In [7]:
    
# Assemble the classifier from an explicit list of layers: a fixed-size input,
# the frozen TF Hub backbone, dropout, and a 104-way softmax head.
classifier_layers = [
    # Declaring the input shape explicitly lets the TFLiteConverter load the
    # model properly.
    tf.keras.layers.InputLayer(input_shape=(image_size, image_size, 3)),
    hub.KerasLayer(effnet2_base, trainable=False),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(104, activation='softmax'),
]
effnet2_tfhub = tf.keras.Sequential(classifier_layers)

# build() is intended for subclassed models, which do not know at
# instantiation time what their inputs look like.
effnet2_tfhub.build((None, image_size, image_size, 3))

effnet2_tfhub.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 1280)              20331360  
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 104)               133224    
Total params: 20,464,584
Trainable params: 133,224
Non-trainable params: 20,331,360
_________________________________________________________________


Note the large number of non-trainable parameters: the EfficientNetV2 backbone is frozen, so only the final dense layer's 133,224 parameters are trained.

In [8]:
# List the layers of the assembled model (hub backbone, dropout, dense head).
effnet2_tfhub.layers

[<tensorflow_hub.keras_layer.KerasLayer at 0x7f4f681f3750>,
 <keras.layers.core.Dropout at 0x7f4f217838d0>,
 <keras.layers.core.Dense at 0x7f4f21783f10>]

In [9]:
# Take the first layer (the TF Hub backbone) and report how many weight
# tensors it holds in total, and how many are trainable / non-trainable.
layer = effnet2_tfhub.layers[0]
for weight_group in ("weights", "trainable_weights", "non_trainable_weights"):
    print(f"{weight_group}:", len(getattr(layer, weight_group)))

weights: 670
trainable_weights: 0
non_trainable_weights: 670


In [10]:
# Shape of the first weight tensor held by the backbone layer.
layer.weights[0].shape


TensorShape([3, 3, 24, 24])

In [11]:
# Check whether the backbone layer is currently marked as trainable.
layer.trainable

False

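Why? The hub layer was created with `trainable=False` in the model definition, so it reports itself as frozen and all 670 of its weights are non-trainable.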

# III. Training

Keras Transfer Learning: https://keras.io/guides/transfer_learning/

## IIIa) Phase I: Train Top Layer (frozen layers)

### Optimize Training for Compute Infrastructure

In [12]:
# Phase I compile: Adam with learning rate 1e-3 and categorical cross-entropy,
# tracking macro-averaged F1 over the 104 classes plus categorical accuracy.
phase1_metrics = [
    tfa.metrics.F1Score(num_classes=104, average='macro'),
    tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy', dtype=None),
]
effnet2_tfhub.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=phase1_metrics,
)

* batchsize:4 with 512 resized to 480px OOM

* `effnet2L_tfhub.fit(ds_train, epochs=1, validation_data=ds_valid, batch_size=batch_size, steps_per_epoch=1)`

#### EfficientNetV2 Large

* try batchsize 4, 8, 16 and image size  224, 331 (without resizing for now)
* bs/image size (no resize)
    * 8/224: pass
    * 16/224 pass
    * 32/224 pass
    * 64/224 pass
    * 128/224 pass
* try 331 (second largest size of images available) with EfficientNetV2 small
    * 16/331: OOM
    * 8/331: OOM

* Test with optimal 480x480 input:
    
   * 8/448 (resized 480): OOM
   * 8/224 (resized 480): OOM
   * 2/224 (resized 480): OOM
   > Resizing to the optimal 480x480 image size not possible with EfficientNetV2 Large due to OOM

    
    
#### EfficientNetV2 Medium

* Test with optimal 480x480 input:

   * 2/224 (resized 480): OOM

#### EfficientNetV2 Small

* Test with optimal 384 x 384: OOM

> None of the three model sizes (small, medium, large) can be used at its optimal resolution: every attempt ran out of memory (OOM).
> 




In [13]:
# Sample counts used to size one epoch of training / validation.
# NOTE(review): hard-coded; presumably these match the splits returned by
# get_datasets — verify against the data loader.
NUM_TRAIN_SAMPLES = 12753
NUM_VALID_SAMPLES = 3712


def compute_steps_per_epoch(x):
    """Return how many batches of ``batch_size`` are needed to cover ``x`` samples.

    The result is rounded up so that a final, partially filled batch is
    counted. ``batch_size`` is read from the notebook-level variable at call
    time.
    """
    return int(math.ceil(1. * x / batch_size))


steps_per_epoch_tr = compute_steps_per_epoch(NUM_TRAIN_SAMPLES)
steps_per_epoch_val = compute_steps_per_epoch(NUM_VALID_SAMPLES)
steps_per_epoch_tr, steps_per_epoch_val

(200, 58)

In [14]:
# Stop once validation F1 has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to its best epoch when early
# stopping triggers; otherwise the model saved after this cell, and the
# starting point of Phase II, would be the weights from `patience` epochs past
# the best one.
callback_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_f1_score', min_delta=0, patience=5, verbose=1,
    mode='max', baseline=None, restore_best_weights=True
)
# Write a weights-only checkpoint every time validation F1 reaches a new
# maximum; the epoch number is embedded in the file name.
callback_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="training/cp-{epoch:04d}.ckpt",
    save_weights_only=True,
    monitor='val_f1_score',
    verbose=1, mode='max', save_best_only=True)

# batch_size and shuffle are intentionally not passed: the input is an
# already-batched tf.data.Dataset, for which Keras documents that batch_size
# should not be specified and that shuffle is ignored.
history = effnet2_tfhub.fit(ds_train, epochs=40, validation_data=ds_valid,
                            steps_per_epoch=steps_per_epoch_tr,
                            validation_steps=steps_per_epoch_val,
                            callbacks=[callback_stopping, callback_model_checkpoint])

Epoch 1/40


2022-03-05 13:16:35.569249: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-03-05 13:16:41.677592: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



Epoch 00001: val_f1_score improved from -inf to 0.92901, saving model to training/cp-0001.ckpt
Epoch 2/40

Epoch 00002: val_f1_score improved from 0.92901 to 0.94281, saving model to training/cp-0002.ckpt
Epoch 3/40

Epoch 00003: val_f1_score improved from 0.94281 to 0.94462, saving model to training/cp-0003.ckpt
Epoch 4/40

Epoch 00004: val_f1_score improved from 0.94462 to 0.95230, saving model to training/cp-0004.ckpt
Epoch 5/40

Epoch 00005: val_f1_score improved from 0.95230 to 0.95357, saving model to training/cp-0005.ckpt
Epoch 6/40

Epoch 00006: val_f1_score did not improve from 0.95357
Epoch 7/40

Epoch 00007: val_f1_score did not improve from 0.95357
Epoch 8/40

Epoch 00008: val_f1_score did not improve from 0.95357
Epoch 9/40

Epoch 00009: val_f1_score improved from 0.95357 to 0.95687, saving model to training/cp-0009.ckpt
Epoch 10/40

Epoch 00010: val_f1_score improved from 0.95687 to 0.95719, saving model to training/cp-0010.ckpt
Epoch 11/40

Epoch 00011: val_f1_score imp

In [15]:
# Save the complete model as it stands at the end of Phase I training.
effnet2_tfhub.save('saved_model/my_model_phase1')

2022-03-05 13:26:56.593267: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


In [16]:
# Wide table of the per-epoch metrics recorded by fit(), with a 1-based epoch
# column appended.
results_tr = (
    pd.DataFrame.from_dict(history.history)
    .assign(epochs=lambda frame: frame.index + 1)
)

# Long format (one row per epoch/metric pair) for the line plots below.
results_to_plot = results_tr.melt(id_vars="epochs")
results_to_plot.head()

Unnamed: 0,epochs,variable,value
0,1,loss,0.981464
1,2,loss,0.171331
2,3,loss,0.098535
3,4,loss,0.064503
4,5,loss,0.042287


In [17]:
# List the metric names available in the long-format table.
results_to_plot['variable'].unique()

array(['loss', 'f1_score', 'categorical_accuracy', 'val_loss',
       'val_f1_score', 'val_categorical_accuracy'], dtype=object)

In [18]:
# Training vs. validation loss per epoch.
loss_rows = results_to_plot["variable"].isin(["loss", "val_loss"])
px.line(results_to_plot.loc[loss_rows], x="epochs", y="value", color="variable")

In [19]:
# Training vs. validation F1 score per epoch.
f1_rows = results_to_plot["variable"].isin(["f1_score", "val_f1_score"])
px.line(results_to_plot.loc[f1_rows], x="epochs", y="value", color="variable")

In [20]:
# Highest validation F1 reached in Phase I, and the 1-based epoch at which it
# first occurred (used later to locate the matching checkpoint file).
phase1_val_f1 = results_tr['val_f1_score']
best_phase1_f1 = phase1_val_f1.max()
best_phase1_epoch = results_tr.loc[phase1_val_f1.idxmax(), 'epochs']


In [21]:
# Display the best Phase I validation F1 together with its epoch.
best_phase1_f1, best_phase1_epoch

(0.9613547921180725, 12)

## IIIb) Phase II: Unfreeze and FineTuning

Unfreeze weights, try fine tuning whole network

In [22]:
# Mark the whole model as trainable for fine-tuning; the model is recompiled in
# the next cell before training resumes.
effnet2_tfhub.trainable = True

In [23]:
# Phase II compile: same loss and metrics as Phase I, but with a much smaller
# learning rate (1e-5) for fine-tuning.
phase2_metrics = [
    tfa.metrics.F1Score(num_classes=104, average='macro'),
    tf.keras.metrics.CategoricalAccuracy(name='categorical_accuracy', dtype=None),
]
effnet2_tfhub.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=phase2_metrics,
)

In [24]:
# Stop once validation F1 has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to its best epoch when early
# stopping triggers, so the model saved after this cell is not the one from
# `patience` epochs past the best.
callback_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_f1_score', min_delta=0, patience=5, verbose=1,
    mode='max', baseline=None, restore_best_weights=True
)
# Write a weights-only checkpoint every time validation F1 reaches a new
# maximum; Phase II checkpoints go to their own directory ("training2/").
callback_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="training2/cp-{epoch:04d}.ckpt",
    save_weights_only=True,
    monitor='val_f1_score',
    verbose=1, mode='max', save_best_only=True)

# batch_size and shuffle are intentionally not passed: the input is an
# already-batched tf.data.Dataset, for which Keras documents that batch_size
# should not be specified and that shuffle is ignored.
history = effnet2_tfhub.fit(ds_train, epochs=10, validation_data=ds_valid,
                            steps_per_epoch=steps_per_epoch_tr,
                            validation_steps=steps_per_epoch_val,
                            callbacks=[callback_stopping, callback_model_checkpoint])

Epoch 1/10

Epoch 00001: val_f1_score improved from -inf to 0.82843, saving model to training2/cp-0001.ckpt
Epoch 2/10

Epoch 00002: val_f1_score improved from 0.82843 to 0.89051, saving model to training2/cp-0002.ckpt
Epoch 3/10

Epoch 00003: val_f1_score improved from 0.89051 to 0.90441, saving model to training2/cp-0003.ckpt
Epoch 4/10

Epoch 00004: val_f1_score improved from 0.90441 to 0.91786, saving model to training2/cp-0004.ckpt
Epoch 5/10

Epoch 00005: val_f1_score improved from 0.91786 to 0.92682, saving model to training2/cp-0005.ckpt
Epoch 6/10

Epoch 00006: val_f1_score improved from 0.92682 to 0.93249, saving model to training2/cp-0006.ckpt
Epoch 7/10

Epoch 00007: val_f1_score improved from 0.93249 to 0.93651, saving model to training2/cp-0007.ckpt
Epoch 8/10

Epoch 00008: val_f1_score improved from 0.93651 to 0.93915, saving model to training2/cp-0008.ckpt
Epoch 9/10

Epoch 00009: val_f1_score improved from 0.93915 to 0.94653, saving model to training2/cp-0009.ckpt
Epoc

In [25]:
# Save the complete model as it stands at the end of Phase II fine-tuning.
effnet2_tfhub.save('saved_model/my_model_phase2')

In [26]:
# Wide table of the per-epoch metrics recorded by the latest fit() call, with
# a 1-based epoch column appended.
results_tr = (
    pd.DataFrame.from_dict(history.history)
    .assign(epochs=lambda frame: frame.index + 1)
)

# Long format (one row per epoch/metric pair) for the line plots below.
results_to_plot = results_tr.melt(id_vars="epochs")
results_to_plot.head()

Unnamed: 0,epochs,variable,value
0,1,loss,1.691661
1,2,loss,0.975063
2,3,loss,0.699097
3,4,loss,0.530101
4,5,loss,0.424769


In [27]:
# Training vs. validation loss per epoch.
loss_rows = results_to_plot["variable"].isin(["loss", "val_loss"])
px.line(results_to_plot.loc[loss_rows], x="epochs", y="value", color="variable")

In [28]:
# Training vs. validation F1 score per epoch.
f1_rows = results_to_plot["variable"].isin(["f1_score", "val_f1_score"])
px.line(results_to_plot.loc[f1_rows], x="epochs", y="value", color="variable")

### Load best model, either phase 1 or 2

In [29]:
# Compare the best validation F1 of both phases and load the checkpoint of the
# winning phase's best epoch. Without an explicit load in the Phase II branch
# the model would keep whatever weights the last fit() call left behind, which
# is not necessarily the best Phase II epoch.
phase2_val_f1 = results_tr['val_f1_score']
best_phase2_f1 = phase2_val_f1.max()
# 1-based epoch of the first occurrence of the maximum; matches the epoch
# number embedded in the checkpoint file names.
best_phase2_epoch = int(results_tr.loc[phase2_val_f1.idxmax(), 'epochs'])

if best_phase1_f1 > best_phase2_f1:
    effnet2_tfhub.load_weights(f"training/cp-{int(best_phase1_epoch):04d}.ckpt")
    print(f"best phase 1: {best_phase1_f1}")
else:
    effnet2_tfhub.load_weights(f"training2/cp-{best_phase2_epoch:04d}.ckpt")
    print(f"best phase 2: {best_phase2_f1}")



best phase 1: 0.9613547921180725


# IV. Submission

id,label
a762df180,0
24c5cf439,0
7581e896d,0
eb4b03b29,0
etc.

In [30]:
# Predict class probabilities for the whole test dataset in one call.
# NOTE(review): test_pred is not referenced by the later cells visible here;
# the submission is built from the per-batch predictions in the next cell.
test_pred = effnet2_tfhub.predict(ds_test, batch_size=batch_size)


In [31]:
# Walk the test dataset batch by batch so that every block of predictions is
# stored alongside the ids delivered in the same batch.
img_ids = []
img_preds = []
for imgs, idnum in ds_test:
    # predict_on_batch runs one forward pass on the given batch and returns a
    # NumPy array; Model.predict is not meant to be called repeatedly inside a
    # loop over small inputs.
    # Assumes each ds_test batch holds at most batch_size images — TODO confirm.
    img_preds.append(effnet2_tfhub.predict_on_batch(imgs))
    img_ids.append(idnum)

In [32]:
# Convert each batch of id tensors to NumPy and join them into one flat array.
id_batches = [id_batch.numpy() for id_batch in img_ids]
img_ids = np.concatenate(id_batches)


In [33]:
# Stack the per-batch probability arrays and take the most probable class
# index for every image.
img_preds = np.concatenate(img_preds).argmax(axis=1)

In [34]:
# Sanity check: ids and predicted labels should have the same length.
img_ids.shape, img_preds.shape

((7382,), (7382,))

In [35]:
# Build the submission table; the ids arrive as bytes and are decoded to str.
decoded_ids = [raw_id.decode() for raw_id in img_ids]
submission = pd.DataFrame({"id": decoded_ids, "label": img_preds})

In [36]:
# Preview the first rows of the submission table.
submission.head()

Unnamed: 0,id,label
0,59d1b6146,70
1,48c96bd6b,15
2,7b437ba4e,9
3,1b7aef8e8,79
4,d6143b4d4,4


In [37]:
# Confirm the column types: string ids and integer labels.
submission.dtypes

id       object
label     int64
dtype: object

In [38]:
# Write the submission file without the DataFrame index, leaving only the id and label columns.
submission.to_csv("submission.csv", index=False)