# Classification with EfficientNetV2 - Hyperparam Search Optuna

## Goals

* Hyperparam search in dropout space and L1 regularization space
* Leverage insights from previous notebooks
* Just train the final layer, no phase 2 modeling


In [1]:
import math, re, os
import numpy as np
import tensorflow as tf


gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)



import tensorflow_addons as tfa
print(tf.__version__)
print(tfa.__version__)

from flowerclass_read_tf_ds import get_datasets
import tensorflow_hub as hub
import pandas as pd
import math
import plotly_express as px
import gc

2022-04-02 14:16:29.894207: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:30.011664: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:30.012321: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:30.016849: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

1 Physical GPUs, 1 Logical GPUs
2.6.2
0.14.0
Tensorflow version 2.6.2


In [2]:
tf.test.gpu_device_name()

2022-04-02 14:16:36.259566: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:36.260359: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:36.261016: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:36.418546: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-02 14:16:36.419344: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

'/device:GPU:0'

# I. Data Loading

* Choose 480x480 as model is fixed: https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2

In [3]:
image_size = 224
batch_size = 64

In [4]:
#%%debug (50, 480)


# II. Model Setup: EfficientNetV2

In [5]:
#effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2"
#effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2"
effnet2_base = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2"

In [6]:

def get_model(lr, dropout):

    effnet2_tfhub = tf.keras.Sequential([
        # Explicitly define the input shape so the model can be properly
        # loaded by the TFLiteConverter
        tf.keras.layers.InputLayer(input_shape=(image_size, image_size,3)),
        hub.KerasLayer(effnet2_base, trainable=False),
        tf.keras.layers.Dropout(rate=dropout),
        tf.keras.layers.Dense(104, activation='softmax')
    ])
    effnet2_tfhub.build((None, image_size, image_size,3,)) #This is to be used for subclassed models, which do not know at instantiation time what their inputs look like.

    effnet2_tfhub.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss='categorical_crossentropy',
              metrics=[tfa.metrics.F1Score(num_classes=104, average='macro'), tf.keras.metrics.CategoricalAccuracy(
    name='categorical_accuracy', dtype=None)])
    

    return effnet2_tfhub

Notice large amounts of untrainable params as efficientnetv2 layers are frozen

In [7]:
compute_steps_per_epoch = lambda x: int(math.ceil(1. * x / batch_size))
steps_per_epoch_tr = compute_steps_per_epoch(12753)
steps_per_epoch_val = compute_steps_per_epoch(3712)
steps_per_epoch_tr, steps_per_epoch_val

(200, 58)

# III. Hyperparam Tuning of Phase 1 with Optuna

In [8]:
import optuna
#from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState
from optuna.integration import SkoptSampler

from optuna.samplers import RandomSampler
from optuna.samplers import TPESampler # Tree Parzen Estimator (TPE)
from optuna.integration import TFKerasPruningCallback

In [9]:
study = optuna.create_study(
direction="maximize",
#sampler = optuna.samplers.TPESampler,
#pruner=optuna.pruners.MedianPruner(n_startup_trials=2),
     # optuna.pruners.SuccessiveHalvingPruner(min_resource='auto', 
                 #      reduction_factor=4, min_early_stopping_rate=0)
pruner = optuna.pruners.HyperbandPruner(),
study_name="initial_run2"
)

[32m[I 2022-04-02 14:16:36,998][0m A new study created in memory with name: initial_run2[0m


In [10]:
def objective(trial):
    
    # hyperparams
    lr = trial.suggest_float("l1reg", 1e-6, 1e-3, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.8)
    
    
    # Clear clutter from previous TensorFlow graphs.
    tf.keras.backend.clear_session()

    ds_train, ds_valid, ds_test = get_datasets(BATCH_SIZE=batch_size, IMAGE_SIZE=(image_size, image_size), 
                                           RESIZE=None, tpu=False)
    
    model = get_model(lr, dropout)
        

    callback_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_f1_score', min_delta=0, patience=5, verbose=1,
        mode='max', baseline=None, restore_best_weights=False
    )
#     callback_model_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath="training/cp-{epoch:04d}.ckpt",
#                                                      save_weights_only=True,
#                                                                    monitor='val_f1_score',
#                                                      verbose=1,  mode='max', save_best_only=True)

    history = model.fit(ds_train, epochs=5, validation_data=ds_valid, 
                                batch_size=batch_size, 
                                steps_per_epoch= steps_per_epoch_tr,
                                validation_steps=steps_per_epoch_val,
                               callbacks=[callback_stopping], shuffle=True,
                               verbose = 0, workers=1, use_multiprocessing=False,)
        
    
    results = pd.DataFrame.from_dict(history.history)
    results['epochs'] = results.index + 1
    best_f1 = results['val_f1_score'].max()

    results['trial'] = trial.number # get trial number

    best_epoch_vals = results[results['val_f1_score'] == best_f1]
    save_trial_results(best_epoch_vals)
    
    gc.collect()
    del model, ds_train, ds_valid, ds_test
    gc.collect()

    
    return best_f1


import os
def save_trial_results(df):
    # if file does not exist write header 
    if not os.path.isfile('best_vals.csv'):
       df.to_csv('best_vals.csv')
    else:
       df.to_csv('best_vals.csv', mode='a', header=False)
    


In [11]:

study.optimize(objective, n_trials=10000, timeout=2730, gc_after_trial=True) # timeout after 8hrs: 28800


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


2022-04-02 14:17:00.318316: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-04-02 14:17:06.479698: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005
[32m[I 2022-04-02 14:20:25,243][0m Trial 0 finished with value: 0.08506777882575989 and parameters: {'l1reg': 2.0146351609579515e-05, 'dropout': 0.6940012262172529}. Best is trial 0 with value: 0.08506777882575989.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:23:35,793][0m Trial 1 finished with value: 0.033124275505542755 and parameters: {'l1reg': 1.4249440511144374e-05, 'dropout': 0.6940025556717078}. Best is trial 0 with value: 0.08506777882575989.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:26:45,988][0m Trial 2 finished with value: 0.034129220992326736 and parameters: {'l1reg': 8.232306324560832e-06, 'dropout': 0.42050553646873334}. Best is trial 0 with value: 0.08506777882575989.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:30:30,470][0m Trial 3 finished with value: 0.18300054967403412 and parameters: {'l1reg': 1.9283231211614285e-05, 'dropout': 0.3722570359543288}. Best is trial 3 with value: 0.18300054967403412.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:33:40,114][0m Trial 4 finished with value: 0.005461948458105326 and parameters: {'l1reg': 1.5749426871219676e-06, 'dropout': 0.0369911849723926}. Best is trial 3 with value: 0.18300054967403412.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:36:48,246][0m Trial 5 finished with value: 0.07196437567472458 and parameters: {'l1reg': 9.04410149317752e-06, 'dropout': 0.3103590222072094}. Best is trial 3 with value: 0.18300054967403412.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:40:32,932][0m Trial 6 finished with value: 0.006991975475102663 and parameters: {'l1reg': 1.9337959850499116e-06, 'dropout': 0.26483670656761804}. Best is trial 3 with value: 0.18300054967403412.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:44:17,730][0m Trial 7 finished with value: 0.01274310052394867 and parameters: {'l1reg': 2.4175031282430007e-06, 'dropout': 0.4517346988161439}. Best is trial 3 with value: 0.18300054967403412.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:47:28,531][0m Trial 8 finished with value: 0.47093600034713745 and parameters: {'l1reg': 3.5302647891953515e-05, 'dropout': 0.39379760155976795}. Best is trial 8 with value: 0.47093600034713745.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:51:12,980][0m Trial 9 finished with value: 0.01846526376903057 and parameters: {'l1reg': 3.138126196236901e-06, 'dropout': 0.24111776511986946}. Best is trial 8 with value: 0.47093600034713745.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:54:57,178][0m Trial 10 finished with value: 0.9137140512466431 and parameters: {'l1reg': 0.00018934880031219744, 'dropout': 0.5466798996123458}. Best is trial 10 with value: 0.9137140512466431.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 14:58:41,497][0m Trial 11 finished with value: 0.9305016398429871 and parameters: {'l1reg': 0.00018466134527585812, 'dropout': 0.5588663517941681}. Best is trial 11 with value: 0.9305016398429871.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 15:01:51,908][0m Trial 12 finished with value: 0.9476720094680786 and parameters: {'l1reg': 0.00031461430695879693, 'dropout': 0.5759944948595936}. Best is trial 12 with value: 0.9476720094680786.[0m


Training: <PrefetchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Validation: <BatchDataset shapes: ((None, 224, 224, 3), (None, 104)), types: (tf.float32, tf.float32)>
Test: <PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.string)>


[32m[I 2022-04-02 15:05:36,625][0m Trial 13 finished with value: 0.954409658908844 and parameters: {'l1reg': 0.0007185502036356661, 'dropout': 0.5726806534247517}. Best is trial 13 with value: 0.954409658908844.[0m


In [12]:

def show_result(study):

    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

def show_best_vals():

    results_best_epochs = pd.read_csv("best_vals.csv")
    
    plot_stats_lines(results_best_epochs, var='loss', var_val='val_loss')
    plot_stats_lines(results_best_epochs, var='val_f1_score', var_val='val_f1_score')
    plot_stats_lines(results_best_epochs, var='epochs', var_val=None)
    

def plot_stats_lines(results, var='loss', var_val='val_loss'):
    fig = px.line(data_frame=results.groupby("trial").mean().reset_index(),
               x='trial', 
               y=var,
            error_y = results.groupby("trial").std().reset_index()[var],
                )
    
    if var_val is not None:
    
        fig.add_traces(list(px.line(data_frame=results.groupby("trial").mean().reset_index(),
                   x='trial', 
                   y=var_val,
                    error_y = results.groupby("trial").std().reset_index()[var_val],  
                    ).select_traces()))
        fig.data[1].showlegend = True
        fig.data[1].line.color = "red"
        fig.data[1].name = var_val  
        
    fig.data[0].name = var
    fig.data[0].showlegend = True
    fig.show()
        
        

In [13]:
show_result(study)

Study statistics: 
  Number of finished trials:  14
  Number of pruned trials:  0
  Number of complete trials:  14
Best trial:
  Value:  0.954409658908844
  Params: 
    l1reg: 0.0007185502036356661
    dropout: 0.5726806534247517


In [14]:
show_best_vals()

In [15]:
from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_param_importances, plot_intermediate_values
from optuna.visualization import plot_edf

In [16]:
plot_optimization_history(study)

In [17]:
plot_parallel_coordinate(study)

In [18]:
plot_param_importances(study)

In [19]:
plot_intermediate_values(study)

[33m[W 2022-04-02 15:05:40,361][0m You need to set up the pruning feature to utilize `plot_intermediate_values()`[0m


In [20]:
plot_edf(study)