# Learning Rate testing


The best model was BeitLargePatch16 with validation accuracy 0.9856114983558655.
- We test with the DEiT base model here (small and fast) in the interest of time

Here we will experiment with learning rate to see if this can be improved.

In [1]:
import mlflow

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

import keras_tuner as kt
import tensorflow_hub as hub
import tensorflow_addons as tfa

from tensorflow import keras
from keras.applications import imagenet_utils

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
DATASET_SIZE = 9367
IMAGE_SIZE = 224
BATCH_SIZE = 8
WORKERS = 4
EPOCHS = 10

BASE_PATH='../data'

classes = [
    'cup', 
    'fork', 
    'glass', 
    'knife', 
    'plate', 
    'spoon'
]

First, we will load the training dataframe and split it into train and validation

In [3]:
df_train_full = pd.read_csv('data/train.csv', dtype={'Id': str})
df_train_full['filename'] = 'data/images/' + df_train_full['Id'] + '.jpg'
df_train_full.head()

Unnamed: 0,Id,label,filename
0,560,glass,data/images/0560.jpg
1,4675,cup,data/images/4675.jpg
2,875,glass,data/images/0875.jpg
3,4436,spoon,data/images/4436.jpg
4,8265,plate,data/images/8265.jpg


In [4]:
val_cutoff = int(len(df_train_full) * 0.8)
df_train = df_train_full[:val_cutoff]
df_val = df_train_full[val_cutoff:]

## Learning Rate Search

Now let's create image generators

In [5]:
# These models don't have the imagenet preprocessing built in so I have to apply this
def preprocess_input(x, data_format=None):
    return imagenet_utils.preprocess_input(
        x, data_format=data_format, mode="tf"
    )

In [6]:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    dtype="float16"
)

train_generator = train_datagen.flow_from_dataframe(
    df_train,
    x_col='filename',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    dtype="float16"
)

val_generator = val_datagen.flow_from_dataframe(
    df_val,
    x_col='filename',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

Found 4447 validated image filenames belonging to 6 classes.
Found 1112 validated image filenames belonging to 6 classes.


In [7]:
classes = np.array(list(train_generator.class_indices.keys()))
classes

array(['cup', 'fork', 'glass', 'knife', 'plate', 'spoon'], dtype='<U5')

In [8]:
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_accuracy',
    min_delta = 1e-4,
    patience = 3,
    mode = 'max',
    restore_best_weights = True,
    verbose = 1
)

callbacks = [earlystopping]

In [9]:
def get_model_deit(model_url, res=IMAGE_SIZE, num_classes=len(classes)) -> tf.keras.Model:
    inputs = tf.keras.Input((res, res, 3))
    hub_module = hub.KerasLayer(model_url, trainable=False)

    base_model_layers, _ = hub_module(inputs)   # Second output in the tuple is a dictionary containing attention scores.
    outputs = keras.layers.Dense(num_classes, activation="softmax")(base_model_layers)
    
    return tf.keras.Model(inputs, outputs) 

Warnings are normal; the pre-trained weights for the original classifications heads are not being skipped.

In [10]:
def build_model(hp):
    model_gcs_path = "http://tfhub.dev/sayakpaul/deit_base_distilled_patch16_224_fe/1"
    model = get_model_deit(model_gcs_path)

    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-5, max_value=1e-1, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [11]:
EXP_NAME = 'learning_rate_search'
mlflow.set_experiment(EXP_NAME)
mlflow.set_tracking_uri("sqlite:///mlruns.db")
mlflow.tensorflow.autolog()

tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=5,
    executions_per_trial=1,
    overwrite=False,
    directory='kt_' + EXP_NAME,
    tuner_id='kt_' + EXP_NAME,
    project_name='kt_' + EXP_NAME,
)

Traceback (most recent call last):
  File "c:\Users\andre\anaconda3\envs\py-39-tf-2.10\lib\site-packages\mlflow\store\tracking\file_store.py", line 279, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "c:\Users\andre\anaconda3\envs\py-39-tf-2.10\lib\site-packages\mlflow\store\tracking\file_store.py", line 372, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "c:\Users\andre\anaconda3\envs\py-39-tf-2.10\lib\site-packages\mlflow\store\tracking\file_store.py", line 1082, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "c:\Users\andre\anaconda3\envs\py-39-tf-2.10\lib\site-packages\mlflow\store\tracking\file_store.py", line 1075, in _read_helper
    result = read_yaml(root, file_name)
  File "c:\Users\andre\anaconda3\envs\py-39-tf-2.10\lib\site-packages\mlflow\utils\file_utils.py", line 182, in read_yaml
    raise MissingConfigException("Yaml file '%s' does not ex

INFO:tensorflow:Reloading Oracle from existing project kt_learning_rate_search\kt_learning_rate_search\oracle.json


INFO:tensorflow:Reloading Oracle from existing project kt_learning_rate_search\kt_learning_rate_search\oracle.json






In [12]:
tuner.search_space_summary()

Search space summary
Default search space size: 1
lr (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.1, 'step': None, 'sampling': 'log'}


In [13]:
tuner.search(
    x = train_generator,
    validation_data=val_generator,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    workers=WORKERS,
    callbacks=callbacks
)

Trial 5 Complete [00h 07m 21s]
val_accuracy: 0.9676259160041809

Best val_accuracy So Far: 0.9703237414360046
Total elapsed time: 00h 38m 32s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


In [14]:
print(tuner.results_summary())

Results summary
Results in kt_learning_rate_search\kt_learning_rate_search
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000020D10173FD0>
Trial summary
Hyperparameters:
lr: 0.0015658663854515723
Score: 0.9703237414360046
Trial summary
Hyperparameters:
lr: 0.0945999617156897
Score: 0.9676259160041809
Trial summary
Hyperparameters:
lr: 0.0492500587799961
Score: 0.9613309502601624
Trial summary
Hyperparameters:
lr: 3.184628921998135e-05
Score: 0.9595323801040649
Trial summary
Hyperparameters:
lr: 1.9521149767274305e-05
Score: 0.9424460530281067
None


## Final Search with the BEiT model:

In [None]:
from keras_cv_attention_models import beit

In [None]:
def build_model(hp):
    model = beit.BeitLargePatch16( #BeitBasePatch16(
        pretrained='imagenet21k-ft1k',  #weights='imagenet',
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        num_classes=len(classes)
    )
    model.trainable = False

    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Choice("lr", values = [0.003, 0.001, 0.0003])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [None]:
EXP_NAME = 'learning_rate_search_final'

tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=3,
    executions_per_trial=1,
    overwrite=False,
    directory='kt_' + EXP_NAME,
    tuner_id='kt_' + EXP_NAME,
    project_name='kt_' + EXP_NAME,
)

In [None]:
tuner.search(
    x = train_generator,
    validation_data=val_generator,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    workers=WORKERS,
    callbacks=callbacks
)

In [None]:
print(tuner.results_summary())