# Kitchenware Classification Model Search

Searching for the best CNN model for the [Kitchenware classification](https://www.kaggle.com/competitions/kitchenware-classification) competition on Kaggle.

In [1]:
import os, uuid, pickle
from math import log10, floor

import mlflow 
import keras_tuner

import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from keras.applications import imagenet_utils

Checking that a GPU is available:

In [2]:
# Display the GPU devices TensorFlow reports for this kernel.
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Some parameters:

In [3]:
# --- Run configuration -------------------------------------------------------
DATASET_SIZE = 9367   # not referenced by any other cell visible in this notebook
IMAGE_SIZE = 224      # images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE = 8        # default batch size for the data generators
WORKERS = 4           # passed as `workers` to the tuner search
EPOCHS = 10           # passed as `epochs` to the tuner search

# Root folder containing train.csv and the images/ directory.
BASE_PATH = './data'

# Target labels; the length of this list sets the number of output classes.
classes = ['cup', 'fork', 'glass', 'knife', 'plate', 'spoon']

Loading the training data and splitting it into training and validation sets:

In [4]:
# `Id` is read as a string so zero-padded ids (e.g. "0560") still match the
# image file names; the image path is derived from it in the same expression.
df_train_full = pd.read_csv(f'{BASE_PATH}/train.csv', dtype={'Id': str}).assign(
    filename=lambda frame: BASE_PATH + '/images/' + frame['Id'] + '.jpg'
)
df_train_full.head()

Unnamed: 0,Id,label,filename
0,560,glass,./data/images/0560.jpg
1,4675,cup,./data/images/4675.jpg
2,875,glass,./data/images/0875.jpg
3,4436,spoon,./data/images/4436.jpg
4,8265,plate,./data/images/8265.jpg


In [5]:
# Positional 80/20 split in file order: the first 80% of rows are used for
# training and the remaining rows for validation (no shuffling is done here).
val_cutoff = int(0.8 * len(df_train_full))
df_train = df_train_full.iloc[:val_cutoff]
df_val = df_train_full.iloc[val_cutoff:]

In [6]:
import tensorflow.keras.applications as applications
from keras_cv_attention_models import beit, davit, efficientnet, convnext, hornet, swin_transformer_v2, maxvit
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Model Search

Function for getting model:

In [7]:
def get_model(name='Xception'):
    """Build the pretrained backbone identified by ``name``.

    Backbones from ``tf.keras.applications`` are created with ImageNet weights
    and ``include_top=False``. Backbones from ``keras_cv_attention_models`` are
    created with ``num_classes=len(classes)`` and then re-wrapped as a model
    that ends a few layers before the output.

    Args:
        name: Backbone identifier; must be one of the names in ``supported``.

    Returns:
        A Keras model built for ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` inputs.
        'BeitLargePatch16', 'ConvNeXtXLarge-21k' and 'HorNetLargeGF' have
        ``trainable = False`` set before re-wrapping; the other backbones are
        returned with whatever trainable state their constructor produces.

    Raises:
        ValueError: If ``name`` is not a supported identifier. (Previously an
            unknown name -- including the default 'Xception' -- fell through
            every branch and failed with ``UnboundLocalError``.)
    """
    supported = (
        'Xception',
        'EfficientNetB7',
        'EfficientNetV2L',
        'ConvNeXtXLarge',
        'BeitLargePatch16',
        'ConvNeXtXLarge-21k',
        'HorNetLargeGF',
        'EfficientNetV1B7',
        'SwinTransformerV2Base_window16',
        'MaxViT_Base',
    )
    # Validate first so a typo fails fast, before any weights are loaded.
    if name not in supported:
        raise ValueError(
            f"Unknown model name {name!r}; expected one of: {', '.join(supported)}"
        )

    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)

    def _truncate(full_model, last_layer_index):
        # Re-wrap the network from layers[1] up to the given layer.
        # NOTE(review): presumably this skips the input layer and drops the
        # classification head; the indices are architecture-specific -- confirm.
        return tf.keras.Model(
            inputs=full_model.layers[1].input,
            outputs=full_model.layers[last_layer_index].output,
        )

    # ---- tf.keras.applications backbones (ImageNet weights, no top) ----
    keras_kwargs = dict(weights='imagenet', include_top=False, input_shape=input_shape)

    if name == 'Xception':
        return applications.xception.Xception(**keras_kwargs)
    if name == 'EfficientNetB7':
        return applications.efficientnet.EfficientNetB7(**keras_kwargs)
    if name == 'EfficientNetV2L':
        return applications.efficientnet_v2.EfficientNetV2L(**keras_kwargs)
    if name == 'ConvNeXtXLarge':
        return applications.convnext.ConvNeXtXLarge(**keras_kwargs)

    # ---- keras_cv_attention_models backbones ----
    kecam_kwargs = dict(input_shape=input_shape, num_classes=len(classes))

    # Larger models; these three are frozen before being re-wrapped.
    if name == 'BeitLargePatch16':
        full_model = beit.BeitLargePatch16(pretrained='imagenet21k-ft1k', **kecam_kwargs)
        full_model.trainable = False
        return _truncate(full_model, -3)
    if name == 'ConvNeXtXLarge-21k':
        full_model = convnext.ConvNeXtXlarge(pretrained='imagenet21k-ft1k', **kecam_kwargs)
        full_model.trainable = False
        return _truncate(full_model, -4)
    if name == 'HorNetLargeGF':
        full_model = hornet.HorNetLargeGF(pretrained='imagenet22k', **kecam_kwargs)
        full_model.trainable = False
        return _truncate(full_model, -4)

    # Further candidates; these are not frozen here (callers may freeze them).
    if name == 'EfficientNetV1B7':
        full_model = efficientnet.EfficientNetV1B7(pretrained='noisy_student', **kecam_kwargs)
        return _truncate(full_model, -4)
    if name == 'SwinTransformerV2Base_window16':
        full_model = swin_transformer_v2.SwinTransformerV2Base_window16(
            pretrained='imagenet22k', **kecam_kwargs
        )
        return _truncate(full_model, -4)

    # Only 'MaxViT_Base' remains after the validation above.
    full_model = maxvit.MaxViT_Base(pretrained='imagenet21k-ft1k', **kecam_kwargs)
    return _truncate(full_model, -4)

Function for loading data:

In [8]:
# Author's note: these models don't have the ImageNet preprocessing built in,
# so it is applied explicitly through the data generators.
def preprocess_input(x, data_format=None):
    """Apply Keras' ImageNet preprocessing to ``x`` using ``mode="tf"``.

    Args:
        x: Image data handed to ``imagenet_utils.preprocess_input``.
        data_format: Forwarded unchanged to ``imagenet_utils.preprocess_input``.

    Returns:
        Whatever ``imagenet_utils.preprocess_input`` returns for ``x``.
    """
    preprocessed = imagenet_utils.preprocess_input(x, mode="tf", data_format=data_format)
    return preprocessed

In [9]:
def get_datagen(params=None):
    """Create the training and validation image generators.

    Args:
        params: Optional dict of overrides. Recognised keys are 'image_size',
            'batch_size', 'rotation_range', 'trans_width_range',
            'trans_height_range', 'shear_range', 'zoom_range',
            'horizontal_flip' and 'vertical_flip'. Missing keys fall back to
            ``IMAGE_SIZE`` / ``BATCH_SIZE`` or to "no augmentation". ``None``
            (the default) and ``{}`` are equivalent.

    Returns:
        ``(train_gen, val_gen)``: iterators over ``df_train`` and ``df_val``.
        Augmentation settings are applied to the training generator only;
        both generators use ``preprocess_input``.
    """
    # Avoid a shared mutable default argument.
    if params is None:
        params = {}

    image_size = params.get('image_size', IMAGE_SIZE)
    batch_size = params.get('batch_size', BATCH_SIZE)

    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=params.get('rotation_range', 0),
        width_shift_range=params.get('trans_width_range', 0.0),
        height_shift_range=params.get('trans_height_range', 0.0),
        shear_range=params.get('shear_range', 0.0),
        zoom_range=params.get('zoom_range', 0),
        horizontal_flip=params.get('horizontal_flip', False),
        vertical_flip=params.get('vertical_flip', False),
        dtype="float16"
    )

    # Validation images are only preprocessed, never augmented.
    val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess_input,
        dtype="float16"
    )

    # Settings shared by both iterators. Passing `classes` explicitly pins the
    # label -> index mapping to the module-level `classes` list, so the two
    # splits cannot end up with different mappings if one split happens to
    # lack a label.
    flow_kwargs = dict(
        x_col='filename',
        y_col='label',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        classes=classes,
        seed=0,
    )

    train_gen = train_datagen.flow_from_dataframe(df_train, **flow_kwargs)
    val_gen = val_datagen.flow_from_dataframe(df_val, **flow_kwargs)

    return train_gen, val_gen

Keras tuner function for building model:

In [10]:
def build_model(hp):
    """KerasTuner model builder: frozen pretrained backbone plus a small dense head.

    Args:
        hp: KerasTuner hyperparameters object; the only hyperparameter defined
            here is the categorical choice 'model' (the backbone name).

    Returns:
        A compiled ``tf.keras.Sequential`` model named after the chosen
        backbone, with a softmax output of ``len(classes)`` units, trained with
        Adam (learning rate 0.01) on categorical cross-entropy and reporting
        accuracy.
    """
    # Getting model
    hp_model = hp.Choice('model', ['EfficientNetB7', 'EfficientNetV2L', 'ConvNeXtXLarge'])    ## Conv models
    # Alternative search space (swap with the line above to search these instead):
    # hp_model = hp.Choice('model', ['BeitLargePatch16', 'ConvNeXtXLarge-21k'])#, 'HorNetLargeGF'])  ## Transformers + Conv

    base_model = get_model(hp_model)
    base_model.trainable = False  # only the new head is trained

    # Backbones whose name starts with "efficientnet" get dropout after each
    # batch-norm layer; every other backbone uses the same head without dropout.
    use_dropout = base_model.name.lower().startswith('efficientnet')

    head = [
        keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
        base_model,
        tf.keras.layers.Flatten(),
        tf.keras.layers.BatchNormalization(),
    ]
    if use_dropout:
        head.append(tf.keras.layers.Dropout(.2))
    head.append(tf.keras.layers.Dense(12, 'gelu'))
    head.append(tf.keras.layers.BatchNormalization())
    if use_dropout:
        head.append(tf.keras.layers.Dropout(.2))
    # One output unit per label instead of a hard-coded 6.
    head.append(tf.keras.layers.Dense(len(classes), 'softmax'))

    model = tf.keras.Sequential(head, name=hp_model)

    # The output layer already applies softmax, so the loss must consume
    # probabilities (the string loss does); the previously created, unused
    # `from_logits=True` loss object would have been wrong here and is removed.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.01),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

Callbacks:

In [11]:
# Both callbacks monitor validation accuracy (mode='max').
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.2,
    patience=2,
    min_delta=1e-4,
    min_lr=1e-6,
    verbose=1,
)

earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    min_delta=1e-4,
    restore_best_weights=True,
    verbose=1,
)

# Only early stopping is active; `reduce_lr` is defined above but deliberately
# left out of the list.
callbacks = [earlystopping]

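Search over transformer and CNN models (the results below come from a run that used the alternative `hp.Choice` list that is commented out in `build_model`; swap the two `hp.Choice` lines there before re-running this section):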

In [12]:
EXP_NAME = 'model_search_transformers'

# Configure the tracking store first: the experiment is looked up (or created)
# in whichever store is active when `set_experiment` is called, so selecting
# the experiment before pointing MLflow at the sqlite DB resolves it against
# the wrong store.
mlflow.set_tracking_uri("sqlite:///mlruns.db")
mlflow.set_experiment(EXP_NAME)
mlflow.tensorflow.autolog()

# Random search over the choices defined in `build_model`, ranked by validation
# accuracy. `overwrite=False` keeps any earlier results stored in the project
# directory.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=False,
    directory='kt_' + EXP_NAME,
    tuner_id='kt_' + EXP_NAME,
    project_name='kt_' + EXP_NAME,
)

tuner.search_space_summary()



INFO:tensorflow:Reloading Oracle from existing project kt_model_search_transformers\kt_model_search_transformers\oracle.json
>>>> Load pretrained from: C:\Users\andre\.keras\models\beit_large_patch16_224_imagenet21k-ft1k.h5
Search space summary
Default search space size: 1
model (Choice)
{'default': 'BeitLargePatch16', 'conditions': [], 'values': ['BeitLargePatch16', 'ConvNeXtXLarge-21k', 'HorNetLargeGF'], 'ordered': False}


In [13]:
# Default generators: no augmentation, IMAGE_SIZE / BATCH_SIZE from the config cell.
train_gen, val_gen = get_datagen(params={})

# The search call is left disabled in this section; the tuner created above
# uses overwrite=False and its output shows a previous project being reloaded.
# To run the search again, re-enable:
#
# tuner.search(x = train_gen,
#             validation_data=val_gen,
#             batch_size=BATCH_SIZE,
#             epochs=EPOCHS,
#             workers=WORKERS,
#             callbacks=callbacks
# )

Found 4447 validated image filenames belonging to 6 classes.
Found 1112 validated image filenames belonging to 6 classes.


In [14]:
# `results_summary()` prints the report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
tuner.results_summary()

Results summary
Results in kt_model_search_transformers\kt_model_search_transformers
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000023BC351F2B0>
Trial summary
Hyperparameters:
model: BeitLargePatch16
Score: 0.9856114983558655
Trial summary
Hyperparameters:
model: ConvNeXtXLarge-21k
Score: 0.9793165326118469
None


## Search over CNN models

In [15]:
EXP_NAME = 'model_search'

# Configure the tracking store first: the experiment is looked up (or created)
# in whichever store is active when `set_experiment` is called, so selecting
# the experiment before pointing MLflow at the sqlite DB resolves it against
# the wrong store.
mlflow.set_tracking_uri("sqlite:///mlruns.db")
mlflow.set_experiment(EXP_NAME)
mlflow.tensorflow.autolog()

# Random search over the choices defined in `build_model`, ranked by validation
# accuracy. `overwrite=False` keeps any earlier results stored in the project
# directory.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=False,
    directory='kt_' + EXP_NAME,
    tuner_id='kt_' + EXP_NAME,
    project_name='kt_' + EXP_NAME,
)

tuner.search_space_summary()



Search space summary
Default search space size: 1
model (Choice)
{'default': 'EfficientNetB7', 'conditions': [], 'values': ['EfficientNetB7', 'EfficientNetV2L', 'ConvNeXtXLarge'], 'ordered': False}


In [16]:
# Default generators: no augmentation, IMAGE_SIZE / BATCH_SIZE from the config cell.
train_gen, val_gen = get_datagen(params={})

# Arguments for the search, collected in one place.
search_kwargs = dict(
    x=train_gen,
    validation_data=val_gen,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    workers=WORKERS,
    callbacks=callbacks,
)
tuner.search(**search_kwargs)

Found 4447 validated image filenames belonging to 6 classes.
Found 1112 validated image filenames belonging to 6 classes.

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
ConvNeXtXLarge    |?                 |model



2022/12/15 14:23:24 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'ad2f890f95324fd98067b3cc6ec21bf7', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/10

In [None]:
# `results_summary()` prints the report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
tuner.results_summary()

Results summary
Results in kt_model_search\kt_model_search
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000024A7840E5E0>
Trial summary
Hyperparameters:
model: ConvNeXtXLarge
Score: 0.4298561215400696
Trial summary
Hyperparameters:
model: EfficientNetB7
Score: 0.216726616024971
Trial summary
Hyperparameters:
model: EfficientNetV2L
Score: 0.2032374143600464
None
