# EfficientNet - Repurposing/Finetuning
## Introduction

This notebook is an attempt to repurpose and fine-tune an EfficientNet model for the task of American Sign Language detection, as part of the DSPRO2 project at HSLU.

## Setup
In this section all the necessary libraries are imported.

In [9]:
# Install the packages listed in requirements.txt (the %pip magic targets the running kernel's environment).
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
import wandb
import torch
import torch.nn as nn
import torchvision.models as visionmodels
import torchvision.transforms.v2 as transforms
import lightning as L

from lightning.pytorch.loggers import WandbLogger

import nbformat

from typing import Callable

import os

# Our own modules
import models.sweep_helper as sweep_helper

from datapipeline.asl_image_data_module import ASLImageDataModule
from models.asl_model import ASLModel
from models.training import sweep, train_model

In [2]:
# Weights & Biases environment variable naming the notebook file that runs started from this kernel belong to.
os.environ["WANDB_NOTEBOOK_NAME"] = "./dspro2/efficientnet.ipynb"

## Preprocessing
No general data preprocessing is necessary; however, random transforms are applied to the images during training. The images are resized to 224x224 pixels, which is the input size of the EfficientNet model. The images are also normalized using the mean and standard deviation of the ImageNet dataset, which is the dataset on which the EfficientNet model was pretrained.

The following cells will show the loading of the dataset and the preparation of the mentioned transforms.

In [3]:
# Root directory of the ASL picture dataset; handed to ASLImageDataModule as `path`.
# NOTE(review): absolute, machine-specific location — adjust when running on another host.
PATH = "/exchange/dspro2/silent-speech/ASL_Pictures_Dataset"

In [4]:
img_size = 224  # side length in pixels; every image is resized to img_size x img_size

# See https://pytorch.org/vision/master/auto_examples/transforms/plot_transforms_illustrations.html#sphx-glr-auto-examples-transforms-plot-transforms-illustrations-py
# for more examples of transforms

# Open Idea: Grayscale for anti bias


# Augmentation + normalization pipeline handed to the data module.
# The random transforms run first; the image is then converted to a float tensor
# scaled to [0, 1] and normalized with the ImageNet channel statistics.
data_transforms = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.3, saturation=0.3, hue=0.3), # Idea: ColorJitter for anti bias
    transforms.RandomRotation(degrees=5),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    # v2.ToTensor() is deprecated; ToImage() + ToDtype(float32, scale=True) is the
    # replacement documented by torchvision and yields the same [0, 1] float image.
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # ImageNet stats
])



In [5]:
# Data module serving the images under PATH with the transform pipeline defined above.
# NOTE(review): only this one (augmenting) pipeline is passed in — confirm whether
# ASLImageDataModule also applies it to the "Validation" split.
# NOTE(review): num_workers=128 is high; presumably sized for the training server —
# lower it on machines with fewer CPU cores.
datamodule = ASLImageDataModule(path=PATH, transforms=data_transforms, val_split_folder="Validation", batch_size=32, num_workers=128)

## Models

In [6]:
# Number of output classes; used as the default `num_classes` of the model wrappers below.
NUM_CLASSES = 28

In [7]:
class ASLEfficientNetRepurpose(nn.Module):
    """EfficientNet backbone with frozen weights and a freshly created classifier head.

    All parameters of the given EfficientNet are frozen, then its `classifier`
    is swapped for a new `Dropout -> Linear` head. The new head is created after
    the freeze, so its parameters are the only ones left trainable.

    Args:
        efficientnet_model: EfficientNet instance to wrap; it is modified in place.
        dropout: Dropout probability used in the new head.
        num_classes: Number of outputs of the new linear layer.
    """

    def __init__(self, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float = 0.2, num_classes: int = NUM_CLASSES):
        super().__init__()

        # Freeze the whole network before the new head exists.
        efficientnet_model.requires_grad_(False)

        # The existing head's linear layer (classifier[1]) tells us the feature width.
        head_in_features = efficientnet_model.classifier[1].in_features
        efficientnet_model.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(head_in_features, num_classes),
        )

        self.model = efficientnet_model

    def forward(self, x):
        """Run `x` through the wrapped EfficientNet and return its output."""
        logits = self.model(x)
        return logits

In [8]:
class ASLEfficientNetFinetune(ASLEfficientNetRepurpose):
    """Repurposed EfficientNet whose last `model.features` entries are trainable again.

    Builds on `ASLEfficientNetRepurpose` (frozen backbone, new classifier head)
    and re-enables gradients for the trailing `unfreeze_features` entries of
    `model.features`.

    Args:
        efficientnet_model: EfficientNet instance to wrap; it is modified in place.
        dropout: Dropout probability used in the new head.
        unfreeze_features: How many trailing entries of `model.features` to unfreeze.
            Must be greater than 0; values above the number of entries are clamped.
        num_classes: Number of outputs of the new linear layer.
    """

    def __init__(self, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float = 0.2, unfreeze_features: int = 1, num_classes: int = NUM_CLASSES):
        super().__init__(efficientnet_model, dropout, num_classes)

        assert unfreeze_features > 0, "unfreeze_features must be greater than 0"

        feature_stack = self.model.features
        # Never ask for more entries than the backbone actually has.
        trainable_count = min(unfreeze_features, len(feature_stack))

        feature_stack[-trainable_count:].requires_grad_(True)

## Training

In [9]:
# Keys of the run/sweep configuration that get_efficientnet_model_from_config reads.
TUNE_TYPE = "tune_type"  # selects the wrapper: "repurpose" or "finetune"
EFFICIENTNET_MODEL = "efficientnet_model"  # EfficientNet variant, "b0" .. "b7"
DROPOUT = "dropout"  # dropout probability of the new classifier head

In [10]:
def get_pretrained_efficientnet_model(model_type: str):
    """Load a torchvision EfficientNet with its default pretrained weights.

    Args:
        model_type: EfficientNet variant, one of "b0" .. "b7".

    Returns:
        The torchvision EfficientNet built by `efficientnet_<model_type>` with
        `EfficientNet_<MODEL_TYPE>_Weights.DEFAULT`.

    Raises:
        ValueError: If `model_type` is not one of the supported variants.
    """
    supported_types = ("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7")

    # Fail with a clear message instead of falling through to an unbound local variable.
    if model_type not in supported_types:
        raise ValueError(
            f"Invalid EfficientNet model type: {model_type!r}; expected one of {', '.join(supported_types)}"
        )

    # torchvision names builders `efficientnet_bN` and weight enums `EfficientNet_BN_Weights`.
    build_model = getattr(visionmodels, f"efficientnet_{model_type}")
    weights_enum = getattr(visionmodels, f"EfficientNet_{model_type.upper()}_Weights")

    return build_model(weights=weights_enum.DEFAULT)

In [11]:
# Configuration key holding the number of trailing feature entries to unfreeze.
UNFREEZE_FEATURES = "unfreeze_features"

def get_asl_efficientnet_model(type: str, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float, unfreeze_features: int = 1) -> nn.Module:
    """Wrap an EfficientNet backbone in the ASL model variant named by `type`.

    Args:
        type: "repurpose" for a frozen backbone with a new head, or "finetune"
            to additionally unfreeze trailing feature entries.
        efficientnet_model: EfficientNet instance to wrap.
        dropout: Dropout probability of the new classifier head.
        unfreeze_features: Only used for "finetune"; number of trailing
            feature entries to make trainable.

    Returns:
        The wrapped model.

    Raises:
        ValueError: If `type` is neither "repurpose" nor "finetune".
    """
    if type == "finetune":
        return ASLEfficientNetFinetune(efficientnet_model, dropout=dropout, unfreeze_features=unfreeze_features)

    if type == "repurpose":
        return ASLEfficientNetRepurpose(efficientnet_model, dropout=dropout)

    raise ValueError(f"Invalid model type: {type}")

In [12]:
def get_efficientnet_model_from_config(config: dict) -> nn.Module:
    """Build the ASL EfficientNet model described by a run configuration.

    Reads the EFFICIENTNET_MODEL, TUNE_TYPE, DROPOUT and UNFREEZE_FEATURES
    entries of `config`; all four keys must be present.

    Args:
        config: Mapping with the hyperparameters of the run.

    Returns:
        The pretrained backbone wrapped in the requested ASL model variant.
    """
    pretrained_backbone = get_pretrained_efficientnet_model(config[EFFICIENTNET_MODEL])
    return get_asl_efficientnet_model(
        config[TUNE_TYPE],
        pretrained_backbone,
        config[DROPOUT],
        config[UNFREEZE_FEATURES],
    )

In [20]:
run_id = 0  # NOTE(review): not referenced anywhere in the visible notebook — confirm whether it is still needed
SEED = 42  # seed forwarded to train_model for every run


def train_efficient_net():
    """Zero-argument training entry point passed to `sweep(...)` as `training_procedure`.

    Delegates to `train_model` with the "efficientnet" label, the model factory
    `get_efficientnet_model_from_config`, the notebook-level `datamodule` and SEED.
    NOTE(review): `train_model` presumably obtains the run's hyperparameters from
    the active W&B run and passes them to the factory — confirm in models.training.
    """
    train_model("efficientnet", get_efficientnet_model_from_config, datamodule, seed=SEED)

In [None]:
# W&B sweep definition for the "repurpose" experiments (frozen backbone, new head)
# across EfficientNet B0-B2, searched with the "bayes" method and maximizing
# the validation accuracy metric.
sweep_config = {
    "name": "EfficientNet-B0-B2-Repurpose",
    "method": "bayes",
    "metric": {
        "name": f"{ASLModel.VALID_ACCURACY}",
        "goal": "maximize"
    },
    # Hyperband early termination of poorly performing runs.
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 5
    },
    "parameters": {
        TUNE_TYPE: {
            "values": ["repurpose"]
        },
        EFFICIENTNET_MODEL: {
            "values": ["b0", "b1", "b2"]
        },
        # The key must exist because get_efficientnet_model_from_config reads it,
        # but get_asl_efficientnet_model ignores the value for tune type "repurpose".
        UNFREEZE_FEATURES: {
            "min": 0,
            "max": 8
        },
        DROPOUT: {
            "min": 0.1,
            "max": 0.5
        },
        # Nested optimizer hyperparameters (keys defined in models.sweep_helper).
        sweep_helper.OPTIMIZER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "values": [sweep_helper.OptimizerType.ADAM, sweep_helper.OptimizerType.ADAMW, sweep_helper.OptimizerType.RMSPROP]
                },
                sweep_helper.LEARNING_RATE: {
                    "min": 1e-5,
                    "max": 1e-2,
                    "distribution": "log_uniform_values"
                },
                sweep_helper.WEIGHT_DECAY: {
                    "min": 0,
                    "max": 1e-3,
                },
                sweep_helper.MOMENTUM: {
                    "min": 0.8,
                    "max": 0.99
                }
            }
        },
        # Nested learning-rate scheduler hyperparameters.
        # NOTE(review): presumably each scheduler type only uses a subset of
        # STEP_SIZE / GAMMA / FACTOR — confirm in models.sweep_helper.
        sweep_helper.LEARNING_RATE_SCHEDULER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "values": [sweep_helper.LearningRateSchedulerType.NONE, sweep_helper.LearningRateSchedulerType.STEP, sweep_helper.LearningRateSchedulerType.EXPONENTIAL, sweep_helper.LearningRateSchedulerType.CONSTANT]
                },
                sweep_helper.STEP_SIZE: {
                    "min": 1,
                    "max": 10
                },
                sweep_helper.GAMMA: {
                    "min": 0.1,
                    "max": 0.9
                },
                sweep_helper.FACTOR: {
                    "min": 0.1,
                    "max": 0.5,
                }
            }
        }
    }
}

In [None]:
# Start the repurpose sweep with train_efficient_net as the per-run procedure (count=20).
sweep(sweep_config=sweep_config, count=20, training_procedure=train_efficient_net)

In [22]:
# W&B sweep definition for fine-tuning EfficientNet-B0: tune type, backbone and
# optimizer type are fixed; the remaining hyperparameters are searched with the
# "bayes" method, maximizing the validation accuracy metric.
finetune_sweep_config_b0 = {
    "name": "EfficientNet-B0-Finetune",
    "method": "bayes",
    "metric": {
        "name": f"{ASLModel.VALID_ACCURACY}",
        "goal": "maximize"
    },
    # Hyperband early termination of poorly performing runs.
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 5
    },
    "parameters": {
        TUNE_TYPE: {
            "value": "finetune"
        },
        EFFICIENTNET_MODEL: {
            "value": "b0"
        },
        # Number of trailing `model.features` entries to unfreeze; ASLEfficientNetFinetune
        # requires > 0 and clamps to the number of available entries.
        UNFREEZE_FEATURES: {
            "min": 1,
            "max": 9
        },
        DROPOUT: {
            "min": 0.1,
            "max": 0.5
        },
        # Nested optimizer hyperparameters; the optimizer type is pinned to RMSprop.
        sweep_helper.OPTIMIZER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "value": sweep_helper.OptimizerType.RMSPROP
                },
                sweep_helper.LEARNING_RATE: {
                    "min": 1e-5,
                    "max": 1e-2,
                    "distribution": "log_uniform_values"
                },
                sweep_helper.WEIGHT_DECAY: {
                    "min": 0,
                    "max": 1e-3,
                },
                sweep_helper.MOMENTUM: {
                    "min": 0.8,
                    "max": 0.99
                }
            }
        },
        # Nested learning-rate scheduler hyperparameters.
        sweep_helper.LEARNING_RATE_SCHEDULER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "values": [sweep_helper.LearningRateSchedulerType.NONE, sweep_helper.LearningRateSchedulerType.STEP, sweep_helper.LearningRateSchedulerType.EXPONENTIAL]
                },
                sweep_helper.STEP_SIZE: {
                    "min": 1,
                    "max": 10
                },
                sweep_helper.GAMMA: {
                    "min": 0.1,
                    "max": 0.9
                }
            }
        }
    }
}

In [None]:
# Start the EfficientNet-B0 fine-tuning sweep with train_efficient_net as the per-run procedure (count=30).
sweep(sweep_config=finetune_sweep_config_b0, count=30, training_procedure=train_efficient_net)



Create sweep with ID: da3s4woi
Sweep URL: https://wandb.ai/dspro2-silent-speech/silent-speech/sweeps/da3s4woi


[34m[1mwandb[0m: Agent Starting Run: 5ya04w7r with config:
[34m[1mwandb[0m: 	dropout: 0.4882254824597369
[34m[1mwandb[0m: 	efficientnet_model: b0
[34m[1mwandb[0m: 	learning_rate_scheduler: {'gamma': 0.12207377267316807, 'step_size': 8, 'type': 'exponential'}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 0.00014626705462299463, 'momentum': 0.9600000119859484, 'type': 'rmsprop', 'weight_decay': 0.0003786203337267541}
[34m[1mwandb[0m: 	tune_type: finetune
[34m[1mwandb[0m: 	unfreeze_features: 2
Seed set to 42
[34m[1mwandb[0m: Currently logged in as: [33mv8-luky[0m ([33mdspro2-silent-speech[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A16') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
/opt/conda/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type                    | Params | Mode 
-------------------------------------------------------------------
0 | model          | ASLEfficientNetFinetune | 4.0 M  | train
1 | criterion 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved. New best score: 0.990
Metric train_accuracy improved. New best score: 0.946


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.996
Metric train_accuracy improved by 0.042 >= min_delta = 0.0. New best score: 0.989


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.996
Metric train_accuracy improved by 0.004 >= min_delta = 0.0. New best score: 0.993


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.996
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.993


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.993


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.997


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_accuracy did not improve in the last 5 records. Best score: 0.993. Signaling Trainer to stop.


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇██
lr-RMSprop,█████▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-weight_decay,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy_epoch,▁▇████████
train_accuracy_step,▁▂▇▇▅▄█████▇▇▇██▇███████▇███▇███▇█▇███▇█
train_loss_epoch,█▂▁▁▁▁▁▁▁▁
train_loss_step,▅█▃▂▄▁▂▁▂▁▁▁▁▂▁▁▂▁▃▃▁▁▂▂▂▁▄▁▁▁▂▁▂▁▂▂▁▁▂▁
trainer/global_step,▁▁▁▁▁▃▂▂▂▂▂▅▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃█▃▄▄▄▄▄
valid_accuracy_epoch,▁▇▇▇▇█▇▆▆█

0,1
epoch,9.0
lr-RMSprop,0.0
lr-RMSprop-momentum,0.96
lr-RMSprop-weight_decay,0.00038
train_accuracy_epoch,0.99306
train_accuracy_step,1.0
train_loss_epoch,0.03775
train_loss_step,0.02358
trainer/global_step,31219.0
valid_accuracy_epoch,0.99683


[34m[1mwandb[0m: Agent Starting Run: p7v8q4nc with config:
[34m[1mwandb[0m: 	dropout: 0.2827040182453778
[34m[1mwandb[0m: 	efficientnet_model: b0
[34m[1mwandb[0m: 	learning_rate_scheduler: {'gamma': 0.5596732515653552, 'step_size': 4, 'type': 'none'}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 7.800980168851934e-05, 'momentum': 0.8940542066814777, 'type': 'rmsprop', 'weight_decay': 0.0004635554431632722}
[34m[1mwandb[0m: 	tune_type: finetune
[34m[1mwandb[0m: 	unfreeze_features: 3
Seed set to 42


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type                    | Params | Mode 
-------------------------------------------------------------------
0 | model          | ASLEfficientNetFinetune | 4.0 M  | train
1 | criterion      | CrossEntropyLoss        | 0      | train
2 | train_accuracy | MulticlassAccuracy      | 0      | train
3 | valid_accuracy | MulticlassAccuracy      | 0      | train
4 | test_accuracy  | MulticlassAccuracy      | 0      | train
-------------------------------------------------------------------
3.2 M     Trainable params
851 K     Non-trainabl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved. New best score: 0.987
Metric train_accuracy improved. New best score: 0.979


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.009 >= min_delta = 0.0. New best score: 0.996
Metric train_accuracy improved by 0.015 >= min_delta = 0.0. New best score: 0.994


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.997
Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.994


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.997
Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.995


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.998
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.995


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.998


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.996


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.996


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric valid_accuracy did not improve in the last 5 records. Best score: 0.998. Signaling Trainer to stop.
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.996


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
lr-RMSprop,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-weight_decay,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy_epoch,▁▇▇████████
train_accuracy_step,▁▅███▅████▅█████████████▅▅██▅███████████
train_loss_epoch,█▂▂▂▁▁▁▁▁▁▁
train_loss_step,▆▂▁▄▃▁▁▁▂▂▂█▁▂▂▂▂▂▁▁▂▂▁▂▂▃▂▁▂▂▃▄▄▃▂▃▅▄▂▂
trainer/global_step,▁▁▁▁▁▁▁▁▁▁▂▃▂▂▂▂▂▂▂▂▅▂▂▂▂▂▃▃▃▃▃▃▃▃▃██▃▃▃
valid_accuracy_epoch,▁▇▇▇██▇▇▇█▇

0,1
epoch,10.0
lr-RMSprop,8e-05
lr-RMSprop-momentum,0.89405
lr-RMSprop-weight_decay,0.00046
train_accuracy_epoch,0.99581
train_accuracy_step,0.96875
train_loss_epoch,0.025
train_loss_step,0.09118
trainer/global_step,34341.0
valid_accuracy_epoch,0.99767


[34m[1mwandb[0m: Agent Starting Run: d718htxj with config:
[34m[1mwandb[0m: 	dropout: 0.171572991544872
[34m[1mwandb[0m: 	efficientnet_model: b0
[34m[1mwandb[0m: 	learning_rate_scheduler: {'gamma': 0.23042224803407785, 'step_size': 3, 'type': 'exponential'}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 0.0006703235551518386, 'momentum': 0.8568555153832116, 'type': 'rmsprop', 'weight_decay': 0.00018206585619814596}
[34m[1mwandb[0m: 	tune_type: finetune
[34m[1mwandb[0m: 	unfreeze_features: 8
Seed set to 42


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type                    | Params | Mode 
-------------------------------------------------------------------
0 | model          | ASLEfficientNetFinetune | 4.0 M  | train
1 | criterion      | CrossEntropyLoss        | 0      | train
2 | train_accuracy | MulticlassAccuracy      | 0      | train
3 | valid_accuracy | MulticlassAccuracy      | 0      | train
4 | test_accuracy  | MulticlassAccuracy      | 0      | train
-------------------------------------------------------------------
4.0 M     Trainable params
928       Non-trainabl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved. New best score: 0.970
Metric train_accuracy improved. New best score: 0.905


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.026 >= min_delta = 0.0. New best score: 0.997
Metric train_accuracy improved by 0.091 >= min_delta = 0.0. New best score: 0.996


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.003 >= min_delta = 0.0. New best score: 0.999
Metric train_accuracy improved by 0.003 >= min_delta = 0.0. New best score: 0.998


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 1.000
Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.999


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 1.000
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.999


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 1.000
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.999


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 1.000
Monitored metric train_accuracy did not improve in the last 5 records. Best score: 0.999. Signaling Trainer to stop.


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█
lr-RMSprop,█████▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-weight_decay,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy_epoch,▁██████████
train_accuracy_step,▁████▇██▇███████▇██████████████████▇██▇█
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▄▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃█▃▃▃▃▃▃▃▄▄▄▄
valid_accuracy_epoch,▁▇█████████

0,1
epoch,10.0
lr-RMSprop,0.0
lr-RMSprop-momentum,0.85686
lr-RMSprop-weight_decay,0.00018
train_accuracy_epoch,0.99914
train_accuracy_step,1.0
train_loss_epoch,0.00843
train_loss_step,0.01374
trainer/global_step,34341.0
valid_accuracy_epoch,0.99973


[34m[1mwandb[0m: Agent Starting Run: d018tdx6 with config:
[34m[1mwandb[0m: 	dropout: 0.4391321561240148
[34m[1mwandb[0m: 	efficientnet_model: b0
[34m[1mwandb[0m: 	learning_rate_scheduler: {'gamma': 0.21204549660809668, 'step_size': 5, 'type': 'step'}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 0.008411968198193719, 'momentum': 0.8349173493479937, 'type': 'rmsprop', 'weight_decay': 0.0008021813311850058}
[34m[1mwandb[0m: 	tune_type: finetune
[34m[1mwandb[0m: 	unfreeze_features: 5
Seed set to 42


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type                    | Params | Mode 
-------------------------------------------------------------------
0 | model          | ASLEfficientNetFinetune | 4.0 M  | train
1 | criterion      | CrossEntropyLoss        | 0      | train
2 | train_accuracy | MulticlassAccuracy      | 0      | train
3 | valid_accuracy | MulticlassAccuracy      | 0      | train
4 | test_accuracy  | MulticlassAccuracy      | 0      | train
-------------------------------------------------------------------
4.0 M     Trainable params
65.7 K    Non-trainabl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved. New best score: 0.032
Metric train_accuracy improved. New best score: 0.573


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.091 >= min_delta = 0.0. New best score: 0.124
Metric train_accuracy improved by 0.133 >= min_delta = 0.0. New best score: 0.706


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.016 >= min_delta = 0.0. New best score: 0.722


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.728


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.730 >= min_delta = 0.0. New best score: 0.853
Metric train_accuracy improved by 0.187 >= min_delta = 0.0. New best score: 0.915


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.009 >= min_delta = 0.0. New best score: 0.924


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.005 >= min_delta = 0.0. New best score: 0.928


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.002 >= min_delta = 0.0. New best score: 0.930


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.930


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.129 >= min_delta = 0.0. New best score: 0.982
Metric train_accuracy improved by 0.043 >= min_delta = 0.0. New best score: 0.973


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.988
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.974


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.989
Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.975


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.975


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.995
Metric train_accuracy improved by 0.010 >= min_delta = 0.0. New best score: 0.985


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.986


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.986


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.986


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.986


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.995
Metric train_accuracy improved by 0.002 >= min_delta = 0.0. New best score: 0.988


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.996
Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.989


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.989


Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric valid_accuracy did not improve in the last 5 records. Best score: 0.996. Signaling Trainer to stop.
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.990


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
lr-RMSprop,█████████▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-momentum,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr-RMSprop-weight_decay,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy_epoch,▁▃▃▃▄▇▇▇▇▇█████████████████
train_accuracy_step,▄▄▂▁▂▃▆▃▅█▇▆▆▇▇▆▇▇▇▇██▇██▇█▇▇█▇███▇█████
train_loss_epoch,█▅▅▅▅▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,▅▆▆██▄▃▂▅▇▇▃▁▂▂▂▄▂▁▁▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▁▁▃▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇█████
valid_accuracy_epoch,▁▂▂▁▁▇▆▆▇▆█████████████████

0,1
epoch,26.0
lr-RMSprop,0.0
lr-RMSprop-momentum,0.83492
lr-RMSprop-weight_decay,0.0008
train_accuracy_epoch,0.98976
train_accuracy_step,1.0
train_loss_epoch,0.05969
train_loss_step,0.04487
trainer/global_step,84293.0
valid_accuracy_epoch,0.99521


[34m[1mwandb[0m: Agent Starting Run: vebgisjc with config:
[34m[1mwandb[0m: 	dropout: 0.23339073443675645
[34m[1mwandb[0m: 	efficientnet_model: b0
[34m[1mwandb[0m: 	learning_rate_scheduler: {'gamma': 0.530506901490113, 'step_size': 6, 'type': 'step'}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 0.0006408095360303812, 'momentum': 0.8990830296219949, 'type': 'rmsprop', 'weight_decay': 0.00013017658405344235}
[34m[1mwandb[0m: 	tune_type: finetune
[34m[1mwandb[0m: 	unfreeze_features: 1
Seed set to 42


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/conda/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type                    | Params | Mode 
-------------------------------------------------------------------
0 | model          | ASLEfficientNetFinetune | 4.0 M  | train
1 | criterion      | CrossEntropyLoss        | 0      | train
2 | train_accuracy | MulticlassAccuracy      | 0      | train
3 | valid_accuracy | MulticlassAccuracy      | 0      | train
4 | test_accuracy  | MulticlassAccuracy      | 0      | train
-------------------------------------------------------------------
448 K     Trainable params
3.6 M     Non-trainabl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved. New best score: 0.938
Metric train_accuracy improved. New best score: 0.880


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.944
Metric train_accuracy improved by 0.021 >= min_delta = 0.0. New best score: 0.901


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.009 >= min_delta = 0.0. New best score: 0.953
Metric train_accuracy improved by 0.010 >= min_delta = 0.0. New best score: 0.911


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.008 >= min_delta = 0.0. New best score: 0.960
Metric train_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.916


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.004 >= min_delta = 0.0. New best score: 0.920


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.002 >= min_delta = 0.0. New best score: 0.922


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.011 >= min_delta = 0.0. New best score: 0.971
Metric train_accuracy improved by 0.017 >= min_delta = 0.0. New best score: 0.939


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.939


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.940


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.003 >= min_delta = 0.0. New best score: 0.974


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.002 >= min_delta = 0.0. New best score: 0.976
Metric train_accuracy improved by 0.011 >= min_delta = 0.0. New best score: 0.951


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.003 >= min_delta = 0.0. New best score: 0.979


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.980
Metric train_accuracy improved by 0.000 >= min_delta = 0.0. New best score: 0.951


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_accuracy improved by 0.004 >= min_delta = 0.0. New best score: 0.984
Metric train_accuracy improved by 0.006 >= min_delta = 0.0. New best score: 0.957


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_accuracy improved by 0.001 >= min_delta = 0.0. New best score: 0.958


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
# W&B sweep definition for fine-tuning EfficientNet-B1: tune type, backbone and
# optimizer type are fixed; the remaining hyperparameters are searched with the
# "bayes" method, maximizing the validation accuracy metric.
finetune_sweep_config_b1 = {
    "name": "EfficientNet-B1-Finetune",
    "method": "bayes",
    "metric": {
        "name": f"{ASLModel.VALID_ACCURACY}",
        "goal": "maximize"
    },
    # Hyperband early termination of poorly performing runs.
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 5
    },
    "parameters": {
        TUNE_TYPE: {
            "value": "finetune"
        },
        # Must match the sweep name: this sweep fine-tunes the B1 backbone.
        EFFICIENTNET_MODEL: {
            "value": "b1"
        },
        # Number of trailing `model.features` entries to unfreeze; ASLEfficientNetFinetune
        # requires > 0 and clamps to the number of available entries.
        UNFREEZE_FEATURES: {
            "min": 1,
            "max": 9
        },
        DROPOUT: {
            "min": 0.1,
            "max": 0.5
        },
        # Nested optimizer hyperparameters; the optimizer type is pinned to RMSprop.
        sweep_helper.OPTIMIZER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "value": sweep_helper.OptimizerType.RMSPROP
                },
                sweep_helper.LEARNING_RATE: {
                    "min": 1e-5,
                    "max": 1e-2,
                    "distribution": "log_uniform_values"
                },
                sweep_helper.WEIGHT_DECAY: {
                    "min": 0,
                    "max": 1e-3,
                },
                sweep_helper.MOMENTUM: {
                    "min": 0.8,
                    "max": 0.99
                }
            }
        },
        # Nested learning-rate scheduler hyperparameters.
        sweep_helper.LEARNING_RATE_SCHEDULER: {
            "parameters": {
                sweep_helper.TYPE: {
                    "values": [sweep_helper.LearningRateSchedulerType.NONE, sweep_helper.LearningRateSchedulerType.STEP, sweep_helper.LearningRateSchedulerType.EXPONENTIAL]
                },
                sweep_helper.STEP_SIZE: {
                    "min": 1,
                    "max": 10
                },
                sweep_helper.GAMMA: {
                    "min": 0.1,
                    "max": 0.9
                }
            }
        }
    }
}

In [None]:
# Start the EfficientNet-B1 fine-tuning sweep with train_efficient_net as the per-run procedure (count=30).
sweep(sweep_config=finetune_sweep_config_b1, count=30, training_procedure=train_efficient_net)

## Evaluation