# Efficient Net - Repurposing/Finetuning
## Introduction

This notebook is an attempt to repurpose and fine-tune an EfficientNet model for the task of American Sign Language detection, as part of the DSPRO2 project at HSLU.

## Setup
In this section, all the necessary libraries are imported.

In [None]:
%pip install -r requirements.txt

In [34]:
import wandb
import torch
import torch.nn as nn
import torchvision.models as visionmodels
import torchvision.transforms as transforms
import lightning as L

from lightning.pytorch.loggers import WandbLogger

import nbformat

# Our own modules
from datapipeline.asl_image_data_module import ASLImageDataModule
from models.asl_model import ASLModel
from models.training import sweep, train

## Preprocessing
No general data preprocessing is necessary; however, random transforms are applied to the images during training. The images are resized to 224x224 pixels, which is the input size of the EfficientNet-B0 model (the larger variants were designed for higher resolutions). The images are also normalized using the mean and standard deviation of the ImageNet dataset, which is the dataset on which the EfficientNet models were pretrained.

The following cells will show the loading of the dataset and the preparation of the mentioned transforms.

In [35]:
# Root directory of the ASL image dataset; passed to ASLImageDataModule below.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running the notebook on a different machine.
PATH = "/exchange/dspro2/silent-speech/ASL_Dataset"

In [36]:
# Side length (in pixels) every image is resized to before augmentation.
img_size = 224

# Per-channel statistics used for the final normalisation step (ImageNet stats).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# See https://pytorch.org/vision/master/auto_examples/transforms/plot_transforms_illustrations.html#sphx-glr-auto-examples-transforms-plot-transforms-illustrations-py
# for more examples of transforms

# Open Idea: Grayscale for anti bias

# 1) Bring every image to a fixed square size.
pipeline_steps = [transforms.Resize((img_size, img_size))]

# 2) Random augmentations (applied on the PIL image, before tensor conversion).
pipeline_steps += [
    transforms.RandomHorizontalFlip(),
    # Idea: ColorJitter for anti bias
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(degrees=5),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
]

# 3) Convert to a tensor and normalise with the ImageNet statistics.
pipeline_steps += [
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
]

data_transforms = transforms.Compose(pipeline_steps)

In [37]:
# Data module that serves the ASL images from PATH in batches of 32.
# NOTE(review): the single `data_transforms` pipeline contains random
# augmentations; confirm inside ASLImageDataModule that validation/test splits
# are not augmented with it as well.
datamodule = ASLImageDataModule(path=PATH, transforms=data_transforms, batch_size=32)

## Models

In [38]:
# Default number of output classes of the replacement classifier head used by
# the model classes below.
# NOTE(review): presumably equals the number of class folders in the dataset — verify.
NUM_CLASSES = 28

In [39]:
class ASLEfficientNetRepurpose(nn.Module):
    """EfficientNet used as a frozen feature extractor with a new ASL classifier head.

    All parameters of the given network are frozen, then its ``classifier`` is
    replaced by ``Dropout -> Linear(num_classes)``; only this new head is
    trainable afterwards.

    Note:
        The network passed in is modified **in place** and stored as
        ``self.model`` (no copy is made). Pass a freshly loaded backbone for
        every wrapper instance.

    NOTE(review): ``requires_grad_(False)`` only disables gradients; BatchNorm
    layers of the backbone may still update their running statistics while the
    module is in train mode — confirm whether that is intended.

    Args:
        efficientnet_model: torchvision EfficientNet whose ``classifier[1]``
            exposes ``in_features``.
        dropout: Dropout probability applied before the new linear layer.
        num_classes: Number of output logits of the new head.
    """

    def __init__(self, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float = 0.2, num_classes: int = NUM_CLASSES):
        super().__init__()
        self.model = efficientnet_model

        # Freeze everything that exists so far (backbone and old head).
        self.model.requires_grad_(False)

        # Width of the feature vector that feeds the existing classifier.
        head_in_features = self.model.classifier[1].in_features

        # The new head is created after freezing, so its parameters are trainable.
        replacement_head = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(head_in_features, num_classes),
        )
        self.model.classifier = replacement_head

    def forward(self, x):
        """Run ``x`` through the wrapped EfficientNet and return its output."""
        logits = self.model(x)
        return logits

In [40]:
class ASLEfficientNetFinetune(ASLEfficientNetRepurpose):
    """Repurposed EfficientNet whose last feature blocks are trainable again.

    Builds the frozen backbone plus new head exactly like
    ``ASLEfficientNetRepurpose`` and then re-enables gradients for the last
    ``unfreeze_features`` entries of ``model.features``.

    Args:
        efficientnet_model: torchvision EfficientNet; modified in place.
        dropout: Dropout probability of the new classifier head.
        unfreeze_features: How many trailing entries of ``model.features`` to
            unfreeze; must be in ``1..len(model.features)``.
        num_classes: Number of output logits of the new head.

    Raises:
        AssertionError: If ``unfreeze_features`` is outside the valid range.
    """

    def __init__(self, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float = 0.2, unfreeze_features: int = 1, num_classes: int = NUM_CLASSES):
        super().__init__(efficientnet_model, dropout=dropout, num_classes=num_classes)

        feature_blocks = self.model.features

        assert unfreeze_features > 0, "unfreeze_features must be greater than 0"
        assert unfreeze_features <= len(feature_blocks), "unfreeze_features must be less than or equal to the number of features in the model"

        # Re-enable gradients block by block for the tail of the feature extractor.
        for block in feature_blocks[-unfreeze_features:]:
            block.requires_grad_(True)

In [41]:
# Load EfficientNet-B0 with torchvision's default pretrained weights as a demo backbone.
efficientnet_b0 = visionmodels.efficientnet_b0(weights=visionmodels.EfficientNet_B0_Weights.DEFAULT)


In [42]:
# Wrap the backbone for repurposing. The wrapper modifies `efficientnet_b0` in
# place (parameters frozen, classifier replaced); the last line displays the
# resulting module structure.
repurpose_model = ASLEfficientNetRepurpose(efficientnet_b0)
repurpose_model

ASLEfficientNetRepurpose(
  (model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
           

In [43]:
# Build the fine-tuning model on its own freshly loaded backbone.
# The wrapper classes modify the network they receive in place, so reusing the
# `efficientnet_b0` instance that `repurpose_model` already wraps would make
# both models share one network: constructing the fine-tune model would replace
# the classifier head and unfreeze feature blocks of `repurpose_model` as well.
finetune_backbone = visionmodels.efficientnet_b0(weights=visionmodels.EfficientNet_B0_Weights.DEFAULT)
finetune_model = ASLEfficientNetFinetune(finetune_backbone, unfreeze_features=2)
finetune_model

ASLEfficientNetFinetune(
  (model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
            

## Training

In [44]:
# Keys of the wandb sweep configuration (see `sweep_config` / `train_efficient_net`).

# Selects "repurpose" or "finetune" (consumed by get_asl_efficientnet_model).
TUNE_TYPE = "tune_type"
# Selects the EfficientNet variant, e.g. "b0" (consumed by get_pretrained_efficientnet_model).
EFFICIENTNET_MODEL = "efficientnet_model"

# Dropout probability of the new classifier head.
DROPOUT = "dropout"

# Key holding the selected choice's name inside the nested optimizer / scheduler parameter groups.
NAME = "name"

In [45]:
def get_pretrained_efficientnet_model(model_type: str):
    """Load a torchvision EfficientNet with its default pretrained weights.

    Args:
        model_type: EfficientNet variant, one of ``"b0"`` ... ``"b7"``
            (lower case, exact match).

    Returns:
        The torchvision EfficientNet built by ``efficientnet_<model_type>``
        with ``EfficientNet_<MODEL_TYPE>_Weights.DEFAULT``.

    Raises:
        ValueError: If ``model_type`` is not a supported variant. (Previously
            an unknown value fell through the if/elif chain and surfaced as an
            ``UnboundLocalError`` at the ``return`` statement.)
    """
    supported_variants = ("b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7")

    # Validate first so a typo in the sweep config yields a clear error message.
    if model_type not in supported_variants:
        raise ValueError(
            f"Invalid EfficientNet model type: {model_type!r}. "
            f"Expected one of: {', '.join(supported_variants)}"
        )

    # torchvision names the builders `efficientnet_b0` and the weight enums
    # `EfficientNet_B0_Weights`, so both can be looked up from the variant.
    build_model = getattr(visionmodels, f"efficientnet_{model_type}")
    weights_enum = getattr(visionmodels, f"EfficientNet_{model_type.upper()}_Weights")

    return build_model(weights=weights_enum.DEFAULT)

In [46]:
def get_asl_efficientnet_model(type: str, efficientnet_model: visionmodels.efficientnet.EfficientNet, dropout: float, unfreeze_features: int = 1) -> nn.Module:
    """Wrap a pretrained EfficientNet in the requested ASL model class.

    Args:
        type: ``"repurpose"`` (frozen backbone, new head) or ``"finetune"``
            (additionally unfreezes trailing feature blocks).
        efficientnet_model: Pretrained torchvision EfficientNet to wrap.
        dropout: Dropout probability of the new classifier head.
        unfreeze_features: Number of trailing feature blocks to unfreeze;
            only used for ``"finetune"``.

    Returns:
        The constructed ``ASLEfficientNetRepurpose`` or ``ASLEfficientNetFinetune``.

    Raises:
        ValueError: If ``type`` is neither ``"repurpose"`` nor ``"finetune"``.
    """
    if type == "repurpose":
        return ASLEfficientNetRepurpose(efficientnet_model, dropout=dropout)

    if type == "finetune":
        return ASLEfficientNetFinetune(efficientnet_model, dropout=dropout, unfreeze_features=unfreeze_features)

    raise ValueError(f"Invalid model type: {type}")

In [47]:
# Keys of the nested "optimizer" parameter group in the sweep configuration.
OPTIMIZER = "optimizer"
LEARNING_RATE = "learning_rate"
WEIGHT_DECAY = "weight_decay"
MOMENTUM = "momentum"


def get_optimizer(optimizer_params: dict, model: nn.Module):
    """Create the optimizer described by ``optimizer_params`` for ``model``.

    All of ``model.parameters()`` are handed to the optimizer, including
    parameters whose gradients are currently disabled.

    Args:
        optimizer_params: Mapping with the keys ``NAME`` (``"adam"``,
            ``"adamw"`` or ``"rmsprop"``), ``LEARNING_RATE`` and
            ``WEIGHT_DECAY``; ``MOMENTUM`` is read only for ``"rmsprop"``.
        model: Module whose parameters are optimised.

    Returns:
        A ``torch.optim.Adam``, ``torch.optim.AdamW`` or ``torch.optim.RMSprop``.

    Raises:
        KeyError: If a required key is missing from ``optimizer_params``.
        ValueError: If the optimizer name is unknown. (Previously the function
            silently returned ``None`` in that case.)
    """
    optimizer = optimizer_params[NAME]
    learning_rate = optimizer_params[LEARNING_RATE]
    weight_decay = optimizer_params[WEIGHT_DECAY]

    if optimizer == "adam":
        return torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer == "adamw":
        return torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer == "rmsprop":
        # Momentum is only meaningful (and only looked up) for RMSprop.
        momentum = optimizer_params[MOMENTUM]
        return torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)

    # Fail loudly instead of handing `None` to the training code.
    raise ValueError(f"Invalid optimizer: {optimizer}")

In [48]:
# Keys of the nested "learning_rate_scheduler" parameter group in the sweep configuration.
LEARNING_RATE_SCHEDULER = "learning_rate_scheduler"
GAMMA = "gamma"
STEP_SIZE = "step_size"
FACTOR = "factor"


def get_learning_rate_scheduler(learning_rate_scheduler_params: dict, optimizer: torch.optim.Optimizer):
    """Create the learning-rate scheduler described by the given parameters.

    Args:
        learning_rate_scheduler_params: Mapping with the key ``NAME``
            (``"None"``, ``"step"``, ``"exponential"`` or ``"constant"``) plus
            the keys the chosen scheduler needs: ``STEP_SIZE`` and ``GAMMA``
            for ``"step"``, ``GAMMA`` for ``"exponential"``, ``FACTOR`` for
            ``"constant"``.
        optimizer: Optimizer whose learning rate is scheduled.

    Returns:
        ``None`` when the name is the string ``"None"``; otherwise a
        ``StepLR``, ``ExponentialLR`` or ``ConstantLR`` instance.

    Raises:
        KeyError: If a required key is missing.
        ValueError: If the scheduler name is unknown. (Previously an unknown
            name silently returned ``None`` and was therefore
            indistinguishable from deliberately choosing no scheduler.)
    """
    learning_rate_scheduler = learning_rate_scheduler_params[NAME]

    # The sweep encodes "no scheduler" as the literal string "None".
    if learning_rate_scheduler == "None":
        return None

    if learning_rate_scheduler == "step":
        step_size = learning_rate_scheduler_params[STEP_SIZE]
        gamma = learning_rate_scheduler_params[GAMMA]
        return torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    elif learning_rate_scheduler == "exponential":
        gamma = learning_rate_scheduler_params[GAMMA]
        return torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)
    elif learning_rate_scheduler == "constant":
        # `total_iters` is not passed, so ConstantLR uses its PyTorch default.
        factor = learning_rate_scheduler_params[FACTOR]
        return torch.optim.lr_scheduler.ConstantLR(optimizer, factor=factor)

    raise ValueError(f"Invalid learning rate scheduler: {learning_rate_scheduler}")

In [49]:
# Counter used to give every run started from this kernel a distinct name.
run_id = 0
# Seed passed to Lightning and to the training helper.
SEED = 42

# Optional sweep key: number of trailing feature blocks to unfreeze when
# fine-tuning. If the sweep does not define it, the default below is used.
UNFREEZE_FEATURES = "unfreeze_features"
DEFAULT_UNFREEZE_FEATURES = 2


def train_efficient_net():
    """Run one training job configured by the active wandb sweep.

    Increments the global ``run_id``, seeds Lightning, starts a wandb run named
    ``efficientnet-<run_id>``, builds model, optimizer and scheduler from
    ``wandb.config`` and hands everything to ``train``.

    Config keys read: ``EFFICIENTNET_MODEL``, ``TUNE_TYPE``, ``DROPOUT``,
    ``OPTIMIZER``, ``LEARNING_RATE_SCHEDULER`` and, optionally,
    ``UNFREEZE_FEATURES`` (defaults to ``DEFAULT_UNFREEZE_FEATURES``).
    """
    global run_id
    run_id += 1

    L.seed_everything(SEED)

    wandb.init(name=f"efficientnet-{run_id}")

    # Created after wandb.init, so the logger attaches to the run started above.
    wandb_logger = WandbLogger(log_model=True)

    # TODO: A lot of this could become a library

    config = wandb.config
    efficientnet_model = get_pretrained_efficientnet_model(config[EFFICIENTNET_MODEL])

    run_type = config[TUNE_TYPE]
    # Previously hard-coded to 2; the same value is used unless the sweep overrides it.
    unfreeze_features = config.get(UNFREEZE_FEATURES, DEFAULT_UNFREEZE_FEATURES)
    model = get_asl_efficientnet_model(run_type, efficientnet_model, dropout=config[DROPOUT], unfreeze_features=unfreeze_features)

    optimizer_params = config[OPTIMIZER]
    optimizer = get_optimizer(optimizer_params, model)

    learning_rate_scheduler_params = config[LEARNING_RATE_SCHEDULER]
    scheduler = get_learning_rate_scheduler(learning_rate_scheduler_params, optimizer)

    asl_model = ASLModel(model=model, criterion=nn.CrossEntropyLoss(), optimizer=optimizer, lr_scheduler=scheduler)

    train(
        model=asl_model,
        datamodule=datamodule,
        logger=wandb_logger,
        seed=SEED
    )

In [50]:
# Search space of the nested optimizer parameter group.
# All four keys are sampled for every run; MOMENTUM is only consumed by "rmsprop".
optimizer_search_space = {
    "parameters": {
        NAME: {"values": ["adam", "adamw", "rmsprop"]},
        LEARNING_RATE: {"min": 1e-5, "max": 1e-2, "distribution": "log_uniform_values"},
        WEIGHT_DECAY: {"min": 0, "max": 1e-3},
        MOMENTUM: {"min": 0.8, "max": 0.99},
    }
}

# Search space of the nested learning-rate-scheduler parameter group.
# "None" (a string) stands for "no scheduler".
scheduler_search_space = {
    "parameters": {
        NAME: {"values": ["None", "step", "exponential", "constant"]},
        STEP_SIZE: {"min": 1, "max": 10},
        GAMMA: {"min": 0.1, "max": 0.9},
        FACTOR: {"min": 0.1, "max": 0.5},
    }
}

# Random search that maximises the validation accuracy logged by ASLModel,
# with hyperband early termination.
sweep_config = {
    "method": "random",
    "metric": {"name": f"{ASLModel.VALID_ACCURACY}", "goal": "maximize"},
    "early_terminate": {"type": "hyperband", "min_iter": 5},
    "parameters": {
        TUNE_TYPE: {"values": ["repurpose", "finetune"]},
        EFFICIENTNET_MODEL: {"values": ["b0", "b1", "b2", "b3"]},
        DROPOUT: {"min": 0.1, "max": 0.5},
        OPTIMIZER: optimizer_search_space,
        LEARNING_RATE_SCHEDULER: scheduler_search_space,
    },
}

In [51]:
# Launch the sweep with `train_efficient_net` as the function executed per run.
# NOTE(review): the positional `10` is presumably the number of runs to execute —
# confirm against `models.training.sweep`.
sweep(sweep_config, 10, train_efficient_net)

Create sweep with ID: uh7ttpds
Sweep URL: https://wandb.ai/dspro2-silent-speech/silent-speech/sweeps/uh7ttpds


[34m[1mwandb[0m: Agent Starting Run: 6bejmekw with config:
[34m[1mwandb[0m: 	dropout: 0.20887927333981687
[34m[1mwandb[0m: 	efficientnet_model: b7
[34m[1mwandb[0m: 	learning_rate_scheduler: {'factor': 0.1778468001230843, 'gamma': 0.8698520832041572, 'name': 'constant', 'step_size': 10}
[34m[1mwandb[0m: 	optimizer: {'learning_rate': 0.002892763805049664, 'momentum': 0.8239078527970456, 'name': 'adamw', 'weight_decay': 0.00032830627104802544}
[34m[1mwandb[0m: 	tune_type: finetune
Seed set to 42


Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\loggers\wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


[34m[1mwandb[0m: [32m[41mERROR[0m Run 6bejmekw errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\kybur\AppData\Local\Temp\ipykernel_26552\1204933261.py", line 31, in train_efficient_net
[34m[1mwandb[0m: [32m[41mERROR[0m     train(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\models\training.py", line 39, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\loggers\wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


[34m[1mwandb[0m: [32m[41mERROR[0m Run 2x6h478b errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\kybur\AppData\Local\Temp\ipykernel_26552\1204933261.py", line 31, in train_efficient_net
[34m[1mwandb[0m: [32m[41mERROR[0m     train(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\models\training.py", line 39, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\loggers\wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


[34m[1mwandb[0m: [32m[41mERROR[0m Run w8kiuwel errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\wandb\agents\pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "C:\Users\kybur\AppData\Local\Temp\ipykernel_26552\1204933261.py", line 31, in train_efficient_net
[34m[1mwandb[0m: [32m[41mERROR[0m     train(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\models\training.py", line 39, in train
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "c:\Users\kybur\Repos\HSLU\dspro2\.venv\Lib\site-packages\lightning\pytorch\trainer\trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1

## Evaluation