In [None]:
try:
    import ablator
except:
    !pip install git+https://github.com/fostiropoulos/ablator.git@v0.0.1-mp
    print("Stopping RUNTIME! Please run again") # This script automatically restart runtime (if ablator is not found and installing is needed) so changes are applied
    import os

    os.kill(os.getpid(), 9)

# Import necessary modules

In [2]:
from ablator import ModelConfig, TrainConfig, ParallelConfig, SchedulerConfig
from ablator import ModelWrapper, ParallelTrainer, configclass, ConfigBase, Literal, Optional
from ablator.config.hpo import SearchSpace

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import OneCycleLR, ReduceLROnPlateau, StepLR

import os
import shutil
from sklearn.metrics import accuracy_score

# 1 - Configurations

## 1.1 - Model configs

In [3]:
@configclass
class CustomModelConfig(ModelConfig):
    """Hyperparameters read by ``FashionCNN`` when it builds its layers."""

    # Output channels of the first convolution.
    num_filter1: int
    # Output channels of the second and third convolutions.
    num_filter2: int
    # Key selecting the activation: "relu", "elu" or "leakyRelu".
    activation: str

# Baseline model hyperparameters (the same fields also appear in `search_space`).
model_config = CustomModelConfig(
    num_filter1=32,
    num_filter2=64,
    activation="relu",
)

class FashionCNN(nn.Module):
    """Three-convolution CNN followed by a linear layer with 10 outputs.

    The linear layer expects ``num_filter2 * 7 * 7`` flattened features, i.e. a
    7x7 feature map after the two stride-2 poolings (assumes 28x28
    single-channel input -- TODO confirm against the dataset in use).
    """

    def __init__(self, config: CustomModelConfig):
        super().__init__()

        activation_classes = {
            "relu": nn.ReLU,
            "elu": nn.ELU,
            "leakyRelu": nn.LeakyReLU,
        }

        first_width, second_width = config.num_filter1, config.num_filter2
        # A single activation module is shared by all three stages; an
        # unsupported key raises KeyError here.
        shared_activation = activation_classes[config.activation]()
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        # Stage 1: 1 -> num_filter1 channels, then halve the spatial size.
        self.conv1 = nn.Conv2d(1, first_width, **conv_kwargs)
        self.act1 = shared_activation
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: num_filter1 -> num_filter2 channels, then halve again.
        self.conv2 = nn.Conv2d(first_width, second_width, **conv_kwargs)
        self.act2 = shared_activation
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: keeps num_filter2 channels, no pooling.
        self.conv3 = nn.Conv2d(second_width, second_width, **conv_kwargs)
        self.act3 = shared_activation

        # Classifier head.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(second_width * 7 * 7, 10)

    def forward(self, x):
        """Run the three conv stages and return the 10-way linear outputs."""
        x = self.maxpool1(self.act1(self.conv1(x)))
        x = self.maxpool2(self.act2(self.conv2(x)))
        x = self.act3(self.conv3(x))
        return self.fc1(self.flatten(x))

class MyModel(nn.Module):
    """Pairs ``FashionCNN`` with a cross-entropy loss.

    ``forward`` returns ``({"y_pred": ..., "y_true": ...}, loss)``; both
    tensors are reshaped to a single column, and ``loss`` / ``y_true`` are
    ``None`` when no labels are supplied.
    """

    def __init__(self, config: CustomModelConfig) -> None:
        super().__init__()

        self.model = FashionCNN(config)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, labels=None):
        logits = self.model(x)

        if labels is None:
            loss = None
        else:
            # The loss is computed on the raw outputs, before the argmax below.
            loss = self.loss(logits, labels)
            labels = labels.reshape(-1, 1)

        # Predicted class index per sample, as a column vector.
        predictions = logits.argmax(dim=-1).reshape(-1, 1)

        return {"y_pred": predictions, "y_true": labels}, loss

## 1.2 - Train configs

### 1.2.1 - Optimizer and scheduler configs

In [4]:
def create_optimizer(optimizer_name: str, model: nn.Module, lr: float):
    """Build the torch optimizer selected by ``optimizer_name`` for ``model``.

    Args:
        optimizer_name: One of ``"adam"``, ``"adamw"`` or ``"sgd"``.
        model: Module whose parameters will be optimized.
        lr: Learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the supported names.
            (Previously ``None`` was returned silently, deferring the failure
            to wherever the optimizer was first used.)
    """
    # name -> (optimizer class, fixed keyword arguments for that optimizer)
    optimizer_specs = {
        "adam": (optim.Adam, {"betas": (0.0, 0.1), "weight_decay": 0.0}),
        "adamw": (
            optim.AdamW,
            {"betas": (0.0, 0.1), "eps": 0.001, "weight_decay": 0.1},
        ),
        "sgd": (optim.SGD, {"momentum": 0.9, "weight_decay": 0.1}),
    }

    if optimizer_name not in optimizer_specs:
        supported = ", ".join(sorted(optimizer_specs))
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; expected one of: {supported}"
        )

    optimizer_class, fixed_kwargs = optimizer_specs[optimizer_name]
    # All parameters go into a single parameter group.
    return optimizer_class(list(model.parameters()), lr=lr, **fixed_kwargs)

@configclass
class CustomOptimizerConfig(ConfigBase):
    """Optimizer choice and learning rate; builds the optimizer via ``create_optimizer``."""

    # Optimizer key passed to create_optimizer.
    name: Literal["adam", "adamw", "sgd"] = "adam"
    # Learning rate passed to create_optimizer.
    lr: float = 0.001

    def make_optimizer(self, model: nn.Module):
        """Return the optimizer for ``model`` selected by ``name`` and ``lr``."""
        optimizer = create_optimizer(optimizer_name=self.name, model=model, lr=self.lr)
        return optimizer

# Baseline optimizer settings; the bare name on the last line displays the config.
optimizer_config = CustomOptimizerConfig(name="adam", lr=0.001)
optimizer_config

CustomOptimizerConfig(name='adam', lr=0.001)

In [5]:
def create_scheduler(scheduler_name: str, model: nn.Module, optimizer: torch.optim.Optimizer):
    """Build the torch LR scheduler selected by ``scheduler_name``.

    Args:
        scheduler_name: One of ``"step"``, ``"cycle"`` or ``"plateau"``.
        model: Unused; kept so the signature stays the same for callers.
        optimizer: Optimizer whose learning rate the scheduler controls.

    Returns:
        A ``StepLR``, ``OneCycleLR`` or ``ReduceLROnPlateau`` instance built
        from the preset arguments returned by ``scheduler_arguments``.

    Raises:
        ValueError: If ``scheduler_name`` has no preset or no matching
            scheduler class. (Previously an unknown name failed with an
            opaque ``TypeError`` while deleting a key from ``None``.)
    """
    scheduler_classes = {
        "step": StepLR,
        "cycle": OneCycleLR,
        "plateau": ReduceLROnPlateau,
    }

    parameters = scheduler_arguments(scheduler_name)
    if parameters is None or scheduler_name not in scheduler_classes:
        supported = ", ".join(sorted(scheduler_classes))
        raise ValueError(
            f"Unsupported scheduler {scheduler_name!r}; expected one of: {supported}"
        )

    # "step_when" is not a torch scheduler constructor argument (presumably it
    # is consumed by ablator to decide when to step -- TODO confirm), so it is
    # filtered out here without mutating the preset dict.
    scheduler_kwargs = {
        key: value for key, value in parameters.items() if key != "step_when"
    }

    return scheduler_classes[scheduler_name](optimizer, **scheduler_kwargs)

def scheduler_arguments(scheduler_name):
    """Return the preset arguments for ``scheduler_name``.

    Supported names are ``"step"``, ``"plateau"`` and ``"cycle"``; any other
    name yields ``None``. A new dict is built on every call, so callers may
    modify the result freely.
    """
    presets = {
        "step": {
            "step_size": 1,
            "gamma": 0.99,
            "step_when": "epoch",
        },
        "plateau": {
            "patience": 10,
            "min_lr": 1e-5,
            "mode": "min",
            "factor": 0.0,
            "threshold": 1e-4,
            "step_when": "val",
        },
        "cycle": {
            "max_lr": 1e-3,
            # number of epochs (7) * batches per epoch (1875 = len(dataloader))
            "total_steps": 7 * 1875,
            "step_when": "train",
        },
    }
    return presets.get(scheduler_name)

@configclass
class CustomSchedulerConfig(SchedulerConfig):
    """Scheduler config whose arguments are always derived from ``name``."""

    def __init__(self, name, arguments=None):
        # Whatever is passed as `arguments` is ignored: the value is always
        # replaced by the preset that scheduler_arguments returns for `name`.
        preset_arguments = scheduler_arguments(name)
        super(CustomSchedulerConfig, self).__init__(name=name, arguments=preset_arguments)

    def make_scheduler(self, model: torch.nn.Module, optimizer: torch.optim):
        """Return the scheduler for ``optimizer`` selected by ``self.name``."""
        scheduler = create_scheduler(self.name, model, optimizer)
        return scheduler

# Baseline scheduler ("step"); the bare name on the last line displays the config.
scheduler_config = CustomSchedulerConfig(name="step")
scheduler_config

CustomSchedulerConfig(name='step', arguments={'step_size': 1, 'gamma': 0.99, 'step_when': 'epoch'})

### 1.2.2 - Train config

In [6]:
@configclass
class CustomTrainConfig(TrainConfig):
    """Train config whose optimizer and scheduler fields use the custom config classes."""

    optimizer_config: CustomOptimizerConfig
    # Annotated Optional, so a scheduler config of None is permitted by the type.
    scheduler_config: Optional[CustomSchedulerConfig]

# Training setup for the run. NOTE(review): the "cycle" scheduler preset hard-codes
# total_steps = 7 * 1875 (epochs * len(dataloader)), so changing `epochs` or
# `batch_size` here presumably requires updating that preset too -- confirm.
train_config = CustomTrainConfig(
    dataset="Fashion-mnist",
    batch_size=32,
    epochs=7,
    optimizer_config=optimizer_config,
    scheduler_config=scheduler_config,
)

## 1.3 - Run config

In [8]:
# Hyperparameter search space, keyed by the dotted path of the config field.
search_space = {
    # Numeric ranges.
    "model_config.num_filter1": SearchSpace(value_range=[32, 64], value_type="int"),
    "model_config.num_filter2": SearchSpace(value_range=[64, 128], value_type="int"),
    "train_config.optimizer_config.lr": SearchSpace(value_range=[0.001, 0.01], value_type="float"),
    # Categorical choices.
    "train_config.optimizer_config.name": SearchSpace(categorical_values=["adam", "sgd", "adamw"]),
    "train_config.scheduler_config.name": SearchSpace(categorical_values=["cycle", "step", "plateau"]),
    "model_config.activation": SearchSpace(categorical_values=["relu", "elu", "leakyRelu"]),
}

@configclass
class CustomParallelConfig(ParallelConfig):
    """Parallel run config typed to use the custom model and train configs."""

    model_config: CustomModelConfig
    train_config: CustomTrainConfig

# Run configuration for the parallel hyperparameter search.
parallel_config = CustomParallelConfig(
    train_config=train_config,
    model_config=model_config,
    metrics_n_batches=300,
    experiment_dir="/tmp/experiments-1/",
    device="cuda",
    amp=True,
    random_seed=42,
    total_trials=3,  # increase this based on your available resources
    concurrent_trials=1,  # increase this based on your available resources
    search_space=search_space,
    optim_metrics={"val_loss": "min"},
    optim_metric_name="val_loss",
    gpu_mb_per_experiment=512,
)

# 2 - Model wrapper

In [9]:
# Convert images to tensors; no other preprocessing is applied.
transform = transforms.ToTensor()

# Arguments shared by both splits: download into ./data if needed.
_dataset_kwargs = dict(root='./data', download=True, transform=transform)

# Training split.
train_dataset = torchvision.datasets.FashionMNIST(train=True, **_dataset_kwargs)

# Held-out split (train=False); the wrapper uses it as the validation set.
test_dataset = torchvision.datasets.FashionMNIST(train=False, **_dataset_kwargs)

class MyModelWrapper(ModelWrapper):
    """ModelWrapper supplying the FashionMNIST data loaders and an accuracy metric."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def make_dataloader_train(self, run_config: CustomParallelConfig):
        """Return a shuffled loader over ``train_dataset``."""
        batch_size = run_config.train_config.batch_size
        loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True
        )
        return loader

    def make_dataloader_val(self, run_config: CustomParallelConfig):
        """Return an unshuffled loader over ``test_dataset``."""
        batch_size = run_config.train_config.batch_size
        loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=batch_size, shuffle=False
        )
        return loader

    def evaluation_functions(self):
        """Return the metric functions, keyed by metric name."""
        metrics = {}
        # Inputs are flattened before scoring with sklearn's accuracy_score.
        metrics["accuracy"] = lambda y_true, y_pred: accuracy_score(
            y_true.flatten(), y_pred.flatten()
        )
        return metrics

# 3 - Launch experiment

In [8]:
# Start from a clean experiment directory; errors (e.g. a missing directory) are ignored.
shutil.rmtree(parallel_config.experiment_dir, ignore_errors=True)

wrapper = MyModelWrapper(model_class=MyModel)

# NOTE(review): this rebinds the name `ablator`, shadowing the module imported
# in the first cell.
ablator = ParallelTrainer(wrapper=wrapper, run_config=parallel_config)

# Launch the trials, using the current directory as the working directory.
ablator.launch(working_directory=os.getcwd())



2023-09-22 22:16:49:  - [93mNo git repository was detected at /content. We recommend setting the working directory to a git repository to keep track of changes.[0m
[2m[36m(FileLogger pid=1050)[0m 2023-09-22 22:16:49:  - [93mNo git repository was detected at /content. We recommend setting the working directory to a git repository to keep track of changes.[0m
2023-09-22 22:16:49:  - Scheduling uid: bc16_d3c6_9019
Parameters: 
	train_config.optimizer_config.lr:(float)0.001->(float)0.007367092824298474
	train_config.scheduler_config.arguments.patience:(Missing)None->(int)10
	train_config.scheduler_config.arguments.min_lr:(Missing)None->(float)1e-05
	train_config.scheduler_config.arguments.step_size:(int)1->(Missing)None
	experiment_dir:(str)/content/experiments/->(str)/content/experiments/bc16_d3c6_9019
	train_config.scheduler_config.arguments.threshold:(Missing)None->(float)0.0001
	train_config.scheduler_config.arguments.verbose:(Missing)None->(bool)False
	model_config.num_filter1: