In [1]:
!pip install optuna -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m399.4/400.9 kB[0m [31m15.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

import torchvision
import torchvision.transforms as transforms

import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import tarfile
import os
from PIL import Image
import optuna


In [12]:
class ConvBlock(nn.Module):
  """Single feature-extraction stage: convolution, then ReLU, then max-pooling.

  The convolution is padded by ``kernel_size // 2`` on every side and the
  pooling layer uses a 2x2 window with stride 2.

  Args:
    in_channels: Channel count of the incoming feature map.
    out_channels: Channel count produced by the convolution.
    kernel_size: Side length of the square convolution kernel. It is
      floor-divided by 2 to obtain the padding, so it must be an int.
  """

  def __init__(self, in_channels, out_channels, kernel_size):
    super().__init__()
    conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=kernel_size // 2)
    activation = nn.ReLU()
    pool = nn.MaxPool2d(2, 2)
    # Stored as `block` so the parameters are registered as `block.0.*`.
    self.block = nn.Sequential(conv, activation, pool)

  def forward(self, x):
    """Apply convolution, ReLU and pooling to `x` and return the result."""
    return self.block(x)

In [13]:
class FlexibleModel(nn.Module):
  """CNN classifier made of a configurable stack of `ConvBlock`s and an MLP head.

  The input passes through `num_blocks` ConvBlocks, is flattened, and is then
  fed to Dropout -> Linear -> ReLU -> Dropout -> Linear, producing
  `num_classes` outputs per sample.

  Args:
    img_dim: Height (= width) of the square input images.
    num_blocks: Number of ConvBlocks to stack.
    in_channels: Channel count of the input images.
    filters: Output channel count for each block. Only the first
      `num_blocks` entries are used; any sequence (list or tuple) works.
    kernel_sizes: Integer kernel size for each block. Only the first
      `num_blocks` entries are used.
    fc_size: Width of the hidden fully connected layer.
    dropout_rate: Drop probability used by both Dropout layers.
    num_classes: Number of outputs of the final Linear layer.

  Raises:
    IndexError: If `filters` or `kernel_sizes` has fewer than `num_blocks`
      entries.
    ValueError: If the feature map would shrink below 1x1 after the
      requested number of blocks.
  """

  def __init__(self,
               img_dim: int,
               num_blocks: int,
               in_channels: int,
               filters: list,
               kernel_sizes: list,
               fc_size: int,
               dropout_rate: float,
               num_classes: int):
    super().__init__()
    # channels[i] -> channels[i + 1] is the channel mapping of block i.
    # Building a fresh list leaves the caller's `filters` untouched.
    channels = [in_channels, *filters]
    self.rep_learn = nn.ModuleList(
        [ConvBlock(channels[i], channels[i + 1], kernel_sizes[i]) for i in range(num_blocks)]
    )

    # Track the spatial size through every block instead of assuming a clean
    # halving: ConvBlock pads the stride-1 convolution with k // 2 and then
    # applies a 2x2/stride-2 max-pool. For odd k this equals img_dim // 2**n;
    # for even k the convolution grows the map by one pixel before pooling.
    spatial = img_dim
    for k in kernel_sizes[:num_blocks]:
      spatial = (spatial + 2 * (k // 2) - k + 1) // 2
    if spatial < 1:
      raise ValueError(
          f"img_dim={img_dim} is too small for num_blocks={num_blocks}: "
          "the feature map would be empty"
      )

    self.flatten = nn.Flatten()
    self.fc = nn.Sequential(
        nn.Dropout(dropout_rate),
        # Use the channel count of the last block actually built, not
        # filters[-1], which is wrong when `filters` has spare entries.
        nn.Linear(channels[num_blocks] * spatial ** 2, fc_size),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
        nn.Linear(fc_size, num_classes)
    )

  def forward(self, x):
    """Run `x` through all blocks, flatten, and return the head's output."""
    for block in self.rep_learn:
      x = block(x)
    x = self.flatten(x)
    return self.fc(x)

In [14]:
def get_dataloaders(subset_fraction=0.05, batch_size=64, root='./data', num_workers=None, seed=None):
  """Build CIFAR-10 loaders over small random subsets for quick experiments.

  The training loader draws from a random subset of the CIFAR-10 training
  split (`train=True`); the validation loader draws from a random subset of
  the CIFAR-10 test split (`train=False`). Both datasets are downloaded to
  `root` if they are not already there.

  Args:
    subset_fraction: Fraction of each split to keep, in (0, 1].
    batch_size: Batch size for both loaders.
    root: Directory where the dataset is stored / downloaded.
    num_workers: Worker processes per loader. `None` means "one per CPU",
      falling back to 0 (load in the main process) when the CPU count
      cannot be determined.
    seed: Optional integer seed for the subset selection. With `None` the
      split uses PyTorch's default generator, so the subsets differ from
      run to run unless the global seed is set elsewhere.

  Returns:
    A `(train_dl, val_dl)` tuple of DataLoaders; only the training loader
    shuffles.

  Raises:
    ValueError: If `subset_fraction` is not in (0, 1].
  """
  if not 0 < subset_fraction <= 1:
    raise ValueError(f"subset_fraction must be in (0, 1], got {subset_fraction}")

  if num_workers is None:
    # os.cpu_count() may return None, which DataLoader does not accept.
    num_workers = os.cpu_count() or 0

  # Only pass a generator when a seed was requested so that the default
  # behaviour (global RNG) is unchanged.
  split_kwargs = {}
  if seed is not None:
    split_kwargs['generator'] = torch.Generator().manual_seed(seed)
  fractions = [subset_fraction, 1 - subset_fraction]

  transform = transforms.Compose(
      [transforms.ToTensor(),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
  trainset = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=transform)
  train_set, _ = torch.utils.data.random_split(trainset, fractions, **split_kwargs)
  valset = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=transform)
  val_set, _ = torch.utils.data.random_split(valset, fractions, **split_kwargs)
  train_dl = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers)
  val_dl = DataLoader(val_set, batch_size=batch_size, num_workers=num_workers)
  return train_dl, val_dl

In [30]:
def train_step(model, train_dl, loss_fn, opt, metric_fn, device):
  """Train `model` for one pass over `train_dl`.

  Puts the model in training mode and, for every batch, moves the data to
  `device`, clears gradients, runs the forward/backward pass and takes one
  optimizer step.

  Returns:
    `(mean_loss, mean_metric)`: the per-batch loss values and the per-batch
    `metric_fn(outputs, labels)` values, each summed and divided by
    `len(train_dl)`.
  """
  model.train()
  loss_total, metric_total = 0.0, 0.0
  for batch_inputs, batch_labels in train_dl:
    batch_inputs = batch_inputs.to(device)
    batch_labels = batch_labels.to(device)

    opt.zero_grad()
    predictions = model(batch_inputs)
    batch_loss = loss_fn(predictions, batch_labels)
    batch_loss.backward()
    opt.step()

    metric_total += metric_fn(predictions, batch_labels)
    loss_total += batch_loss.item()

  n_batches = len(train_dl)
  return loss_total / n_batches, metric_total / n_batches


def val_step(model, val_dl, loss_fn, metric_fn, device):
  """Evaluate `model` on every batch of `val_dl` without tracking gradients.

  Puts the model in evaluation mode and moves each batch to `device` before
  the forward pass.

  Returns:
    `(mean_loss, mean_metric)`: the per-batch loss values and the per-batch
    `metric_fn(outputs, labels)` values, each summed and divided by
    `len(val_dl)`.
  """
  model.eval()
  loss_total = 0.0
  metric_total = 0.0
  with torch.no_grad():
    for batch_inputs, batch_labels in val_dl:
      batch_inputs = batch_inputs.to(device)
      batch_labels = batch_labels.to(device)
      predictions = model(batch_inputs)
      batch_loss = loss_fn(predictions, batch_labels)
      metric_total += metric_fn(predictions, batch_labels)
      loss_total += batch_loss.item()
  n_batches = len(val_dl)
  return loss_total / n_batches, metric_total / n_batches


def train(model, train_dl, val_dl, loss_fn, metric_fn, opt, device, num_epochs):
  """Alternate one training pass and one validation pass for `num_epochs` epochs.

  After each epoch a one-line summary of the four values is printed.

  Returns:
    A 4-tuple of lists `(train_losses, train_metrics, val_losses,
    val_metrics)`, each holding one entry per epoch in order.
  """
  history = ([], [], [], [])
  for epoch in range(num_epochs):
    train_loss, train_metric = train_step(model, train_dl, loss_fn, opt, metric_fn, device)
    val_loss, val_metric = val_step(model, val_dl, loss_fn, metric_fn, device)
    # Append each epoch value to its matching history list.
    for series, value in zip(history, (train_loss, train_metric, val_loss, val_metric)):
      series.append(value)
    print(f"{epoch + 1} Train Loss: {train_loss:.4f} Train Metric: {train_metric:.4f} Val Loss: {val_loss:.4f} Val Metric: {val_metric:.4f}")
  return history


def acccuracy(logits, labels):
  """Return the share of samples whose arg-max prediction matches its label.

  Predictions are the arg-max of `logits` over the last dimension. The number
  of matches is divided by `len(labels)` and returned as a Python float.
  """
  predicted = logits.argmax(dim=-1)
  n_correct = (predicted == labels).sum().to(float)
  return (n_correct / len(labels)).item()




In [31]:
def objective(trial, device, train_dl, val_dl):
  """Optuna objective: sample an architecture, train it briefly, score it.

  Hyperparameters drawn from `trial` (in this order): dropout rate, hidden
  layer width, number of conv blocks, then the filter count of every block,
  then the kernel size of every block. The sampled `FlexibleModel` is built
  for 32x32 inputs with 3 channels and 10 classes, and trained for 5 epochs
  with Adam (lr=0.001) and cross-entropy loss.

  Returns:
    The validation metric (accuracy) recorded after the final epoch.
  """
  # --- search space ---------------------------------------------------------
  dropout_rate = trial.suggest_float("dropout_rate", 1e-1, 5e-1)
  fc_size = trial.suggest_int("fc_size", 64, 256)
  num_blocks = trial.suggest_int("num_blocks", 2, 5)

  # All filter counts are suggested before any kernel size.
  filters = []
  for i in range(num_blocks):
    filters.append(trial.suggest_int(f"n_filters_{i}", 16, 64))
  kernel_sizes = []
  for i in range(num_blocks):
    kernel_sizes.append(trial.suggest_categorical(f"kernel_size_{i}", [3, 7]))

  # --- model and training ---------------------------------------------------
  model = FlexibleModel(
      img_dim=32,
      num_blocks=num_blocks,
      in_channels=3,
      filters=filters,
      kernel_sizes=kernel_sizes,
      fc_size=fc_size,
      dropout_rate=dropout_rate,
      num_classes=10,
  ).to(device)
  loss_fn = nn.CrossEntropyLoss()
  opt = torch.optim.Adam(model.parameters(), lr=0.001)
  history = train(model, train_dl, val_dl, loss_fn, acccuracy, opt, device, num_epochs=5)

  # history = (train_losses, train_metrics, val_losses, val_metrics)
  val_metrics = history[3]
  return val_metrics[-1]

In [32]:
# Hyperparameter search: a fixed number of trials on CPU, maximising the value
# returned by `objective` (final-epoch validation metric).
n_trials = 10
device = torch.device('cpu')

# The loaders are created once and shared by every trial.
train_dl, val_dl = get_dataloaders()

study = optuna.create_study(direction='maximize')
study.optimize(
    lambda trial: objective(trial, device=device, train_dl=train_dl, val_dl=val_dl),
    n_trials=n_trials,
)

[I 2025-11-05 23:43:06,573] A new study created in memory with name: no-name-692e46e4-3977-4ea0-8081-26d0f9f53bf0


1 Train Loss: 2.1541 Train Metric: 0.1914 Val Loss: 1.9976 Val Metric: 0.2371
2 Train Loss: 1.9986 Train Metric: 0.2598 Val Loss: 1.8438 Val Metric: 0.3029
3 Train Loss: 1.8149 Train Metric: 0.3215 Val Loss: 1.7602 Val Metric: 0.3496
4 Train Loss: 1.6536 Train Metric: 0.3820 Val Loss: 1.6682 Val Metric: 0.3792


[I 2025-11-05 23:43:42,625] Trial 0 finished with value: 0.40024038461538464 and parameters: {'dropout_rate': 0.2411088921655444, 'fc_size': 172, 'num_blocks': 3, 'n_filters_0': 57, 'n_filters_1': 22, 'n_filters_2': 57, 'kernel_size_0': 3, 'kernel_size_1': 3, 'kernel_size_2': 7}. Best is trial 0 with value: 0.40024038461538464.


5 Train Loss: 1.5848 Train Metric: 0.4090 Val Loss: 1.5705 Val Metric: 0.4002
1 Train Loss: 2.1978 Train Metric: 0.1727 Val Loss: 2.0015 Val Metric: 0.2317
2 Train Loss: 1.9688 Train Metric: 0.2527 Val Loss: 1.8896 Val Metric: 0.2913
3 Train Loss: 1.8589 Train Metric: 0.2852 Val Loss: 1.8078 Val Metric: 0.3316
4 Train Loss: 1.7665 Train Metric: 0.3207 Val Loss: 1.7472 Val Metric: 0.3349


[I 2025-11-05 23:44:23,886] Trial 1 finished with value: 0.38671875 and parameters: {'dropout_rate': 0.1625781764861079, 'fc_size': 230, 'num_blocks': 4, 'n_filters_0': 54, 'n_filters_1': 18, 'n_filters_2': 46, 'n_filters_3': 50, 'kernel_size_0': 7, 'kernel_size_1': 3, 'kernel_size_2': 7, 'kernel_size_3': 3}. Best is trial 0 with value: 0.40024038461538464.


5 Train Loss: 1.6626 Train Metric: 0.3676 Val Loss: 1.6026 Val Metric: 0.3867
1 Train Loss: 2.1327 Train Metric: 0.2102 Val Loss: 1.8940 Val Metric: 0.3127
2 Train Loss: 1.8599 Train Metric: 0.3160 Val Loss: 1.7665 Val Metric: 0.3418
3 Train Loss: 1.6842 Train Metric: 0.3902 Val Loss: 1.6648 Val Metric: 0.3768
4 Train Loss: 1.6087 Train Metric: 0.4141 Val Loss: 1.6121 Val Metric: 0.4094


[I 2025-11-05 23:44:55,502] Trial 2 finished with value: 0.41781850961538464 and parameters: {'dropout_rate': 0.308079304631355, 'fc_size': 89, 'num_blocks': 2, 'n_filters_0': 38, 'n_filters_1': 27, 'kernel_size_0': 7, 'kernel_size_1': 3}. Best is trial 2 with value: 0.41781850961538464.


5 Train Loss: 1.5308 Train Metric: 0.4461 Val Loss: 1.5750 Val Metric: 0.4178
1 Train Loss: 2.2505 Train Metric: 0.1117 Val Loss: 2.2078 Val Metric: 0.1535
2 Train Loss: 2.1666 Train Metric: 0.1691 Val Loss: 2.0427 Val Metric: 0.2117
3 Train Loss: 2.0701 Train Metric: 0.1977 Val Loss: 1.9861 Val Metric: 0.2488
4 Train Loss: 1.9787 Train Metric: 0.2355 Val Loss: 1.9340 Val Metric: 0.2473


[I 2025-11-05 23:45:34,393] Trial 3 finished with value: 0.27554086538461536 and parameters: {'dropout_rate': 0.4271284519654659, 'fc_size': 166, 'num_blocks': 5, 'n_filters_0': 29, 'n_filters_1': 24, 'n_filters_2': 42, 'n_filters_3': 32, 'n_filters_4': 51, 'kernel_size_0': 7, 'kernel_size_1': 7, 'kernel_size_2': 3, 'kernel_size_3': 3, 'kernel_size_4': 7}. Best is trial 2 with value: 0.41781850961538464.


5 Train Loss: 1.9386 Train Metric: 0.2359 Val Loss: 1.9093 Val Metric: 0.2755
1 Train Loss: 2.2699 Train Metric: 0.1473 Val Loss: 2.1389 Val Metric: 0.1955
2 Train Loss: 2.1121 Train Metric: 0.1953 Val Loss: 2.0515 Val Metric: 0.2350
3 Train Loss: 2.0652 Train Metric: 0.1980 Val Loss: 2.0376 Val Metric: 0.2384
4 Train Loss: 1.9573 Train Metric: 0.2441 Val Loss: 1.8501 Val Metric: 0.2867


[I 2025-11-05 23:46:10,429] Trial 4 finished with value: 0.28951322115384615 and parameters: {'dropout_rate': 0.11270820242356688, 'fc_size': 86, 'num_blocks': 5, 'n_filters_0': 23, 'n_filters_1': 29, 'n_filters_2': 42, 'n_filters_3': 53, 'n_filters_4': 35, 'kernel_size_0': 7, 'kernel_size_1': 7, 'kernel_size_2': 3, 'kernel_size_3': 3, 'kernel_size_4': 3}. Best is trial 2 with value: 0.41781850961538464.


5 Train Loss: 1.9015 Train Metric: 0.2684 Val Loss: 1.8495 Val Metric: 0.2895
1 Train Loss: 2.1402 Train Metric: 0.1867 Val Loss: 1.9374 Val Metric: 0.2906
2 Train Loss: 1.9091 Train Metric: 0.2992 Val Loss: 1.8800 Val Metric: 0.3441
3 Train Loss: 1.8166 Train Metric: 0.3285 Val Loss: 1.7713 Val Metric: 0.3436
4 Train Loss: 1.7036 Train Metric: 0.3730 Val Loss: 1.7045 Val Metric: 0.3714


[I 2025-11-05 23:47:21,862] Trial 5 finished with value: 0.40805288461538464 and parameters: {'dropout_rate': 0.3930442242935491, 'fc_size': 256, 'num_blocks': 3, 'n_filters_0': 59, 'n_filters_1': 26, 'n_filters_2': 48, 'kernel_size_0': 7, 'kernel_size_1': 7, 'kernel_size_2': 7}. Best is trial 2 with value: 0.41781850961538464.


5 Train Loss: 1.6231 Train Metric: 0.3965 Val Loss: 1.5817 Val Metric: 0.4081
1 Train Loss: 2.1368 Train Metric: 0.2051 Val Loss: 1.9524 Val Metric: 0.2608
2 Train Loss: 1.9249 Train Metric: 0.2676 Val Loss: 1.7645 Val Metric: 0.3472
3 Train Loss: 1.7437 Train Metric: 0.3473 Val Loss: 1.6832 Val Metric: 0.3861
4 Train Loss: 1.6171 Train Metric: 0.3957 Val Loss: 1.6613 Val Metric: 0.3696


[I 2025-11-05 23:49:14,443] Trial 6 finished with value: 0.4411057692307692 and parameters: {'dropout_rate': 0.24366865393858594, 'fc_size': 167, 'num_blocks': 3, 'n_filters_0': 59, 'n_filters_1': 58, 'n_filters_2': 43, 'kernel_size_0': 7, 'kernel_size_1': 7, 'kernel_size_2': 7}. Best is trial 6 with value: 0.4411057692307692.


5 Train Loss: 1.5747 Train Metric: 0.4156 Val Loss: 1.5544 Val Metric: 0.4411
1 Train Loss: 2.1628 Train Metric: 0.1977 Val Loss: 1.9172 Val Metric: 0.3086
2 Train Loss: 1.9305 Train Metric: 0.2910 Val Loss: 1.8564 Val Metric: 0.3289
3 Train Loss: 1.8390 Train Metric: 0.3191 Val Loss: 1.7414 Val Metric: 0.4066
4 Train Loss: 1.7223 Train Metric: 0.3793 Val Loss: 1.6612 Val Metric: 0.3831


[I 2025-11-05 23:49:49,618] Trial 7 finished with value: 0.40564903846153844 and parameters: {'dropout_rate': 0.43738437193099033, 'fc_size': 74, 'num_blocks': 2, 'n_filters_0': 18, 'n_filters_1': 35, 'kernel_size_0': 7, 'kernel_size_1': 7}. Best is trial 6 with value: 0.4411057692307692.


5 Train Loss: 1.6590 Train Metric: 0.3879 Val Loss: 1.6143 Val Metric: 0.4056
1 Train Loss: 2.1102 Train Metric: 0.2336 Val Loss: 1.8584 Val Metric: 0.3359
2 Train Loss: 1.8650 Train Metric: 0.3063 Val Loss: 1.8193 Val Metric: 0.3510
3 Train Loss: 1.6978 Train Metric: 0.3766 Val Loss: 1.6334 Val Metric: 0.4056
4 Train Loss: 1.6182 Train Metric: 0.4207 Val Loss: 1.5685 Val Metric: 0.4216


[I 2025-11-05 23:50:31,872] Trial 8 finished with value: 0.4684495192307692 and parameters: {'dropout_rate': 0.48256758803306365, 'fc_size': 186, 'num_blocks': 2, 'n_filters_0': 31, 'n_filters_1': 39, 'kernel_size_0': 3, 'kernel_size_1': 7}. Best is trial 8 with value: 0.4684495192307692.


5 Train Loss: 1.5355 Train Metric: 0.4418 Val Loss: 1.5121 Val Metric: 0.4684
1 Train Loss: 2.0719 Train Metric: 0.2480 Val Loss: 1.8831 Val Metric: 0.2979
2 Train Loss: 1.8052 Train Metric: 0.3531 Val Loss: 1.6960 Val Metric: 0.3791
3 Train Loss: 1.5780 Train Metric: 0.4309 Val Loss: 1.6049 Val Metric: 0.4075
4 Train Loss: 1.4457 Train Metric: 0.4801 Val Loss: 1.6012 Val Metric: 0.4172


[I 2025-11-05 23:51:06,423] Trial 9 finished with value: 0.4426081730769231 and parameters: {'dropout_rate': 0.20448796288496052, 'fc_size': 249, 'num_blocks': 2, 'n_filters_0': 39, 'n_filters_1': 33, 'kernel_size_0': 3, 'kernel_size_1': 3}. Best is trial 8 with value: 0.4684495192307692.


5 Train Loss: 1.3372 Train Metric: 0.5020 Val Loss: 1.4769 Val Metric: 0.4426


In [34]:
# Report the best trial of the study: its objective value, then one indented
# line per sampled hyperparameter.
trial = study.best_trial
best_params = trial.params
print("Value: ", trial.value)
print("Params: ")
for key, value in best_params.items():
  print("    {}: {}".format(key, value))

Value:  0.4684495192307692
Params: 
    dropout_rate: 0.48256758803306365
    fc_size: 186
    num_blocks: 2
    n_filters_0: 31
    n_filters_1: 39
    kernel_size_0: 3
    kernel_size_1: 7


In [35]:
# Show Optuna's optimization-history figure for `study`; the figure is the
# cell's last expression, so the notebook renders it as the cell output.
optuna.visualization.plot_optimization_history(study)

In [36]:
# Show Optuna's parameter-importance figure for `study`; the figure is the
# cell's last expression, so the notebook renders it as the cell output.
optuna.visualization.plot_param_importances(study)