In [1]:
! nvidia-smi

Tue Sep  5 07:57:49 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
%pip install --quiet optuna

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.0/226.0 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os

import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [4]:
DEVICE = torch.device("cuda")
BATCHSIZE = 128
CLASSES = 10
DIR = os.getcwd()
EPOCHS = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 17 #30
N_VALID_EXAMPLES = BATCHSIZE * 5  #10

In [5]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    layers = []

    in_features = 28 * 28
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 4, 128)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.2, 0.5)
        layers.append(nn.Dropout(p))

        in_features = out_features
    layers.append(nn.Linear(in_features, CLASSES))
    layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [6]:
def get_mnist():
    # Load FashionMNIST dataset.
    train_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=True, download=True, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor()),
        batch_size=BATCHSIZE,
        shuffle=True,
    )

    return train_loader, valid_loader

In [7]:
tl, vl = get_mnist()
len(tl), len(vl)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 10370264.59it/s]


Extracting /content/FashionMNIST/raw/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 176174.08it/s]


Extracting /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3234776.11it/s]


Extracting /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 6401505.19it/s]

Extracting /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw






(469, 79)

In [12]:
def objective(trial):

    # attributes = dir(trial)
    # print(attributes)

    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if trial._trial_id and (trial._trial_id % 71 == 0):
              print(f'Train -> trial_id : {trial._trial_id} , Epoch num is {epoch}, batch_idx is {batch_idx}, optimizer_name : {optimizer_name}')
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if trial._trial_id and (trial._trial_id % 71 == 0):
                    print(f'Valid -> trial_id : {trial._trial_id} , Epoch num is {epoch}, batch_idx is {batch_idx}, optimizer_name : {optimizer_name}')
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [13]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2023-09-05 08:09:55,140] A new study created in memory with name: no-name-cc824ea0-ecda-4cfe-8720-e85303fb4d80
[I 2023-09-05 08:09:59,213] Trial 0 finished with value: 0.6578125 and parameters: {'n_layers': 2, 'n_units_l0': 11, 'dropout_l0': 0.45078537965881454, 'n_units_l1': 101, 'dropout_l1': 0.48197065032968206, 'optimizer': 'RMSprop', 'lr': 0.0008070551054499255}. Best is trial 0 with value: 0.6578125.
[I 2023-09-05 08:10:02,357] Trial 1 finished with value: 0.2515625 and parameters: {'n_layers': 1, 'n_units_l0': 6, 'dropout_l0': 0.45940046657759265, 'optimizer': 'Adam', 'lr': 5.8132428790092807e-05}. Best is trial 0 with value: 0.6578125.
[I 2023-09-05 08:10:05,673] Trial 2 finished with value: 0.090625 and parameters: {'n_layers': 3, 'n_units_l0': 70, 'dropout_l0': 0.26211540032270503, 'n_units_l1': 30, 'dropout_l1': 0.32345233624046377, 'n_units_l2': 39, 'dropout_l2': 0.46214500004560033, 'optimizer': 'SGD', 'lr': 0.00017222157169568378}. Best is trial 0 with value: 0.6578125

Train -> trial_id : 71 , Epoch num is 0, batch_idx is 0, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 1, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 2, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 3, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 4, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 5, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 6, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 7, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 8, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 9, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 10, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 11, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx

[I 2023-09-05 08:12:12,338] Trial 71 pruned. 


Train -> trial_id : 71 , Epoch num is 0, batch_idx is 15, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 16, optimizer_name : Adam
Train -> trial_id : 71 , Epoch num is 0, batch_idx is 17, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 0, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 1, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 2, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 3, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 4, optimizer_name : Adam
Valid -> trial_id : 71 , Epoch num is 0, batch_idx is 5, optimizer_name : Adam


[I 2023-09-05 08:12:12,755] Trial 72 pruned. 
[I 2023-09-05 08:12:13,830] Trial 73 pruned. 
[I 2023-09-05 08:12:14,421] Trial 74 pruned. 
[I 2023-09-05 08:12:15,111] Trial 75 pruned. 
[I 2023-09-05 08:12:18,439] Trial 76 finished with value: 0.81875 and parameters: {'n_layers': 1, 'n_units_l0': 121, 'dropout_l0': 0.34031963460766895, 'optimizer': 'Adam', 'lr': 0.0053523417508515555}. Best is trial 44 with value: 0.8359375.
[I 2023-09-05 08:12:18,848] Trial 77 pruned. 
[I 2023-09-05 08:12:19,246] Trial 78 pruned. 
[I 2023-09-05 08:12:22,533] Trial 79 finished with value: 0.8421875 and parameters: {'n_layers': 1, 'n_units_l0': 112, 'dropout_l0': 0.35561382482302195, 'optimizer': 'Adam', 'lr': 0.00853671255922034}. Best is trial 79 with value: 0.8421875.
[I 2023-09-05 08:12:23,245] Trial 80 pruned. 
[I 2023-09-05 08:12:23,976] Trial 81 pruned. 
[I 2023-09-05 08:12:28,172] Trial 82 finished with value: 0.853125 and parameters: {'n_layers': 1, 'n_units_l0': 106, 'dropout_l0': 0.369342435496

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  63
  Number of complete trials:  37
Best trial:
  Value:  0.853125
  Params: 
    n_layers: 1
    n_units_l0: 106
    dropout_l0: 0.36934243549622037
    optimizer: Adam
    lr: 0.003344732088224441
