In [1]:
from pathlib import Path
import optuna
from optuna.trial import TrialState

import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms as T

In [2]:
REPO_PATH = '/home/ah19/runtime-monitoring'
DATASET = 'MNIST'
PREFIX = 'Regularization'
FILENAME_POSTFIX = f"{DATASET}_{PREFIX}"
SEED = 42

In [3]:
DEVICE = torch.device("cuda:0")
EPOCHS = 5
LIMIT_TRAIN_DATA = 0.2
LIMIT_TEST_DATA = 0.05
TIMOUT_MINUTES = 10
NUM_TRIALS = 25

In [4]:
import sys
sys.path.append(REPO_PATH + '/utilities')
sys.path.append(f'{REPO_PATH}/{DATASET}/trainingModels')

In [5]:
from utils import *
from plotFunctions import *
from pathManager import fetchPaths
from MNIST_Model import MNIST_Model

model_ = MNIST_Model

In [6]:
base = Path(REPO_PATH)
paths = fetchPaths(base, DATASET)

path = paths[DATASET.lower()]
path_dataset = paths['dataset']
path_trainingModels = paths['trainingModels']
path_trainingModels_regularization = paths['trainingModels_regularization']

path

PosixPath('/home/ah19/runtime-monitoring/MNIST')

In [7]:
feature_names = get_labels(DATASET)

tf_train = T.Compose([
    T.ToTensor(),
#     T.RandomHorizontalFlip(),
    T.Normalize((0.1307), (0.3015))
])


tf_test = T.Compose([
    T.ToTensor(),
    T.Normalize((0.1307), (0.3015))
])

train_data = get_dataset(DATASET, path_dataset, train=True, transform=tf_train)
test_data = get_dataset(DATASET, path_dataset, train=False, transform=tf_test)

train_split = int( len(train_data) * LIMIT_TRAIN_DATA )
test_split = int( len(test_data) * LIMIT_TEST_DATA )

train_data, _ = split_data(train_data, [train_split, len(train_data) - train_split], SEED)
test_data, _ = split_data(test_data, [test_split, len(test_data) - test_split], SEED)

len(train_data), len(test_data)

(12000, 500)

In [8]:
def define_model(trial):
    
    # dropout
    dropout = trial.suggest_float("dropout", 0.0, 0.5, step=0.1)
    
    # model
    model = model_(dropout=dropout, first_layer_norm=False)
    
    return model

In [9]:
def get_mnist(trial):
    batchsize = trial.suggest_categorical("batchsize", [32, 64, 128, 256])
    
    trainloader = get_dataLoader(train_data, batchsize, True)
    testloader = get_dataLoader(test_data, batchsize, False)

    return trainloader, testloader, batchsize

In [17]:
def objective(trial):
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD"])
    lr = trial.suggest_float("lr", 1e-4, 1e-1)
    optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    trainloader, testloader, batchsize = get_mnist(trial)
    
    # loss function
    loss_function = nn.CrossEntropyLoss()
    
    # regularization
    l2_ = trial.suggest_float("L2", 0.0, 0.1, step=0.001)
    l1_ = trial.suggest_float("L1", 0.0, 0.1, step=0.001)

    
    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(trainloader):

            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            
            # regularization
            l2_loss = model._l2_regularization(l2_)
            l1_loss = model._l1_regularization(l1_)
            
            # calc loss
            loss = loss_function(output, target) + l1_loss + l2_loss
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        losses = []
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(testloader):
                
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                
                loss = loss_function(output, target)
                losses.append(loss.item())
                
                # Get the index of the max log-probability.
                correct += sum(target.to(DEVICE) == output.argmax(dim=1))

        accuracy = correct / len(testloader.dataset)

#         Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [18]:
samplers = ['TPESampler', 'RandomSampler', 'CmaEsSampler']
for sampler_name in samplers[:-1]:
    sampler = getattr(optuna.samplers, sampler_name)(seed=SEED)

    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(objective, n_trials=NUM_TRIALS, timeout=TIMOUT_MINUTES*60)

    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial
    # trial = study.best_trials

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
        
    df_studys = study.trials_dataframe().sort_values('value', ascending=False)
    df_studys.to_csv(path_trainingModels_regularization / f'optuna-{DATASET}-{PREFIX}-{sampler_name}.csv' ,index=False)

[32m[I 2023-04-10 02:59:13,969][0m A new study created in memory with name: no-name-31727b6d-12eb-41c5-afe7-50aa0f4264fb[0m
[32m[I 2023-04-10 02:59:32,360][0m Trial 0 finished with value: 0.10200000554323196 and parameters: {'dropout': 0.2, 'optimizer': 'Adam', 'lr': 0.05990598257128396, 'batchsize': 256, 'L2': 0.06, 'L1': 0.07100000000000001}. Best is trial 0 with value: 0.10200000554323196.[0m
[32m[I 2023-04-10 02:59:49,697][0m Trial 1 finished with value: 0.30000001192092896 and parameters: {'dropout': 0.0, 'optimizer': 'Adam', 'lr': 0.021312677156759788, 'batchsize': 256, 'L2': 0.043000000000000003, 'L1': 0.029}. Best is trial 1 with value: 0.30000001192092896.[0m
[32m[I 2023-04-10 03:00:11,172][0m Trial 2 finished with value: 0.7280000448226929 and parameters: {'dropout': 0.30000000000000004, 'optimizer': 'SGD', 'lr': 0.036699548145039805, 'batchsize': 64, 'L2': 0.059000000000000004, 'L1': 0.004}. Best is trial 2 with value: 0.7280000448226929.[0m
[32m[I 2023-04-10 03

Study statistics: 
  Number of finished trials:  24
  Number of pruned trials:  0
  Number of complete trials:  24
Best trial:
  Value:  0.984000027179718
  Params: 
    dropout: 0.1
    optimizer: SGD
    lr: 0.038678762298957156
    batchsize: 32
    L2: 0.019
    L1: 0.0


[32m[I 2023-04-10 03:09:52,921][0m Trial 0 finished with value: 0.10200000554323196 and parameters: {'dropout': 0.2, 'optimizer': 'Adam', 'lr': 0.05990598257128396, 'batchsize': 256, 'L2': 0.06, 'L1': 0.07100000000000001}. Best is trial 0 with value: 0.10200000554323196.[0m
[32m[I 2023-04-10 03:10:08,443][0m Trial 1 finished with value: 0.4320000112056732 and parameters: {'dropout': 0.0, 'optimizer': 'Adam', 'lr': 0.021312677156759788, 'batchsize': 256, 'L2': 0.043000000000000003, 'L1': 0.029}. Best is trial 1 with value: 0.4320000112056732.[0m
[32m[I 2023-04-10 03:10:28,686][0m Trial 2 finished with value: 0.8320000171661377 and parameters: {'dropout': 0.30000000000000004, 'optimizer': 'SGD', 'lr': 0.036699548145039805, 'batchsize': 64, 'L2': 0.059000000000000004, 'L1': 0.004}. Best is trial 2 with value: 0.8320000171661377.[0m
[32m[I 2023-04-10 03:11:06,554][0m Trial 3 finished with value: 0.10200000554323196 and parameters: {'dropout': 0.30000000000000004, 'optimizer': 'A

Study statistics: 
  Number of finished trials:  25
  Number of pruned trials:  0
  Number of complete trials:  25
Best trial:
  Value:  0.8380000591278076
  Params: 
    dropout: 0.0
    optimizer: SGD
    lr: 0.05617159203719268
    batchsize: 32
    L2: 0.002
    L1: 0.01
