In [7]:
import os

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision import transforms

import optuna
from optuna.trial import TrialState

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_class = 10
LOG_INTERVAL = 200

In [9]:
def define_model(trial):
    n_layers = trial.suggest_int("n_layers", 1, 3)
    layers = []
    
    in_features = 28 * 28
    for i in range(n_layers):
        out_features = trial.suggest_int(f"n_units_l{i}", 4, 128)
        layers.append(nn.Linear(in_features, out_features)) # (B, 1, 28, 28) => (B, 28 * 28) 
        layers.append(nn.ReLU())
        p = trial.suggest_float(f"dropout_l{i}", 0.2, 0.5)
        layers.append(nn.Dropout(p))
        
        in_features = out_features
    layers.append(nn.Linear(in_features, num_class))

    return nn.Sequential(*layers)

In [10]:
def get_dataloader(batch_size):
    train_dataset = datasets.FashionMNIST('./cifar10/', train=True, download=True, transform=transforms.ToTensor())
    valid_dataset = datasets.FashionMNIST('./cifar10/', train=False, transform=transforms.ToTensor())

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )
    
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )
    
    return train_loader, valid_loader

In [11]:
def objective(trial):
    model = define_model(trial).to(device)
    
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64])
    epochs = trial.suggest_int("epoch", 5, 20)
    
    train_loader, valid_loader = get_dataloader(batch_size)
    
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        model.train()
        for batch_idx, (X_train, y_train) in enumerate(train_loader):
            X_train = X_train.view(X_train.size(0), -1).to(device) # flatten 역할
            y_train = y_train.to(device)

            optimizer.zero_grad()
            output = model(X_train)
            loss = criterion(output, y_train)
            loss.backward()
            optimizer.step()

            if batch_idx % LOG_INTERVAL == 0:
                print(f"Train Epoch: {epoch} [{(batch_idx + 1) * batch_size}/{len(train_loader.dataset)}]\tLoss: {loss.item():.6f}")
                
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (X_valid, y_valid) in enumerate(valid_loader):
                X_valid = X_valid.view(X_valid.size(0), -1).to(device)
                y_valid = y_valid.to(device)

                output = model(X_valid)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(y_valid.view_as(pred)).sum().item()
                
        accuracy = correct / len(valid_loader.dataset)

        trial.report(accuracy, epoch)

        if trial.should_prune(): # 잘라내야한다
            raise optuna.exceptions.TrialPruned()

        return accuracy

In [12]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600) # 100 개의 시도

# 
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2021-05-27 08:07:31,525][0m A new study created in memory with name: no-name-7bd8fcf3-b89c-4cf9-9839-8d55910694db[0m


Train Epoch: 0 [64/60000]	Loss: 2.320259
Train Epoch: 0 [12864/60000]	Loss: 1.003493
Train Epoch: 0 [25664/60000]	Loss: 1.253901
Train Epoch: 0 [38464/60000]	Loss: 1.026822
Train Epoch: 0 [51264/60000]	Loss: 1.062552


[32m[I 2021-05-27 08:07:37,373][0m Trial 0 finished with value: 0.7228 and parameters: {'n_layers': 3, 'n_units_l0': 124, 'dropout_l0': 0.33550063621548, 'n_units_l1': 87, 'dropout_l1': 0.48656991280952416, 'n_units_l2': 64, 'dropout_l2': 0.4302928111686385, 'optimizer': 'Adam', 'lr': 0.014910368946513042, 'batch_size': 64, 'epoch': 20}. Best is trial 0 with value: 0.7228.[0m


Train Epoch: 0 [8/60000]	Loss: 2.345330
Train Epoch: 0 [1608/60000]	Loss: 1.042247
Train Epoch: 0 [3208/60000]	Loss: 0.638430
Train Epoch: 0 [4808/60000]	Loss: 2.379360
Train Epoch: 0 [6408/60000]	Loss: 0.980853
Train Epoch: 0 [8008/60000]	Loss: 0.641927
Train Epoch: 0 [9608/60000]	Loss: 0.641924
Train Epoch: 0 [11208/60000]	Loss: 0.554690
Train Epoch: 0 [12808/60000]	Loss: 0.209407
Train Epoch: 0 [14408/60000]	Loss: 0.539827
Train Epoch: 0 [16008/60000]	Loss: 0.340083
Train Epoch: 0 [17608/60000]	Loss: 0.390163
Train Epoch: 0 [19208/60000]	Loss: 0.076503
Train Epoch: 0 [20808/60000]	Loss: 0.733841
Train Epoch: 0 [22408/60000]	Loss: 0.622734
Train Epoch: 0 [24008/60000]	Loss: 0.717688
Train Epoch: 0 [25608/60000]	Loss: 0.503317
Train Epoch: 0 [27208/60000]	Loss: 1.407466
Train Epoch: 0 [28808/60000]	Loss: 0.563821
Train Epoch: 0 [30408/60000]	Loss: 0.353752
Train Epoch: 0 [32008/60000]	Loss: 0.453105
Train Epoch: 0 [33608/60000]	Loss: 0.313914
Train Epoch: 0 [35208/60000]	Loss: 0.12424

[32m[I 2021-05-27 08:08:06,827][0m Trial 1 finished with value: 0.8152 and parameters: {'n_layers': 2, 'n_units_l0': 109, 'dropout_l0': 0.4149968283720924, 'n_units_l1': 59, 'dropout_l1': 0.3236666657692482, 'optimizer': 'Adam', 'lr': 0.0018213063504911014, 'batch_size': 8, 'epoch': 18}. Best is trial 1 with value: 0.8152.[0m


Train Epoch: 0 [32/60000]	Loss: 2.299277
Train Epoch: 0 [6432/60000]	Loss: 2.227624
Train Epoch: 0 [12832/60000]	Loss: 2.114434
Train Epoch: 0 [19232/60000]	Loss: 1.976840
Train Epoch: 0 [25632/60000]	Loss: 1.875812
Train Epoch: 0 [32032/60000]	Loss: 1.825418
Train Epoch: 0 [38432/60000]	Loss: 1.820161
Train Epoch: 0 [44832/60000]	Loss: 1.562962
Train Epoch: 0 [51232/60000]	Loss: 1.368564
Train Epoch: 0 [57632/60000]	Loss: 1.361802


[32m[I 2021-05-27 08:08:14,894][0m Trial 2 finished with value: 0.6489 and parameters: {'n_layers': 2, 'n_units_l0': 96, 'dropout_l0': 0.24986192451610442, 'n_units_l1': 103, 'dropout_l1': 0.2557825487859024, 'optimizer': 'Adam', 'lr': 1.1819050915983608e-05, 'batch_size': 32, 'epoch': 11}. Best is trial 1 with value: 0.8152.[0m


Train Epoch: 0 [32/60000]	Loss: 2.297961
Train Epoch: 0 [6432/60000]	Loss: 2.315097
Train Epoch: 0 [12832/60000]	Loss: 2.296214
Train Epoch: 0 [19232/60000]	Loss: 2.260385
Train Epoch: 0 [25632/60000]	Loss: 2.280383
Train Epoch: 0 [32032/60000]	Loss: 2.290054
Train Epoch: 0 [38432/60000]	Loss: 2.288836
Train Epoch: 0 [44832/60000]	Loss: 2.272677
Train Epoch: 0 [51232/60000]	Loss: 2.295753
Train Epoch: 0 [57632/60000]	Loss: 2.299253


[32m[I 2021-05-27 08:08:21,961][0m Trial 3 finished with value: 0.1042 and parameters: {'n_layers': 2, 'n_units_l0': 79, 'dropout_l0': 0.40942365122677427, 'n_units_l1': 91, 'dropout_l1': 0.2117203180858434, 'optimizer': 'SGD', 'lr': 2.283199028656177e-05, 'batch_size': 32, 'epoch': 13}. Best is trial 1 with value: 0.8152.[0m


Train Epoch: 0 [16/60000]	Loss: 2.338847
Train Epoch: 0 [3216/60000]	Loss: 1.815119
Train Epoch: 0 [6416/60000]	Loss: 1.406292
Train Epoch: 0 [9616/60000]	Loss: 0.895752
Train Epoch: 0 [12816/60000]	Loss: 1.341273
Train Epoch: 0 [16016/60000]	Loss: 0.891364
Train Epoch: 0 [19216/60000]	Loss: 0.932298
Train Epoch: 0 [22416/60000]	Loss: 1.286478
Train Epoch: 0 [25616/60000]	Loss: 0.700057
Train Epoch: 0 [28816/60000]	Loss: 0.930804
Train Epoch: 0 [32016/60000]	Loss: 0.532847
Train Epoch: 0 [35216/60000]	Loss: 1.243937
Train Epoch: 0 [38416/60000]	Loss: 1.004733
Train Epoch: 0 [41616/60000]	Loss: 0.705283
Train Epoch: 0 [44816/60000]	Loss: 0.694482
Train Epoch: 0 [48016/60000]	Loss: 0.401402
Train Epoch: 0 [51216/60000]	Loss: 0.781844
Train Epoch: 0 [54416/60000]	Loss: 0.632831
Train Epoch: 0 [57616/60000]	Loss: 0.650538


[32m[I 2021-05-27 08:08:33,676][0m Trial 4 finished with value: 0.786 and parameters: {'n_layers': 1, 'n_units_l0': 85, 'dropout_l0': 0.44001904593265295, 'optimizer': 'SGD', 'lr': 0.008333415764746709, 'batch_size': 16, 'epoch': 19}. Best is trial 1 with value: 0.8152.[0m


Train Epoch: 0 [8/60000]	Loss: 2.290019
Train Epoch: 0 [1608/60000]	Loss: 2.213070
Train Epoch: 0 [3208/60000]	Loss: 2.052474
Train Epoch: 0 [4808/60000]	Loss: 2.008010
Train Epoch: 0 [6408/60000]	Loss: 2.090426
Train Epoch: 0 [8008/60000]	Loss: 2.056666
Train Epoch: 0 [9608/60000]	Loss: 1.694011
Train Epoch: 0 [11208/60000]	Loss: 1.907596
Train Epoch: 0 [12808/60000]	Loss: 1.619586
Train Epoch: 0 [14408/60000]	Loss: 1.797589
Train Epoch: 0 [16008/60000]	Loss: 1.687090
Train Epoch: 0 [17608/60000]	Loss: 1.549659
Train Epoch: 0 [19208/60000]	Loss: 1.459925
Train Epoch: 0 [20808/60000]	Loss: 1.079351
Train Epoch: 0 [22408/60000]	Loss: 1.378888
Train Epoch: 0 [24008/60000]	Loss: 1.456910
Train Epoch: 0 [25608/60000]	Loss: 1.361648
Train Epoch: 0 [27208/60000]	Loss: 0.946781
Train Epoch: 0 [28808/60000]	Loss: 0.951937
Train Epoch: 0 [30408/60000]	Loss: 1.159877
Train Epoch: 0 [32008/60000]	Loss: 1.079100
Train Epoch: 0 [33608/60000]	Loss: 1.485723
Train Epoch: 0 [35208/60000]	Loss: 0.74459

[32m[I 2021-05-27 08:09:00,461][0m Trial 5 pruned. [0m


Train Epoch: 0 [64/60000]	Loss: 2.281360
Train Epoch: 0 [12864/60000]	Loss: 0.573774
Train Epoch: 0 [25664/60000]	Loss: 0.828099
Train Epoch: 0 [38464/60000]	Loss: 0.629642
Train Epoch: 0 [51264/60000]	Loss: 0.437352


[32m[I 2021-05-27 08:09:06,145][0m Trial 6 finished with value: 0.8303 and parameters: {'n_layers': 1, 'n_units_l0': 118, 'dropout_l0': 0.43773201064875555, 'optimizer': 'Adam', 'lr': 0.001138316015475726, 'batch_size': 64, 'epoch': 19}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.242649
Train Epoch: 0 [1608/60000]	Loss: 2.271122
Train Epoch: 0 [3208/60000]	Loss: 2.359314
Train Epoch: 0 [4808/60000]	Loss: 2.340886
Train Epoch: 0 [6408/60000]	Loss: 2.293002
Train Epoch: 0 [8008/60000]	Loss: 2.287336
Train Epoch: 0 [9608/60000]	Loss: 2.305013
Train Epoch: 0 [11208/60000]	Loss: 2.321278
Train Epoch: 0 [12808/60000]	Loss: 2.334670
Train Epoch: 0 [14408/60000]	Loss: 2.361128
Train Epoch: 0 [16008/60000]	Loss: 2.272957
Train Epoch: 0 [17608/60000]	Loss: 2.285165
Train Epoch: 0 [19208/60000]	Loss: 2.343708
Train Epoch: 0 [20808/60000]	Loss: 2.293174
Train Epoch: 0 [22408/60000]	Loss: 2.307005
Train Epoch: 0 [24008/60000]	Loss: 2.269520
Train Epoch: 0 [25608/60000]	Loss: 2.284564
Train Epoch: 0 [27208/60000]	Loss: 2.281371
Train Epoch: 0 [28808/60000]	Loss: 2.317449
Train Epoch: 0 [30408/60000]	Loss: 2.282894
Train Epoch: 0 [32008/60000]	Loss: 2.199380
Train Epoch: 0 [33608/60000]	Loss: 2.303812
Train Epoch: 0 [35208/60000]	Loss: 2.30636

[32m[I 2021-05-27 08:09:29,642][0m Trial 7 pruned. [0m


Train Epoch: 0 [32/60000]	Loss: 2.367381
Train Epoch: 0 [6432/60000]	Loss: 1.071431
Train Epoch: 0 [12832/60000]	Loss: 1.138504
Train Epoch: 0 [19232/60000]	Loss: 0.856029
Train Epoch: 0 [25632/60000]	Loss: 0.826838
Train Epoch: 0 [32032/60000]	Loss: 0.650629
Train Epoch: 0 [38432/60000]	Loss: 0.864307
Train Epoch: 0 [44832/60000]	Loss: 0.806219
Train Epoch: 0 [51232/60000]	Loss: 0.789979
Train Epoch: 0 [57632/60000]	Loss: 1.160054


[32m[I 2021-05-27 08:09:36,289][0m Trial 8 finished with value: 0.816 and parameters: {'n_layers': 1, 'n_units_l0': 27, 'dropout_l0': 0.39136402565361417, 'optimizer': 'RMSprop', 'lr': 0.003917405613776444, 'batch_size': 32, 'epoch': 17}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [32/60000]	Loss: 2.305115
Train Epoch: 0 [6432/60000]	Loss: 1.229172
Train Epoch: 0 [12832/60000]	Loss: 1.096908
Train Epoch: 0 [19232/60000]	Loss: 0.849331
Train Epoch: 0 [25632/60000]	Loss: 0.635375
Train Epoch: 0 [32032/60000]	Loss: 0.734050
Train Epoch: 0 [38432/60000]	Loss: 0.610806
Train Epoch: 0 [44832/60000]	Loss: 0.800610
Train Epoch: 0 [51232/60000]	Loss: 0.719748
Train Epoch: 0 [57632/60000]	Loss: 0.391333


[32m[I 2021-05-27 08:09:43,450][0m Trial 9 finished with value: 0.7963 and parameters: {'n_layers': 1, 'n_units_l0': 101, 'dropout_l0': 0.23059759945568564, 'optimizer': 'Adam', 'lr': 0.00011100076015133439, 'batch_size': 32, 'epoch': 17}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [64/60000]	Loss: 2.302724
Train Epoch: 0 [12864/60000]	Loss: 1.530972
Train Epoch: 0 [25664/60000]	Loss: 1.378257
Train Epoch: 0 [38464/60000]	Loss: 1.262552
Train Epoch: 0 [51264/60000]	Loss: 1.375744


[32m[I 2021-05-27 08:09:49,442][0m Trial 10 pruned. [0m


Train Epoch: 0 [64/60000]	Loss: 2.363656
Train Epoch: 0 [12864/60000]	Loss: 2.074535
Train Epoch: 0 [25664/60000]	Loss: 1.996794
Train Epoch: 0 [38464/60000]	Loss: 1.522930
Train Epoch: 0 [51264/60000]	Loss: 1.787448


[32m[I 2021-05-27 08:09:55,190][0m Trial 11 pruned. [0m


Train Epoch: 0 [16/60000]	Loss: 2.338589
Train Epoch: 0 [3216/60000]	Loss: 1.942925
Train Epoch: 0 [6416/60000]	Loss: 1.727291
Train Epoch: 0 [9616/60000]	Loss: 2.004922
Train Epoch: 0 [12816/60000]	Loss: 2.282870
Train Epoch: 0 [16016/60000]	Loss: 2.283678
Train Epoch: 0 [19216/60000]	Loss: 2.333515
Train Epoch: 0 [22416/60000]	Loss: 2.308000
Train Epoch: 0 [25616/60000]	Loss: 2.263001
Train Epoch: 0 [28816/60000]	Loss: 2.340006
Train Epoch: 0 [32016/60000]	Loss: 2.371715
Train Epoch: 0 [35216/60000]	Loss: 2.291284
Train Epoch: 0 [38416/60000]	Loss: 2.281595
Train Epoch: 0 [41616/60000]	Loss: 2.295832
Train Epoch: 0 [44816/60000]	Loss: 2.290695
Train Epoch: 0 [48016/60000]	Loss: 2.350034
Train Epoch: 0 [51216/60000]	Loss: 2.368475
Train Epoch: 0 [54416/60000]	Loss: 2.306365
Train Epoch: 0 [57616/60000]	Loss: 2.389275


[32m[I 2021-05-27 08:10:07,251][0m Trial 12 pruned. [0m


Train Epoch: 0 [32/60000]	Loss: 2.337615
Train Epoch: 0 [6432/60000]	Loss: 2.200248
Train Epoch: 0 [12832/60000]	Loss: 2.021037
Train Epoch: 0 [19232/60000]	Loss: 1.742382
Train Epoch: 0 [25632/60000]	Loss: 2.087462
Train Epoch: 0 [32032/60000]	Loss: 1.627377
Train Epoch: 0 [38432/60000]	Loss: 2.118093
Train Epoch: 0 [44832/60000]	Loss: 1.834525
Train Epoch: 0 [51232/60000]	Loss: 2.079364
Train Epoch: 0 [57632/60000]	Loss: 1.971386


[32m[I 2021-05-27 08:10:13,805][0m Trial 13 pruned. [0m


Train Epoch: 0 [64/60000]	Loss: 2.345598
Train Epoch: 0 [12864/60000]	Loss: 0.977047
Train Epoch: 0 [25664/60000]	Loss: 0.793141
Train Epoch: 0 [38464/60000]	Loss: 0.965154
Train Epoch: 0 [51264/60000]	Loss: 0.822806


[32m[I 2021-05-27 08:10:19,318][0m Trial 14 finished with value: 0.7949 and parameters: {'n_layers': 1, 'n_units_l0': 28, 'dropout_l0': 0.3699391917767526, 'optimizer': 'RMSprop', 'lr': 0.0004842456681371465, 'batch_size': 64, 'epoch': 11}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [32/60000]	Loss: 2.273023
Train Epoch: 0 [6432/60000]	Loss: 1.706706
Train Epoch: 0 [12832/60000]	Loss: 1.456020
Train Epoch: 0 [19232/60000]	Loss: 1.337716
Train Epoch: 0 [25632/60000]	Loss: 1.357651
Train Epoch: 0 [32032/60000]	Loss: 0.884595
Train Epoch: 0 [38432/60000]	Loss: 1.792423
Train Epoch: 0 [44832/60000]	Loss: 1.005556
Train Epoch: 0 [51232/60000]	Loss: 1.280961
Train Epoch: 0 [57632/60000]	Loss: 0.790354


[32m[I 2021-05-27 08:10:26,389][0m Trial 15 pruned. [0m


Train Epoch: 0 [64/60000]	Loss: 2.333264
Train Epoch: 0 [12864/60000]	Loss: 0.955925
Train Epoch: 0 [25664/60000]	Loss: 0.800671
Train Epoch: 0 [38464/60000]	Loss: 0.920683
Train Epoch: 0 [51264/60000]	Loss: 0.946427


[32m[I 2021-05-27 08:10:31,815][0m Trial 16 finished with value: 0.7974 and parameters: {'n_layers': 1, 'n_units_l0': 21, 'dropout_l0': 0.4591971781299946, 'optimizer': 'Adam', 'lr': 0.0029974612411739245, 'batch_size': 64, 'epoch': 20}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [16/60000]	Loss: 2.356792
Train Epoch: 0 [3216/60000]	Loss: 1.505217
Train Epoch: 0 [6416/60000]	Loss: 1.257414
Train Epoch: 0 [9616/60000]	Loss: 1.245850
Train Epoch: 0 [12816/60000]	Loss: 1.230914
Train Epoch: 0 [16016/60000]	Loss: 1.182352
Train Epoch: 0 [19216/60000]	Loss: 1.336984
Train Epoch: 0 [22416/60000]	Loss: 0.976817
Train Epoch: 0 [25616/60000]	Loss: 0.893711
Train Epoch: 0 [28816/60000]	Loss: 0.830466
Train Epoch: 0 [32016/60000]	Loss: 0.657053
Train Epoch: 0 [35216/60000]	Loss: 0.809718
Train Epoch: 0 [38416/60000]	Loss: 0.936679
Train Epoch: 0 [41616/60000]	Loss: 0.625566
Train Epoch: 0 [44816/60000]	Loss: 0.968109
Train Epoch: 0 [48016/60000]	Loss: 0.735855
Train Epoch: 0 [51216/60000]	Loss: 0.431530
Train Epoch: 0 [54416/60000]	Loss: 0.683184
Train Epoch: 0 [57616/60000]	Loss: 0.650693


[32m[I 2021-05-27 08:10:43,675][0m Trial 17 pruned. [0m


Train Epoch: 0 [32/60000]	Loss: 2.266807
Train Epoch: 0 [6432/60000]	Loss: 1.765525
Train Epoch: 0 [12832/60000]	Loss: 1.503903
Train Epoch: 0 [19232/60000]	Loss: 1.480017
Train Epoch: 0 [25632/60000]	Loss: 1.396910
Train Epoch: 0 [32032/60000]	Loss: 1.545645
Train Epoch: 0 [38432/60000]	Loss: 1.066320
Train Epoch: 0 [44832/60000]	Loss: 1.379285
Train Epoch: 0 [51232/60000]	Loss: 1.216347
Train Epoch: 0 [57632/60000]	Loss: 1.790370


[32m[I 2021-05-27 08:10:51,717][0m Trial 18 pruned. [0m


Train Epoch: 0 [64/60000]	Loss: 2.379465
Train Epoch: 0 [12864/60000]	Loss: 1.713464
Train Epoch: 0 [25664/60000]	Loss: 1.378343
Train Epoch: 0 [38464/60000]	Loss: 1.241703
Train Epoch: 0 [51264/60000]	Loss: 1.097566


[32m[I 2021-05-27 08:10:57,131][0m Trial 19 pruned. [0m


Train Epoch: 0 [32/60000]	Loss: 2.315771
Train Epoch: 0 [6432/60000]	Loss: 1.028970
Train Epoch: 0 [12832/60000]	Loss: 0.755880
Train Epoch: 0 [19232/60000]	Loss: 0.937524
Train Epoch: 0 [25632/60000]	Loss: 1.334903
Train Epoch: 0 [32032/60000]	Loss: 1.213804
Train Epoch: 0 [38432/60000]	Loss: 0.994059
Train Epoch: 0 [44832/60000]	Loss: 1.037905
Train Epoch: 0 [51232/60000]	Loss: 1.062809
Train Epoch: 0 [57632/60000]	Loss: 1.038319


[32m[I 2021-05-27 08:11:04,243][0m Trial 20 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.356654
Train Epoch: 0 [1608/60000]	Loss: 1.288231
Train Epoch: 0 [3208/60000]	Loss: 0.824331
Train Epoch: 0 [4808/60000]	Loss: 0.785284
Train Epoch: 0 [6408/60000]	Loss: 1.488716
Train Epoch: 0 [8008/60000]	Loss: 1.568234
Train Epoch: 0 [9608/60000]	Loss: 0.385298
Train Epoch: 0 [11208/60000]	Loss: 0.569292
Train Epoch: 0 [12808/60000]	Loss: 0.406234
Train Epoch: 0 [14408/60000]	Loss: 0.362712
Train Epoch: 0 [16008/60000]	Loss: 0.392545
Train Epoch: 0 [17608/60000]	Loss: 0.649733
Train Epoch: 0 [19208/60000]	Loss: 1.005689
Train Epoch: 0 [20808/60000]	Loss: 0.281415
Train Epoch: 0 [22408/60000]	Loss: 0.811788
Train Epoch: 0 [24008/60000]	Loss: 0.459202
Train Epoch: 0 [25608/60000]	Loss: 0.802871
Train Epoch: 0 [27208/60000]	Loss: 0.378051
Train Epoch: 0 [28808/60000]	Loss: 0.991560
Train Epoch: 0 [30408/60000]	Loss: 0.301171
Train Epoch: 0 [32008/60000]	Loss: 0.361546
Train Epoch: 0 [33608/60000]	Loss: 0.436444
Train Epoch: 0 [35208/60000]	Loss: 0.66633

[32m[I 2021-05-27 08:11:33,972][0m Trial 21 finished with value: 0.8212 and parameters: {'n_layers': 2, 'n_units_l0': 107, 'dropout_l0': 0.4006251445165232, 'n_units_l1': 40, 'dropout_l1': 0.3469817663569324, 'optimizer': 'Adam', 'lr': 0.001531850808705836, 'batch_size': 8, 'epoch': 18}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.293961
Train Epoch: 0 [1608/60000]	Loss: 1.951799
Train Epoch: 0 [3208/60000]	Loss: 1.682902
Train Epoch: 0 [4808/60000]	Loss: 1.219793
Train Epoch: 0 [6408/60000]	Loss: 0.695798
Train Epoch: 0 [8008/60000]	Loss: 1.005763
Train Epoch: 0 [9608/60000]	Loss: 0.754482
Train Epoch: 0 [11208/60000]	Loss: 0.484144
Train Epoch: 0 [12808/60000]	Loss: 0.410362
Train Epoch: 0 [14408/60000]	Loss: 0.873683
Train Epoch: 0 [16008/60000]	Loss: 0.903300
Train Epoch: 0 [17608/60000]	Loss: 0.241596
Train Epoch: 0 [19208/60000]	Loss: 0.696909
Train Epoch: 0 [20808/60000]	Loss: 1.484852
Train Epoch: 0 [22408/60000]	Loss: 0.761187
Train Epoch: 0 [24008/60000]	Loss: 0.445236
Train Epoch: 0 [25608/60000]	Loss: 0.758497
Train Epoch: 0 [27208/60000]	Loss: 0.599405
Train Epoch: 0 [28808/60000]	Loss: 1.109775
Train Epoch: 0 [30408/60000]	Loss: 0.732716
Train Epoch: 0 [32008/60000]	Loss: 0.673038
Train Epoch: 0 [33608/60000]	Loss: 0.536286
Train Epoch: 0 [35208/60000]	Loss: 0.47460

[32m[I 2021-05-27 08:12:05,286][0m Trial 22 finished with value: 0.8187 and parameters: {'n_layers': 2, 'n_units_l0': 127, 'dropout_l0': 0.38575636792973333, 'n_units_l1': 37, 'dropout_l1': 0.3872231095095028, 'optimizer': 'Adam', 'lr': 0.00023518327459933302, 'batch_size': 8, 'epoch': 19}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.343517
Train Epoch: 0 [1608/60000]	Loss: 1.682413
Train Epoch: 0 [3208/60000]	Loss: 1.755543
Train Epoch: 0 [4808/60000]	Loss: 1.222401
Train Epoch: 0 [6408/60000]	Loss: 1.218455
Train Epoch: 0 [8008/60000]	Loss: 0.838630
Train Epoch: 0 [9608/60000]	Loss: 1.133445
Train Epoch: 0 [11208/60000]	Loss: 0.787323
Train Epoch: 0 [12808/60000]	Loss: 0.949533
Train Epoch: 0 [14408/60000]	Loss: 0.616445
Train Epoch: 0 [16008/60000]	Loss: 0.724165
Train Epoch: 0 [17608/60000]	Loss: 1.345307
Train Epoch: 0 [19208/60000]	Loss: 0.888261
Train Epoch: 0 [20808/60000]	Loss: 0.870688
Train Epoch: 0 [22408/60000]	Loss: 0.593396
Train Epoch: 0 [24008/60000]	Loss: 1.262141
Train Epoch: 0 [25608/60000]	Loss: 0.960622
Train Epoch: 0 [27208/60000]	Loss: 0.844088
Train Epoch: 0 [28808/60000]	Loss: 1.248451
Train Epoch: 0 [30408/60000]	Loss: 0.484785
Train Epoch: 0 [32008/60000]	Loss: 0.686987
Train Epoch: 0 [33608/60000]	Loss: 0.894827
Train Epoch: 0 [35208/60000]	Loss: 0.40062

[32m[I 2021-05-27 08:12:34,656][0m Trial 23 finished with value: 0.8144 and parameters: {'n_layers': 2, 'n_units_l0': 115, 'dropout_l0': 0.3613425638713573, 'n_units_l1': 41, 'dropout_l1': 0.3827875997530835, 'optimizer': 'Adam', 'lr': 0.00019256779153352342, 'batch_size': 8, 'epoch': 20}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.254828
Train Epoch: 0 [1608/60000]	Loss: 1.075923
Train Epoch: 0 [3208/60000]	Loss: 0.739263
Train Epoch: 0 [4808/60000]	Loss: 0.518131
Train Epoch: 0 [6408/60000]	Loss: 1.034317
Train Epoch: 0 [8008/60000]	Loss: 1.282631
Train Epoch: 0 [9608/60000]	Loss: 0.840346
Train Epoch: 0 [11208/60000]	Loss: 0.721933
Train Epoch: 0 [12808/60000]	Loss: 0.500672
Train Epoch: 0 [14408/60000]	Loss: 1.659327
Train Epoch: 0 [16008/60000]	Loss: 0.972755
Train Epoch: 0 [17608/60000]	Loss: 0.395961
Train Epoch: 0 [19208/60000]	Loss: 0.265600
Train Epoch: 0 [20808/60000]	Loss: 0.780990
Train Epoch: 0 [22408/60000]	Loss: 0.605385
Train Epoch: 0 [24008/60000]	Loss: 1.527794
Train Epoch: 0 [25608/60000]	Loss: 0.697200
Train Epoch: 0 [27208/60000]	Loss: 0.942939
Train Epoch: 0 [28808/60000]	Loss: 0.399789
Train Epoch: 0 [30408/60000]	Loss: 0.688443
Train Epoch: 0 [32008/60000]	Loss: 0.730190
Train Epoch: 0 [33608/60000]	Loss: 0.622075
Train Epoch: 0 [35208/60000]	Loss: 0.31613

[32m[I 2021-05-27 08:13:04,477][0m Trial 24 finished with value: 0.8281 and parameters: {'n_layers': 2, 'n_units_l0': 128, 'dropout_l0': 0.3936828802507161, 'n_units_l1': 34, 'dropout_l1': 0.4053705874712352, 'optimizer': 'Adam', 'lr': 0.0008806416884750545, 'batch_size': 8, 'epoch': 19}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.321769
Train Epoch: 0 [1608/60000]	Loss: 1.424003
Train Epoch: 0 [3208/60000]	Loss: 1.534346
Train Epoch: 0 [4808/60000]	Loss: 1.422339
Train Epoch: 0 [6408/60000]	Loss: 0.722594
Train Epoch: 0 [8008/60000]	Loss: 0.700361
Train Epoch: 0 [9608/60000]	Loss: 0.661257
Train Epoch: 0 [11208/60000]	Loss: 0.531767
Train Epoch: 0 [12808/60000]	Loss: 0.715423
Train Epoch: 0 [14408/60000]	Loss: 1.020833
Train Epoch: 0 [16008/60000]	Loss: 1.142352
Train Epoch: 0 [17608/60000]	Loss: 0.731986
Train Epoch: 0 [19208/60000]	Loss: 1.185748
Train Epoch: 0 [20808/60000]	Loss: 0.951310
Train Epoch: 0 [22408/60000]	Loss: 0.611271
Train Epoch: 0 [24008/60000]	Loss: 1.183599
Train Epoch: 0 [25608/60000]	Loss: 1.415888
Train Epoch: 0 [27208/60000]	Loss: 0.908768
Train Epoch: 0 [28808/60000]	Loss: 0.339821
Train Epoch: 0 [30408/60000]	Loss: 0.608248
Train Epoch: 0 [32008/60000]	Loss: 0.874725
Train Epoch: 0 [33608/60000]	Loss: 0.538800
Train Epoch: 0 [35208/60000]	Loss: 0.92862

[32m[I 2021-05-27 08:13:39,033][0m Trial 25 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.270503
Train Epoch: 0 [1608/60000]	Loss: 1.274045
Train Epoch: 0 [3208/60000]	Loss: 1.411250
Train Epoch: 0 [4808/60000]	Loss: 0.879212
Train Epoch: 0 [6408/60000]	Loss: 0.981294
Train Epoch: 0 [8008/60000]	Loss: 0.740973
Train Epoch: 0 [9608/60000]	Loss: 1.355118
Train Epoch: 0 [11208/60000]	Loss: 1.011999
Train Epoch: 0 [12808/60000]	Loss: 0.508337
Train Epoch: 0 [14408/60000]	Loss: 1.174522
Train Epoch: 0 [16008/60000]	Loss: 0.773596
Train Epoch: 0 [17608/60000]	Loss: 0.226004
Train Epoch: 0 [19208/60000]	Loss: 0.664407
Train Epoch: 0 [20808/60000]	Loss: 1.155311
Train Epoch: 0 [22408/60000]	Loss: 0.627024
Train Epoch: 0 [24008/60000]	Loss: 0.488170
Train Epoch: 0 [25608/60000]	Loss: 0.971602
Train Epoch: 0 [27208/60000]	Loss: 1.061222
Train Epoch: 0 [28808/60000]	Loss: 0.238638
Train Epoch: 0 [30408/60000]	Loss: 0.609497
Train Epoch: 0 [32008/60000]	Loss: 0.556223
Train Epoch: 0 [33608/60000]	Loss: 0.486679
Train Epoch: 0 [35208/60000]	Loss: 0.85216

[32m[I 2021-05-27 08:14:08,668][0m Trial 26 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.329314
Train Epoch: 0 [1608/60000]	Loss: 2.020931
Train Epoch: 0 [3208/60000]	Loss: 1.801768
Train Epoch: 0 [4808/60000]	Loss: 1.586551
Train Epoch: 0 [6408/60000]	Loss: 1.474449
Train Epoch: 0 [8008/60000]	Loss: 1.818392
Train Epoch: 0 [9608/60000]	Loss: 1.054693
Train Epoch: 0 [11208/60000]	Loss: 1.266572
Train Epoch: 0 [12808/60000]	Loss: 1.097388
Train Epoch: 0 [14408/60000]	Loss: 1.227406
Train Epoch: 0 [16008/60000]	Loss: 0.738996
Train Epoch: 0 [17608/60000]	Loss: 0.895100
Train Epoch: 0 [19208/60000]	Loss: 1.550949
Train Epoch: 0 [20808/60000]	Loss: 1.439664
Train Epoch: 0 [22408/60000]	Loss: 0.880246
Train Epoch: 0 [24008/60000]	Loss: 0.944292
Train Epoch: 0 [25608/60000]	Loss: 1.006348
Train Epoch: 0 [27208/60000]	Loss: 0.552991
Train Epoch: 0 [28808/60000]	Loss: 1.377444
Train Epoch: 0 [30408/60000]	Loss: 0.981356
Train Epoch: 0 [32008/60000]	Loss: 1.152553
Train Epoch: 0 [33608/60000]	Loss: 1.289283
Train Epoch: 0 [35208/60000]	Loss: 1.10298

[32m[I 2021-05-27 08:14:38,520][0m Trial 27 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.285015
Train Epoch: 0 [1608/60000]	Loss: 1.754445
Train Epoch: 0 [3208/60000]	Loss: 0.892465
Train Epoch: 0 [4808/60000]	Loss: 1.140540
Train Epoch: 0 [6408/60000]	Loss: 0.856875
Train Epoch: 0 [8008/60000]	Loss: 1.012479
Train Epoch: 0 [9608/60000]	Loss: 0.915814
Train Epoch: 0 [11208/60000]	Loss: 0.916071
Train Epoch: 0 [12808/60000]	Loss: 0.847306
Train Epoch: 0 [14408/60000]	Loss: 0.347287
Train Epoch: 0 [16008/60000]	Loss: 0.490011
Train Epoch: 0 [17608/60000]	Loss: 0.450010
Train Epoch: 0 [19208/60000]	Loss: 0.518403
Train Epoch: 0 [20808/60000]	Loss: 0.277485
Train Epoch: 0 [22408/60000]	Loss: 0.706644
Train Epoch: 0 [24008/60000]	Loss: 0.979337
Train Epoch: 0 [25608/60000]	Loss: 0.245330
Train Epoch: 0 [27208/60000]	Loss: 0.236936
Train Epoch: 0 [28808/60000]	Loss: 1.040873
Train Epoch: 0 [30408/60000]	Loss: 0.743012
Train Epoch: 0 [32008/60000]	Loss: 1.009959
Train Epoch: 0 [33608/60000]	Loss: 0.564151
Train Epoch: 0 [35208/60000]	Loss: 0.65683

[32m[I 2021-05-27 08:15:07,484][0m Trial 28 finished with value: 0.8233 and parameters: {'n_layers': 2, 'n_units_l0': 94, 'dropout_l0': 0.3378047227954424, 'n_units_l1': 55, 'dropout_l1': 0.41967427768634386, 'optimizer': 'Adam', 'lr': 0.0018102989537961938, 'batch_size': 8, 'epoch': 16}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [64/60000]	Loss: 2.293413
Train Epoch: 0 [12864/60000]	Loss: 1.827337
Train Epoch: 0 [25664/60000]	Loss: 1.673729
Train Epoch: 0 [38464/60000]	Loss: 1.697715
Train Epoch: 0 [51264/60000]	Loss: 1.499424


[32m[I 2021-05-27 08:15:13,165][0m Trial 29 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.333226
Train Epoch: 0 [1608/60000]	Loss: 1.921189
Train Epoch: 0 [3208/60000]	Loss: 1.302608
Train Epoch: 0 [4808/60000]	Loss: 1.293393
Train Epoch: 0 [6408/60000]	Loss: 0.787334
Train Epoch: 0 [8008/60000]	Loss: 1.301576
Train Epoch: 0 [9608/60000]	Loss: 1.315153
Train Epoch: 0 [11208/60000]	Loss: 0.772483
Train Epoch: 0 [12808/60000]	Loss: 0.689455
Train Epoch: 0 [14408/60000]	Loss: 0.875026
Train Epoch: 0 [16008/60000]	Loss: 0.716912
Train Epoch: 0 [17608/60000]	Loss: 1.313047
Train Epoch: 0 [19208/60000]	Loss: 0.583735
Train Epoch: 0 [20808/60000]	Loss: 0.867107
Train Epoch: 0 [22408/60000]	Loss: 1.150181
Train Epoch: 0 [24008/60000]	Loss: 0.487436
Train Epoch: 0 [25608/60000]	Loss: 0.755576
Train Epoch: 0 [27208/60000]	Loss: 0.508780
Train Epoch: 0 [28808/60000]	Loss: 0.673987
Train Epoch: 0 [30408/60000]	Loss: 0.428215
Train Epoch: 0 [32008/60000]	Loss: 0.442427
Train Epoch: 0 [33608/60000]	Loss: 1.033808
Train Epoch: 0 [35208/60000]	Loss: 0.78506

[32m[I 2021-05-27 08:15:47,807][0m Trial 30 finished with value: 0.8248 and parameters: {'n_layers': 3, 'n_units_l0': 93, 'dropout_l0': 0.29779818045088624, 'n_units_l1': 49, 'dropout_l1': 0.43306368098508674, 'n_units_l2': 62, 'dropout_l2': 0.3414309173878272, 'optimizer': 'Adam', 'lr': 0.00041334374398381194, 'batch_size': 8, 'epoch': 14}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.353613
Train Epoch: 0 [1608/60000]	Loss: 1.877881
Train Epoch: 0 [3208/60000]	Loss: 1.217835
Train Epoch: 0 [4808/60000]	Loss: 1.405376
Train Epoch: 0 [6408/60000]	Loss: 0.640967
Train Epoch: 0 [8008/60000]	Loss: 1.068099
Train Epoch: 0 [9608/60000]	Loss: 0.674202
Train Epoch: 0 [11208/60000]	Loss: 0.977569
Train Epoch: 0 [12808/60000]	Loss: 1.135774
Train Epoch: 0 [14408/60000]	Loss: 0.906469
Train Epoch: 0 [16008/60000]	Loss: 1.424898
Train Epoch: 0 [17608/60000]	Loss: 0.375815
Train Epoch: 0 [19208/60000]	Loss: 0.597053
Train Epoch: 0 [20808/60000]	Loss: 1.128607
Train Epoch: 0 [22408/60000]	Loss: 0.927759
Train Epoch: 0 [24008/60000]	Loss: 0.602629
Train Epoch: 0 [25608/60000]	Loss: 0.322634
Train Epoch: 0 [27208/60000]	Loss: 0.770054
Train Epoch: 0 [28808/60000]	Loss: 0.584670
Train Epoch: 0 [30408/60000]	Loss: 0.508191
Train Epoch: 0 [32008/60000]	Loss: 0.258470
Train Epoch: 0 [33608/60000]	Loss: 0.373944
Train Epoch: 0 [35208/60000]	Loss: 0.79612

[32m[I 2021-05-27 08:16:23,155][0m Trial 31 finished with value: 0.8232 and parameters: {'n_layers': 3, 'n_units_l0': 94, 'dropout_l0': 0.2986436975795837, 'n_units_l1': 52, 'dropout_l1': 0.44264516156990696, 'n_units_l2': 63, 'dropout_l2': 0.3440416098705505, 'optimizer': 'Adam', 'lr': 0.0004307459522734561, 'batch_size': 8, 'epoch': 14}. Best is trial 6 with value: 0.8303.[0m


Train Epoch: 0 [8/60000]	Loss: 2.285689
Train Epoch: 0 [1608/60000]	Loss: 0.910478
Train Epoch: 0 [3208/60000]	Loss: 0.563467
Train Epoch: 0 [4808/60000]	Loss: 0.646847
Train Epoch: 0 [6408/60000]	Loss: 1.112649
Train Epoch: 0 [8008/60000]	Loss: 0.885577
Train Epoch: 0 [9608/60000]	Loss: 1.318089
Train Epoch: 0 [11208/60000]	Loss: 0.961815
Train Epoch: 0 [12808/60000]	Loss: 0.821550
Train Epoch: 0 [14408/60000]	Loss: 0.620627
Train Epoch: 0 [16008/60000]	Loss: 0.788300
Train Epoch: 0 [17608/60000]	Loss: 0.489945
Train Epoch: 0 [19208/60000]	Loss: 0.849441
Train Epoch: 0 [20808/60000]	Loss: 0.964230
Train Epoch: 0 [22408/60000]	Loss: 0.636320
Train Epoch: 0 [24008/60000]	Loss: 0.158996
Train Epoch: 0 [25608/60000]	Loss: 0.781604
Train Epoch: 0 [27208/60000]	Loss: 1.169378
Train Epoch: 0 [28808/60000]	Loss: 2.185668
Train Epoch: 0 [30408/60000]	Loss: 1.605166
Train Epoch: 0 [32008/60000]	Loss: 1.783168
Train Epoch: 0 [33608/60000]	Loss: 0.552827
Train Epoch: 0 [35208/60000]	Loss: 0.30830

[32m[I 2021-05-27 08:16:58,391][0m Trial 32 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.298205
Train Epoch: 0 [1608/60000]	Loss: 1.320230
Train Epoch: 0 [3208/60000]	Loss: 1.294235
Train Epoch: 0 [4808/60000]	Loss: 0.924600
Train Epoch: 0 [6408/60000]	Loss: 1.646131
Train Epoch: 0 [8008/60000]	Loss: 1.390320
Train Epoch: 0 [9608/60000]	Loss: 0.785748
Train Epoch: 0 [11208/60000]	Loss: 0.882395
Train Epoch: 0 [12808/60000]	Loss: 0.593369
Train Epoch: 0 [14408/60000]	Loss: 0.810192
Train Epoch: 0 [16008/60000]	Loss: 1.005859
Train Epoch: 0 [17608/60000]	Loss: 0.965570
Train Epoch: 0 [19208/60000]	Loss: 0.929206
Train Epoch: 0 [20808/60000]	Loss: 0.548052
Train Epoch: 0 [22408/60000]	Loss: 0.907895
Train Epoch: 0 [24008/60000]	Loss: 0.856566
Train Epoch: 0 [25608/60000]	Loss: 1.237324
Train Epoch: 0 [27208/60000]	Loss: 0.833708
Train Epoch: 0 [28808/60000]	Loss: 1.094032
Train Epoch: 0 [30408/60000]	Loss: 1.333667
Train Epoch: 0 [32008/60000]	Loss: 0.732327
Train Epoch: 0 [33608/60000]	Loss: 0.988203
Train Epoch: 0 [35208/60000]	Loss: 0.22693

[32m[I 2021-05-27 08:17:28,079][0m Trial 33 pruned. [0m


Train Epoch: 0 [8/60000]	Loss: 2.287897
Train Epoch: 0 [1608/60000]	Loss: 1.609064
Train Epoch: 0 [3208/60000]	Loss: 1.223159
Train Epoch: 0 [4808/60000]	Loss: 1.169493
Train Epoch: 0 [6408/60000]	Loss: 0.629120
Train Epoch: 0 [8008/60000]	Loss: 1.420549
Train Epoch: 0 [9608/60000]	Loss: 1.170269
Train Epoch: 0 [11208/60000]	Loss: 1.612399
Train Epoch: 0 [12808/60000]	Loss: 2.074721
Train Epoch: 0 [14408/60000]	Loss: 1.212459
Train Epoch: 0 [16008/60000]	Loss: 0.409768
Train Epoch: 0 [17608/60000]	Loss: 0.313329
Train Epoch: 0 [19208/60000]	Loss: 0.608351
Train Epoch: 0 [20808/60000]	Loss: 0.740305
Train Epoch: 0 [22408/60000]	Loss: 0.357406
Train Epoch: 0 [24008/60000]	Loss: 0.981329
Train Epoch: 0 [25608/60000]	Loss: 0.120731
Train Epoch: 0 [27208/60000]	Loss: 0.259260
Train Epoch: 0 [28808/60000]	Loss: 0.541450
Train Epoch: 0 [30408/60000]	Loss: 0.943284
Train Epoch: 0 [32008/60000]	Loss: 0.450106
Train Epoch: 0 [33608/60000]	Loss: 0.691698
Train Epoch: 0 [35208/60000]	Loss: 0.26256

[32m[I 2021-05-27 08:18:03,613][0m Trial 34 finished with value: 0.8227 and parameters: {'n_layers': 3, 'n_units_l0': 109, 'dropout_l0': 0.2772047902118284, 'n_units_l1': 49, 'dropout_l1': 0.41556583571200606, 'n_units_l2': 96, 'dropout_l2': 0.3943526968351876, 'optimizer': 'Adam', 'lr': 0.0007356313362278379, 'batch_size': 8, 'epoch': 15}. Best is trial 6 with value: 0.8303.[0m


Study statistics: 
  Number of finished trials:  35
  Number of pruned trials:  17
  Number of complete trials:  18
Best trial:
  Value:  0.8303
  Params: 
    n_layers: 1
    n_units_l0: 118
    dropout_l0: 0.43773201064875555
    optimizer: Adam
    lr: 0.001138316015475726
    batch_size: 64
    epoch: 19
