# Optimizing autoencoder network and training parameters with Optuna

In [4]:
# import this package 
from pytorch_pae import AE, utils

In [5]:
import optuna
from optuna.trial import TrialState
import logging

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

In [7]:
# import pytorch
import torch
from torchsummary import summary
print(torch.__version__)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

1.8.0
cuda:0


In [15]:
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

study_folder  = '/global/cscratch1/sd/vboehm/OptunaStudies/'
study_name    = "conv_AE_optimization_1"  # Unique identifier of the study.
study_name    = os.path.join(study_folder, study_name)
storage_name  = "sqlite:///{}.db".format(study_name)
EPOCHS        = 20
NUM_HOURS     = 1
N_TRIALS      = 10
SEED          = 314159

In [16]:
def objective(trial):
    
    
    ## data parameters
    dataset       = 'MNIST'
    loc           = '/global/cscratch1/sd/vboehm/Datasets'

    # padding values in each conv layer
    paddings     = [2]
    # stride values in each conv layer
    # whether tp apply a layer normalization after conv layer
    layer_norm   = [True]
    # whether to train elemntwise affine parameters for normalization layer 
    affine       = False
    # whether to Lipschitz regularize by bounding the spectral norm 
    spec_norm    = True
    # activation function after each layer
    activations  = ['ReLU']
    # whether to add a bias in each layer or not
    bias         = [True]

    # data dimensionality
    dim          = '2D'
    # latent space dimensionality
    latent_dim   = 8
    # number of channels in data
    input_c      = 1 
    # data dimensioality along one axis (only square data supported in 2D)
    input_dim    = 28
    # type of encoder and decoder network (either 'fc' or 'conv')
    encoder_type = 'conv'
    decoder_type = 'conv'

    # if True, the output is fed through a sigmoid layer to bring data values into range [0,1]
    final_sigmoid = True

    ## Training parameters
    nepochs       = EPOCHS
    batchsize     = 64
    initial_lr    = 1e-2

    optimizer        = 'Adam'
    criterion        = 'MSELoss'

    scheduler        = 'ExponentialLR'
    scheduler_params = {'gamma':0.95}
    
    n_layers   = trial.suggest_int('n_layers',1,3)
    latent_dim = trial.suggest_int('latent_dim',2,10)
    
    activations = activations*n_layers
    bias        = bias*n_layers
    layer_norm  = layer_norm*n_layers
    
    
    out_channels = []
    kernel_sizes = []
    strides      = []
    scale_facs   = []
    dropout_rate = []
    
    current_size = input_dim 
    for ii in range(n_layers):
        print(current_size)
        out_channels.append(trial.suggest_int('out_channel_%d'%ii,4,32))
        kernel_sizes.append(trial.suggest_int('kernel_size_%d'%ii,2,max(4,int(current_size)//2)))
        strides.append(trial.suggest_int('stride_%d'%ii,1,2))
        paddings.append(trial.suggest_int('padding_%d'%ii,1,kernel_sizes[ii]))
        current_size = utils.output_shape(current_size,strides[ii],paddings[ii],kernel_sizes[ii],dilation=1)
        if current_size>3*latent_dim:
            scale_facs.append(trial.suggest_int('scale_fac_%d'%ii,1,2))
        else:
            scale_facs.append(1)
        current_size = current_size//scale_facs[ii]
        #dropout_rate.append(trial.suggest_float('dropout_rate_%d'%ii,1e-3,1,log=True))

    
    general_params      = {'input_c': input_c, 'input_dim': input_dim, 'latent_dim': latent_dim, 'encoder_type': encoder_type, 'decoder_type': decoder_type, 'dim': dim}
    conv_network_params = {'n_layers': n_layers, 'out_channels': out_channels, 'kernel_sizes': kernel_sizes, 'scale_facs': scale_facs, 'paddings': paddings,\
                       'strides': strides,'activations': activations, 'spec_norm': spec_norm, 'layer_norm': layer_norm,\
                       'affine': affine,'final_sigmoid': final_sigmoid, 'bias':bias}

    training_params     = {'batchsize': batchsize, 'initial_lr': initial_lr, 'optimizer': optimizer, 'criterion': criterion, \
                       'scheduler': scheduler, 'scheduler_params':scheduler_params}
    data_params         = {'dataset':dataset, 'loc': loc}
    
    AE1                 = AE.Autoencoder(general_params,data_params,conv_network_params, conv_network_params, training_params, device)

    if dim =='1D':
        summary(AE1, (input_c,input_dim))
    else:
        summary(AE1, (input_c, input_dim, input_dim))

    train_loss, valid_loss = AE1.train(nepochs)
        
    
    return valid_loss[-1]

In [17]:
time = NUM_HOURS*60*60-600
study = optuna.create_study(direction='minimize',study_name=study_name, storage=storage_name,load_if_exists=True,  sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10))
study.optimize(objective, n_trials=N_TRIALS, timeout=time)

[32m[I 2022-01-17 14:02:11,253][0m A new study created in RDB with name: /global/cscratch1/sd/vboehm/OptunaStudies/conv_AE_optimization_1[0m


A new study created in RDB with name: /global/cscratch1/sd/vboehm/OptunaStudies/conv_AE_optimization_1
A new study created in RDB with name: /global/cscratch1/sd/vboehm/OptunaStudies/conv_AE_optimization_1
28
15
9
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 15, 15]             160
         LayerNorm-2           [-1, 16, 15, 15]               0
              ReLU-3           [-1, 16, 15, 15]               0
 AdaptiveMaxPool2d-4           [-1, 16, 15, 15]               0
            Conv2d-5             [-1, 27, 9, 9]          10,827
         LayerNorm-6             [-1, 27, 9, 9]               0
              ReLU-7             [-1, 27, 9, 9]               0
 AdaptiveMaxPool2d-8             [-1, 27, 9, 9]               0
            Conv2d-9             [-1, 11, 7, 7]           1,199
        LayerNorm-10             [-1, 11, 7, 7]               0
             ReLU

[32m[I 2022-01-17 14:05:52,370][0m Trial 0 finished with value: 0.021656490862369537 and parameters: {'n_layers': 3, 'latent_dim': 6, 'out_channel_0': 16, 'kernel_size_0': 3, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.0019706980247466556, 'out_channel_1': 27, 'kernel_size_1': 5, 'stride_1': 2, 'padding_1': 3, 'dropout_rate_1': 0.026930771022633998, 'out_channel_2': 11, 'kernel_size_2': 2, 'stride_2': 2, 'padding_2': 1, 'dropout_rate_2': 0.003670440938988837}. Best is trial 0 with value: 0.021656490862369537.[0m


Trial 0 finished with value: 0.021656490862369537 and parameters: {'n_layers': 3, 'latent_dim': 6, 'out_channel_0': 16, 'kernel_size_0': 3, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.0019706980247466556, 'out_channel_1': 27, 'kernel_size_1': 5, 'stride_1': 2, 'padding_1': 3, 'dropout_rate_1': 0.026930771022633998, 'out_channel_2': 11, 'kernel_size_2': 2, 'stride_2': 2, 'padding_2': 1, 'dropout_rate_2': 0.003670440938988837}. Best is trial 0 with value: 0.021656490862369537.
Trial 0 finished with value: 0.021656490862369537 and parameters: {'n_layers': 3, 'latent_dim': 6, 'out_channel_0': 16, 'kernel_size_0': 3, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.0019706980247466556, 'out_channel_1': 27, 'kernel_size_1': 5, 'stride_1': 2, 'padding_1': 3, 'dropout_rate_1': 0.026930771022633998, 'out_channel_2': 11, 'kernel_size_2': 2, 'stride_2': 2, 'padding_2': 1, 'dropout_rate_2': 0.003670440938988837}. Best is trial 0 with value: 0.021656490862369537.
28
-----------------------

[32m[I 2022-01-17 14:08:18,862][0m Trial 1 finished with value: 0.01612713560461998 and parameters: {'n_layers': 1, 'latent_dim': 9, 'out_channel_0': 22, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 12, 'dropout_rate_0': 0.00758929521035104}. Best is trial 1 with value: 0.01612713560461998.[0m


Trial 1 finished with value: 0.01612713560461998 and parameters: {'n_layers': 1, 'latent_dim': 9, 'out_channel_0': 22, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 12, 'dropout_rate_0': 0.00758929521035104}. Best is trial 1 with value: 0.01612713560461998.
Trial 1 finished with value: 0.01612713560461998 and parameters: {'n_layers': 1, 'latent_dim': 9, 'out_channel_0': 22, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 12, 'dropout_rate_0': 0.00758929521035104}. Best is trial 1 with value: 0.01612713560461998.
28
15
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 30, 30]              40
         LayerNorm-2            [-1, 4, 30, 30]               0
              ReLU-3            [-1, 4, 30, 30]               0
 AdaptiveMaxPool2d-4            [-1, 4, 15, 15]               0
            Conv2d-5           [-1, 18, 18, 18]           1,170
         LayerNorm-6   

[32m[I 2022-01-17 14:11:25,765][0m Trial 2 finished with value: 0.016324758529663086 and parameters: {'n_layers': 2, 'latent_dim': 8, 'out_channel_0': 4, 'kernel_size_0': 3, 'stride_0': 1, 'padding_0': 3, 'scale_fac_0': 2, 'dropout_rate_0': 0.023029823213509836, 'out_channel_1': 18, 'kernel_size_1': 4, 'stride_1': 1, 'padding_1': 1, 'dropout_rate_1': 0.00490351726655337}. Best is trial 1 with value: 0.01612713560461998.[0m


Trial 2 finished with value: 0.016324758529663086 and parameters: {'n_layers': 2, 'latent_dim': 8, 'out_channel_0': 4, 'kernel_size_0': 3, 'stride_0': 1, 'padding_0': 3, 'scale_fac_0': 2, 'dropout_rate_0': 0.023029823213509836, 'out_channel_1': 18, 'kernel_size_1': 4, 'stride_1': 1, 'padding_1': 1, 'dropout_rate_1': 0.00490351726655337}. Best is trial 1 with value: 0.01612713560461998.
Trial 2 finished with value: 0.016324758529663086 and parameters: {'n_layers': 2, 'latent_dim': 8, 'out_channel_0': 4, 'kernel_size_0': 3, 'stride_0': 1, 'padding_0': 3, 'scale_fac_0': 2, 'dropout_rate_0': 0.023029823213509836, 'out_channel_1': 18, 'kernel_size_1': 4, 'stride_1': 1, 'padding_1': 1, 'dropout_rate_1': 0.00490351726655337}. Best is trial 1 with value: 0.01612713560461998.
28
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 12, 21, 21]           1,740
         LayerNorm-2      

[32m[I 2022-01-17 14:14:02,140][0m Trial 3 finished with value: 0.017897987738251686 and parameters: {'n_layers': 1, 'latent_dim': 8, 'out_channel_0': 12, 'kernel_size_0': 12, 'stride_0': 1, 'padding_0': 2, 'dropout_rate_0': 0.2126915953551854}. Best is trial 1 with value: 0.01612713560461998.[0m


Trial 3 finished with value: 0.017897987738251686 and parameters: {'n_layers': 1, 'latent_dim': 8, 'out_channel_0': 12, 'kernel_size_0': 12, 'stride_0': 1, 'padding_0': 2, 'dropout_rate_0': 0.2126915953551854}. Best is trial 1 with value: 0.01612713560461998.
Trial 3 finished with value: 0.017897987738251686 and parameters: {'n_layers': 1, 'latent_dim': 8, 'out_channel_0': 12, 'kernel_size_0': 12, 'stride_0': 1, 'padding_0': 2, 'dropout_rate_0': 0.2126915953551854}. Best is trial 1 with value: 0.01612713560461998.
28
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 31, 28, 28]             806
         LayerNorm-2           [-1, 31, 28, 28]               0
              ReLU-3           [-1, 31, 28, 28]               0
 AdaptiveMaxPool2d-4           [-1, 31, 14, 14]               0
           Flatten-5                 [-1, 6076]               0
            Linear-6        

[32m[I 2022-01-17 14:16:44,603][0m Trial 4 finished with value: 0.01903107576072216 and parameters: {'n_layers': 1, 'latent_dim': 7, 'out_channel_0': 31, 'kernel_size_0': 5, 'stride_0': 1, 'padding_0': 5, 'scale_fac_0': 2, 'dropout_rate_0': 0.012511325725821264}. Best is trial 1 with value: 0.01612713560461998.[0m


Trial 4 finished with value: 0.01903107576072216 and parameters: {'n_layers': 1, 'latent_dim': 7, 'out_channel_0': 31, 'kernel_size_0': 5, 'stride_0': 1, 'padding_0': 5, 'scale_fac_0': 2, 'dropout_rate_0': 0.012511325725821264}. Best is trial 1 with value: 0.01612713560461998.
Trial 4 finished with value: 0.01903107576072216 and parameters: {'n_layers': 1, 'latent_dim': 7, 'out_channel_0': 31, 'kernel_size_0': 5, 'stride_0': 1, 'padding_0': 5, 'scale_fac_0': 2, 'dropout_rate_0': 0.012511325725821264}. Best is trial 1 with value: 0.01612713560461998.
28
12
21
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 28, 12, 12]           2,296
         LayerNorm-2           [-1, 28, 12, 12]               0
              ReLU-3           [-1, 28, 12, 12]               0
 AdaptiveMaxPool2d-4           [-1, 28, 12, 12]               0
            Conv2d-5           [-1, 20, 21, 21]   

[32m[I 2022-01-17 14:20:35,539][0m Trial 5 finished with value: 0.010023165494203568 and parameters: {'n_layers': 3, 'latent_dim': 10, 'out_channel_0': 28, 'kernel_size_0': 9, 'stride_0': 2, 'padding_0': 5, 'dropout_rate_0': 0.4153483523414692, 'out_channel_1': 20, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'dropout_rate_1': 0.4565939885488035, 'out_channel_2': 31, 'kernel_size_2': 6, 'stride_2': 1, 'padding_2': 5, 'dropout_rate_2': 0.7934320102486501}. Best is trial 5 with value: 0.010023165494203568.[0m


Trial 5 finished with value: 0.010023165494203568 and parameters: {'n_layers': 3, 'latent_dim': 10, 'out_channel_0': 28, 'kernel_size_0': 9, 'stride_0': 2, 'padding_0': 5, 'dropout_rate_0': 0.4153483523414692, 'out_channel_1': 20, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'dropout_rate_1': 0.4565939885488035, 'out_channel_2': 31, 'kernel_size_2': 6, 'stride_2': 1, 'padding_2': 5, 'dropout_rate_2': 0.7934320102486501}. Best is trial 5 with value: 0.010023165494203568.
Trial 5 finished with value: 0.010023165494203568 and parameters: {'n_layers': 3, 'latent_dim': 10, 'out_channel_0': 28, 'kernel_size_0': 9, 'stride_0': 2, 'padding_0': 5, 'dropout_rate_0': 0.4153483523414692, 'out_channel_1': 20, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'dropout_rate_1': 0.4565939885488035, 'out_channel_2': 31, 'kernel_size_2': 6, 'stride_2': 1, 'padding_2': 5, 'dropout_rate_2': 0.7934320102486501}. Best is trial 5 with value: 0.010023165494203568.
28
10
--------------------------------

[32m[I 2022-01-17 14:23:40,404][0m Trial 6 finished with value: 0.018185188993811607 and parameters: {'n_layers': 2, 'latent_dim': 7, 'out_channel_0': 8, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.002828662125731526, 'out_channel_1': 9, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 5, 'dropout_rate_1': 0.120607338776516}. Best is trial 5 with value: 0.010023165494203568.[0m


Trial 6 finished with value: 0.018185188993811607 and parameters: {'n_layers': 2, 'latent_dim': 7, 'out_channel_0': 8, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.002828662125731526, 'out_channel_1': 9, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 5, 'dropout_rate_1': 0.120607338776516}. Best is trial 5 with value: 0.010023165494203568.
Trial 6 finished with value: 0.018185188993811607 and parameters: {'n_layers': 2, 'latent_dim': 7, 'out_channel_0': 8, 'kernel_size_0': 13, 'stride_0': 2, 'padding_0': 3, 'dropout_rate_0': 0.002828662125731526, 'out_channel_1': 9, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 5, 'dropout_rate_1': 0.120607338776516}. Best is trial 5 with value: 0.010023165494203568.
28
11
29
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 30, 11, 11]           4,350
         LayerNorm-2           [-1, 30, 11, 11]           

[32m[I 2022-01-17 14:27:29,404][0m Trial 7 finished with value: 0.029728304594755173 and parameters: {'n_layers': 3, 'latent_dim': 3, 'out_channel_0': 30, 'kernel_size_0': 12, 'stride_0': 2, 'padding_0': 11, 'scale_fac_0': 1, 'dropout_rate_0': 0.0016570487043416167, 'out_channel_1': 6, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 3, 'scale_fac_1': 1, 'dropout_rate_1': 0.0011609225729553483, 'out_channel_2': 30, 'kernel_size_2': 7, 'stride_2': 2, 'padding_2': 5, 'scale_fac_2': 1, 'dropout_rate_2': 0.9141056503156795}. Best is trial 5 with value: 0.010023165494203568.[0m


epoch: 19, training loss: 3.0149e-02, validation loss: 2.9728e-02, learning rate: 3.5849e-03
Trial 7 finished with value: 0.029728304594755173 and parameters: {'n_layers': 3, 'latent_dim': 3, 'out_channel_0': 30, 'kernel_size_0': 12, 'stride_0': 2, 'padding_0': 11, 'scale_fac_0': 1, 'dropout_rate_0': 0.0016570487043416167, 'out_channel_1': 6, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 3, 'scale_fac_1': 1, 'dropout_rate_1': 0.0011609225729553483, 'out_channel_2': 30, 'kernel_size_2': 7, 'stride_2': 2, 'padding_2': 5, 'scale_fac_2': 1, 'dropout_rate_2': 0.9141056503156795}. Best is trial 5 with value: 0.010023165494203568.
Trial 7 finished with value: 0.029728304594755173 and parameters: {'n_layers': 3, 'latent_dim': 3, 'out_channel_0': 30, 'kernel_size_0': 12, 'stride_0': 2, 'padding_0': 11, 'scale_fac_0': 1, 'dropout_rate_0': 0.0016570487043416167, 'out_channel_1': 6, 'kernel_size_1': 5, 'stride_1': 1, 'padding_1': 3, 'scale_fac_1': 1, 'dropout_rate_1': 0.0011609225729553483, 'out

[32m[I 2022-01-17 14:27:33,927][0m Trial 8 finished with value: 100.0 and parameters: {'n_layers': 3, 'latent_dim': 9, 'out_channel_0': 24, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 5, 'dropout_rate_0': 0.10636883456185045, 'out_channel_1': 18, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 2, 'dropout_rate_1': 0.04435175225641668, 'out_channel_2': 13, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 2, 'dropout_rate_2': 0.10685520723459814}. Best is trial 5 with value: 0.010023165494203568.[0m


Trial 8 finished with value: 100.0 and parameters: {'n_layers': 3, 'latent_dim': 9, 'out_channel_0': 24, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 5, 'dropout_rate_0': 0.10636883456185045, 'out_channel_1': 18, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 2, 'dropout_rate_1': 0.04435175225641668, 'out_channel_2': 13, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 2, 'dropout_rate_2': 0.10685520723459814}. Best is trial 5 with value: 0.010023165494203568.
Trial 8 finished with value: 100.0 and parameters: {'n_layers': 3, 'latent_dim': 9, 'out_channel_0': 24, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 5, 'dropout_rate_0': 0.10636883456185045, 'out_channel_1': 18, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 2, 'dropout_rate_1': 0.04435175225641668, 'out_channel_2': 13, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 2, 'dropout_rate_2': 0.10685520723459814}. Best is trial 5 with value: 0.010023165494203568.
28
20
45
-----------------

[32m[I 2022-01-17 14:32:06,495][0m Trial 9 finished with value: 0.015887374058365822 and parameters: {'n_layers': 3, 'latent_dim': 7, 'out_channel_0': 29, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 13, 'dropout_rate_0': 0.0011327476095305468, 'out_channel_1': 31, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 1, 'dropout_rate_1': 0.5262400465239191, 'out_channel_2': 25, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 4, 'scale_fac_2': 1, 'dropout_rate_2': 0.4391768397037223}. Best is trial 5 with value: 0.010023165494203568.[0m


Trial 9 finished with value: 0.015887374058365822 and parameters: {'n_layers': 3, 'latent_dim': 7, 'out_channel_0': 29, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 13, 'dropout_rate_0': 0.0011327476095305468, 'out_channel_1': 31, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 1, 'dropout_rate_1': 0.5262400465239191, 'out_channel_2': 25, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 4, 'scale_fac_2': 1, 'dropout_rate_2': 0.4391768397037223}. Best is trial 5 with value: 0.010023165494203568.
Trial 9 finished with value: 0.015887374058365822 and parameters: {'n_layers': 3, 'latent_dim': 7, 'out_channel_0': 29, 'kernel_size_0': 13, 'stride_0': 1, 'padding_0': 13, 'dropout_rate_0': 0.0011327476095305468, 'out_channel_1': 31, 'kernel_size_1': 2, 'stride_1': 1, 'padding_1': 2, 'scale_fac_1': 1, 'dropout_rate_1': 0.5262400465239191, 'out_channel_2': 25, 'kernel_size_2': 5, 'stride_2': 2, 'padding_2': 4, 'scale_fac_2': 1, 'dropout_rate_2': 0.4391768397037223}. Best is 

In [18]:
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  0
  Number of complete trials:  10
Best trial:
  Value:  0.010023165494203568
  Params: 
    dropout_rate_0: 0.4153483523414692
    dropout_rate_1: 0.4565939885488035
    dropout_rate_2: 0.7934320102486501
    kernel_size_0: 9
    kernel_size_1: 2
    kernel_size_2: 6
    latent_dim: 10
    n_layers: 3
    out_channel_0: 28
    out_channel_1: 20
    out_channel_2: 31
    padding_0: 5
    padding_1: 2
    padding_2: 5
    stride_0: 2
    stride_1: 1
    stride_2: 1
