# Optimizing autoencoder network and training parameters with Optuna

In [19]:
# import this package 
from pytorch_pae import AE, utils

In [2]:
import optuna
from optuna.trial import TrialState
import logging

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

In [43]:
# import pytorch
import torch
from torchsummary import summary
print(torch.__version__)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

1.8.0
cuda:0


In [5]:
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

study_folder  = '/global/cscratch1/sd/vboehm/OptunaStudies/'
study_name    = "conv_AE_optimization"  # Unique identifier of the study.
study_name    = os.path.join(study_folder, study_name)
storage_name  = "sqlite:///{}.db".format(study_name)
EPOCHS        = 20
NUM_HOURS     = 1
N_TRIALS      = 10
SEED          = 314159

In [60]:
def objective(trial):
    
    
    ## data parameters
    dataset       = 'MNIST'
    loc           = '/global/cscratch1/sd/vboehm/Datasets'

    # padding values in each conv layer
    paddings     = [2]
    # stride values in each conv layer
    # whether tp apply a layer normalization after conv layer
    layer_norm   = [True]
    # whether to train elemntwise affine parameters for normalization layer 
    affine       = False
    # whether to Lipschitz regularize by bounding the spectral norm 
    spec_norm    = True
    # activation function after each layer
    activations  = ['ReLU']
    # whether to add a bias in each layer or not
    bias         = [True]

    # data dimensionality
    dim          = '2D'
    # latent space dimensionality
    latent_dim   = 8
    # number of channels in data
    input_c      = 1 
    # data dimensioality along one axis (only square data supported in 2D)
    input_dim    = 28
    # type of encoder and decoder network (either 'fc' or 'conv')
    encoder_type = 'conv'
    decoder_type = 'conv'

    # if True, the output is fed through a sigmoid layer to bring data values into range [0,1]
    final_sigmoid = True

    ## Training parameters
    nepochs       = EPOCHS
    batchsize     = 64
    initial_lr    = 1e-2

    optimizer        = 'Adam'
    criterion        = 'MSELoss'

    scheduler        = 'ExponentialLR'
    scheduler_params = {'gamma':0.95}
    
    n_layers   = trial.suggest_int('n_layers',1,3)
    latent_dim = trial.suggest_int('latent_dim',2,10)
    
    activations = activations*n_layers
    bias        = bias*n_layers
    layer_norm  = layer_norm*n_layers
    
    
    out_channels = []
    kernel_sizes = []
    strides      = []
    scale_facs   = []
    dropout_rate = []
    
    current_size = input_dim 
    for ii in range(n_layers):
        print(current_size)
        out_channels.append(trial.suggest_int('out_channel_%d'%ii,4,32))
        kernel_sizes.append(trial.suggest_int('kernel_size_%d'%ii,2,max(4,int(current_size)//2)))
        strides.append(trial.suggest_int('stride_%d'%ii,1,kernel_sizes[ii]))
        paddings.append(trial.suggest_int('padding_%d'%ii,0,kernel_sizes[ii]))
        current_size = utils.output_shape(current_size,strides[ii],paddings[ii],kernel_sizes[ii],dilation=1)
        if current_size>3*latent_dim:
            scale_facs.append(trial.suggest_int('scale_fac_%d'%ii,1,2))
        else:
            scale_facs.append(1)
        current_size = current_size//scale_facs[ii]
        dropout_rate.append(trial.suggest_float('dropout_rate_%d'%ii,1e-3,1,log=True))
        # if current_size<=latent_dim:
        #     n_layers=ii
        #     break
            

    
    general_params      = {'input_c': input_c, 'input_dim': input_dim, 'latent_dim': latent_dim, 'encoder_type': encoder_type, 'decoder_type': decoder_type, 'dim': dim}
    conv_network_params = {'n_layers': n_layers, 'out_channels': out_channels, 'kernel_sizes': kernel_sizes, 'scale_facs': scale_facs, 'paddings': paddings,\
                       'strides': strides,'activations': activations, 'spec_norm': spec_norm, 'dropout_rate':dropout_rate, 'layer_norm': layer_norm,\
                       'affine': affine,'final_sigmoid': final_sigmoid, 'bias':bias}

    training_params     = {'batchsize': batchsize, 'initial_lr': initial_lr, 'optimizer': optimizer, 'criterion': criterion, \
                       'scheduler': scheduler, 'scheduler_params':scheduler_params}
    data_params         = {'dataset':dataset, 'loc': loc}
    
    try:
        AE1                 = AE.Autoencoder(general_params,data_params,conv_network_params, conv_network_params, training_params, device)
    
        if dim =='1D':
            summary(AE1, (input_c,input_dim))
        else:
            summary(AE1, (input_c, input_dim, input_dim))

        train_loss, valid_loss = AE1.train(nepochs)
        
    except:
        valid_loss = 100
    
    return valid_loss[-1]

In [61]:
time = NUM_HOURS*60*60-600
study = optuna.create_study(direction='minimize',study_name=study_name, storage=storage_name,load_if_exists=True,  sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10))
study.optimize(objective, n_trials=N_TRIALS, timeout=time)

[32m[I 2022-01-14 17:57:51,011][0m Using an existing study with name '/global/cscratch1/sd/vboehm/OptunaStudies/conv_AE_optimization' instead of creating a new one.[0m


Using an existing study with name '/global/cscratch1/sd/vboehm/OptunaStudies/conv_AE_optimization' instead of creating a new one.
28
10
5
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 10, 10]             160
         LayerNorm-2           [-1, 16, 10, 10]               0
              ReLU-3           [-1, 16, 10, 10]               0
 AdaptiveMaxPool2d-4           [-1, 16, 10, 10]               0
            Conv2d-5             [-1, 27, 5, 5]           6,939
         LayerNorm-6             [-1, 27, 5, 5]               0
              ReLU-7             [-1, 27, 5, 5]               0
 AdaptiveMaxPool2d-8             [-1, 27, 5, 5]               0
            Conv2d-9             [-1, 11, 4, 4]           1,199
        LayerNorm-10             [-1, 11, 4, 4]               0
             ReLU-11             [-1, 11, 4, 4]               0
AdaptiveMaxPool2d-12         

[33m[W 2022-01-14 18:01:26,139][0m Trial 38 failed, because the number of the values 20 did not match the number of the objectives 1.[0m


Trial 38 failed, because the number of the values 20 did not match the number of the objectives 1.


KeyboardInterrupt: 

In [None]:
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
