In [1]:
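#Uncomment to install Optuna into the kernel's environment (the output below is from optuna 3.1.0)
#%pip install optuna==3.1.0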

Collecting optuna
  Downloading optuna-3.1.0-py3-none-any.whl (365 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 365.3/365.3 kB 7.1 MB/s eta 0:00:00
Collecting PyYAML
  Using cached PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl (173 kB)
Collecting cmaes>=0.9.1
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting sqlalchemy>=1.3.0
  Downloading SQLAlchemy-2.0.7-cp39-cp39-macosx_11_0_arm64.whl (2.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 39.1 MB/s eta 0:00:00
Collecting alembic>=1.5.0
  Downloading alembic-1.10.2-py3-none-any.whl (212 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 212.2/212.2 kB 34.5 MB/s eta 0:00:00
Collecting Mako
  Downl

In [3]:
import optuna

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
from torchvision import transforms

from torch.utils.data import Dataset, DataLoader

from tqdm.autonotebook import tqdm

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

import pandas as pd

from sklearn.metrics import accuracy_score

import time

from idlmam import train_simple_network, Flatten, weight_reset, set_seed, run_epoch

In [None]:
def objective(trial, epochs=10):
    """Optuna objective: train one fully-connected network and return its score.

    A random 80/20 split of ``train_data`` is drawn, a Tanh network whose
    width, depth and learning rate are sampled from ``trial`` is trained on
    the 80% part, and the last ``'test Accuracy'`` value reported by
    ``train_network`` for the remaining 20% is returned.

    Relies on names defined elsewhere in the notebook: ``train_data``, ``B``
    (batch size), ``D`` (input size of the first linear layer), ``loss_func``,
    ``device`` and ``train_network``.

    Args:
        trial: the Optuna trial that supplies the hyper-parameters.
        epochs: number of training epochs for this trial. Defaults to 10, the
            value that used to be hard-coded. It also sets the period of the
            cosine learning-rate schedule, so the schedule always matches the
            actual length of training.

    Returns:
        ``results['test Accuracy'].iloc[-1]``, the value Optuna will maximize
        or minimize depending on the study direction.

    Raises:
        ValueError: if ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    #Hold out 20% of the training data to score this trial
    train_subset = int(len(train_data)*0.8)
    test_subset = len(train_data)-train_subset

    # NOTE(review): no generator is passed to random_split, so every trial is
    # scored on a different random hold-out set - confirm this is intended.
    split = torch.utils.data.random_split(train_data, [train_subset, test_subset])

    t_loader = DataLoader(split[0], batch_size=B, shuffle=True)
    v_loader = DataLoader(split[1], batch_size=B, shuffle=False)

    #Hidden layer size
    n = trial.suggest_int('neurons_per_layer', 16, 256)
    #Number of hidden layers
    layers = trial.suggest_int('hidden_layers', 1, 6)
    #How many classes are there?
    classes = 10

    #At least one hidden layer, that takes in D inputs
    sequential_layers = [
        nn.Flatten(),
        nn.Linear(D,  n),
        nn.Tanh(),
    ]
    #Add the remaining hidden layers, depending on what Optuna gave us for the "layers" parameter
    for _ in range(layers-1):
        sequential_layers.append( nn.Linear(n,  n) )
        sequential_layers.append( nn.Tanh() )

    #Output layer
    sequential_layers.append( nn.Linear(n, classes) )

    #Now turn the list of layers into a PyTorch Sequential Module
    fc_model = nn.Sequential(*sequential_layers)

    #Global learning rate, sampled on a log scale. suggest_loguniform is
    #deprecated since Optuna 3.0; suggest_float(..., log=True) is its replacement.
    eta_global = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    optimizer = torch.optim.AdamW(fc_model.parameters(), lr=eta_global)

    #The cosine period is derived from the epochs we actually train for
    #(and clamped to >= 1 so very short runs do not produce T_max == 0)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max(1, epochs//3))
    results = train_network(fc_model, loss_func, t_loader, test_loader=v_loader,
                            epochs=epochs, optimizer=optimizer, lr_schedule=scheduler,
                            score_funcs={'Accuracy': accuracy_score}, device=device,
                            disable_tqdm=True)

    #The objective value linked with the Trial object
    return results['test Accuracy'].iloc[-1]

In [None]:
#Normally we would run more like 50-100 trials; only 10 are used here
#so that this notebook runs in a reasonable amount of time
study = optuna.create_study(direction="maximize")
study.optimize(
    objective,
    n_trials=10,
)

In [None]:
#Show the hyper-parameter values recorded for the best trial of the study
print(study.best_params) 

In [None]:
#Optimization history of the study (built by optuna.visualization)
fig = optuna.visualization.plot_optimization_history(study)
fig.show()

In [None]:
#Slice plot of the study (built by optuna.visualization)
fig = optuna.visualization.plot_slice(study)
fig.show()

In [None]:

#Contour plot restricted to the three hyper-parameters tuned by the objective
fig = optuna.visualization.plot_contour(
    study,
    params=["neurons_per_layer", "hidden_layers", "learning_rate"],
)
fig.show()

Prunable studies

In [None]:
def objectivePrunable(trial, epochs=10):
    """Optuna objective that reports after every epoch so trials can be pruned.

    A random 80/20 split of ``train_data`` is drawn and a Tanh network whose
    width, depth and learning rate are sampled from ``trial`` is trained one
    epoch at a time. After each epoch the last ``'val Accuracy'`` value
    returned by ``train_network`` is reported to the trial, and the trial is
    abandoned as soon as ``trial.should_prune()`` says so.

    Relies on names defined elsewhere in the notebook: ``train_data``, ``B``
    (batch size), ``D`` (input size of the first linear layer), ``loss_func``,
    ``device`` and ``train_network``.

    Args:
        trial: the Optuna trial that supplies the hyper-parameters and
            receives the intermediate reports.
        epochs: maximum number of training epochs for this trial. Defaults to
            10, the value that used to be hard-coded in the loop. It also
            sets the period of the cosine learning-rate schedule.

    Returns:
        The accuracy reported for the final epoch (only reached when the
        trial was never pruned).

    Raises:
        ValueError: if ``epochs`` is smaller than 1.
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true
            after an epoch.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    #Hold out 20% of the training data to score this trial
    train_subset = int(len(train_data)*0.8)
    test_subset = len(train_data)-train_subset

    # NOTE(review): no generator is passed to random_split, so every trial is
    # scored on a different random hold-out set - confirm this is intended.
    split = torch.utils.data.random_split(train_data, [train_subset, test_subset])

    t_loader = DataLoader(split[0], batch_size=B, shuffle=True)
    v_loader = DataLoader(split[1], batch_size=B, shuffle=False)

    # NOTE(review): the width and learning-rate ranges are much wider than in
    # `objective` - presumably on purpose, so that poor trials exist to be
    # pruned; confirm.
    #Hidden layer size
    n = trial.suggest_int('neurons_per_layer', 1, 256)
    #Number of hidden layers
    layers = trial.suggest_int('hidden_layers', 1, 6)
    #How many classes are there?
    classes = 10

    #At least one hidden layer, that takes in D inputs.
    #nn.Flatten is used here, as `objective` does.
    sequential_layers = [
        nn.Flatten(),
        nn.Linear(D,  n),
        nn.Tanh(),
    ]

    for _ in range(layers-1):
        sequential_layers.append( nn.Linear(n,  n) )
        sequential_layers.append( nn.Tanh() )

    #Output layer
    sequential_layers.append( nn.Linear(n, classes) )

    fc_model = nn.Sequential(*sequential_layers)

    #suggest_loguniform is deprecated since Optuna 3.0;
    #suggest_float(..., log=True) is its replacement.
    eta_global = trial.suggest_float('learning_rate', 1e-6, 1e+2, log=True)

    #We need to create the optimizer (and any learning rate schedule) outside of the train_network call so that the same optimizer is re-used between epochs
    optimizer = torch.optim.AdamW(fc_model.parameters(), lr=eta_global)
    #The cosine period is derived from the epochs we actually train for
    #(and clamped to >= 1 so very short runs do not produce T_max == 0)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max(1, epochs//3))

    for epoch in range(epochs):
        #Train for exactly one more epoch, then let Optuna look at the score
        results = train_network(fc_model, loss_func, t_loader, val_loader=v_loader,
                                epochs=1, optimizer=optimizer, lr_schedule=scheduler,
                                score_funcs={'Accuracy': accuracy_score}, device=device,
                                disable_tqdm=True)
        cur_accuracy = results['val Accuracy'].iloc[-1]
        trial.report(cur_accuracy, epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return cur_accuracy

In [None]:
#Second study, driven by the prunable objective. No pruner is passed to
#create_study, so Optuna's default pruner decides when a trial is stopped.
study2 = optuna.create_study(direction="maximize")
study2.optimize(
    objectivePrunable,
    n_trials=20,
)