<a href="https://www.kaggle.com/code/averma111/pytorch-optuna-simple?scriptVersionId=129734990" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [2]:
!pip install optuna==3.1.0

Collecting optuna==3.1.0
  Downloading optuna-3.1.0-py3-none-any.whl (365 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.3/365.3 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: optuna
  Attempting uninstall: optuna
    Found existing installation: optuna 3.1.1
    Uninstalling optuna-3.1.1:
      Successfully uninstalled optuna-3.1.1
Successfully installed optuna-3.1.0
[0m

In [9]:
import os, sys
import argparse
import numpy as np
import pandas as pd

import json
import optuna
import torch
from torch.utils.data import DataLoader,Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection  import train_test_split

torch.manual_seed(42)

<torch._C.Generator at 0x7bcb9daeb730>

In [10]:
class BostonData(Dataset):
    def __init__(self,X,y,scale_data=True):
    # Apply scaling if necessary
      if scale_data:
            X = StandardScaler().fit_transform(X)
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
            
    def __getitem__(self,item):
        return self.X[item], self.y[item]
    
    def __len__(self):
        return len(self.X)

In [15]:
def MLP(trail,in_features,n_layers,dropout,n_output):
    
    layers = []
    fc_layer = in_features
    
    for i in range(n_layers):
        
        out_features = trial.suggest_int("n_units_l{}".format(i),2,in_features)
        trial.Trial.suggest_int()
        layers.append(torch.nn.Linear(in_features, out_features))
        layers.append(torch.nn.LeakyReLU())
        in_features = out_features

    layers.append(torch.nn.Linear(in_features, fc_layer)) 
    layers.append(torch.nn.LeakyReLU())

    layers.append(torch.nn.Dropout(dropout))
    layers.append(torch.nn.Linear(fc_layer,n_output)) 
    
    return torch.nn.Sequential(*layers)

In [16]:
def train_net(trial, params):
    # Load Boston dataset
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Prepare Boston dataset
    trainloader = torch.utils.data.DataLoader(BostonData(X_train, y_train), batch_size=10, shuffle=True, num_workers=1)
    testloader = torch.utils.data.DataLoader(BostonData(X_test, y_test), batch_size=1, shuffle=True, num_workers=1)
    
    # Initialize the MLP
    net = MLP(trial,
            in_features= X.shape[1],
            n_layers=params['n_layers'] ,
            dropout=params['dropout'],
            n_output= 1)
    
  # Define the loss function and optimizer
    loss_function = torch.nn.MSELoss()
    optimizer = getattr(optim, params['optimizer'])(net.parameters(), lr= params['learning_rate'], weight_decay=params['weight_decay'])
  
    # Run the training loop
    for epoch in range(0, 5): # 5 epochs at maximum
        
        total_test_loss = []
        # Print epoch
        print(f'Starting epoch {epoch+1}')
        
        # Set current loss value
        current_loss = 0.0
        
        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader, 0):
        
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            targets = targets.reshape((targets.shape[0], 1))
            
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_function(outputs, targets)
            # Perform backward pass
            loss.backward()
            # Perform optimization
            optimizer.step()
            
        net.eval()
        with torch.no_grad():
            for i, data in enumerate(testloader, 0):
                # Get and prepare inputs
                inputs, targets = data
                inputs, targets = inputs.float(), targets.float()
                targets = targets.reshape((targets.shape[0], 1))
                
                # # Perform forward pass
                test_outputs = net(inputs)
                test_loss = loss_function(test_outputs, targets)
                total_test_loss.append(test_loss.item())
        

    # Process is complete.
    return total_test_loss

In [17]:
def objective(trial):
    """
    Objective function to run bayesian hyperparameter tuning.

    :param trial: optuna study
    :param checkpoint_dir: checkpoint dir args
    :param cfg: config file
    :return: mean RMSE test loss
    """ 
    # For TPESampler 
    params = {
              'learning_rate': trial.suggest_loguniform('learning_rate', 1e-6, 1e-2), 
              'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]),
              'weight_decay': trial.suggest_loguniform('weight_decay', 1e-4, 1e-2),
              "n_layers" : trial.suggest_int("n_layers", 1, 4),
              "dropout" : trial.suggest_float('dropout',0.1,0.5,step = 0.1)
              }
    test_loss   = train_net(trial, params) 
    return np.mean(test_loss)

In [18]:
study = optuna.create_study(direction ="minimize", sampler=optuna.samplers.TPESampler())
study.optimize(lambda trial: objective(trial), n_trials= 100)

model_params = study.best_trial.params
model_params['optuna_best_trial_number'] =  study.best_trial.number 
model_params['optuna_best_trial_value'] = float( np.round(study.best_value,6) )
model_params["n_trials"] = 100

[32m[I 2023-05-16 01:45:08,705][0m A new study created in memory with name: no-name-f3d3e576-3c0b-4aa6-b076-f5f97eee2d60[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-6, 1e-2),
  'weight_decay': trial.suggest_loguniform('weight_decay', 1e-4, 1e-2),
[33m[W 2023-05-16 01:45:09,028][0m Trial 0 failed with parameters: {'learning_rate': 0.0070227471680817345, 'optimizer': 'Adam', 'weight_decay': 0.0016578667628263917, 'n_layers': 3, 'dropout': 0.2} because of the following error: NameError("name 'trial' is not defined").[0m
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_34/1094103061.py", line 2, in <lambda>
    study.optimize(lambda trial: objective(trial), n_trials= 100)
  File "/tmp/ipykernel_34/1182257263.py", line 18, in objective
    test_loss   = train_net(trial, params)
  File "/tmp/ipykernel_34/623243996.py", lin

NameError: name 'trial' is not defined