<a href="https://www.kaggle.com/code/averma111/optuna?scriptVersionId=129737637" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [18]:
!pip install optuna==3.1.0

[0m

In [27]:
import os, sys
import argparse
import numpy as np
import pandas as pd

import json
import optuna
import torch
from optuna.trial import TrialState
from torch.utils.data import DataLoader,Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection  import train_test_split

torch.manual_seed(42)

<torch._C.Generator at 0x7c55cb3fa250>

In [20]:
class BostonData(Dataset):
    def __init__(self,X,y,scale_data=True):
    # Apply scaling if necessary
      if scale_data:
            X = StandardScaler().fit_transform(X)
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
            
    def __getitem__(self,item):
        return self.X[item], self.y[item]
    
    def __len__(self):
        return len(self.X)

In [21]:
def MLP(trial,in_features,n_layers,dropout,n_output):
    
    layers = []
    fc_layer = in_features
    
    for i in range(n_layers):
        
        out_features = trial.suggest_int("n_units_l{}".format(i),2,in_features)
        layers.append(torch.nn.Linear(in_features, out_features))
        layers.append(torch.nn.LeakyReLU())
        in_features = out_features

    layers.append(torch.nn.Linear(in_features, fc_layer)) 
    layers.append(torch.nn.LeakyReLU())

    layers.append(torch.nn.Dropout(dropout))
    layers.append(torch.nn.Linear(fc_layer,n_output)) 
    
    return torch.nn.Sequential(*layers)

In [22]:
def train_net(trial, params):
    # Load Boston dataset
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Prepare Boston dataset
    trainloader = torch.utils.data.DataLoader(BostonData(X_train, y_train), batch_size=10, shuffle=True, num_workers=1)
    testloader = torch.utils.data.DataLoader(BostonData(X_test, y_test), batch_size=1, shuffle=True, num_workers=1)
    
    # Initialize the MLP
    net = MLP(trial,
            in_features= X.shape[1],
            n_layers=params['n_layers'] ,
            dropout=params['dropout'],
            n_output= 1)
    
  # Define the loss function and optimizer
    loss_function = torch.nn.MSELoss()
    optimizer = getattr(torch.optim, params['optimizer'])(net.parameters(), lr= params['learning_rate'], weight_decay=params['weight_decay'])
  
    # Run the training loop
    for epoch in range(0, 5): # 5 epochs at maximum
        
        total_test_loss = []
        # Print epoch
        #print(f'Starting epoch {epoch+1}')
        
        # Set current loss value
        current_loss = 0.0
        
        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader, 0):
        
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            targets = targets.reshape((targets.shape[0], 1))
            
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_function(outputs, targets)
            # Perform backward pass
            loss.backward()
            # Perform optimization
            optimizer.step()
            
        net.eval()
        with torch.no_grad():
            for i, data in enumerate(testloader, 0):
                # Get and prepare inputs
                inputs, targets = data
                inputs, targets = inputs.float(), targets.float()
                targets = targets.reshape((targets.shape[0], 1))
                
                # # Perform forward pass
                test_outputs = net(inputs)
                test_loss = loss_function(test_outputs, targets)
                total_test_loss.append(test_loss.item())
        

    # Process is complete.
    return total_test_loss

In [23]:
def objective(trial):
    """
    Objective function to run bayesian hyperparameter tuning.

    :param trial: optuna study
    :param checkpoint_dir: checkpoint dir args
    :param cfg: config file
    :return: mean RMSE test loss
    """ 
    # For TPESampler 
    params = {
              'learning_rate': trial.suggest_loguniform('learning_rate', 1e-6, 1e-2), 
              'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]),
              'weight_decay': trial.suggest_loguniform('weight_decay', 1e-4, 1e-2),
              "n_layers" : trial.suggest_int("n_layers", 1, 4),
              "dropout" : trial.suggest_float('dropout',0.1,0.5,step = 0.1)
              }
    test_loss   = train_net(trial, params) 
    return np.mean(test_loss)

In [None]:
study = optuna.create_study(direction ="minimize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials= 100,timeout=600)

    
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))
trial = study.best_trial
print("Best trial:", trial)
print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[32m[I 2023-05-16 06:10:48,514][0m A new study created in memory with name: no-name-f188a06e-4670-46da-9f73-d2283d009c3e[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-6, 1e-2),
  'weight_decay': trial.suggest_loguniform('weight_decay', 1e-4, 1e-2),
[32m[I 2023-05-16 06:10:51,957][0m Trial 0 finished with value: 19.882111912988062 and parameters: {'learning_rate': 0.006235426448023921, 'optimizer': 'RMSprop', 'weight_decay': 0.0016653213955281298, 'n_layers': 2, 'dropout': 0.30000000000000004, 'n_units_l0': 9, 'n_units_l1': 9}. Best is trial 0 with value: 19.882111912988062.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-6, 1e-2),
  'weight_decay': trial.suggest_loguniform('weight_decay', 1e-4, 1e-2),
[32m[I 2023-05-16 06:10:55,377][0m Trial 1 finished with value: 509.1098100762618 and parameters: {'learning_rate': 0.0006195690245840849, 'optimizer': 'Adam', 'weight_decay': 0.006027312640357356, 'n_layers': 1, 'dropout': 0.2, 'n_units_l0'