# Hyperparameter Search

- Example with **Optuna**: https://github.com/optuna/optuna-examples/blob/main/pytorch/pytorch_simple.py
  - notebook1: https://github.com/marcellusruben/medium-resources/blob/main/Optuna/Optuna.ipynb

## Optuna

In [221]:
import os
import pandas as pd
import numpy as np
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torchvision import transforms
from sklearn.model_selection import train_test_split

# Visualization
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
# Path
import sys
sys.path.append('/home/sebacastillo/neuralnets/')
from src.utils import get_project_root
root = get_project_root()
# Infra
device = torch.device('cuda' if torch.cuda.is_available() else torch.device('cpu'))

In [222]:
input_file = '/data/concentlite.csv'
EXP_NAME = 'OPT001'
DEVICE = torch.device("cpu")
BATCHSIZE = 128
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 10
BATCH_SIZE = 10     # Número de patrones en cada batch
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10

In [223]:
def define_model_multiclass(trial, inputs):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    layers = []

    in_features = inputs
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 3, 32)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.2, 0.5)
        layers.append(nn.Dropout(p))

        in_features = out_features
    layers.append(nn.Linear(in_features, CLASSES))
    layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [224]:
def define_model_binary(trial, inputs):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 3)
    layers = []

    in_features = inputs
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 3, 32)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.2, 0.5)
        layers.append(nn.Dropout(p))

        in_features = out_features

    layers.append(nn.Linear(in_features, 1))  # Output layer with single unit for binary classification
    layers.append(nn.Tanh())  # Tanh activation function to output values between -1 and 1

    return nn.Sequential(*layers)


In [225]:
def load_split_save_data(input_filename, output_name='EXP', split_type='train_test', train_ratio=0.75, validate_ratio=None, test_ratio=None):

    data = pd.read_csv(input_filename)

    # Check if 'exp' folder exists, create it if it doesn't
    if not os.path.exists('exp'):
        os.makedirs('exp')
    
    # Create a subfolder with the output_name
    output_path = os.path.join('exp', output_name)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
        
    if split_type == 'train_validate_test':
        if not validate_ratio or not test_ratio:
            raise ValueError("Please provide validate_ratio and test_ratio for 'train_validate_test' split type.")
        
        train_data, temp_data = train_test_split(data, train_size=train_ratio, random_state=42)
        validate_data, test_data = train_test_split(temp_data, train_size=validate_ratio / (validate_ratio + test_ratio), random_state=42)
        
        # Save the train, validate, and test data as CSV files in the output folder
        train_data.to_csv(os.path.join(output_path, f'{output_name}_train_data.csv'), index=False)
        validate_data.to_csv(os.path.join(output_path, f'{output_name}_validate_data.csv'), index=False)
        test_data.to_csv(os.path.join(output_path, f'{output_name}_test_data.csv'), index=False)


        return train_data, validate_data, test_data    

    elif split_type == 'train_test':
        train_data, test_data = train_test_split(data, train_size=train_ratio, random_state=42)
        
        # Save the train and test data as CSV files in the output folder
        train_data.to_csv(os.path.join(output_path, f'{output_name}_train_data.csv'), index=False)
        test_data.to_csv(os.path.join(output_path, f'{output_name}_test_data.csv'), index=False)


        return train_data, test_data
    
    else:
        raise ValueError("Invalid split_type. Use either 'train_validate_test' or 'train_test'.")

In [226]:
class DATASET(Dataset):  
    '''
    Esta clase maneja la lectura de los datos y provee un mecanismo
    para alimentar los modelos con los patrones.
    '''
    
    #===================================================
    def __init__(self, filename):
        
        #------------------------------------
        # LECTURA DE LOS DATOS
        data = pd.read_csv(filename, header=None).to_numpy() # Levanta los datos en formato numpy
        
        #------------------------------------
        # INSERTAMOS COLUMNA DEL "BIAS"
        #bias = -np.ones((len(data), 1))
        #data = np.concatenate((bias, data), axis=1)  # Insertamos el "bias" en la primera columna
        
        #------------------------------------
        # ALEATORIZO LOS PATRONES (filas)
        idxs = np.arange(len(data))  # Genero un vector de índices
        np.random.shuffle(idxs)
        data = data[idxs,:]
        
        #------------------------------------
        # SEPARO LOS DATOS
        self.x = data[:,:-1].astype(np.float32)
        self.y = data[:,-1].astype(np.float32)  # La clase está en la última columna
    
    #===================================================
    def __len__(self):
        '''
        Devuelve el número de patrones en el dataset.
        '''
        return len(self.x)
    
    
    #===================================================
    def __getitem__(self, idx):
        '''
        Devuelve el/los patrones indicados.
        '''
        return self.x[idx,:], self.y[idx]

In [227]:
def get_data(input_file):
    # Load FashionMNIST dataset.
    # Inicialize experiment
    datafile = str(root) + input_file
    train_data, test_data = load_split_save_data(datafile , output_name= EXP_NAME)
    # data
    filename_train_data = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + '_train_data.csv'
    filename_test_data = str(root) + '/exp/' + EXP_NAME + '/' + EXP_NAME  + '_test_data.csv'

    # Construimos los datasets para entrenamiento y validación
    trn = DATASET(filename_train_data)
    test = DATASET(filename_test_data)

    # Construimos los dataloaders para entrenamiento y validación
    train_loader = DataLoader(trn, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

    return train_loader, valid_loader

In [228]:
# Define custom loss function
#loss_function = torch.nn.MSELoss()
loss_function = nn.MSELoss(reduction='mean').to(device)

In [229]:
def objective(trial):
    # Generate the model.
    model = define_model_binary(trial,inputs=2).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-3, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the dataset.
    train_loader, valid_loader = get_data(input_file)    

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data).squeeze()
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data).squeeze()
                # Get the prediction: 1 if output > 0 else -1
                pred = (output > 0).float() * 2 - 1
                correct += ((pred > 0) == (target > 0)).float().sum().item()

        accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy


In [None]:
study = optuna.create_study(
    direction="maximize", 
    storage="sqlite:///db.sqlite3",  # Specify the storage URL here.
    study_name="concentlite18"
    )

In [231]:
study.optimize(objective, n_trials=10, timeout=600)

[32m[I 2023-05-10 16:56:01,911][0m Trial 0 finished with value: 0.291866028708134 and parameters: {'n_layers': 3, 'n_units_l0': 7, 'dropout_l0': 0.28396996595219803, 'n_units_l1': 12, 'dropout_l1': 0.2103325606957719, 'n_units_l2': 28, 'dropout_l2': 0.49021903749401613, 'optimizer': 'RMSprop', 'lr': 0.02819504061546719}. Best is trial 0 with value: 0.291866028708134.[0m
[32m[I 2023-05-10 16:56:05,288][0m Trial 1 finished with value: 0.4019138755980861 and parameters: {'n_layers': 2, 'n_units_l0': 26, 'dropout_l0': 0.3469966604485713, 'n_units_l1': 30, 'dropout_l1': 0.24200924661476692, 'optimizer': 'RMSprop', 'lr': 0.006727260620110395}. Best is trial 1 with value: 0.4019138755980861.[0m
[32m[I 2023-05-10 16:56:08,837][0m Trial 2 finished with value: 0.3014354066985646 and parameters: {'n_layers': 3, 'n_units_l0': 13, 'dropout_l0': 0.3947612622662538, 'n_units_l1': 11, 'dropout_l1': 0.3914347891172977, 'n_units_l2': 28, 'dropout_l2': 0.3378682005047925, 'optimizer': 'RMSprop', 

## Visualizations

In [104]:
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])

In [105]:
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

In [106]:
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))



Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  5
  Number of complete trials:  5


In [107]:
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Best trial:
  Value:  0.1674641148325359
  Params: 
    dropout_l0: 0.22677304480395166
    dropout_l1: 0.3008034084830614
    lr: 2.2248253832577537e-05
    n_layers: 2
    n_units_l0: 13
    n_units_l1: 18
    optimizer: SGD


In [108]:
plot_optimization_history(study)

In [None]:
plot_intermediate_values(study)

In [None]:
plot_parallel_coordinate(study)

In [None]:
plot_contour(study)
# plot_contour(study, params=["bagging_freq", "bagging_fraction"])

In [None]:
plot_slice(study)

In [None]:
plot_param_importances(study)

Learn which hyperparameters are affecting the trial duration with hyperparameter importance.

In [None]:
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.duration.total_seconds(), target_name="duration"
)

## Dashboard

Terminal run:

'optuna-dashboard sqlite:///db.sqlite3'