In [None]:
import os
import re
import numpy as np
import pandas as pd
import pickle
from typing import Literal, Union
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import copy
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary

import optuna
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score

# Load Dataset

In [2]:
def fit_scaler(X_train, scaler):
    """Fit ``scaler`` on the training features and return that same object.

    Args:
        X_train: Training feature matrix, forwarded unchanged to ``scaler.fit``.
        scaler: Any object exposing a ``fit`` method (e.g. a scikit-learn scaler).

    Returns:
        The ``scaler`` instance that was passed in, after ``fit`` has run.
    """
    fitted_scaler = scaler
    fitted_scaler.fit(X_train)
    return fitted_scaler

def scale_features(data, scaler):
    """Transform ``data`` with an already-fitted scaler.

    Args:
        data: Feature matrix, forwarded unchanged to ``scaler.transform``.
        scaler: Any object exposing a ``transform`` method.

    Returns:
        Whatever ``scaler.transform(data)`` returns.
    """
    transformed = scaler.transform(data)
    return transformed

def make_dataloader(X, y, batch_size: int = 1, shuffle: bool = True, seed: int = 0):
    """Wrap features and labels in a seeded ``DataLoader``.

    Args:
        X: Feature data; converted to a ``float32`` tensor.
        y: Label data; converted to a ``LongTensor``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples.
        seed: Seed for the loader's private ``torch.Generator``.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of ``(features, labels)``.
    """
    shuffle_rng = torch.Generator().manual_seed(seed)
    features = torch.tensor(X, dtype=torch.float32)
    labels = torch.tensor(y).type(torch.LongTensor)

    return DataLoader(
        TensorDataset(features, labels),
        batch_size=batch_size,
        shuffle=shuffle,
        generator=shuffle_rng,
    )

In [3]:
# Load the pre-computed multilingual data: a mapping of language -> split -> fields,
# where each split provides "embedding" and "category" entries (read by the cells below).
with open("datasets/processed/embed_and_cat_multilingual.pkl", "rb") as pickle_file:
    embed_and_cat_multilingual = pickle.load(pickle_file)

In [4]:
# Pool every language into one train / test / validation set.
X_train, y_train, X_test, y_test, X_val, y_val = [], [], [], [], [], []
for lang, split_dict in embed_and_cat_multilingual.items():
    X_train.extend(split_dict["train"]["embedding"])
    y_train.extend(split_dict["train"]["category"])
    X_test.extend(split_dict["test"]["embedding"])
    y_test.extend(split_dict["test"]["category"])
    X_val.extend(split_dict["validation"]["embedding"])
    y_val.extend(split_dict["validation"]["category"])

# Convert the pooled Python lists to NumPy arrays, one split at a time.
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
X_val, y_val = np.array(X_val), np.array(y_val)

In [5]:
# Fit the scaler on the training split only, then apply it to every split.
scaler = fit_scaler(X_train=X_train, scaler=StandardScaler())

X_train_scaled = scale_features(X_train, scaler)
X_test_scaled = scale_features(X_test, scaler)
X_val_scaled = scale_features(X_val, scaler)

# Only the training loader shuffles; all loaders share the same seed.
train_loader = make_dataloader(
    X=X_train_scaled, y=y_train, batch_size=64, shuffle=True, seed=42
)
test_loader = make_dataloader(
    X=X_test_scaled, y=y_test, batch_size=16, shuffle=False, seed=42
)
val_loader = make_dataloader(
    X=X_val_scaled, y=y_val, batch_size=1, shuffle=False, seed=42
)

# Build Neural Network Constructor

In [6]:
# Use the MPS backend only when it is both available and built; otherwise use the CPU.
device = (
    "mps"
    if torch.backends.mps.is_available() and torch.backends.mps.is_built()
    else "cpu"
)

# Seed the global torch RNG, and the MPS RNG when that backend is selected.
torch.manual_seed(42)
if device == "mps":
    torch.mps.manual_seed(42)

print("Device: ", device)

Device:  mps


In [7]:
class NNClassifier(nn.Module):
    """Fully connected classifier: ``Linear -> activation`` pairs plus a final ``Linear``.

    Args:
        input_dim_size: Number of input features.
        output_dim_size: Number of output logits (classes).
        layer_dims: Width of each hidden ``Linear`` layer, in order. An empty
            sequence yields a single ``Linear(input_dim_size, output_dim_size)``.
        layer_acts: Either the name of a class in ``torch.nn.modules.activation``
            (one fresh instance is created per hidden layer), or a list of
            module instances, one per hidden layer. Extra entries are ignored.
        weight_init: Callable applied in place to the ``weight`` of every
            ``nn.Linear`` layer (e.g. a function from ``torch.nn.init``).

    Raises:
        ValueError: If fewer activations than hidden layers are supplied.
        AttributeError: If ``layer_acts`` is a string that names no activation.
    """

    def __init__(self,
                 input_dim_size: int,
                 output_dim_size: int,
                 layer_dims: list = [50,100,50,15],
                 layer_acts: Union[list, str] = "ReLU",
                 weight_init=init.kaiming_uniform_):
        super().__init__()
        # Work on a copy: the default list object is shared between calls.
        layer_dims = list(layer_dims)

        if isinstance(layer_acts, str):
            # One independent module per layer instead of one shared instance.
            act_cls = getattr(nn.modules.activation, layer_acts)
            layer_acts = [act_cls() for _ in layer_dims]
        if len(layer_acts) < len(layer_dims):
            raise ValueError(
                f"Expected at least {len(layer_dims)} activations for "
                f"{len(layer_dims)} hidden layers, got {len(layer_acts)}."
            )

        self.layers = []
        in_features = input_dim_size
        for layer_dim, layer_act in zip(layer_dims, layer_acts):
            self.layers.append(nn.Linear(in_features, layer_dim))
            self.layers.append(layer_act)
            in_features = layer_dim
        self.layers.append(nn.Linear(in_features, output_dim_size))

        # Initialise only the Linear layers; anything else placed between them
        # (activations, dropout, ...) is left untouched.
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                weight_init(layer.weight)

        # nn.Sequential is what registers the layers as sub-modules.
        self.linear_layer_stack = nn.Sequential(*self.layers)

    def forward(self, x):
        """Return the raw logits for ``x``."""
        return self.linear_layer_stack(x)

# Example classifier: four hidden layers, each followed by its own ReLU
model = NNClassifier(input_dim_size=X_train.shape[1],
                     output_dim_size=7,
                     layer_dims=[50,100,50,15],
                     layer_acts=[nn.ReLU(),nn.ReLU(),nn.ReLU(),nn.ReLU()])

# Size the dummy input from the data so it always matches the model's input width.
summary(model, input_size=(1, 1, X_train.shape[1]))

Layer (type:depth-idx)                   Output Shape              Param #
NNClassifier                             [1, 1, 7]                 --
├─Sequential: 1-1                        [1, 1, 7]                 --
│    └─Linear: 2-1                       [1, 1, 50]                51,250
│    └─ReLU: 2-2                         [1, 1, 50]                --
│    └─Linear: 2-3                       [1, 1, 100]               5,100
│    └─ReLU: 2-4                         [1, 1, 100]               --
│    └─Linear: 2-5                       [1, 1, 50]                5,050
│    └─ReLU: 2-6                         [1, 1, 50]                --
│    └─Linear: 2-7                       [1, 1, 15]                765
│    └─ReLU: 2-8                         [1, 1, 15]                --
│    └─Linear: 2-9                       [1, 1, 7]                 112
Total params: 62,277
Trainable params: 62,277
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.06
Input size (MB): 0.00
Forward/bac

In [8]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
  """Return the percentage of positions where ``y_pred`` equals ``y_true``.

  Args:
    y_true: Tensor of reference labels.
    y_pred: Tensor of predicted labels, compared element-wise with ``y_true``.

  Returns:
    Accuracy as a percentage, based on ``len(y_pred)``.
  """
  n_correct = torch.eq(y_true, y_pred).sum().item()  # count of matching positions
  fraction_correct = n_correct / len(y_pred)
  return fraction_correct * 100

def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device,
               verbose: bool = False):
  """Train ``model`` for one pass over ``data_loader``.

  Args:
    model: Network to train; moved to ``device`` and put in train mode.
    data_loader: Yields ``(features, labels)`` batches.
    loss_fn: Called as ``loss_fn(logits, labels)``.
    optimizer: Optimizer that updates ``model``'s parameters.
    accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` per batch.
    device: Device for model and batches; defaults to the module-level
      ``device`` as it was when this function was defined.
    verbose: When true, print the epoch's loss and accuracy.

  Returns:
    ``(train_loss, train_acc)``: the per-batch loss and accuracy, each averaged
    over the number of batches. The loss is a tensor without autograd history.
  """
  train_loss, train_acc = 0, 0
  model.to(device)
  model.train()
  for X_batch, y_batch in data_loader:
    # Send data to the target device
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    # Forward pass: raw logits -> probabilities -> predicted labels
    y_logits = model(X_batch)
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)
    loss = loss_fn(y_logits, y_batch)

    # Accumulate a detached copy so the running total does not keep every
    # batch's autograd graph reachable for the rest of the epoch.
    train_loss += loss.detach()
    train_acc += accuracy_fn(y_true=y_batch, y_pred=y_pred)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Average the accumulated metrics over the number of batches
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  if verbose:
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

  return train_loss, train_acc


def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device,
              verbose: bool = False):
  """Evaluate ``model`` over ``data_loader`` without tracking gradients.

  Args:
    data_loader: Yields ``(features, labels)`` batches.
    model: Network to evaluate; moved to ``device`` and put in eval mode.
    loss_fn: Called as ``loss_fn(logits, labels)``.
    accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` per batch.
    device: Device for model and batches; defaults to the module-level
      ``device`` as it was when this function was defined.
    verbose: When true, print the averaged loss and accuracy.

  Returns:
    ``(test_loss, test_acc)``: per-batch loss and accuracy, each averaged over
    the number of batches.
  """
  model.to(device)
  model.eval()
  test_loss, test_acc = 0, 0
  with torch.no_grad():
    for features, labels in data_loader:
      features = features.to(device)
      labels = labels.to(device)

      # logits -> probabilities -> predicted class index
      logits = model(features)
      predicted = torch.softmax(logits, dim=1).argmax(dim=1)

      test_loss += loss_fn(logits, labels)
      test_acc += accuracy_fn(y_true=labels, y_pred=predicted)

    # Average over the number of batches
    n_batches = len(data_loader)
    test_loss /= n_batches
    test_acc /= n_batches
    if verbose:
      print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

  return test_loss, test_acc


def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn,
               device: torch.device = device):
  """Evaluate ``model`` over ``data_loader`` and summarise the result.

  Args:
    model: Network to evaluate; moved to ``device`` and put in eval mode.
    data_loader: Yields ``(features, labels)`` batches.
    loss_fn: Called as ``loss_fn(logits, labels)``.
    accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` per batch.
    device: Device for model and batches; defaults to the module-level
      ``device`` as it was when this function was defined.

  Returns:
    Dict with keys ``"model_name"`` (the model's class name), ``"model_loss"``
    (float) and ``"model_acc (%)"``; loss and accuracy are per-batch values
    averaged over the number of batches.
  """
  loss, acc = 0, 0
  # Move the model as train_step/test_step do, so evaluation also works on a
  # model that has not been trained (and therefore moved) beforehand.
  model.to(device)
  model.eval()
  with torch.no_grad():
    for X, y in data_loader:
      # Send data to the target device
      X, y = X.to(device), y.to(device)
      # Forward pass: raw logits -> probabilities -> predicted labels
      y_logits = model(X)
      y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)
      # Accumulate the loss and accuracy values per batch
      loss += loss_fn(y_logits, y)
      acc += accuracy_fn(y_true=y, y_pred=y_pred)

    # Scale loss and acc to find the average loss/acc per batch
    loss /= len(data_loader)
    acc /= len(data_loader)

  return {"model_name": model.__class__.__name__, # only works when model was created with a class
          "model_loss": loss.item(),
          "model_acc (%)": acc}

# Base Model Prediction

In [206]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Arguments shared by the train and test passes of every epoch.
step_kwargs = dict(model=model,
                   loss_fn=loss_fn,
                   accuracy_fn=accuracy_fn,
                   device=device,
                   verbose=True)

epochs = 500
for epoch in range(epochs):
  print(f"Epoch: {epoch}\n---------")
  _ = train_step(data_loader=train_loader, optimizer=optimizer, **step_kwargs)
  _ = test_step(data_loader=test_loader, **step_kwargs)

Epoch: 0
---------
Train loss: 0.00286 | Train accuracy: 100.00%
Test loss: 1.00398 | Test accuracy: 80.75%

Epoch: 1
---------
Train loss: 0.00287 | Train accuracy: 100.00%
Test loss: 1.00427 | Test accuracy: 80.75%

Epoch: 2
---------
Train loss: 0.00287 | Train accuracy: 100.00%
Test loss: 1.00459 | Test accuracy: 80.75%

Epoch: 3
---------
Train loss: 0.00286 | Train accuracy: 100.00%
Test loss: 1.00488 | Test accuracy: 80.75%

Epoch: 4
---------
Train loss: 0.00285 | Train accuracy: 100.00%
Test loss: 1.00517 | Test accuracy: 80.75%

Epoch: 5
---------
Train loss: 0.00284 | Train accuracy: 100.00%
Test loss: 1.00546 | Test accuracy: 80.75%

Epoch: 6
---------
Train loss: 0.00285 | Train accuracy: 100.00%
Test loss: 1.00577 | Test accuracy: 80.75%

Epoch: 7
---------
Train loss: 0.00284 | Train accuracy: 100.00%
Test loss: 1.00601 | Test accuracy: 80.75%

Epoch: 8
---------
Train loss: 0.00282 | Train accuracy: 100.00%
Test loss: 1.00630 | Test accuracy: 80.75%

Epoch: 9
---------


In [207]:
# Evaluate the trained model on every split with identical settings.
split_loaders = {"train": train_loader, "test": test_loader, "validation": val_loader}

perf_evals = []
for split_name, split_loader in split_loaders.items():
    perf_eval = eval_model(data_loader=split_loader,
                           model=model,
                           loss_fn=loss_fn,
                           accuracy_fn=accuracy_fn,
                           device=device)
    perf_eval["split"] = split_name
    perf_evals.append(perf_eval)

pd.DataFrame(perf_evals)[["split","model_loss","model_acc (%)"]]

Unnamed: 0,split,model_loss,model_acc (%)
0,train,0.001011,100.0
1,test,1.09733,81.21142
2,validation,1.176991,80.758017


In [198]:
rows_train, rows_test, rows_val = [], [], []

# (split key, result list, batch_size, shuffle) — one entry per split.
split_specs = [
    ("train", rows_train, 64, True),
    ("test", rows_test, 1, False),
    ("validation", rows_val, 1, False),
]

for lang, split_dict in embed_and_cat_multilingual.items():
    for split_name, rows, batch_size, shuffle in split_specs:
        X_lang = np.array(split_dict[split_name]["embedding"])
        y_lang = np.array(split_dict[split_name]["category"])

        # Reuse the scaler fitted on the pooled training data.
        loader_lang = make_dataloader(X=scale_features(X_lang, scaler),
                                      y=y_lang,
                                      batch_size=batch_size,
                                      shuffle=shuffle,
                                      seed=42)

        perf_eval = eval_model(data_loader=loader_lang,
                               model=model,
                               loss_fn=loss_fn,
                               accuracy_fn=accuracy_fn,
                               device=device)
        perf_eval["lang"] = lang
        rows.append(perf_eval)

In [199]:
# One accuracy column per split; the language label comes from the training rows.
acc_col = "model_acc (%)"
train_perf_df = pd.DataFrame(rows_train).loc[:, ["lang", acc_col]].rename(columns={acc_col: "train_accuracy"})
test_perf_df = pd.DataFrame(rows_test).loc[:, [acc_col]].rename(columns={acc_col: "test_accuracy"})
val_perf_df = pd.DataFrame(rows_val).loc[:, [acc_col]].rename(columns={acc_col: "val_accuracy"})

pd.concat([train_perf_df, test_perf_df, val_perf_df], axis=1).round(0)

Unnamed: 0,lang,train_accuracy,test_accuracy,val_accuracy
0,tr-TR,100.0,80.0,82.0
1,en-US,100.0,80.0,82.0
2,es-ES,100.0,82.0,83.0
3,fr-FR,100.0,79.0,74.0
4,de-DE,100.0,83.0,83.0


# Hyperparameter Tuning

## Optuna

In [267]:
def objective(trial):
    """Optuna objective: train one sampled configuration and score it.

    Samples the epoch count, weight initialiser, hidden layer layout,
    activation, learning rate and optimizer from ``trial``, trains an
    ``NNClassifier`` on ``train_loader`` and evaluates it on ``test_loader``
    after every epoch.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The accuracy (in percent) measured on ``test_loader`` after the last epoch.
    """
    # NOTE(review): the score being optimised comes from test_loader, so the
    # test split is effectively used for model selection — confirm this is intended.
    num_epochs = trial.suggest_int("num_epochs", 50, 250)
    weight_init_name = trial.suggest_categorical("weight_init_name", ["kaiming_uniform_", "kaiming_normal_", "xavier_uniform_", "xavier_normal_"])
    weight_init = getattr(init, weight_init_name)

    # NOTE(review): the choices here are lists; check Optuna's supported
    # categorical choice types if the study is ever persisted to storage.
    layer_dims = trial.suggest_categorical("layer_dims", [[50,100,50,15],
                                                          [15,30,45,90,60,30,10]])

    act_name = trial.suggest_categorical("act_name", ["ReLU", "LeakyReLU", "RReLU"])
    layer_acts = [getattr(nn, act_name)() for _ in range(len(layer_dims))]

    model = NNClassifier(input_dim_size=X_train.shape[1],
                         output_dim_size=len(np.unique(y_train)),
                         layer_dims=layer_dims,
                         layer_acts=layer_acts,
                         weight_init=weight_init)

    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical("optimizer_name", ["Adam","RMSprop","SGD"])
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        train_step(data_loader=train_loader,
                   model=model,
                   loss_fn=loss_fn,
                   optimizer=optimizer,
                   accuracy_fn=accuracy_fn,
                   device=device,
                   verbose=False)

        _, test_acc = test_step(data_loader=test_loader,
                                model=model,
                                loss_fn=loss_fn,
                                accuracy_fn=accuracy_fn,
                                device=device,
                                verbose=False)

    # Score of the final epoch (num_epochs is always >= 50, so this is defined).
    return test_acc

In [269]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
# (Model initialisation still draws from the global torch RNG.)
study = optuna.create_study(direction="maximize",
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=15)

[I 2025-03-07 22:47:12,642] A new study created in memory with name: no-name-3ce7f291-a9ae-4a81-b1db-c3eed8424e2a
[I 2025-03-07 22:47:15,505] Trial 0 finished with value: 73.59825102880659 and parameters: {'num_epochs': 59, 'weight_init_name': 'kaiming_uniform_', 'layer_dims': [15, 30, 45, 90, 60, 30, 10], 'act_name': 'ReLU', 'learning_rate': 0.0001119282546175383, 'optimizer_name': 'Adam'}. Best is trial 0 with value: 73.59825102880659.
[I 2025-03-07 22:47:18,820] Trial 1 finished with value: 80.51697530864197 and parameters: {'num_epochs': 91, 'weight_init_name': 'xavier_normal_', 'layer_dims': [50, 100, 50, 15], 'act_name': 'RReLU', 'learning_rate': 0.0020540900304075525, 'optimizer_name': 'SGD'}. Best is trial 1 with value: 80.51697530864197.
[I 2025-03-07 22:47:25,321] Trial 2 finished with value: 77.99639917695474 and parameters: {'num_epochs': 145, 'weight_init_name': 'xavier_uniform_', 'layer_dims': [15, 30, 45, 90, 60, 30, 10], 'act_name': 'ReLU', 'learning_rate': 0.0038257011

In [270]:
# Summarise the best trial found by the study.
trial = study.best_trial
best_params_optuna = trial.params

print(f"Best trial no {trial.number}:")
print("  Objective Value:", trial.value)
print("  Parameters:")
param_lines = ["    {}: {}".format(name, setting) for name, setting in best_params_optuna.items()]
for param_line in param_lines:
    print(param_line)

Best trial no 3:
  Objective Value: 82.94753086419753
  Parameters:
    num_epochs: 64
    weight_init_name: kaiming_normal_
    layer_dims: [50, 100, 50, 15]
    act_name: ReLU
    learning_rate: 0.0007376662111107422
    optimizer_name: RMSprop


## Skorch

In [315]:
# NOTE(review): this search is fitted on the raw X_train, whereas the manual and
# Optuna runs train on X_train_scaled — confirm the difference is intended.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train).type(torch.LongTensor)

skorch_model = NeuralNetClassifier(module=NNClassifier,
                                   criterion=nn.CrossEntropyLoss,
                                   device=device)

param_grid = {
    'max_epochs': np.arange(50,151),
    'optimizer': [optim.Adam,optim.SGD, optim.RMSprop],
    'optimizer__lr': np.arange(1e-4, 1e-1, 0.001),
    'module__weight_init': [init.kaiming_normal_, init.kaiming_uniform_, init.xavier_normal_, init.xavier_uniform_],
    'module__layer_dims': [[50,100,50,15],
                           [15,30,45,90,60,30,10]],
    'module__layer_acts': ["ReLU", "LeakyReLU", "RReLU"],
    'module__input_dim_size': [X_train.shape[1]],
    'module__output_dim_size': [7] 
}

# random_state fixes which n_iter candidates are drawn from param_grid,
# so the search is repeatable across runs.
grid_search = RandomizedSearchCV(skorch_model, 
                                param_grid, 
                                scoring="accuracy",
                                cv=3, n_iter=5, 
                                n_jobs=-1, verbose=False,
                                random_state=42)
grid_search = grid_search.fit(X_train_tensor, y_train_tensor)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1    [36m76242.4584[0m       [32m0.1208[0m      [35m140.4924[0m  1.4709
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.9562[0m       [32m0.2354[0m        [35m1.9401[0m  1.5214
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1    [36m66324.0327[0m       [32m0.0979[0m      [35m113.2272[0m  1.5422
      2       [36m91.1852[0m       [32m0.2188[0m       [35m54.0265[0m  0.1039
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.9451[0m       [32m0.2146[0m        [35m1.9246[0m  1.5841
      2        [36m1.9332[0m       0.2354        [35m1.9244[0m  0.1022
  epoch    train_loss    valid_acc    valid_loss 

In [316]:
# Summarise the best candidate found by the randomized search.
best_params_skorch = grid_search.best_params_

print("Objective Value:", grid_search.best_score_)
print("Parameters:")
param_lines = ["  {}: {}".format(name, setting) for name, setting in best_params_skorch.items()]
for param_line in param_lines:
    print(param_line)

Objective Value: 0.8473201327576355
Parameters:
  optimizer__lr: 0.0641
  optimizer: <class 'torch.optim.adam.Adam'>
  module__weight_init: <function kaiming_normal_ at 0x10d080e00>
  module__output_dim_size: 7
  module__layer_dims: [50, 100, 50, 15]
  module__layer_acts: LeakyReLU
  module__input_dim_size: 1024
  max_epochs: 147


In [326]:
best_model_skorch = grid_search.best_estimator_
print("Best model is on this device: ", best_model_skorch.device)
print("********")

# Score each split in turn: build the tensor, predict, compare with the labels.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_preds = best_model_skorch.predict(X_train_tensor)
train_score = accuracy_score(y_train, y_train_preds)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_preds = best_model_skorch.predict(X_test_tensor)
test_score = accuracy_score(y_test, y_test_preds)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32, device=device)
y_val_preds = best_model_skorch.predict(X_val_tensor)
val_score = accuracy_score(y_val, y_val_preds)

print("Best model's train accuracy score: ", train_score)
print("Best model's test accuracy score: ", test_score)
print("Best model's validation accuracy score: ", val_score)

Best model is on this device:  mps
********
Best model's train accuracy score:  0.9872044506258693
Best model's test accuracy score:  0.8016336056009334
Best model's validation accuracy score:  0.8163265306122449


## Ray Tune

In [9]:
def generate_loaders_from_raw_data(data_path: str = "datasets/processed/embed_and_cat_multilingual.pkl"):
    """Build the train / test / validation loaders straight from the pickle file.

    Pools every language per split, fits a ``StandardScaler`` on the pooled
    training features, scales all three splits with it and wraps them in loaders.

    Args:
        data_path: Path of the pickled ``language -> split -> fields`` mapping.
            Defaults to the file used throughout this notebook.

    Returns:
        ``(train_loader, test_loader, val_loader)``.
    """
    # pickle can execute arbitrary code on load: only point this at trusted files.
    with open(data_path, "rb") as f:
        embed_and_cat_multilingual = pickle.load(f)

    split_names = ("train", "test", "validation")
    features = {name: [] for name in split_names}
    labels = {name: [] for name in split_names}
    for split_dict in embed_and_cat_multilingual.values():
        for name in split_names:
            features[name].extend(split_dict[name]["embedding"])
            labels[name].extend(split_dict[name]["category"])

    X_train, X_test, X_val = (np.array(features[name]) for name in split_names)
    y_train, y_test, y_val = (np.array(labels[name]) for name in split_names)

    # Fit on the training split only, then apply the same scaling everywhere.
    scaler = fit_scaler(X_train=X_train, scaler=StandardScaler())

    X_train_scaled = scale_features(X_train, scaler)
    X_test_scaled = scale_features(X_test, scaler)
    X_val_scaled = scale_features(X_val, scaler)

    train_loader = make_dataloader(X=X_train_scaled, y=y_train, batch_size=64, shuffle=True, seed=42)
    test_loader = make_dataloader(X=X_test_scaled, y=y_test, batch_size=16, shuffle=False, seed=42)
    val_loader = make_dataloader(X=X_val_scaled, y=y_val, batch_size=1, shuffle=False, seed=42)

    return train_loader, test_loader, val_loader
    

def train_classifier(config):
    """Ray Tune trainable: train one configuration and report a result every epoch.

    Args:
        config: Mapping with the keys ``num_epochs``, ``weight_init_name``,
            ``layer_dims``, ``act_name``, ``learning_rate`` and
            ``optimizer_name``. ``input_dim_size`` and ``output_dim_size`` are
            optional and default to 1024 and 7.
    """
    num_epochs = config["num_epochs"]
    weight_init = getattr(init, config["weight_init_name"])

    layer_dims = config["layer_dims"]
    act_name = config["act_name"]
    layer_acts = [getattr(nn, act_name)() for _ in range(len(layer_dims))]

    model = NNClassifier(input_dim_size=config.get("input_dim_size", 1024),
                         output_dim_size=config.get("output_dim_size", 7),
                         layer_dims=layer_dims,
                         layer_acts=layer_acts,
                         weight_init=weight_init)
    optimizer = getattr(optim, config["optimizer_name"])(model.parameters(), lr=config["learning_rate"])
    criterion = nn.CrossEntropyLoss()

    train_loader, test_loader, _val_loader = generate_loaders_from_raw_data()

    for epoch in range(num_epochs):
        train_step(data_loader=train_loader,
                   model=model,
                   loss_fn=criterion,
                   optimizer=optimizer,
                   accuracy_fn=accuracy_fn,
                   device=device,
                   verbose=False)

        test_loss, test_acc = test_step(data_loader=test_loader,
                                        model=model,
                                        loss_fn=criterion,
                                        accuracy_fn=accuracy_fn,
                                        device=device,
                                        verbose=False)

        # Report once per epoch so the scheduler sees the trial's progress and
        # can stop it early; plain floats keep device tensors out of the results.
        tune.report({"loss": float(test_loss), "accuracy": float(test_acc)})

    print("Finished Training")

In [10]:
# Search space handed to Ray Tune; train_classifier reads exactly these keys.
config = {
    'num_epochs': tune.randint(5, 10),
    'layer_dims': tune.choice([[15,30,15],
                               [15,90,10]]),
    'act_name': tune.choice(["ReLU", "LeakyReLU", "RReLU"]),
    'weight_init_name': tune.choice(["kaiming_normal_", "kaiming_uniform_", "xavier_normal_", "xavier_uniform_"]),
    'learning_rate': tune.loguniform(1e-4, 1e-1),
    'optimizer_name': tune.choice(["Adam", "SGD", "RMSprop"])
}

ray.shutdown()  # Clean any previous Ray instances
# Start Ray with specific CPU/GPU allocation
ray.init(num_cpus=12, num_gpus=0)

scheduler = ASHAScheduler(grace_period=5, # Minimum number of reported iterations before a trial may be stopped
                          reduction_factor=2 # Halving rate used by ASHA when deciding which trials continue
                        )
# Each trial requests 5 CPUs; with at most 2 concurrent trials that is 10 of the 12 CPUs given to Ray.
tuner = tune.Tuner(
    tune.with_resources(
        tune.with_parameters(train_classifier),
        resources={"cpu": 5} #, "gpu": 0, "num_workers": 2}
    ),
    tune_config=tune.TuneConfig(
        metric="accuracy",
        mode="max",
        scheduler=scheduler,
        num_samples=3, # equivalent to parameter 'n_trials' of optuna
        max_concurrent_trials=2 # Run 2 trials in parallel
    ),
    param_space=config,
)
ray_tuner_results = tuner.fit()

0,1
Current time:,2025-03-08 17:12:17
Running for:,00:02:09.88
Memory:,45.4/64.0 GiB

Trial name,status,loc,act_name,layer_dims,learning_rate,num_epochs,optimizer_name,weight_init_name
train_classifier_0a580_00000,PENDING,,RReLU,"[15, 30, 15]",0.00293318,5,SGD,kaiming_uniform_
train_classifier_0a580_00001,PENDING,,ReLU,"[15, 90, 10]",0.000342652,7,RMSprop,kaiming_uniform_


[33m(raylet)[0m bash: /Users/toygunkarabas/Development/NLP: No such file or directory
[33m(raylet)[0m bash: line 0: exec: /Users/toygunkarabas/Development/NLP: cannot execute: No such file or directory
[33m(raylet)[0m bash: SLP/nlp_slp_env/bin/python: No such file or directory
[33m(raylet)[0m [2025-03-08 17:11:08,039 E 40153 28468980] (raylet) worker_pool.cc:581: Some workers of the worker process(40169) have not registered within the timeout. The process is dead, probably it crashed during start.
[33m(raylet)[0m bash: /Users/toygunkarabas/Development/NLP: No such file or directory
[33m(raylet)[0m bash: line 0: exec: /Users/toygunkarabas/Development/NLP: cannot execute: No such file or directory
[33m(raylet)[0m bash: SLP/nlp_slp_env/bin/python: No such file or directory
[33m(raylet)[0m bash: /Users/toygunkarabas/Development/NLP: No such file or directory
[33m(raylet)[0m bash: line 0: exec: /Users/toygunkarabas/Development/NLP: cannot execute: No such file or directory

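The Ray Tune run above did not complete: the trials stayed `PENDING` and the raylet log shows the worker processes failing to start. The messages `bash: /Users/.../NLP: No such file or directory` and `bash: SLP/nlp_slp_env/bin/python: No such file or directory` suggest that the Python interpreter path is being split at a space in the project directory name. **TODO:** investigate and fix this later.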

## Allegro

In [None]:
# This will be implemented later!

# Resources

### Optuna
- https://www.geeksforgeeks.org/hyperparameter-tuning-with-optuna-in-pytorch/

### Skorch
- https://machinelearningmastery.com/how-to-grid-search-hyperparameters-for-pytorch-models/

- https://memudualimatou.medium.com/skorch-hyper-parameter-tuning-with-pytorch-b5af0ba8d45c

- https://debuggercafe.com/hyperparameter-search-with-pytorch-and-skorch/

### Ray Tune
- https://pytorch.org/tutorials/beginner/hyperparameter_tuning_tutorial.html

- https://debuggercafe.com/hyperparameter-tuning-with-pytorch-and-ray-tune/

- https://www.geeksforgeeks.org/hyperparameter-tuning-with-ray-tune-in-pytorch/

- https://docs.ray.io/en/latest/tune/index.html

### Allegro
- https://medium.com/pytorch/accelerate-your-hyperparameter-optimization-with-pytorchs-ecosystem-tools-bc17001b9a49

- https://github.com/clearml/clearml/tree/master/examples/frameworks/pytorch/notebooks/image