In [10]:
import torch 
import torch.nn as nn 
import numpy as np 
from torch.utils.data import DataLoader, TensorDataset 
from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split 
import optuna 

In [11]:
torch.manual_seed(0)
np.random.seed(0)

In [12]:
X,y = make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=1)
X_train,X_val,y_train,y_val = train_test_split(
  X,y,test_size=0.2,random_state=1
)
X_train = torch.tensor(X_train,dtype=torch.float32)
X_val = torch.tensor(X_val,dtype=torch.float32)

y_train = torch.tensor(y_train,dtype=torch.float32).view(-1,1)
y_val = torch.tensor(y_val,dtype=torch.float32).view(-1,1)

train_loader = DataLoader(
  TensorDataset(X_train,y_train),
  batch_size=64,
  shuffle=True
)
val_loader = DataLoader(
  TensorDataset(X_val,y_val),
  batch_size=64
)

In [13]:
class BinaryClassifier(nn.Module):
  def __init__(self,dropout):
    super().__init__()
    self.net = nn.Sequential(
      nn.Linear(10,32),
      nn.ReLU(),
      nn.Dropout(dropout),
      nn.Linear(32,1)
    )
  def forward(self,x):
    return self.net(x)
    

In [14]:
def train_one_epoch(model,loader,optimizer,criterion,device):
  model.train()
  total_loss = 0.0
  for x,y in loader:
    x,y = x.to(device),y.to(device)
    optimizer.zero_grad()
    logits = model(x)
    loss = criterion(logits,y)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  return total_loss/len(loader)

def validate(model,loader,criterion,device):
  model.eval()
  total_loss = 0.0
  with torch.no_grad():
    for x,y in loader:
      x,y = x.to(device),y.to(device)
      logits = model(x)
      loss = criterion(logits,y)
      total_loss += loss.item()

  return total_loss / len(loader)

In [15]:
def objective(trial):
  device = "cuda" if torch.cuda.is_available() else "cpu"
  lr = trial.suggest_float("lr",1e-4,1e-2,log=True)
  dropout = trial.suggest_float("dropout",0.1,0.5)
  weight_decay = trial.suggest_float("weight_decay",1e-6,1e-3,log=True)

  model = BinaryClassifier(dropout=dropout).to(device)
  criterion = nn.BCEWithLogitsLoss()
  optimizer = torch.optim.Adam(
    model.parameters(),
    lr = lr,
    weight_decay = weight_decay
  )

  max_epochs = 20
  for epoch in range(max_epochs):
    train_loss = train_one_epoch(
      model,train_loader,optimizer,criterion,device
    )
    val_loss = validate(
      model,val_loader,criterion,device
    )

    trial.report(val_loss,step=epoch)

    if trial.should_prune():
      raise optuna.exceptions.TrialPruned()

    return val_loss

In [16]:
study = optuna.create_study(
  direction="minimize",
  study_name="pytorch_finetuning"
)
study.optimize(objective,n_trials=30)

[I 2026-01-07 18:38:44,802] A new study created in memory with name: pytorch_finetuning
[I 2026-01-07 18:38:44,922] Trial 0 finished with value: 0.680860847234726 and parameters: {'lr': 0.00010683707321166706, 'dropout': 0.49569669331132377, 'weight_decay': 1.481930668407028e-06}. Best is trial 0 with value: 0.680860847234726.
[I 2026-01-07 18:38:44,930] Trial 1 finished with value: 0.6451137363910675 and parameters: {'lr': 0.0017821063190525883, 'dropout': 0.14128435173090123, 'weight_decay': 1.8171318979874065e-05}. Best is trial 1 with value: 0.6451137363910675.
[I 2026-01-07 18:38:44,940] Trial 2 finished with value: 0.7412804961204529 and parameters: {'lr': 0.0001080574953703763, 'dropout': 0.16571340520737118, 'weight_decay': 5.366284459125061e-06}. Best is trial 1 with value: 0.6451137363910675.
[I 2026-01-07 18:38:44,950] Trial 3 finished with value: 0.6523437947034836 and parameters: {'lr': 0.003587484974746162, 'dropout': 0.28959115646750205, 'weight_decay': 1.379660045716204

In [17]:
# =====================================================
# Imports
# =====================================================
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import optuna

# =====================================================
# Reproducibility
# =====================================================
torch.manual_seed(42)
np.random.seed(42)

# =====================================================
# Dataset
# =====================================================
X, y = make_classification(
    n_samples=1500,
    n_features=10,
    n_classes=2,
    n_informative=8,
    random_state=42
)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_val   = torch.tensor(X_val, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_val   = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

train_ds = TensorDataset(X_train, y_train)
val_ds   = TensorDataset(X_val, y_val)

# =====================================================
# Dynamic Model Builder
# =====================================================
class DynamicNet(nn.Module):
    def __init__(self, input_dim, n_layers, hidden_size, activation, dropout):
        super().__init__()

        layers = []
        in_dim = input_dim

        for _ in range(n_layers):
            layers.append(nn.Linear(in_dim, hidden_size))

            if activation == "relu":
                layers.append(nn.ReLU())
            elif activation == "tanh":
                layers.append(nn.Tanh())
            elif activation == "elu":
                layers.append(nn.ELU())
            elif activation == "leaky_relu":
                layers.append(nn.LeakyReLU(0.1))

            layers.append(nn.Dropout(dropout))
            in_dim = hidden_size

        layers.append(nn.Linear(in_dim, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

# =====================================================
# Training & Validation
# =====================================================
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)


def validate(model, loader, criterion, device):
    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            total_loss += loss.item()

    return total_loss / len(loader)

# =====================================================
# Optuna Objective Function
# =====================================================
def objective(trial):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # ------------------------------
    # Architecture hyperparameters
    # ------------------------------
    n_layers = trial.suggest_int("n_layers", 1, 4)
    hidden_size = trial.suggest_categorical("hidden_size", [16, 32, 64, 128])
    activation = trial.suggest_categorical(
        "activation", ["relu", "tanh", "elu", "leaky_relu"]
    )
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    # ------------------------------
    # Optimization hyperparameters
    # ------------------------------
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    optimizer_name = trial.suggest_categorical(
        "optimizer", ["Adam", "AdamW", "RMSprop"]
    )
    batch_size = trial.suggest_categorical(
        "batch_size", [32, 64, 128]
    )

    # ------------------------------
    # Data loaders
    # ------------------------------
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    # ------------------------------
    # Model
    # ------------------------------
    model = DynamicNet(
        input_dim=10,
        n_layers=n_layers,
        hidden_size=hidden_size,
        activation=activation,
        dropout=dropout
    ).to(device)

    criterion = nn.BCEWithLogitsLoss()

    # ------------------------------
    # Optimizer
    # ------------------------------
    if optimizer_name == "Adam":
        optimizer = torch.optim.Adam(
            model.parameters(), lr=lr, weight_decay=weight_decay
        )
    elif optimizer_name == "AdamW":
        optimizer = torch.optim.AdamW(
            model.parameters(), lr=lr, weight_decay=weight_decay
        )
    else:
        optimizer = torch.optim.RMSprop(
            model.parameters(), lr=lr, weight_decay=weight_decay
        )

    # ------------------------------
    # Training loop with pruning
    # ------------------------------
    max_epochs = 25

    for epoch in range(max_epochs):
        train_loss = train_one_epoch(
            model, train_loader, optimizer, criterion, device
        )

        val_loss = validate(
            model, val_loader, criterion, device
        )

        # Report to Optuna
        trial.report(val_loss, epoch)

        # Prune bad trials
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss

# =====================================================
# Run Study
# =====================================================
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=40)

# =====================================================
# Results
# =====================================================
print("\n================ BEST RESULT ================\n")
print("Best Validation Loss:", study.best_value)
print("\nBest Hyperparameters:")
for k, v in study.best_params.items():
    print(f"{k}: {v}")


[I 2026-01-07 18:43:44,618] A new study created in memory with name: no-name-55dbdcca-b4cd-443e-b44c-4142808c36c0
[I 2026-01-07 18:43:45,391] Trial 0 finished with value: 0.15571239348500968 and parameters: {'n_layers': 4, 'hidden_size': 64, 'activation': 'leaky_relu', 'dropout': 0.23636207760646727, 'lr': 0.0012840930119041073, 'weight_decay': 0.00012356647671634002, 'optimizer': 'AdamW', 'batch_size': 32}. Best is trial 0 with value: 0.15571239348500968.
[I 2026-01-07 18:43:45,611] Trial 1 finished with value: 0.37770040035247804 and parameters: {'n_layers': 1, 'hidden_size': 32, 'activation': 'elu', 'dropout': 0.48584896856264354, 'lr': 0.0020122929868617704, 'weight_decay': 6.704132518734996e-06, 'optimizer': 'Adam', 'batch_size': 64}. Best is trial 0 with value: 0.15571239348500968.
[I 2026-01-07 18:43:46,027] Trial 2 finished with value: 0.12907025776803493 and parameters: {'n_layers': 3, 'hidden_size': 128, 'activation': 'elu', 'dropout': 0.22576323885255273, 'lr': 0.00418721418



Best Validation Loss: 0.12907025776803493

Best Hyperparameters:
n_layers: 3
hidden_size: 128
activation: elu
dropout: 0.22576323885255273
lr: 0.004187214182175137
weight_decay: 1.575681547543402e-05
optimizer: Adam
batch_size: 128
