In [1]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold



In [2]:
# Define the choices explicitly
hidden_size_choices = [32, 64, 128, 256]
num_layers_choices = [2, 3, 4]
# activation_choices = [nn.ReLU, nn.Tanh, nn.Sigmoid]
l2_reg_lower, l2_reg_upper = 1e-6, 1e-2

In [None]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Generate synthetic data (y = sin(x) + noise)
np.random.seed(42)
x = np.linspace(-2*np.pi, 2*np.pi, 100).reshape(-1, 1)
y = np.sin(x) + 0.05 * np.random.randn(100, 1)

# Convert to PyTorch tensors
x_tensor = torch.tensor(x, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y, dtype=torch.float32).to(device)

# Split data into train and validation sets
x_train, x_val, y_train, y_val = train_test_split(x_tensor.cpu().numpy(), y_tensor.cpu().numpy(), test_size=0.2, random_state=42)
x_train, x_val = torch.tensor(x_train, dtype=torch.float32).to(device), torch.tensor(x_val, dtype=torch.float32).to(device)
y_train, y_val = torch.tensor(y_train, dtype=torch.float32).to(device), torch.tensor(y_val, dtype=torch.float32).to(device)

# Define the hyperparameter search space
space = {
    'num_layers': hp.choice('num_layers', num_layers_choices),  # Number of hidden layers
    'hidden_size': hp.choice('hidden_size', hidden_size_choices),  # Neurons per layer
    'l2_reg': hp.loguniform('l2_reg', np.log(l2_reg_lower), np.log(l2_reg_upper))  # L2 regularization strength
}

  return torch._C._cuda_getDeviceCount() > 0


In [4]:
space['hidden_size'].inputs

<bound method Apply.inputs of <hyperopt.pyll.base.Apply object at 0x7539d30e66e0>>

In [None]:
# Define a neural network model
class SimpleNN(nn.Module):
    def __init__(self, num_layers, hidden_size):   # num_layers: number of hidden layers
        super(SimpleNN, self).__init__()
        layers = [nn.Linear(1, hidden_size), nn.Tanh()]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.Tanh())
        layers.append(nn.Linear(hidden_size, 1))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

class EarlyStopping:
    def __init__(self, patience=50, min_delta=1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float("inf")
        self.wait = 0

    def step(self, val_loss):
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.wait = 0
        else:
            self.wait += 1

        return self.wait >= self.patience
    
# Training function with K-Fold CV
def train_model_kfold(model, x_data, y_data, k=5, epochs=500, lr=0.01):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    fold_results = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(x_data)):
        print(f"\n🔹 Fold {fold + 1}/{k} 🔹")

        train_x, train_y = x_data[train_idx], y_data[train_idx]
        val_x, val_y = x_data[val_idx], y_data[val_idx]

        # model = SimpleNN().to(device)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=50)

        train_losses, val_losses = [], []

        # Usage in training loop
        early_stopping = EarlyStopping(patience=50)

        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            y_pred = model(train_x)
            loss = criterion(y_pred, train_y)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

            # Validation loss (k-fold CV)
            model.eval()
            with torch.no_grad():
                val_pred = model(val_x)
                val_loss = criterion(val_pred, val_y).item()
                val_losses.append(val_loss)

            scheduler.step(val_loss)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Train Loss: {loss.item():.6f}, Val Loss: {val_loss:.6f}, LR: {optimizer.param_groups[0]['lr']:.6f}")

            # Check early stopping condition
            if early_stopping.step(val_loss):
                print(f"Stopping early at epoch {epoch}")
                break

        fold_results.append((train_losses, val_losses))

    avg_val_loss = np.mean([loss[-1] for _, loss in fold_results])
    print(f"\n✅ Average Validation Loss Across {k} Folds: {avg_val_loss:.6f}")

    return avg_val_loss

# Function to evaluate a given set of hyperparameters
def objective(params):
    print(f"Testing with: {params}")

    # Create the model with the given hyperparameters
    model = SimpleNN(num_layers=params['num_layers'], hidden_size=params['hidden_size']).to(device)
    
    # Train the model with K-Fold CV
    val_loss = train_model_kfold(model, x_tensor, y_tensor, k=5, epochs=500, lr=0.01)

    print(f"Validation Loss: {val_loss:.6f}")

    return {'loss': val_loss, 'status': STATUS_OK}



In [6]:
# Create a trials object to store optimization history
trials = Trials()

# Run Bayesian optimization
best_hyperparams = fmin(
    fn=objective,        # Function to minimize (validation loss)
    space=space,         # Hyperparameter search space
    algo=tpe.suggest,    # Tree-structured Parzen Estimator (TPE)
    max_evals=200,        # Number of trials to run
    trials=trials        # Store results
)

print("\n🎯 Best Hyperparameters Found:")
print(best_hyperparams)


Testing with: {'hidden_size': 256, 'l2_reg': 3.701479138793044e-05, 'num_layers': 3}
                                                       
🔹 Fold 1/5 🔹
Epoch 0, Train Loss: 0.540316, Val Loss: 16.892603, LR: 0.010000
Epoch 100, Train Loss: 0.035941, Val Loss: 0.044430, LR: 0.010000
Stopping early at epoch 128                            
                                                       
🔹 Fold 2/5 🔹
Epoch 0, Train Loss: 0.038420, Val Loss: 10.606874, LR: 0.010000
  0%|          | 0/200 [00:00<?, ?trial/s, best loss=?]




Epoch 100, Train Loss: 0.024913, Val Loss: 0.030829, LR: 0.010000
Epoch 200, Train Loss: 0.002672, Val Loss: 0.012111, LR: 0.010000
Epoch 300, Train Loss: 0.001302, Val Loss: 0.010020, LR: 0.010000
Epoch 400, Train Loss: 0.001050, Val Loss: 0.009505, LR: 0.010000
Stopping early at epoch 468                            
                                                       
🔹 Fold 3/5 🔹
Epoch 0, Train Loss: 0.003006, Val Loss: 4.562600, LR: 0.010000
Epoch 100, Train Loss: 0.001824, Val Loss: 0.004903, LR: 0.010000
Stopping early at epoch 172                            
                                                       
🔹 Fold 4/5 🔹
Epoch 0, Train Loss: 0.001531, Val Loss: 2.669500, LR: 0.010000
Epoch 100, Train Loss: 0.000762, Val Loss: 0.017401, LR: 0.010000
Epoch 200, Train Loss: 0.001785, Val Loss: 0.016243, LR: 0.010000
Stopping early at epoch 247                            
                                                       
🔹 Fold 5/5 🔹
Epoch 0, Train Loss: 0.004198, Val 

In [7]:
# Evaluate the model with the best hyperparameters
best_params = {'hidden_size': hidden_size_choices[best_hyperparams['hidden_size']], 'l2_reg': best_hyperparams['l2_reg'], 'num_layers': num_layers_choices[best_hyperparams['num_layers']]}
final_val_loss = objective(best_params)['loss']

print(f"\n🎯 Final Validation Loss with Best Hyperparameters: {final_val_loss:.6f}")

Testing with: {'hidden_size': 32, 'l2_reg': np.float64(1.0513989419703529e-05), 'num_layers': 3}

🔹 Fold 1/5 🔹
Epoch 0, Train Loss: 0.556042, Val Loss: 0.498334, LR: 0.010000


Epoch 100, Train Loss: 0.039903, Val Loss: 0.056555, LR: 0.010000
Epoch 200, Train Loss: 0.009580, Val Loss: 0.022934, LR: 0.010000
Stopping early at epoch 284

🔹 Fold 2/5 🔹
Epoch 0, Train Loss: 0.005893, Val Loss: 1.334529, LR: 0.010000
Epoch 100, Train Loss: 0.015764, Val Loss: 0.014056, LR: 0.010000
Stopping early at epoch 197

🔹 Fold 3/5 🔹
Epoch 0, Train Loss: 0.001967, Val Loss: 0.674943, LR: 0.010000
Epoch 100, Train Loss: 0.002132, Val Loss: 0.003549, LR: 0.010000
Epoch 200, Train Loss: 0.001770, Val Loss: 0.002419, LR: 0.010000
Stopping early at epoch 248

🔹 Fold 4/5 🔹
Epoch 0, Train Loss: 0.002081, Val Loss: 0.161306, LR: 0.010000
Epoch 100, Train Loss: 0.001919, Val Loss: 0.002537, LR: 0.010000
Stopping early at epoch 136

🔹 Fold 5/5 🔹
Epoch 0, Train Loss: 0.001976, Val Loss: 0.367855, LR: 0.010000
Epoch 100, Train Loss: 0.001566, Val Loss: 0.003692, LR: 0.010000
Stopping early at epoch 137

✅ Average Validation Loss Across 5 Folds: 0.004058
Validation Loss: 0.004058

🎯 Final

In [None]:
# Evaluate the model with the best hyperparameters
best_params = {'hidden_size': 64, 'l2_reg': 1.0238234674533273e-06, 'num_layers': 2}
final_val_loss = objective(best_params)['loss']

print(f"\n🎯 Validation Loss with some set of Hyperparameters: {final_val_loss:.6f}")

Testing with: {'hidden_size': 64, 'l2_reg': 1.0238234674533273e-06, 'num_layers': 2}

🔹 Fold 1/5 🔹
Epoch 0, Train Loss: 0.550797, Val Loss: 2.734138, LR: 0.010000
Epoch 100, Train Loss: 0.045306, Val Loss: 0.054257, LR: 0.010000
Epoch 200, Train Loss: 0.011895, Val Loss: 0.015785, LR: 0.010000
Epoch 300, Train Loss: 0.002097, Val Loss: 0.003036, LR: 0.010000
Epoch 400, Train Loss: 0.001764, Val Loss: 0.002181, LR: 0.010000
Stopping early at epoch 453

🔹 Fold 2/5 🔹
Epoch 0, Train Loss: 0.001902, Val Loss: 0.566046, LR: 0.010000
Stopping early at epoch 97

🔹 Fold 3/5 🔹
Epoch 0, Train Loss: 0.002110, Val Loss: 0.209286, LR: 0.010000
Stopping early at epoch 86

🔹 Fold 4/5 🔹
Epoch 0, Train Loss: 0.002138, Val Loss: 0.308353, LR: 0.010000
Epoch 100, Train Loss: 0.001643, Val Loss: 0.003132, LR: 0.010000
Stopping early at epoch 108

🔹 Fold 5/5 🔹
Epoch 0, Train Loss: 0.001838, Val Loss: 0.223217, LR: 0.010000
Stopping early at epoch 97

✅ Average Validation Loss Across 5 Folds: 0.003119
Valida