Claude Bot - Optuna Examples

Optuna with Your Exact Current Workflow

In [None]:
def objective_your_style(trial):
    """Optuna objective: mean 4-fold cross-validation score over ``X_train``.

    Samples ``neurons``, ``lr`` and ``dropout`` from ``trial``, builds one
    model per fold with ``create_model`` and scores it on the held-out part
    of the fold with ``evaluate_model``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The mean of the four per-fold scores.
    """
    # Search space. The sampling order is kept fixed: neurons, lr, dropout.
    n_units = trial.suggest_int('neurons', 4, 64)
    # NOTE(review): `lr` is sampled but not forwarded anywhere in the visible
    # code; presumably the elided training step below consumes it -- confirm.
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-1, log=True)
    drop_rate = trial.suggest_float('dropout', 0.0, 0.5)

    # Shuffled 4-fold split with a fixed seed, so every trial sees the same folds.
    splitter = KFold(n_splits=4, shuffle=True, random_state=42)
    fold_scores = []

    for fit_rows, holdout_rows in splitter.split(X_train):
        X_fold_train = X_train[fit_rows]
        X_fold_val = X_train[holdout_rows]
        y_fold_train = y_train[fit_rows]
        y_fold_val = y_train[holdout_rows]

        # A fresh model per fold so no weights leak between folds.
        model = create_model(neurons=n_units, dropout=drop_rate)
        # ... train model ...
        fold_scores.append(evaluate_model(model, X_fold_val, y_fold_val))

    return np.mean(fold_scores)

# Then proceed with your existing workflow:
study = optuna.create_study(direction='minimize')
# Study.optimize() only runs the trials and returns None, so the winning
# hyperparameters have to be read from the study afterwards.
study.optimize(objective_your_style, n_trials=100)
best_params = study.best_params

# Train final model on full X_train with best params
final_model = train_final_model(best_params, X_train, y_train, X_val, y_val)

# Evaluate on X_test (once)
test_loss = final_model.evaluate(X_test, y_test)

Recommendation for Your Case:

In [None]:
# Best of both worlds: Optuna + your existing CV workflow
def objective(trial):
    """Optuna objective that delegates scoring to the existing k-fold routine.

    Args:
        trial: Optuna trial used to sample ``neurons``, ``lr``, ``dropout``
            and ``batch_size``.

    Returns:
        Whatever ``k_fold_evaluate`` returns for the sampled parameters on
        ``X_train`` / ``y_train`` with ``k=4``.
    """
    # Sample in a fixed order: neurons, lr, dropout, batch_size.
    n_units = trial.suggest_int('neurons', 4, 64)
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-1, log=True)
    drop_rate = trial.suggest_float('dropout', 0.0, 0.5)
    batch = trial.suggest_categorical('batch_size', [8, 16, 32])

    # Bundle the sampled values under the keys the k-fold routine receives.
    candidate = dict(
        neurons=n_units,
        lr=learning_rate,
        dropout=drop_rate,
        batch_size=batch,
    )

    # Your existing k-fold evaluation
    return k_fold_evaluate(candidate, X_train, y_train, k=4)

# Let Optuna search the hyperparameter space instead of a manual sweep.
# direction='minimize' means a lower objective() return value is better.
study = optuna.create_study(direction='minimize')
# Run the objective for n_trials=100 trials.
study.optimize(objective, n_trials=100)

# Continue with your workflow using the best params found by the study
# (Optuna exposes them as `study.best_params`).

Integrating Both Systems  
Here's how to combine Optuna with your adaptive system:

In [None]:
class AdaptiveTrainer:
    """Training loop that lowers the learning rate and grows the batch size on plateaus.

    After every epoch the validation loss is recorded. When a straight line
    fitted through the last ``patience`` validation losses is flatter than
    ``grad_threshold``, the learning rate is multiplied by ``lr_multiplier``
    and the batch size by ``batch_multiplier`` (capped at ``len(X_train)``).

    NOTE(review): ``train_epoch`` and ``evaluate`` are called on ``self`` but
    are not defined in this class; presumably they are supplied by a subclass
    or added elsewhere -- confirm before use.

    Args:
        initial_lr: Learning rate handed to the Adam optimizer at the start.
        initial_batch_size: Batch size used until the first adaptation.
        lr_multiplier: Factor applied to the learning rate on each adaptation.
        batch_multiplier: Factor applied to the batch size on each adaptation;
            may be fractional, the result is rounded to an integer.
        grad_threshold: Adaptation triggers when the absolute slope of the
            recent validation losses is below this value.
        patience: Number of most recent validation losses used for the slope.
    """

    def __init__(self, initial_lr, initial_batch_size, lr_multiplier=0.5,
                 batch_multiplier=2, grad_threshold=1, patience=10):
        self.initial_lr = initial_lr
        self.initial_batch_size = initial_batch_size
        self.lr_multiplier = lr_multiplier
        self.batch_multiplier = batch_multiplier
        self.grad_threshold = grad_threshold
        self.patience = patience

    def train_with_adaptation(self, model, X_train, y_train, X_val, y_val, epochs=100):
        """Train ``model`` for ``epochs`` epochs, adapting lr and batch size on plateaus.

        Args:
            model: Object exposing ``parameters()`` for ``torch.optim.Adam``.
            X_train, y_train: Training data, forwarded to ``self.train_epoch``.
                ``len(X_train)`` caps the batch size.
            X_val, y_val: Validation data, forwarded to ``self.evaluate``.
            epochs: Number of epochs to run.

        Returns:
            List with one validation loss per epoch (empty when ``epochs`` is 0).
        """
        current_lr = self.initial_lr
        current_batch_size = self.initial_batch_size

        optimizer = torch.optim.Adam(model.parameters(), lr=current_lr)
        loss_history = []

        for epoch in range(epochs):
            # Train one epoch, then record the validation loss for plateau detection.
            self.train_epoch(model, optimizer, X_train, y_train, current_batch_size)
            loss_history.append(self.evaluate(model, X_val, y_val))

            # NOTE(review): the history is never reset after an adaptation, so
            # while the recent losses stay flat this branch is taken on every
            # following epoch -- confirm that is intended (no cool-down).
            if not self.should_adapt(loss_history):
                continue

            current_lr *= self.lr_multiplier

            # batch_multiplier may be fractional (e.g. 1.5); a batch size has to
            # stay an integer, so round before capping at the dataset size.
            grown = int(round(current_batch_size * self.batch_multiplier))
            current_batch_size = min(grown, len(X_train))  # Don't exceed dataset size

            # Push the new learning rate into the existing optimizer in place.
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr

            print(f"Epoch {epoch}: Adapted lr={current_lr:.6f}, "
                  f"batch_size={current_batch_size}")

        return loss_history

    def should_adapt(self, loss_history):
        """Return True when the last ``patience`` losses have flattened out.

        Args:
            loss_history: Validation losses recorded so far, oldest first.

        Returns:
            ``False`` while fewer than ``patience`` losses exist; otherwise
            whether the absolute slope of a degree-1 least-squares fit through
            the last ``patience`` losses is below ``grad_threshold``.
        """
        if len(loss_history) < self.patience:
            return False

        # Slope of the best-fit line through the most recent losses.
        recent_losses = loss_history[-self.patience:]
        x = np.arange(len(recent_losses))
        slope = np.polyfit(x, recent_losses, 1)[0]

        # Convert NumPy's bool_ to a plain Python bool for callers.
        return bool(abs(slope) < self.grad_threshold)

def objective_with_adaptive_training(trial):
    """Optuna objective: mean final validation loss of adaptive training over 4 folds.

    Optuna tunes both the starting point (initial learning rate and batch
    size) and the adaptation strategy (multipliers, slope threshold, patience)
    of ``AdaptiveTrainer``.

    Args:
        trial: Optuna trial used to sample the trainer settings.

    Returns:
        The mean, across the four folds, of the last validation loss returned
        by ``AdaptiveTrainer.train_with_adaptation``.
    """
    # Dict entries are evaluated top to bottom, so the sampling order is:
    # initial_lr, initial_batch_size, lr_multiplier, batch_multiplier,
    # grad_threshold, patience.
    trainer_settings = {
        # Optuna tunes INITIAL parameters ...
        'initial_lr': trial.suggest_float('initial_lr', 1e-4, 1e-1, log=True),
        'initial_batch_size': trial.suggest_categorical('initial_batch_size', [8, 16, 32]),
        # ... and the adaptation strategy itself.
        'lr_multiplier': trial.suggest_float('lr_multiplier', 0.3, 0.8),
        'batch_multiplier': trial.suggest_float('batch_multiplier', 1.5, 3.0),
        'grad_threshold': trial.suggest_float('grad_threshold', 0.0001, 0.01, log=True),
        'patience': trial.suggest_int('patience', 5, 20),
    }

    # Cross-validation with adaptive training; fixed seed keeps folds identical across trials.
    splitter = KFold(n_splits=4, shuffle=True, random_state=42)
    final_losses = []

    for fit_rows, holdout_rows in splitter.split(X_train):
        X_fold_train = X_train[fit_rows]
        X_fold_val = X_train[holdout_rows]
        y_fold_train = y_train[fit_rows]
        y_fold_val = y_train[holdout_rows]

        # Fresh model and trainer for every fold.
        model = create_model()
        trainer = AdaptiveTrainer(**trainer_settings)

        # Train with adaptation; the fold's score is the last epoch's validation loss.
        fold_history = trainer.train_with_adaptation(
            model, X_fold_train, y_fold_train, X_fold_val, y_fold_val
        )
        final_losses.append(fold_history[-1])

    return np.mean(final_losses)

# Run optimization: minimise the mean cross-validated final validation loss
# returned by objective_with_adaptive_training, using n_trials=100 trials.
study = optuna.create_study(direction='minimize')
study.optimize(objective_with_adaptive_training, n_trials=100)

Look at all code after:   
In study.optimize(..., n_trials=100), if you pass n_trials=100_000 instead, does Optuna stop once it has already tried the most meaningful combinations, or will it actually run all 100,000 trials?
