*Hyperparameter Search Idea for BERT-based Model*

Randomly sample a value for each hyperparameter to build multiple configurations, train each configuration for a few epochs, and compare the results. Finally, keep the best-performing hyperparameter configuration.

In [None]:
def _sample_search_config(param_grid):
    """Draw one random hyperparameter configuration from ``param_grid``.

    Each value is picked with ``np.random.choice`` (so ``np.random.seed``
    controls reproducibility) and cast to a native Python type, because
    ``np.random.choice`` returns NumPy scalars.

    Args:
        param_grid: Mapping from hyperparameter name to the list of
            candidate values. Must contain every key sampled below.

    Returns:
        A dict with one native-typed value per hyperparameter.
    """
    def pick(name, cast):
        return cast(np.random.choice(param_grid[name]))

    # Keys are sampled in this fixed order so a seeded run is reproducible.
    return {
        'learning_rate': pick('learning_rate', float),
        'lstm_hidden_size': pick('lstm_hidden_size', int),
        'lstm_layers': pick('lstm_layers', int),
        'lstm_dropout': pick('lstm_dropout', float),
        'hidden_dropout': pick('hidden_dropout', float),
        'use_layer_norm': pick('use_layer_norm', bool),
        'freeze_bert_layers': pick('freeze_bert_layers', int),
        'weight_decay': pick('weight_decay', float),
        'lr_multiplier': pick('lr_multiplier', int),
    }


def run_hyperparameter_search(n_trials=5, batch_size=8, max_length=128,
                              trial_epochs=3, patience=2):
    """Run a random hyperparameter search and return the best configuration.

    For each trial a configuration is sampled at random, a fresh
    ``EnhancedBertForIdiomDetection`` is trained with ``train_model`` for
    ``trial_epochs`` epochs, and the result is scored with ``evaluate`` on
    the validation loader (``apply_postprocessing=True``). The configuration
    with the highest ``metrics['f1']`` wins.

    Args:
        n_trials: Number of random configurations to try.
        batch_size: Batch size forwarded to ``get_dataloaders``.
        max_length: Maximum sequence length forwarded to ``get_dataloaders``.
        trial_epochs: Epochs to train each trial; fewer epochs means a
            faster (but noisier) comparison.
        patience: Early-stopping patience forwarded to ``train_model``;
            kept short for the search.

    Returns:
        The best configuration as a dict of the sampled hyperparameters plus
        an ``'f1_score'`` entry, or ``None`` when no trial yielded a
        comparable score (e.g. ``n_trials <= 0``).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Third return value of get_dataloaders is not needed for the search.
    train_loader, val_loader, _ = get_dataloaders(
        batch_size=batch_size,
        max_length=max_length
    )

    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

    # Define the hyperparameter search space
    param_grid = {
        'learning_rate': [1e-5, 2e-5, 3e-5, 5e-5],
        'lstm_hidden_size': [256, 384, 512],
        'lstm_layers': [1, 2, 3],
        'lstm_dropout': [0.2, 0.3, 0.4],
        'hidden_dropout': [0.1, 0.2, 0.3, 0.4],
        'use_layer_norm': [True, False],
        'freeze_bert_layers': [0, 3, 6],
        'weight_decay': [0.0, 0.01, 0.05],
        'lr_multiplier': [5, 10, 15]
    }

    # Start below any reachable F1 so that a trial scoring exactly 0 is
    # still recorded as the best-so-far instead of leaving best_config None.
    best_f1 = float('-inf')
    best_config = None
    results = []

    for trial in range(n_trials):
        print(f"\n===== Trial {trial+1}/{n_trials} =====")

        config = _sample_search_config(param_grid)

        print("Configuration:")
        for k, v in config.items():
            print(f"  {k}: {v}")

        # Initialize model with sampled hyperparameters
        model = EnhancedBertForIdiomDetection(
            lstm_hidden_size=config['lstm_hidden_size'],
            lstm_layers=config['lstm_layers'],
            lstm_dropout=config['lstm_dropout'],
            hidden_dropout=config['hidden_dropout'],
            use_layer_norm=config['use_layer_norm'],
            freeze_bert_layers=config['freeze_bert_layers']
        )

        # Train for a few epochs to evaluate the configuration
        model = train_model(
            train_loader,
            val_loader,
            tokenizer,
            model=model,
            epochs=trial_epochs,
            lr=config['learning_rate'],
            weight_decay=config['weight_decay'],
            lr_multiplier=config['lr_multiplier'],
            patience=patience
        )

        # Evaluate with post-processing
        metrics = evaluate(model, val_loader, tokenizer, device, apply_postprocessing=True)
        trial_f1 = metrics['f1']

        print(f"Trial {trial+1} F1 Score: {trial_f1:.4f}")

        # Save results (the score is stored alongside the hyperparameters)
        config['f1_score'] = trial_f1
        results.append(config)

        # Update best configuration
        if trial_f1 > best_f1:
            best_f1 = trial_f1
            best_config = config
            print(f"New best configuration found! F1: {best_f1:.4f}")

        # Drop this trial's model before building the next one so two
        # models are not held in memory at the same time.
        del model
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    # Print results summary
    print("\n===== Hyperparameter Search Results =====")
    if best_config is None:
        # No trial ran, or no trial produced a comparable F1 (e.g. NaN).
        print("No valid configuration was evaluated.")
        return None

    print(f"Best F1 Score: {best_f1:.4f}")
    print("Best Configuration:")
    for k, v in best_config.items():
        print(f"  {k}: {v}")

    # Sort all results by F1 score
    results.sort(key=lambda x: x['f1_score'], reverse=True)
    print("\nTop 3 configurations:")
    for rank, ranked_config in enumerate(results[:3], start=1):
        print(f"Rank {rank}: F1={ranked_config['f1_score']:.4f}")
        for k, v in ranked_config.items():
            if k != 'f1_score':
                print(f"  {k}: {v}")

    return best_config