# FinBERT Hyperparameter Sweep 

This notebook extends the standard FinBERT training with:
- Hyperparameter optimization using W&B sweeps
- Automated experiment tracking
- Comparison of different configurations

## Prerequisites
```bash
pip install wandb
wandb login
```


## 1. Setup and Imports

In [None]:
from pathlib import Path
import shutil
import os
import logging
import sys
import numpy as np
sys.path.append('..')

from sklearn.metrics import classification_report
from transformers import AutoModelForSequenceClassification
import torch
from torch.nn import CrossEntropyLoss

from finbert.finbert import *
import finbert.utils as tools

# Weights & Biases
import wandb

%load_ext autoreload
%autoreload 2

# Imported explicitly so `pd` does not depend on whatever names
# `from finbert.finbert import *` happens to re-export.
import pandas as pd

# The notebook adds '..' to sys.path, so the project root is taken to be the
# parent of the current working directory.
project_dir = Path.cwd().parent

# Do not truncate long text columns when DataFrames are displayed. The full
# option key is used because pandas only guarantees abbreviated keys while
# they remain unambiguous.
pd.set_option('display.max_colwidth', None)

# Only ERROR and above are emitted through the root logger.
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.ERROR)

print("Imports loaded successfully")
print(f"Project directory: {project_dir}")

## 2. Configuration

Set up paths and W&B project name.

In [None]:
# Filesystem locations, both resolved relative to the project root.
cl_path = project_dir.joinpath('models', 'sentiment')          # model directory
cl_data_path = project_dir.joinpath('data', 'sentiment_data')  # dataset directory

# Weights & Biases settings. WANDB_ENTITY is left as None and is not
# referenced by the other cells visible in this notebook.
WANDB_PROJECT = "finbert-hyperparameter-sweep"
WANDB_ENTITY = None

# Echo the resolved settings so a wrong working directory is noticed early.
print(
    f"Model path: {cl_path}",
    f"Data path: {cl_data_path}",
    f"W&B Project: {WANDB_PROJECT}",
    sep="\n",
)

## 3. Define Sweep Configuration

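This cell defines the hyperparameter search space and the metric to optimize. W&B picks a new combination of values for every run (using Bayesian search here); the training function must log a metric under the same name as `metric.name` for the sweep to optimize it.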


In [None]:
# Search space handed to W&B. Each entry under 'parameters' is either a
# discrete list of candidates ('values') or a continuous range described by
# 'distribution' / 'min' / 'max'.
sweep_config = {
    # Search strategy: 'grid', 'random', or 'bayes'
    'method': 'bayes',
    # Quantity the sweep tries to minimize
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values', 'min': 1e-5, 'max': 5e-5,
        },
        'num_train_epochs': {'values': [3, 4, 5, 6]},
        'train_batch_size': {'values': [16, 32, 64]},
        'warm_up_proportion': {
            'distribution': 'uniform', 'min': 0.1, 'max': 0.3,
        },
        'max_seq_length': {'values': [48, 64, 96]},
        # Advanced parameters
        'discriminate': {'values': [True, False]},
        'gradual_unfreeze': {'values': [True, False]},
    },
}

print(
    "Sweep configuration created",
    f"  Method: {sweep_config['method']}",
    f"  Optimization metric: {sweep_config['metric']['name']}",
    sep="\n",
)


In [None]:
def train_with_config(config=None):
    """Train and evaluate FinBERT for a single W&B run and log its metrics.

    Hyperparameters are always read back from ``wandb.config`` after
    ``wandb.init``, so the function can be used as the target of a sweep
    agent (which supplies the sampled values) or called directly with an
    explicit ``config``.

    Args:
        config: Optional configuration forwarded unchanged to ``wandb.init``.

    Returns:
        The metrics dict produced by ``calculate_metrics`` for the test split.
    """
    with wandb.init(config=config):
        config = wandb.config

        # Print basic config
        print("Starting run:")
        print(f"LR={config.learning_rate}, Epochs={config.num_train_epochs}")

        # Fresh pretrained encoder with a 3-way classification head per run.
        bertmodel = AutoModelForSequenceClassification.from_pretrained(
            'bert-base-uncased',
            num_labels=3
        )

        # Create minimal FinBERT config
        finbert_config = Config(
            data_dir=cl_data_path,
            bert_model=bertmodel,
            num_train_epochs=config.num_train_epochs,
            model_dir=project_dir / 'models' / 'sentiment',
            max_seq_length=config.max_seq_length,
            train_batch_size=config.train_batch_size,
            learning_rate=config.learning_rate,
            output_mode='classification',
            warm_up_proportion=config.warm_up_proportion,
            local_rank=-1
        )

        # The sweep also samples these flags; forward them so the values
        # recorded for the run are the values FinBERT actually sees. They are
        # skipped when the run config does not define them.
        # NOTE(review): train_loop does not read these flags itself; whether
        # they take effect depends on FinBert.create_the_model -- confirm.
        for flag in ('discriminate', 'gradual_unfreeze'):
            if flag in config:
                setattr(finbert_config, flag, config[flag])

        # Initialize FinBERT
        finbert = FinBert(finbert_config)
        finbert.prepare_model(label_list=['positive', 'negative', 'neutral'])

        # Load the training split and build the model before touching the
        # test split.
        # NOTE(review): upstream FinBERT's get_data() appears to recompute the
        # optimisation-step count on every call, so loading 'test' before
        # create_the_model() would size the LR schedule from the test set --
        # confirm against finbert/finbert.py.
        train_data = finbert.get_data('train')
        model = finbert.create_the_model()

        # Train with the loop defined in this notebook.
        trained_model = train_loop(finbert, train_data, model)

        # Evaluate
        test_data = finbert.get_data('test')
        results = finbert.evaluate(examples=test_data, model=trained_model)
        # Predicted class index = position of the largest logit.
        results['prediction'] = results.predictions.apply(lambda x: np.argmax(x, axis=0))

        # Compute metrics
        metrics = calculate_metrics(results, finbert)

        # The sweep optimizes a metric named 'val_loss'; publish the
        # evaluation loss under that key too, otherwise the sweep has no
        # value to optimize. The returned dict is left untouched.
        logged = dict(metrics)
        logged.setdefault('val_loss', metrics['loss'])
        wandb.log(logged)

        return metrics


def train_loop(finbert, train_data, model):
    """Fine-tune ``model`` on ``train_data`` with a class-weighted loss.

    Runs ``finbert.config.num_train_epochs`` passes over the training loader,
    stepping ``finbert.optimizer`` and ``finbert.scheduler`` after every
    batch. The summed batch loss of each epoch is logged to W&B as
    ``epoch_loss`` and printed.

    Args:
        finbert: Prepared FinBert instance; must expose ``get_loader``,
            ``optimizer``, ``scheduler``, ``device``, ``class_weights``,
            ``num_labels`` and ``config.num_train_epochs``.
        train_data: Training examples accepted by ``finbert.get_loader``.
        model: Model to train; called with
            ``(input_ids, attention_mask, token_type_ids)`` and expected to
            return the logits as its first output.

    Returns:
        The same ``model`` object after training.
    """
    train_loader = finbert.get_loader(train_data, 'train')
    optimizer = finbert.optimizer
    scheduler = finbert.scheduler
    device = finbert.device

    # The weighted loss does not change between batches, so build it (and
    # move the class weights to the device) once instead of on every step.
    loss_fct = CrossEntropyLoss(weight=finbert.class_weights.to(device))

    model.train()

    for epoch in range(finbert.config.num_train_epochs):
        total_loss = 0

        for batch in train_loader:
            # Each batch unpacks into five tensors; the last one is not
            # needed for the loss.
            input_ids, attention_mask, token_type_ids, label_ids, _agree_ids = (
                t.to(device) for t in batch
            )

            logits = model(input_ids, attention_mask, token_type_ids)[0]
            loss = loss_fct(logits.view(-1, finbert.num_labels), label_ids.view(-1))

            loss.backward()
            total_loss += loss.item()

            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        # total_loss is the sum over batches, not a per-batch average.
        wandb.log({"epoch_loss": total_loss})
        print(f"Epoch {epoch}: Loss={total_loss:.4f}")

    return model


def calculate_metrics(results, finbert):
    """Compute loss, accuracy and F1 scores from evaluation results.

    Args:
        results: Table with a ``predictions`` column (one logit vector per
            row), a ``labels`` column (true class indices) and a
            ``prediction`` column (predicted class indices).
        finbert: Object exposing ``class_weights``, the per-class weight
            tensor used for the weighted cross-entropy.

    Returns:
        Dict with keys ``loss``, ``accuracy``, ``f1_positive``,
        ``f1_negative``, ``f1_neutral`` and ``f1_macro``.
    """
    from sklearn.metrics import f1_score

    # Stack the per-row logit vectors into one array before handing them to
    # torch; building a tensor from a list of arrays is slow.
    logits = torch.tensor(np.array(list(results['predictions'])), dtype=torch.float32)
    # Cross-entropy targets must be int64 class indices.
    targets = torch.tensor(np.array(list(results['labels'])), dtype=torch.long)

    # Put the class weights on the same device/dtype as the logits so the
    # loss works wherever the weights currently live.
    weights = finbert.class_weights.to(device=logits.device, dtype=logits.dtype)
    loss = CrossEntropyLoss(weight=weights)(logits, targets).item()

    accuracy = float((results['labels'] == results['prediction']).mean())

    # Pin the class ids so the per-class array always has three entries in
    # positive/negative/neutral order, even if a class is missing from both
    # the labels and the predictions.
    class_ids = [0, 1, 2]
    f1_scores = f1_score(
        results['labels'], results['prediction'], labels=class_ids, average=None
    )
    f1_macro = f1_score(
        results['labels'], results['prediction'], labels=class_ids, average='macro'
    )

    return {
        "loss": loss,
        "accuracy": accuracy,
        "f1_positive": f1_scores[0],
        "f1_negative": f1_scores[1],
        "f1_neutral": f1_scores[2],
        "f1_macro": f1_macro,
    }