In [1]:
import os
import torch
import optuna

from transformers import set_seed

from functions import *
from utils.telegram import sendMessage
from utils.wrappers import timeit

In [2]:
# Set before any tokenizer is used; presumably this silences the HF tokenizers
# fork/parallelism warning — confirm against the tokenizers docs.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Seed via transformers' helper, then the CUDA generators explicitly.
set_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Dataset identifiers; the optimisation loop walks them in this order.
datasets = [
    'btc_news', 'eth_news',                    # news
    'reddit_r_bitcoin', 'reddit_r_ethereum',   # reddit
    'btc_tweets', 'eth_tweets',                # tweets
]

# Project name handed to run_hyperparam_opt as `project_name`.
PROJECT_NAME = 'RoBERTa Eval-Finetuning'

# Number of Optuna trials to run per dataset.
N_TRIALS = 40

In [4]:
def run_hyperparam_opt(dataset_name: str, project_name: str, n_trials: int):
    ''' Runs hyperparameter optimisation of RoBERTa on the given dataset.

    An Optuna study with direction ``maximize`` is created under the dataset's
    name and run for ``n_trials`` trials. Every trial samples one configuration
    and forwards it to ``run_and_log_finetuning``; that function's return value
    is the objective being maximised.

    Args:
        dataset_name (str): Name of the dataset to optimise hyperparameters on
        project_name (str): Name of W&B project to log results to
        n_trials (int): Number of trials Optuna should run on the dataset

    Returns:
        The finished Optuna study, so callers can inspect e.g. ``best_params``
        or the trial history. Callers that ignore the result are unaffected.
    '''

    def objective(trial):
        ''' Samples one configuration and returns its fine-tuning score. '''

        config = {
            # The range spans four orders of magnitude, so sample in the log
            # domain: uniform sampling would put ~90% of trials above 5e-3.
            'learning_rate': trial.suggest_float(
                name='learning_rate',
                low=5e-6,
                high=5e-2,
                log=True,
            ),
            'num_train_epochs': trial.suggest_int(
                name='num_train_epochs',
                low=2,
                high=9,
            ),
            'per_device_train_batch_size': trial.suggest_categorical(
                name='per_device_train_batch_size',
                choices=[8, 16, 32, 64],
            ),
            'warmup_steps': trial.suggest_int(
                name='warmup_steps',
                low=0,
                high=20,
            ),
            'weight_decay': trial.suggest_float(
                name='weight_decay',
                low=0.001,
                high=0.2,
                log=True,
            ),
        }

        return run_and_log_finetuning(dataset_name, project_name, **config)

    study = optuna.create_study(
        direction='maximize',
        study_name=dataset_name,
    )
    study.optimize(objective, n_trials=n_trials)

    # Hand the study back instead of discarding it.
    return study

In [None]:
%%capture output
# Optimise each dataset in turn and report the outcome through sendMessage.
for DATASET_NAME in datasets:
    try:
        run_hyperparam_opt(DATASET_NAME, PROJECT_NAME, N_TRIALS)
    except Exception as e:
        sendMessage(f'RoBERTa {DATASET_NAME} hyperparameter optimization returned error: {e} \U0001F614')
        # Re-raise with the original traceback; this aborts the remaining datasets.
        raise
    else:
        # Kept outside the try so a failed notification is not misreported as
        # a failed optimisation run.
        sendMessage(f'RoBERTa {DATASET_NAME} hyperparameter optimization successfully finished. \U0001F389')