In [None]:
import optuna
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification, RobertaTokenizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

class TextValueDataset(Dataset):
    """Map-style dataset that tokenizes one dataframe row per item.

    Every item is built lazily in ``__getitem__``: the ``generated_text``
    column is tokenized to a fixed length and the ``suddenness`` column is
    shifted down by one to form the class label.
    """

    def __init__(self, dataframe, tokenizer, max_length):
        # Nothing is tokenized here; the inputs are only stored.
        self.max_length = max_length
        self.tokenizer = tokenizer
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``input_ids``, ``attention_mask`` and ``labels`` for one row.

        ``index`` is positional (``iloc``), not a dataframe index label.
        """
        record = self.dataframe.iloc[index]
        sentence = record['generated_text']
        # Ratings are shifted to start at zero (0-4 classes);
        # assumes the raw values are 1-5 -- TODO confirm against the data.
        target = record['suddenness'] - 1

        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
            return_attention_mask=True,
        )
        encoded = self.tokenizer.encode_plus(sentence, **tokenizer_options)

        # The tokenizer output is flattened to 1-D so that items can be
        # stacked into a batch.
        sample = {}
        for key in ('input_ids', 'attention_mask'):
            sample[key] = encoded[key].flatten()
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample

# Read one data split from disk
def load_dataset(file_path):
    """Parse the CSV at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)

# Create DataLoader
def create_dataloader(df, tokenizer, max_length, batch_size, shuffle=True):
    """Wrap ``df`` in a ``TextValueDataset`` and return a ``DataLoader`` over it.

    Args:
        df: Dataframe handed to ``TextValueDataset``.
        tokenizer: Tokenizer handed to ``TextValueDataset``.
        max_length: Token length handed to ``TextValueDataset``.
        batch_size: Number of items per batch.
        shuffle: Whether the loader reshuffles the data each epoch. Defaults
            to ``True`` (the previous hard-coded behaviour); pass ``False``
            for validation/test loaders where a stable order is wanted.

    Returns:
        A ``torch.utils.data.DataLoader``; the underlying dataset is available
        as its ``.dataset`` attribute.
    """
    dataset = TextValueDataset(df, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


# Pretrained checkpoint name, reused for the tokenizer and for every model built later
model_name = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# Read the train / validation / test CSV splits, in that order
train_df, val_df, test_df = (
    load_dataset(split_path)
    for split_path in ('data/train.csv', 'data/val.csv', 'data/test.csv')
)

# Model factory passed to Trainer via ``model_init``
def model_init():
    """Load the ``model_name`` checkpoint as a 5-label sequence classifier."""
    classifier = RobertaForSequenceClassification.from_pretrained(
        model_name,
        num_labels=5,
    )
    return classifier

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores whose last axis is reduced with ``argmax``.

    Returns:
        Dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = predictions.argmax(-1)
    accuracy = accuracy_score(labels, preds)
    # zero_division=0 keeps the same numeric result as the default ("warn",
    # which also scores undefined precision as 0) but stops scikit-learn from
    # emitting an UndefinedMetricWarning on every evaluation step in which
    # some class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Baseline training configuration; the per-trial hyperparameters
# (learning rate, weight decay, batch size, epochs) are filled in by `objective`.
training_args = TrainingArguments(**{
    # Where checkpoints and logs are written
    "output_dir": './results',
    "logging_dir": './logs',
    # Evaluate and checkpoint on the same step-based schedule
    "eval_strategy": "steps",
    "save_strategy": "steps",
    # After training, restore the checkpoint with the best eval_loss.
    # NOTE(review): no early-stopping callback is registered in this cell, so
    # this only selects the restored checkpoint; it does not stop training.
    "load_best_model_at_end": True,
    "metric_for_best_model": "eval_loss",
})

# Define the Optuna objective function
def objective(trial):
    """Fine-tune one model with trial-sampled hyperparameters and score it.

    Args:
        trial: Optuna trial used to sample ``learning_rate``, ``weight_decay``,
            ``batch_size``, ``max_length`` and ``num_train_epochs``.

    Returns:
        The ``eval_loss`` reported by ``trainer.evaluate()`` on the validation
        dataset; the study minimizes this value.
    """
    import copy  # local import keeps this block self-contained

    # Define hyperparameter search space
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.01, 0.3)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    max_length = trial.suggest_int("max_length", 32, 128, step=32)
    num_train_epochs = trial.suggest_int("num_train_epochs", 3, 10)

    # Apply the sampled values to a per-trial copy so the module-level
    # `training_args` baseline is never mutated by a trial.
    trial_args = copy.copy(training_args)
    trial_args.learning_rate = learning_rate
    trial_args.weight_decay = weight_decay
    trial_args.per_device_train_batch_size = batch_size
    trial_args.per_device_eval_batch_size = batch_size
    trial_args.num_train_epochs = num_train_epochs

    # Trainer batches the data itself, so it is given datasets directly rather
    # than DataLoaders. The datasets are rebuilt per trial because max_length
    # is one of the tuned parameters. The test split is deliberately not
    # touched during the search.
    train_dataset = TextValueDataset(train_df, tokenizer, max_length)
    eval_dataset = TextValueDataset(val_df, tokenizer, max_length)

    # Initialize Trainer with the per-trial parameters
    trainer = Trainer(
        model_init=model_init,
        args=trial_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # Train the model
    # NOTE(review): no early-stopping callback is passed to Trainer, so every
    # trial runs for its full num_train_epochs.
    trainer.train()

    # Evaluate the model and return validation loss for Optuna to minimize
    eval_results = trainer.evaluate()
    return eval_results["eval_loss"]

# Launch the search: every trial calls `objective`, and the study minimizes its return value
study = optuna.create_study(direction="minimize")
study.optimize(
    objective,
    n_trials=20,  # raise or lower to fit the available compute budget
)

# Report the parameter set of the best trial
print("Best hyperparameters: ", study.best_params)

[I 2024-10-31 17:34:43,317] A new study created in memory with name: no-name-2263c024-5d52-41e5-9368-e6ec648795bd
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.5135,1.418873,0.414815,0.376066,0.414815,0.357173
1000,1.3932,1.406431,0.437037,0.371849,0.437037,0.395202
1500,1.3006,1.478686,0.427778,0.381958,0.427778,0.376794
2000,1.2278,1.518735,0.390741,0.436761,0.390741,0.369585
2500,1.1104,1.57418,0.368519,0.402329,0.368519,0.368643
3000,0.9819,1.705896,0.333333,0.386655,0.333333,0.350598
3500,0.9069,1.898836,0.368519,0.38897,0.368519,0.375708
4000,0.7965,2.011498,0.355556,0.393253,0.355556,0.368447
4500,0.7151,2.067103,0.362963,0.390683,0.362963,0.37335
5000,0.6328,2.148188,0.377778,0.399313,0.377778,0.385674


[I 2024-10-31 17:50:55,740] Trial 0 finished with value: 1.4064314365386963 and parameters: {'learning_rate': 1.2375477379900974e-05, 'weight_decay': 0.2807024702444702, 'batch_size': 8, 'max_length': 96, 'num_train_epochs': 10}. Best is trial 0 with value: 1.4064314365386963.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.4985,1.405672,0.433333,0.422089,0.433333,0.366642
1000,1.3775,1.397285,0.435185,0.419762,0.435185,0.407677
1500,1.26,1.532288,0.411111,0.391981,0.411111,0.365876
2000,1.1752,1.630949,0.37963,0.391327,0.37963,0.368166
2500,1.0239,1.591707,0.388889,0.404913,0.388889,0.384618
3000,0.8732,1.740285,0.375926,0.40021,0.375926,0.379045
3500,0.7902,1.85914,0.403704,0.418263,0.403704,0.409802
4000,0.6679,1.972553,0.409259,0.430208,0.409259,0.416281
4500,0.5687,2.123847,0.387037,0.431167,0.387037,0.402074
5000,0.4988,2.204791,0.398148,0.431361,0.398148,0.41138


[I 2024-10-31 18:04:51,518] Trial 1 finished with value: 1.3972845077514648 and parameters: {'learning_rate': 1.611664237062557e-05, 'weight_decay': 0.1982803541877104, 'batch_size': 8, 'max_length': 64, 'num_train_epochs': 9}. Best is trial 1 with value: 1.3972845077514648.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.6075,1.579971,0.27963,0.078193,0.27963,0.122212
1000,1.5991,1.585475,0.259259,0.067215,0.259259,0.106754
1500,1.5967,1.587132,0.259259,0.067215,0.259259,0.106754
2000,1.5997,1.592199,0.27963,0.078193,0.27963,0.122212
2500,1.5975,1.587434,0.259259,0.067215,0.259259,0.106754
3000,1.5945,1.580332,0.27963,0.078193,0.27963,0.122212
3500,1.5963,1.583525,0.259259,0.067215,0.259259,0.106754
4000,1.5935,1.580796,0.27963,0.078193,0.27963,0.122212
4500,1.5967,1.584209,0.27963,0.078193,0.27963,0.122212
5000,1.5903,1.578552,0.259259,0.067215,0.259259,0.106754


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-31 18:18:35,084] Trial 2 finished with value: 1.5785515308380127 and parameters: {'learning_rate': 4.989881796534001e-05, 'weight_decay': 0.02011365437102238, 'batch_size': 8, 'max_length': 96, 'num_train_epochs': 9}. Best is trial 1 with value: 1.3972845077514648.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.4994,1.41512,0.431481,0.363146,0.431481,0.378273
1000,1.3784,1.400725,0.416667,0.393941,0.416667,0.386811
1500,1.2715,1.482646,0.418519,0.395131,0.418519,0.366273
2000,1.2015,1.540064,0.372222,0.357136,0.372222,0.35507
2500,1.0789,1.549845,0.388889,0.399145,0.388889,0.380539
3000,0.9834,1.622156,0.390741,0.391014,0.390741,0.387248
3500,0.9268,1.681188,0.37963,0.393425,0.37963,0.38278


[I 2024-10-31 18:28:14,656] Trial 3 finished with value: 1.40072500705719 and parameters: {'learning_rate': 1.17901691213219e-05, 'weight_decay': 0.043738892396425956, 'batch_size': 8, 'max_length': 64, 'num_train_epochs': 6}. Best is trial 1 with value: 1.3972845077514648.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.447,1.386817,0.416667,0.349446,0.416667,0.376236
1000,1.2328,1.584324,0.381481,0.341998,0.381481,0.358478
1500,0.9619,1.765047,0.388889,0.395197,0.388889,0.389599


[I 2024-10-31 18:33:49,697] Trial 4 finished with value: 1.3868168592453003 and parameters: {'learning_rate': 2.808714294276454e-05, 'weight_decay': 0.2464485891227325, 'batch_size': 16, 'max_length': 96, 'num_train_epochs': 6}. Best is trial 4 with value: 1.3868168592453003.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.527,1.513245,0.388889,0.250071,0.388889,0.281712
1000,1.4331,1.415172,0.437037,0.492939,0.437037,0.390833
1500,1.3107,1.518444,0.409259,0.369948,0.409259,0.369575
2000,1.2123,1.624337,0.403704,0.402436,0.403704,0.384719
2500,1.0593,1.645497,0.409259,0.418509,0.409259,0.407428
3000,0.9395,1.72624,0.381481,0.400088,0.381481,0.388185


  _warn_prf(average, modifier, msg_start, len(result))
