In [1]:
import optuna
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import Trainer, TrainingArguments, RobertaForSequenceClassification, RobertaTokenizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

class TextValueDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        row = self.dataframe.iloc[index]
        text = row['generated_text']
        label = row['suddenness'] - 1  # Convert value to 0-4 classes
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
            return_attention_mask=True
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Load dataset
def load_dataset(file_path):
    df = pd.read_csv(file_path)
    return df

# Create DataLoader
def create_dataloader(df, tokenizer, max_length, batch_size):
    dataset = TextValueDataset(df, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


# Define the model and tokenizer
model_name = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

train_df = load_dataset('data/train.csv')
val_df = load_dataset('data/val.csv')
test_df = load_dataset('data/test.csv')

# Define model initialization function
def model_init():
    return RobertaForSequenceClassification.from_pretrained(model_name, num_labels=5)

# Compute metrics function for evaluation
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    preds = predictions.argmax(-1)
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Base training arguments (will be modified by Optuna)
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"  # Used for early stopping
)

# Define the Optuna objective function
def objective(trial):
    # Define hyperparameter search space
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.01, 0.3)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    max_length = trial.suggest_int("max_length", 64, 256, step=32)
    num_train_epochs = trial.suggest_int("num_train_epochs", 3, 10)
    
    # Update the training arguments with trial parameters
    training_args.learning_rate = learning_rate
    training_args.weight_decay = weight_decay
    training_args.per_device_train_batch_size = batch_size
    training_args.per_device_eval_batch_size = batch_size
    training_args.num_train_epochs = num_train_epochs
    
    # Create a dataloader dynamically for the different max_length
    train_loader = create_dataloader(train_df, tokenizer, max_length, batch_size)
    val_loader = create_dataloader(val_df, tokenizer, max_length, batch_size)
    test_loader = create_dataloader(test_df, tokenizer, max_length, batch_size)
    
    # Initialize Trainer with dynamic parameters
    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=train_loader.dataset,
        eval_dataset=val_loader.dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    
    # Train the model
    trainer.train()
    
    # Evaluate the model and return validation loss for Optuna to minimize
    eval_results = trainer.evaluate()
    return eval_results["eval_loss"]

# Run the hyperparameter search with Optuna
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)  # Adjust n_trials based on resources

# Output the best parameters found by Optuna
print("Best hyperparameters: ", study.best_params)

[I 2024-10-29 21:40:19,175] A new study created in memory with name: no-name-00e6da03-2840-4888-aa65-8461e92c70ef
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5211,1.419441,0.422222,0.286713,0.422222,0.330044
2,1.4328,1.467599,0.398148,0.344975,0.398148,0.362912
3,1.3353,1.517758,0.396296,0.37937,0.396296,0.371765
4,1.098,1.638979,0.361111,0.403331,0.361111,0.361424
5,0.9237,1.870518,0.407407,0.388677,0.407407,0.396653
6,0.827,2.130147,0.375926,0.450329,0.375926,0.39757
7,0.5632,2.486439,0.362963,0.406867,0.362963,0.376723
8,0.4551,2.851539,0.374074,0.435418,0.374074,0.393155
9,0.371,3.180795,0.362963,0.404944,0.362963,0.378286
10,0.2356,3.461296,0.351852,0.396882,0.351852,0.368647


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 21:57:48,673] Trial 0 finished with value: 1.4194414615631104 and parameters: {'learning_rate': 3.630902234210563e-05, 'weight_decay': 0.28787429340022275, 'batch_size': 8, 'max_length': 192, 'num_train_epochs': 10}. Best is trial 0 with value: 1.4194414615631104.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream t

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5161,1.396965,0.409259,0.297981,0.409259,0.344197
2,1.3709,1.426372,0.396296,0.381567,0.396296,0.385737
3,1.257,1.517614,0.387037,0.393731,0.387037,0.384472
4,0.9693,1.759681,0.351852,0.393475,0.351852,0.360038
5,0.8257,1.869668,0.366667,0.380856,0.366667,0.371915
6,0.7161,2.208246,0.362963,0.396681,0.362963,0.374735
7,0.4824,2.391719,0.355556,0.386506,0.355556,0.366715
8,0.4153,2.506539,0.361111,0.397065,0.361111,0.374382


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 22:10:05,557] Trial 1 finished with value: 1.3969647884368896 and parameters: {'learning_rate': 2.1207133008770832e-05, 'weight_decay': 0.14000684621591616, 'batch_size': 8, 'max_length': 128, 'num_train_epochs': 8}. Best is trial 1 with value: 1.3969647884368896.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream t

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.421986,0.435185,0.307715,0.435185,0.349391
2,1.446200,1.43992,0.409259,0.370798,0.409259,0.37564
3,1.446200,1.539466,0.383333,0.392247,0.383333,0.386759
4,1.210600,1.694515,0.344444,0.422981,0.344444,0.359351
5,0.919800,1.857468,0.383333,0.382605,0.383333,0.381399
6,0.919800,2.014029,0.355556,0.386389,0.355556,0.367799


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 22:15:51,647] Trial 2 finished with value: 1.42198646068573 and parameters: {'learning_rate': 3.757969710883605e-05, 'weight_decay': 0.2217176444558854, 'batch_size': 16, 'max_length': 64, 'num_train_epochs': 6}. Best is trial 1 with value: 1.3969647884368896.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task 

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5088,1.428459,0.424074,0.306127,0.424074,0.35403
2,1.3902,1.430153,0.390741,0.39911,0.390741,0.386231
3,1.2683,1.523931,0.4,0.384209,0.4,0.388535
4,1.0231,1.626028,0.381481,0.391784,0.381481,0.382059


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 22:24:25,949] Trial 3 finished with value: 1.4284592866897583 and parameters: {'learning_rate': 2.2808120671355137e-05, 'weight_decay': 0.27571285377049176, 'batch_size': 8, 'max_length': 256, 'num_train_epochs': 4}. Best is trial 1 with value: 1.3969647884368896.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream t

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.428149,0.368519,0.355148,0.368519,0.301333
2,No log,1.383001,0.444444,0.412287,0.444444,0.419293
3,No log,1.446332,0.416667,0.401228,0.416667,0.399497
4,1.356200,1.517676,0.411111,0.415771,0.411111,0.403008
5,1.356200,1.587904,0.425926,0.400047,0.425926,0.407239
6,1.356200,1.684778,0.42037,0.426268,0.42037,0.421922
7,0.975200,1.832406,0.403704,0.443183,0.403704,0.413248
8,0.975200,1.882874,0.388889,0.422846,0.388889,0.39852
9,0.975200,1.90949,0.387037,0.420498,0.387037,0.397377


  _warn_prf(average, modifier, msg_start, len(result))


[I 2024-10-29 22:32:43,017] Trial 4 finished with value: 1.383001446723938 and parameters: {'learning_rate': 2.331734270688e-05, 'weight_decay': 0.18375395018784377, 'batch_size': 32, 'max_length': 96, 'num_train_epochs': 9}. Best is trial 4 with value: 1.383001446723938.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.377967,0.427778,0.307722,0.427778,0.354476
2,1.448100,1.405338,0.418519,0.397657,0.418519,0.394971
3,1.448100,1.48342,0.411111,0.385854,0.411111,0.392198
4,1.254200,1.618667,0.385185,0.405407,0.385185,0.380355
5,1.045600,1.674659,0.396296,0.368728,0.396296,0.378791
6,1.045600,1.815008,0.398148,0.40284,0.398148,0.397004
7,0.864900,1.958056,0.375926,0.388986,0.375926,0.374518
8,0.691700,2.062716,0.359259,0.391592,0.359259,0.370613
9,0.691700,2.146031,0.37037,0.400774,0.37037,0.381486
10,0.583500,2.177813,0.359259,0.386116,0.359259,0.370413


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 22:49:18,519] Trial 5 finished with value: 1.3779667615890503 and parameters: {'learning_rate': 1.4433348592218637e-05, 'weight_decay': 0.21763387786803015, 'batch_size': 16, 'max_length': 224, 'num_train_epochs': 10}. Best is trial 5 with value: 1.3779667615890503.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.383894,0.435185,0.308788,0.435185,0.353065
2,1.447400,1.415185,0.398148,0.385363,0.398148,0.383862
3,1.447400,1.514721,0.405556,0.410417,0.405556,0.405923
4,1.231900,1.640782,0.374074,0.434058,0.374074,0.379195
5,0.955700,1.73666,0.401852,0.387256,0.401852,0.393443
6,0.955700,1.932663,0.381481,0.431572,0.381481,0.398202
7,0.732800,2.109416,0.372222,0.42153,0.372222,0.384989
8,0.503800,2.340701,0.362963,0.430611,0.362963,0.379972
9,0.503800,2.452177,0.375926,0.41292,0.375926,0.389896
10,0.370400,2.498825,0.377778,0.412073,0.377778,0.391114


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 22:58:42,769] Trial 6 finished with value: 1.3838938474655151 and parameters: {'learning_rate': 2.7799870743528936e-05, 'weight_decay': 0.16887674010414375, 'batch_size': 16, 'max_length': 64, 'num_train_epochs': 10}. Best is trial 5 with value: 1.3779667615890503.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream 

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.380826,0.431481,0.316462,0.431481,0.361576
2,1.446000,1.403513,0.418519,0.383908,0.418519,0.392226
3,1.446000,1.458329,0.4,0.378725,0.4,0.383536
4,1.228400,1.621293,0.37037,0.395515,0.37037,0.37144
5,0.989400,1.6612,0.398148,0.391773,0.398148,0.394629
6,0.989400,1.815065,0.37963,0.390781,0.37963,0.382288
7,0.816700,1.873186,0.372222,0.406833,0.372222,0.38509


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 23:08:16,565] Trial 7 finished with value: 1.380826473236084 and parameters: {'learning_rate': 1.8569501681426505e-05, 'weight_decay': 0.015447199283284065, 'batch_size': 16, 'max_length': 160, 'num_train_epochs': 7}. Best is trial 5 with value: 1.3779667615890503.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream 

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5021,1.381991,0.444444,0.322638,0.444444,0.367836
2,1.382,1.462654,0.390741,0.391765,0.390741,0.385544
3,1.2709,1.509953,0.396296,0.371312,0.396296,0.380484
4,0.9885,1.736707,0.361111,0.405448,0.361111,0.369466
5,0.8648,1.85854,0.394444,0.396219,0.394444,0.394411
6,0.767,2.00762,0.381481,0.404241,0.381481,0.390129


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 23:17:51,612] Trial 8 finished with value: 1.3819905519485474 and parameters: {'learning_rate': 2.0469844308717504e-05, 'weight_decay': 0.2130710341754846, 'batch_size': 8, 'max_length': 160, 'num_train_epochs': 6}. Best is trial 5 with value: 1.3779667615890503.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream ta

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.389233,0.42963,0.303179,0.42963,0.349826
2,1.442700,1.40897,0.405556,0.38379,0.405556,0.387138
3,1.442700,1.468088,0.411111,0.399942,0.411111,0.404829
4,1.203500,1.671319,0.394444,0.42207,0.394444,0.395307
5,0.901400,1.940397,0.375926,0.383098,0.375926,0.378981
6,0.901400,2.166032,0.390741,0.42895,0.390741,0.403618
7,0.629100,2.457632,0.372222,0.421634,0.372222,0.387625
8,0.409400,2.528838,0.383333,0.42546,0.383333,0.397928


  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-10-29 23:26:25,105] Trial 9 finished with value: 1.3892327547073364 and parameters: {'learning_rate': 3.2330403920463426e-05, 'weight_decay': 0.29669206872972104, 'batch_size': 16, 'max_length': 96, 'num_train_epochs': 8}. Best is trial 5 with value: 1.3779667615890503.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream t

OutOfMemoryError: CUDA out of memory. Tried to allocate 24.00 MiB. GPU 0 has a total capacity of 10.57 GiB of which 16.00 MiB is free. Process 667779 has 2.89 GiB memory in use. Including non-PyTorch memory, this process has 7.66 GiB memory in use. Of the allocated memory 7.44 GiB is allocated by PyTorch, and 31.98 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)