##### **Installing dependencies**

In [1]:
!pip install ipython-autotime gdown evaluate accelerate bitsandbytes peft loralib huggingface_hub transformers peft

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Collecting peft
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting loralib
  Downloading loralib-0.1.2-py3-none-any.whl.metadata (15 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m

##### **Importing dependencies**

In [2]:
%load_ext autotime
import pandas as pd
import numpy as np
import nltk
import os
import zipfile
import tarfile
import re
import gdown
import gzip
import shutil
import wandb
import time
import torch
import psutil
# import torch_xla
# import torch_xla.core.xla_model as xm
# import torch_xla.debug.metrics as met

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, precision_recall_fscore_support
from datasets import Dataset

from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    RobertaTokenizerFast, 
    RobertaForSequenceClassification,
    GPT2TokenizerFast, 
    GPT2ForSequenceClassification,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    pipeline,
    BitsAndBytesConfig,
    DataCollatorForSeq2Seq,
    DataCollatorWithPadding,
    AdamW,
    get_scheduler
)
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import time
import evaluate
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    PeftModel,
    PeftConfig,
)
from huggingface_hub import login
import kagglehub

# from nltk.corpus import stopwords
# from nltk import word_tokenize
# from nltk.stem import WordNetLemmatizer
# from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score
# from sklearn.naive_bayes import MultinomialNB
# from sklearn.linear_model import LogisticRegression
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from google.colab import files
# from scipy.sparse import hstack
# from gensim.models import Word2Vec

import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", message=".*clean_up_tokenization_spaces.*")
# warnings.filterwarnings("ignore", message="Some weights of DistilBertForSequenceClassification were not initialized.*")
warnings.filterwarnings("ignore", message="Some weights of RobertaForSequenceClassification were not initialized.*")
warnings.filterwarnings("ignore", category=FutureWarning, message=".*GradScaler.*")
warnings.filterwarnings("ignore", message=".*evaluation_strategy.*")
warnings.filterwarnings("ignore", message=".*gather along dimension 0.*")

time: 17.4 s (started: 2025-01-05 23:58:53 +00:00)


In [3]:
# Disable wandb Logging
os.environ["WANDB_MODE"] = "disabled"
wandb.init()

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# device = xm.xla_device()  # Change device to TPU

Using device: cuda
time: 5.98 s (started: 2025-01-05 23:59:12 +00:00)


##### **Supporting functions**

In [4]:
def clean_review(review):
    review = re.sub(r'<.*?>', '', review)
    review = re.sub(r'http\S+|www\S+|https\S+', '', review, flags=re.MULTILINE)
    review = review.strip()
    return review

def preprocess_function(examples):
    inputs = tokenizer(examples["review"], truncation=True, padding=True, max_length=128)
    inputs["labels"] = [1 if label.lower() == "positive" else 0 for label in examples["sentiment"]]
    return inputs

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="weighted")
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

time: 791 µs (started: 2025-01-05 23:59:19 +00:00)


##### **Loading data**

In [5]:
train_df_full = pd.read_csv("/kaggle/input/imdb-dataset/train.csv")
train_df = train_df_full.sample(n=3000, random_state=42)
train_df['review'] = train_df['review'].apply(clean_review)
train_df.reset_index(drop=True, inplace=True)

time: 797 ms (started: 2025-01-05 23:59:25 +00:00)


In [6]:
test_df_full = pd.read_csv("/kaggle/input/imdb-dataset/test.csv")
test_df = test_df_full.sample(n=2000, random_state=42)
test_df['review'] = test_df['review'].apply(clean_review)
test_df.reset_index(drop=True, inplace=True)

time: 518 ms (started: 2025-01-05 23:59:26 +00:00)


In [7]:
from datasets import Dataset

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

time: 72.9 ms (started: 2025-01-05 23:59:32 +00:00)


### **Experimentations for RoBERTa - Phase 1:** keeping LoRA hyperparams fixed

In [8]:
model_checkpoint = "roberta-base"
tokenizer = RobertaTokenizerFast.from_pretrained(model_checkpoint)
model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2).to(device)

tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_test = test_dataset.map(preprocess_function, batched=True)

# Fixed LoRA parameters
rank = 8 
target_matrices = ["attention.self.query", "attention.self.key", "attention.self.value"]
# target_matrices = ["attention.self.query", "attention.self.key", "attention.self.value", "attention.output.dense"]
lora_alpha = 16
lora_dropout = 0.1

# Changing hyperparams for batch size, epochs and learning rates
batch_sizes = [8, 16]
epochs_list = [3, 5]
learning_rates = [3e-5, 1e-4]

training_dropout = 0.1 # Fixed

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

time: 11.3 s (started: 2025-01-05 23:32:42 +00:00)


In [10]:
print(f"Model is running on device: {model.device}")

Model is running on device: cuda:0
time: 527 µs (started: 2025-01-05 23:33:00 +00:00)


In [11]:
# Results storage
results_phase_1 = []

# Experimenting with batch size, epochs, and learning rate (keeping LoRA parameters fixed)
for batch_size in batch_sizes:
    for epochs in epochs_list:
        for learning_rate in learning_rates:
            # LoRA configuration (fixed, with all matrices)
            lora_config = LoraConfig(
                r=rank,
                lora_alpha=lora_alpha,
                target_modules=target_matrices,
                lora_dropout=lora_dropout,
                task_type="SEQ_CLS"
            )

            # Apply LoRA to the model
            model_with_lora = get_peft_model(model, lora_config)
            
            start_time = time.time()
            print(f"\nRunning experiment with: Batch Size: {batch_size}, Epochs: {epochs}, Learning Rate: {learning_rate}")

            num_parameters = sum(p.numel() for p in model_with_lora.parameters())
            trainable_parameters = sum(p.numel() for p in model_with_lora.parameters() if p.requires_grad)
            trainable_percentage = (trainable_parameters / num_parameters) * 100
            
            print(f"Model has {num_parameters:,} total parameters")
            print(f"Model has {trainable_parameters:,} trainable parameters")
            print(f"{trainable_percentage:.2f}% of the parameters are trainable")

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gpu_memory = torch.cuda.memory_allocated() / 1024**2  # in MB
                print(f"GPU memory allocated: {gpu_memory:.2f} MB")

            wandb.config.update({"model/num_parameters": model.num_parameters()}, allow_val_change=True)

            # Training arguments
            output_dir = f"./results_phase1_r{rank}_alpha{lora_alpha}_drop{lora_dropout}_targets{'_'.join(target_matrices)}_bs{batch_size}_epochs{epochs}_lr{learning_rate}"
            training_args = TrainingArguments(
                output_dir=output_dir,
                evaluation_strategy="epoch",
                learning_rate=learning_rate,
                per_device_train_batch_size=8,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=epochs,
                weight_decay=0.01,
                save_total_limit=1,
                save_strategy="epoch",
                logging_dir="./logs",
                logging_steps=10,
                load_best_model_at_end=True
            )

            # Trainer
            trainer = Trainer(
                model=model_with_lora,
                args=training_args,
                train_dataset=tokenized_train,
                eval_dataset=tokenized_test,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics
            )

            # Train and evaluate
            trainer.train()
            metrics = trainer.evaluate()

            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"Training time: {elapsed_time:.2f} seconds")

            # Log results for Phase 1
            results_phase_1.append({
                "Model": "RoBERTa",
                "Batch Size": batch_size,
                "Epochs": epochs,
                "Learning Rate": learning_rate,
                "Rank": rank,
                "Alpha": lora_alpha,
                "LoRA Dropout": lora_dropout,
                "Target Matrices": target_matrices,
                "Accuracy": metrics["eval_accuracy"],
                "Precision": metrics["eval_precision"],
                "Recall": metrics["eval_recall"],
                "F1-Score": metrics["eval_f1"]                
            })


Running experiment with: Batch Size: 8, Epochs: 3, Learning Rate: 3e-05
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 480.68 MB




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6659,0.650086,0.846,0.85079,0.846,0.845075
2,0.2956,0.366737,0.86,0.861138,0.86,0.860032
3,0.2923,0.337767,0.8745,0.874692,0.8745,0.874412


Training time: 140.11 seconds

Running experiment with: Batch Size: 8, Epochs: 3, Learning Rate: 0.0001
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2324,0.308161,0.89,0.891022,0.89,0.889808
2,0.2354,0.315274,0.8925,0.892545,0.8925,0.892511
3,0.2264,0.302369,0.893,0.893669,0.893,0.892859


Training time: 139.22 seconds

Running experiment with: Batch Size: 8, Epochs: 5, Learning Rate: 3e-05
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.541,0.484581,0.8595,0.86139,0.8595,0.85909
2,0.2844,0.377795,0.8615,0.863891,0.8615,0.861482
3,0.3069,0.309983,0.879,0.879258,0.879,0.878903
4,0.3986,0.323433,0.8825,0.883524,0.8825,0.88229
5,0.2078,0.320414,0.8825,0.884163,0.8825,0.882209


Training time: 225.44 seconds

Running experiment with: Batch Size: 8, Epochs: 5, Learning Rate: 0.0001
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.237,0.292744,0.8885,0.890105,0.8885,0.888235
2,0.2342,0.324362,0.8955,0.895791,0.8955,0.895527
3,0.1927,0.304498,0.8935,0.897061,0.8935,0.893057
4,0.3266,0.304133,0.898,0.897992,0.898,0.897993
5,0.1158,0.307258,0.8995,0.900699,0.8995,0.899311


Training time: 225.48 seconds

Running experiment with: Batch Size: 16, Epochs: 3, Learning Rate: 3e-05
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6306,0.60654,0.855,0.858986,0.855,0.854263
2,0.2922,0.360492,0.861,0.861907,0.861,0.861037
3,0.2912,0.340752,0.8765,0.876816,0.8765,0.876388


Training time: 134.58 seconds

Running experiment with: Batch Size: 16, Epochs: 3, Learning Rate: 0.0001
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2324,0.308161,0.89,0.891022,0.89,0.889808
2,0.2354,0.315274,0.8925,0.892545,0.8925,0.892511
3,0.2264,0.302369,0.893,0.893669,0.893,0.892859


Training time: 134.21 seconds

Running experiment with: Batch Size: 16, Epochs: 5, Learning Rate: 3e-05
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.541,0.484581,0.8595,0.86139,0.8595,0.85909
2,0.2844,0.377795,0.8615,0.863891,0.8615,0.861482
3,0.3069,0.309983,0.879,0.879258,0.879,0.878903
4,0.3986,0.323433,0.8825,0.883524,0.8825,0.88229
5,0.2078,0.320414,0.8825,0.884163,0.8825,0.882209


Training time: 217.67 seconds

Running experiment with: Batch Size: 16, Epochs: 5, Learning Rate: 0.0001
Model has 125,681,668 total parameters
Model has 1,034,498 trainable parameters
0.82% of the parameters are trainable
GPU memory allocated: 508.77 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.237,0.292744,0.8885,0.890105,0.8885,0.888235
2,0.2342,0.324362,0.8955,0.895791,0.8955,0.895527
3,0.1927,0.304498,0.8935,0.897061,0.8935,0.893057
4,0.3266,0.304133,0.898,0.897992,0.898,0.897993
5,0.1158,0.307257,0.8995,0.900699,0.8995,0.899311


Training time: 217.75 seconds
time: 23min 54s (started: 2025-01-05 23:33:08 +00:00)


In [12]:
# Testing evaluations saved
results_df_phase_1 = pd.DataFrame(results_phase_1)
results_df_phase_1.to_csv("6_FT_RoBERTa_Experiments_FixedLoRA.csv", index=False)

time: 5.18 ms (started: 2025-01-05 23:57:11 +00:00)


### **Experimentations for RoBERTa - Phase 2:** changing LoRA hyperparameters

In [8]:
model_checkpoint = "roberta-base"
tokenizer = RobertaTokenizerFast.from_pretrained(model_checkpoint)
model = RobertaForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2).to(device)

tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_test = test_dataset.map(preprocess_function, batched=True)

# Fixed parameters for batch size and epochs, etc
fixed_batch_size = 8
fixed_epochs = 5
fixed_learning_rate = 1e-4
training_dropout = 0.1

# LoRA parameter combinations
ranks = [8, 16]
target_matrices_list = [
    ["attention.self.query"],
    ["attention.self.query", "attention.self.key"],
    ["attention.self.query", "attention.self.key", "attention.self.value"]
]
lora_alpha = 16
lora_dropouts = [0.1, 0.2]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

time: 7.78 s (started: 2025-01-05 23:59:46 +00:00)


In [None]:
# Results storage for Phase 2
results_phase_2 = []

# Experimenting with LoRA parameters (keeping batch size, epochs, learning rate, and training dropout fixed)
for rank in ranks:
    for target_matrices in target_matrices_list:
        for lora_dropout in lora_dropouts:
            # LoRA configuration (varying LoRA parameters)
            lora_config = LoraConfig(
                r=rank,
                lora_alpha=lora_alpha,  # Fixed lora_alpha
                target_modules=target_matrices,
                lora_dropout=lora_dropout,
                task_type="SEQ_CLS"
            )

            # Apply LoRA to the model
            model_with_lora = get_peft_model(model, lora_config)

            start_time = time.time()
            print(f"\nRunning experiment with: Rank: {rank}, Target Matrices: {target_matrices}, LoRA Dropout: {lora_dropout}")

            num_parameters = sum(p.numel() for p in model_with_lora.parameters())
            trainable_parameters = sum(p.numel() for p in model_with_lora.parameters() if p.requires_grad)
            trainable_percentage = (trainable_parameters / num_parameters) * 100
            
            print(f"Model has {num_parameters:,} total parameters")
            print(f"Model has {trainable_parameters:,} trainable parameters")
            print(f"{trainable_percentage:.2f}% of the parameters are trainable")

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gpu_memory = torch.cuda.memory_allocated() / 1024**2  # in MB
                print(f"GPU memory allocated: {gpu_memory:.2f} MB")

            wandb.config.update({"model/num_parameters": model.num_parameters()}, allow_val_change=True)

            # Training arguments (fixed batch size, epochs, learning rate, and training dropout)
            output_dir = f"./results_phase2_r{rank}_alpha{lora_alpha}_drop{lora_dropout}_targets{'_'.join(target_matrices)}_bs{fixed_batch_size}_epochs{fixed_epochs}_lr{fixed_learning_rate}"
            training_args = TrainingArguments(
                output_dir=output_dir,
                evaluation_strategy="epoch",
                learning_rate=fixed_learning_rate,
                per_device_train_batch_size=fixed_batch_size,
                per_device_eval_batch_size=fixed_batch_size,
                num_train_epochs=fixed_epochs,
                weight_decay=0.01,
                save_total_limit=1,
                save_strategy="epoch",
                logging_dir="./logs",
                logging_steps=10,
                load_best_model_at_end=True
            )

            # Trainer
            trainer = Trainer(
                model=model_with_lora,
                args=training_args,
                train_dataset=tokenized_train,
                eval_dataset=tokenized_test,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics
            )

            # Train and evaluate
            trainer.train()
            metrics = trainer.evaluate()

            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"Training time: {elapsed_time:.2f} seconds")

            # Log results for Phase 2
            results_phase_2.append({
                "Model": "RoBERTa",
                "Batch Size": fixed_batch_size,
                "Epochs": fixed_epochs,
                "Learning Rate": fixed_learning_rate,
                "Rank": rank,
                "Alpha": lora_alpha,  # Fixed alpha
                "LoRA Dropout": lora_dropout,
                "Target Matrices": target_matrices,
                "Accuracy": metrics["eval_accuracy"],
                "Precision": metrics["eval_precision"],
                "Recall": metrics["eval_recall"],
                "F1-Score": metrics["eval_f1"]
            })


Running experiment with: Rank: 8, Target Matrices: ['attention.self.query'], LoRA Dropout: 0.1
Model has 125,386,756 total parameters
Model has 739,586 trainable parameters
0.59% of the parameters are trainable
GPU memory allocated: 479.56 MB




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3485,0.31667,0.8735,0.873516,0.8735,0.873506
2,0.2702,0.316777,0.8815,0.88149,0.8815,0.881484
3,0.267,0.301581,0.8875,0.887797,0.8875,0.887406
4,0.4767,0.314295,0.89,0.890534,0.89,0.889873
5,0.2315,0.314036,0.888,0.888933,0.888,0.887815


Training time: 205.21 seconds

Running experiment with: Rank: 8, Target Matrices: ['attention.self.query'], LoRA Dropout: 0.2
Model has 125,386,756 total parameters
Model has 739,586 trainable parameters
0.59% of the parameters are trainable
GPU memory allocated: 504.27 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3267,0.31425,0.873,0.873023,0.873,0.873008
2,0.2619,0.319419,0.884,0.884103,0.884,0.88402
3,0.2592,0.30409,0.889,0.889165,0.889,0.888932
4,0.4734,0.315953,0.893,0.893172,0.893,0.892934
5,0.2328,0.315999,0.889,0.889474,0.889,0.88888


Training time: 202.81 seconds

Running experiment with: Rank: 8, Target Matrices: ['attention.self.query', 'attention.self.key'], LoRA Dropout: 0.1
Model has 125,534,212 total parameters
Model has 887,042 trainable parameters
0.71% of the parameters are trainable
GPU memory allocated: 504.83 MB


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2289,0.326145,0.8725,0.872491,0.8725,0.872478
2,0.3014,0.327389,0.884,0.883993,0.884,0.883982
3,0.2514,0.308229,0.8915,0.891686,0.8915,0.89143


In [None]:
# Testing evaluations saved
results_df_phase_2 = pd.DataFrame(results_phase_2)
results_df_phase_2.to_csv("6_FT_RoBERTa_Experiments_FixedTrainingHyp.csv", index=False)