In [14]:
!pip install git+https://github.com/huggingface/transformers.git

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /private/var/folders/mv/_dg3pqgn2zdf7f95_1dg07rw0000gn/T/pip-req-build-tcumavgi
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /private/var/folders/mv/_dg3pqgn2zdf7f95_1dg07rw0000gn/T/pip-req-build-tcumavgi
  Resolved https://github.com/huggingface/transformers.git to commit 05260a1fc1c8571a2b421ce72b680d5f1bc3e5a4
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.48.0.dev0-py3-none-any.whl size=10290584 sha256=59e4bcb3bf1048fc8971001a9c5ab9ad3b87aec7b96738d60aacbcfcbb7be585
  Stored in directory: /private/var/folder

In [1]:
import pandas as pd 
from datasets import load_dataset, Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, ModernBertForSequenceClassification
from transformers import Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def prepare_datasets(tokenizer, dataset_name="financial_phrasebank", subset_name="sentences_50agree", max_length=128, random_state=42):
    # Load the dataset
    dataset = load_dataset(dataset_name, subset_name, trust_remote_code=True)
    
    # Convert to Pandas DataFrame
    df = pd.DataFrame(dataset['train'])

    # Stratify split into train, validation, and test
    train_texts, test_texts, train_labels, test_labels = train_test_split(
        df['sentence'], df['label'], test_size=0.2, stratify=df['label'], random_state=random_state
    )
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        train_texts, train_labels, test_size=0.1, stratify=train_labels, random_state=random_state
    )

    # Create DataFrames for each split
    train_df = pd.DataFrame({'sentence': train_texts, 'label': train_labels})
    val_df = pd.DataFrame({'sentence': val_texts, 'label': val_labels})
    test_df = pd.DataFrame({'sentence': test_texts, 'label': test_labels})

    # Convert DataFrames to Hugging Face Dataset format
    train_dataset = Dataset.from_pandas(train_df)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Define tokenization function
    def tokenize_function(example):
        return tokenizer(
            example["sentence"], 
            padding="max_length", 
            truncation=True, 
            max_length=max_length
        )

    # Tokenize datasets
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)
    test_dataset = test_dataset.map(tokenize_function, batched=True)

    # Remove raw text and prepare for Hugging Face Trainer
    train_dataset = train_dataset.remove_columns(["sentence"])
    val_dataset = val_dataset.remove_columns(["sentence"])
    test_dataset = test_dataset.remove_columns(["sentence"])

    train_dataset = train_dataset.rename_column("label", "labels")
    val_dataset = val_dataset.rename_column("label", "labels")
    test_dataset = test_dataset.rename_column("label", "labels")

    train_dataset.set_format("torch")
    val_dataset.set_format("torch")
    test_dataset.set_format("torch")

    return train_dataset, val_dataset, test_dataset

In [3]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    return {"accuracy": acc}


def get_training_args(MODEL_NAME):
    
    training_args = TrainingArguments(
        output_dir=f"./results/{MODEL_NAME}",        # Directory to save checkpoints
        evaluation_strategy="epoch",                # Evaluate at the end of each epoch
        learning_rate=5e-5,                         # Typical learning rate for BERT
        per_device_train_batch_size=16,             # Adjust based on hardware
        per_device_eval_batch_size=16,    
        num_train_epochs=3,                         # Number of training epochs
        weight_decay=0.01,                          # Regularization
        logging_dir=f"./logs/{MODEL_NAME}",         # Directory for logs
        logging_steps=10,                           # Log every 10 steps
        save_total_limit=2,                         # Limit number of saved checkpoints
        save_strategy="epoch",                      # Save at the end of each epoch
        report_to=["tensorboard"],                  # Enable TensorBoard logging
        load_best_model_at_end=True,                # Automatically load the best model at the end
    )

    return training_args

## ModernBERT

In [4]:
model_name = "answerdotai/ModernBERT-base"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

model = ModernBertForSequenceClassification.from_pretrained(
    "answerdotai/ModernBERT-base",
    num_labels=3  
)

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
training_args = get_training_args("answerdotai/ModernBERT-base")
train_dataset, val_dataset, test_dataset = prepare_datasets(tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

Map: 100%|██████████| 3488/3488 [00:00<00:00, 29966.19 examples/s]
Map: 100%|██████████| 388/388 [00:00<00:00, 26490.05 examples/s]
Map: 100%|██████████| 970/970 [00:00<00:00, 28950.11 examples/s]
  trainer = Trainer(
                                       
  0%|          | 0/654 [00:26<?, ?it/s]         

{'loss': 0.9879, 'grad_norm': 5.624727725982666, 'learning_rate': 4.923547400611621e-05, 'epoch': 0.05}


                                       
  0%|          | 0/654 [00:37<?, ?it/s]         

{'loss': 0.766, 'grad_norm': 3.766676902770996, 'learning_rate': 4.847094801223242e-05, 'epoch': 0.09}


                                       
  0%|          | 0/654 [00:48<?, ?it/s]         

{'loss': 0.783, 'grad_norm': 5.730255603790283, 'learning_rate': 4.7706422018348626e-05, 'epoch': 0.14}


                                       
  0%|          | 0/654 [00:59<?, ?it/s]         

{'loss': 0.6573, 'grad_norm': 7.4723968505859375, 'learning_rate': 4.694189602446483e-05, 'epoch': 0.18}


                                       
  0%|          | 0/654 [01:10<?, ?it/s]         

{'loss': 0.6103, 'grad_norm': 16.22762680053711, 'learning_rate': 4.617737003058104e-05, 'epoch': 0.23}


                                       
  0%|          | 0/654 [01:21<?, ?it/s]         

{'loss': 0.5438, 'grad_norm': 10.942617416381836, 'learning_rate': 4.541284403669725e-05, 'epoch': 0.28}


                                       
  0%|          | 0/654 [01:32<?, ?it/s]         

{'loss': 0.6038, 'grad_norm': 16.863340377807617, 'learning_rate': 4.4648318042813456e-05, 'epoch': 0.32}


                                       
  0%|          | 0/654 [01:43<?, ?it/s]         

{'loss': 0.6496, 'grad_norm': 7.224388599395752, 'learning_rate': 4.3883792048929664e-05, 'epoch': 0.37}


                                       
  0%|          | 0/654 [01:54<?, ?it/s]         

{'loss': 0.4344, 'grad_norm': 11.440454483032227, 'learning_rate': 4.311926605504588e-05, 'epoch': 0.41}


                                       
  0%|          | 0/654 [02:05<?, ?it/s]          

{'loss': 0.4612, 'grad_norm': 8.966048240661621, 'learning_rate': 4.235474006116208e-05, 'epoch': 0.46}


                                       
  0%|          | 0/654 [02:16<?, ?it/s]          

{'loss': 0.4541, 'grad_norm': 6.86417818069458, 'learning_rate': 4.159021406727829e-05, 'epoch': 0.5}


                                       
  0%|          | 0/654 [02:27<?, ?it/s]          

{'loss': 0.4077, 'grad_norm': 6.682182312011719, 'learning_rate': 4.0825688073394495e-05, 'epoch': 0.55}


                                       
  0%|          | 0/654 [02:38<?, ?it/s]          

{'loss': 0.3947, 'grad_norm': 9.855279922485352, 'learning_rate': 4.00611620795107e-05, 'epoch': 0.6}


                                       
  0%|          | 0/654 [02:48<?, ?it/s]          

{'loss': 0.3975, 'grad_norm': 7.9052252769470215, 'learning_rate': 3.929663608562692e-05, 'epoch': 0.64}


                                       
  0%|          | 0/654 [02:59<?, ?it/s]          

{'loss': 0.3414, 'grad_norm': 4.221914768218994, 'learning_rate': 3.8532110091743125e-05, 'epoch': 0.69}


                                       
  0%|          | 0/654 [03:10<?, ?it/s]          

{'loss': 0.3814, 'grad_norm': 11.539739608764648, 'learning_rate': 3.7767584097859326e-05, 'epoch': 0.73}


                                       
  0%|          | 0/654 [03:21<?, ?it/s]          

{'loss': 0.3898, 'grad_norm': 9.371356964111328, 'learning_rate': 3.7003058103975534e-05, 'epoch': 0.78}


                                       
  0%|          | 0/654 [03:32<?, ?it/s]          

{'loss': 0.2929, 'grad_norm': 4.233006477355957, 'learning_rate': 3.623853211009174e-05, 'epoch': 0.83}


                                       
  0%|          | 0/654 [03:43<?, ?it/s]          

{'loss': 0.3003, 'grad_norm': 7.201789855957031, 'learning_rate': 3.5474006116207956e-05, 'epoch': 0.87}


                                       
  0%|          | 0/654 [03:54<?, ?it/s]          

{'loss': 0.4909, 'grad_norm': 11.971671104431152, 'learning_rate': 3.4709480122324164e-05, 'epoch': 0.92}


                                       
  0%|          | 0/654 [04:05<?, ?it/s]          

{'loss': 0.4099, 'grad_norm': 2.583198070526123, 'learning_rate': 3.394495412844037e-05, 'epoch': 0.96}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                       
[A                                              

  0%|          | 0/654 [04:35<?, ?it/s]        
[A
[A

{'eval_loss': 0.3923242390155792, 'eval_accuracy': 0.8412371134020619, 'eval_runtime': 21.2914, 'eval_samples_per_second': 45.558, 'eval_steps_per_second': 2.865, 'epoch': 1.0}


                                       
  0%|          | 0/654 [04:40<?, ?it/s]          

{'loss': 0.3711, 'grad_norm': 6.550925254821777, 'learning_rate': 3.318042813455658e-05, 'epoch': 1.01}


                                       
  0%|          | 0/654 [04:51<?, ?it/s]          

{'loss': 0.2398, 'grad_norm': 14.893263816833496, 'learning_rate': 3.241590214067278e-05, 'epoch': 1.06}


                                       
  0%|          | 0/654 [05:02<?, ?it/s]          

{'loss': 0.1814, 'grad_norm': 6.569476127624512, 'learning_rate': 3.1651376146788995e-05, 'epoch': 1.1}


                                       
  0%|          | 0/654 [05:13<?, ?it/s]          

{'loss': 0.2434, 'grad_norm': 11.043304443359375, 'learning_rate': 3.08868501529052e-05, 'epoch': 1.15}


                                       
  0%|          | 0/654 [05:24<?, ?it/s]          

{'loss': 0.2727, 'grad_norm': 2.3373351097106934, 'learning_rate': 3.012232415902141e-05, 'epoch': 1.19}


                                       
  0%|          | 0/654 [05:35<?, ?it/s]          

{'loss': 0.2179, 'grad_norm': 4.085222244262695, 'learning_rate': 2.9357798165137618e-05, 'epoch': 1.24}


                                       
  0%|          | 0/654 [05:46<?, ?it/s]          

{'loss': 0.2271, 'grad_norm': 5.537531852722168, 'learning_rate': 2.8593272171253826e-05, 'epoch': 1.28}


                                       
  0%|          | 0/654 [05:57<?, ?it/s]          

{'loss': 0.2435, 'grad_norm': 6.38084602355957, 'learning_rate': 2.782874617737003e-05, 'epoch': 1.33}


                                       
  0%|          | 0/654 [06:08<?, ?it/s]          

{'loss': 0.1672, 'grad_norm': 7.183953762054443, 'learning_rate': 2.7064220183486238e-05, 'epoch': 1.38}


                                       
  0%|          | 0/654 [06:19<?, ?it/s]          

{'loss': 0.2571, 'grad_norm': 23.130128860473633, 'learning_rate': 2.629969418960245e-05, 'epoch': 1.42}


                                       
  0%|          | 0/654 [06:30<?, ?it/s]          

{'loss': 0.2168, 'grad_norm': 9.449993133544922, 'learning_rate': 2.5535168195718656e-05, 'epoch': 1.47}


                                       
  0%|          | 0/654 [06:40<?, ?it/s]          

{'loss': 0.1786, 'grad_norm': 4.80010461807251, 'learning_rate': 2.4770642201834864e-05, 'epoch': 1.51}


                                       
  0%|          | 0/654 [06:51<?, ?it/s]          

{'loss': 0.1888, 'grad_norm': 5.654148101806641, 'learning_rate': 2.4006116207951072e-05, 'epoch': 1.56}


                                       
  0%|          | 0/654 [07:02<?, ?it/s]          

{'loss': 0.2543, 'grad_norm': 14.6257905960083, 'learning_rate': 2.324159021406728e-05, 'epoch': 1.61}


                                       
  0%|          | 0/654 [07:13<?, ?it/s]          

{'loss': 0.1294, 'grad_norm': 1.5706292390823364, 'learning_rate': 2.2477064220183487e-05, 'epoch': 1.65}


                                       
  0%|          | 0/654 [07:24<?, ?it/s]          

{'loss': 0.0887, 'grad_norm': 11.431336402893066, 'learning_rate': 2.1712538226299695e-05, 'epoch': 1.7}


                                       
  0%|          | 0/654 [07:35<?, ?it/s]          

{'loss': 0.279, 'grad_norm': 0.5250476002693176, 'learning_rate': 2.0948012232415903e-05, 'epoch': 1.74}


                                       
  0%|          | 0/654 [07:46<?, ?it/s]          

{'loss': 0.2128, 'grad_norm': 7.426390647888184, 'learning_rate': 2.018348623853211e-05, 'epoch': 1.79}


                                       
  0%|          | 0/654 [07:57<?, ?it/s]          

{'loss': 0.1493, 'grad_norm': 0.8958475589752197, 'learning_rate': 1.9418960244648318e-05, 'epoch': 1.83}


                                       
  0%|          | 0/654 [08:08<?, ?it/s]          

{'loss': 0.1877, 'grad_norm': 13.89301872253418, 'learning_rate': 1.8654434250764526e-05, 'epoch': 1.88}


                                       
  0%|          | 0/654 [08:20<?, ?it/s]          

{'loss': 0.2212, 'grad_norm': 5.5201191902160645, 'learning_rate': 1.7889908256880737e-05, 'epoch': 1.93}


                                       
  0%|          | 0/654 [08:32<?, ?it/s]          

{'loss': 0.285, 'grad_norm': 8.326737403869629, 'learning_rate': 1.712538226299694e-05, 'epoch': 1.97}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                       
[A                                              

  0%|          | 0/654 [09:02<?, ?it/s]        
[A
[A

{'eval_loss': 0.5533730983734131, 'eval_accuracy': 0.8082474226804124, 'eval_runtime': 23.2564, 'eval_samples_per_second': 41.709, 'eval_steps_per_second': 2.623, 'epoch': 2.0}


                                       
  0%|          | 0/654 [09:09<?, ?it/s]          

{'loss': 0.0914, 'grad_norm': 3.0151000022888184, 'learning_rate': 1.636085626911315e-05, 'epoch': 2.02}


                                       
  0%|          | 0/654 [09:22<?, ?it/s]          

{'loss': 0.1166, 'grad_norm': 9.93114948272705, 'learning_rate': 1.559633027522936e-05, 'epoch': 2.06}


                                       
  0%|          | 0/654 [09:34<?, ?it/s]          

{'loss': 0.0973, 'grad_norm': 0.09154054522514343, 'learning_rate': 1.4831804281345565e-05, 'epoch': 2.11}


                                       
  0%|          | 0/654 [09:46<?, ?it/s]          

{'loss': 0.0564, 'grad_norm': 1.0788754224777222, 'learning_rate': 1.4067278287461774e-05, 'epoch': 2.16}


                                       
  0%|          | 0/654 [09:58<?, ?it/s]          

{'loss': 0.0336, 'grad_norm': 2.8967196941375732, 'learning_rate': 1.3302752293577984e-05, 'epoch': 2.2}


                                       
  0%|          | 0/654 [10:10<?, ?it/s]          

{'loss': 0.0132, 'grad_norm': 0.3692537844181061, 'learning_rate': 1.253822629969419e-05, 'epoch': 2.25}


                                       
  0%|          | 0/654 [10:21<?, ?it/s]          

{'loss': 0.093, 'grad_norm': 6.363702774047852, 'learning_rate': 1.1773700305810397e-05, 'epoch': 2.29}


                                       
  0%|          | 0/654 [10:33<?, ?it/s]          

{'loss': 0.0167, 'grad_norm': 0.12205004692077637, 'learning_rate': 1.1009174311926607e-05, 'epoch': 2.34}


                                       
  0%|          | 0/654 [10:45<?, ?it/s]          

{'loss': 0.036, 'grad_norm': 0.019864311441779137, 'learning_rate': 1.0244648318042814e-05, 'epoch': 2.39}


                                       
  0%|          | 0/654 [10:57<?, ?it/s]          

{'loss': 0.0491, 'grad_norm': 0.34659332036972046, 'learning_rate': 9.480122324159022e-06, 'epoch': 2.43}


                                       
  0%|          | 0/654 [11:09<?, ?it/s]          

{'loss': 0.0521, 'grad_norm': 0.23147127032279968, 'learning_rate': 8.71559633027523e-06, 'epoch': 2.48}


                                       
  0%|          | 0/654 [11:21<?, ?it/s]          

{'loss': 0.0363, 'grad_norm': 6.417152404785156, 'learning_rate': 7.951070336391438e-06, 'epoch': 2.52}


                                       
  0%|          | 0/654 [11:33<?, ?it/s]          

{'loss': 0.0151, 'grad_norm': 0.22738933563232422, 'learning_rate': 7.186544342507645e-06, 'epoch': 2.57}


                                       
  0%|          | 0/654 [11:45<?, ?it/s]          

{'loss': 0.085, 'grad_norm': 14.012743949890137, 'learning_rate': 6.422018348623854e-06, 'epoch': 2.61}


                                       
  0%|          | 0/654 [11:57<?, ?it/s]          

{'loss': 0.0199, 'grad_norm': 0.04947716370224953, 'learning_rate': 5.657492354740062e-06, 'epoch': 2.66}


                                       
  0%|          | 0/654 [12:09<?, ?it/s]          

{'loss': 0.0178, 'grad_norm': 0.02492533065378666, 'learning_rate': 4.892966360856269e-06, 'epoch': 2.71}


                                       
  0%|          | 0/654 [12:21<?, ?it/s]          

{'loss': 0.049, 'grad_norm': 10.35219669342041, 'learning_rate': 4.128440366972477e-06, 'epoch': 2.75}


                                       
  0%|          | 0/654 [12:33<?, ?it/s]          

{'loss': 0.0325, 'grad_norm': 0.14696256816387177, 'learning_rate': 3.363914373088685e-06, 'epoch': 2.8}


                                       
  0%|          | 0/654 [12:45<?, ?it/s]          

{'loss': 0.1098, 'grad_norm': 0.9328455328941345, 'learning_rate': 2.599388379204893e-06, 'epoch': 2.84}


                                       
  0%|          | 0/654 [12:57<?, ?it/s]          

{'loss': 0.0526, 'grad_norm': 0.11854902654886246, 'learning_rate': 1.8348623853211011e-06, 'epoch': 2.89}


                                       
  0%|          | 0/654 [13:10<?, ?it/s]          

{'loss': 0.0092, 'grad_norm': 5.995052814483643, 'learning_rate': 1.0703363914373088e-06, 'epoch': 2.94}


                                       
  0%|          | 0/654 [13:21<?, ?it/s]          

{'loss': 0.0649, 'grad_norm': 0.12783144414424896, 'learning_rate': 3.0581039755351683e-07, 'epoch': 2.98}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                       
[A                                              

  0%|          | 0/654 [13:55<?, ?it/s]        
[A
[A

{'eval_loss': 0.7690773606300354, 'eval_accuracy': 0.8536082474226804, 'eval_runtime': 25.3949, 'eval_samples_per_second': 38.197, 'eval_steps_per_second': 2.402, 'epoch': 3.0}


                                       
100%|██████████| 654/654 [13:43<00:00,  1.26s/it]

{'train_runtime': 823.3679, 'train_samples_per_second': 12.709, 'train_steps_per_second': 0.794, 'train_loss': 0.25571310659158486, 'epoch': 3.0}





TrainOutput(global_step=654, training_loss=0.25571310659158486, metrics={'train_runtime': 823.3679, 'train_samples_per_second': 12.709, 'train_steps_per_second': 0.794, 'total_flos': 891428705132544.0, 'train_loss': 0.25571310659158486, 'epoch': 3.0})

In [7]:
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
pd.DataFrame(pred_labels, columns=["prediction"]).to_csv("predictions/ModernBERT.csv", index=False)

100%|██████████| 61/61 [00:22<00:00,  2.71it/s]


## BERT

In [None]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", 
    num_labels = 3
)

train_dataset, val_dataset, test_dataset = prepare_datasets(tokenizer)

In [None]:
training_args = get_training_args("bert-base-uncased")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

In [None]:
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
pd.DataFrame(pred_labels, columns=["prediction"]).to_csv("predictions/BERT.csv", index=False)

## LoRA

In [None]:
training_args = get_training_args("lora")

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", 
    num_labels = 3
)

train_dataset, val_dataset, test_dataset = prepare_datasets(tokenizer)

In [None]:
lora_config = LoraConfig(
    r=8,  # Low-rank dimension
    lora_alpha=32,  # Scaling factor
    target_modules=["query", "value"],  # Apply LoRA to attention layers
    lora_dropout=0.1,  # Dropout rate
    bias="none",  # Options: "none", "all", or "lora_only"
    task_type="SEQ_CLS"  # Task type: Sequence Classification
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

In [None]:
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
pd.DataFrame(pred_labels, columns=["prediction"]).to_csv("predictions/LoRA.csv", index=False)

## Distil-BERT

In [None]:
training_args = get_training_args("distilbert")

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=3
)

train_dataset, val_dataset, test_dataset = prepare_datasets(tokenizer)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

In [None]:
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
pd.DataFrame(pred_labels, columns=["prediction"]).to_csv("predictions/DistilBERT.csv", index=False)

In [None]:
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
pd.DataFrame(pred_labels, columns=["prediction"]).to_csv("predictions/T5.csv", index=False)