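# VeRA Tuning

Fine-tunes `vinai/bertweet-base` with VeRA adapters (rank 128 on the `query` and `value` modules) for 5-class text classification, then reports accuracy on the held-out test split.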

In [3]:
import pandas as pd

# Read the three pre-split CSV files (train / validation / test) into DataFrames.
train_data, validation_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('train_data.csv', 'validation_data.csv', 'test_data.csv')
)

In [4]:
# Peek at the first five training rows to confirm the `labels` / `input` columns.
preview_rows = train_data.head(5)
print(preview_rows)

   labels                                              input
0       3  Lots of fun stuff!: This box was stuffed full ...
1       4  Where is the link to subscribe for 8-13?: We l...
2       4  Worth it!: Loved everything in there, definite...
3       4                Educational!: My grandkids love it!
4       4  Great small company to support: I love these b...


In [5]:
from datasets import Dataset

# Convert each pandas split into a Hugging Face `Dataset`, keeping the split order.
train_dataset, val_dataset, test_dataset = map(
    Dataset.from_pandas,
    (train_data, validation_data, test_data),
)

In [6]:
from transformers import AutoTokenizer
# Tokenizer belonging to the same checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")


def preprocess_function(examples):
    """Tokenize the 'input' field of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens, so all
    examples come out with the same length.

    Args:
        examples: Batch mapping that exposes the raw text under the
            'input' key (as passed by `Dataset.map(..., batched=True)`).

    Returns:
        The tokenizer output for the batch.
    """
    texts = examples['input']
    return tokenizer(
        texts,
        max_length=128,
        padding='max_length',
        truncation=True,
    )

# Tokenize every split in batches; the tokenizer outputs are added as new columns.
train_dataset, val_dataset, test_dataset = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, val_dataset, test_dataset)
)

Map:   0%|          | 0/8557 [00:00<?, ? examples/s]

Map:   0%|          | 0/2852 [00:00<?, ? examples/s]

Map:   0%|          | 0/2853 [00:00<?, ? examples/s]

In [7]:
from transformers import AutoModelForSequenceClassification

from transformers import set_seed

# The classification head is freshly (randomly) initialised, so seed Python,
# NumPy and PyTorch before building the model to make runs repeatable.
# 42 matches the default `seed` used by `TrainingArguments`.
set_seed(42)

# The classifier emits logits for class ids 0..num_labels-1, so the training
# labels must use exactly that encoding. Fail early with a readable message
# instead of an opaque device-side index error during training.
label_values = sorted(train_data['labels'].unique())
num_labels = len(label_values)
if label_values != list(range(num_labels)):
    raise ValueError(
        f"Expected training labels to be exactly 0..{num_labels - 1}, got {label_values}"
    )

# Pretrained encoder plus a new single-label classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    "vinai/bertweet-base",
    num_labels=num_labels,
    problem_type="single_label_classification",
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Sanity check: show the number of classes the classification head was sized for.
print(num_labels)

5


In [9]:
import numpy as np
import os
# Debugging aid: request synchronous CUDA kernel launches so device-side errors
# are reported at the call that caused them.
# NOTE(review): this slows training and presumably must be set before CUDA is
# first initialised to have any effect; consider removing once the run is stable.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Compatibility shim: NumPy 1.24 removed the `np.object` / `np.bool` / `np.int`
# aliases for the builtin types (presumably still referenced by a dependency).
# Restore them only when they are missing, so that NumPy 2.x's own `np.bool`
# scalar type is not overwritten with the Python builtin.
for _alias, _builtin in (("object", object), ("bool", bool), ("int", int)):
    if not hasattr(np, _alias):
        setattr(np, _alias, _builtin)
from transformers import Trainer, TrainingArguments
# Training configuration: evaluate and checkpoint once per epoch, then reload
# the best checkpoint when training finishes.
# NOTE(review): 2e-5 is a typical full fine-tuning learning rate; adapter
# methods such as VeRA are usually trained with a much larger one — worth tuning.
training_args = TrainingArguments(
    # Checkpoints go to a VeRA-specific directory so this run cannot overwrite
    # (or be mixed up with) the checkpoints of a LoRA run in './results_lora'.
    output_dir='./results_vera',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    # Must match the evaluation strategy for `load_best_model_at_end` to work.
    save_strategy="epoch",
    load_best_model_at_end=True
)

def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class of each row
            is the index of its largest logit along the last axis.

    Returns:
        Dict with a single key, 'accuracy': the fraction of rows whose
        predicted class equals the label.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    hits = predicted == gold
    return {'accuracy': hits.mean()}


In [10]:
from peft import VeraConfig, get_peft_model,TaskType

# VeRA adapter setup: rank-128 adapters on the modules named "query" and
# "value"; the "classifier" head is listed in `modules_to_save`.
vera_config = VeraConfig(
    task_type=TaskType.SEQ_CLS,
    r=128,
    target_modules=["query", "value"],
    modules_to_save=["classifier"],
)

# Wrap the base model with the adapters. This rebinds `model`, so re-running
# the cell without rebuilding the base model would wrap it a second time.
model = get_peft_model(model, vera_config)

# Report how many parameters remain trainable after wrapping.
model.print_trainable_parameters()

trainable params: 615,941 || all params: 135,519,754 || trainable%: 0.4545


In [11]:
    
# Bind the adapter-wrapped model to the training configuration, the train and
# validation splits, and the accuracy metric.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

In [12]:
# Run the fine-tuning loop; the returned TrainOutput is displayed as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,1.2784,1.219905,0.540323
2,1.1814,1.137307,0.55575
3,1.1249,1.078307,0.584151
4,1.0781,1.03098,0.602735
5,1.0535,1.001039,0.615358
6,1.0127,0.986263,0.614656
7,0.9934,0.963372,0.632539
8,0.9909,0.953574,0.635344
9,0.9927,0.950402,0.635344
10,0.981,0.948385,0.635344


TrainOutput(global_step=5350, training_loss=1.0618675217673044, metrics={'train_runtime': 1285.7072, 'train_samples_per_second': 66.555, 'train_steps_per_second': 4.161, 'total_flos': 5669233125473280.0, 'train_loss': 1.0618675217673044, 'epoch': 10.0})

In [13]:
# Score the trained model on the held-out test split and show the metrics dict.
results = trainer.evaluate(eval_dataset=test_dataset)
print(results)

{'eval_loss': 0.956710696220398, 'eval_accuracy': 0.6361724500525763, 'eval_runtime': 17.153, 'eval_samples_per_second': 166.327, 'eval_steps_per_second': 10.435, 'epoch': 10.0}


In [15]:
# Persist the trained model to disk under a rank-tagged directory.
model_path = "./Vera_128"
trainer.save_model(output_dir=model_path)