# CS614 Assignment 1 - LLM Training Code

In [None]:
!pip install transformers peft evaluate datasets 



In [117]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
import evaluate, torch, numpy as np, time, transformers
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset, get_dataset_split_names

## **Dataset**:
The dataset is obtained from https://huggingface.co/datasets/zeroshot/twitter-financial-news-sentiment. The dataset consists of 11,931 finance-related tweets and is used to train and evaluate the performance of sequence classification models on sentiment classification.

## **Task:**
Classify the sentiment (bearish, bullish or neutral) of finance-related tweets using the selected LLM.

In [104]:
# Fetch every available split of the financial-tweet sentiment dataset from the Hugging Face Hub
ds = load_dataset(path="zeroshot/twitter-financial-news-sentiment")


In [105]:
# List the splits published for this dataset (displayed as the cell output)
get_dataset_split_names(path="zeroshot/twitter-financial-news-sentiment")

['train', 'validation']

In [130]:
# Shuffle the train and validation splits (the dataset ships no separate test split).
SEED = 42           # fixed seed so the shuffle, and hence the subset, is reproducible
SUBSET_SIZE = 1000  # small subset to speed up training

train = ds["train"].shuffle(seed=SEED)
val = ds["validation"].shuffle(seed=SEED)

# Cap the subset at the split length so select() never indexes past the end of a split.
train_dataset = train.select(range(min(SUBSET_SIZE, len(train))))
val_dataset = val.select(range(min(SUBSET_SIZE, len(val))))

In [131]:
# Inspect the column names and dtypes (features) of the shuffled training split
train.features

{'text': Value('string'), 'label': Value('int64')}

`text`: Financial-related tweet
<br>`label`: Reference sentiment (0: Bearish, 1: Bullish, 2: Neutral)

In [132]:
# Peek at a single example: the row at index 19 of the shuffled training split
train[19]

{'text': 'Extreme Networks +3% after $100M buyback', 'label': 1}

In [133]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


## **Import Model:**

BERT (a sequence classification model with 110 million parameters) is used to perform this sentiment classification task. The cased variant (`bert-base-cased`) is used because the capitalisation of letters can convey meaning in tweets, for example in company names and expressions of emotion.

In [135]:
# Checkpoint id of the cased BERT base model; reused later when the classifier is loaded
model_name = "google-bert/bert-base-cased"
# Tokenizer loaded from the same checkpoint as the model
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
#create functions to tokenize and compute evaluation metric
def tokenize_text(tweets):
    """Tokenize a batch of tweets for sequence classification.

    Intended to be passed to ``Dataset.map(..., batched=True)``.

    Args:
        tweets: Batch mapping whose ``"text"`` entry holds the raw tweet strings.

    Returns:
        The tokenizer output for the batch, truncated to the model's maximum
        length and left unpadded.
    """
    # No return_tensors / .to("cuda"): map() stores its results in the dataset rather
    # than on a device, and a hard-coded "cuda" crashes on CPU-only machines.
    # No padding="max_length": that pads every short tweet to the model maximum, while
    # the Trainer's DataCollatorWithPadding already pads each batch to its longest
    # member, which is much cheaper.
    return tokenizer(tweets["text"], truncation=True)

# Metric implementations loaded through the `evaluate` library
accuracy = evaluate.load("accuracy")
# Generic "f1" metric; macro averaging is requested at compute time inside compute_metrics
f1_macro = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Compute accuracy and macro-F1 for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        Dict with the scalar scores under the keys ``"Accuracy"`` and ``"F1_macro"``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Each metric's compute() returns a one-entry dict; unwrap it so the values
    # reported to the Trainer are plain numbers rather than nested dicts.
    accuracy_score = accuracy.compute(predictions=predictions, references=labels)["accuracy"]
    f1_score = f1_macro.compute(predictions=predictions, references=labels, average="macro")["f1"]
    return {"Accuracy": accuracy_score, "F1_macro": f1_score}

In [148]:
# Tokenize both subsets batch-wise with tokenize_text
train_dataset = train_dataset.map(function=tokenize_text, batched=True)
val_dataset = val_dataset.map(function=tokenize_text, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

## Evaluation: Accuracy and F1-Score (macro)
Accuracy - measures how often the model classifies the tweet sentiment correctly overall.
<br>Macro-F1 - measures how well the model classifies each sentiment class, by averaging the per-class F1 scores over the number of classes. This is robust to class imbalance, which matters here because neutral tweets are the majority class, so a model can achieve a high accuracy simply by predicting neutral whenever it is unsure. If the F1 score of a minority class is low, the macro-F1 score will reflect it.

In [161]:
# Baseline hyperparameters - the author's stated intent is to use the TrainingArguments default values
base_training_args = TrainingArguments(
    # --- bookkeeping: where to write, how often to log and evaluate ---
    output_dir="test_trainer",
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    report_to="none",
    # --- schedule ---
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=1,
    # --- optimiser ---
    learning_rate=5e-5,
    warmup_ratio=0,
    weight_decay=0,  # regularisation that shrinks the weights to reduce overfitting; disabled for the baseline
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
)

In [162]:
#training default settings
def model_instance():
    """Build a fresh 3-label classifier from `model_name` and place it on `device`.

    The seed is reset before loading so that every call starts from the same
    initial weights, making separate training runs comparable.
    """
    transformers.set_seed(42)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3,
        torch_dtype="auto",
    )
    return classifier.to(device)

# Baseline run: a freshly initialised model trained with base_training_args
base_trainer = Trainer(
    model=model_instance(),
    args=base_training_args,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [163]:
def train_LLM(trainer_class):
    """Run a training job and print how long it took.

    Args:
        trainer_class: Object exposing a ``train()`` method (here, a ``Trainer``
            instance).

    Prints:
        The elapsed training time in seconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments that happen during a long training run.
    start_time = time.perf_counter()
    trainer_class.train()
    time_taken = time.perf_counter() - start_time
    print(time_taken)

In [164]:
# Train the baseline model; train_LLM prints the elapsed training time in seconds
train_LLM(base_trainer)

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

## Tune hyperparameters (Full fine tune)

In [None]:
# Tuned hyperparameters for the full fine-tuning run
training_args = TrainingArguments(
    report_to="none",  # consistent with the baseline: no external experiment tracker
    num_train_epochs=10,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,  # regularisation that shrinks the weights to reduce overfitting
    warmup_ratio=0.1,
    gradient_accumulation_steps=2,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    logging_dir='./logs',
    logging_steps=10,
    output_dir="test_trainer",
    eval_strategy="epoch",
    # load_best_model_at_end requires checkpoints to be saved on the same schedule
    # as evaluation; the default step-based save strategy raises a ValueError here.
    save_strategy="epoch",
    load_best_model_at_end=True)

In [None]:
# Training based on the tuned hyperparameters (`training_args`) from the previous cell.
# The model is re-created with model_instance() so this run starts from the same
# initial weights as the baseline.
tuned_trainer = Trainer(
    model=model_instance(),
    args=training_args,  # was base_training_args, which silently repeated the baseline run
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer)
)

### LoRA finetuning

In [None]:
# LoRA adapter configuration for the BERT sentiment classifier
lora_config = LoraConfig(
    # Declare the task so PEFT wraps the model as a sequence classifier; without it the
    # newly initialised classification head is presumably frozen along with the rest of
    # the base model - TODO confirm against the PEFT version in use.
    task_type="SEQ_CLS",
    r=8,  # rank of the low-rank update matrices (kept low, as BERT is considered small)
)