**Dependencies**

In [None]:
!pip install accelerate
!pip install bitsandbytes
!pip install trl
!pip install peft

**Preparing Dataset**

In [1]:
import pandas as pd
df = pd.read_csv('/kaggle/input/twitter-tweets-sentiment-dataset/Tweets.csv')
df = df.dropna()

text = df['selected_text']
sentiment = df['sentiment']

print('Dataset Lenght: ',len(text))

train_size = int(0.9 * len(text))
val_size = len(text) - train_size
print('training size: ',train_size)
print('val_size: ', val_size)

Dataset Lenght:  27480
training size:  24732
val_size:  2748


In [2]:
train_ds = df[:train_size].reset_index(drop=True)
val_ds = df[train_size:].reset_index(drop=True)

In [4]:
def compute_ratio(ds):
    texts = ds['selected_text']
    sentiments = ds['sentiment']
    print(len(texts[0]))
    max_len = 0
    min_len = 90000
    
    positives = 0
    negatives = 0
    neutral = 0
    
    for i in range(len(ds)):
        
        if sentiments[i] == 'positive':
            positives = positives + 1
            
        elif sentiments[i] == 'negative':
            negatives = negatives + 1
            
        else:
            neutral = neutral + 1
        
        max_len = max(max_len, len(texts[i]))
        min_len = min(min_len, len(texts[i]))
        
    return positives , negatives, neutral, max_len, min_len


print(compute_ratio(train_ds))
print(compute_ratio(val_ds))
    
    

35
(7795, 6987, 9950, 139, 1)
15
(787, 794, 1167, 141, 1)


**Logging to Hugging Face**

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
sentiment_mapping = {"positive": 0, "negative": 1, "neutral": 2}
train_ds['sentiment'] = [sentiment_mapping[sent] for sent in train_ds['sentiment']]
val_ds['sentiment'] = [sentiment_mapping[sent] for sent in val_ds['sentiment']]


In [6]:
from transformers import AutoTokenizer
from datasets import Dataset
import torch



tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

train_ds = Dataset.from_pandas(train_ds)
val_ds = Dataset.from_pandas(val_ds)


def tokenize_function(examples):
    tokens =  tokenizer(examples["selected_text"], padding="max_length", truncation=True , return_tensors='pt')

    # Convert each tensor in the dictionary to float16
    for key in tokens.keys():
        tokens[key] = tokens[key].to(dtype=torch.float16 if tokens[key].dtype.is_floating_point else tokens[key].dtype)

    return tokens


tokenized_train_ds = train_ds.map(tokenize_function)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Map:   0%|          | 0/24732 [00:00<?, ? examples/s]

In [7]:
tokenized_train_ds['sentiment'][:10]

[2, 1, 1, 1, 1, 2, 0, 2, 2, 0]

**LORA Configuration**

In [10]:
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, r=1, lora_alpha=1, lora_dropout=0.1
)

In [11]:
from accelerate import Accelerator

# Initialize the accelerator
accelerator = Accelerator(mixed_precision="fp16")

**Importing model**

In [12]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(
    'bert-base-cased',
    num_labels=3,
)

model = accelerator.prepare(model)

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
from peft import get_peft_model
model = get_peft_model(model, lora_config)

In [15]:
def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad) , sum(p.numel() for p in model.parameters() if 1)

# Assuming 'model' is your original PyTorch model
trainable_parameters , original_parameters = count_trainable_params(model)

print('Trainable parameters are: ',trainable_parameters/original_parameters * 100,'%')


Trainable parameters are:  0.03615170036478414 %


In [16]:
model.to('cuda')

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(28996, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default

In [26]:
import torch
from torch import nn

# class_weights = torch.tensor([0.1,0.2,0.1,0.2,0.1,0.4]).to('cuda')

def compute_loss(model, inputs, attention_mask, labels):
    
    inputs = inputs.view(inputs.size(0),inputs.size(-1)).to('cuda')
#     labels = labels.view(labels.size(0),labels.size(-1)).to('cuda')
    attention_mask = attention_mask.view(attention_mask.size(0),attention_mask.size(-1)).to('cuda')
    
    outputs = model(inputs,attention_mask)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(outputs.logits, labels)
    return loss

**Training Loop**

In [17]:
def rename_sentiment_to_labels(examples):
    examples["labels"] = examples.pop("sentiment")
    return examples

# Apply the renaming function to the datasets
tokenized_train_ds = tokenized_train_ds.map(rename_sentiment_to_labels, batched=True)

Map:   0%|          | 0/24732 [00:00<?, ? examples/s]

In [33]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch",
                                 num_train_epochs=25,)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
)



In [34]:
##custom training loop
import torch

def custom_training_loop():
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
    model.train()

    for epoch in range(training_args.num_train_epochs):
        total_loss = 0.0

        data_loader = trainer.get_train_dataloader()

        for index , batch in enumerate(data_loader):
            inputs = batch['input_ids'] 
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            # Calculate loss
            loss = compute_loss(model, inputs, attention_mask, labels)

            # Backpropagation
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()
            if index % 500 == 0:
              print(index, ' of ',len(data_loader))

        print(f"Epoch {epoch + 1}: Average Loss: {total_loss / len(trainer.get_train_dataloader())}")
custom_training_loop()

0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 1: Average Loss: 0.8313468009194921
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 2: Average Loss: 0.5378294685207317
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 3: Average Loss: 0.44587951873032733
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 4: Average Loss: 0.41555936075747013
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 5: Average Loss: 0.4011515626469374
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 6: Average Loss: 0.38791713268673206
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 7: Average Loss: 0.38112071677223136
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 8: Average Loss: 0.37045793814818345
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 9: Average Loss: 0.36312796546804105
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 10: Average Loss: 0.3567399597771341
0  of  1546
5

**Saving Model**

In [40]:
epoch = 25
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
PATH = '/kaggle/working/bert_model.pth'
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
#     'loss': loss,
#     'lora_state_dict': model.lora.state_dict(),  # Assuming the LoRA adapter is accessible via `model.lora`
}, PATH)


**Inference**

In [78]:
import torch
sentence = val_ds['selected_text'][10]
tokenized_sentence = tokenizer(sentence,return_tensors="pt").to('cuda')
print('sentence is: ',sentence)

with torch.no_grad():
    outputs = model(**tokenized_sentence)

# The model outputs a tuple, the first element is the logits
logits = outputs.logits
print(logits)
# Apply softmax to get probabilities
probabilities = torch.nn.functional.softmax(logits, dim=-1)

print(probabilities)
emotion = ['positive','negative','neutral']

max_index = torch.argmax(probabilities[0])
print(max_index.item())
print('actual_label: ',val_ds['sentiment'][10])

sentence is:  sorry I missed you at the farm today,
tensor([[-2.1660,  1.2646, -0.5708]], device='cuda:0')
tensor([[0.0272, 0.8390, 0.1339]], device='cuda:0')
1
actual_label:  1


**Testing Dataset Accuracy**

In [15]:
prediction = []
for i in range(len(val_ds)):
    
    sentence = val_ds['selected_text'][i]
    tokenized_sentence = tokenizer(sentence,return_tensors="pt").to('cuda')
    
    with torch.no_grad():
        outputs = model(**tokenized_sentence)
        
    logits = outputs.logits
    # Apply softmax to get probabilities
    probabilities = torch.nn.functional.softmax(logits, dim=-1)


    max_index = torch.argmax(probabilities[0])
    prediction.append(max_index)

2024-07-12 10:29:15.618774: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-12 10:29:15.618883: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-12 10:29:15.873350: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [34]:
correct_outcomes = 0

for i in range(len(prediction)):
    if prediction[i] == val_ds['sentiment'][i]:
        correct_outcomes += 1
        
print('Accuracy: ',correct_outcomes/len(prediction))

Accuracy:  0.9057496360989811


**Loading my Finetuned Model**

In [8]:
from accelerate import Accelerator

# Initialize the accelerator
accelerator = Accelerator(mixed_precision="fp16")

In [9]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(
    'bert-base-cased',
    num_labels=3,
)

model = accelerator.prepare(model)

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, r=1, lora_alpha=1, lora_dropout=0.1
)

In [11]:
from peft import get_peft_model
model = get_peft_model(model, lora_config)

In [20]:
import torch
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

In [13]:
model.to('cuda')

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(28996, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.1, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default

In [21]:
import torch
checkpoint = torch.load('/kaggle/working/bert_model.pth')

model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [32]:
tokenized_train_ds['labels'][:10]

[2, 1, 1, 1, 1, 2, 0, 2, 2, 0]

**Continue Training**

In [22]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch",
                                 num_train_epochs=5,)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
)

2024-07-12 10:45:55.310479: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-12 10:45:55.310598: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-12 10:45:55.502035: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [23]:
import torch
from torch import nn

# class_weights = torch.tensor([0.1,0.2,0.1,0.2,0.1,0.4]).to('cuda')

def compute_loss(model, inputs, attention_mask, labels):
    
    inputs = inputs.view(inputs.size(0),inputs.size(-1)).to('cuda')
#     labels = labels.view(labels.size(0),labels.size(-1)).to('cuda')
    attention_mask = attention_mask.view(attention_mask.size(0),attention_mask.size(-1)).to('cuda')
    
    outputs = model(inputs,attention_mask)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(outputs.logits, labels)
    return loss

In [None]:
##custom training loop
import torch

def custom_training_loop():
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5) #decayed 5 times
    model.train()

    for epoch in range(training_args.num_train_epochs):
        total_loss = 0.0

        data_loader = trainer.get_train_dataloader()

        for index , batch in enumerate(data_loader):
            inputs = batch['input_ids'] 
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            # Calculate loss
            loss = compute_loss(model, inputs, attention_mask, labels)

            # Backpropagation
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()
            if index % 500 == 0:
              print(index, ' of ',len(data_loader))

        print(f"Epoch {epoch + 1}: Average Loss: {total_loss / len(trainer.get_train_dataloader())}")
custom_training_loop()

0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 1: Average Loss: 0.3014703040332913
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 2: Average Loss: 0.3000309510580607
0  of  1546
500  of  1546
1000  of  1546
1500  of  1546
Epoch 3: Average Loss: 0.29783447505845984
0  of  1546
500  of  1546
1000  of  1546
