# **Bert Fine-Tuning For Sentiment Analysis**

**Import Libraries**

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
from transformers.models.bert.modeling_bert import BertForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader
from transformers import AdamW


**Switch To GPU**

In [2]:
device = torch.device("cuda")

**Load Dataset**

In [9]:
df = pd.read_csv('movie.csv', encoding='utf-8')
df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


**Split Dataset**

In [10]:
x_train, x_val, y_train, y_val = train_test_split(df.text, df.label, random_state = 42, test_size = 0.2, stratify = df.label)

**Import and Load Bert Model**

In [11]:
# import BERT-base pretrained model
BERT = BertModel.from_pretrained('bert-base-uncased')
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

**Convert Text Into Tokens**

In [12]:
train_tokens = tokenizer.batch_encode_plus(x_train.tolist(), max_length = 250, pad_to_max_length=True, truncation=True)
val_tokens = tokenizer.batch_encode_plus(x_val.tolist(), max_length = 250, pad_to_max_length=True, truncation=True)



**Convert Tokenizer Return Into Tensors**

In [13]:
train_ids = torch.tensor(train_tokens['input_ids'])
train_masks = torch.tensor(train_tokens['attention_mask'])
train_label = torch.tensor(y_train.tolist())
val_ids = torch.tensor(val_tokens['input_ids'])
val_masks = torch.tensor(val_tokens['attention_mask'])
val_label = torch.tensor(y_val.tolist())

**Load TensorDataset and DataLoaders For Further Data Preprocessing**

In [14]:
train_data = TensorDataset(train_ids, train_masks, train_label)
val_data = TensorDataset(val_ids, val_masks, val_label)
train_loader = DataLoader(train_data, batch_size = 32, shuffle = True)
val_loader = DataLoader(val_data, batch_size = 32, shuffle = True)

**Define Forward and Backward Pass For Added Layers**

In [15]:
class Model(nn.Module):
  def __init__(self, bert):
    super(Model, self).__init__()
    self.bert = bert
    self.dropout = nn.Dropout(0.1)
    self.relu = nn.ReLU()
    self.fc1 = nn.Linear(768, 512)
    self.fc2 = nn.Linear(512, 2)
    self.softmax = nn.LogSoftmax(dim=1)
  def forward(self, sent_id, mask):
    # Pass the inputs to the model
    outputs = self.bert(sent_id, mask)
    cls_hs = outputs.last_hidden_state[:, 0, :]
    x = self.fc1(cls_hs)
    x = self.relu(x)
    x = self.dropout(x)
    x = self.fc2(x)
    x = self.softmax(x)
    return x

**Move Model To GPU**

In [16]:
model = Model(BERT)
# push the model to GPU
model = model.to(device)

**Define Optimizer**

In [17]:
# optimizer from hugging face transformers
from transformers import AdamW
# define the optimizer
optimizer = AdamW(model.parameters(),lr = 1e-5)



**Define Training Function**

In [18]:
def train():
  model.train()
  total_loss, total_accuracy = 0, 0
  total_preds = []
  for step, batch in enumerate(train_loader):
    # Move batch to GPU if available
    batch = [item.to(device) for item in batch]
    sent_id, mask, labels = batch
    # Clear previously calculated gradients
    optimizer.zero_grad()
    # Get model predictions for the current batch
    preds = model(sent_id, mask)
    # Calculate the loss between predictions and labels
    loss_function = nn.CrossEntropyLoss()
    loss = loss_function(preds, labels)
    # Add to the total loss
    total_loss += loss.item()
    # Backward pass and gradient update
    loss.backward()
    optimizer.step()
    # Move predictions to CPU and convert to numpy array
    preds = preds.detach().cpu().numpy()
    # Append the model predictions
    total_preds.append(preds)
  # Compute the average loss
  avg_loss = total_loss / len(train_loader)
  # Concatenate the predictions
  total_preds = np.concatenate(total_preds, axis=0)
  # Return the average loss and predictions
  return avg_loss, total_preds

**Define Evaluation Function**

In [19]:
def evaluate():
  model.eval()
  total_loss, total_accuracy = 0, 0
  total_preds = []
  for step, batch in enumerate(val_loader):
    # Move batch to GPU if available
    batch = [item.to(device) for item in batch]
    sent_id, mask, labels = batch
    # Clear previously calculated gradients
    optimizer.zero_grad()
    # Get model predictions for the current batch
    preds = model(sent_id, mask)
    # Calculate the loss between predictions and labels
    loss_function = nn.CrossEntropyLoss()
    loss = loss_function(preds, labels)
    # Add to the total loss
    total_loss += loss.item()
    # Backward pass and gradient update
    loss.backward()
    optimizer.step()
    # Move predictions to CPU and convert to numpy array
    preds = preds.detach().cpu().numpy()
    # Append the model predictions
    total_preds.append(preds)
  # Compute the average loss
  avg_loss = total_loss / len(val_loader)
  # Concatenate the predictions
  total_preds = np.concatenate(total_preds, axis=0)
  # Return the average loss and predictions
  return avg_loss, total_preds

**Train Model**

In [20]:
# set initial loss to infinite
best_valid_loss = float('inf')
#defining epochs
epochs = 5
# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]
#for each epoch
for epoch in range(epochs):
  print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
  #train model
  train_loss, _ = train()
  #evaluate model
  valid_loss, _ = evaluate()
  #save the best model
  if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'saved_weights.pt')
    # append training and validation loss
  train_losses.append(train_loss)
  valid_losses.append(valid_loss)
  print(f'\nTraining Loss: {train_loss:.3f}')
  print(f'Validation Loss: {valid_loss:.3f}')


 Epoch 1 / 5

Training Loss: 0.264
Validation Loss: 0.214

 Epoch 2 / 5

Training Loss: 0.156
Validation Loss: 0.110

 Epoch 3 / 5

Training Loss: 0.100
Validation Loss: 0.035

 Epoch 4 / 5

Training Loss: 0.062
Validation Loss: 0.020

 Epoch 5 / 5

Training Loss: 0.040
Validation Loss: 0.012
