**Transfer Learning with BERT - Sentiment Analysis**

In [2]:
import os
import pandas as pd
import requests
import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from bs4 import BeautifulSoup
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import numpy as np



In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**Import data**

In [4]:
data = pd.read_csv('.\processed_data\processed_data.csv',index_col='Unnamed: 0')
labels = pd.read_csv('.\processed_data\processed_labels.csv',index_col='Unnamed: 0')

data = data.rename(columns={"0": 'reviews'})
labels = labels.rename(columns={"0": 'sentiment'})

In [4]:
labels =np.array([1 if x =='positive' else 0 for x in labels['sentiment'].values])

**Define custom dataset and data loader**

In [5]:
pre_trained_model = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(pre_trained_model)
class BertTorchDataset(Dataset):
    def __init__(self,data,labels,tokenizer,max_len):
        self.data = data
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    def __len__(self):
        return len(self.data)
    def __getitem__(self,idx):
        review = str(self.data[idx])
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(review,
                                              add_special_tokens=True,
                                              truncation=True,
                                              max_length = self.max_len,
                                              return_token_type_ids=False,
                                              pad_to_max_length=True,
                                              return_attention_mask=True,
                                              return_tensors='pt'
                                              
                                             )
        
        return{
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [6]:
def prepare_data_loader(data,labels, tokenizer, max_len=250, batch_size=30):
    dataset = BertTorchDataset(
        data=np.squeeze(np.array(data)),
        labels=np.array(labels),
        tokenizer=tokenizer,
        max_len=max_len
    )
    
    return DataLoader(dataset,batch_size=batch_size)

X_train, X_test, y_train, y_test = train_test_split(data,labels,test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test,y_test,test_size=0.4)
train_loader = prepare_data_loader(X_train,y_train, tokenizer, max_len=250, batch_size=30)
test_loader = prepare_data_loader(X_test, y_test, tokenizer,max_len=250, batch_size=30)
val_loader = prepare_data_loader(X_val, y_val, tokenizer,max_len=250, batch_size=30)

**Define BERT model**

In [7]:
class BertSentimentClassifier(nn.Module):
    def __init__(self, n_classes):
        super(BertSentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(pre_trained_model)
        self.drop = nn.Dropout(p=0.1)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
    def forward(self, input_ids, attention_mask):
        _, pooled_output = self.bert(
          input_ids=input_ids,
          attention_mask=attention_mask
        )
        output = self.drop(pooled_output)
        return self.out(output)

In [8]:
model = BertSentimentClassifier(n_classes=2)
model = model.to(device)
n_epochs = 5
total_steps = len(train_loader) * n_epochs
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=total_steps)
criterion= nn.CrossEntropyLoss().to(device)

**Define train, evaluation, and test functions**

In [10]:
def train_model(model,optimizer,criterion,scheduler,train_loader):
    model = model.train()
    epoch_loss = 0
    epoch_acc = 0
    for i, data in enumerate(train_loader, 0):
        input_ids = data['input_ids']
        attention_mask = data['attention_mask']
        targets = data['label']
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        outputs = model(input_ids=input_ids,attention_mask=attention_mask).to(device)
        loss = criterion(outputs,targets).to(device)
        _, pred = torch.max(outputs, dim=1)
        acc = torch.sum(pred == targets)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss/len(train_loader), epoch_acc/len(X_train)

In [13]:
def evaluation_model(model,optimizer,criterion,val_loader):
    model.eval()
    with torch.no_grad():
        epoch_loss = 0
        epoch_acc = 0
        for i, data in enumerate(val_loader, 0):
            input_ids = data['input_ids']
            attention_mask = data['attention_mask']
            targets = data['label']
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            targets = targets.to(device)

            outputs = model(input_ids=input_ids,attention_mask=attention_mask).to(device)
            _, pred = torch.max(outputs, dim=1)
            acc = torch.sum(pred == targets)

            loss = criterion(outputs,targets).to(device)
            
            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return  epoch_loss/len(val_loader), epoch_acc/len(X_val)

In [25]:
def test_model(model,optimizer,criterion,test_loader):
    the_model.eval()
    with torch.no_grad():
        epoch_loss = 0
        epoch_acc = 0
        for i, data in enumerate(test_loader, 0):
            input_ids = data['input_ids']
            attention_mask = data['attention_mask']
            targets = data['label']
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            targets = targets.to(device)

            outputs = model(input_ids=input_ids,attention_mask=attention_mask).to(device)
            _, pred = torch.max(outputs, dim=1)
            acc = torch.sum(pred == targets)

            loss = criterion(outputs,targets).to(device)
            
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    return  epoch_loss/len(test_loader), epoch_acc/len(X_test)

**Train model**

In [14]:
n_epochs = 2

for epoch in range(n_epochs):
    
    train_loss, train_acc = train_model(model,optimizer,criterion,scheduler,train_loader)
    val_loss, val_acc = evaluation_model(model,optimizer,criterion,val_loader)
    
    print(f'Epoch: {epoch+1} Train Loss: {train_loss:.3f} Train Acc: {train_acc*100:.2f}% Val Loss: {val_loss:.3f} Val Acc: {val_acc*100:.2f}%')

Epoch: 1 Train Loss: 0.132 Train Acc: 95.56% Val Loss: 0.248 Val Acc: 91.92%
Epoch: 2 Train Loss: 0.073 Train Acc: 98.07% Val Loss: 0.330 Val Acc: 91.97%


**Save trained parameters**

In [19]:
PATH = "trained_BERT.pt"
torch.save(model.state_dict(),PATH)

**Upload trained parameters and to test the model performance on test data**

In [21]:
the_model = BertSentimentClassifier(n_classes=2)
the_model.load_state_dict(torch.load("trained_BERT.pt"))

<All keys matched successfully>

In [26]:
test_loss, test_acc = test_model(model,optimizer,criterion,test_loader)
print(f'Test Loss: {test_loss:.3f} Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.329 Test Acc: 91.90%
