# finetune

we finetune the bert model and add a classification layer after last_hidden_state.

In [11]:
import transformers
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import os
import numpy as np
import pandas as pd

## data

In [12]:
# load data from tsv files
data_dir = os.path.join(os.getcwd(), 'data')
train_file = os.path.join(data_dir, 'train.tsv')
test_file = os.path.join(data_dir, 'test.tsv')

train_data = pd.read_csv(train_file, sep='\t', keep_default_na=False)
test_data = pd.read_csv(test_file, sep='\t', keep_default_na=False)

In [13]:
train_data.head()

Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment
0,1,1,A series of escapades demonstrating the adage ...,1
1,2,1,A series of escapades demonstrating the adage ...,2
2,3,1,A series,2
3,4,1,A,2
4,5,1,series,2


In [14]:
# max length of each phrase tokens
train_data['Phrase'].apply(lambda x: len(x.split())).max(), \
    test_data['Phrase'].apply(lambda x: len(x.split())).max()

(52, 56)

In [15]:
# create a DataLoader
from torch.utils.data import Dataset, DataLoader

class FinetuneDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=60):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data.loc[idx, 'Phrase']
        label = self.data.loc[idx, 'Sentiment']

        inputs = self.tokenizer(text, max_length=self.max_length, padding='max_length', truncation=True, return_tensors='pt')
        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()

        return input_ids, attention_mask, label

## training

In [26]:
# download bert model
dir = os.path.join(os.getcwd(), "models")
bert_base_model = "bert-base-uncased"
tokenizer = transformers.BertTokenizer.from_pretrained(
    bert_base_model, cache_dir=dir
)
bert_model = transformers.BertModel.from_pretrained(
    bert_base_model, cache_dir=dir
    )

In [27]:
# add a classification layer after the bert model
class BertClassifier(nn.Module):
    def __init__(self, bert_model, num_classes):
        super(BertClassifier, self).__init__()
        self.bert = bert_model
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask)
        last_hidden_state = outputs[0]
        cls_output = last_hidden_state[:, 0, :]
        logits = self.classifier(cls_output)
        return logits

In [28]:
# training method
from tqdm import tqdm
from sklearn.metrics import accuracy_score

def train(model, train_loader, test_loader, optimizer, criterion, device, epoch_num=2,
          test_only=False):
    model.to(device)
    model.train()
    for epoch in range(epoch_num):
        total_loss = 0
        for input_ids, attention_mask, labels in tqdm(train_loader):
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            optimizer.zero_grad()
            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_accuracy = accuracy_score(labels.cpu(), logits.argmax(dim=1).cpu())
        print(f'Epoch {epoch + 1}/{epoch_num}, Loss: {total_loss:.4f}')
        print(f'Training Accuracy: {total_accuracy:.4f}')
        # evaluate on the validation set
        model.eval()
        with torch.no_grad():
            total_accuracy = 0
            for input_ids, attention_mask, labels in tqdm(test_loader):
                input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
                logits = model(input_ids, attention_mask)
                total_accuracy += accuracy_score(labels.cpu(), logits.argmax(dim=1).cpu())
            print(f'Validation Accuracy: {total_accuracy/len(test_loader):.4f}')
    
    

In [29]:
# split train_data into train and test
from sklearn.model_selection import train_test_split

trainset_data, testset_data = train_test_split(train_data, test_size=0.2, random_state=42)
trainset_data = trainset_data.reset_index(drop=True)
testset_data = testset_data.reset_index(drop=True)

# create a DataLoader
trainset_dataset = FinetuneDataset(trainset_data, tokenizer)
testset_dataset = FinetuneDataset(testset_data, tokenizer)

trainset_loader = DataLoader(trainset_dataset, batch_size=32, shuffle=True)
testset_loader = DataLoader(testset_dataset, batch_size=32, shuffle=False)

In [30]:
model = BertClassifier(bert_model, num_classes=5)
optimizer = torch.optim.AdamW([
    {'params': model.bert.parameters(), 'lr': 5e-6},
    {'params': model.classifier.parameters(), 'lr': 5e-4}
])
criterion = nn.CrossEntropyLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epoch_num = 3

train(model, trainset_loader, testset_loader, optimizer, criterion, device, epoch_num)


100%|██████████| 3902/3902 [20:14<00:00,  3.21it/s]


Epoch 1/3, Loss: 3190.6724
Training Accuracy: 0.6875


100%|██████████| 976/976 [01:32<00:00, 10.56it/s]


Validation Accuracy: 0.6849


100%|██████████| 3902/3902 [20:09<00:00,  3.23it/s]


Epoch 2/3, Loss: 2664.1749
Training Accuracy: 0.6875


100%|██████████| 976/976 [01:32<00:00, 10.51it/s]


Validation Accuracy: 0.7036


100%|██████████| 3902/3902 [20:09<00:00,  3.23it/s]


Epoch 3/3, Loss: 2331.5419
Training Accuracy: 0.6250


100%|██████████| 976/976 [01:32<00:00, 10.51it/s]

Validation Accuracy: 0.7062





## save and upload

# api

we call the deepseek api, and set the appropriate prompt to do the sentiment analysis.

we compare the accuracy with 2 kinds of system prompt.

**system prompt:**
1. zero-shot prompt:(we provide no example and ask the problems directly)
> You are a sentiment analysis expert. Your task is to classify movie review phrases into one of five sentiment categories: negative (0), somewhat negative (1), neutral (2), somewhat positive (3), or positive (4). You should carefully consider the intensity and context of the phrases to determine the most appropriate label. The output should be in the format "Label: \label{number}, Sentiment: \sentiment{text}".

2. few-shot prompt:(we provide some examples and ask the problems)
> You are a sentiment analysis expert. Your task is to classify movie review phrases into one of five sentiment categories: negative (0), somewhat negative (1), neutral (2), somewhat positive (3), or positive (4). You should carefully consider the intensity and context of the phrases to determine the most appropriate label.  
> 
> **Examples:**  
> 1. Phrase: "Terrible acting and a boring plot."  
>    Output: Label: 0, Sentiment: negative  
> 
> 2. Phrase: "The pacing was slow, but the visuals were decent."  
>    Output: Label: 1, Sentiment: somewhat negative  
> 
> 3. Phrase: "It was an average movie with some good moments."  
>    Output: Label: 2, Sentiment: neutral  
> 
> 4. Phrase: "The humor was clever, and the characters were engaging."  
>    Output: Label: 3, Sentiment: somewhat positive  
> 
> 5. Phrase: "A masterpiece with brilliant performances and a captivating story."  
>    Output: Label: 4, Sentiment: positive  
> 
> **Guidelines:**  
> - Use the examples above as a reference for intensity and context.  
> - Ensure your output matches the format: "Label: \label{number}, Sentiment: \sentiment{text}".  

**user prompt:**
> Classify the sentiment of this movie review phrase:
> 
> Phrase: "The plot was predictable, but the visuals were stunning."
> 
> Output format: Label: \label{number}, Sentiment: \sentiment{text}