In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report

In [28]:
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }
  
class TextClassifier(torch.nn.Module):
    def __init__(self, model_name, num_labels):
        super(TextClassifier, self).__init__()
        self.bert = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    
    def forward(self, input_ids, attention_mask, labels=None):
        return self.bert(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
  
class Utils:
    @staticmethod    
    def load_data(file_path):
        df = pd.read_csv(file_path)
        df = df.dropna()
        return df['text'].tolist(), df['y'].tolist()
    
    @staticmethod
    def create_data_loader(texts, labels, tokenizer, max_len, batch_size):
        dataset = TextDataset(texts, labels, tokenizer, max_len)
        return DataLoader(dataset, batch_size=batch_size, shuffle=True)

    @staticmethod
    def get_base_model_path(task):
        return '../models/baseline/'+task+'-base-model.pth'
    
    @staticmethod
    def get_mini_test_data(task):
        return "..\\data\\orig\\processed\\test\\"+task+"-data.csv"
    
    @staticmethod
    def eval_model(model, data_loader, device):
        model = model.eval()
        predictions, true_labels = [], []

        with torch.no_grad():
            for data in data_loader:
                input_ids = data['input_ids'].to(device)
                attention_mask = data['attention_mask'].to(device)
                labels = data['labels'].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                _, preds = torch.max(logits, dim=1)
                
                predictions.extend(preds.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())

        return accuracy_score(true_labels, predictions), classification_report(true_labels, predictions)

tasks = {
    'news':{
        'model_base':Utils.get_base_model_path('news'),
        'test_data':Utils.get_mini_test_data('news'),
        'MAX_LEN':32,
    },
    'spam':{
        'model_base':Utils.get_base_model_path('spam'),
        'test_data':Utils.get_mini_test_data('spam'),
        'MAX_LEN':48,
    },
    'sentiment':{
        'model_base':Utils.get_base_model_path('sentiment'),
        'test_data':Utils.get_mini_test_data('sentiment'),
        'MAX_LEN':48,
    }
}

class Config:
    MODEL_NAME = 'bert-base-uncased'

tokenizer = BertTokenizer.from_pretrained(Config.MODEL_NAME)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
df_out = pd.DataFrame({'type':['base','gpt-2','llama3','flan'],'news':[0,0,0,0],'spam':[0,0,0,0],'sentiment':[0,0,0,0]})
df_out.set_index('type', inplace=True)
predictions = {}

In [29]:
tasks

{'news': {'model_base': '../models/baseline/news-base-model.pth',
  'test_data': '..\\data\\orig\\processed\\test\\news-data.csv',
  'MAX_LEN': 32},
 'spam': {'model_base': '../models/baseline/spam-base-model.pth',
  'test_data': '..\\data\\orig\\processed\\test\\spam-data.csv',
  'MAX_LEN': 48},
 'sentiment': {'model_base': '../models/baseline/sentiment-base-model.pth',
  'test_data': '..\\data\\orig\\processed\\test\\sentiment-data.csv',
  'MAX_LEN': 48}}

In [30]:
data_type = 'model_base'
for task in tasks:
    if task in ['news','spam']:
        continue
    class Config:
        MAX_LEN = tasks[task]['MAX_LEN']
        BATCH_SIZE = 20
        MODEL_NAME = 'bert-base-uncased'
    test_texts, test_labels = Utils.load_data(tasks[task]['test_data'])
    print("Len: ",len(test_texts))
    test_data_loader = Utils.create_data_loader(test_texts, test_labels, tokenizer, Config.MAX_LEN, Config.BATCH_SIZE)
    model = TextClassifier(model_name=Config.MODEL_NAME, num_labels=2)
    model.load_state_dict(torch.load(tasks[task][data_type]))
    predictions[task] = Utils.eval_model(model, test_data_loader, device)
    print(predictions[task][0])
    df_out.at['base',task] = predictions[task][0]
    
df_out
    

Len:  500


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load(tasks[task][data_type]))


0.718


  df_out.at['base',task] = predictions[task][0]


Unnamed: 0_level_0,news,spam,sentiment
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
base,0.521739,0.956912,0.718
gpt-2,0.0,0.0,0.0
llm2,0.0,0.0,0.0
llm3,0.0,0.0,0.0


In [25]:
df_out

Unnamed: 0_level_0,news,spam,sentiment
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
base,0.521739,0.956912,0
gpt-2,0.0,0.0,0
llm2,0.0,0.0,0
llm3,0.0,0.0,0
