In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from transformers import BertTokenizer
import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def predict(model, tokenizer, text, max_length=256):

    # Prepare the text for the model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding='max_length', max_length=max_length)

    # Move the inputs to the device
    for key in inputs:
        inputs[key] = inputs[key].to(device)

    # Make the prediction
    with torch.no_grad():
        logits = model(inputs['input_ids'])
        final_logits, _ = torch.mode(logits, dim=0)  # Voting mechanism
        preds = torch.argmax(final_logits, dim=1)

    # Return the prediction
    return preds.item()

class CustomBERTModel(nn.Module):
    def __init__(self, num_labels):
        super(CustomBERTModel, self).__init__()

        # Load and move to the correct device directly here
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
        self.bert_model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=num_labels).to(device)
        
        self.xlm_roberta_tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
        self.xlm_roberta_model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=num_labels).to(device)
        
        self.distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-multilingual-cased')
        self.distilbert_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-multilingual-cased', num_labels=num_labels).to(device)

    def forward(self, input_ids):
        # Return stacked logits
        bert_logits = self.bert_model(input_ids).logits
        xlm_roberta_logits = self.xlm_roberta_model(input_ids).logits
        distilbert_logits = self.distilbert_model(input_ids).logits

        all_logits = torch.stack((bert_logits, xlm_roberta_logits, distilbert_logits), dim=0)
        return all_logits

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')
# Load Best Entire Model
loaded_entire_model = torch.load('entire_custom_bert_model.pth')
loaded_entire_model.eval()
model = loaded_entire_model

In [2]:
files = ['test_df','dev_en_news', 'dev_en_reviews', 'dev_en_twitter', 'dev_nl_news', 'dev_nl_reviews', 'dev_nl_twitter']
results = {}

for file in files:
    # Read CSV
    df = pd.read_csv(f"{file}.csv")
    texts = df['text'].tolist()  # replace 'text_column_name' with your actual column name
    true_labels = df['label'].tolist()  # replace 'label_column_name' with your actual column name

    # Predict for all rows
    predicted_labels = [predict(model, tokenizer, text) for text in texts]

    # Calculate metrics
    accuracy = accuracy_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels, average='macro')  # adjust this based on your data (binary vs multiclass)

    results[file] = {'Accuracy': accuracy, 'F1 Score': f1}

# Print the results
for file, metrics in results.items():
    print(f"File: {file}")
    print(f"Accuracy: {metrics['Accuracy']:.4f}")
    print(f"F1 Score: {metrics['F1 Score']:.4f}")
    print("----------")

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


File: test_df
Accuracy: 0.7583
F1 Score: 0.7555
----------
File: dev_en_news
Accuracy: 0.9750
F1 Score: 0.9750
----------
File: dev_en_reviews
Accuracy: 0.8150
F1 Score: 0.8121
----------
File: dev_en_twitter
Accuracy: 0.9600
F1 Score: 0.9600
----------
File: dev_nl_news
Accuracy: 0.9700
F1 Score: 0.9700
----------
File: dev_nl_reviews
Accuracy: 0.8400
F1 Score: 0.8400
----------
File: dev_nl_twitter
Accuracy: 0.9250
F1 Score: 0.9247
----------
