In [1]:
import torch
from transformers import AutoModelForSequenceClassification
import copy

In [2]:
# Load the three local client models (Twitter, news, reports) onto the
# available device: CUDA when present, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The checkpoints are loaded in the listed order and unpacked into one name
# per client.
twitter_model, news_model, reports_model = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint_dir).to(device)
    for checkpoint_dir in (
        "models/twitter_local_finbert",
        "models/news_local_finbert",
        "models/reports_local_finbert",
    )
)

In [3]:
#Fed Average Function
def fedavg(models, weights=None):
    """Aggregate client models into a new global model by parameter averaging.

    Every entry of the clients' state dicts is averaged element-wise in
    float32 and then cast back to the dtype that entry has in ``models[0]``.

    Args:
        models: Non-empty sequence of ``torch.nn.Module`` objects whose
            ``state_dict()`` entries share the same keys and shapes.
        weights: Optional sequence of non-negative numbers, one per model
            (for example each client's number of training samples). The
            weights are normalised to sum to 1. When omitted, every client
            contributes equally (plain arithmetic mean).

    Returns:
        A deep copy of ``models[0]`` loaded with the averaged state dict.
        The input models are left unmodified.

    Raises:
        ValueError: If ``models`` is empty, ``weights`` does not have one
            entry per model, any weight is negative, or the weights do not
            sum to a positive number.
    """
    models = list(models)
    if not models:
        raise ValueError("fedavg() needs at least one model to aggregate")

    coefficients = None
    if weights is not None:
        weights = [float(w) for w in weights]
        if len(weights) != len(models):
            raise ValueError(
                f"expected {len(models)} weights (one per model), got {len(weights)}"
            )
        total = sum(weights)
        if total <= 0 or any(w < 0 for w in weights):
            raise ValueError("weights must be non-negative and sum to a positive number")
        coefficients = [w / total for w in weights]

    # Snapshot each client's state dict once; calling state_dict() inside the
    # key loop would rebuild the whole dict for every key and every model.
    client_states = [model.state_dict() for model in models]

    global_model = copy.deepcopy(models[0])
    global_dict = global_model.state_dict()

    # list(...) so that re-assigning entries cannot interfere with iteration.
    for key, reference in list(global_dict.items()):
        stacked = torch.stack([
            state[key].to(device=reference.device, dtype=torch.float32)
            for state in client_states
        ])
        if coefficients is None:
            averaged = torch.mean(stacked, dim=0)
        else:
            scale = torch.tensor(
                coefficients, dtype=torch.float32, device=reference.device
            )
            # Reshape to (n_clients, 1, 1, ...) so it broadcasts over each tensor.
            scale = scale.view(-1, *([1] * (stacked.dim() - 1)))
            averaged = (stacked * scale).sum(dim=0)
        # Restore the entry's own dtype (e.g. integer buffers, half precision)
        # instead of leaving float32 tensors in the state dict.
        global_dict[key] = averaged.to(reference.dtype)

    global_model.load_state_dict(global_dict)
    return global_model

In [4]:
# Federated learning, round 1: aggregate the three client models with FedAvg.
# (Original note: "5 to 20 for Research" - presumably the number of rounds a
# research-grade run would use; confirm with the author.)
client_models = [twitter_model, news_model, reports_model]

# Module.to() returns the module itself, so the call can be chained directly.
global_model = fedavg(client_models).to(device)

print("FedAvg aggregation completed")

FedAvg aggregation completed


In [5]:
# Persist the aggregated global model to disk.
import os

GLOBAL_MODEL_DIR = "models/global_fedavg_finbert"

# Make sure the target directory exists before writing the checkpoint.
os.makedirs(GLOBAL_MODEL_DIR, exist_ok=True)
global_model.save_pretrained(GLOBAL_MODEL_DIR)

print("Global FedAvg model saved")

Global FedAvg model saved


In [6]:
# Reload the saved global model from disk and switch it to evaluation mode.
from transformers import AutoModelForSequenceClassification

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

global_model = AutoModelForSequenceClassification.from_pretrained(
    "models/global_fedavg_finbert"
)
# .to() and .eval() both return the module, so they can be chained.
global_model = global_model.to(device).eval()

print("Global FedAvg model loaded")

Global FedAvg model loaded


In [7]:
# Load the tokenizer published under "ProsusAI/finbert" (the FinBERT base
# tokenizer); per the original note it is used so the comparison is fair.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

In [8]:
#Evaluation Function to evaluate on Each Clients Data
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and compute classification metrics.

    The model is switched to eval mode (a side effect on the caller's object)
    and predictions are taken as the argmax of the logits along dimension 1.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask`` keyword
            tensors whose output exposes a ``logits`` attribute.
        dataloader: Iterable of batches; each batch is a mapping with the
            keys ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, cm)`` where precision,
        recall and F1 are support-weighted averages and ``cm`` is the
        confusion matrix from ``sklearn.metrics.confusion_matrix``.
    """
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            outputs = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            )
            preds = torch.argmax(outputs.logits, dim=1)

            all_preds.extend(preds.cpu().numpy())
            # Labels are only needed on the host for scoring, so skip the
            # round trip through the compute device.
            all_labels.extend(batch["labels"].cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    # zero_division=0 keeps the numeric result of the default ("warn" also
    # scores undefined metrics as 0) but silences the UndefinedMetricWarning
    # raised when a class has no true or no predicted samples in a split.
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average="weighted", zero_division=0
    )
    cm = confusion_matrix(all_labels, all_preds)

    return accuracy, precision, recall, f1, cm

In [9]:
# Load each client's validation split (texts and labels) from CSV.
import pandas as pd


def _read_single_column(csv_path):
    """Read a CSV and squeeze its column axis, giving a Series for one-column files."""
    # squeeze("columns") collapses only the column axis. A bare squeeze()
    # would also collapse a file with a single data row down to a scalar,
    # which has no usable .tolist() for the dataset class further down.
    return pd.read_csv(csv_path).squeeze("columns")


X_tw_val = _read_single_column("data/splits/twitter_val_text.csv")
y_tw_val = _read_single_column("data/splits/twitter_val_labels.csv")

X_news_val = _read_single_column("data/splits/news_val_text.csv")
y_news_val = _read_single_column("data/splits/news_val_labels.csv")

X_reports_val = _read_single_column("data/splits/reports_val_text.csv")
y_reports_val = _read_single_column("data/splits/reports_val_labels.csv")

print("All validation splits loaded")

All validation splits loaded


In [11]:
#Re-Creating Dataset and DataLoaders
from torch.utils.data import Dataset
import torch

#DataSet Class
class FinSentimentDataset(Dataset):
    """Map-style dataset pairing texts with integer labels.

    Each item is tokenized on access, truncated and padded to ``max_len``
    tokens, and returned as a dict with the keys ``"input_ids"``,
    ``"attention_mask"`` and ``"labels"``.

    Args:
        texts: Object exposing ``.tolist()`` that yields the input texts.
        labels: Object exposing ``.tolist()`` that yields the labels.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding="max_length", max_length=..., return_tensors="pt")``.
        max_len: Length every sequence is padded / truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.max_len = max_len
        # Materialise plain Python lists so items are looked up by position.
        self.texts = texts.tolist()
        self.labels = labels.tolist()

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )
        # Drop the leading dimension added by return_tensors="pt" so the
        # DataLoader can stack individual items into a batch.
        item = {
            field: encoded[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Build one validation DataLoader per client.
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

# Every validation loader shares the same settings; order is kept fixed
# (shuffle=False).
VAL_LOADER_KWARGS = {"batch_size": 16, "shuffle": False}

tw_val_loader = DataLoader(
    FinSentimentDataset(X_tw_val, y_tw_val, tokenizer), **VAL_LOADER_KWARGS
)
news_val_loader = DataLoader(
    FinSentimentDataset(X_news_val, y_news_val, tokenizer), **VAL_LOADER_KWARGS
)
reports_val_loader = DataLoader(
    FinSentimentDataset(X_reports_val, y_reports_val, tokenizer), **VAL_LOADER_KWARGS
)

print("Validation DataLoaders created")

Validation DataLoaders created


In [12]:
# Evaluate the global FedAvg model on the Twitter client's validation split.
acc_t, prec_t, rec_t, f1_t, cm_t = evaluate_model(
    model=global_model, dataloader=tw_val_loader, device=device
)

print("Global FedAvg — Twitter")
print(f"Accuracy: {acc_t:.4f}", f"F1-score: {f1_t:.4f}", sep="\n")
print("Confusion Matrix:\n", cm_t)

Global FedAvg — Twitter
Accuracy: 0.6835
F1-score: 0.6704
Confusion Matrix:
 [[ 84  56 176]
 [  0   0   0]
 [ 17  26 510]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
# Evaluate the global FedAvg model on the News client's validation split.
acc_n, prec_n, rec_n, f1_n, cm_n = evaluate_model(
    model=global_model, dataloader=news_val_loader, device=device
)

print("Global FedAvg — News")
print(f"Accuracy: {acc_n:.4f}", f"F1-score: {f1_n:.4f}", sep="\n")
print("Confusion Matrix:\n", cm_n)

Global FedAvg — News
Accuracy: 0.5612
F1-score: 0.5431
Confusion Matrix:
 [[ 25  63   2]
 [  4 182 246]
 [  3   1 201]]


In [14]:
# Evaluate the global FedAvg model on the Reports client's validation split.
acc_r, prec_r, rec_r, f1_r, cm_r = evaluate_model(
    model=global_model, dataloader=reports_val_loader, device=device
)

print("Global FedAvg — Reports")
print(f"Accuracy: {acc_r:.4f}", f"F1-score: {f1_r:.4f}", sep="\n")
print("Confusion Matrix:\n", cm_r)

Global FedAvg — Reports
Accuracy: 0.6575
F1-score: 0.6437
Confusion Matrix:
 [[ 44  67  18]
 [ 36 263 170]
 [  6   3 269]]


In [15]:
# Side-by-side comparison of each client's local F1 and the FedAvg global F1.
import pandas as pd

# NOTE(review): the "Local F1" figures are hard-coded here rather than computed
# in this notebook - presumably copied from the local-training runs; confirm
# they are still current before relying on this table.
results = pd.DataFrame(
    [
        ("Twitter", 0.8130, f1_t),
        ("News", 0.8267, f1_n),
        ("Reports", 0.7769, f1_r),
    ],
    columns=["Client", "Local F1", "FedAvg F1"],
)

print(results)

    Client  Local F1  FedAvg F1
0  Twitter    0.8130   0.670384
1     News    0.8267   0.543086
2  Reports    0.7769   0.643707
