**Note**:
* Here the data is split into train and test sets so that the metrics are calculated on the test portion only. If you are supplying a dataset that should be used entirely as test data, uncomment the code below the train/test split to get the desired results.
* I have saved the model trained with 'BCEWithLogitsLoss' as "model1" and the one trained with 'FocalLoss' as "model2". When running this code, set the loss function name to match the model being loaded (edit this line: "loss_type = 'BCEWithLogitsLoss'  # or 'FocalLoss', depending on the model").

In [12]:
import torch
from transformers import BertTokenizer
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split


In [16]:
import torch
import torch.nn as nn
from transformers import BertModel

In [13]:
class TextDataset(Dataset):
    """Dataset that tokenizes ``Title + ' ' + abstractText`` for each record.

    Args:
        texts: Table-like object indexed positionally through ``.iloc``; each
            record must expose the ``'Title'`` and ``'abstractText'`` fields.
        labels: Sequence of label vectors, indexed by the same position as
            ``texts``.
        tokenizer: Object providing ``encode_plus`` (e.g. a BERT tokenizer).
        max_len: Length every encoded sequence is padded / truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=512):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        """Return the number of records in ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded sample at position ``idx``.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` (each flattened
            to one dimension) and ``'labels'`` as a float tensor.
        """
        record = self.texts.iloc[idx]
        document = record['Title'] + ' ' + record['abstractText']
        target = self.labels[idx]

        encode_options = {
            'add_special_tokens': True,
            'max_length': self.max_len,
            'return_token_type_ids': False,
            'padding': 'max_length',
            'return_attention_mask': True,
            'truncation': True,
            'return_tensors': 'pt',
        }
        encoded = self.tokenizer.encode_plus(document, **encode_options)

        # The tokenizer returns a leading batch axis; flatten it away so the
        # DataLoader can stack samples itself.
        sample = {
            field: encoded[field].flatten()
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.FloatTensor(target)
        return sample

In [24]:
class FocalLoss(nn.Module):
    """Focal loss built on element-wise binary cross-entropy.

    Each element's BCE value is scaled by ``alpha * (1 - pt) ** gamma`` where
    ``pt = exp(-BCE)``, so elements with a small BCE contribute less.

    Args:
        alpha: Constant multiplier applied to every element's loss.
        gamma: Exponent applied to ``(1 - pt)``.
        logits: If true, ``inputs`` are passed to
            ``binary_cross_entropy_with_logits``; otherwise to
            ``binary_cross_entropy``.
        reduce: If true, ``forward`` returns the mean over all elements;
            otherwise the unreduced element-wise loss tensor.
    """

    def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Compute the focal loss of ``inputs`` against ``targets``."""
        functional = nn.functional
        bce_fn = (
            functional.binary_cross_entropy_with_logits
            if self.logits
            else functional.binary_cross_entropy
        )
        per_element_bce = bce_fn(inputs, targets, reduction='none')

        # exp(-BCE) recovers the probability assigned to the true outcome.
        prob_true = torch.exp(-per_element_bce)
        modulating_factor = (1 - prob_true) ** self.gamma
        focal = self.alpha * modulating_factor * per_element_bce

        return focal.mean() if self.reduce else focal

In [17]:
class MultiLabelClassifier(nn.Module):
    """BERT encoder followed by a two-layer head producing one logit per class.

    Args:
        n_classes: Number of output logits.
        loss_type: ``'BCEWithLogitsLoss'`` or ``'FocalLoss'``; selects the
            module stored in ``self.criterion``.
        steps_per_epoch: Stored on the instance unchanged.
        n_epochs: Stored on the instance unchanged.
        lr: Stored on the instance unchanged.

    Raises:
        ValueError: If ``loss_type`` is not one of the two supported names.
    """

    def __init__(self, n_classes, loss_type='BCEWithLogitsLoss', steps_per_epoch=None, n_epochs=3, lr=2e-5):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        hidden_size = self.bert.config.hidden_size
        head_layers = [
            nn.Linear(hidden_size, 1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024, n_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

        if loss_type not in ('BCEWithLogitsLoss', 'FocalLoss'):
            raise ValueError("Invalid loss type provided: choose 'BCEWithLogitsLoss' or 'FocalLoss'")
        if loss_type == 'BCEWithLogitsLoss':
            self.criterion = nn.BCEWithLogitsLoss()
        else:
            self.criterion = FocalLoss(alpha=1, gamma=2, logits=True, reduce=True)

        self.steps_per_epoch, self.n_epochs, self.lr = steps_per_epoch, n_epochs, lr

    def forward(self, input_ids, attention_mask):
        """Return the classifier output for BERT's pooled representation."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = encoded.pooler_output
        return self.classifier(pooled)

In [14]:
# Load the dataset; malformed CSV rows are skipped rather than aborting the read.
df = pd.read_csv(r'/content/Multi-Label Text Classification Dataset.csv', engine='python', on_bad_lines='skip')
# Missing text becomes an empty string so title and abstract can be concatenated.
df['Title'] = df['Title'].fillna('')
df['abstractText'] = df['abstractText'].fillna('')

# Define the label mappings (column code -> human-readable category name)
label_mappings = {
    "A": "Anatomy",
    "B": "Organisms",
    "C": "Diseases",
    "D": "Chemicals and Drugs",
    "E": "Analytical, Diagnostic and Therapeutic Techniques, and Equipment",
    "F": "Psychiatry and Psychology",
    "G": "Phenomena and Processes",
    "H": "Disciplines and Occupations",
    "I": "Anthropology, Education, Sociology, and Social Phenomena",
    "J": "Technology, Industry, and Agriculture",
    "L": "Information Science",
    "M": "Named Groups",
    "N": "Health Care",
    "Z": "Geographicals"
}

# Extract, for each row, the list of label codes whose column equals 1.
# One vectorized comparison replaces a per-row `iterrows()` loop.
label_columns = list(label_mappings)
label_flags = df[label_columns].eq(1).to_numpy()
labels = [
    [code for code, present in zip(label_columns, row_flags) if present]
    for row_flags in label_flags
]

# Encode labels as one-hot vectors (one array per row)
mlb = MultiLabelBinarizer()
df['one_hot_labels'] = list(mlb.fit_transform(labels))

# Split the dataset into train and test sets (90% / 10%, fixed seed)
x_train, x_test, y_train, y_test = train_test_split(df[['Title', 'abstractText']], df['one_hot_labels'], test_size=0.1, random_state=42)

# NOTE: To evaluate on the entire file instead of the held-out split, uncomment
# BOTH lines below so they override the split's outputs. Texts and labels are
# indexed by the same position downstream, so they must cover the same rows.
# x_test = df[['Title', 'abstractText']]
# y_test = df['one_hot_labels']


In [15]:
# Tokenizer for the 'bert-base-uncased' vocabulary.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# y_test holds one label array per row; stack them into a single 2-D array.
test_dataset = TextDataset(
    texts=x_test,
    labels=np.array(y_test.tolist()),
    tokenizer=tokenizer,
)

# Evaluation order is kept fixed (no shuffling).
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Re-create the label encoder so `mlb.classes_` gives the class names and count.
mlb = MultiLabelBinarizer()
mlb.fit(labels)  # Fit the mlb instance to labels to set up the classes

model_path = '/content/drive/MyDrive/multi_label_text_classification_model1.pth'

# Determine the loss type used for training the saved model
loss_type = 'BCEWithLogitsLoss'  # or 'FocalLoss', depending on the model

model = MultiLabelClassifier(n_classes=len(mlb.classes_), loss_type=loss_type)
# Deserialize onto the CPU: the model built above is never moved to another
# device, so its parameters are on the CPU and `load_state_dict` copies the
# weights there anyway. Mapping to 'cuda' only made the load fail on machines
# without a GPU.
# NOTE(review): `torch.load` unpickles the file — only load trusted checkpoints.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
# Switch to inference mode (disables dropout).
model.eval()

In [None]:
all_preds = []
all_labels = []

# Run inference on whichever device the model's parameters live on, so this
# cell works whether or not the model has been moved to a GPU.
device = next(model.parameters()).device

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Deliberately not named `labels`: that global holds the per-row label
        # lists used to fit `mlb`, and overwriting it breaks re-running that cell.
        batch_labels = batch['labels']
        outputs = model(input_ids, attention_mask)
        # Sigmoid turns logits into probabilities; rounding thresholds them at 0.5.
        preds = torch.sigmoid(outputs).round().int()
        # Tensors must be on the CPU before conversion to NumPy.
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_labels.numpy())

# Shape: (number of samples, number of classes)
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Calculate metrics for each class
for i, class_name in enumerate(mlb.classes_):
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels[:, i], all_preds[:, i], average='binary')
    print(f"Class: {class_name} - Precision: {precision}, Recall: {recall}, F1-Score: {f1}")

# Calculate micro and macro averages for precision, recall, and F1-score
precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(all_labels, all_preds, average='micro')
precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')

print(f"Micro Average - Precision: {precision_micro}, Recall: {recall_micro}, F1-Score: {f1_micro}")
print(f"Macro Average - Precision: {precision_macro}, Recall: {recall_macro}, F1-Score: {f1_macro}")

# Calculate overall accuracy. For multilabel input, accuracy_score is subset
# (exact-match) accuracy: a sample counts as correct only if every label matches.
overall_accuracy = accuracy_score(all_labels, all_preds)
print(f"Overall Test Accuracy: {overall_accuracy}")