## Inference

In [1]:
import torch
# Sanity check: report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

# Run on the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Read the raw CSV, then discard the identifier / MeSH metadata columns.
data = pd.read_csv("Multi-Label Text Classification Dataset.csv")
proc_data = data.drop(columns=["meshMajor", "pmid", "meshid", "meshroot"])
df_data = proc_data

In [4]:
# Draw a reproducible 50,000-row sample, then discard any sampled row that
# contains a missing value (so the result can hold slightly fewer rows).
df_data = df_data.sample(n=50000, random_state=4).dropna()

In [5]:
# Merge title and abstract into a single text field so the model sees both,
# then drop the two source columns.
df_data = (
    df_data
    .assign(mixed=df_data["Title"] + ". " + df_data["abstractText"])
    .drop(columns=["abstractText", "Title"])
)

In [6]:
# Preview the label columns alongside the merged "mixed" text column.
df_data.head(3)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,L,M,N,Z,mixed
16477,1,1,1,0,1,0,0,0,0,0,0,1,1,0,Ocular findings in subacute sclerosing panence...
5969,1,1,0,1,0,0,1,0,0,0,1,0,0,0,Notch signaling suppresses IgH gene expression...
46459,0,1,1,1,1,0,1,0,0,0,0,0,1,0,Immunological and physiological effects of chr...


In [7]:
import re
from nltk.corpus import stopwords

import nltk
# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')

# English stop words held in a set for fast membership tests in clean_text.
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalise raw text into lowercase content words.

    Punctuation/symbols and digits are deleted (not replaced by spaces), the
    text is lowercased, and tokens that are English stop words or only one
    character long are dropped. The surviving tokens are joined with single
    spaces.
    """
    # Delete everything that is neither a word character nor whitespace.
    without_symbols = re.sub(r'[^\w\s]', '', text)
    # Delete digit runs, then fold case so the stop-word lookup matches.
    lowered = re.sub(r'\d+', '', without_symbols).lower()
    # Single pass over the tokens: drop stop words and one-character tokens.
    kept_tokens = [
        token
        for token in lowered.split()
        if token not in stop_words and len(token) > 1
    ]
    return ' '.join(kept_tokens)


# Clean the merged text into a new "combined" column and drop the raw one.
df_data = (
    df_data
    .assign(combined=df_data["mixed"].apply(clean_text))
    .drop(columns=["mixed"])
)

# Preview the cleaned frame.
df_data.head(2)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\RISHU\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,A,B,C,D,E,F,G,H,I,J,L,M,N,Z,combined
16477,1,1,1,0,1,0,0,0,0,0,0,1,1,0,ocular findings subacute sclerosing panencepha...
5969,1,1,0,1,0,0,1,0,0,0,1,0,0,0,notch signaling suppresses igh gene expression...


In [8]:
# Hold out 30% of the rows; the remaining 70% is the training split.
df_train, df_holdout = train_test_split(
    df_data, test_size=0.30, shuffle=True, random_state=77
)
# Carve the validation split out of the hold-out rows: 10% validation, 90% test.
df_test, df_valid = train_test_split(
    df_holdout, test_size=0.10, shuffle=True, random_state=88
)

print(f"Train: {df_train.shape}, Test: {df_test.shape}, Valid: {df_valid.shape}")

Train: (34998, 15), Test: (13500, 15), Valid: (1500, 15)


In [9]:
from transformers import DistilBertTokenizer, DistilBertModel

# Load the pretrained tokenizer for the 'distilbert-base-uncased' checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [10]:

class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes the ``combined`` text column on access.

    Args:
        df: DataFrame with a ``combined`` text column plus the label columns.
        tokenizer: Object exposing ``encode_plus`` (a Hugging Face tokenizer
            in this notebook).
        max_len: Length passed to the tokenizer as ``max_length``; sequences
            are padded/truncated to it.
        target_list: Names of the label columns in ``df``.
    """

    def __init__(self, df, tokenizer, max_len, target_list):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        # Materialise text and labels once so __getitem__ indexes by position.
        self.title = list(df['combined'])
        self.targets = df[target_list].values

    def __len__(self):
        return len(self.title)

    def __getitem__(self, index):
        """Return token ids, attention mask, float targets and text for one row."""
        # Collapse every run of whitespace into a single space.
        title = " ".join(str(self.title[index]).split())

        encoding = self.tokenizer.encode_plus(
            title,
            None,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            add_special_tokens=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            return_tensors='pt'
        )

        # The tokenizer output carries a leading batch axis; flatten it away.
        sample = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.FloatTensor(self.targets[index]),
            'title': title,
        }
        return sample

In [11]:
# Every column except the last one (the "combined" text) is a label column.
target_list = list(df_data.columns)[:-1]

In [12]:
# Hyperparameters
MAX_LEN = 128  # passed to CustomDataset as max_len (tokenizer pad/truncate length)
TEST_BATCH_SIZE = 16

# Wrap the test split so each row is tokenized when it is accessed.
test_dataset = CustomDataset(df_test, tokenizer, MAX_LEN, target_list)

# shuffle=False keeps the batches in dataset order.
test_data_loader = torch.utils.data.DataLoader(test_dataset,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_workers=0
)

In [13]:
class DistilBERTClass(torch.nn.Module):
    """Pretrained DistilBERT encoder followed by a two-layer head with 14 outputs."""

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they must stay
        # as they are for saved checkpoints to load.
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.2)
        self.classifier = torch.nn.Linear(768, 14)

    def forward(self, input_ids, attention_mask):
        """Return the classifier outputs; no activation is applied to them here."""
        encoder_out = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        # First element of the encoder output, sliced at sequence position 0.
        first_token = encoder_out[0][:, 0]
        activation = torch.nn.ReLU()
        hidden = activation(self.pre_classifier(first_token))
        return self.classifier(self.dropout(hidden))

# Instantiate the network and move it to the selected device; as the last
# expression of the cell, model.to(device) also displays the module structure.
model = DistilBERTClass()
model.to(device)

DistilBERTClass(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(in

In [14]:
# Rebuild the architecture, then overwrite its weights with the fine-tuned checkpoint.
model = DistilBERTClass()
# NOTE(review): absolute, machine-specific path; adjust it before running elsewhere.
data_dir = "D:\\Sem 2\\NLP\\Assignment_3\\task"
checkpoint_path = os.path.join(data_dir, "output", "trained_model.bin")
# map_location="cpu" lets a checkpoint saved from a GPU load on a machine without
# CUDA; the freshly built model lives on the CPU at this point anyway and is
# moved to `device` afterwards.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you trust
# (or pass weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
model = model.to(device)

In [15]:
def loss_fn(outputs, targets):
    """Return the binary cross-entropy loss computed directly from logits.

    The sigmoid is applied inside BCEWithLogitsLoss, so ``outputs`` must be the
    raw model outputs and ``targets`` float values of the same shape.
    """
    criterion = torch.nn.BCEWithLogitsLoss()
    return criterion(outputs, targets)

# NOTE(review): the evaluation code in this notebook never steps this optimizer;
# it is only passed to eval_model, which does not use it.
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-5)

In [16]:
def eval_model(validation_loader, model, optimizer):
    """Compute element-wise accuracy and mean batch loss over a data loader.

    Args:
        validation_loader: Iterable of batches providing 'input_ids',
            'attention_mask' and 'targets' entries.
        model: Network called as ``model(ids, mask)``; its outputs are fed to
            ``loss_fn`` and to a sigmoid.
        optimizer: Accepted for signature compatibility; not used here.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of individual label
        entries whose rounded sigmoid output equals the target, and the mean
        of the per-batch losses.
    """
    # Evaluation mode (turns off dropout).
    model.eval()

    batch_losses = []
    n_correct = 0
    n_entries = 0

    with torch.no_grad():
        for batch in validation_loader:
            ids = batch['input_ids'].to(device, dtype=torch.long)
            mask = batch['attention_mask'].to(device, dtype=torch.long)
            targets = batch['targets'].to(device, dtype=torch.float)

            logits = model(ids, mask)
            batch_losses.append(loss_fn(logits, targets).item())

            # The loss applies the sigmoid internally; for accuracy it has to
            # be applied explicitly before rounding to 0/1.
            predicted = torch.sigmoid(logits).cpu().numpy().round()
            expected = targets.cpu().numpy()
            n_correct += np.sum(predicted == expected)
            # .size counts every element of the array, not just the rows.
            n_entries += expected.size

    return float(n_correct) / n_entries, np.mean(batch_losses)

In [17]:
# Evaluate the model using the test data
test_acc, test_loss = eval_model(test_data_loader, model, optimizer)
test_acc

0.8844338624338625

In [18]:
from sklearn.metrics import confusion_matrix, classification_report

def get_predictions(model, data_loader):
    """Run the model over a data loader and collect the per-sample results.

    Args:
      model - network called as ``model(ids, mask)``; its outputs are passed
              through a sigmoid
      data_loader - iterable of batches providing "title", "input_ids",
                    "attention_mask" and "targets" entries

    Outputs:
      titles - list with the text of every sample, in loader order
      predictions - CPU tensor of sigmoid outputs rounded to 0/1, one row per sample
      prediction_probs - CPU tensor of the sigmoid outputs, one row per sample
      target_values - CPU float tensor of the targets, one row per sample
    """
    model = model.eval()

    titles = []
    batch_predictions = []
    batch_probs = []
    batch_targets = []

    with torch.no_grad():
        for data in data_loader:
            ids = data["input_ids"].to(device, dtype=torch.long)
            mask = data["attention_mask"].to(device, dtype=torch.long)

            # The model returns logits (sigmoid lives in BCEWithLogitsLoss during
            # training), so apply it here to obtain probabilities.
            probs = torch.sigmoid(model(ids, mask)).cpu()

            titles.extend(data["title"])
            batch_probs.append(probs)
            # Rounding is the 0.5 threshold.
            batch_predictions.append(probs.round())
            # Targets are only collected, never fed to the model, so they do not
            # need a round trip through the device.
            batch_targets.append(data["targets"].to(dtype=torch.float).cpu())

    # Concatenate whole batches along the sample axis instead of stacking
    # one small tensor per sample.
    predictions = torch.cat(batch_predictions)
    prediction_probs = torch.cat(batch_probs)
    target_values = torch.cat(batch_targets)

    return titles, predictions, prediction_probs, target_values

In [19]:
# Collect texts, thresholded predictions, probabilities and targets for the test split.
titles, predictions, prediction_probs, target_values = get_predictions(model, test_data_loader)

# Sanity check: all four results should cover the same number of samples.
print(f"titles:{len(titles)} \npredictions:{predictions.shape} \nprediction_probs:{prediction_probs.shape} \ntarget_values:{target_values.shape}")


titles:13500 
predictions:torch.Size([13500, 14]) 
prediction_probs:torch.Size([13500, 14]) 
target_values:torch.Size([13500, 14])


In [20]:
# Per-label and averaged precision/recall/F1 on the test split.
# zero_division=0 reports the same 0.0 that sklearn's default ("warn") uses for
# undefined scores, but makes that choice explicit instead of emitting an
# UndefinedMetricWarning.
print(classification_report(target_values, predictions, target_names=target_list, zero_division=0))

              precision    recall  f1-score   support

           A       0.81      0.81      0.81      6241
           B       0.97      0.98      0.98     12559
           C       0.91      0.86      0.89      7152
           D       0.92      0.93      0.92      8340
           E       0.83      0.93      0.88     10599
           F       0.76      0.78      0.77      2359
           G       0.84      0.89      0.86      9066
           H       0.63      0.15      0.24      1640
           I       0.73      0.57      0.64      1482
           J       0.73      0.48      0.58      1435
           L       0.69      0.50      0.58      2027
           M       0.88      0.90      0.89      5871
           N       0.85      0.74      0.79      6261
           Z       0.84      0.69      0.76      2213

   micro avg       0.87      0.85      0.86     77245
   macro avg       0.81      0.73      0.76     77245
weighted avg       0.86      0.85      0.85     77245
 samples avg       0.87   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#### Another way to compute the inference metrics

In [21]:
from sklearn.metrics import precision_recall_fscore_support

# Calculate precision, recall, and F1-score for each class
# (average=None returns one value per label column).
precision, recall, f1, _ = precision_recall_fscore_support(target_values, predictions, average=None)

# Print precision, recall, and F1-score for each class
print("\nClass-wise Metrics:")
for label, label_precision, label_recall, label_f1 in zip(target_list, precision, recall, f1):
    print(f"Class: {label}")
    print(f"Precision: {label_precision}")
    print(f"Recall: {label_recall}")
    print(f"F1-Score: {label_f1}")
    print("")

# Calculate micro and macro average precision, recall, and F1-score.
# The 2-D label-indicator matrices are passed as-is. Flattening them with
# .ravel() would turn the task into a single binary problem, where 'micro' is
# plain element-wise accuracy and 'macro' averages the "0" and "1" classes
# instead of the labels, so the numbers would not match classification_report.
micro_precision, micro_recall, micro_f1, _ = precision_recall_fscore_support(target_values, predictions, average='micro')
macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(target_values, predictions, average='macro')


# Print aggregate metrics
print("\nAggregate Metrics:")
print(f"Micro Average Precision: {micro_precision}")
print(f"Micro Average Recall: {micro_recall}")
print(f"Micro Average F1-Score: {micro_f1}")
print("")
print(f"Macro Average Precision: {macro_precision}")
print(f"Macro Average Recall: {macro_recall}")
print(f"Macro Average F1-Score: {macro_f1}")


Class-wise Metrics:
Class: A
Precision: 0.8133633151300996
Recall: 0.8114084281365166
F1-Score: 0.8123846955963744

Class: B
Precision: 0.9701997169366252
Recall: 0.9824826817421769
F1-Score: 0.9763025675515291

Class: C
Precision: 0.9088771310993533
Recall: 0.8646532438478747
F1-Score: 0.886213814846661

Class: D
Precision: 0.9174831779010743
Recall: 0.93189448441247
F1-Score: 0.9246326809826899

Class: E
Precision: 0.8344449134655973
Recall: 0.9325408057363902
F1-Score: 0.8807699162359651

Class: F
Precision: 0.7625928984310487
Recall: 0.7829588808817295
F1-Score: 0.7726417067559088

Class: G
Precision: 0.836544437538844
Recall: 0.8908007941760424
F1-Score: 0.8628205128205129

Class: H
Precision: 0.6329787234042553
Recall: 0.14512195121951219
F1-Score: 0.2361111111111111

Class: I
Precision: 0.7251512532411409
Recall: 0.5661268556005398
F1-Score: 0.6358469117089807

Class: J
Precision: 0.7322751322751323
Recall: 0.48222996515679445
F1-Score: 0.5815126050420169

Class: L
Precision: 0