Deep Learning 2023</br>
Assignment 2</br>
Anand Manojkumar Parikh - 20CS10007</br>
Part Of Speech Tagging (using RNN and LSTM)</br>




Installing library pytorch-lightning

In [5]:
%pip install pytorch-lightning -q -U

Importing requirements, mount drive

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from google.colab import drive , files
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from sklearn.metrics import classification_report
drive.mount('/content/drive/')
# use GPU for all purposes since CPU will take too much time, so in the future, "device" means GPU
device = torch.device('cuda:0')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


1. Preprocessing (Data parsing and formatting)

In [7]:
SEQ_LEN = 25

# The following mapping was created using information from https://multiconer.github.io/
# 0-Padding (<PAD>)
# 1-Location (LOC) : Facility, HumanSettlement, Station, OtherLOC
# 2-Creative Work (CW) : VisualWork, MusicalWork, WrittenWork, ArtWork, Software, OtherCW
# 3-Group (GRP) : MusicalGRP, PublicCorp, PrivateCorp, AerospaceManufacturer, SportsGRP, CarManufacturer, TechCorp, ORG
# 4-Person (PER) : Scientist, Artist, Athlete, Politician, Cleric, SportsManager, OtherPER
# 5-Product (PROD) : Clothing, Vehicle, Food, Drink, OtherPROD
# 6-Medical (MED) : Medication/Vaccine, MedicalProcedure, AnatomicalStructure, Symptom, Disease

# This had to be hardcoded since coarse tags never appear in the dataset
fine_2_coarse = {"<PAD>" : "<PAD>" , 
                "B-Facility" : "Location", "B-HumanSettlement" : "Location", "B-Station" : "Location", "B-OtherLOC" : "Location" , 
                "I-Facility" : "Location", "I-HumanSettlement" : "Location", "I-Station" : "Location", "I-OtherLOC" : "Location" ,
                "B-VisualWork" : "Creative Works" , "B-MusicalWork" : "Creative Works" , "B-WrittenWork" : "Creative Works" , "B-ArtWork" : "Creative Works" , "B-Software" : "Creative Works" , "B-OtherCW" : "Creative Works" , 
                "I-VisualWork" : "Creative Works" , "I-MusicalWork" : "Creative Works" , "I-WrittenWork" : "Creative Works" , "I-ArtWork" : "Creative Works" , "I-Software" : "Creative Works" , "I-OtherCW" : "Creative Works" ,
                "B-MusicalGRP" : "Group" , "B-PublicCorp" : "Group" , "B-PrivateCorp" : "Group" , "B-AerospaceManufacturer" : "Group" , "B-SportsGRP" : "Group" , "B-CarManufacturer" : "Group" , "B-TechCorp" : "Group" , "B-ORG" : "Group" , "B-OtherCorp" : "Group" , 
                "I-MusicalGRP" : "Group" , "I-PublicCorp" : "Group" , "I-PrivateCorp" : "Group" , "I-AerospaceManufacturer" : "Group" , "I-SportsGRP" : "Group" , "I-CarManufacturer" : "Group" , "I-TechCorp" : "Group" , "I-ORG" : "Group" , "I-OtherCorp" : "Group" , 
                "B-Scientist" : "Person" , "B-Artist" : "Person" , "B-Athlete" : "Person" , "B-Politician" : "Person" , "B-Cleric" : "Person" , "B-SportsManager" : "Person" , "B-OtherPER" : "Person" ,
                "I-Scientist" : "Person" , "I-Artist" : "Person" , "I-Athlete" : "Person" , "I-Politician" : "Person" , "I-Cleric" : "Person" , "I-SportsManager" : "Person" , "I-OtherPER" : "Person" ,
                "B-Clothing" : "Product" , "B-Vehicle" : "Product" , "B-Food" : "Product" , "B-Drink" : "Product" , "B-OtherPROD" : "Product" ,
                "I-Clothing" : "Product" , "I-Vehicle" : "Product" , "I-Food" : "Product" , "I-Drink" : "Product" , "I-OtherPROD" : "Product" ,
                "B-Medication/Vaccine" : "Medical" , "B-MedicalProcedure" : "Medical" , "B-AnatomicalStructure" : "Medical" , "B-Symptom" : "Medical" , "B-Disease" : "Medical" , 
                "I-Medication/Vaccine" : "Medical" , "I-MedicalProcedure" : "Medical" , "I-AnatomicalStructure" : "Medical" , "I-Symptom" : "Medical" , "I-Disease" : "Medical" , 
                "O" : "O"}

common_coarse_tag_to_idx = {"<PAD>" : 0 , "Location" : 1 , "Creative Works" : 2 , "Group" : 3 , "Person" : 4 , "Product" : 5 , "Medical" : 6 , "O" : 7}

# function to preprocess a given file (in .conll format)
# it returns the training, validation and testing words and tags along with the mappings from word to index and fine tag to index learnt from the dataset
def preprocess(files):
    tot = 0
    max = 0
    retval = []
    word_to_idx = {"<PAD>" : 0, "<UNK>" : 1}
    fine_tag_to_idx = {"<PAD>" : 0}

    for file_no in range(len(files)):
        file = open(files[file_no] , 'r')
        lines = file.readlines()
        saved = 1
        num_lines = 0
        word_sentence = []
        tag_sentence = []
        word_sentence_list = []
        tag_sentence_list = []

        for line in lines:

            if line[0] == '\n':
                if saved == 0:
                    saved = 1
                    word_sentence_list.insert(num_lines , word_sentence.copy())
                    tag_sentence_list.insert(num_lines , tag_sentence.copy())
                    while len(word_sentence_list[num_lines]) < SEQ_LEN:
                        word_sentence_list[num_lines].append("<PAD>")
                        tag_sentence_list[num_lines].append("<PAD>")
                    if len(word_sentence_list[num_lines]) > SEQ_LEN:
                        word_sentence_list[num_lines] = word_sentence_list[num_lines][:SEQ_LEN]
                        tag_sentence_list[num_lines] = tag_sentence_list[num_lines][:SEQ_LEN]
                    tot += len(word_sentence)
                    if max < len(word_sentence):
                        max = len(word_sentence)
                    word_sentence.clear()
                    tag_sentence.clear()
                    num_lines = num_lines+1
                continue

            if line[0] == '#':
                saved = 0
                continue

            word,tag = line.split(" _ _ ")
            word = word.lower()
            tag = tag[:-1]

            word_sentence.append(word)
            tag_sentence.append(tag)

            if word not in word_to_idx:
                word_to_idx[word] = len(word_to_idx)
            if tag not in fine_tag_to_idx:
                fine_tag_to_idx[tag] = len(fine_tag_to_idx)

        X = torch.tensor([[word_to_idx[word] for word in words] for words in word_sentence_list], dtype=torch.int).type(torch.LongTensor)
        Y = torch.tensor([[fine_tag_to_idx[tag] for tag in tags] for tags in tag_sentence_list], dtype=torch.int).type(torch.LongTensor)
        Z = torch.tensor([[common_coarse_tag_to_idx[fine_2_coarse[tag]] for tag in tags] for tags in tag_sentence_list], dtype=torch.int).type(torch.LongTensor)
        results = [X,Y,Z]
        retval.insert(file_no , results.copy())
    print(f"Max sentence len = { max }")
    print(f"Avg sentence len = { tot / (len(retval[0][0]) + len(retval[1][0]) + len(retval[2][0])) }")
    return retval[0][0] , retval[0][1] , retval[0][2] , retval[1][0] , retval[1][1] , retval[1][2] , retval[2][0] , retval[2][1] , retval[2][2] , word_to_idx , fine_tag_to_idx


In [8]:
# Preprocess english files
en_train_X , en_train_Y , en_train_Z , en_val_X , en_val_Y , en_val_Z , en_test_X , en_test_Y , en_test_Z, en_word_to_idx , en_fine_tag_to_idx = preprocess(['/content/drive/My Drive/Colab Notebooks/multiconer2023/EN-English/en_train.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/EN-English/en_dev.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/EN-English/en_test.conll'])
print(f"Number of English training examples = {len(en_train_X)}")
print(f"Number of English development examples = {len(en_val_X)}")
print(f"Number of English testing examples = {len(en_test_X)}")
print(f"Size of English vocabulary = {len(en_word_to_idx)}")

Max sentence len = 68
Avg sentence len = 15.095542710244406
Number of English training examples = 16778
Number of English development examples = 871
Number of English testing examples = 249980
Size of English vocabulary = 242188


In [9]:
# Preprocess hindi files
hi_train_X , hi_train_Y , hi_train_Z , hi_val_X , hi_val_Y , hi_val_Z , hi_test_X , hi_test_Y , hi_test_Z , hi_word_to_idx , hi_fine_tag_to_idx = preprocess(['/content/drive/My Drive/Colab Notebooks/multiconer2023/HI-Hindi/hi_train.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/HI-Hindi/hi_dev.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/HI-Hindi/hi_test.conll'])
print(f"Number of Hindi training examples = {len(hi_train_X)}")
print(f"Number of Hindi development examples = {len(hi_val_X)}")
print(f"Number of Hindi testing examples = {len(hi_test_X)}")
print(f"Size of Hindi vocabulary = {len(hi_word_to_idx)}")

Max sentence len = 45
Avg sentence len = 16.233385881940794
Number of Hindi training examples = 9632
Number of Hindi development examples = 514
Number of Hindi testing examples = 18399
Size of Hindi vocabulary = 30628


In [10]:
# Preprocess bangla files
bn_train_X , bn_train_Y , bn_train_Z , bn_val_X , bn_val_Y , bn_val_Z , bn_test_X , bn_test_Y , bn_test_Z , bn_word_to_idx , bn_fine_tag_to_idx = preprocess(['/content/drive/My Drive/Colab Notebooks/multiconer2023/BN-Bangla/bn_train.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/BN-Bangla/bn_dev.conll' , '/content/drive/My Drive/Colab Notebooks/multiconer2023/BN-Bangla/bn_test.conll'])
print(f"Number of Bangla training examples = {len(bn_train_X)}")
print(f"Number of Bangla development examples = {len(bn_val_X)}")
print(f"Number of Bangla testing examples = {len(bn_test_X)}")
print(f"Size of Bangla vocabulary = {len(bn_word_to_idx)}")

Max sentence len = 85
Avg sentence len = 13.084691095298265
Number of Bangla training examples = 9708
Number of Bangla development examples = 507
Number of Bangla testing examples = 19859
Size of Bangla vocabulary = 42621


2. Model Creation (Bi-LSTM)

In [11]:
# Hyperparameters used while training
EMBEDDING_DIM = 600
HIDDEN_DIM = 800
NUM_EPOCHS = 10
BATCH_SIZE = 32
NUM_LAYERS = 1
PATIENCE = 3

# Model definition
class MyPOSModel(pl.LightningModule):

    def __init__(self , vocab_size , tagset_size , embedding_dim , hidden_dim , num_layers = 1 , bidirectional = True):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size , embedding_dim)
        self.lstm = nn.LSTM(embedding_dim , hidden_dim , batch_first = True , num_layers = num_layers , bidirectional = bidirectional)
        if bidirectional:
            self.fc = nn.Linear(2 * hidden_dim , tagset_size)
        else:
            self.fc = nn.Linear(hidden_dim , tagset_size)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self , x):
        embeds = self.embedding(x)
        lstm_out, _ = self.lstm(embeds)
        tag_space = self.fc(lstm_out)
        tag_scores = nn.functional.log_softmax(tag_space , dim = 2)
        return tag_scores

    def training_step(self , batch , batch_idx):
        x , y = batch
        y_hat = self.forward(x)
        loss = self.loss_fn(y_hat.view(-1 , y_hat.shape[-1]) , y.view(-1))
        self.log('train_loss' , loss)
        return loss

    def validation_step(self , batch , batch_idx):
        x , y = batch
        y_hat = self.forward(x)
        loss = self.loss_fn(y_hat.view(-1 , y_hat.shape[-1]) , y.view(-1))
        self.log('val_loss' , loss)
        return loss

    def test_step(self , batch , batch_idx):
        x , y = batch
        y_hat = self.forward(x)
        loss = self.loss_fn(y_hat.view(-1 , y_hat.shape[-1]) , y.view(-1))
        self.log('test_loss' , loss)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters())
        return optimizer


In [12]:
# Create fine and coarse english models
en_model_fine = MyPOSModel(vocab_size = len(en_word_to_idx) , tagset_size = len(en_fine_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)
en_model_coarse = MyPOSModel(vocab_size = len(en_word_to_idx) , tagset_size = len(common_coarse_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)

In [13]:
# Create fine and coarse hindi models
hi_model_fine = MyPOSModel(vocab_size = len(hi_word_to_idx) , tagset_size = len(hi_fine_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)
hi_model_coarse = MyPOSModel(vocab_size = len(hi_word_to_idx) , tagset_size = len(common_coarse_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)

In [14]:
# Create fine and coarse bangla models
bn_model_fine = MyPOSModel(vocab_size = len(bn_word_to_idx) , tagset_size = len(bn_fine_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)
bn_model_coarse = MyPOSModel(vocab_size = len(bn_word_to_idx) , tagset_size = len(common_coarse_tag_to_idx) , embedding_dim = EMBEDDING_DIM , hidden_dim = HIDDEN_DIM , num_layers = NUM_LAYERS , bidirectional = True)

3. Model Training (using train and dev set)

In [15]:
# Function to train the model using train and val sets and find loss on test set
def model_trainer(model , train_X , train_Y , val_X , val_Y , test_X , test_Y):
    train_dataset = TensorDataset(train_X , train_Y)
    train_loader = DataLoader(train_dataset , batch_size = BATCH_SIZE , shuffle = True)

    val_dataset = TensorDataset(val_X , val_Y)
    val_loader = DataLoader(val_dataset , batch_size = BATCH_SIZE)

    test_dataset = TensorDataset(test_X , test_Y)
    test_loader = DataLoader(test_dataset , batch_size = BATCH_SIZE)

    early_stopping = EarlyStopping(monitor = "val_loss" , patience = PATIENCE , mode = "min")
    trainer = pl.Trainer(max_epochs = NUM_EPOCHS , devices = 1 , accelerator = "gpu" , callbacks = [early_stopping])
    trainer.fit(model , train_dataloaders = train_loader , val_dataloaders = val_loader)
    trainer.test(dataloaders = test_loader)

In [16]:
# Train and save fine english model
model_trainer(en_model_fine , en_train_X , en_train_Y , en_val_X , en_val_Y , en_test_X , en_test_Y)
torch.save(en_model_fine.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/en_fine.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 145 M 
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 108 K 
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
154 M     Trainable params
0         Non-trainable params
154 M     Total params
617.578   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_18/checkpoints/epoch=4-step=2625.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_18/checkpoints/epoch=4-step=2625.ckpt


Testing: 0it [00:00, ?it/s]

In [17]:
# Train and save coarse english model
model_trainer(en_model_coarse , en_train_X , en_train_Z , en_val_X , en_val_Z , en_test_X , en_test_Z)
torch.save(en_model_coarse.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/en_coarse.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 145 M 
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 12.8 K
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
154 M     Trainable params
0         Non-trainable params
154 M     Total params
617.194   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_19/checkpoints/epoch=4-step=2625.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_19/checkpoints/epoch=4-step=2625.ckpt


Testing: 0it [00:00, ?it/s]

In [18]:
# Train and save fine hindi model
model_trainer(hi_model_fine , hi_train_X , hi_train_Y , hi_val_X , hi_val_Y , hi_test_X , hi_test_Y)
torch.save(hi_model_fine.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/hi_fine.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 18.4 M
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 108 K 
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
27.5 M    Trainable params
0         Non-trainable params
27.5 M    Total params
109.834   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_20/checkpoints/epoch=5-step=1806.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_20/checkpoints/epoch=5-step=1806.ckpt


Testing: 0it [00:00, ?it/s]

In [19]:
# Train and save coarse hindi model
model_trainer(hi_model_coarse , hi_train_X , hi_train_Z , hi_val_X , hi_val_Z , hi_test_X , hi_test_Z)
torch.save(hi_model_coarse.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/hi_coarse.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 18.4 M
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 12.8 K
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
27.4 M    Trainable params
0         Non-trainable params
27.4 M    Total params
109.450   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_21/checkpoints/epoch=4-step=1505.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_21/checkpoints/epoch=4-step=1505.ckpt


Testing: 0it [00:00, ?it/s]

In [20]:
# Train and save fine bangla model
model_trainer(bn_model_fine , bn_train_X , bn_train_Y , bn_val_X , bn_val_Y , bn_test_X , bn_test_Y)
torch.save(bn_model_fine.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/bn_fine.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 25.6 M
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 108 K 
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
34.7 M    Trainable params
0         Non-trainable params
34.7 M    Total params
138.617   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_22/checkpoints/epoch=5-step=1824.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_22/checkpoints/epoch=5-step=1824.ckpt


Testing: 0it [00:00, ?it/s]

In [21]:
# Train and save coarse bangla model
model_trainer(bn_model_coarse , bn_train_X , bn_train_Z , bn_val_X , bn_val_Z , bn_test_X , bn_test_Z)
torch.save(bn_model_coarse.state_dict(), "/content/drive/My Drive/Colab Notebooks/multiconer2023/bn_coarse.pt")

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 25.6 M
1 | lstm      | LSTM             | 9.0 M 
2 | fc        | Linear           | 12.8 K
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
34.6 M    Trainable params
0         Non-trainable params
34.6 M    Total params
138.233   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_23/checkpoints/epoch=4-step=1520.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_23/checkpoints/epoch=4-step=1520.ckpt


Testing: 0it [00:00, ?it/s]

4. Model Inference

In [22]:
CHECKPOINT = 1000

# Function to test the model on the test set and print the classification report - precision, recall, f1-score (both weighted and macro average)
def model_tester(model , test_X , test_Y , tag_to_idx):
    idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}

    test_dataset = TensorDataset(test_X, test_Y)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    model = model.to(device)
    model.eval()

    y_true = []
    y_pred = []
    count = 0

    with torch.no_grad():
        for x, y in test_loader:
            # if(count % CHECKPOINT == 0):
            #     print(f"Iterations done: {count}")
            count += 1
            x = x.to(device)
            y = y.to(device)

            # Forward pass
            y_hat = model(x)

            y_pred += [idx_to_tag[i] for i in y_hat.argmax(-1).cpu().numpy().flatten().tolist()]
            y_true += [idx_to_tag[i] for i in y.cpu().numpy().flatten().tolist()]

    print(classification_report(y_true, y_pred))

In [23]:
# Load and test fine english model
en_model_fine.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/en_fine.pt"))
model_tester(en_model_fine , en_test_X , en_test_Y , en_fine_tag_to_idx)

                         precision    recall  f1-score   support

                  <PAD>       1.00      1.00      1.00   2499089
B-AerospaceManufacturer       0.49      0.47      0.48      1013
  B-AnatomicalStructure       0.54      0.26      0.35      5824
              B-ArtWork       0.41      0.18      0.25      1264
               B-Artist       0.61      0.55      0.58     56981
              B-Athlete       0.55      0.50      0.52     27554
      B-CarManufacturer       0.54      0.33      0.41      2977
               B-Cleric       0.39      0.24      0.30      4725
             B-Clothing       0.47      0.13      0.20      2229
              B-Disease       0.59      0.28      0.38      5600
                B-Drink       0.40      0.23      0.30      2235
             B-Facility       0.44      0.41      0.42     16134
                 B-Food       0.29      0.09      0.14      5288
      B-HumanSettlement       0.73      0.62      0.67     41013
     B-MedicalProcedure 

In [24]:
# Load and test coarse english model
en_model_coarse.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/en_coarse.pt"))
model_tester(en_model_coarse , en_test_X , en_test_Z , common_coarse_tag_to_idx)

                precision    recall  f1-score   support

         <PAD>       1.00      1.00      1.00   2499089
Creative Works       0.67      0.56      0.61    169176
         Group       0.70      0.51      0.59    133847
      Location       0.76      0.68      0.71    130520
       Medical       0.55      0.36      0.43     33019
             O       0.93      0.97      0.95   2948139
        Person       0.81      0.81      0.81    290844
       Product       0.41      0.26      0.32     44866

      accuracy                           0.94   6249500
     macro avg       0.73      0.64      0.68   6249500
  weighted avg       0.93      0.94      0.93   6249500



In [25]:
# Load and test fine hindi model
hi_model_fine.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/hi_fine.pt"))
model_tester(hi_model_fine , hi_test_X , hi_test_Y , hi_fine_tag_to_idx)

                         precision    recall  f1-score   support

                  <PAD>       1.00      1.00      1.00    172076
B-AerospaceManufacturer       0.57      0.05      0.09        85
  B-AnatomicalStructure       0.85      0.67      0.75       485
              B-ArtWork       0.40      0.00      0.01       425
               B-Artist       0.58      0.48      0.52      1847
              B-Athlete       0.73      0.63      0.67      1166
      B-CarManufacturer       0.88      0.90      0.89       146
               B-Cleric       0.72      0.90      0.80       188
             B-Clothing       0.72      0.81      0.76        75
              B-Disease       0.88      0.69      0.77       628
                B-Drink       0.74      0.83      0.78       134
             B-Facility       0.74      0.52      0.61       854
                 B-Food       0.72      0.64      0.68       424
      B-HumanSettlement       0.82      0.69      0.75      5807
     B-MedicalProcedure 

In [26]:
# Load and test coarse hindi model
hi_model_coarse.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/hi_coarse.pt"))
model_tester(hi_model_coarse , hi_test_X , hi_test_Z , common_coarse_tag_to_idx)

                precision    recall  f1-score   support

         <PAD>       1.00      1.00      1.00    172076
Creative Works       0.80      0.59      0.68      6689
         Group       0.90      0.81      0.85      9441
      Location       0.88      0.72      0.79     10394
       Medical       0.82      0.74      0.78      2747
             O       0.96      0.99      0.97    244083
        Person       0.80      0.74      0.77     12214
       Product       0.74      0.65      0.69      2331

      accuracy                           0.97    459975
     macro avg       0.86      0.78      0.82    459975
  weighted avg       0.96      0.97      0.96    459975



In [27]:
# Load and test fine bangla model
bn_model_fine.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/bn_fine.pt"))
model_tester(bn_model_fine , bn_test_X , bn_test_Y , bn_fine_tag_to_idx)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                         precision    recall  f1-score   support

                  <PAD>       1.00      1.00      1.00    240692
B-AerospaceManufacturer       0.31      0.05      0.09        97
  B-AnatomicalStructure       0.86      0.66      0.75       532
              B-ArtWork       0.40      0.02      0.03       455
               B-Artist       0.59      0.45      0.51      2744
              B-Athlete       0.54      0.38      0.45      1086
      B-CarManufacturer       0.83      0.94      0.88        84
               B-Cleric       0.56      0.68      0.62       240
             B-Clothing       0.29      0.71      0.41        17
              B-Disease       0.90      0.74      0.81       553
                B-Drink       0.75      0.90      0.82       120
             B-Facility       0.72      0.57      0.64       894
                 B-Food       0.59      0.48      0.53       453
      B-HumanSettlement       0.82      0.72      0.77      6011
     B-MedicalProcedure 

  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
# Load and test coarse bangla model
bn_model_coarse.load_state_dict(torch.load("/content/drive/My Drive/Colab Notebooks/multiconer2023/bn_coarse.pt"))
model_tester(bn_model_coarse , bn_test_X , bn_test_Z , common_coarse_tag_to_idx)

                precision    recall  f1-score   support

         <PAD>       1.00      1.00      1.00    240692
Creative Works       0.84      0.58      0.69      8334
         Group       0.88      0.81      0.84      8621
      Location       0.85      0.78      0.81     10677
       Medical       0.83      0.74      0.78      2585
             O       0.96      0.98      0.97    208686
        Person       0.81      0.76      0.78     14628
       Product       0.70      0.60      0.65      2252

      accuracy                           0.97    496475
     macro avg       0.86      0.78      0.82    496475
  weighted avg       0.97      0.97      0.97    496475



5. Function to predict the NERs present in the input sentence

In [37]:
# Function to predict the NERs present in the input sentence
def predictor(model , test_X , test_Y , word_to_idx , tag_to_idx , max_iter):
    test_dataset = TensorDataset(test_X , test_Y)
    test_loader = DataLoader(test_dataset , batch_size = BATCH_SIZE)

    idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}
    idx_to_word = {idx: word for word, idx in word_to_idx.items()}
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    model = model.to(device)
    model.eval()

    iter = 0
    with torch.no_grad():
        for x, y in test_loader:
            if iter == max_iter:
                break
            iter += 1
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)

            x_sent = [idx_to_word[i] for i in x.cpu().numpy().flatten().tolist()]
            y_pred = [idx_to_tag[i] for i in y_hat.argmax(-1).cpu().numpy().flatten().tolist()]
            y_true = [idx_to_tag[i] for i in y.cpu().numpy().flatten().tolist()]

            print(f"Iteration : {iter} ------------------------------------------")
            print("Sentence")
            print(x_sent)
            print("Predicted tags")
            print(y_pred)
            print("True tags")
            print(y_true)

In [38]:
# Predict fine tags of english (for demonstration purposes, we use the first 10 sentences of the test set)
predictor(en_model_fine , en_test_X , en_test_Y , en_word_to_idx , en_fine_tag_to_idx , 10)

Iteration : 1 ------------------------------------------
Sentence
['the', 'species', 'was', 'described', 'by', 'dietrich', 'brandis', 'after', 'the', 'forester', 't.', 'f.', 'bourdillon', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'by', 'this', 'time', 'she', 'was', 'competing', 'against', 'a', 'new', 'generation', 'of', 'young', 'drivers', 'including', 'stirling', 'moss', 'and', 'peter', 'collins', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'their', 'son', 'was', 'the', 'opera', 'producer', 'knut', 'hendriksen', '(', '1944', '–', '2020', ')', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'he', 'is', 'the', 'younger', 'brother', 'of', 'adam', 'mosseri', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'smes', ':', 'laura', 'j.', 'van', "'t", 'veer', 'et', 'al', '.

In [39]:
# Predict coarse tags of english (for demonstration purposes, we use the first 10 sentences of the test set)
predictor(en_model_coarse , en_test_X , en_test_Z , en_word_to_idx , common_coarse_tag_to_idx , 10)

Iteration : 1 ------------------------------------------
Sentence
['the', 'species', 'was', 'described', 'by', 'dietrich', 'brandis', 'after', 'the', 'forester', 't.', 'f.', 'bourdillon', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'by', 'this', 'time', 'she', 'was', 'competing', 'against', 'a', 'new', 'generation', 'of', 'young', 'drivers', 'including', 'stirling', 'moss', 'and', 'peter', 'collins', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'their', 'son', 'was', 'the', 'opera', 'producer', 'knut', 'hendriksen', '(', '1944', '–', '2020', ')', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'he', 'is', 'the', 'younger', 'brother', 'of', 'adam', 'mosseri', '.', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', 'smes', ':', 'laura', 'j.', 'van', "'t", 'veer', 'et', 'al', '.