In [1]:
import os
import argparse

from data import load_train_test_split, load_old_train_test_split, SelectModelInputs, BIOTagger, RandomlyUKNTokens, EvaluationDataCollator, RandomlyReplaceTokens
from transformers import TrainingArguments, AutoTokenizer, DataCollatorForTokenClassification

from model.configuration_bionexttager import BioNExtTaggerConfig
from model.modeling_bionexttagger import BioNExtTaggerModel
from trainer import NERTrainer
from metrics import NERMetrics



In [2]:
# --- Experiment constants ----------------------------------------------------
model_checkpoint = "michiyasunaga/BioLinkBERT-large"
CONTEXT_SIZE = 64
train_augmentation = None  # no augmentation is handed to the dataset loader

# --- Tokenizer ---------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
tokenizer.model_max_length = 512

# --- Transformations applied to the training split ---------------------------
biotagger = BIOTagger()
transforms = [
    biotagger,
    SelectModelInputs(),
]

# --- Dataset splits ----------------------------------------------------------
# The test split is loaded without any transformation.
train_ds, test_ds = load_old_train_test_split(
    "../../dataset/",
    tokenizer=tokenizer,
    context_size=CONTEXT_SIZE,
    train_transformations=transforms,
    train_augmentations=train_augmentation,
    test_transformations=None,
)

Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Running this sequence through the model will result in indexing errors


In [3]:
# Label vocabulary: id 0 is the outside tag "O"; every entity type then takes
# two consecutive ids, an odd one for its "B-" tag and the next even one for
# its "I-" tag, in the order the entity types are listed below.
id2label = {0: "O"}
id2label.update(
    (2 * position + offset, f"{prefix}-{entity_type}")
    for position, entity_type in enumerate((
        "GeneOrGeneProduct",
        "DiseaseOrPhenotypicFeature",
        "ChemicalEntity",
        "SequenceVariant",
        "OrganismTaxon",
        "CellLine",
    ))
    for offset, prefix in ((1, "B"), (2, "I"))
)

# Inverse lookup: label string -> integer id.
label2id = {label: index for index, label in id2label.items()}


# Options forwarded as keyword arguments to the tagger configuration, on top
# of whatever the base checkpoint's configuration already provides.
tagger_config_options = {
    "id2label": id2label,
    "label2id": label2id,
    "augmentation": None,
    "context_size": CONTEXT_SIZE,
    "percentage_tags": 0.2,
    "p_augmentation": 0.5,
    "freeze": False,
    "crf_reduction": "mean",
}
config = BioNExtTaggerConfig.from_pretrained(model_checkpoint, **tagger_config_options)

# Build the tagger from the base checkpoint with the configuration above and
# switch it to its training mode.
model = BioNExtTaggerModel.from_pretrained(model_checkpoint, config=config)
model.training_mode()

You are using a model of type bert to instantiate a model of type crf-tagger. This is not supported for all configurations of models and can yield errors.
Some weights of BioNExtTaggerModel were not initialized from the model checkpoint at michiyasunaga/BioLinkBERT-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'crf.end_transitions', 'crf.start_transitions', 'crf.transitions', 'dense.bias', 'dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
import torch

# Collator that pads every batch to the length of its longest sequence.
# NOTE(review): labels are padded with the tokenizer's pad *token* id rather
# than a dedicated label id -- confirm this is what the model expects.
token_collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer,
    padding="longest",
    label_pad_token_id=tokenizer.pad_token_id,
)

# Batches of 8 examples drawn from the training split.
dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=8,
    collate_fn=token_collator,
)

In [5]:
# Take the first batch produced by the DataLoader `dl`; the forward-pass cells
# below feed this same batch to both models.
sample = next(iter(dl))

In [6]:
# Forward pass of `model` on the sampled batch. The recorded output is a
# 2-tuple: a scalar tensor (presumably the loss -- confirm against the model's
# forward) followed by a 3-D tensor.
model(**sample)

(tensor(1112.5940, grad_fn=<MeanBackward0>),
 tensor([[[ 0.1314,  0.1425, -0.1065,  ...,  0.2232, -0.0677,  0.0995],
          [-0.2288,  0.1166,  0.3832,  ...,  0.2447,  0.0535, -0.0102],
          [-0.2490,  0.1743,  0.3375,  ...,  0.1630, -0.0473,  0.0080],
          ...,
          [ 0.0009,  0.0647,  0.0020,  ..., -0.0402, -0.1703,  0.3503],
          [ 0.0521,  0.1442, -0.0608,  ...,  0.2008,  0.0634,  0.1576],
          [ 0.1314,  0.1425, -0.1065,  ...,  0.2232, -0.0677,  0.0995]],
 
         [[ 0.1209,  0.1868, -0.0999,  ...,  0.2724, -0.0539,  0.0797],
          [-0.0904,  0.2252,  0.2537,  ...,  0.3074, -0.1605,  0.0993],
          [-0.1693,  0.1520,  0.1501,  ...,  0.2120, -0.0771,  0.1270],
          ...,
          [-0.0803,  0.1711,  0.1855,  ...,  0.2423, -0.1174,  0.2133],
          [-0.0357,  0.1522,  0.1749,  ...,  0.2545, -0.0666,  0.2004],
          [-0.1062,  0.2567,  0.2339,  ...,  0.3484, -0.0575,  0.1502]],
 
         [[ 0.2488,  0.0982, -0.1258,  ...,  0.1433,  0

In [11]:
# Load the "IEETA/BioNExt-Tagger" checkpoint as a second model, using a local
# cache directory for the downloaded files.
model2 = BioNExtTaggerModel.from_pretrained(
    "IEETA/BioNExt-Tagger",
    cache_dir="../../trained_models/tagger",
)

In [11]:
# Forward pass of `model2` on the same sampled batch, for comparison with the
# output of `model` above.
model2(**sample)

(tensor(0.2666, grad_fn=<MeanBackward0>),
 tensor([[[ 4.7006e+00, -6.5339e+00,  5.2031e-01,  ..., -2.5788e-01,
           -6.9013e+00,  2.4141e+00],
          [ 4.2888e+00, -6.6116e+00,  9.3731e-01,  ..., -2.6071e-03,
           -7.0781e+00,  2.5486e+00],
          [ 4.2261e+00, -6.6249e+00,  1.0259e+00,  ...,  5.0407e-02,
           -7.1159e+00,  2.5762e+00],
          ...,
          [ 5.8322e+00, -6.0646e+00, -5.6425e-01,  ..., -9.5071e-01,
           -6.2811e+00,  1.8675e+00],
          [ 4.4670e+00, -6.6336e+00,  7.8425e-01,  ..., -7.5192e-02,
           -7.0676e+00,  2.5124e+00],
          [ 4.2491e+00, -5.7781e+00,  1.6972e-01,  ..., -2.6779e-01,
           -6.3762e+00,  2.1115e+00]],
 
         [[ 4.3542e+00, -6.6502e+00,  9.2685e-01,  ..., -2.2874e-02,
           -7.0973e+00,  2.5447e+00],
          [ 5.7428e+00, -5.8342e+00, -3.1436e-01,  ..., -9.6783e-01,
           -6.0599e+00,  1.9640e+00],
          [ 3.4926e+00, -6.0575e+00,  1.0011e+00,  ...,  1.5342e-01,
           -6.3

In [21]:
def compare_models(model1, model2):
    """Print a module-by-module comparison of two ``torch.nn.Module`` trees.

    Modules are paired positionally, in ``named_modules()`` order, and the
    pairing stops at the end of the shorter model. For every pair the names
    and the module reprs are printed; when both modules expose a ``weight``
    (respectively ``bias``) attribute, the shapes of the two are printed too.

    A ``weight`` or ``bias`` attribute that exists but is ``None`` (for
    example ``LayerNorm(elementwise_affine=False)`` or ``Linear(bias=False)``)
    is reported as ``None`` instead of raising ``AttributeError``.

    Args:
        model1: First model; anything providing ``named_modules()``.
        model2: Second model; anything providing ``named_modules()``.

    Returns:
        None. All output goes to stdout.
    """
    def _shape_or_none(tensor):
        # Parameters can be registered as None; report that instead of crashing.
        return tensor.shape if tensor is not None else 'None'

    for (name1, module1), (name2, module2) in zip(model1.named_modules(), model2.named_modules()):
        print(f"Comparing Layer: {name1} -> {name2}")
        print(f"Model 1: {module1}")
        print(f"Model 2: {module2}")
        if hasattr(module1, 'weight') and hasattr(module2, 'weight'):
            print(f"Weights: {_shape_or_none(module1.weight)} vs {_shape_or_none(module2.weight)}")
        if hasattr(module1, 'bias') and hasattr(module2, 'bias'):
            print(f"Biases: {_shape_or_none(module1.bias)} vs {_shape_or_none(module2.bias)}")
        print()

In [31]:
# Print the structural comparison between `model` (built from the base
# checkpoint) and `model2` (loaded from "IEETA/BioNExt-Tagger").
compare_models(model, model2)

Comparing Layer:  -> 
Model 1: BioNExtTaggerModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28895, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNor

In [12]:
def compare_weight_magnitudes(model1, model2):
    """Print the layers whose weight Frobenius norms differ between two models.

    Modules are paired positionally, in ``named_modules()`` order, and the
    pairing stops at the end of the shorter model. A pair is inspected only
    when both modules carry an actual ``weight`` tensor; modules with no
    ``weight`` attribute, or with ``weight`` set to ``None`` (for example
    ``LayerNorm(elementwise_affine=False)``), are skipped instead of raising.

    Only the norms are compared, with exact float inequality: two different
    weight tensors that happen to share the same norm are not reported.

    Args:
        model1: First model; anything providing ``named_modules()``.
        model2: Second model; anything providing ``named_modules()``.

    Returns:
        None. All output goes to stdout; the layer name printed is the one
        from ``model1``.
    """
    for (name1, module1), (_name2, module2) in zip(model1.named_modules(), model2.named_modules()):
        weight1 = getattr(module1, 'weight', None)
        weight2 = getattr(module2, 'weight', None)
        if weight1 is None or weight2 is None:
            # Nothing to measure on at least one side.
            continue

        # Pure inspection: no need to record these ops for autograd.
        with torch.no_grad():
            norm1 = torch.norm(weight1, p='fro').item()
            norm2 = torch.norm(weight2, p='fro').item()

        if norm1 != norm2:
            print(f"Layer: {name1}")
            print(f"Model 1 Weight Magnitude (Frobenius norm): {norm1}")
            print(f"Model 2 Weight Magnitude (Frobenius norm): {norm2}")
            print(f"Difference in Magnitudes: {abs(norm1 - norm2)}")
            print()

In [13]:
# List the layers whose weight Frobenius norms differ between `model` and
# `model2`.
compare_weight_magnitudes(model, model2)

Layer: bert.embeddings.word_embeddings
Model 1 Weight Magnitude (Frobenius norm): 375.8812255859375
Model 2 Weight Magnitude (Frobenius norm): 375.8370361328125
Difference in Magnitudes: 0.044189453125

Layer: bert.embeddings.position_embeddings
Model 1 Weight Magnitude (Frobenius norm): 22.521188735961914
Model 2 Weight Magnitude (Frobenius norm): 22.51564598083496
Difference in Magnitudes: 0.005542755126953125

Layer: bert.embeddings.token_type_embeddings
Model 1 Weight Magnitude (Frobenius norm): 1.7138431072235107
Model 2 Weight Magnitude (Frobenius norm): 1.712285041809082
Difference in Magnitudes: 0.001558065414428711

Layer: bert.embeddings.LayerNorm
Model 1 Weight Magnitude (Frobenius norm): 16.448768615722656
Model 2 Weight Magnitude (Frobenius norm): 16.450185775756836
Difference in Magnitudes: 0.0014171600341796875

Layer: bert.encoder.layer.0.attention.self.query
Model 1 Weight Magnitude (Frobenius norm): 50.29220199584961
Model 2 Weight Magnitude (Frobenius norm): 50.29047

In [9]:
# Display `model.classifier.weight`. Every entry visible in the recorded
# output is 0.
# NOTE(review): the load-time warning lists classifier.weight as newly
# initialized -- confirm whether an all-zero start is intended.
model.classifier.weight

Parameter containing:
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [9]:

# Re-initialise the classifier weights in place with Xavier (Glorot) uniform
# values. The parameter is passed directly instead of through the legacy
# `.data` attribute: torch.nn.init functions already run under
# torch.no_grad(), so the in-place fill is not recorded by autograd. The call
# returns the tensor it was given, so the cell now displays the Parameter.
torch.nn.init.xavier_uniform_(model.classifier.weight)

tensor([[-0.0370,  0.0153,  0.0188,  ..., -0.0121,  0.0532,  0.0256],
        [-0.0752, -0.0420, -0.0308,  ..., -0.0207,  0.0266,  0.0622],
        [ 0.0109,  0.0734, -0.0455,  ...,  0.0307,  0.0141,  0.0621],
        ...,
        [-0.0215, -0.0407, -0.0288,  ...,  0.0053,  0.0627,  0.0069],
        [ 0.0576, -0.0488,  0.0196,  ..., -0.0574, -0.0502, -0.0313],
        [-0.0638,  0.0228, -0.0646,  ...,  0.0100,  0.0714, -0.0597]])

In [5]:
# Display `model.classifier.weight` again; the recorded output here shows
# small non-zero values.
# NOTE(review): execution counts around this cell are out of order
# (In [9], In [9], In [5], ...), so this output may not come from the same
# kernel state as the cells above -- re-run top to bottom to confirm.
model.classifier.weight

Parameter containing:
tensor([[ 0.0070, -0.0028,  0.0200,  ...,  0.0216, -0.0230,  0.0187],
        [ 0.0020, -0.0092, -0.0128,  ..., -0.0070, -0.0008, -0.0073],
        [ 0.0090, -0.0141, -0.0174,  ...,  0.0209, -0.0195, -0.0021],
        ...,
        [ 0.0120, -0.0242,  0.0238,  ..., -0.0100, -0.0196,  0.0247],
        [ 0.0019, -0.0030, -0.0235,  ..., -0.0257,  0.0282, -0.0026],
        [-0.0295,  0.0046, -0.0225,  ..., -0.0286, -0.0205, -0.0068]],
       requires_grad=True)

In [6]:
# Re-run the classifier layer's own `reset_parameters()`.
# NOTE(review): presumably this restores the layer type's default
# initialisation -- confirm what type `model.classifier` is.
model.classifier.reset_parameters()

In [7]:
# Display the CRF layer's `end_transitions` parameter. The recorded output has
# 13 entries, the same count as the labels in `id2label`.
model.crf.end_transitions

Parameter containing:
tensor([ 0.0274, -0.0410, -0.0128, -0.0804, -0.0125,  0.0043,  0.0294, -0.0007,
         0.0005,  0.0273,  0.0639,  0.0117,  0.0823], requires_grad=True)