# Setup 

In [1]:
!pip install transformers
!pip install datasets
!pip install huggingface_hub
!pip install seqeval 
!pip install evaluate --quiet
!pip install tf2crf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 4.7 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 81.9 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 90.1 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.7.1-py3-none-any.whl (451 kB)
[K     |████████████████████████████████| 451 kB 4.9 MB/s

In [2]:
from transformers import AutoTokenizer

from transformers import TFBertModel, TFAutoModelForTokenClassification, TFAutoModel
from datasets import load_dataset, load_metric

import numpy as np

import tensorflow as tf
import keras

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Embedding, Input, Dense, LSTM, Dropout, Bidirectional, Layer

from tf2crf import CRF
from tf2crf import ModelWithCRFLoss

from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from tqdm.notebook import tqdm
from seqeval.metrics import classification_report

from tensorflow.keras.preprocessing.sequence import pad_sequences

np.random.seed(42)

# Functions

In [3]:
def get_padded_arr(inputs, max_length=180, pad_type='post', trunc_type='post'):
    """
    Returns a padded array from an input array, 
    padded to the max_length using pad_type and trunc_type.
    Uses pad_sequences() from tensorflow.keras.preprocessing.sequence.

    Parameters
    ----------
    inputs: nested list of ints
      input sequence of tokenized input_ids
    max_length: int
      maximum length to which to pad sequence
    pad_type: string
      padding type; ex: post, pre, etc.
    trunc_type: string
      truncation type; ex: post, pre, etc.

    Returns
    -------
    nested numpy array of ints
      numpy array of each numpy array of input_ids padded to max_length

    Example
    --------
    >>> get_padded_arr([[465, 1093, 669], [110, 549]], 5, 'post', 'post')
    array([[ 465, 1093,  669,    0,    0],
          [ 110,  549,    0,    0,    0]], dtype=int32)
    """

    padded_arr = pad_sequences(inputs, padding=pad_type, truncating=trunc_type, maxlen=max_length)
    input_arr = np.array([np.array(x) for x in padded_arr])

    return input_arr

In [4]:
def get_argmax_predictions(y_pred):
    """
    Returns an array of label predictions in the form of numpy arrays,
    from a vector of probability distributions output by softmax layer.

    Parameters
    ----------
    y_pred: nested list of floats
      vector of probability distributions created by softmax function

    Returns
    -------
    array of numpy array of ints
      array of predicted labels (numpy array) per example

    Example
    --------
    >>> get_argmax_predictions([[0.9, 0.05, 0.05], [0.04, 0.9, 0.06], 
                                [0.03, 0.07, 0.9], [0.9, 0.03, 0.07]])
    [0, 1, 2, 0]
    """

    predicted_labels = []
    for idx, input in enumerate(y_pred):
      input_preds = []
      for token in input:
        input_preds.append(np.argmax(token))
      predicted_labels.append(np.array(input_preds))

    return predicted_labels

In [5]:
def print_classification_report(test_labels, test_attention_mask, preds, label_names):
    """
    Prints the classification report with micro, macro, and weighted
    precision, recall, and f1_score for label classes found in the set.

    Parameters
    ----------
    test_labels: nested list of integers
      nested array of true labels sourced from dataset

    test_attention_mask: nested list of integers
      nested array of attention mask values created by tokenizer

    preds: nested list of integers
      nested array of predicted labels generated from model.predict()

    label_names: list of strings
      names of labels at their respective indices

    Returns
    -------
    Prints classification report, we use weighted average in our paper
    to represent the results, but we have micro and macro precision, recall,
    and f1 scores.
    """

    flatten_labels = []
    flatten_preds = []
    # Flatten true labels and predicted labels into one array each
    # to make comparison easily
    for i in tqdm(range(len(test_labels))):
      j = sum(test_attention_mask[i]).numpy() - 1
      flatten_labels.extend(test_labels[i][1:j])
      flatten_preds.extend(preds[i][1:j])
    flatten_labels = list(map(lambda x: label_names[x], flatten_labels))
    flatten_preds = list(map(lambda x: label_names[x], flatten_preds))

    # Generate precision, recall, and f1 scores
    # for overall labels.
    for s in classification_report([flatten_labels], [flatten_preds], digits=4).split('\n'):
      print(s)

# Load Datsets

In [6]:
## NCBI Dataset ## 
## Biomed Inform. 2014 February ; 47: 1–10. doi:10.1016/j.jbi.2013.12.006. 
## The NCBI Disease corpus consists of 793 PubMed abstracts, 
## which are separated into training (593), development (100) and test (100) subsets
## It includes 6,892 disease mentions, which are mapped to 790 unique disease concepts

ncbi_dataset = load_dataset("ncbi_disease")

Downloading builder script:   0%|          | 0.00/5.83k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/9.69k [00:00<?, ?B/s]

Downloading and preparing dataset ncbi_disease/ncbi_disease to /root/.cache/huggingface/datasets/ncbi_disease/ncbi_disease/1.0.0/92314c7992b0b8a5ea2ad101be33f365b684a2cc011e0ffa29c691e6d32b2d03...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/284k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/51.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/52.4k [00:00<?, ?B/s]

   

Extracting data files #0:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #1:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #2:   0%|          | 0/1 [00:00<?, ?obj/s]

Generating train split:   0%|          | 0/5433 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/924 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/941 [00:00<?, ? examples/s]

Dataset ncbi_disease downloaded and prepared to /root/.cache/huggingface/datasets/ncbi_disease/ncbi_disease/1.0.0/92314c7992b0b8a5ea2ad101be33f365b684a2cc011e0ffa29c691e6d32b2d03. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
## GENE MENTION DATASET ##
## Genome Biology 2008, 9(Suppl 2):S2 

## BioCreative II Gene Mention Recognition (BC2GM) Dataset contains data 
## where participants are asked to identify a gene mention in a sentence 
## by giving its start and end characters. The training set consists of a 
## set of sentences, and for each sentence a set of gene mentions (GENE annotations).
## Includes  15,000 annotated training sentences.
## 

gm_dataset = load_dataset("bc2gm_corpus")

Downloading builder script:   0%|          | 0.00/5.47k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.14k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.64k [00:00<?, ?B/s]

Downloading and preparing dataset bc2gm_corpus/bc2gm_corpus to /root/.cache/huggingface/datasets/bc2gm_corpus/bc2gm_corpus/1.0.0/198658a8f6102c50f2230023a1fc49526139e3955e86bb62fafb48edfbbaca60...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/870k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/175k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/358k [00:00<?, ?B/s]

   

Extracting data files #0:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #1:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #2:   0%|          | 0/1 [00:00<?, ?obj/s]

Generating train split:   0%|          | 0/12501 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2501 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5001 [00:00<?, ? examples/s]

Dataset bc2gm_corpus downloaded and prepared to /root/.cache/huggingface/datasets/bc2gm_corpus/bc2gm_corpus/1.0.0/198658a8f6102c50f2230023a1fc49526139e3955e86bb62fafb48edfbbaca60. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
## CHEMICAL DISEASE RELATIONS DATASET ##
## Database, Volume 2016, 2016, baw068, https://doi.org/10.1093/database/baw068

## The BioCreative V CDR task corpus is manually annotated for chemicals,
## diseases and chemical-induced disease (CID) relations. 
## It consists of 1500 PubMed articles with 4409 annotated chemicals, 
## 5818 diseases and 3116 chemical-disease interactions.

cdr_dataset = load_dataset("ghadeermobasher/BC5CDR-Chemical-Disease")

Downloading builder script:   0%|          | 0.00/3.97k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/4.94k [00:00<?, ?B/s]

Downloading and preparing dataset bc5_cdr-chemical-disease/BC5CDR-Disease to /root/.cache/huggingface/datasets/ghadeermobasher___bc5_cdr-chemical-disease/BC5CDR-Disease/1.0.0/e5aecd23775fc588b74557367dd5bfcde7d56a097454619727120ce7ddd58028...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/256k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/252k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/267k [00:00<?, ?B/s]

   

Extracting data files #2:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #1:   0%|          | 0/1 [00:00<?, ?obj/s]

Extracting data files #0:   0%|          | 0/1 [00:00<?, ?obj/s]

Generating train split: 0 examples [00:00, ? examples/s]

['Selegiline', '-', 'induced', 'postural', 'hypotension', 'in', 'Parkinson', "'", 's', 'disease', ':', 'a', 'longitudinal', 'study', 'on', 'the', 'effects', 'of', 'drug', 'withdrawal', '.']
['OBJECTIVES', ':', 'The', 'United', 'Kingdom', 'Parkinson', "'", 's', 'Disease', 'Research', 'Group', '(', 'UKPDRG', ')', 'trial', 'found', 'an', 'increased', 'mortality', 'in', 'patients', 'with', 'Parkinson', "'", 's', 'disease', '(', 'PD', ')', 'randomized', 'to', 'receive', '10', 'mg', 'selegiline', 'per', 'day', 'and', 'L', '-', 'dopa', 'compared', 'with', 'those', 'taking', 'L', '-', 'dopa', 'alone', '.']
['Recently', ',', 'we', 'found', 'that', 'therapy', 'with', 'selegiline', 'and', 'L', '-', 'dopa', 'was', 'associated', 'with', 'selective', 'systolic', 'orthostatic', 'hypotension', 'which', 'was', 'abolished', 'by', 'withdrawal', 'of', 'selegiline', '.']
['This', 'unwanted', 'effect', 'on', 'postural', 'blood', 'pressure', 'was', 'not', 'the', 'result', 'of', 'underlying', 'autonomic', 'fa

Generating validation split: 0 examples [00:00, ? examples/s]

['22', '-', 'oxacalcitriol', 'suppresses', 'secondary', 'hyperparathyroidism', 'without', 'inducing', 'low', 'bone', 'turnover', 'in', 'dogs', 'with', 'renal', 'failure', '.']
['BACKGROUND', ':', 'Calcitriol', 'therapy', 'suppresses', 'serum', 'levels', 'of', 'parathyroid', 'hormone', '(', 'PTH', ')', 'in', 'patients', 'with', 'renal', 'failure', 'but', 'has', 'several', 'drawbacks', ',', 'including', 'hypercalcemia', 'and', '/', 'or', 'marked', 'suppression', 'of', 'bone', 'turnover', ',', 'which', 'may', 'lead', 'to', 'adynamic', 'bone', 'disease', '.']
['A', 'new', 'vitamin', 'D', 'analogue', ',', '22', '-', 'oxacalcitriol', '(', 'OCT', ')', ',', 'has', 'been', 'shown', 'to', 'have', 'promising', 'characteristics', '.']
['This', 'study', 'was', 'undertaken', 'to', 'determine', 'the', 'effects', 'of', 'OCT', 'on', 'serum', 'PTH', 'levels', 'and', 'bone', 'turnover', 'in', 'states', 'of', 'normal', 'or', 'impaired', 'renal', 'function', '.']
['METHODS', ':', 'Sixty', 'dogs', 'were', '

Generating test split: 0 examples [00:00, ? examples/s]

['Torsade', 'de', 'pointes', 'ventricular', 'tachycardia', 'during', 'low', 'dose', 'intermittent', 'dobutamine', 'treatment', 'in', 'a', 'patient', 'with', 'dilated', 'cardiomyopathy', 'and', 'congestive', 'heart', 'failure', '.']
['The', 'authors', 'describe', 'the', 'case', 'of', 'a', '56', '-', 'year', '-', 'old', 'woman', 'with', 'chronic', ',', 'severe', 'heart', 'failure', 'secondary', 'to', 'dilated', 'cardiomyopathy', 'and', 'absence', 'of', 'significant', 'ventricular', 'arrhythmias', 'who', 'developed', 'QT', 'prolongation', 'and', 'torsade', 'de', 'pointes', 'ventricular', 'tachycardia', 'during', 'one', 'cycle', 'of', 'intermittent', 'low', 'dose', '(', '2', '.', '5', 'mcg', '/', 'kg', 'per', 'min', ')', 'dobutamine', '.']
['This', 'report', 'of', 'torsade', 'de', 'pointes', 'ventricular', 'tachycardia', 'during', 'intermittent', 'dobutamine', 'supports', 'the', 'hypothesis', 'that', 'unpredictable', 'fatal', 'arrhythmias', 'may', 'occur', 'even', 'with', 'low', 'doses', '

  0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
print(ncbi_dataset['train'].shape)
print(ncbi_dataset['validation'].shape)
print(ncbi_dataset['test'].shape)

(5433, 3)
(924, 3)
(941, 3)


In [10]:
print(gm_dataset['train'].shape)
print(gm_dataset['validation'].shape)
print(gm_dataset['test'].shape)

(12501, 3)
(2501, 3)
(5001, 3)


In [11]:
print(cdr_dataset['train'].shape)
print(cdr_dataset['validation'].shape)
print(cdr_dataset['test'].shape)

(4561, 3)
(4582, 3)
(4798, 3)


# Model Definitions

In [228]:
# VANILLA BERT MODEL

def create_bert_model(num_labels, max_length=180, dropout=0.3) -> tf.keras.Model:
    """
    Creates a plain BERT model that takes in input_ids, attention_mask
    and will output a softmax probability distribution over a set of num_labels
    number of possible labels.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    max_length: int
      maximum length of inputs
    dropout: float
      dropout rate
    
    Returns
    -------
    a custom BERT model built on the pretrained bert-large-NER model
    for multi-class classification
    
    """
    bert_model = TFAutoModel.from_pretrained('dslim/bert-large-NER', num_labels=num_labels, ignore_mismatched_sizes=True)

    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    bert_inputs = {'input_ids': input_ids,
                    'attention_mask': attention_mask}      

    bert_output = bert_model(bert_inputs)

    last_hidden_state = bert_output[0]

    embedding = tf.keras.layers.Dropout(dropout)(last_hidden_state)

    output = tf.keras.layers.Dense(num_labels, activation='softmax')(embedding)

    model = tf.keras.models.Model(inputs = [input_ids, attention_mask], outputs = output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                        loss=tf.keras.losses.SparseCategoricalCrossentropy(), 
                        metrics=['accuracy'])

    return model

In [73]:
# VANILLA BIOBERT MODEL

def create_biobert_model(num_labels, max_length=180, dropout=0.3) -> tf.keras.Model:
    """
    Create a plain BioBERT model that takes in input_ids, attention_mask
    and will output a softmax probability distribution over a set of num_labels
    number of possible labels.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    max_length: int
      maximum length of inputs
    dropout: float
      dropout rate
    
    Returns
    -------
    a custom BioBERT model built on the pretrained biobert-base-cased-v1.1 model
    for multi-class classification
    
    """
    biobert_model = TFAutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.1', num_labels=num_labels, from_pt=True)

    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    biobert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    biobert_output = biobert_model(biobert_inputs)

    last_hidden_state = biobert_output[0]

    embedding = tf.keras.layers.Dropout(dropout)(last_hidden_state)

    output = tf.keras.layers.Dense(num_labels, activation='softmax')(embedding)

    model = tf.keras.models.Model(inputs = [input_ids, attention_mask], outputs = output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 5e-5),
                       loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                        metrics=['accuracy'])

    return model

In [None]:
# BERT CRF MODEL

def create_bert_crf_model(num_labels, max_length=180) -> tf.keras.Model:
    """
    Create a layered BERT+CRF model that takes in input_ids, attention_mask
    and will output predicted labels for each example.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    max_length: int
      maximum length of inputs
    
    Returns
    -------
    a custom BERT+CRF layered model built on the pretrained 
    bert-large-NER model
    
    """
    bert_model = TFAutoModel.from_pretrained('dslim/bert-large-NER', num_labels=num_labels, ignore_mismatched_sizes=True)

    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    bert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    bert_output = bert_model(bert_inputs)
    last_hidden_state = bert_output[0]
    crf_layer = CRF(num_labels)
    crf_out = crf_layer(last_hidden_state)
    base_model = keras.Model([input_ids, attention_mask], outputs=crf_out)

    # wrapping base model with ModelWithCRFLoss wrapper in order to use
    # CRF loss function from wrapper class
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 1e-5), metrics=['accuracy'])

    return model

In [155]:
# BERT LSTM CRF MODEL

def create_bert_lstm_crf_model(num_labels, LSTM_units=64, max_length=180) -> tf.keras.Model:
    """
    Create a layered BERT+LSTM+CRF model that takes in input_ids, attention_mask
    and will output predicted labels for each example.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    LSTM_units: integer
      number of units representing the dimensionality of the output space
    max_length: int
      maximum length of inputs
    
    Returns
    -------
    a custom BERT+BiLSTM+CRF layered model built on the pretrained 
    bert-large-NER model
    
    """    
    bert_model = TFAutoModel.from_pretrained('dslim/bert-large-NER', num_labels=num_labels, ignore_mismatched_sizes=True)
    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    bert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    bert_output = bert_model(bert_inputs)
    last_hidden_state = bert_output[0]

    lstm_layer = tf.keras.layers.LSTM(LSTM_units, return_sequences=True)(last_hidden_state)
    crf_layer = CRF(num_labels)
    crf_out = crf_layer(lstm_layer)
    base_model = keras.Model([input_ids, attention_mask], outputs=crf_out)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 1e-5), metrics=['accuracy'])

    return model

In [None]:
# BERT BILSTM CRF MODEL

def create_bert_bilstm_crf_model(num_labels, LSTM_units=64, max_length=180) -> tf.keras.Model:
    """
    Create a layered BERT+BiLSTM+CRF model that takes in input_ids, attention_mask
    and will output predicted labels for each example.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    LSTM_units: integer
      number of units representing the dimensionality of the output space
    max_length: int
      maximum length of inputs
    
    Returns
    -------
    a custom BERT+BiLSTM+CRF layered model built on the pretrained 
    bert-large-NER model
    
    """    
    bert_model = TFAutoModel.from_pretrained('dslim/bert-large-NER', num_labels=num_labels, ignore_mismatched_sizes=True)
    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    bert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    bert_output = bert_model(bert_inputs)
    last_hidden_state = bert_output[0]

    bilstm_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(LSTM_units, return_sequences=True))(last_hidden_state)
    crf_layer = CRF(num_labels)
    crf_out = crf_layer(bilstm_layer)
    base_model = keras.Model([input_ids, attention_mask], outputs=crf_out)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 1e-5), metrics=['accuracy'])

    return model

In [104]:
# BIOBERT + CRF MODEL

def create_biobert_crf_model(num_labels, max_length=180) -> tf.keras.Model:
    """
    Create a layered BioBERT+CRF model that takes in input_ids, attention_mask
    and will output predicted labels for each example.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    max_length: int
      maximum length of inputs
    
    Returns
    -------
    a custom BioBERT+CRF layered model built on the pretrained 
    biobert-base-cased-v1.1 model
    
    """
    biobert_model = TFAutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.1', num_labels=num_labels, from_pt=True)

    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    biobert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}    
    biobert_output = biobert_model(biobert_inputs)

    last_hidden_state = biobert_output[0]

    crf_layer = CRF(num_labels)
    crf_out = crf_layer(last_hidden_state)
    base_model = keras.Model([input_ids, attention_mask], outputs=crf_out)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 5e-5), metrics=['accuracy'])

    return model

In [105]:
# BIOBERT + BiLSTM + CRF MODEL

def create_biobert_bilstm_crf_model(num_labels, LSTM_units=64, max_length=180):
    """
    Create a layered BioBERT+BiLSTM+CRF model that takes in input_ids, attention_mask
    and will output predicted labels for each example.

    Parameters
    ----------
    num_labels: integer
      the number of classes to compute the probabilities for
    LSTM_units: integer
      number of units representing the dimensionality of the output space
    max_length: int
      maximum length of inputs
    
    Returns
    -------
    a custom BioBERT+BiLSTM+CRF layered model built on the pretrained 
    biobert-base-cased-v1.1 model
    
    """   
    biobert_model = TFAutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.1', num_labels=num_labels, from_pt=True)

    input_ids = Input(shape=(max_length,), dtype=tf.int64, name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask')

    biobert_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    biobert_output = biobert_model(biobert_inputs)
    last_hidden_state = biobert_output[0]

    bilstm_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(LSTM_units, return_sequences=True))(last_hidden_state)
    crf_layer = CRF(num_labels)
    crf_out = crf_layer(bilstm_layer)
    base_model = keras.Model([input_ids, attention_mask], outputs=crf_out)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate= 5e-5), metrics=['accuracy'])

    return model

# Tokenization

In [74]:
# @title Create Unified Label Mapping Across Datasets
# rename "tags" to "ner_tags" for consistency
if 'tags' in cdr_dataset['train'].features.keys():
  cdr_dataset = cdr_dataset.rename_column("tags", "ner_tags")

# get labels from each dataset
ncbi_label_names = ncbi_dataset["train"].features["ner_tags"].feature.names
gm_label_names = gm_dataset["train"].features["ner_tags"].feature.names
cdr_label_names = ['O', 'B-Chemical', 'B-Disease', 'I-Disease', 'I-Chemical']

# create an unified label-to-index mapping across datasets
# keys: unique labels, values: indexes
label2index = {}
id = 0
for n in ncbi_label_names + gm_label_names + cdr_label_names:
  if n not in label2index:
    label2index[n] = id
    id += 1

label_names = list(label2index.keys())
label2index

from datasets import Features, Value, Sequence, ClassLabel
# define wanted output columns from the mapping process
# this ensures that the tokenized datasets have the same features for concatenation 
features = Features({
  'id': Value(dtype='string', id=None),
  'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
  'ner_tags': Sequence(feature=ClassLabel(names=label_names, id=None), length=-1, id=None),
  'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
  'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
  'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
  'labels': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None)
})

bert_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
# @title Map Labels To Tokenized Data (The Original Labels Were At Word-Level)
def bert_process(data, label_names):
  inputs = bert_tokenizer(data['tokens'], is_split_into_words=True, add_special_tokens=False)
  ner_tags = []
  for i, labels in enumerate(data['ner_tags']):
    word_ids = inputs.word_ids(i)
    label_ids = []
    for wid in word_ids:
      if wid is None: 
        label_ids.append(-100)
      else:
        label_ids.append(labels[wid])
    ner_tags.append(label_ids)
  inputs['labels'] = ner_tags
  return inputs

biobert_tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")
# @title Map Labels To Tokenized Data (The Original Labels Were At Word-Level)
def biobert_process(data, label_names):
  inputs = biobert_tokenizer(data['tokens'], is_split_into_words=True, add_special_tokens=False)
  ner_tags = []
  for i, labels in enumerate(data['ner_tags']):
    word_ids = inputs.word_ids(i)
    label_ids = []
    for wid in word_ids:
      if wid is None: 
        label_ids.append(-100)
      else:
        label_ids.append(labels[wid])
    ner_tags.append(label_ids)
  inputs['labels'] = ner_tags
  return inputs

Downloading:   0%|          | 0.00/313 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [75]:
#create BERT tokenized datasets
bert_tokenized_datasets = { 
    name: data.map(
      bert_process, 
      features=features,
      batched=True,
      fn_kwargs={
        'label_names': label_names,
        #'label2index': label2index,
      }
  ) for name, data, label_names in [
      ('ncbi', ncbi_dataset, ncbi_label_names),
      ('gm', gm_dataset, gm_label_names),
      ('cdr', cdr_dataset, cdr_label_names)
  ] 
}
#create bioBERT tokenized datasets
biobert_tokenized_datasets = { 
    name: data.map(
      biobert_process, 
      features=features,
      batched=True,
      fn_kwargs={
        'label_names': label_names,
        #'label2index': label2index,
      }
  ) for name, data, label_names in [
      ('ncbi', ncbi_dataset, ncbi_label_names),
      ('gm', gm_dataset, gm_label_names),
      ('cdr', cdr_dataset, cdr_label_names)
  ] 
}



  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

# NCBI Dataset - Padding

In [76]:
# Create class label list
ncbi_label_list = ncbi_dataset["train"].features[f"ner_tags"].feature.names
ncbi_label_list

['O', 'B-Disease', 'I-Disease']

In [77]:
# examine the maximum length of any BERT tokenized inputs 
print(max([len(x) for x in bert_tokenized_datasets['ncbi']['train']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['ncbi']['validation']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['ncbi']['test']['input_ids']]))

176
120
155


In [78]:
# examine the maximum length of any BioBERT tokenized inputs
print(max([len(x) for x in biobert_tokenized_datasets['ncbi']['train']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['ncbi']['validation']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['ncbi']['test']['input_ids']]))

177
126
143


In [79]:
# declare NCBI specific global variables for model parameters
NCBI_MAX_LENGTH = 180
NCBI_NUM_LABELS = len(ncbi_label_list)

In [80]:
# create input and attention tensors using BERT tokenized datasets with max length
bert_ncbi_train_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['train']['input_ids'], NCBI_MAX_LENGTH))
bert_ncbi_train_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['train']['attention_mask'], NCBI_MAX_LENGTH))

bert_ncbi_test_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['test']['input_ids'], NCBI_MAX_LENGTH))
bert_ncbi_test_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['test']['attention_mask'], NCBI_MAX_LENGTH))

bert_ncbi_val_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['validation']['input_ids'], NCBI_MAX_LENGTH))
bert_ncbi_val_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['ncbi']['validation']['attention_mask'], NCBI_MAX_LENGTH))

# create padded label arrays with max length
bert_ncbi_train_labels = get_padded_arr(bert_tokenized_datasets['ncbi']['train']['labels'], NCBI_MAX_LENGTH)
bert_ncbi_test_labels = get_padded_arr(bert_tokenized_datasets['ncbi']['test']['labels'], NCBI_MAX_LENGTH)
bert_ncbi_val_labels = get_padded_arr(bert_tokenized_datasets['ncbi']['validation']['labels'], NCBI_MAX_LENGTH)

In [81]:
# create input and attention tensors using BioBERT tokenized datasets with max length
biobert_ncbi_train_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['train']['input_ids'], NCBI_MAX_LENGTH))
biobert_ncbi_train_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['train']['attention_mask'], NCBI_MAX_LENGTH))

biobert_ncbi_test_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['test']['input_ids'], NCBI_MAX_LENGTH))
biobert_ncbi_test_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['test']['attention_mask'], NCBI_MAX_LENGTH))

biobert_ncbi_val_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['validation']['input_ids'], NCBI_MAX_LENGTH))
biobert_ncbi_val_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['ncbi']['validation']['attention_mask'], NCBI_MAX_LENGTH))

# create padded label arrays with max length
biobert_ncbi_train_labels = get_padded_arr(biobert_tokenized_datasets['ncbi']['train']['labels'], NCBI_MAX_LENGTH)
biobert_ncbi_test_labels = get_padded_arr(biobert_tokenized_datasets['ncbi']['test']['labels'], NCBI_MAX_LENGTH)
biobert_ncbi_val_labels = get_padded_arr(biobert_tokenized_datasets['ncbi']['validation']['labels'], NCBI_MAX_LENGTH)

# Gene Mention Dataset - Padding

In [137]:
gm_label_list = gm_dataset["train"].features[f"ner_tags"].feature.names
gm_label_list

['O', 'B-GENE', 'I-GENE']

In [138]:
print(max([len(x) for x in bert_tokenized_datasets['gm']['train']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['gm']['validation']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['gm']['test']['input_ids']]))

277
295
287


In [139]:
print(max([len(x) for x in biobert_tokenized_datasets['gm']['train']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['gm']['validation']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['gm']['test']['input_ids']]))

276
291
280


In [140]:
GM_MAX_LENGTH = 295
GM_NUM_LABELS = len(gm_label_list)

In [141]:
bert_gm_train_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['train']['input_ids'], GM_MAX_LENGTH))
bert_gm_train_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['train']['attention_mask'], GM_MAX_LENGTH))

bert_gm_test_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['test']['input_ids'], GM_MAX_LENGTH))
bert_gm_test_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['test']['attention_mask'], GM_MAX_LENGTH))

bert_gm_val_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['validation']['input_ids'], GM_MAX_LENGTH))
bert_gm_val_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['gm']['validation']['attention_mask'], GM_MAX_LENGTH))

bert_gm_train_labels = get_padded_arr(bert_tokenized_datasets['gm']['train']['labels'], GM_MAX_LENGTH)
bert_gm_test_labels = get_padded_arr(bert_tokenized_datasets['gm']['test']['labels'], GM_MAX_LENGTH)
bert_gm_val_labels = get_padded_arr(bert_tokenized_datasets['gm']['validation']['labels'], GM_MAX_LENGTH)

In [142]:
biobert_gm_train_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['train']['input_ids'], GM_MAX_LENGTH))
biobert_gm_train_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['train']['attention_mask'], GM_MAX_LENGTH))

biobert_gm_test_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['test']['input_ids'], GM_MAX_LENGTH))
biobert_gm_test_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['test']['attention_mask'], GM_MAX_LENGTH))

biobert_gm_val_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['validation']['input_ids'], GM_MAX_LENGTH))
biobert_gm_val_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['gm']['validation']['attention_mask'], GM_MAX_LENGTH))

biobert_gm_train_labels = get_padded_arr(biobert_tokenized_datasets['gm']['train']['labels'], GM_MAX_LENGTH)
biobert_gm_test_labels = get_padded_arr(biobert_tokenized_datasets['gm']['test']['labels'], GM_MAX_LENGTH)
biobert_gm_val_labels = get_padded_arr(biobert_tokenized_datasets['gm']['validation']['labels'], GM_MAX_LENGTH)

# Chemical Disease Relation Dataset - Padding

In [170]:
cdr_label_list = cdr_dataset["train"].features[f"ner_tags"].feature.names
cdr_label_list

['O', 'B-Disease', 'I-Disease', 'B-Chemical', 'I-Chemical']

In [171]:
print(max([len(x) for x in bert_tokenized_datasets['cdr']['train']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['cdr']['validation']['input_ids']]))
print(max([len(x) for x in bert_tokenized_datasets['cdr']['test']['input_ids']]))

248
298
188


In [172]:
print(max([len(x) for x in biobert_tokenized_datasets['cdr']['train']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['cdr']['validation']['input_ids']]))
print(max([len(x) for x in biobert_tokenized_datasets['cdr']['test']['input_ids']]))

249
298
185


In [173]:
CDR_MAX_LENGTH = 300
CDR_NUM_LABELS = len(cdr_label_list)

In [174]:
bert_cdr_train_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['train']['input_ids'], CDR_MAX_LENGTH))
bert_cdr_train_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['train']['attention_mask'], CDR_MAX_LENGTH))

bert_cdr_test_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['test']['input_ids'], CDR_MAX_LENGTH))
bert_cdr_test_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['test']['attention_mask'], CDR_MAX_LENGTH))

bert_cdr_val_input_ids = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['validation']['input_ids'], CDR_MAX_LENGTH))
bert_cdr_val_attention_mask = tf.convert_to_tensor(get_padded_arr(bert_tokenized_datasets['cdr']['validation']['attention_mask'], CDR_MAX_LENGTH))

bert_cdr_train_labels = get_padded_arr(bert_tokenized_datasets['cdr']['train']['labels'], CDR_MAX_LENGTH)
bert_cdr_test_labels = get_padded_arr(bert_tokenized_datasets['cdr']['test']['labels'], CDR_MAX_LENGTH)
bert_cdr_val_labels = get_padded_arr(bert_tokenized_datasets['cdr']['validation']['labels'], CDR_MAX_LENGTH)

In [175]:
biobert_cdr_train_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['train']['input_ids'], CDR_MAX_LENGTH))
biobert_cdr_train_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['train']['attention_mask'], CDR_MAX_LENGTH))

biobert_cdr_test_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['test']['input_ids'], CDR_MAX_LENGTH))
biobert_cdr_test_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['test']['attention_mask'], CDR_MAX_LENGTH))

biobert_cdr_val_input_ids = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['validation']['input_ids'], CDR_MAX_LENGTH))
biobert_cdr_val_attention_mask = tf.convert_to_tensor(get_padded_arr(biobert_tokenized_datasets['cdr']['validation']['attention_mask'], CDR_MAX_LENGTH))

biobert_cdr_train_labels = get_padded_arr(biobert_tokenized_datasets['cdr']['train']['labels'], CDR_MAX_LENGTH)
biobert_cdr_test_labels = get_padded_arr(biobert_tokenized_datasets['cdr']['test']['labels'], CDR_MAX_LENGTH)
biobert_cdr_val_labels = get_padded_arr(biobert_tokenized_datasets['cdr']['validation']['labels'], CDR_MAX_LENGTH)




# BERT - NCBI

In [229]:
# create an instance of the BERT model with NCBI parameters
ncbi_bert_model = create_bert_model(NCBI_NUM_LABELS, NCBI_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [230]:
# Train with sample train/validation dataset
history = ncbi_bert_model.fit([bert_ncbi_train_input_ids[:100], bert_ncbi_train_attention_mask[:100]], 
                                          bert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids[:100], bert_ncbi_val_attention_mask[:100]], bert_ncbi_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [231]:
ncbi_bert_model.summary()

Model: "model_32"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask (InputLayer)    [(None, 180)]        0           []                               
                                                                                                  
 input_ids (InputLayer)         [(None, 180)]        0           []                               
                                                                                                  
 tf_bert_model_32 (TFBertModel)  TFBaseModelOutputWi  333579264  ['attention_mask[0][0]',         
                                thPoolingAndCrossAt               'input_ids[0][0]']              
                                tentions(last_hidde                                               
                                n_state=(None, 180,                                        

In [232]:
# Train with full train/validation dataset
history = ncbi_bert_model.fit([bert_ncbi_train_input_ids, bert_ncbi_train_attention_mask], 
                                          bert_ncbi_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids, bert_ncbi_val_attention_mask], bert_ncbi_val_labels))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [233]:
#loss, accuracy = ncbi_bert_model.evaluate([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask], bert_ncbi_test_labels, verbose=0)

# create predictions using test input_ids and attention_mask
bert_ncbi_predictions = ncbi_bert_model.predict([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask])



In [234]:
# for BERT, since model predictions is a softmax probability distribution
# over the number of classes, we need to use get_argmax_predictions
# to return predictions in a list of expected integer labels
bert_ncbi_preds_list = get_argmax_predictions(bert_ncbi_predictions)

In [235]:
# print the classification report
print_classification_report(bert_ncbi_test_labels, bert_ncbi_test_attention_mask, bert_ncbi_preds_list, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8210    0.8779    0.8485      2220

   micro avg     0.8210    0.8779    0.8485      2220
   macro avg     0.8210    0.8779    0.8485      2220
weighted avg     0.8210    0.8779    0.8485      2220



# BioBERT - NCBI

In [110]:
ncbi_biobert_model = create_biobert_model(NCBI_NUM_LABELS, NCBI_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [111]:
# Train with sample train/ validation dataset

history = ncbi_biobert_model.fit([biobert_ncbi_train_input_ids[:500], biobert_ncbi_train_attention_mask[:500]], 
                                          biobert_ncbi_train_labels[:500], batch_size=8, epochs=5, 
                                          validation_data=([biobert_ncbi_val_input_ids[:500], biobert_ncbi_val_attention_mask[:500]], biobert_ncbi_val_labels[:500]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [169]:
ncbi_biobert_model.summary()

Model: "model_13"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask (InputLayer)    [(None, 180)]        0           []                               
                                                                                                  
 input_ids (InputLayer)         [(None, 180)]        0           []                               
                                                                                                  
 tf_bert_model_13 (TFBertModel)  TFBaseModelOutputWi  108310272  ['attention_mask[0][0]',         
                                thPoolingAndCrossAt               'input_ids[0][0]']              
                                tentions(last_hidde                                               
                                n_state=(None, 180,                                        

In [112]:
# Train with full train/ validation dataset

history = ncbi_biobert_model.fit([biobert_ncbi_train_input_ids, biobert_ncbi_train_attention_mask], 
                                          biobert_ncbi_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([biobert_ncbi_val_input_ids, biobert_ncbi_val_attention_mask], biobert_ncbi_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [113]:
#loss, accuracy = ncbi_biobert_model.evaluate([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask], biobert_ncbi_test_labels, verbose=0)

ncbi_biobert_predictions = ncbi_biobert_model.predict([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask])



In [114]:
ncbi_biobert_preds_list = get_argmax_predictions(ncbi_biobert_predictions)

In [115]:
print_classification_report(biobert_ncbi_test_labels, biobert_ncbi_test_attention_mask, ncbi_biobert_preds_list, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8395    0.8963    0.8670      2294

   micro avg     0.8395    0.8963    0.8670      2294
   macro avg     0.8395    0.8963    0.8670      2294
weighted avg     0.8395    0.8963    0.8670      2294




# BERT+CRF - NCBI

In [None]:
ncbi_bert_crf_model = create_bert_crf_model(NCBI_NUM_LABELS, NCBI_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset

history = ncbi_bert_crf_model.fit([bert_ncbi_train_input_ids[:100], bert_ncbi_train_attention_mask[:100]], 
                                          bert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids[:100], bert_ncbi_val_attention_mask[:100]], bert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
ncbi_bert_crf_model.summary()

Model: "model_with_crf_loss_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_7 (Functional)        ((None, 180),             333582348 
                              (None, 180, 3),                    
                              (None,),                           
                              (3, 3))                            
                                                                 
Total params: 333,582,352
Trainable params: 333,582,348
Non-trainable params: 4
_________________________________________________________________


In [None]:
# Train with full train/ validation dataset
history = ncbi_bert_crf_model.fit([bert_ncbi_train_input_ids, bert_ncbi_train_attention_mask], 
                                          bert_ncbi_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids, bert_ncbi_val_attention_mask], bert_ncbi_val_labels))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [None]:
# loss, accuracy = ncbi_bert_crf_model.evaluate([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask], bert_ncbi_test_labels, verbose=0)

ncbi_bert_crf_predictions = ncbi_bert_crf_model.predict([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask])



In [None]:
print_classification_report(bert_ncbi_test_labels, bert_ncbi_test_attention_mask, ncbi_bert_crf_predictions, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8678    0.8577    0.8627      2220

   micro avg     0.8678    0.8577    0.8627      2220
   macro avg     0.8678    0.8577    0.8627      2220
weighted avg     0.8678    0.8577    0.8627      2220



# BERT+LSTM+CRF - NCBI

In [197]:
ncbi_bert_lstm_crf_model = create_bert_lstm_crf_model(NCBI_NUM_LABELS, 16, NCBI_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [198]:
# Train with sample train/ validation dataset

history = ncbi_bert_lstm_crf_model.fit([bert_ncbi_train_input_ids[:100], bert_ncbi_train_attention_mask[:100]], 
                                          bert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids[:100], bert_ncbi_val_attention_mask[:100]], bert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [199]:
ncbi_bert_lstm_crf_model.summary()

Model: "model_with_crf_loss_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_26 (Functional)       ((None, 180),             333645948 
                              (None, 180, 3),                    
                              (None,),                           
                              (3, 3))                            
                                                                 
Total params: 333,645,952
Trainable params: 333,645,948
Non-trainable params: 4
_________________________________________________________________


In [200]:
# Train with full train/ validation dataset

history = ncbi_bert_lstm_crf_model.fit([bert_ncbi_train_input_ids, bert_ncbi_train_attention_mask], 
                                          bert_ncbi_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids, bert_ncbi_val_attention_mask], bert_ncbi_val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [201]:
#loss, accuracy = ncbi_bert_bilstm_crf_model.evaluate([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask], bert_ncbi_test_labels, verbose=0)

ncbi_bert_bilstm_crf_predictions = ncbi_bert_lstm_crf_model.predict([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask])



In [202]:
print_classification_report(bert_ncbi_test_labels, bert_ncbi_test_attention_mask, ncbi_bert_bilstm_crf_predictions, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8557    0.8973    0.8760      2220

   micro avg     0.8557    0.8973    0.8760      2220
   macro avg     0.8557    0.8973    0.8760      2220
weighted avg     0.8557    0.8973    0.8760      2220



# BERT+BILSTM+CRF - NCBI

In [None]:
ncbi_bert_bilstm_crf_model = create_bert_bilstm_crf_model(NCBI_NUM_LABELS, 768, NCBI_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['dropout_73', 'classifier']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset

history = ncbi_bert_bilstm_crf_model.fit([bert_ncbi_train_input_ids[:100], bert_ncbi_train_attention_mask[:100]], 
                                          bert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids[:100], bert_ncbi_val_attention_mask[:100]], bert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
ncbi_bert_bilstm_crf_model.summary()

Model: "model_with_crf_loss_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_8 (Functional)        ((None, 180),             344600076 
                              (None, 180, 3),                    
                              (None,),                           
                              (3, 3))                            
                                                                 
Total params: 344,600,080
Trainable params: 344,600,076
Non-trainable params: 4
_________________________________________________________________


In [None]:
# Train with full train/ validation dataset

history = ncbi_bert_bilstm_crf_model.fit([bert_ncbi_train_input_ids, bert_ncbi_train_attention_mask], 
                                          bert_ncbi_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_ncbi_val_input_ids, bert_ncbi_val_attention_mask], bert_ncbi_val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
#loss, accuracy = ncbi_bert_bilstm_crf_model.evaluate([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask], bert_ncbi_test_labels, verbose=0)

ncbi_bert_bilstm_crf_predictions = ncbi_bert_bilstm_crf_model.predict([bert_ncbi_test_input_ids, bert_ncbi_test_attention_mask])



In [None]:
print_classification_report(bert_ncbi_test_labels, bert_ncbi_test_attention_mask, ncbi_bert_bilstm_crf_predictions, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8648    0.8613    0.8630      2220

   micro avg     0.8648    0.8613    0.8630      2220
   macro avg     0.8648    0.8613    0.8630      2220
weighted avg     0.8648    0.8613    0.8630      2220




# BioBERT+CRF - NCBI

In [116]:
ncbi_biobert_crf_model = create_biobert_crf_model(NCBI_NUM_LABELS, NCBI_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [117]:
# Train with sample train/ validation dataset

history = ncbi_biobert_crf_model.fit([biobert_ncbi_train_input_ids[:100], biobert_ncbi_train_attention_mask[:100]], 
                                          biobert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_ncbi_val_input_ids[:100], biobert_ncbi_val_attention_mask[:100]], biobert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [160]:
ncbi_biobert_crf_model.summary()

Model: "model_with_crf_loss_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_14 (Functional)       ((None, 180),             108312588 
                              (None, 180, 3),                    
                              (None,),                           
                              (3, 3))                            
                                                                 
Total params: 108,312,592
Trainable params: 108,312,588
Non-trainable params: 4
_________________________________________________________________


In [121]:
# Train with full train/ validation dataset
history = ncbi_biobert_crf_model.fit([biobert_ncbi_train_input_ids, biobert_ncbi_train_attention_mask], 
                                          biobert_ncbi_train_labels, batch_size=8, epochs=2, 
                                          validation_data=([biobert_ncbi_val_input_ids, biobert_ncbi_val_attention_mask], biobert_ncbi_val_labels))


Epoch 1/2
Epoch 2/2


In [122]:
#loss, accuracy = ncbi_biobert_crf_model.evaluate([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask], biobert_ncbi_test_labels, verbose=0)

ncbi_biobert_crf_predictions = ncbi_biobert_crf_model.predict([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask])



In [123]:
print_classification_report(biobert_ncbi_test_labels, biobert_ncbi_test_attention_mask, ncbi_biobert_crf_predictions, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8539    0.8714    0.8626      2294

   micro avg     0.8539    0.8714    0.8626      2294
   macro avg     0.8539    0.8714    0.8626      2294
weighted avg     0.8539    0.8714    0.8626      2294



# BioBERT+BILSTM+CRF - NCBI

In [269]:
ncbi_biobert_bilstm_crf_model = create_biobert_bilstm_crf_model(NCBI_NUM_LABELS, 700, NCBI_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [263]:
# Train with sample train/ validation dataset
history = ncbi_biobert_bilstm_crf_model.fit([biobert_ncbi_train_input_ids[:100], biobert_ncbi_train_attention_mask[:100]], 
                                          biobert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_ncbi_val_input_ids[:100], biobert_ncbi_val_attention_mask[:100]], biobert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [270]:
# Train with sample train/ validation dataset
history = ncbi_biobert_bilstm_crf_model.fit([biobert_ncbi_train_input_ids[:100], biobert_ncbi_train_attention_mask[:100]], 
                                          biobert_ncbi_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_ncbi_val_input_ids[:100], biobert_ncbi_val_attention_mask[:100]], biobert_ncbi_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [247]:
ncbi_biobert_bilstm_crf_model.summary()

Model: "model_with_crf_loss_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_36 (Functional)       ((None, 180),             113560332 
                              (None, 180, 3),                    
                              (None,),                           
                              (3, 3))                            
                                                                 
Total params: 113,560,336
Trainable params: 113,560,332
Non-trainable params: 4
_________________________________________________________________


In [271]:
# Train with full train/ validation dataset
history = ncbi_biobert_bilstm_crf_model.fit([biobert_ncbi_train_input_ids, biobert_ncbi_train_attention_mask], 
                                          biobert_ncbi_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([biobert_ncbi_val_input_ids, biobert_ncbi_val_attention_mask], biobert_ncbi_val_labels))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [272]:
# loss, accuracy = ncbi_biobert_bilstm_crf_model.evaluate([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask], biobert_ncbi_test_labels, verbose=0)

ncbi_biobert_bilstm_crf_predictions = ncbi_biobert_bilstm_crf_model.predict([biobert_ncbi_test_input_ids, biobert_ncbi_test_attention_mask])



In [273]:
print_classification_report(biobert_ncbi_test_labels, biobert_ncbi_test_attention_mask, ncbi_biobert_bilstm_crf_predictions, ncbi_label_list)

  0%|          | 0/941 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8081    0.9159    0.8586      2294

   micro avg     0.8081    0.9159    0.8586      2294
   macro avg     0.8081    0.9159    0.8586      2294
weighted avg     0.8081    0.9159    0.8586      2294



# BERT - GM

In [None]:
gm_bert_model = create_bert_model(GM_NUM_LABELS, GM_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset
history = gm_bert_model.fit([bert_gm_train_input_ids[:100], bert_gm_train_attention_mask[:100]], 
                                          bert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_gm_val_input_ids[:100], bert_gm_val_attention_mask[:100]], bert_gm_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train with full train/ validation dataset

history = gm_bert_model.fit([bert_gm_train_input_ids, bert_gm_train_attention_mask], 
                                          bert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([bert_gm_val_input_ids, bert_gm_val_attention_mask], bert_gm_val_labels))


Epoch 1/2
Epoch 2/2


In [None]:
#loss, accuracy = gm_bert_model.evaluate([bert_gm_test_input_ids, bert_gm_test_attention_mask], bert_gm_test_labels, verbose=0)

bert_gm_predictions = gm_bert_model.predict([bert_gm_test_input_ids, bert_gm_test_attention_mask])



In [None]:
bert_gm_preds_list = get_argmax_predictions(bert_gm_predictions)

In [None]:
print_classification_report(bert_gm_test_labels, bert_gm_test_attention_mask, bert_gm_preds_list, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8269    0.8565    0.8415     16110

   micro avg     0.8269    0.8565    0.8415     16110
   macro avg     0.8269    0.8565    0.8415     16110
weighted avg     0.8269    0.8565    0.8415     16110



# BioBERT - GM

In [143]:
gm_biobert_model = create_biobert_model(GM_NUM_LABELS, GM_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [144]:
# Train with sample train/ validation dataset

history = gm_biobert_model.fit([biobert_gm_train_input_ids[:100], biobert_gm_train_attention_mask[:100]], 
                                          biobert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_gm_val_input_ids[:100], biobert_gm_val_attention_mask[:100]], biobert_gm_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [145]:
# Train with full train/ validation dataset

history = gm_biobert_model.fit([biobert_gm_train_input_ids, biobert_gm_train_attention_mask], 
                                          biobert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([biobert_gm_val_input_ids, biobert_gm_val_attention_mask], biobert_gm_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [146]:
#loss, accuracy = gm_biobert_model.evaluate([biobert_gm_test_input_ids, biobert_gm_test_attention_mask], biobert_gm_test_labels, verbose=0)

gm_biobert_predictions = gm_biobert_model.predict([biobert_gm_test_input_ids, biobert_gm_test_attention_mask])



In [147]:
biobert_gm_preds_list = get_argmax_predictions(gm_biobert_predictions)

In [148]:
print_classification_report(biobert_gm_test_labels, biobert_gm_test_attention_mask, biobert_gm_preds_list, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

        GENE     0.8364    0.8724    0.8540     16054

   micro avg     0.8364    0.8724    0.8540     16054
   macro avg     0.8364    0.8724    0.8540     16054
weighted avg     0.8364    0.8724    0.8540     16054




# BERT+CRF - GM

In [None]:
gm_bert_crf_model = create_bert_crf_model(GM_NUM_LABELS, GM_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset

history = gm_bert_crf_model.fit([bert_gm_train_input_ids[:100], bert_gm_train_attention_mask[:100]], 
                                          bert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_gm_val_input_ids[:100], bert_gm_val_attention_mask[:100]], bert_gm_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train with full train/ validation dataset

history = gm_bert_crf_model.fit([bert_gm_train_input_ids, bert_gm_train_attention_mask], 
                                          bert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([bert_gm_val_input_ids, bert_gm_val_attention_mask], bert_gm_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
#loss, accuracy = gm_bert_crf_model.evaluate([bert_gm_test_input_ids, bert_gm_test_attention_mask], bert_gm_test_labels, verbose=0)

bert_crf_gm_predictions = gm_bert_crf_model.predict([bert_gm_test_input_ids, bert_gm_test_attention_mask])



In [None]:
print_classification_report(bert_gm_test_labels, bert_gm_test_attention_mask, bert_crf_gm_predictions, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8535    0.8644    0.8589     16110

   micro avg     0.8535    0.8644    0.8589     16110
   macro avg     0.8535    0.8644    0.8589     16110
weighted avg     0.8535    0.8644    0.8589     16110



# BERT+BILSTM+CRF - GM

In [None]:
gm_bert_bilstm_crf_model = create_bert_bilstm_crf_model(GM_NUM_LABELS, 768, GM_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset

history = gm_bert_bilstm_crf_model.fit([bert_gm_train_input_ids[:100], bert_gm_train_attention_mask[:100]], 
                                          bert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_gm_val_input_ids[:100], bert_gm_val_attention_mask[:100]], bert_gm_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train with full train/ validation dataset

history = gm_bert_bilstm_crf_model.fit([bert_gm_train_input_ids, bert_gm_train_attention_mask], 
                                          bert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([bert_gm_val_input_ids, bert_gm_val_attention_mask], bert_gm_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
#loss, accuracy = gm_bert_bilstm_crf_model.evaluate([bert_gm_test_input_ids, bert_gm_test_attention_mask], bert_gm_test_labels, verbose=0)

gm_bert_bilstm_crf_predictions = gm_bert_bilstm_crf_model.predict([bert_gm_test_input_ids, bert_gm_test_attention_mask])



In [None]:
print_classification_report(bert_gm_test_labels, bert_gm_test_attention_mask, gm_bert_bilstm_crf_predictions, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8129    0.8815    0.8458     16110

   micro avg     0.8129    0.8815    0.8458     16110
   macro avg     0.8129    0.8815    0.8458     16110
weighted avg     0.8129    0.8815    0.8458     16110




# BioBERT+CRF - GM

In [149]:
gm_biobert_crf_model = create_biobert_crf_model(GM_NUM_LABELS, GM_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [150]:
# Train with sample train/ validation dataset

history = gm_biobert_crf_model.fit([biobert_gm_train_input_ids[:100], biobert_gm_train_attention_mask[:100]], 
                                          biobert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_gm_val_input_ids[:100], biobert_gm_val_attention_mask[:100]], biobert_gm_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [151]:
# Train with full train/ validation dataset

history = gm_biobert_crf_model.fit([biobert_gm_train_input_ids, biobert_gm_train_attention_mask], 
                                          biobert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([biobert_gm_val_input_ids, biobert_gm_val_attention_mask], biobert_gm_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [152]:
#loss, accuracy = gm_biobert_crf_model.evaluate([biobert_gm_test_input_ids, biobert_gm_test_attention_mask], biobert_gm_test_labels, verbose=0)

gm_biobert_crf_predictions = gm_biobert_crf_model.predict([biobert_gm_test_input_ids, biobert_gm_test_attention_mask])



In [153]:
print_classification_report(biobert_gm_test_labels, biobert_gm_test_attention_mask, gm_biobert_crf_predictions, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

        GENE     0.8376    0.8647    0.8510     16054

   micro avg     0.8376    0.8647    0.8510     16054
   macro avg     0.8376    0.8647    0.8510     16054
weighted avg     0.8376    0.8647    0.8510     16054



# BioBERT+BILSTM+CRF - GM

In [162]:
gm_biobert_bilstm_crf_model = create_biobert_bilstm_crf_model(GM_NUM_LABELS, 768, GM_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [163]:
# Train with sample train/ validation dataset

history = gm_biobert_bilstm_crf_model.fit([biobert_gm_train_input_ids[:100], biobert_gm_train_attention_mask[:100]], 
                                          biobert_gm_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_gm_val_input_ids[:100], biobert_gm_val_attention_mask[:100]], biobert_gm_val_labels[:100]))

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [164]:
# Train with full train/ validation dataset

history = gm_biobert_bilstm_crf_model.fit([biobert_gm_train_input_ids, biobert_gm_train_attention_mask], 
                                          biobert_gm_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([biobert_gm_val_input_ids, biobert_gm_val_attention_mask], biobert_gm_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [165]:
#loss, accuracy = gm_biobert_bilstm_crf_model.evaluate([biobert_gm_test_input_ids, biobert_gm_test_attention_mask], biobert_gm_test_labels, verbose=0)

gm_biobert_bilstm_crf_predictions = gm_biobert_bilstm_crf_model.predict([biobert_gm_test_input_ids, biobert_gm_test_attention_mask])



In [166]:
print_classification_report(biobert_gm_test_labels, biobert_gm_test_attention_mask, gm_biobert_bilstm_crf_predictions, gm_label_list)

  0%|          | 0/5001 [00:00<?, ?it/s]

              precision    recall  f1-score   support

        GENE     0.8128    0.8247    0.8187     16054

   micro avg     0.8128    0.8247    0.8187     16054
   macro avg     0.8128    0.8247    0.8187     16054
weighted avg     0.8128    0.8247    0.8187     16054



# BERT - CDR

In [None]:
cdr_bert_model = create_bert_model(CDR_NUM_LABELS, CDR_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['classifier', 'dropout_73']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset
history = cdr_bert_model.fit([bert_cdr_train_input_ids[:100], bert_cdr_train_attention_mask[:100]], 
                                          bert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_cdr_val_input_ids[:100], bert_cdr_val_attention_mask[:100]], bert_cdr_val_labels[:100]))


Epoch 1/5


  return dispatch_target(*args, **kwargs)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train with full train/ validation dataset

history = cdr_bert_model.fit([bert_cdr_train_input_ids, bert_cdr_train_attention_mask], 
                                          bert_cdr_train_labels, batch_size=8, epochs=3, 
                                          validation_data=([bert_cdr_val_input_ids, bert_cdr_val_attention_mask], bert_cdr_val_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
#loss, accuracy = cdr_bert_model.evaluate([bert_cdr_test_input_ids, bert_cdr_test_attention_mask], bert_cdr_test_labels, verbose=0)

cdr_bert_predictions = cdr_bert_model.predict([bert_cdr_test_input_ids, bert_cdr_test_attention_mask])



In [None]:
cdr_bert_pred_list = get_argmax_predictions(cdr_bert_predictions)

In [None]:
print_classification_report(bert_cdr_test_labels, bert_cdr_test_attention_mask, cdr_bert_pred_list, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    Chemical     0.9300    0.9209    0.9254     17587
     Disease     0.8066    0.8485    0.8270     12301

   micro avg     0.8774    0.8911    0.8842     29888
   macro avg     0.8683    0.8847    0.8762     29888
weighted avg     0.8792    0.8911    0.8849     29888



# BioBERT - CDR

In [176]:
cdr_biobert_model = create_biobert_model(CDR_NUM_LABELS, CDR_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [177]:
# Train with sample train/ validation dataset

history = cdr_biobert_model.fit([biobert_cdr_train_input_ids[:100], biobert_cdr_train_attention_mask[:100]], 
                                          biobert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids[:100], biobert_cdr_val_attention_mask[:100]], biobert_cdr_val_labels[:100]))


Epoch 1/5


  return dispatch_target(*args, **kwargs)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [178]:
# Train with full train/ validation dataset

history = cdr_biobert_model.fit([biobert_cdr_train_input_ids, biobert_cdr_train_attention_mask], 
                                          biobert_cdr_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids, biobert_cdr_val_attention_mask], biobert_cdr_val_labels))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [179]:
#loss, accuracy = cdr_biobert_model.evaluate([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask], biobert_cdr_test_labels, verbose=0)

cdr_biobert_predictions = cdr_biobert_model.predict([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask])



In [180]:
cdr_biobert_pred_list = get_argmax_predictions(cdr_biobert_predictions)

In [181]:
print_classification_report(biobert_cdr_test_labels, biobert_cdr_test_attention_mask, cdr_biobert_pred_list, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    Chemical     0.9231    0.9498    0.9363     17642
     Disease     0.8102    0.8715    0.8397     12355

   micro avg     0.8754    0.9176    0.8960     29997
   macro avg     0.8667    0.9107    0.8880     29997
weighted avg     0.8766    0.9176    0.8965     29997




# BERT+CRF - CDR

In [None]:
cdr_bert_crf_model = create_bert_crf_model(CDR_NUM_LABELS, CDR_MAX_LENGTH)

Some layers from the model checkpoint at dslim/bert-large-NER were not used when initializing TFBertModel: ['dropout_73', 'classifier']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at dslim/bert-large-NER.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Train with sample train/ validation dataset

history = cdr_bert_crf_model.fit([bert_cdr_train_input_ids[:100], bert_cdr_train_attention_mask[:100]], 
                                          bert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_cdr_val_input_ids[:100], bert_cdr_val_attention_mask[:100]], bert_cdr_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Train with full train/ validation dataset

history = cdr_bert_crf_model.fit([bert_cdr_train_input_ids, bert_cdr_train_attention_mask], 
                                          bert_cdr_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_cdr_val_input_ids, bert_cdr_val_attention_mask], bert_cdr_val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
#loss, accuracy = cdr_bert_crf_model.evaluate([bert_cdr_test_input_ids, bert_cdr_test_attention_mask], bert_cdr_test_labels, verbose=0)

cdr_bert_crf_predictions = cdr_bert_crf_model.predict([bert_cdr_test_input_ids, bert_cdr_test_attention_mask])



In [None]:
print_classification_report(bert_cdr_test_labels, bert_cdr_test_attention_mask, cdr_bert_crf_predictions, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8200    0.8564    0.8378     12301
        GENE     0.9313    0.9376    0.9344     17587

   micro avg     0.8845    0.9042    0.8942     29888
   macro avg     0.8757    0.8970    0.8861     29888
weighted avg     0.8855    0.9042    0.8947     29888



# BERT+BILSTM+CRF - CDR

In [None]:
cdr_bert_bilstm_crf_model = create_bert_bilstm_crf_model(CDR_NUM_LABELS, 32, CDR_MAX_LENGTH)

NameError: ignored

In [None]:
# Train with sample train/ validation dataset


history = cdr_bert_bilstm_crf_model.fit([bert_cdr_train_input_ids[:100], bert_cdr_train_attention_mask[:100]], 
                                          bert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([bert_cdr_val_input_ids[:100], bert_cdr_val_attention_mask[:100]], bert_cdr_val_labels[:100]))


NameError: ignored

In [None]:
# Train with full train/ validation dataset

history = cdr_bert_bilstm_crf_model.fit([bert_cdr_train_input_ids, bert_cdr_train_attention_mask], 
                                          bert_cdr_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([bert_cdr_val_input_ids, bert_cdr_val_attention_mask], bert_cdr_val_labels))I 


SyntaxError: ignored

In [None]:
#loss, accuracy = cdr_bert_bilstm_crf_model.evaluate([bert_cdr_test_input_ids, bert_cdr_test_attention_mask], bert_cdr_test_labels, verbose=0)

cdr_bert_bilstm_crf_predictions = cdr_bert_bilstm_crf_model.predict([bert_cdr_test_input_ids, bert_cdr_test_attention_mask])



In [None]:
print_classification_report(bert_cdr_test_labels, bert_cdr_test_attention_mask, cdr_bert_bilstm_crf_predictions, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

     Disease     0.8275    0.8224    0.8249     12301
        GENE     0.9187    0.9477    0.9330     17587

   micro avg     0.8820    0.8961    0.8890     29888
   macro avg     0.8731    0.8850    0.8789     29888
weighted avg     0.8812    0.8961    0.8885     29888




# BioBERT+CRF - CDR

In [182]:
cdr_biobert_crf_model = create_biobert_crf_model(CDR_NUM_LABELS, CDR_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [183]:
# Train with sample train/ validation dataset

history = cdr_biobert_crf_model.fit([biobert_cdr_train_input_ids[:100], biobert_cdr_train_attention_mask[:100]], 
                                          biobert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids[:100], biobert_cdr_val_attention_mask[:100]], biobert_cdr_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [184]:
# Train with full train/ validation dataset

history = cdr_biobert_crf_model.fit([biobert_cdr_train_input_ids, biobert_cdr_train_attention_mask], 
                                          biobert_cdr_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids, biobert_cdr_val_attention_mask], biobert_cdr_val_labels))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [185]:
#loss, accuracy = cdr_biobert_crf_model.evaluate([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask], biobert_cdr_test_labels, verbose=0)

cdr_biobert_crf_predictions = cdr_biobert_crf_model.predict([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask])



In [186]:
print_classification_report(biobert_cdr_test_labels, biobert_cdr_test_attention_mask, cdr_biobert_crf_predictions, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    Chemical     0.9324    0.9502    0.9412     17642
     Disease     0.8448    0.8560    0.8504     12355

   micro avg     0.8964    0.9114    0.9038     29997
   macro avg     0.8886    0.9031    0.8958     29997
weighted avg     0.8963    0.9114    0.9038     29997



# BioBERT+BILSTM+CRF - CDR

In [274]:
cdr_biobert_bilstm_crf_model = create_biobert_bilstm_crf_model(CDR_NUM_LABELS, 888, CDR_MAX_LENGTH)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint

In [275]:
# Train with sample train/ validation dataset

history = cdr_biobert_bilstm_crf_model.fit([biobert_cdr_train_input_ids[:100], biobert_cdr_train_attention_mask[:100]], 
                                          biobert_cdr_train_labels[:100], batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids[:100], biobert_cdr_val_attention_mask[:100]], biobert_cdr_val_labels[:100]))


Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [276]:
# Train with full train/ validation dataset

history = cdr_biobert_bilstm_crf_model.fit([biobert_cdr_train_input_ids, biobert_cdr_train_attention_mask], 
                                          biobert_cdr_train_labels, batch_size=8, epochs=5, 
                                          validation_data=([biobert_cdr_val_input_ids, biobert_cdr_val_attention_mask], biobert_cdr_val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [277]:
#loss, accuracy = cdr_biobert_bilstm_crf_model.evaluate([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask], biobert_cdr_test_labels, verbose=0)

cdr_biobert_bilstm_crf_predictions = cdr_biobert_bilstm_crf_model.predict([biobert_cdr_test_input_ids, biobert_cdr_test_attention_mask])



In [278]:
print_classification_report(biobert_cdr_test_labels, biobert_cdr_test_attention_mask, cdr_biobert_bilstm_crf_predictions, cdr_label_list)

  0%|          | 0/4798 [00:00<?, ?it/s]

              precision    recall  f1-score   support

    Chemical     0.9420    0.9370    0.9395     17642
     Disease     0.8608    0.8385    0.8495     12355

   micro avg     0.9090    0.8964    0.9027     29997
   macro avg     0.9014    0.8877    0.8945     29997
weighted avg     0.9086    0.8964    0.9024     29997




# CUSTOM LOSS FUNCTION

In [None]:
## Implementing the custom CRF layer, loss, and model


class CRFLayer(Layer):
  """
  Computes the log likelihood during training
  Performs Viterbi decoding during prediction
  """
  def __init__(self,
               label_size=3, mask_id=0,
               trans_params=None, 
               name='crf',
               **kwargs):
    super(CRFLayer, self).__init__(name=name, **kwargs)
    self.label_size = label_size
    self.mask_id = mask_id
    self.transition_params = None
    
    if trans_params is None:  # not reloading pretrained params
        self.transition_params = tf.Variable(
tf.random.uniform(shape=(label_size, label_size)),
                trainable=False)
    else:
        self.transition_params = trans_params

  """
  A custom loss function for NER using a CRF
  """
  def loss(self, y_true, y_pred):
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(self.get_proper_labels(y_true), y_pred.dtype)
    seq_lengths = self.get_seq_lengths(y_true)
    log_likelihoods, self.transition_params = tfa.text.crf_log_likelihood(y_pred, y_true, seq_lengths)
    # save transition params
    self.transition_params = tf.Variable(self.transition_params, 
      trainable=False)
    # calc loss
    loss = - tf.reduce_mean(log_likelihoods)
    return loss


def crf_loss_func(potentials, sequence_length, kernel, y):
    crf_likelihood, _ = tfa.text.crf_log_likelihood(
        potentials, y, sequence_length, kernel
    )
    # likelihood to loss
    flat_crf_loss = -1 * crf_likelihood
    crf_loss = tf.reduce_mean(flat_crf_loss)

    return crf_loss

# Scratch section

In [None]:
# @title Create Unified Label Mapping Across Datasets
# rename "tags" to "ner_tags" for consistency
if 'tags' in cdr_dataset['train'].features.keys():
  cdr_dataset = cdr_dataset.rename_column("tags", "ner_tags")

# get labels from each dataset
ncbi_label_names = ncbi_dataset["train"].features["ner_tags"].feature.names
gm_label_names = gm_dataset["train"].features["ner_tags"].feature.names
cdr_label_names = ['O', 'B-Chemical', 'B-Disease', 'I-Disease', 'I-Chemical']

# create an unified label-to-index mapping across datasets
# keys: unique labels, values: indexes
label2index = {}
id = 0
for n in ncbi_label_names + gm_label_names + cdr_label_names:
  if n not in label2index:
    label2index[n] = id
    id += 1

label_names = list(label2index.keys())
label2index

from datasets import Features, Value, Sequence, ClassLabel
# define wanted output columns from the mapping process
# this ensures that the tokenized datasets have the same features for concatenation 
features = Features({
  'id': Value(dtype='string', id=None),
  'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
  'ner_tags': Sequence(feature=ClassLabel(names=label_names, id=None), length=-1, id=None),
  'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
  'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
  'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
  'labels': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None)
})

bert_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
# @title Map Labels To Tokenized Data (The Original Labels Were At Word-Level)
def bert_process(data, label_names, label2index):
  inputs = bert_tokenizer(data['tokens'], is_split_into_words=True, add_special_tokens=False)
  tags = []
  ids = []
  for i, labels in enumerate(data['ner_tags']):
    # len(word_id) = len(tokens)
    # word_id indicates which word the token belongs to
    word_ids = inputs.word_ids(i)
    label_ids = []
    for wid in word_ids:
      # set label to -100 for CLS and SEP tokens
      if wid is None: 
        label_ids.append(-100)
      else:
        # original label at word-level
        orig_label_index = labels[wid]  
        orig_label_name = label_names[orig_label_index]
        # grab the new unified label indexes
        new_label_index = label2index[orig_label_name]
        label_ids.append(new_label_index)
    tags.append(label_ids)
    # overwrite ids to ensure we have all the features defined above
    # cdr data doesn't have ids
    ids.append('0')
  inputs['id'] = ids
  # the new labels for model training
  inputs['labels'] = tags
  return inputs

biobert_tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.2")
# @title Map Labels To Tokenized Data (The Original Labels Were At Word-Level)
def biobert_process(data, label_names, label2index):
  inputs = biobert_tokenizer(data['tokens'], is_split_into_words=True, add_special_tokens=False)
  tags = []
  ids = []
  for i, labels in enumerate(data['ner_tags']):
    # len(word_id) = len(tokens)
    # word_id indicates which word the token belongs to
    word_ids = inputs.word_ids(i)
    label_ids = []
    for wid in word_ids:
      # set label to -100 for CLS and SEP tokens
      if wid is None: 
        label_ids.append(-100)
      else:
        # original label at word-level
        orig_label_index = labels[wid]  
        orig_label_name = label_names[orig_label_index]
        # grab the new unified label indexes
        new_label_index = label2index[orig_label_name]
        label_ids.append(new_label_index)
    tags.append(label_ids)
    # overwrite ids to ensure we have all the features defined above
    # cdr data doesn't have ids
    ids.append('0')
  inputs['id'] = ids
  # the new labels for model training
  inputs['labels'] = tags
  return inputs

In [None]:
ncbi_train_data = ncbi_dataset['train']
ncbi_validation_data = ncbi_dataset['validation']
ncbi_test_data = ncbi_dataset['test']

ncbi_dataset_train_examples = ncbi_dataset['train']['tokens']
ncbi_dataset_train_labels = ncbi_dataset['train']['ner_tags']

ncbi_dataset_validation_examples = ncbi_dataset['validation']['tokens']
ncbi_dataset_validation_labels = ncbi_dataset['validation']['ner_tags']

ncbi_dataset_test_examples = ncbi_dataset['test']['tokens']
ncbi_dataset_test_labels = ncbi_dataset['test']['ner_tags']

In [None]:
biobert_tokenizer(ncbi_dataset_train_examples[0])

{'input_ids': [[101, 9117, 102], [101, 1104, 102], [101, 170, 1643, 1665, 1477, 102], [101, 117, 102], [101, 170, 102], [101, 16358, 3702, 12733, 102], [101, 1104, 102], [101, 1103, 102], [101, 8050, 26601, 21943, 2285, 102], [101, 185, 23415, 5674, 4863, 102], [101, 1884, 2646, 102], [101, 189, 27226, 102], [101, 17203, 1766, 102], [101, 119, 102]], 'token_type_ids': [[0, 0, 0], [0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0]], 'attention_mask': [[1, 1, 1], [1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1]]}

In [None]:
data_collator = DataCollatorForTokenClassification(tokenizer=biobert_tokenizer)
training_args = TrainingArguments("test-trainer")
#biobert_model = 'dmis-lab/biobert-base-cased-v1.2'
#ncbi_biobert_model = AutoModelForTokenClassification.from_pretrained(biobert_model, num_labels=len(ncbi_label_list))

#biobert_bilstm_crf_ncbi_trainer = Trainer(
#    biobert_bilstm_crf_model,
#    training_args,
#    train_dataset = tokenized_ncbi_dataset["train"],
#    eval_dataset = tokenized_ncbi_dataset["validation"],
#    data_collator = data_collator,
#    tokenizer = biobert_tokenizer,
#)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
def biobert_bilstm_crf_model(hidden_size = 100, 
                             output_size = 5,
                             learning_rate = 0.003, 
                             dropout = 0.3,
                             max_length = 20):

    
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_id')
    input_mask = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name="input_masks")
    input_secdrent = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name="segment_ids")

    biobert_inputs = [input_ids, input_mask, input_segment]

    biobert_layer = AutoModel.from_pretrained("dmis-lab/biobert-base-cased-v1.2")

    #biobert_sequence = biobert_layer(biobert_inputs)[0]

    biobert_output = biobert_layer(input_ids)
    #last_hidden_states = biobert_output[0]

    bilstm_layer = Bidirectional(LSTM(hidden_size, return_sequences=True), merge_mode = "sum")
    bilstm_layer_out = bilstm_layer(Dropout(0.5)(biobert_output))

    crf = CRF(dtype='float32', sparse_target=True)
    crf_output = crf(BatchNormalization()(bilstm_layer_out))

    base_model = keras.Model(input=input_ids, output=crf_output)
    model = ModelWithCRFLoss(base_model)

    optimizer = keras.optimizers.Adam(learning_rate = learning_rate)

    model.compile(optimizer=optimizer)

    model.summary()

    return model

In [None]:
model = biobert_bilstm_crf_model()

In [None]:
sample = model.fit(sample_train_encodings, sample_train_labels, validation_data=(sample_val_encodings, sample_val_labels), batch_size=8, epochs=1)

In [None]:
#model = create_biobert_bilstm_crf_model(input_length, hidden_size, output_size, learning_rate)
#history = model.fit(input_ids)

['[CLS]', 'identification', 'of', 'a', '##p', '##c', '##2', ',', 'a', 'ho', '##mo', '##logue', 'of', 'the', 'ad', '##eno', '##mat', '##ous', 'p', '##oly', '##po', '##sis', 'co', '##li', 't', '##umour', 'suppress', '##or', '.', '[SEP]']


In [None]:
model.fit(
    biobert_bilstm_crf_model, 
    {"ner": ncbi_labels },
    validation_data=(bert_inputs_test_k, {"ner": ncbi_labels }),
    epochs=5,
    batch_size=16

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--dmis-lab--biobert-base-cased-v1.2/snapshots/67c9c25b46986521ca33df05d8540da1210b3256/config.json
Model config BertConfig {
  "_name_or_path": "dmis-lab/biobert-base-cased-v1.2",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-12,
  "m

AttributeError: ignored

In [None]:

def bert_tweets_model():
    bert_encoder = TFBertModel.from_pretrained(model_name)
    input_word_ids = tf.keras.Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
    last_hidden_states = bert_encoder(input_word_ids)[0]    
    x = tf.keras.layers.LSTM(100, dropout=0.3, recurrent_dropout=0.3)(last_hidden_states)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=input_word_ids, outputs=output)
    
    return model

class CustomBERT_NER_LSTM_Model(tf.keras.layers.Layer):
    def __init__(self):
          super(CustomBERT_NER_Model, self).__init__()
          self.bert_NER = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
          ### New layers:
          self.lstm = nn.LSTM(768, 256, batch_first=True,bidirectional=True)
          self.linear = nn.Linear(256*2, 3)
          

    def forward(self, ids, mask):
          sequence_output, pooled_output = self.bert_NER(
               ids, 
               attention_mask=mask)

          # sequence_output has the following shape: (batch_size, sequence_length, 768)
          lstm_output, (h,c) = self.lstm(sequence_output) ## extract the 1st token's embeddings
          hidden = torch.cat((lstm_output[:,-1, :256],lstm_output[:,0, 256:]),dim=-1)
          linear_output = self.linear(hidden.view(-1,256*2)) ### assuming that you are only using the output of the last LSTM cell to perform classification

          return linear_output

bert_NER_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
bert_NER_LSTM_Model = CustomBERT_NER_LSTM_Model()

NameError: ignored