In [None]:
pip install --upgrade datasets

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading

In [None]:
import numpy as np
import tensorflow as tf
from transformers import TFBertModel
from tensorflow.keras import layers

#tf.config.run_functions_eagerly(True)
class CustomCRF(tf.keras.layers.Layer):
    """Linear-chain CRF head with learned start, transition and end scores.

    The methods index ``emissions`` as ``[batch, time, tag]``.  With a truthy
    ``training`` flag ``call`` returns the mean negative log-likelihood of
    ``labels``; otherwise it returns the Viterbi tag path of every sequence.

    NOTE(review): ``attention_mask`` is accepted by the public methods but is
    never read, so every time step (padding included) contributes to both the
    loss and the decoded path.
    """

    def __init__(self, num_tags, seq_len, **kwargs):
        """Store the tag count and the nominal sequence length.

        Args:
            num_tags: number of distinct tag ids scored by the CRF.
            seq_len: stored on the instance; the computations below take the
                sequence length from the emissions instead.
            **kwargs: forwarded to ``tf.keras.layers.Layer``.
        """
        super(CustomCRF, self).__init__(**kwargs)
        self.num_tags = num_tags
        self.seq_len = seq_len

    def build(self, input_shape):
        """Create the three trainable score tables exactly once.

        The guard and the final ``built`` flag make the method idempotent.
        Without them a direct ``layer.build(...)`` call followed later by
        ``layer(...)`` could run ``build`` a second time and replace the
        already trained weights with freshly initialised ones.
        """
        if self.built:
            return
        xavier_initializer = tf.keras.initializers.GlorotUniform()
        self.start_transitions = self.add_weight(
            shape=(self.num_tags,), initializer=xavier_initializer, trainable=True
        )
        self.transition_matrix = self.add_weight(
            shape=(self.num_tags, self.num_tags), initializer=xavier_initializer, trainable=True
        )
        self.end_transitions = self.add_weight(
            shape=(self.num_tags,), initializer=xavier_initializer, trainable=True
        )
        self.built = True

    @tf.function
    def call(self, inputs, labels=None, training=None):
        """Return the CRF loss (training) or the Viterbi paths (inference).

        Args:
            inputs: pair ``(emissions, attention_mask)``.
            labels: gold tag ids; required when ``training`` is truthy.
            training: selects between loss computation and decoding.

        Raises:
            ValueError: if ``training`` is truthy and ``labels`` is None.
        """
        emissions, attention_mask = inputs

        if training:
            if labels is None:
                raise ValueError("CustomCRF needs `labels` to compute the training loss.")
            # The loss helper needs the time dimension; it was previously
            # omitted from this call, which made the training path fail.
            return self._crf_loss(emissions, labels, attention_mask, tf.shape(emissions)[1])
        return self.viterbi(emissions, attention_mask)

    @tf.function
    def _crf_loss(self, emissions, labels, attention_mask, seq_len=None):
        """Mean negative log-likelihood of ``labels`` under the CRF.

        Args:
            emissions: per-token tag scores, indexed ``[batch, time, tag]``.
            labels: gold tag ids, indexed ``[batch, time]``.
            attention_mask: currently unused (see the class note).
            seq_len: number of time steps to score; defaults to the time
                dimension of ``emissions``.
        """
        if seq_len is None:
            seq_len = tf.shape(emissions)[1]
        log_partition = self._forward_algorithm(emissions, seq_len)
        gold_score = self._score_sequence(emissions, labels, seq_len)
        return tf.reduce_mean(log_partition - gold_score)

    @tf.function
    def _forward_algorithm(self, emissions, seq_len):
        """Log-sum-exp score over all tag paths (one value per batch row)."""
        log_alpha = self.start_transitions + emissions[:, 0, :]

        def step_fn(t, log_alpha):
            # [batch, prev_tag, 1] + [1, prev_tag, next_tag], reduced over prev_tag.
            log_alpha_expanded = tf.expand_dims(log_alpha, axis=2)
            transition_scores = tf.expand_dims(self.transition_matrix, axis=0)
            log_alpha_t = tf.reduce_logsumexp(log_alpha_expanded + transition_scores, axis=1)
            new_log_alpha = log_alpha_t + emissions[:, t, :]
            return t + 1, new_log_alpha

        t = tf.constant(1)
        _, log_alpha = tf.while_loop(
            cond=lambda t, *_: t < seq_len,
            body=step_fn,
            loop_vars=[t, log_alpha]
        )

        log_alpha += self.end_transitions
        return tf.reduce_logsumexp(log_alpha, axis=1)

    @tf.function
    def _score_sequence(self, emissions, labels, seq_len):
        """Score of the gold path: start + emissions + transitions + end.

        ``labels`` is stacked with int64 index tensors below, so it has to be
        an int64 tensor itself.
        """
        batch_size = tf.shape(labels)[0]
        batch_index = tf.range(batch_size, dtype=tf.int64)

        start_label_indices = labels[:, 0]
        start_transition_scores = tf.gather(self.start_transitions, start_label_indices)

        first_emission_scores = tf.gather_nd(
            emissions,
            indices=tf.stack(
                [batch_index, tf.zeros(batch_size, dtype=tf.int64), start_label_indices],
                axis=1
            )
        )

        score = start_transition_scores + first_emission_scores

        for t in range(1, seq_len):
            prev_labels = labels[:, t - 1]
            curr_labels = labels[:, t]

            # transition_matrix is indexed [previous_tag, current_tag].
            indices = tf.stack([prev_labels, curr_labels], axis=1)
            transition_scores = tf.gather_nd(self.transition_matrix, indices)
            curr_emission_scores = tf.gather_nd(
                emissions,
                indices=tf.stack(
                    [batch_index, tf.cast(tf.fill([batch_size], t), tf.int64), curr_labels],
                    axis=1
                )
            )

            score += transition_scores + curr_emission_scores

        end_label_indices = labels[:, seq_len - 1]
        end_transition_scores = tf.gather(self.end_transitions, end_label_indices)

        score += end_transition_scores

        return score

    @tf.function
    def viterbi(self, emissions, attention_mask):
        """Decode the highest scoring tag path for every sequence.

        Returns:
            int32 tensor of tag ids with one row per batch element and one
            column per time step.
        """
        batch_size = tf.shape(emissions)[0]
        seq_len = tf.shape(emissions)[1]

        dp = tf.TensorArray(dtype=tf.float32, size=seq_len, clear_after_read=False)
        backpointer = tf.TensorArray(dtype=tf.int32, size=seq_len, clear_after_read=False)

        first_step = self.start_transitions + emissions[:, 0, :]
        dp = dp.write(0, first_step)

        t = tf.constant(1)

        def loop_body(t, dp, backpointer):
            prev_scores = dp.read(t - 1)
            # [batch, prev_tag, 1] + [prev_tag, next_tag]; best previous tag per next tag.
            scores = tf.expand_dims(prev_scores, axis=2) + self.transition_matrix
            best_scores = tf.reduce_max(scores, axis=1)
            best_paths = tf.argmax(scores, axis=1, output_type=tf.int32)
            current_scores = emissions[:, t, :] + best_scores
            dp = dp.write(t, current_scores)
            backpointer = backpointer.write(t, best_paths)
            return t + 1, dp, backpointer

        _, dp, backpointer = tf.while_loop(
            cond=lambda t, *_: t < seq_len,
            body=loop_body,
            loop_vars=(t, dp, backpointer)
        )

        last_step_scores = dp.read(seq_len - 1) + self.end_transitions
        last_tag = tf.argmax(last_step_scores, axis=1, output_type=tf.int32)

        def backtrack_fn(i):
            # Follow the back-pointers of batch row ``i`` from the last step to step 0.
            best_path = tf.TensorArray(dtype=tf.int32, size=seq_len, clear_after_read=False)
            best_path = best_path.write(seq_len - 1, last_tag[i])
            t = seq_len - 2

            def backtrack_body(t, best_path):
                next_tag = best_path.read(t + 1)
                # Slot 0 of ``backpointer`` is never written; only slots >= 1 are read here.
                best_tag = backpointer.read(t + 1)[i, next_tag]
                best_path = best_path.write(t, best_tag)
                return t - 1, best_path

            _, best_path = tf.while_loop(
                cond=lambda t, *_: t >= 0,
                body=backtrack_body,
                loop_vars=(t, best_path)
            )

            return best_path.stack()

        best_paths = tf.map_fn(backtrack_fn, tf.range(batch_size), fn_output_signature=tf.int32)
        return best_paths


class NERModel(tf.keras.Model):
    """BERT -> BiLSTM -> Dense emissions, trained and decoded with ``CustomCRF``.

    ``call`` returns raw emission scores when ``training`` is truthy and the
    CRF's decoded tag ids otherwise.  Per-tag confusion counts are kept in
    ``metrics_dict`` as ``[tp, tn, fp, fn]``.
    """

    def __init__(self, num_tags, hidden_dim, seq_len, **kwargs):
        """Create the encoder, the emission head and the CRF.

        Args:
            num_tags: number of tag ids produced by the Dense head / CRF.
            hidden_dim: units per direction of the bidirectional LSTM.
            seq_len: forwarded to ``CustomCRF``.
            **kwargs: forwarded to ``tf.keras.Model``.
        """
        super(NERModel, self).__init__(**kwargs)
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')

        self.lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(hidden_dim, return_sequences=True))
        self.dense = tf.keras.layers.Dense(num_tags)
        self.num_tags = num_tags
        # One counter vector per tag id, laid out as [tp, tn, fp, fn].
        self.metrics_dict = {i: tf.Variable([0, 0, 0, 0], dtype=tf.int32) for i in range(num_tags)}
        self.crf = CustomCRF(num_tags, seq_len)

    def build(self, input_shape):
        """Create the CRF weights up front so ``_crf_loss`` can be used directly.

        The CRF is also flagged as built: training only calls its
        ``_crf_loss`` helper, and without the flag the first real
        ``self.crf(...)`` call at inference time could run ``build`` again and
        swap the trained weights for newly initialised ones.
        """
        if not self.crf.built:
            self.crf.build(input_shape)
            self.crf.built = True

    @tf.function
    def call(self, inputs, labels=None, training=None):
        """Return emissions (training) or decoded tag ids (inference).

        Args:
            inputs: dict with ``'input_ids'`` and ``'attention_mask'``.
            labels: unused here; the loss is computed by the step methods.
            training: truthy -> emission scores, falsy -> CRF decoding.
        """
        bert_output = self.bert(inputs['input_ids'], attention_mask=inputs['attention_mask'])['last_hidden_state']
        lstm_output = self.lstm(tf.nn.l2_normalize(bert_output, axis=-1), training=training)
        logits = self.dense(lstm_output)

        if training:
            return logits
        return self.crf((logits, inputs['attention_mask']), training=False)

    def _crf_nll(self, emissions, labels, attention_mask):
        """CRF loss over the full time dimension of ``emissions``."""
        return self.crf._crf_loss(emissions, labels, attention_mask, tf.shape(emissions)[1])

    @tf.function
    def train_step(self, data):
        """Run one optimisation step on ``(inputs, labels)`` and return ``{"loss": ...}``."""
        inputs, labels = data
        with tf.GradientTape() as tape:
            emissions = self(inputs, training=True)  # Forward pass
            loss = self._crf_nll(emissions, labels, inputs['attention_mask'])

        variables = self.trainable_variables
        gradients = tape.gradient(loss, variables)
        # Variables that do not influence the loss come back with a None
        # gradient; leave them out instead of handing None to the optimizer.
        self.optimizer.apply_gradients(
            [(grad, var) for grad, var in zip(gradients, variables) if grad is not None]
        )
        return {"loss": loss}

    def reset_metrics(self):
        """Zero every per-tag ``[tp, tn, fp, fn]`` counter."""
        for key in self.metrics_dict:
            self.metrics_dict[key].assign([0, 0, 0, 0])

    def test_step(self, data, validation=False):
        """Compute the validation loss, or accumulate confusion counts.

        Args:
            data: pair ``(inputs, labels)``.
            validation: when True, return ``{'loss': ...}`` computed from the
                emissions; when False, decode with the CRF, update
                ``metrics_dict`` in place and return None.
        """
        inputs, labels = data
        # training=True is what makes ``call`` hand back emissions instead of decoded tags.
        emissions = self(inputs, training=validation)
        if validation:
            return {'loss': self._crf_nll(emissions, labels, inputs['attention_mask'])}
        predictions = tf.reshape(emissions, [-1])
        true_labels = tf.reshape(labels, [-1])

        # NOTE(review): every position is counted, padded ones included.
        for i in range(self.num_tags):
            predicted_i = predictions == i
            actual_i = true_labels == i
            true_positives = tf.reduce_sum(tf.cast(predicted_i & actual_i, tf.int32))
            false_positives = tf.reduce_sum(tf.cast(predicted_i & (true_labels != i), tf.int32))
            false_negatives = tf.reduce_sum(tf.cast((predictions != i) & actual_i, tf.int32))
            true_negatives = tf.reduce_sum(tf.cast((predictions != i) & (true_labels != i), tf.int32))

            self.metrics_dict[i].assign_add([true_positives, true_negatives, false_positives, false_negatives])






In [None]:

class RelationExtractionModel(tf.keras.Model):
    """Relation classifier over pairs of entity spans.

    Tokens are encoded with BERT -> LayerNorm -> BiLSTM -> LayerNorm; every
    candidate pair listed in ``re_mask`` is represented by the concatenated
    mean-pooled encodings of its two spans and classified by a softmax layer.
    Per-relation confusion counts live in ``metrics_dict`` as
    ``[tp, tn, fp, fn]``.
    """

    def __init__(self, ner_model, num_relations, hidden_dim, dropout_rate=0.2):
        """Build all sub-layers eagerly and seed the BiLSTM from ``ner_model``.

        Args:
            ner_model: trained NER model whose ``lstm`` weights are copied
                into this model's BiLSTM (so ``hidden_dim`` must match it).
            num_relations: number of output classes.
            hidden_dim: units per direction of the BiLSTM.
            dropout_rate: accepted for interface compatibility; not used.
        """
        super(RelationExtractionModel, self).__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')
        self.layer_norm_ip = tf.keras.layers.LayerNormalization()
        self.layer_norm_ip.build((None, None, self.bert.config.hidden_size))
        self.bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(hidden_dim, return_sequences=True))
        self.bilstm.build((None, None, ner_model.bert.config.hidden_size))
        self.bilstm.set_weights(ner_model.lstm.get_weights())  # comment this line for independent learning
        self.layer_norm_op = tf.keras.layers.LayerNormalization()
        self.layer_norm_op.build((None, None, 2 * hidden_dim))
        self.dense = tf.keras.layers.Dense(num_relations, activation='softmax')
        # Two pooled spans of width 2 * hidden_dim are concatenated per pair.
        self.dense.build((None, 4 * hidden_dim))
        self.num_relations = num_relations
        # One counter vector per relation id, laid out as [tp, tn, fp, fn].
        self.metrics_dict = {i: tf.Variable([0, 0, 0, 0], dtype=tf.int32) for i in range(num_relations)}

    @tf.function
    def call(self, inputs, training=False):
        """Return class probabilities, one row per valid pair in ``re_mask``.

        Args:
            inputs: dict with ``'input_ids'``, ``'attention_mask'``,
                ``'ner_tags'`` and ``'re_mask'``.
            training: forwarded to the BiLSTM.
        """
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        ner_tags = inputs['ner_tags']
        re_mask = inputs['re_mask']

        bert_output = self.bert(input_ids, attention_mask=attention_mask)
        token_embeddings = bert_output.last_hidden_state
        lstm_output = self.layer_norm_op(self.bilstm(self.layer_norm_ip(token_embeddings), training=training))
        # extract_entity_pairs already returns a single stacked tensor.
        pair_embeddings = self.extract_entity_pairs(lstm_output, ner_tags, re_mask)
        return self.dense(pair_embeddings)

    @tf.function
    def extract_entity_pairs(self, lstm_output, ner_tags, re_mask):
        """Stack one pooled-pair embedding per valid row of ``re_mask``.

        Rows are visited batch element by batch element and, inside each, in
        the order they appear in ``re_mask``; rows whose first column is -1
        are treated as padding and skipped.  Each row unpacks as
        ``(e1_start, e1_end, relation, e2_start, e2_end)``.
        """
        combined_embeddings = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
        batch_size = tf.shape(ner_tags)[0]

        def process_batch(i, embeddings_list):
            re_pairs = re_mask[i]
            valid_pairs = tf.boolean_mask(re_pairs, tf.not_equal(re_pairs[:, 0], -1))

            def process_pair(j, embeddings_list):
                e1_sidx, e1_eidx, relation_type, e2_sidx, e2_eidx = tf.unstack(valid_pairs[j])
                e1_emb = self.pool_entity(lstm_output[i], ner_tags[i], e1_sidx, e1_eidx)
                e2_emb = self.pool_entity(lstm_output[i], ner_tags[i], e2_sidx, e2_eidx)
                combined = tf.concat([e1_emb, e2_emb], axis=-1)
                return j + 1, embeddings_list.write(embeddings_list.size(), combined)

            _, embeddings_list = tf.while_loop(
                lambda j, _: j < tf.shape(valid_pairs)[0],
                process_pair,
                loop_vars=[0, embeddings_list]
            )
            return i + 1, embeddings_list

        _, final_embeddings = tf.while_loop(
            lambda i, _: i < batch_size,
            process_batch,
            loop_vars=[0, combined_embeddings]
        )

        return final_embeddings.stack()

    @tf.function
    def pool_entity(self, lstm_output, ner_tags, start_idx, end_idx):
        """Mean of ``lstm_output[start_idx : end_idx + 1]`` along the token axis.

        Args:
            lstm_output: token encodings of one sequence (tokens on axis 0).
            ner_tags: unused; kept so existing callers keep working.
            start_idx: first token of the span.
            end_idx: last token of the span (inclusive).
        """
        return tf.reduce_mean(lstm_output[start_idx:end_idx + 1], axis=0)

    @tf.function
    def extract_relation_labels(self, re_mask):
        """Flatten column 2 (the relation id) of every non-padding ``re_mask`` row."""
        reduced_re_mask = re_mask[:, :, 2]
        valid_mask = tf.not_equal(reduced_re_mask, -1)
        valid_relation_labels = tf.boolean_mask(reduced_re_mask, valid_mask)
        return valid_relation_labels

    def _relation_loss(self, labels, probabilities):
        """Mean sparse categorical cross-entropy of softmax outputs against ``labels``."""
        return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, probabilities))

    @tf.function
    def train_step(self, inputs):
        """Run one optimisation step and return ``{"loss": ...}``."""
        filtered_relation_labels = self.extract_relation_labels(inputs['re_mask'])
        with tf.GradientTape() as tape:
            probabilities = self(inputs, training=True)
            loss = self._relation_loss(filtered_relation_labels, probabilities)

        variables = self.trainable_variables
        gradients = tape.gradient(loss, variables)
        # Variables that do not influence the loss come back with a None
        # gradient; leave them out instead of handing None to the optimizer.
        self.optimizer.apply_gradients(
            [(grad, var) for grad, var in zip(gradients, variables) if grad is not None]
        )
        return {"loss": loss}

    def reset_metrics(self):
        """Zero every per-relation ``[tp, tn, fp, fn]`` counter."""
        for key in self.metrics_dict:
            self.metrics_dict[key].assign([0, 0, 0, 0])

    def test_step(self, inputs, validation = False):
        """Compute the validation loss, or accumulate confusion counts.

        The forward pass always runs with ``training=False`` so the reported
        validation loss is not computed in training mode.

        Args:
            inputs: same dict as ``call``.
            validation: when True return ``{'loss': ...}``; when False update
                ``metrics_dict`` in place and return None.
        """
        true_labels = self.extract_relation_labels(inputs['re_mask'])
        probabilities = self(inputs, training=False)
        if validation:
            return {'loss': self._relation_loss(true_labels, probabilities)}
        predictions = tf.argmax(probabilities, axis=-1)
        for i in range(self.num_relations):
            tp = tf.reduce_sum(tf.cast((predictions == i) & (true_labels == i), tf.int32))
            fp = tf.reduce_sum(tf.cast((predictions == i) & (true_labels != i), tf.int32))
            fn = tf.reduce_sum(tf.cast((predictions != i) & (true_labels == i), tf.int32))
            tn = tf.reduce_sum(tf.cast((predictions != i) & (true_labels != i), tf.int32))

            self.metrics_dict[i].assign_add([tp, tn, fp, fn])



In [None]:
from datasets import load_dataset
import random
from transformers import AutoTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Module-level tokenizer used by prepare_data(), which calls it with
# return_offsets_mapping=True and return_overflowing_tokens=True.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# ChemProt corpus; the cells below read its 'train', 'validation' and 'test' splits.
ds = load_dataset("bigbio/chemprot", "chemprot_full_source")

def gen(i, j, ner_tags_segment, next_tags):
    """Enumerate candidate (span, span) pairs that start at positions i and j.

    ``next_tags[tag]`` holds two lists of tag ids (variant 0 and variant 1)
    that may continue a span whose first token carries ``tag``.  For every
    combination of variants the span starting at ``i`` and the span starting
    at ``j`` are extended to the right while the following tags are allowed.
    A combination is kept only when BOTH spans grew past their start token.

    Args:
        i: start index of the first span.
        j: start index of the second span.
        ner_tags_segment: sequence of tag ids, one per token.
        next_tags: mapping ``tag id -> [variant0_ids, variant1_ids]``.

    Returns:
        List of ``(i, end_i, j, end_j)`` tuples with inclusive end indices,
        ordered by (first-span variant, second-span variant): (0, 0), (0, 1),
        (1, 0), (1, 1).
    """
    n_tokens = len(ner_tags_segment)

    def span_end(start, allowed):
        # Walk right from ``start`` while the tag is an allowed continuation;
        # stop at the end of the sequence instead of indexing past it.
        pos = start + 1
        while pos < n_tokens and ner_tags_segment[pos] in allowed:
            pos += 1
        return pos - 1

    first_variants = next_tags[ner_tags_segment[i]]
    second_variants = next_tags[ner_tags_segment[j]]

    tem = []
    for first_choice in (0, 1):
        first_end = span_end(i, first_variants[first_choice])
        for second_choice in (0, 1):
            second_end = span_end(j, second_variants[second_choice])
            if second_end != j and first_end != i:
                tem.append((i, first_end, j, second_end))
    return tem

def _subsample_negatives(candidates, n_positive, cap):
    """Pick the negative rows to keep for one window.

    Keeps ``n_positive`` random rows when there is at least one positive pair
    and enough candidates; otherwise keeps ``cap`` random rows when at least
    ``cap`` candidates exist; otherwise keeps all candidates.
    """
    if n_positive != 0 and n_positive <= len(candidates):
        return random.sample(candidates, n_positive)
    if len(candidates) >= cap:
        return random.sample(candidates, cap)
    return candidates


def _append_new_negatives(candidates, seen_pairs, sink, label):
    """Append every not-yet-seen span pair to ``sink`` as a ``label`` row.

    ``seen_pairs`` is updated in place so a pair is emitted at most once.
    """
    for candidate in candidates:
        if candidate not in seen_pairs:
            seen_pairs.add(candidate)
            sink.append([candidate[0], candidate[1], label, candidate[2], candidate[3]])


def prepare_data(row, tag_dict, r_dict, tag_dict_map, next_tags, max_length=128, stride=10, padding=True,
                 no_relation_label=11, max_negatives=15):
    """Tokenize one document into windows with NER tags and relation rows.

    Uses the module-level ``tokenizer`` and ``gen``.

    Args:
        row: record with ``'text'``, ``'entities'`` (``'id'``, ``'type'``,
            ``'offsets'``) and ``'relations'`` (``'type'``, ``'arg1'``,
            ``'arg2'``).
        tag_dict: ``'B-<type>'`` / ``'I-<type>'`` -> raw tag id.
        r_dict: relation type -> relation id.
        tag_dict_map: sorted tuple of raw tag ids on a token (always
            containing 0) -> merged tag id.
        next_tags: continuation table passed through to ``gen``.
        max_length: window length in tokens.
        stride: overlap between consecutive windows.
        padding: pad every window to ``max_length`` when True.
        no_relation_label: relation id written on generated negative rows.
        max_negatives: fallback sample size for negatives (see
            ``_subsample_negatives``).

    Returns:
        ``(input_ids, attention_mask, ner_tags, relation_mask)``: four lists
        with one entry per window.  Relation rows have the form
        ``[e1_start, e1_end, relation_id, e2_start, e2_end]`` (token indices,
        inclusive ends).
    """
    text = row['text']
    entities = row['entities']
    relations = row['relations']

    tokenized_inputs = tokenizer(
        text,
        return_offsets_mapping=True,
        return_overflowing_tokens=True,
        max_length=max_length,
        truncation=True,
        stride=stride,
        padding='max_length' if padding else False
    )

    # Merged tag ids whose underlying tag tuple contains at least one B- tag,
    # i.e. positions where an entity starts.
    b_raw_ids = {tag_id for tag_name, tag_id in tag_dict.items() if tag_name.startswith('B-')}
    b_merged_ids = {merged for combo, merged in tag_dict_map.items() if b_raw_ids.intersection(combo)}

    input_ids = []
    attention_mask = []
    ner_tags = []
    relation_mask = []

    for window_idx, offset_mapping in enumerate(tokenized_inputs['offset_mapping']):
        input_ids_segment = tokenized_inputs['input_ids'][window_idx]
        attention_mask_segment = tokenized_inputs['attention_mask'][window_idx]

        # Every position starts with the raw tag 0; entity tags are appended to the tuple.
        ner_tags_segment = [(0,) for _ in range(max_length)]
        rel_mask_segment = []
        entity_map = {}  # entity id -> [first token index, last token index]
        b_tag_indices = []

        for entity_id, entity_type, (start_char, end_char) in zip(entities['id'], entities['type'], entities['offsets']):
            entity_start_found = False
            for token_idx, (token_start, token_end) in enumerate(offset_mapping):
                # First and last slot of a full window are skipped.
                if token_idx == 0 or token_idx == max_length - 1:
                    continue
                # NOTE(review): offset_mapping[-2] is used as "last real token
                # of the window".  When the window is padded to max_length
                # that slot presumably holds a padding token with offset
                # (0, 0), in which case this test rejects every entity of the
                # window -- confirm against the tokenizer output.
                if token_start == start_char and offset_mapping[-2][1] >= end_char:
                    ner_tags_segment[token_idx] += (tag_dict[f'B-{entity_type}'],)
                    entity_map[entity_id] = [token_idx, token_idx]
                    entity_start_found = True
                elif entity_start_found and token_start < end_char:
                    ner_tags_segment[token_idx] += (tag_dict[f'I-{entity_type}'],)
                    entity_map[entity_id][1] = token_idx

        rel_pairs = set()
        for rel_type, arg1_id, arg2_id in zip(relations['type'], relations['arg1'], relations['arg2']):
            # Only relations whose two arguments were both tagged in this window are kept.
            if arg1_id in entity_map and arg2_id in entity_map:
                first_start, first_end = entity_map[arg1_id]
                second_start, second_end = entity_map[arg2_id]
                rel_mask_segment.append([first_start, first_end, r_dict[rel_type], second_start, second_end])
                rel_pairs.add((first_start, first_end, second_start, second_end))

        for position in range(max_length):
            ner_tags_segment[position] = tag_dict_map[tuple(sorted(set(ner_tags_segment[position])))]
            if ner_tags_segment[position] in b_merged_ids:
                b_tag_indices.append(position)

        seen_pairs = set(rel_pairs)

        # Negatives that share one argument start with a positive pair.
        neg_samples = []
        for positive in rel_pairs:
            for b_idx in b_tag_indices:
                _append_new_negatives(gen(positive[0], b_idx, ner_tags_segment, next_tags),
                                      seen_pairs, neg_samples, no_relation_label)
                _append_new_negatives(gen(b_idx, positive[2], ner_tags_segment, next_tags),
                                      seen_pairs, neg_samples, no_relation_label)
        neg_samples = _subsample_negatives(neg_samples, len(rel_pairs), max_negatives)

        # Negatives between arbitrary entity starts (pairs already seen above stay excluded).
        neg_samples_r = []
        for first_idx in b_tag_indices:
            for second_idx in b_tag_indices:
                _append_new_negatives(gen(first_idx, second_idx, ner_tags_segment, next_tags),
                                      seen_pairs, neg_samples_r, no_relation_label)
        neg_samples_r = _subsample_negatives(neg_samples_r, len(rel_pairs), max_negatives)

        input_ids.append(input_ids_segment)
        attention_mask.append(attention_mask_segment)
        ner_tags.append(ner_tags_segment)
        relation_mask.append(rel_mask_segment + neg_samples + neg_samples_r)

    return input_ids, attention_mask, ner_tags, relation_mask



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

chemprot.py:   0%|          | 0.00/15.8k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/80.7k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/1.20M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/950k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/727k [00:00<?, ?B/s]

Generating sample split:   0%|          | 0/50 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/1020 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/800 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/612 [00:00<?, ? examples/s]

In [None]:
# Raw BIO tag ids per entity type; 0 is implicitly the "no entity" tag that
# prepare_data() puts on every token before adding entity tags.
tag_dict = {
    'B-GENE-Y': 1, 'I-GENE-Y': 2,
    'B-GENE-N': 3, 'I-GENE-N': 4,
    'B-CHEMICAL': 5, 'I-CHEMICAL': 6
}

# Sorted tuple of raw tag ids present on one token -> merged class id (0..18).
# prepare_data() looks every token up here, so a tuple missing from this
# table raises KeyError.
tag_dict_map = {
    (0,): 0, (0, 1): 1, (0, 2, 3, 5): 2,
    (0, 4): 3, (0, 1, 6): 4, (0, 2, 6): 5, (0, 2): 6, (0, 3, 6): 7,
    (0, 5): 8, (0, 4, 6): 9, (0, 2, 4, 6): 10, (0, 2, 4): 11,
    (0, 5, 6): 12, (0, 3): 13, (0, 6): 14, (0, 1, 5): 15, (0, 2, 5): 16, (0, 3, 5): 17, (0, 4, 5): 18
}

# Raw I- tag id (2, 4, 6) -> merged class ids whose tuple contains that I- tag.
con = {2:[2,5,6,10,11,16], 4:[3,9,10,11,18], 6:[4,5,7,9,10,12,14]}

# Merged class id -> [variant-0 continuation ids, variant-1 continuation ids],
# consumed by gen(). Classes containing a B- tag continue with the matching
# I- classes from `con`; classes containing two B- tags get a second variant.
next_entity = {0:[[0],[]], 1:[con[2],[]], 2:[con[4],con[6]], 3:[[3],[]], 4:[con[2],[]], 5:[[5],[]], 6:[[6],[]], 7:[con[4],[]], 8:[con[6],[]], 9:[[9],[]] ,10:[[10],[]], 11:[[11],[]], 12:[con[6],[]], 13:[con[4],[]], 14:[[14],[]], 15:[con[2],con[6]], 16:[con[6],[]], 17:[con[4],con[6]], 18:[con[6],[]]}

# Relation type -> class id. Id 11 is not listed here: prepare_data() writes
# it on the negative pairs it generates.
r_dict = {
    'CPR:0': 0, 'CPR:1': 1, 'CPR:2': 2, 'CPR:3': 3,
    'CPR:4': 4, 'CPR:5': 5, 'CPR:6': 6, 'CPR:7': 7,
    'CPR:8': 8, 'CPR:9': 9, 'CPR:10': 10
}

def _encode_split(split):
    """Run prepare_data() on every row of ``split`` and concatenate the windows.

    Returns four lists (input ids, attention masks, NER tags, relation rows),
    in the order prepare_data() returns them.
    """
    ids_acc, masks_acc, tags_acc, relations_acc = [], [], [], []
    for row in split:
        input_ids, attention_mask, ner_tags, relation_mask = prepare_data(row, tag_dict, r_dict, tag_dict_map, next_entity)
        ids_acc.extend(input_ids)
        masks_acc.extend(attention_mask)
        tags_acc.extend(ner_tags)
        relations_acc.extend(relation_mask)
    return ids_acc, masks_acc, tags_acc, relations_acc


def _pad_relation_mask(relation_masks):
    """Pad the per-window relation rows to a common count, filling with -1."""
    padded = pad_sequences(
        [np.array(item, dtype=int) for item in relation_masks],
        padding='post',
        value=-1,
        dtype='object'
    )
    return np.array(padded, dtype=int)


def _ner_dataset(model_inputs):
    """tf.data pipeline of ({'input_ids', 'attention_mask'}, ner_tags) examples."""
    return tf.data.Dataset.from_tensor_slices((
        {
            'input_ids': model_inputs['input_ids'],
            'attention_mask': model_inputs['attention_mask']
        },
        model_inputs['ner_tags']
    ))


# ---- training split ----
all_input_ids, all_attention_masks, all_ner_tags, all_relation_masks = _encode_split(ds['train'])

prepared_data = {
    'input_ids': np.array(all_input_ids),
    'attention_mask': np.array(all_attention_masks),
    'ner_tags': np.array(all_ner_tags),
    're_mask': all_relation_masks
}

# Model hyper-parameters shared by the NER and relation models.
hidden_dim = 64
num_tags = 19        # number of merged classes in tag_dict_map
seq_len = 128        # prepare_data()'s default max_length
num_relations = 12   # 11 entries of r_dict + the negative label 11

padded_relation_mask = _pad_relation_mask(prepared_data['re_mask'])

inputs = {
    'input_ids': prepared_data['input_ids'],
    'attention_mask': prepared_data['attention_mask'],
    're_mask': padded_relation_mask,
    'ner_tags': prepared_data['ner_tags']
}

dataset = _ner_dataset(inputs)

# ---- validation split ----
vall_input_ids, vall_attention_masks, vall_ner_tags, vall_relation_masks = _encode_split(ds['validation'])

vprepared_data = {
    'input_ids': np.array(vall_input_ids),
    'attention_mask': np.array(vall_attention_masks),
    'ner_tags': np.array(vall_ner_tags),
    're_mask': vall_relation_masks
}

vpadded_relation_mask = _pad_relation_mask(vprepared_data['re_mask'])

vinputs = {
    'input_ids': vprepared_data['input_ids'],
    'attention_mask': vprepared_data['attention_mask'],
    're_mask': vpadded_relation_mask,
    'ner_tags': vprepared_data['ner_tags']
}

vdataset = _ner_dataset(vinputs)

# ---- batching / loop settings ----
batch_size = 30
dataset = dataset.batch(batch_size)
vdataset = vdataset.batch(batch_size)
num_epochs = 30
# Upper bound on the batches consumed per pass (the loops use dataset.take(num_batches)).
num_batches = 131


In [None]:

# The LSTM width comes from `hidden_dim`: RelationExtractionModel later copies
# this model's LSTM weights into a BiLSTM built with `hidden_dim`, so the two
# sizes have to agree.
ner_model = NERModel(num_tags, hidden_dim, seq_len)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

ner_model.compile(optimizer=optimizer)

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    train_loss = 0
    val_loss = 0
    t_b, v_b = 0, 0

    # Training pass: one optimisation step per batch.
    for batch_data in dataset.take(num_batches):
        train_loss += ner_model.train_step(batch_data)['loss']
        t_b += 1

    # Validation pass: validation=True makes test_step return the CRF loss
    # instead of updating the confusion counters.
    for batch_data in vdataset.take(num_batches):
        val_loss += ner_model.test_step(batch_data, validation = True)['loss']
        v_b += 1

    print(f"End of Epoch {epoch + 1}, train loss: {train_loss / t_b} validation loss: {val_loss / v_b}")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/30
End of Epoch 1, train loss: 105.88652801513672 validation loss: 60.347679138183594
Epoch 2/30
End of Epoch 2, train loss: 50.355682373046875 validation loss: 40.88734436035156
Epoch 3/30
End of Epoch 3, train loss: 37.30394744873047 validation loss: 32.883026123046875
Epoch 4/30
End of Epoch 4, train loss: 30.863147735595703 validation loss: 28.685556411743164
Epoch 5/30
End of Epoch 5, train loss: 27.3444881439209 validation loss: 26.423643112182617
Epoch 6/30
End of Epoch 6, train loss: 28.149038314819336 validation loss: 39.423118591308594
Epoch 7/30
End of Epoch 7, train loss: 29.380050659179688 validation loss: 26.85798454284668
Epoch 8/30
End of Epoch 8, train loss: 24.111572265625 validation loss: 21.596267700195312
Epoch 9/30
End of Epoch 9, train loss: 19.756418228149414 validation loss: 19.128002166748047
Epoch 10/30
End of Epoch 10, train loss: 18.05756378173828 validation loss: 17.989238739013672
Epoch 11/30
End of Epoch 11, train loss: 16.911771774291992 validat

In [None]:
# Relation-extraction datasets: each example is a single feature dict (the
# relation labels are read from 're_mask' inside the model's step methods).
dataset = tf.data.Dataset.from_tensor_slices({
    'input_ids': inputs['input_ids'],
    'attention_mask': inputs['attention_mask'],
    're_mask': padded_relation_mask,
    'ner_tags': inputs['ner_tags'],
}).batch(batch_size)

vdataset = tf.data.Dataset.from_tensor_slices({
    'input_ids': vinputs['input_ids'],
    'attention_mask': vinputs['attention_mask'],
    're_mask': vpadded_relation_mask,
    'ner_tags': vinputs['ner_tags'],
}).batch(batch_size)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
re_model = RelationExtractionModel(ner_model, num_relations, hidden_dim)
re_model.compile(optimizer=optimizer)
num_epochs=20

for epoch in range(num_epochs):
    epoch_no = epoch + 1
    print(f"Epoch {epoch_no}/{num_epochs}")

    # Summed batch losses and batch counts for the two passes.
    train_loss, t_b = 0, 0
    for batch_data in dataset.take(num_batches):
        step_result = re_model.train_step(batch_data)
        train_loss += step_result['loss']
        t_b += 1

    val_loss, v_b = 0, 0
    for batch_data in vdataset.take(num_batches):
        step_result = re_model.test_step(batch_data, validation = True)
        val_loss += step_result['loss']
        v_b += 1

    print(f"End of Epoch {epoch_no}, train loss: {train_loss / t_b} validation loss: {val_loss / v_b}")

In [None]:

# Encode the test split. This rebinds `prepared_data`, `padded_relation_mask`
# and `inputs`, which held the training arrays until now.
all_input_ids, all_attention_masks, all_ner_tags, all_relation_masks = [], [], [], []
test_accumulators = (all_input_ids, all_attention_masks, all_ner_tags, all_relation_masks)
for row in ds['test']:
    encoded_windows = prepare_data(row, tag_dict, r_dict, tag_dict_map, next_entity)
    # prepare_data returns (input_ids, attention_mask, ner_tags, relation_mask).
    for accumulator, windows in zip(test_accumulators, encoded_windows):
        accumulator.extend(windows)

prepared_data = dict(
    input_ids=np.array(all_input_ids),
    attention_mask=np.array(all_attention_masks),
    ner_tags=np.array(all_ner_tags),
    re_mask=all_relation_masks,
)

# Pad the per-window relation rows with -1 so they form one rectangular int array.
relation_rows = [np.array(item, dtype=int) for item in prepared_data['re_mask']]
padded_relation_mask = np.array(
    pad_sequences(relation_rows, padding='post', value=-1, dtype='object'),
    dtype=int,
)

inputs = dict(
    input_ids=prepared_data['input_ids'],
    attention_mask=prepared_data['attention_mask'],
    re_mask=padded_relation_mask,
    ner_tags=prepared_data['ner_tags'],
)

In [None]:
# Accumulate the NER confusion counters over the arrays currently in `inputs`.
ner_model.reset_metrics()
ner_features = {
    'input_ids': inputs['input_ids'],
    'attention_mask': inputs['attention_mask'],
}
dataset = tf.data.Dataset.from_tensor_slices((ner_features, inputs['ner_tags'])).batch(batch_size)

# Without validation=True, test_step decodes and updates ner_model.metrics_dict.
for batch_data in dataset.take(num_batches):
    ner_model.test_step(batch_data)


In [None]:
# Accumulate the relation confusion counters over the arrays currently in `inputs`.
re_model.reset_metrics()
re_features = {
    'input_ids': inputs['input_ids'],
    'attention_mask': inputs['attention_mask'],
    're_mask': padded_relation_mask,
    'ner_tags': inputs['ner_tags'],
}
dataset = tf.data.Dataset.from_tensor_slices(re_features).batch(batch_size)

# Without validation=True, test_step updates re_model.metrics_dict.
for batch_data in dataset.take(num_batches):
    re_model.test_step(batch_data)





In [None]:
# Write the relation model to disk, then trigger a browser download of the file.
model_save_path = '/content/re_model.keras'
re_model.save(model_save_path)

# google.colab is imported after saving, so the file is written even when this import fails.
from google.colab import files
files.download(model_save_path)


In [None]:
# Write the NER model to disk, then trigger a browser download of the file.
# Note: this rebinds `model_save_path`, which the previous cell set to the RE model path.
model_save_path = '/content/ner_model_new267.keras'
ner_model.save(model_save_path)

# google.colab is imported after saving, so the file is written even when this import fails.
from google.colab import files
files.download(model_save_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:

def calculate_final_metrics(model, num_classes, threshold):
    """Summarise the per-class confusion counters stored on ``model``.

    Args:
        model: object whose ``metrics_dict[i].numpy()`` yields the counters of
            class ``i`` in the order ``(tp, tn, fp, fn)``.
        num_classes: class ids ``0 .. num_classes - 1`` are inspected.
        threshold: classes with fewer than ``threshold`` gold instances
            (``tp + fn``) are left out of every figure below.

    Returns:
        Dict with the unweighted means of per-class precision, recall and F1
        over the kept classes, plus ``"accuracy"``, which is computed as
        sum(tp) / (sum(tp) + sum(fp)) over the kept classes.

    Side effects:
        Prints the per-class precision, recall and F1 lists.
    """
    eps = tf.keras.backend.epsilon()  # keeps the divisions finite when a denominator is 0
    avg_precision, avg_recall, avg_f1_score = [], [], []
    total_tp, total_fp = 0, 0

    for i in range(num_classes):
        tp, tn, fp, fn = model.metrics_dict[i].numpy()
        if tp + fn < threshold:
            continue

        total_tp += tp
        total_fp += fp

        precision = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        f1 = 2 * (precision * recall) / (precision + recall + eps)

        avg_precision.append(precision)
        avg_recall.append(recall)
        avg_f1_score.append(f1)

    print(avg_precision)
    print(avg_recall)
    print(avg_f1_score)
    accuracy = (total_tp) / (total_tp + total_fp + eps)
    return {
        "average_precision": tf.reduce_mean(avg_precision),
        "average_recall": tf.reduce_mean(avg_recall),
        "average_f1_score": tf.reduce_mean(avg_f1_score),
        "accuracy": accuracy
    }


In [None]:
# Summarise the NER counters: 19 classes, ignoring classes with fewer than 10 gold tokens.
print('NER task metrics')
ner_metrics = calculate_final_metrics(ner_model, 19, 10)
for metric_name, metric_value in ner_metrics.items():
    print(f"{metric_name}: {metric_value}")

In [None]:
# Summarise the relation counters: 12 classes, ignoring classes with fewer than 30 gold pairs.
print('RE task metrics')
re_metrics = calculate_final_metrics(re_model, 12, 30)
for metric_name, metric_value in re_metrics.items():
    print(f"{metric_name}: {metric_value}")