## 1. Importing dependencies

In [3]:
# !pip install tf-models-official

In [2]:
import tensorflow as tf
# Turn on TensorFlow's memory-growth option for each visible GPU and echo the
# resulting setting so the cell output shows what was configured.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print('gpu {}'.format(gpu))
    tf.config.experimental.set_memory_growth(gpu, True)
    print('memory growth: {}'.format(tf.config.experimental.get_memory_growth(gpu)))

gpu PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
memory growth: True


In [7]:
import tensorflow_hub as hub
from official.nlp.bert.tokenization import FullTokenizer
from official.nlp.bert.input_pipeline import create_squad_dataset
from official.nlp.data.squad_lib import generate_tf_record_from_json_file

from official.nlp import optimization

from official.nlp.data.squad_lib import read_squad_examples
from official.nlp.data.squad_lib import FeatureWriter
from official.nlp.data.squad_lib import convert_examples_to_features
from official.nlp.data.squad_lib import write_predictions

In [8]:
import numpy as np
import math
import random
import time
import json
import collections
import os


## 2. Data preprocessing

In [9]:
# Convert the SQuAD v1.1 training JSON into a TFRecord file, using the given
# vocab file. The returned metadata is kept because later cells read
# "max_seq_length" from it and dump it to disk as JSON.
input_meta_data = generate_tf_record_from_json_file("/home/dawidkubicki/Datasets/squad/train-v1.1.json",
                                                   "/home/dawidkubicki/Datasets/squad/vocab.txt",
                                                   "/home/dawidkubicki/Datasets/squad/train-v1.1.tf_record")

In [10]:
# Persist the preprocessing metadata as 4-space-indented JSON followed by a newline.
with tf.io.gfile.GFile("/home/dawidkubicki/Datasets/squad/train_meta_data", "w") as writer:
    writer.write("{}\n".format(json.dumps(input_meta_data, indent=4)))

In [12]:
# Batch size handed to the input pipeline below.
BATCH_SIZE = 4

# Build the training dataset from the TFRecord file written earlier, using the
# sequence length recorded in the preprocessing metadata.
train_dataset = create_squad_dataset(
    "/home/dawidkubicki/Datasets/squad/train-v1.1.tf_record",
    input_meta_data["max_seq_length"], # 384 according to the original author's note
    BATCH_SIZE,
    is_training=True
)


## 3. Model building

SQUAD layer

In [13]:
class BertSquadLayer(tf.keras.layers.Layer):
    """Span-prediction head: maps per-token encodings to start and end logits.

    A single ``Dense(2)`` projection is applied to the input; its two output
    channels are then split into separate start-position and end-position
    logits.
    """

    def __init__(self):
        super(BertSquadLayer, self).__init__()
        # Two output units per token: channel 0 is returned as the start
        # logits, channel 1 as the end logits (see ``call``).
        self.final_dense = tf.keras.layers.Dense(
            units=2,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)
        )

    def call(self, inputs):
        """Project token encodings to start/end logits.

        Args:
            inputs: Token encodings; the transpose below requires a rank-3
                tensor, treated here as (batch_size, seq_len, hidden).

        Returns:
            Tuple ``(start_logits, end_logits)``, each of shape
            (batch_size, seq_len).
        """
        logits = self.final_dense(inputs)  # (batch_size, seq_len, 2)

        # Move the start/end channel to the front so it can be unstacked.
        # The shape note must be a comment: without the leading '#' Python
        # tried to call the transposed tensor with undefined names.
        logits = tf.transpose(logits, [2, 0, 1])  # (2, batch_size, seq_len)
        start_logits, end_logits = tf.unstack(logits, axis=0)  # 2 * (batch_size, seq_len)
        return start_logits, end_logits  # start and end logits

Whole model

In [26]:
class BERTSquad(tf.keras.Model):
    """BERT encoder loaded from TF Hub with a SQuAD start/end span head on top."""

    def __init__(self,
                 name="bert_squad"):
        super(BERTSquad, self).__init__(name=name)

        # trainable=True so the encoder weights are updated during fine-tuning.
        self.bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3",
                                         trainable=True)

        self.squad_layer = BertSquadLayer()

    def apply_bert(self, inputs):
        """Run the hub encoder and return its per-token sequence output.

        Args:
            inputs: Mapping with "input_word_ids", "input_mask" and
                "input_type_ids" entries. These are the feature names the
                SQuAD input pipeline is expected to emit; the previous
                "segment_type_ids" lookup did not match any of them.

        Returns:
            The encoder's "sequence_output" tensor.
        """
        # Version 3 of this hub model takes a dict of the three input tensors
        # and returns a dict of outputs, so the legacy list-in / tuple-out
        # calling convention is not used here.
        # NOTE(review): confirm against the pinned hub model version.
        encoder_inputs = {
            "input_word_ids": inputs["input_word_ids"],
            "input_mask": inputs["input_mask"],
            "input_type_ids": inputs["input_type_ids"],
        }
        encoder_outputs = self.bert_layer(encoder_inputs)

        return encoder_outputs["sequence_output"]

    def call(self, inputs):
        """Return ``(start_logits, end_logits)`` for a batch of encoded inputs."""
        seq_output = self.apply_bert(inputs)

        start_logits, end_logits = self.squad_layer(seq_output)

        return start_logits, end_logits
    

## 4. Training

Setting up the hyperparameters, model, optimizer, and loss

In [27]:
# --- Training hyperparameters ---
TRAIN_DATA_SIZE = 88641      # not referenced by the cells visible in this notebook section
NB_BATCHES_TRAIN = 2000      # number of batches taken from the dataset for training
BATCH_SIZE = 4               # restated here; the dataset cell above sets the same value
NB_EPOCHS = 3                # passes over the reduced training set
INIT_LR = 5e-5               # initial learning rate passed to the optimizer
WARMUP_STEPS = int(0.1 * NB_BATCHES_TRAIN)  # 10% of the per-epoch batch count

In [28]:
# Restrict training to the first NB_BATCHES_TRAIN batches of the dataset.
train_dataset_light = train_dataset.take(NB_BATCHES_TRAIN)

In [29]:
# Instantiate the model; the constructor loads the hub encoder layer.
bert_squad = BERTSquad()

In [30]:
# The training loop makes NB_EPOCHS passes over NB_BATCHES_TRAIN batches, so the
# learning-rate schedule has to span all of those steps. With only one epoch's
# worth of steps the decay reaches its end value after the first epoch and the
# remaining epochs train at that floor.
# NOTE(review): WARMUP_STEPS is still 10% of a single epoch; adjust if warmup
# should scale with the full run.
optimizer = optimization.create_optimizer(init_lr=INIT_LR,
                                         num_train_steps=NB_BATCHES_TRAIN * NB_EPOCHS,
                                         num_warmup_steps=WARMUP_STEPS)

In [31]:
def squad_loss_fn(labels, model_outputs):
    """Average the start-position and end-position cross-entropy losses.

    Args:
        labels: Mapping with "start_positions" and "end_positions" entries
            holding the target token indices.
        model_outputs: Pair ``(start_logits, end_logits)`` as returned by the
            model.

    Returns:
        Scalar tensor: the mean of the two batch-averaged sparse categorical
        cross-entropy losses.
    """
    # Fixes two typos: the local was bound as `stat_positions` (so
    # `start_positions` was undefined below) and the end key was spelled
    # "end positions" with a space.
    start_positions = labels["start_positions"]
    end_positions = labels["end_positions"]
    start_logits, end_logits = model_outputs

    start_loss = tf.keras.backend.sparse_categorical_crossentropy(start_positions, start_logits, from_logits=True)
    end_loss = tf.keras.backend.sparse_categorical_crossentropy(end_positions, end_logits, from_logits=True)

    total_loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2

    return total_loss

In [32]:
# Running mean of the per-batch loss; the training loop resets it each epoch
# and reads it for progress printing.
train_loss = tf.keras.metrics.Mean(name="train_loss")

In [33]:
# Attach the optimizer and the SQuAD loss to the model.
bert_squad.compile(optimizer=optimizer, loss=squad_loss_fn)

In [35]:
# Directory in which training checkpoints are written and looked up.
checkpoint_path = "/home/dawidkubicki/AI-Projects/bert-intuition/checkpoints"

# Track the model in a checkpoint object; the manager keeps only the newest file.
ckpt = tf.train.Checkpoint(bert_squad=bert_squad)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)

# Resume from the most recent checkpoint when one exists. The manager attribute
# is `latest_checkpoint` (singular); the plural spelling raised AttributeError
# exactly when a checkpoint was available to restore.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Latest checkpoint restored!")

In [None]:
# Custom training loop: NB_EPOCHS passes over the reduced dataset, printing the
# running mean loss every 50 batches and saving a checkpoint every 500 batches.
for epoch in range(NB_EPOCHS):
    print("Start of epoch {}".format(epoch+1))
    start = time.time()

    # Start each epoch with a fresh running-mean loss.
    train_loss.reset_states()

    # The original header had unbalanced parentheses and did not parse.
    for batch, (inputs, targets) in enumerate(train_dataset_light):
        with tf.GradientTape() as tape:
            model_outputs = bert_squad(inputs)
            loss = squad_loss_fn(targets, model_outputs)
        gradients = tape.gradient(loss, bert_squad.trainable_variables)
        optimizer.apply_gradients(zip(gradients, bert_squad.trainable_variables))

        train_loss(loss)

        if batch % 50 == 0:
            print("Epoch {} Batch {} Loss {:.4f}".format(epoch+1, batch, train_loss.result()))

        if batch % 500 == 0:
            # `ckpt_manager` is the manager created in the checkpoint cell;
            # it was misspelled `cktp_manager` here.
            ckpt_save_path = ckpt_manager.save()
            print("Saving checkpoint for epoch {} at {}".format(epoch+1, ckpt_save_path))

    # Report the wall-clock duration measured from `start`, which was
    # previously recorded but never used.
    print("Time taken for epoch {}: {:.1f} secs".format(epoch+1, time.time() - start))

## 5. Evaluation