### Training the model

In [None]:
import h5py
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from transformers import TFBertModel, TFBertForQuestionAnswering
from tensorflow.keras.layers import Dense, Input, Flatten, Activation, GlobalMaxPool1D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import multi_gpu_model, to_categorical
import tensorflow.keras.backend as K
from sklearn.metrics import f1_score, accuracy_score
from transformers.data.processors.squad import SquadV2Processor
from transformers import BertTokenizer
from transformers.data.processors.squad import squad_convert_examples_to_features
from tensorflow.keras.callbacks import ModelCheckpoint
from tqdm.notebook import tqdm, tnrange
import pandas as pd
import time

In [None]:
# Open the pre-tokenised SQuAD train/dev HDF5 files read-only. The handles are
# deliberately left open: later cells read datasets out of them by key.
train_data = h5py.File(
    r'C:/w266/cris/BERTVision/data/squad_train.h5',
    mode='r',
)
dev_data = h5py.File(
    r'C:/w266/cris/BERTVision/data/squad_dev.h5',
    mode='r',
)
# Last expression of the cell: shows which datasets the training file exposes.
train_data.keys()

In [None]:
# Row selection/order for the training set, stored as a Python expression on
# the first line of indices.txt.
# NOTE(review): eval() executes whatever that line contains. Only run this on a
# file you generated yourself; if the line is a plain list literal,
# ast.literal_eval is a safer replacement.
with open('indices.txt', 'r') as indices_file:
    # The context manager closes the file handle, which the bare open() leaked.
    indices = np.array(eval(indices_file.readline()))

In [None]:
max_seq_length = 386

# Training features, selected/ordered by the `indices` array read from
# indices.txt. Each dataset is materialised as int32 before indexing.
train_ids = np.array(train_data['input_ids'], dtype=np.int32)[indices]
train_masks = np.array(train_data['attention_mask'], dtype=np.int32)[indices]
train_tokens = np.array(train_data['token_type_ids'], dtype=np.int32)[indices]

# Dev features are used in file order (no re-indexing).
dev_ids = np.array(dev_data['input_ids'], dtype=np.int32)
dev_masks = np.array(dev_data['attention_mask'], dtype=np.int32)
dev_tokens = np.array(dev_data['token_type_ids'], dtype=np.int32)

train_input_start = np.array(train_data['input_start'], dtype=np.int32)[indices]
train_input_end = np.array(train_data['input_end'], dtype=np.int32)[indices]

# Binary label per training feature: 0 when start + end > 0 (an answer span is
# present), 1 otherwise (no answer).
answer_no_answer = np.where(train_input_start + train_input_end > 0, 0, 1)
# Pin the one-hot width to two columns. Without num_classes the width is
# inferred from the largest label present, so a selection containing only
# class 0 would yield a single column and no longer match the 2-unit head.
answer_no_answer = to_categorical(answer_no_answer, num_classes=2).astype(np.uint8)


In [None]:
# Read the raw SQuAD v2 dev examples from the data directory.
processor = SquadV2Processor()
data_raw = processor.get_dev_examples("C:/w266/cris/BERTVision/data")

# Ground truth per question id: 1 when the example is flagged impossible,
# 0 otherwise (stored as uint8).
dev_answers = {
    example.qas_id: np.uint8(example.is_impossible)
    for example in data_raw
}

# Convert the dev examples to model features. One example may yield several
# features, so the question id of every feature is kept to regroup
# predictions later.
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")
dd_raw = squad_convert_examples_to_features(
    examples=data_raw,
    tokenizer=tokenizer,
    max_seq_length=386,
    doc_stride=128,
    max_query_length=64,
    is_training=False,
)

dev_predict_qasids = [feature.qas_id for feature in dd_raw]

In [None]:
import pickle

# Persist the per-feature question ids so dev predictions can be regrouped by
# question without re-running the feature conversion.
with open('./data/dev_qasids.pkl', mode='wb') as out_file:
    pickle.dump(
        dev_predict_qasids,
        out_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [None]:
def get_base_bert_model(max_seq_length=386, pretrained_name='bert-large-uncased'):
    """Build a pretrained BERT encoder topped with a 2-unit classification head.

    Args:
        max_seq_length: Fixed length of each of the three integer inputs.
            Defaults to 386, the value previously hard-coded here.
        pretrained_name: Identifier handed to ``TFBertModel.from_pretrained``.
            Defaults to ``'bert-large-uncased'``, the previous hard-coded value.

    Returns:
        An uncompiled Keras ``Model`` named
        ``'BERT_SQuADv2_BinaryClassification'`` whose inputs are
        ``[input_ids, input_masks, input_tokens]`` and whose output is the
        2-unit Dense layer applied to BERT's pooled output. The Dense layer has
        no activation, so the outputs are raw logits.
    """
    bert_layer = TFBertModel.from_pretrained(pretrained_name)

    input_ids = Input((max_seq_length,), dtype=tf.int32, name='input_ids')
    input_masks = Input((max_seq_length,), dtype=tf.int32, name='input_masks')
    input_tokens = Input((max_seq_length,), dtype=tf.int32, name='input_tokens')
    model_inputs = [input_ids, input_masks, input_tokens]

    # Index 1 of the BERT outputs is the pooled (CLS token) representation;
    # index 0 would be the per-token sequence output.
    cls_output = bert_layer(model_inputs)[1]
    logits = Dense(2, name='dense_2', kernel_initializer='he_normal')(cls_output)

    return Model(
        inputs=model_inputs,
        outputs=logits,
        name='BERT_SQuADv2_BinaryClassification',
    )

In [None]:
bert_base = get_base_bert_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
bert_base.summary()

In [None]:
# The model head emits raw logits, so the loss is told to apply softmax itself
# (from_logits=True). Labels are expected one-hot (categorical cross-entropy).
loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
opt = keras.optimizers.Adam(learning_rate=1e-5)
bert_base.compile(
    optimizer=opt,
    loss=[loss_fn],
    metrics=['accuracy'],
)

# Train BERT and store weights

In [None]:
batch_size, epochs, intervals = 4, 6, 10
n_train = len(train_ids)
weights_prefix = './weights/bert_squadv2_binary_classification_weights_epoch_'

# Contiguous slices are used instead of rebinding the global `indices` to a
# range: that rebinding overwrote the index array loaded from indices.txt (so
# re-running the data cell silently changed the training selection), and
# indexing with a range copied every chunk instead of taking a view.
for i in range(epochs):
    if i == 0:
        # First epoch: train on `intervals` consecutive slices and checkpoint
        # after each one, so partial-epoch weights can be evaluated later.
        for b in range(intervals):
            start = b * n_train // intervals
            stop = (b + 1) * n_train // intervals  # equals n_train on the last slice
            if b == intervals - 1:
                # The final slice completes epoch 1.
                fname = f'{weights_prefix}1.h5'
            else:
                fname = f'{weights_prefix}0__{b + 1}_tenth.h5'

            history = bert_base.fit(
                x=[train_ids[start:stop], train_masks[start:stop], train_tokens[start:stop]],
                y=answer_no_answer[start:stop],
                epochs=1, batch_size=batch_size, verbose=True, shuffle=True)
            print(f"\nSaving `{fname}`...\n")
            bert_base.save_weights(fname)
    else:
        # Later epochs: one pass over the full training set, one checkpoint each.
        fname = f'{weights_prefix}{i + 1}.h5'
        history = bert_base.fit(
            x=[train_ids, train_masks, train_tokens],
            y=answer_no_answer,
            epochs=1, batch_size=batch_size, shuffle=True, verbose=True)
        print(f"\nSaving `{fname}`...\n")
        bert_base.save_weights(fname)
              

In [None]:
# `history` is rebound on every fit() call in the training loop, so this shows
# the metrics of the final fit() call only.
print(history.history)

# Predict Against Dev, Capture F1/EM (EM here is plain accuracy from `accuracy_score`)

In [None]:
base_fname = './weights/bert_squadv2_binary_classification_weights_epoch_'

# Checkpoints written by the training loop: (intervals - 1) partial first-epoch
# files followed by one file per completed epoch. Names and epoch labels are
# built from integers; the previous float16 round-trip leaked values such as
# 0.0999755859375 into results['epoch'] and relied on str(float16) formatting
# to produce the file names.
tenth_steps = range(1, intervals)
full_epochs = range(1, epochs + 1)

# Entry 0 is a round for the untuned BERT (no weights file to load).
epoch_list = [0] + [k / intervals for k in tenth_steps] + list(full_epochs)
file_list = (
    ['n/a']
    + [f'{base_fname}0__{k}_tenth.h5' for k in tenth_steps]
    + [f'{base_fname}{k}.h5' for k in full_epochs]
)

# Ground truth indexed by question id. It does not depend on the checkpoint,
# so it is built once instead of on every loop iteration.
ans = pd.DataFrame(dev_answers, index=[0]).T
ans.columns = ['answer']

results = {'epoch': [], 'f1': [], 'em': []}
# zip() has no length, so tqdm needs an explicit total to show real progress.
for e, f in tqdm(zip(epoch_list, file_list), total=len(epoch_list)):
    print(f"Predicting DEV results for epoch [{str(e)}] from file '{f}'...")
    if f == 'n/a':
        # Fresh model: pretrained encoder with an untrained classification head.
        bert_base = get_base_bert_model()
    else:
        bert_base.load_weights(f)

    # Per-feature class index (argmax over the two output logits).
    pred = bert_base.predict([dev_ids, dev_masks, dev_tokens])
    pred = np.argmax(pred, axis=1).astype(np.uint8)

    # Collapse the per-feature predictions to one prediction per question id.
    # NOTE(review): 'max' marks a question as class 1 as soon as any one of
    # its features predicts 1. If class 1 means "no answer in this window",
    # 'min' may be the intended aggregation - confirm before relying on scores.
    df = (
        pd.DataFrame({'qas_id': dev_predict_qasids, 'prediction': pred})
        .groupby(by='qas_id')
        .agg({'prediction': 'max'})
    )
    df = df.merge(ans, how='inner', left_index=True, right_index=True)

    # 'em' is plain accuracy over questions; 'f1' is sklearn's binary F1.
    f1 = f1_score(y_true=df.answer.values, y_pred=df.prediction.values)
    em = accuracy_score(y_true=df.answer.values, y_pred=df.prediction.values)

    results['epoch'].append(e)
    results['f1'].append(f1)
    results['em'].append(em)

    print(f"epoch [{e}] f1 score: {f1}")
    print(f"epoch [{e}] accuracy: {em}")


In [None]:
# Last expression of the cell: displays the collected epoch / f1 / em lists as
# a nested dict keyed by column name, then row position.
pd.DataFrame(results).to_dict()