
# Install Requirements



In [None]:
!pip install datasets transformers

In [None]:
!pip install wandb

In [87]:
!pip freeze > requirements.txt

# Import Libraries

In [2]:
import tensorflow as tf
import numpy as np

In [3]:
from transformers import (
    AutoTokenizer,
    TFAutoModelForQuestionAnswering,
    TFDistilBertForQuestionAnswering,
    create_optimizer,
    PushToHubCallback
)

### Login to Weight and Biases & HuggingFace Hub

In [3]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
from huggingface_hub import notebook_login

notebook_login()

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.huggingface/token
Login successful


### Create Callback from WandB

In [6]:
import wandb
from wandb.keras import WandbCallback

In [7]:
wandb.init(project='Question&Answers with TensorFlow')

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjmparejaz[0m ([33mjmparejaz_team[0m). Use [1m`wandb login --relogin`[0m to force relogin


## The task for Question and Answer Chatbot 
To create an AI Based QA chatbot I tried using pretrained models from Transformers Huggingface Library

In [5]:
model_checkpoint = "distilbert-base-uncased"
model_checkpoint2 ="distilbert-base-cased"
model_checkpoint3 ='bert-base-cased-finetuned-squad'

batch_size = 16

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint2)
model = TFAutoModelForQuestionAnswering.from_pretrained(model_checkpoint2)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/354M [00:00<?, ?B/s]

Some layers from the model checkpoint at distilbert-base-cased were not used when initializing TFDistilBertForQuestionAnswering: ['vocab_layer_norm', 'vocab_transform', 'vocab_projector', 'activation_13']
- This IS expected if you are initializing TFDistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['qa_outputs', 'dropout_19']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


the finetuning process was done with [SQUAD V1.1 dataset](https://rajpurkar.github.io/SQuAD-explorer/) download using datasetlibrary from HuggingFace

In [7]:
from datasets import load_dataset
# Load the "squad" dataset
dataset = load_dataset("squad")

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading and preparing dataset squad/plain_text to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Dataset squad downloaded and prepared to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

# Pretraining Process


*   let's understand how the tokenizer process works and follow the recomendations for QA task
*   The maximum lenght of tokens from context + question (features) for this task is reguraly 384, therefore an overlapping value need to be set in case the features are splitted.





In [8]:
# The maximum length of a feature (question and context)
max_length = 384 
# The authorized overlap between two part of the context when splitting it is needed.
doc_stride = 128

In [11]:
# lets find out one example with +384 lenght
for i, example in enumerate(dataset["train"]):
    if len(tokenizer(example["question"], example["context"])["input_ids"]) > 384:
        break
example = dataset["train"][i]

for this taks the padding is required and I chosed to pad in right direction.

In [9]:
pad_on_right = tokenizer.padding_side == "right"

Tokenizer for an example with 400 length

In [18]:
tokenized_examples = tokenizer(
        example["question"],
        example["context"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",

    )

Tokenized example has offset and special tokens within sequence_ids list

In [19]:
#offsetmapping with 2 dim because the example was tokenized as two 
print(tokenized_examples["offset_mapping"][0][:100])

[(0, 0), (0, 3), (4, 8), (9, 13), (14, 18), (19, 22), (23, 28), (29, 33), (34, 37), (37, 38), (38, 39), (40, 50), (51, 55), (56, 60), (60, 61), (0, 0), (0, 3), (4, 7), (7, 8), (8, 9), (10, 20), (21, 25), (26, 29), (30, 34), (35, 36), (36, 37), (37, 40), (41, 45), (45, 46), (47, 50), (51, 53), (54, 58), (59, 61), (62, 69), (70, 73), (74, 78), (79, 86), (87, 91), (92, 96), (96, 97), (98, 101), (102, 106), (107, 115), (116, 118), (119, 121), (122, 126), (127, 138), (138, 139), (140, 146), (147, 153), (154, 160), (161, 165), (166, 171), (172, 175), (176, 182), (183, 186), (187, 191), (192, 198), (199, 205), (206, 208), (209, 210), (211, 217), (218, 222), (223, 225), (226, 229), (230, 240), (241, 245), (246, 248), (248, 249), (250, 258), (259, 262), (263, 267), (268, 271), (272, 277), (278, 281), (282, 285), (286, 290), (291, 301), (301, 302), (303, 307), (308, 312), (313, 318), (319, 321), (322, 325), (326, 328), (328, 330), (330, 331), (332, 340), (341, 351), (352, 354), (355, 363), (364,

In [16]:
#sequence id is a list that maps special tokens and answer tokens
sequence_ids = tokenized_examples.sequence_ids()
print(sequence_ids)

[None, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, None, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [17]:
answers = example["answers"]
start_char = answers["answer_start"][0]
end_char = start_char + len(answers["text"][0])

# Start token index of the current span in the text.
token_start_index = 0
while sequence_ids[token_start_index] != 1:
    token_start_index += 1

# End token index of the current span in the text.
token_end_index = len(tokenized_examples["input_ids"][0]) - 1
while sequence_ids[token_end_index] != 1:
    token_end_index -= 1

# Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).
offsets = tokenized_examples["offset_mapping"][0]
if (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
    # Move the token_start_index and token_end_index to the two ends of the answer.
    # Note: we could go after the last offset if the answer is the last word (edge case).
    while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
        token_start_index += 1
    start_position = token_start_index - 1
    while offsets[token_end_index][1] >= end_char:
        token_end_index -= 1
    end_position = token_end_index + 1
    print(start_position, end_position)
else:
    print("The answer is not in this feature.")

23 26


Answer is decoded with start and end position

In [18]:
print(tokenizer.decode(tokenized_examples["input_ids"][0][start_position: end_position+1]))
print(answers["text"][0])

over 1, 600
over 1,600


function to prepare training features

In [10]:
def prepare_train_features(examples):
    # Some of the questions have lots of whitespace on the left, which is not useful and will make the
    # truncation of the context fail (the tokenized question will take a lots of space). So we remove that
    # left whitespace
    examples["question"] = [q.lstrip() for q in examples["question"]]

    # Tokenize our examples with truncation and padding, but keep the overflows using a stride. This results
    # in one example possible giving several features when a context is long, each of those features having a
    # context that overlaps a bit the context of the previous feature.
    tokenized_examples = tokenizer(
        examples["question" if pad_on_right else "context"],
        examples["context" if pad_on_right else "question"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Since one example might give us several features if it has a long context, we need a map from a feature to
    # its corresponding example. This key gives us just that.
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    # The offset mappings will give us a map from token to character position in the original context. This will
    # help us compute the start_positions and end_positions.
    offset_mapping = tokenized_examples.pop("offset_mapping")

    # Let's label those examples!
    tokenized_examples["start_positions"] = []
    tokenized_examples["end_positions"] = []

    for i, offsets in enumerate(offset_mapping):
        # We will label impossible answers with the index of the CLS token.
        input_ids = tokenized_examples["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)

        # Grab the sequence corresponding to that example (to know what is the context and what is the question).
        sequence_ids = tokenized_examples.sequence_ids(i)

        # One example can give several spans, this is the index of the example containing this span of text.
        sample_index = sample_mapping[i]
        answers = examples["answers"][sample_index]
        # If no answers are given, set the cls_index as answer.
        if len(answers["answer_start"]) == 0:
            tokenized_examples["start_positions"].append(cls_index)
            tokenized_examples["end_positions"].append(cls_index)
        else:
            # Start/end character index of the answer in the text.
            start_char = answers["answer_start"][0]
            end_char = start_char + len(answers["text"][0])

            # Start token index of the current span in the text.
            token_start_index = 0
            while sequence_ids[token_start_index] != (1 if pad_on_right else 0):
                token_start_index += 1

            # End token index of the current span in the text.
            token_end_index = len(input_ids) - 1
            while sequence_ids[token_end_index] != (1 if pad_on_right else 0):
                token_end_index -= 1

            # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).
            if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
                tokenized_examples["start_positions"].append(cls_index)
                tokenized_examples["end_positions"].append(cls_index)
            else:
                # Otherwise move the token_start_index and token_end_index to the two ends of the answer.
                # Note: we could go after the last offset if the answer is the last word (edge case).
                while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
                    token_start_index += 1
                tokenized_examples["start_positions"].append(token_start_index - 1)
                while offsets[token_end_index][1] >= end_char:
                    token_end_index -= 1
                tokenized_examples["end_positions"].append(token_end_index + 1)

    return tokenized_examples 

In [11]:
tokenized_datasets = dataset.map(prepare_train_features, batched=True, remove_columns=dataset["train"].column_names)

  0%|          | 0/88 [00:00<?, ?ba/s]

  0%|          | 0/11 [00:00<?, ?ba/s]

training dataset requires TensorFlow Dataset type


In [12]:
training_dataset = model.prepare_tf_dataset(
                tokenized_datasets['train'],
                shuffle=True,
                batch_size=8,
                tokenizer=tokenizer,
            )

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Callback to push to HuggingFace Hub

In [44]:
pushhug=PushToHubCallback(output_dir='./QA-finetuned-distilbert-TFv2',
                           hub_model_id='jmparejaz/QA-finetuned-distilbert-TFv2',
                           save_strategy='steps',
                           save_steps=5000,
                           tokenizer=tokenizer)

/content/QA-finetuned-distilbert-TFv2 is already a clone of https://huggingface.co/jmparejaz/QA-finetuned-distilbert-TFv2. Make sure you pull the latest changes with `repo.git_pull()`.


Create optimizer with Hyperparameters

In [13]:
optimizer, schedule = create_optimizer(
                init_lr=2e-4,
                num_train_steps=len(training_dataset)*2,
                num_warmup_steps=2,
                adam_beta1=0.9,
                adam_beta2=0.999,
                adam_epsilon=1e-08,
                weight_decay_rate=0.01,
            )

In [14]:
model.compile(optimizer=optimizer, jit_compile=True, metrics=["accuracy"])

No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


Train the model with TensorFlow

In [16]:
history=model.fit(training_dataset, epochs=3,callbacks=[pushhug,WandbCallback()])

Epoch 1/3
Epoch 2/3
Epoch 3/3


Function to prepare the Validation data

In [17]:
 # Validation preprocessing
def prepare_validation_features(examples):
  # Some of the questions have lots of whitespace on the left, which is not useful and will make the
  # truncation of the context fail (the tokenized question will take a lots of space). So we remove that
  # left whitespace
  examples["question"] = [q.lstrip() for q in examples["question"]]

  # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
  # in one example possible giving several features when a context is long, each of those features having a
  # context that overlaps a bit the context of the previous feature.
  tokenized_examples = tokenizer(
    examples["question" if pad_on_right else "context"],
    examples["context" if pad_on_right else "question"],
    truncation="only_second" if pad_on_right else "only_first",
    max_length=max_length,
    stride=doc_stride,
    return_overflowing_tokens=True,
    return_offsets_mapping=True,
    padding="max_length",
  )
  # Since one example might give us several features if it has a long context, we need a map from a feature to
  # its corresponding example. This key gives us just that.
  sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")

  # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the
  # corresponding example_id and we will store the offset mappings.
  tokenized_examples["example_id"] = []

  for i in range(len(tokenized_examples["input_ids"])):
      # Grab the sequence corresponding to that example (to know what is the context and what is the question).
      sequence_ids = tokenized_examples.sequence_ids(i)
      context_index = 1 if pad_on_right else 0

      # One example can give several spans, this is the index of the example containing this span of text.
      sample_index = sample_mapping[i]
      tokenized_examples["example_id"].append(examples["id"][sample_index])

      # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token
      # position is part of the context or not.
      tokenized_examples["offset_mapping"][i] = [
          (o if sequence_ids[k] == context_index else None)
          for k, o in enumerate(tokenized_examples["offset_mapping"][i])
      ]

  return tokenized_examples

Maping validation dataset with prepare function

In [18]:
validation_features = dataset["validation"].map(
    prepare_validation_features,
    batched=True,
    remove_columns=dataset["validation"].column_names
)

  0%|          | 0/11 [00:00<?, ?ba/s]

Predict validation data set to evaluate the model

In [19]:
eval_dataset = model.prepare_tf_dataset(
                validation_features,
                shuffle=False,
                batch_size=8,
                tokenizer=tokenizer,
            )

In [20]:
eval_predictions = model.predict(eval_dataset)



In [21]:
eval_predictions['start_logits'].shape

(10822, 384)

In [22]:
max_answer_length = 30

In [23]:
validation_features.set_format(type=validation_features.format["type"], columns=list(validation_features.features.keys()))

In [52]:
start_logits = pred1['start_logits']
end_logits = pred1['end_logits']
offset_mapping = validation_features[0]["offset_mapping"]
# The first feature comes from the first example. For the more general case, we will need to be match the example_id to
# an example index
context = datasets["validation"][0]["context"]

# Gather the indices the best start/end logits:
start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
valid_answers = []
for start_index in start_indexes:
    for end_index in end_indexes:
        # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond
        # to part of the input_ids that are not in the context.
        if (
            start_index >= len(offset_mapping)
            or end_index >= len(offset_mapping)
            or offset_mapping[start_index] is None
            or offset_mapping[end_index] is None
        ):
            continue
        # Don't consider answers with a length that is either < 0 or > max_answer_length.
        if end_index < start_index or end_index - start_index + 1 > max_answer_length:
            continue
        if start_index <= end_index: # We need to refine that test to check the answer is inside the context
            start_char = offset_mapping[start_index][0]
            end_char = offset_mapping[end_index][1]
            valid_answers.append(
                {
                    "score": start_logits[start_index] + end_logits[end_index],
                    "text": context[start_char: end_char]
                }
            )

valid_answers = sorted(valid_answers, key=lambda x: x["score"], reverse=True)[:n_best_size]
valid_answers

TFQuestionAnsweringModelOutput(loss=None, start_logits=array([[ -6.446063  , -10.387209  , -10.125394  , -10.503152  ,
        -10.098829  , -10.2954235 , -10.284649  , -10.367259  ,
        -10.46689   , -10.021881  , -10.352463  , -10.399611  ,
        -10.580121  , -10.27634   , -10.603794  ,  -6.442524  ,
         -0.5563267 ,  -2.2811236 ,  -6.1395354 ,  -5.1712084 ,
         -2.5254521 ,  -3.134036  ,  -1.6919271 ,   5.987861  ,
          6.862862  ,  -6.2287245 ,   0.66615427,  -1.9067357 ,
         -4.327554  ,   0.6793444 ,  -5.283533  ,  -1.4532096 ],
       [ -3.3597102 ,  -3.219751  ,  -4.193787  ,  -4.1253386 ,
         -4.1345735 ,  -4.170485  ,  -4.1734633 ,  -4.327958  ,
         -4.3753357 ,  -3.7211533 ,  -3.8002791 ,  -4.0795045 ,
         -3.5910645 ,  -3.236134  ,  -3.610694  ,  -3.5463367 ,
         -2.637463  ,  -3.172279  ,  -2.1834247 ,  -3.2582726 ,
         -3.8660135 ,  -3.714253  ,  -3.8849506 ,  -4.119197  ,
         -3.5380397 ,  -3.8788428 ,  -4.0526743 

In [25]:
from tqdm.auto import tqdm
import collections

def postprocess_qa_predictions(examples, features, raw_predictions, n_best_size = 20, max_answer_length = 30,squad_v2=False):
    all_start_logits = raw_predictions['start_logits']
    all_end_logits = raw_predictions['end_logits']
    # Build a map example to its corresponding features.
    example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
    features_per_example = collections.defaultdict(list)
    for i, feature in enumerate(features):
        features_per_example[example_id_to_index[feature["example_id"]]].append(i)

    # The dictionaries we have to fill.
    predictions = collections.OrderedDict()

    # Logging.
    print(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")

    # Let's loop over all the examples!
    for example_index, example in enumerate(tqdm(examples)):
        # Those are the indices of the features associated to the current example.
        feature_indices = features_per_example[example_index]

        min_null_score = None # Only used if squad_v2 is True.
        valid_answers = []
        
        context = example["context"]
        # Looping through all the features associated to the current example.
        for feature_index in feature_indices:
            # We grab the predictions of the model for this feature.
            start_logits = all_start_logits[feature_index]
            end_logits = all_end_logits[feature_index]
            # This is what will allow us to map some the positions in our logits to span of texts in the original
            # context.
            offset_mapping = features[feature_index]["offset_mapping"]

            # Update minimum null prediction.
            cls_index = features[feature_index]["input_ids"].index(tokenizer.cls_token_id)
            feature_null_score = start_logits[cls_index] + end_logits[cls_index]
            if min_null_score is None or min_null_score < feature_null_score:
                min_null_score = feature_null_score

            # Go through all possibilities for the `n_best_size` greater start and end logits.
            start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
            end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond
                    # to part of the input_ids that are not in the context.
                    if (
                        start_index >= len(offset_mapping)
                        or end_index >= len(offset_mapping)
                        or offset_mapping[start_index] is None
                        or offset_mapping[end_index] is None
                    ):
                        continue
                    # Don't consider answers with a length that is either < 0 or > max_answer_length.
                    if end_index < start_index or end_index - start_index + 1 > max_answer_length:
                        continue

                    start_char = offset_mapping[start_index][0]
                    end_char = offset_mapping[end_index][1]
                    valid_answers.append(
                        {
                            "score": start_logits[start_index] + end_logits[end_index],
                            "text": context[start_char: end_char]
                        }
                    )
        
        if len(valid_answers) > 0:
            best_answer = sorted(valid_answers, key=lambda x: x["score"], reverse=True)[0]
        else:
            # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid
            # failure.
            best_answer = {"text": "", "score": 0.0}
        
        # Let's pick our final answer: the best one or the null answer (only for squad_v2)
        if not squad_v2:
            predictions[example["id"]] = best_answer["text"]
        else:
            answer = best_answer["text"] if best_answer["score"] > min_null_score else ""
            predictions[example["id"]] = answer

    return predictions

In [26]:
final_predictions = postprocess_qa_predictions(dataset["validation"], validation_features, eval_predictions)

Post-processing 10570 example predictions split into 10822 features.


  0%|          | 0/10570 [00:00<?, ?it/s]

In [None]:
fpred = postprocess_qa_predictions(dataset["validation"], validation_features, pred1)

In [71]:
final_predictions['56be4db0acb8001400a502ec']

'Denver Broncos'

In [75]:
model.save_pretrained('QA-finetuned-distilbert-TFv2')

In [85]:
model.push_to_hub('jmparejaz/QA-finetuned-distilbert-TFv2')

In [86]:
tokenizer.push_to_hub('jmparejaz/QA-finetuned-distilbert-TFv2')

CommitInfo(commit_url='https://huggingface.co/jmparejaz/QA-finetuned-distilbert-TFv2/commit/673c34edd7519ebfcf7da35be86b6cb183db1396', commit_message='Upload tokenizer', commit_description='', oid='673c34edd7519ebfcf7da35be86b6cb183db1396', pr_url=None, pr_revision=None, pr_num=None)