### Importing Libraries

In [1]:
import os
import sys

RUN_ON = 'kaggle' if os.path.exists('/kaggle') else 'gcp'

if RUN_ON == 'gcp':
    os.chdir('/home/jupyter/kaggle/working')
    sys.path.extend(['../input/bert-joint-baseline/'])

In [2]:
import gzip
import json

import numpy as np
import pandas as pd
import tensorflow as tf

import bert_utils
import modeling
import tokenization

from tqdm.auto import tqdm
import importlib

importlib.reload(bert_utils)

tf.__version__

'2.1.0'

### Classes & Functions

In [3]:
class TDense(tf.keras.layers.Layer):
    def __init__(self,
                 output_size,
                 kernel_initializer=None,
                 bias_initializer="zeros",
                 **kwargs):
        super().__init__(**kwargs)
        self.output_size = output_size
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer

    def build(self, input_shape):
        dtype = tf.as_dtype(self.dtype or tf.keras.backend.floatx())
        if not (dtype.is_floating or dtype.is_complex):
            raise TypeError("Unable to build `TDense` layer with "
                            "non-floating point (and non-complex) "
                            "dtype %s" % (dtype,))
        input_shape = tf.TensorShape(input_shape)
        if tf.compat.dimension_value(input_shape[-1]) is None:
            raise ValueError("The last dimension of the inputs to "
                             "`TDense` should be defined. "
                             "Found `None`.")
        last_dim = tf.compat.dimension_value(input_shape[-1])
        self.input_spec = tf.keras.layers.InputSpec(min_ndim=2, axes={-1: last_dim})
        self.kernel = self.add_weight(
            "kernel",
            shape=[self.output_size, last_dim],
            initializer=self.kernel_initializer,
            dtype=self.dtype,
            trainable=True)
        self.bias = self.add_weight(
            "bias",
            shape=[self.output_size],
            initializer=self.bias_initializer,
            dtype=self.dtype,
            trainable=True)
        super(TDense, self).build(input_shape)

    def call(self, x):
        return tf.matmul(x, self.kernel, transpose_b=True) + self.bias


class DummyObject:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


def mk_model(config):
    seq_len = config['max_position_embeddings']
    unique_id = tf.keras.Input(shape=(1,), dtype=tf.int64, name='unique_id')
    input_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32, name='input_ids')
    input_mask = tf.keras.Input(shape=(seq_len,), dtype=tf.int32, name='input_mask')
    segment_ids = tf.keras.Input(shape=(seq_len,), dtype=tf.int32, name='segment_ids')
    BERT = modeling.BertModel(config=config, name='bert')
    pooled_output, sequence_output = BERT(input_word_ids=input_ids,
                                          input_mask=input_mask,
                                          input_type_ids=segment_ids)

    logits = TDense(2, name='logits')(sequence_output)
    start_logits, end_logits = tf.split(logits, axis=-1, num_or_size_splits=2, name='split')
    start_logits = tf.squeeze(start_logits, axis=-1, name='start_squeeze')
    end_logits = tf.squeeze(end_logits, axis=-1, name='end_squeeze')

    ans_type = TDense(5, name='ans_type')(pooled_output)
    return tf.keras.Model([input_ for input_ in [unique_id, input_ids, input_mask, segment_ids]
                           if input_ is not None],
                          [unique_id, start_logits, end_logits, ans_type],
                          name='bert-baseline')

In [4]:
def url_exists(url):
    """test local or gs file exists or not."""
    from urllib import parse
    res = parse.urlparse(url)
    if res.scheme == 'gs':
        # blob_name has no '/' prefix
        bucket_name, blob_name = res.netloc, res.path[1:]
        from google.cloud import storage
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name[1:])
        return blob.exists()
    else:
        return os.path.exists(res.path)


def _decode_record(record, feature_description=None):
    """Decodes a record to a TensorFlow example."""
    feature_description = feature_description or FEATURE_DESCRIPTION
    example = tf.io.parse_single_example(serialized=record, features=feature_description)
    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for key in [k for k in example.keys() if k not in ['example_id', 'unique_id']]:
        example[key] = tf.cast(example[key], dtype=tf.int32)
    
    example.pop('example_id')
    return example

In [5]:
def read_candidates_from_one_split(input_path):
    """Read candidates from a single jsonl file."""
    candidates_dict = {}
    print("Reading examples from: %s" % input_path)
    if input_path.endswith(".gz"):
        with gzip.GzipFile(fileobj=tf.io.gfile.GFile(input_path, "rb")) as input_file:
            for index, line in enumerate(input_file):
                e = json.loads(line)
                candidates_dict[e["example_id"]] = e["long_answer_candidates"]
    else:
        with tf.io.gfile.GFile(input_path, "r") as input_file:
            for index, line in enumerate(input_file):
                e = json.loads(line)
                candidates_dict[e["example_id"]] = e["long_answer_candidates"]
                # candidates_dict['question'] = e['question_text']
    return candidates_dict


def read_candidates(input_pattern):
    """Read candidates with real multiple processes."""
    input_paths = tf.io.gfile.glob(input_pattern)
    final_dict = {}
    for input_path in input_paths:
        final_dict.update(read_candidates_from_one_split(input_path))
    return final_dict

In [6]:
FLAGS = DummyObject(skip_nested_contexts=True,
                    max_position=50,
                    max_contexts=48,
                    max_query_length=64,
                    max_seq_length=512,
                    doc_stride=128,
                    include_unknowns=-1.0,
                    n_best_size=20,
                    max_answer_length=30)

SEQ_LENGTH = FLAGS.max_seq_length  # config['max_position_embeddings']

if RUN_ON == 'gcp':
    INPUT_PATH = 'gs://tyu-kaggle/input/'
else:
    INPUT_PATH = '../input/'
BERT_CONFIG_PATH = os.path.join('../input', 'bert-joint-baseline/bert_config.json')
# CPKT_PATH = os.path.join(INPUT_PATH, 'bert-joint-baseline/model_cpkt-1')
CPKT_PATH = 'gs://tyu-kaggle/output/model.ckpt-23187'
VOCAB_PATH = os.path.join(INPUT_PATH, 'bert-joint-baseline/vocab-nq.txt')

NQ_TEST_JSONL_PATH = '../input/tensorflow2-question-answering/simplified-nq-test.jsonl'
NQ_TRAIN_JSONL_PATH = '../input/tensorflow2-question-answering/simplified-nq-train.jsonl'
NQ_TEST_TFRECORD_PATH = './nq-test.tfrecords'

SAMPLE_SUBMISSION_PATH = '../input/tensorflow2-question-answering/sample_submission.csv'

TEST_DS_TYPE = 'public' if os.path.getsize(NQ_TEST_JSONL_PATH) < 20000000 else 'private'

FEATURE_DESCRIPTION = {
    "example_id": tf.io.FixedLenFeature([], tf.int64),
    "unique_id": tf.io.FixedLenFeature([], tf.int64),
    "input_ids": tf.io.FixedLenFeature([SEQ_LENGTH], tf.int64),
    "input_mask": tf.io.FixedLenFeature([SEQ_LENGTH], tf.int64),
    "segment_ids": tf.io.FixedLenFeature([SEQ_LENGTH], tf.int64),
}
ANSWER_TYPE_ORDER = ['UNKNOWN', 'YES', 'NO', 'SHORT', 'LONG']

with open(BERT_CONFIG_PATH, 'r') as f:
    config = json.load(f)
print(json.dumps(config, indent=4))

{
    "initializer_range": 0.02,
    "vocab_size": 30522,
    "hidden_size": 1024,
    "hidden_dropout_prob": 0.1,
    "intermediate_size": 4096,
    "hidden_act": "gelu",
    "num_hidden_layers": 24,
    "attention_probs_dropout_prob": 0.1,
    "num_attention_heads": 16,
    "type_vocab_size": 2,
    "max_position_embeddings": 512
}


In [7]:
# Detect hardware, return appropriate distribution strategy
try:
    TPU = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print('Running on TPU ', TPU.cluster_spec().as_dict()['worker'])
except ValueError:
    TPU = None

if TPU:
    tf.config.experimental_connect_to_cluster(TPU)
    tf.tpu.experimental.initialize_tpu_system(TPU)
    strategy = tf.distribute.experimental.TPUStrategy(TPU)
    BATCH_SIZE = 128
    # drop_remainder must be True if running on TPU, maybe a bug
    # so we pad some examples.
    nq_test_jsonl_path2 = NQ_TEST_JSONL_PATH + '.pad'
    !cp $NQ_TEST_JSONL_PATH $nq_test_jsonl_path2
    !tail -n 3 $NQ_TEST_JSONL_PATH >> $nq_test_jsonl_path2
    NQ_TEST_JSONL_PATH = nq_test_jsonl_path2
else:
    strategy = tf.distribute.get_strategy()
    BATCH_SIZE = 16

print("REPLICAS: ", strategy.num_replicas_in_sync)

Running on TPU  ['10.254.212.146:8470']
INFO:tensorflow:Initializing the TPU system: tyu
INFO:tensorflow:Clearing out eager caches
INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2

INFO:tensorflow:Initializing the TPU system: tyu
INFO:tensorflow:Clearing out eager caches
INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Availab

In [16]:
for x in model_params.keys():
    print(x)

bert/encoder/layer_19/self_attention/query/kernel:0
bert/encoder/layer_5/self_attention/query/bias:0
bert/encoder/layer_2/self_attention/query/bias:0
bert/encoder/layer_1/output/bias:0
bert/encoder/layer_22/self_attention_output/bias:0
bert/encoder/layer_21/output_layer_norm/gamma:0
bert/encoder/layer_0/intermediate/kernel:0
bert/encoder/layer_7/self_attention_output/kernel:0
bert/encoder/layer_12/output/kernel:0
bert/encoder/layer_0/self_attention/value/kernel:0
bert/encoder/layer_0/output/kernel:0
bert/pooler_transform/kernel:0
bert/encoder/layer_12/output/bias:0
bert/encoder/layer_9/output_layer_norm/beta:0
ans_type/bias:0
bert/encoder/layer_2/self_attention_output/bias:0
bert/encoder/layer_2/self_attention/value/bias:0
bert/encoder/layer_8/output_layer_norm/beta:0
bert/encoder/layer_0/self_attention/key/kernel:0
bert/encoder/layer_20/self_attention/query/kernel:0
bert/encoder/layer_1/self_attention_layer_norm/gamma:0
bert/encoder/layer_19/intermediate/bias:0
bert/encoder/layer_0/se

['answer_type_output_bias',
 'answer_type_output_bias/adam_m',
 'answer_type_output_bias/adam_v',
 'answer_type_output_weights',
 'answer_type_output_weights/adam_m',
 'answer_type_output_weights/adam_v',
 'bert/embeddings/LayerNorm/beta',
 'bert/embeddings/LayerNorm/beta/adam_m',
 'bert/embeddings/LayerNorm/beta/adam_v',
 'bert/embeddings/LayerNorm/gamma',
 'bert/embeddings/LayerNorm/gamma/adam_m',
 'bert/embeddings/LayerNorm/gamma/adam_v',
 'bert/embeddings/position_embeddings',
 'bert/embeddings/position_embeddings/adam_m',
 'bert/embeddings/position_embeddings/adam_v',
 'bert/embeddings/token_type_embeddings',
 'bert/embeddings/token_type_embeddings/adam_m',
 'bert/embeddings/token_type_embeddings/adam_v',
 'bert/embeddings/word_embeddings',
 'bert/embeddings/word_embeddings/adam_m',
 'bert/embeddings/word_embeddings/adam_v',
 'bert/encoder/layer_0/attention/output/LayerNorm/beta',
 'bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m',
 'bert/encoder/layer_0/attention/outp

In [9]:
with strategy.scope():
    model = mk_model(config)
    model.summary()
    
    model_params = {v.name:v for v in model.trainable_variables}
    model_roots = np.unique([v.name.split('/')[0] for v in model.trainable_variables])
    print(model_roots)
    saved_names = [k for k,v in tf.train.list_variables(CPKT_PATH)]
    a_map = {v:v+':0' for v in saved_names}
    model_roots = np.unique([v.name.split('/')[0] for v in model.trainable_variables])
    def transform(x):
        x = x.replace('attention/self','attention')
        x = x.replace('attention','self_attention')
        x = x.replace('attention/output','attention_output')  

        x = x.replace('/dense','')
        x = x.replace('/LayerNorm','_layer_norm')
        x = x.replace('embeddings_layer_norm','embeddings/layer_norm')  

        x = x.replace('attention_output_layer_norm','attention_layer_norm')  
        x = x.replace('embeddings/word_embeddings','word_embeddings/embeddings')

        x = x.replace('/embeddings/','/embedding_postprocessor/')  
        x = x.replace('/token_type_embeddings','/type_embeddings')  
        x = x.replace('/pooler/','/pooler_transform/')  
        x = x.replace('answer_type_output_bias','ans_type/bias')  
        x = x.replace('answer_type_output_','ans_type/')
        x = x.replace('cls/nq/output_','logits/')
        x = x.replace('/weights','/kernel')

        return x
    a_map = {k:model_params.get(transform(v),None) for k,v in a_map.items() if k!='global_step'}
    tf.compat.v1.train.init_from_checkpoint(ckpt_dir_or_file=CPKT_PATH, assignment_map=a_map)
    
    cpkt = tf.train.Checkpoint(model=model)
    cpkt.restore(CPKT_PATH).assert_consumed()

Model: "bert-baseline"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_mask (InputLayer)         [(None, 512)]        0                                            
__________________________________________________________________________________________________
segment_ids (InputLayer)        [(None, 512)]        0                                            
__________________________________________________________________________________________________
bert (BertModel)                ((None, 1024), (None 335141888   input_ids[0][0]                  
                                                                 input_mask[0][0]     

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'

In [None]:
# small_config = config.copy()
# small_config['vocab_size']=16
# small_config['hidden_size']=64
# small_config['max_position_embeddings'] = 32
# small_config['num_hidden_layers'] = 4
# small_config['num_attention_heads'] = 4
# small_config['intermediate_size'] = 256
# small_config

tf.cast

tf.squeeze

model.save_weights()


In [None]:
if not url_exists(NQ_TEST_TFRECORD_PATH):
# if True:
    # tf2baseline.FLAGS.max_seq_length = 512
    eval_writer = bert_utils.FeatureWriter(filename=NQ_TEST_TFRECORD_PATH,
                                           is_training=False)
    tokenizer = tokenization.FullTokenizer(vocab_file=VOCAB_PATH,
                                           do_lower_case=True)
    features = []
    convert = bert_utils.ConvertExamples2Features(tokenizer=tokenizer,
                                                  is_training=False,
                                                  output_fn=eval_writer.process_feature,
                                                  collect_stat=False)
    n_examples = 0
    # tqdm_notebook = tqdm.tqdm_notebook  # if not on_kaggle_server else None
    for examples in bert_utils.nq_examples_iter(input_file=NQ_TEST_JSONL_PATH,
                                                is_training=False,
                                                tqdm=tqdm):
        for example in examples:
            n_examples += convert(example)
    eval_writer.close()
    print('number of test examples: %d, written to file: %d' % (n_examples, eval_writer.num_features))

In [None]:
raw_ds = tf.data.TFRecordDataset(NQ_TEST_TFRECORD_PATH)
decoded_ds = raw_ds.map(_decode_record)
batched_ds = decoded_ds.batch(batch_size=BATCH_SIZE, drop_remainder=(TPU is not None))

result = model.predict(batched_ds, verbose=1)

In [None]:
# add example_id to beginning.
example_id_ds = raw_ds.map(lambda x: tf.io.parse_single_example(
    serialized=x,
    features={"example_id": tf.io.FixedLenFeature([], tf.int64)}
)['example_id'])
result = (np.array(list(example_id_ds)[:len(result[0])]), *result)

## 1- Understanding the code
#### For a better understanding, I will briefly explain here.
#### In the item "answer_type", in the last lines of this block, it is responsible for storing the identified response type, which, according to [github project repository](https://github.com/google-research/language/blob/master/language/question_answering/bert_joint/run_nq.py) can be:
UNKNOWN = 0
YES = 1
NO = 2
SHORT = 3
LONG = 4

In [None]:
print("getting candidates...")
candidates_dict = read_candidates('../input/tensorflow2-question-answering/simplified-nq-test.jsonl')

In [None]:
print("getting result_df...")
result_df = pd.DataFrame({
    "example_id": result[0].squeeze().tolist(),
    "unique_id": result[1].squeeze().tolist(),
    "start_logits": result[2].tolist(),
    "end_logits": result[3].tolist(),
    "answer_type_logits": result[4].tolist()
}).set_index(['example_id', 'unique_id'])
# we pad some instances when using TPU, deduplicate it here.
if TPU is not None:
    print('result_df len before dedup: ' + str(len(result_df)))
    result_df = result_df[~result_df.index.duplicated()]
    print('result_df len after  dedup: ' + str(len(result_df)))

In [None]:
token_map_ds = raw_ds.map(lambda x: tf.io.parse_single_example(
    serialized=x,
    features={
        "example_id": tf.io.FixedLenFeature([], tf.int64),
        "unique_id": tf.io.FixedLenFeature([], tf.int64),
        # token_map: token to origin map.
        "token_map": tf.io.FixedLenFeature([SEQ_LENGTH], tf.int64)
    }
))
print("getting token_map_df...")
token_map_df = pd.DataFrame.from_records(list(token_map_ds)).applymap(
    lambda x: x.numpy()
).set_index(['example_id', 'unique_id'])
# we pad some instances when using TPU, deduplicate it here.
if TPU is not None:
    print('token_map_df len before: ' + str(len(token_map_df)))
    token_map_df = token_map_df[~token_map_df.index.duplicated()]
    print('token_map_df len before: ' + str(len(token_map_df)))

In [None]:
joined = result_df.join(token_map_df, on=['example_id', 'unique_id'])

pred_df = pd.DataFrame(columns=['example_id', 'score', 'answer_type',
                                'short_span_start', 'short_span_end',
                                'long_span_start', 'long_span_end', ]
                       ).set_index('example_id')

In [None]:
def best_score_start_end_of_instance(res: pd.Series):
    """
    :param res: index: ['answer_type_logits', 'end_logits', 'start_logits', 'token_map', 'candidates']
    :return: best_score_of_instance, start_short_idx, end_short_idx
    """
    msk_invalid_token = np.array(res['token_map']) == -1
    s_logits, e_logits = pd.Series(res['start_logits']), pd.Series(res['end_logits'])
    # filter logits corresponding to context token and rank top-k.
    s_msk_not_top_k = s_logits.mask(msk_invalid_token) \
                          .rank(method='min', ascending=False) > FLAGS.n_best_size
    s_indexes = np.ma.masked_array(np.arange(s_logits.size),
                                   mask=s_msk_not_top_k | msk_invalid_token)
    e_msk_not_top_k = e_logits.mask(msk_invalid_token) \
                          .rank(method='min', ascending=False) > FLAGS.n_best_size
    e_indexes = np.ma.masked_array(np.arange(e_logits.size),
                                   mask=e_msk_not_top_k | msk_invalid_token)
    # s_e_msk has shape: [512, 512], end index should greater than start index, otherwise, mask it.
    s_e_msk = e_indexes[np.newaxis, :] <= s_indexes[:, np.newaxis]
    # short answer length should litter than max_answer_length, otherwise, mask it.
    s_e_msk |= (e_indexes[np.newaxis, :] - s_indexes[:, np.newaxis] >= FLAGS.max_answer_length)
    # full mask.
    s_e_msk = s_e_msk.filled(True)

    if s_e_msk.all():  # if all start-end combinations has been masked.
        return np.NAN, np.NAN, np.NAN
    else:
        # broadcast to shape: [512, 512], and set mask=s_e_msk
        s_logits_bc = np.ma.array(
            np.broadcast_to(s_logits[:, np.newaxis], shape=[s_logits.size, e_logits.size]),
            mask=s_e_msk)
        e_logits_bc = np.ma.array(
            np.broadcast_to(e_logits[np.newaxis, :], shape=[s_logits.size, e_logits.size]),
            mask=s_e_msk)
        short_span_score = s_logits_bc + e_logits_bc
        cls_token_score = s_logits[0] + e_logits[0]
        score = short_span_score - cls_token_score
        s_short_idx, e_short_idx = divmod(score.argmax(), e_logits.size)

        return score.max(), s_short_idx, e_short_idx


for example_id, group_df in tqdm(joined.groupby('example_id')):
    # group_df: each row got a unique id(unique_id), all rows have a some example_id.
    # columns = ['answer_type_logits', 'end_logits', 'start_logits', 'token_map', 'candidates']
    group_df = group_df.copy().reset_index(level='example_id', drop=True)
    # get best score/start/end and answer type for every instance within same example.
    for u_id, res in group_df.iterrows():
        answer_type_logits = pd.Series(res['answer_type_logits'], index=ANSWER_TYPE_ORDER)
        group_df.loc[u_id, 'ins_answer_type'] = answer_type_logits.idxmax()
        ins_score, ins_start, ins_end = best_score_start_end_of_instance(res)
        group_df.loc[u_id, 'ins_score'] = ins_score
        group_df.loc[u_id, 'ins_short_span_start'] = res['token_map'][ins_start]
        # end span should be exclusive, and np.nan + 1 = np.nan
        group_df.loc[u_id, 'ins_short_span_end'] = res['token_map'][ins_end] + 1
    # we pick instance result who's best score is best among the instances within same example
    best_u_id = group_df['ins_score'].idxmax()
    if best_u_id is not np.NAN:  # if all instances got no score
        short_span_start, short_span_end = group_df.loc[best_u_id, ['ins_short_span_start', 'ins_short_span_end']]
        pred_df.loc[example_id, 'score'] = group_df.loc[best_u_id, 'ins_score']
        pred_df.loc[example_id, 'short_span_start'] = short_span_start
        pred_df.loc[example_id, 'short_span_end'] = short_span_end
        # search for long answer span.
        for cand in candidates_dict[str(example_id)]:
            if cand['top_level'] and cand['start_token'] <= short_span_start and short_span_end <= cand['end_token']:
                pred_df.loc[example_id, 'long_span_start'] = cand['start_token']
                pred_df.loc[example_id, 'long_span_end'] = cand['end_token']
                break
        pred_df.loc[example_id, 'answer_type'] = group_df.loc[best_u_id, 'ins_answer_type']
        # break

## 2- Main Change
#### Here is the small, but main change: we created an if to check the predicted response type and thus filter / identify the responses that are passed to the submission file.

### Filtering the Answers

In [None]:
def get_short_pred(pred_row: pd.Series):
    # score(best short answer) is np.NAN means: there's no short/long answers.
    if pred_row['score'] is np.NAN:
        return ''
    # answer_type can not be np.NAN if score is not np.NAN.
    if pred_row['answer_type'] == 'UNKNOWN':
        return ''
    if pred_row['answer_type'] in ['YES', 'NO']:
        return pred_row['answer_type']
    if pred_row['answer_type'] in ['SHORT', 'LONG']:
        if pred_row['score'] < 8:
            return ''
        else:
            return '%d:%d' % (pred_row['short_span_start'], pred_row['short_span_end'])


def get_long_pred(pred_row: pd.Series):
    # score(best short answer) is np.NAN means: there's no short/long answers.
    if pred_row['score'] is np.NAN:
        return ''
    # answer_type can not be np.NAN if score is not np.NAN.
    if pred_row['answer_type'] == 'UNKNOWN':
        return ''
    if pred_row['answer_type'] in ['YES', 'NO', 'SHORT', 'LONG']:
        if pred_row['score'] < 3 or pred_row['long_span_start'] is np.NAN:
            return ''
        else:
            return '%d:%d' % (pred_row['long_span_start'], pred_row['long_span_end'])


### Creating a DataFrame

In [None]:
prediction_df = pred_df.copy()
prediction_df['long_pred'] = pred_df.apply(get_long_pred, axis='columns')
prediction_df['short_pred'] = pred_df.apply(get_short_pred, axis='columns')
prediction_df.index = prediction_df.index.map(lambda x: str(x))

### Generating the Submission File

In [None]:
sample_submission = pd.read_csv(SAMPLE_SUBMISSION_PATH).set_index('example_id')

for eid, row in prediction_df.iterrows():
    sample_submission.loc[eid + '_long', 'PredictionString'] = row['long_pred']
    sample_submission.loc[eid + '_short', 'PredictionString'] = row['short_pred']

In [None]:
sample_submission.reset_index().to_csv('submission.csv', index=False)

In [None]:
sample_submission.head(20)

*Yes
Answers

In [None]:
yes_answers = sample_submission[sample_submission['PredictionString'] == 'YES']
yes_answers

*No
Answers

In [None]:
no_answers = sample_submission[sample_submission['PredictionString'] == 'NO']
no_answers

*Balnk
Answers

In [None]:
blank_answers = sample_submission[sample_submission['PredictionString'] == '']
blank_answers.head()

In [None]:
blank_answers.count()

### I am only sharing modifications that I believe may help. I left out Tunning and any significant code changes I made.

### We'll be grateful if someone gets a better understanding and can share what really impacts the assessment. No need to share code, just knowledge.
### Thank you!