In [1]:
import pandas as pd
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering, DefaultDataCollator, create_optimizer
import datasets
import tensorflow as tf

In [2]:
def load_train_val_test_datasets(dataset_path='./../data/datasets/squad'):
    train = pd.read_csv(f'{dataset_path}/train.csv').dropna()
    val = pd.read_csv(f'{dataset_path}/dev.csv').dropna()
    test = pd.read_csv(f'{dataset_path}/test.csv').dropna()
    return train, val, test


def convert_dataframes_to_datasets(dataframes: list):
    return tuple(
        [datasets.Dataset.from_pandas(dataframe, preserve_index=False) for dataframe in
         dataframes])


df_train, df_val, df_test = load_train_val_test_datasets()

train_dataset, val_dataset, test_dataset = convert_dataframes_to_datasets([df_train, df_val, df_test])

  if _pandas_api.is_sparse(col):


In [3]:
raw_datasets = datasets.DatasetDict({"train":train_dataset, "validation":val_dataset})

In [4]:
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['index', 'id', 'context', 'question', 'answer_text', 'answer_start'],
        num_rows: 68716
    })
    validation: Dataset({
        features: ['index', 'id', 'context', 'question', 'answer_text', 'answer_start'],
        num_rows: 14724
    })
})

In [5]:
# from datasets import load_dataset

# raw_datasets = load_dataset("squad")

In [6]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [7]:
max_length = 384
stride = 128


def preprocess_training_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answer_texts = examples["answer_text"]
    answer_starts = examples["answer_start"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer_text = answer_texts[sample_idx]
        start_char = answer_starts[sample_idx]
        end_char = start_char + len(answer_text)
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [8]:
train_dataset = raw_datasets["train"].map(
    preprocess_training_examples,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)
len(raw_datasets["train"]), len(train_dataset)

Map:   0%|          | 0/68716 [00:00<?, ? examples/s]

(68716, 69689)

In [9]:
def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

In [10]:
validation_dataset = raw_datasets["validation"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=raw_datasets["validation"].column_names,
)
len(raw_datasets["validation"]), len(validation_dataset)

Map:   0%|          | 0/14724 [00:00<?, ? examples/s]

(14724, 14916)

In [11]:
model = TFAutoModelForQuestionAnswering.from_pretrained(model_checkpoint)

All PyTorch model weights were used when initializing TFBertForQuestionAnswering.

Some weights or buffers of the TF 2.0 model TFBertForQuestionAnswering were not initialized from the PyTorch model and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
data_collator = DefaultDataCollator(return_tensors="tf")

In [13]:
tf_train_dataset = model.prepare_tf_dataset(
    train_dataset,
    collate_fn=data_collator,
    shuffle=True,
    batch_size=4,
)
tf_eval_dataset = model.prepare_tf_dataset(
    validation_dataset,
    collate_fn=data_collator,
    shuffle=False,
    batch_size=4,
)

In [14]:
import tensorflow as tf

# The number of training steps is the number of samples in the dataset, divided by the batch size then multiplied
# by the total number of epochs. Note that the tf_train_dataset here is a batched tf.data.Dataset,
# not the original Hugging Face Dataset, so its len() is already num_samples // batch_size.
num_train_epochs = 3
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)

# Train in mixed-precision float16
tf.keras.mixed_precision.set_global_policy("mixed_float16")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4070, compute capability 8.9


In [15]:
# We're going to do validation afterwards, so no validation mid-training
model.fit(tf_train_dataset, epochs=num_train_epochs)

Epoch 1/3


ResourceExhaustedError: Graph execution error:

Detected at node 'tf_bert_for_question_answering/bert/encoder/layer_._3/attention/self/dropout_10/dropout/random_uniform/RandomUniform' defined at (most recent call last):
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
      app.start()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\kernelapp.py", line 736, in start
      self.io_loop.start()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell
      await result
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\ipykernel\zmqshell.py", line 546, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Artur\AppData\Local\Temp\ipykernel_21120\2962994209.py", line 2, in <module>
      model.fit(tf_train_dataset, epochs=num_train_epochs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\modeling_tf_utils.py", line 1637, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\modeling_tf_utils.py", line 1833, in run_call_with_unpacked_inputs
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 1852, in call
      outputs = self.bert(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\modeling_tf_utils.py", line 1833, in run_call_with_unpacked_inputs
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 862, in call
      encoder_outputs = self.encoder(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 548, in call
      for i, layer_module in enumerate(self.layer):
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 554, in call
      layer_outputs = layer_module(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 464, in call
      self_attention_outputs = self.attention(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 380, in call
      self_outputs = self.self_attention(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 323, in call
      attention_probs = self.dropout(inputs=attention_probs, training=training)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\layers\regularization\dropout.py", line 116, in call
      output = control_flow_util.smart_cond(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\utils\control_flow_util.py", line 108, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\layers\regularization\dropout.py", line 112, in dropped_inputs
      return self._random_generator.dropout(
    File "c:\Users\Artur\.conda\envs\nlp_gpu\lib\site-packages\keras\backend.py", line 2162, in dropout
      return tf.nn.dropout(
Node: 'tf_bert_for_question_answering/bert/encoder/layer_._3/attention/self/dropout_10/dropout/random_uniform/RandomUniform'
OOM when allocating tensor with shape[4,12,384,384] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node tf_bert_for_question_answering/bert/encoder/layer_._3/attention/self/dropout_10/dropout/random_uniform/RandomUniform}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_17425]