In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json

from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow_hub as hub

from official.modeling import tf_utils
from official import nlp
import official.nlp.bert as bert

from official.nlp.bert.tokenization import FullTokenizer

# Load the required submodules
import official.nlp.optimization
import official.nlp.bert.bert_models
import official.nlp.bert.configs
import official.nlp.bert.run_classifier
import official.nlp.bert.tokenization
import official.nlp.data.classifier_data_lib
import official.nlp.modeling.losses
import official.nlp.modeling.models
import official.nlp.modeling.networks

from official.nlp.bert.bert_models import classifier_model

from keras.utils import to_categorical


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

2022-07-31 10:42:34.200115: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-31 10:42:34.263966: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-31 10:42:34.264756: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [3]:
df_full = pd.read_csv('/home/joker/Workspace/Playground/product_classification/datasets/toxic-comment/train.csv')
df_test = pd.read_csv('/home/joker/Workspace/Playground/product_classification/datasets/toxic-comment/test.csv')

In [4]:
train, test = train_test_split(df_full, test_size=0.4)

In [5]:
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

x_train = train["comment_text"]
y_train = train[list_classes]

x_test = test["comment_text"]
y_test = test[list_classes]

## Convert dataset to Tensor

In [6]:
# train_dataset = tf.data.Dataset.from_tensor_slices((x_train.values, y_train.values)).batch(64)
# validation_dataset = tf.data.Dataset.from_tensor_slices((x_test.values))

In [7]:
# val_batches = tf.data.experimental.cardinality(validation_dataset)
# test_dataset = validation_dataset.take(val_batches // 5)
# validation_dataset = validation_dataset.skip(val_batches // 5)

In [8]:
# AUTOTUNE = tf.data.AUTOTUNE

# train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
# validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
# test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

## Bert tokenizer

In [9]:
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12"

# Set up tokenizer to generate Tensorflow dataset
tokenizer = FullTokenizer(
    vocab_file=os.path.join(gs_folder_bert, "vocab.txt"),
     do_lower_case=True)

print("Vocab size:", len(tokenizer.vocab))

Vocab size: 30522


In [66]:
def encode_sentence(s, tokenizer):
    tokens = list(tokenizer.tokenize(s))
    tokens.append('[SEP]')
    return tokenizer.convert_tokens_to_ids(tokens)[:511]

def bert_encode(X, tokenizer):
    num_examples = len(X)
    

    train_sentence = tf.ragged.constant([encode_sentence(s, tokenizer) for s in X])
    
    print(train_sentence.shape)

    cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*train_sentence.shape[0]
    input_word_ids = tf.concat([cls, train_sentence], axis=-1)

    input_mask = tf.ones_like(input_word_ids).to_tensor()

    type_cls = tf.zeros_like(cls)
    type_s1 = tf.zeros_like(train_sentence)
    input_type_ids = tf.concat(
      [type_cls, type_s1], axis=-1).to_tensor()
    
    inputs = {
      'input_word_ids': input_word_ids.to_tensor(),
      'input_mask': input_mask,
      'input_type_ids': input_type_ids}

    return inputs

In [68]:
train_data = bert_encode(x_train, tokenizer)

(95742, None)


In [12]:
test_data = bert_encode(x_test, tokenizer)

2022-07-31 10:45:19.919860: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1185942820 exceeds 10% of free system memory.
2022-07-31 10:45:20.587897: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1185942820 exceeds 10% of free system memory.


In [69]:
bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
bert_config = bert.configs.BertConfig.from_dict(config_dict)

In [70]:
config_dict

{'attention_probs_dropout_prob': 0.1,
 'hidden_act': 'gelu',
 'hidden_dropout_prob': 0.1,
 'hidden_size': 768,
 'initializer_range': 0.02,
 'intermediate_size': 3072,
 'max_position_embeddings': 512,
 'num_attention_heads': 12,
 'num_hidden_layers': 12,
 'type_vocab_size': 2,
 'vocab_size': 30522}

In [71]:
y_cat_train = np.argmax(y_train.values, axis=1)
y_cat_test = np.argmax(y_test.values, axis=1)

In [77]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, y_cat_train)).batch(64)
# test_dataset = tf.data.Dataset.from_tensor_slices((test_data, y_cat_test)).batch(64)

In [72]:
bert_classifier, bert_encoder = classifier_model(bert_config, num_labels=6)

In [73]:
train_data

{'input_word_ids': <tf.Tensor: shape=(95742, 512), dtype=int32, numpy=
 array([[ 101, 3383, 2057, ...,    0,    0,    0],
        [ 101, 2158, 4667, ...,    0,    0,    0],
        [ 101, 4931, 2003, ...,    0,    0,    0],
        ...,
        [ 101, 2429, 2000, ...,    0,    0,    0],
        [ 101, 1045, 2097, ...,    0,    0,    0],
        [ 101, 1012, 2045, ...,    0,    0,    0]], dtype=int32)>,
 'input_mask': <tf.Tensor: shape=(95742, 512), dtype=int32, numpy=
 array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]], dtype=int32)>,
 'input_type_ids': <tf.Tensor: shape=(95742, 512), dtype=int32, numpy=
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>}

In [74]:
glue_batch = {key: val[:10] for key, val in train_data.items()}

bert_classifier(
    glue_batch, training=True
).numpy()

array([[ 0.5620789 ,  0.38155738,  0.338391  ,  0.06052753,  0.32146454,
         0.14640456],
       [ 0.78404355,  0.23936123,  0.35772076, -0.082772  ,  0.31593853,
         0.16375513],
       [ 0.49542993,  0.12003499,  0.34877443,  0.125855  ,  0.12888977,
         0.20339605],
       [ 0.5026561 ,  0.5735684 ,  0.16408962,  0.10675736,  0.31505823,
         0.39870036],
       [ 0.762105  ,  0.21120141,  0.34264725,  0.01520886,  0.07878332,
         0.27912143],
       [ 0.7057656 ,  0.30135822,  0.6528894 , -0.08894484,  0.11304336,
         0.03550708],
       [ 0.6490729 ,  0.16491961,  0.309677  ,  0.01864473,  0.07269877,
         0.1964193 ],
       [ 0.53626066,  0.15528786,  0.50216806, -0.13374597,  0.22705062,
         0.1100167 ],
       [ 0.70548767,  0.26602426,  0.5007291 , -0.01245066,  0.05366401,
         0.15641528],
       [ 0.51348037,  0.11947946,  0.34746248,  0.09024537,  0.2479679 ,
         0.08335721]], dtype=float32)

In [17]:
checkpoint = tf.train.Checkpoint(encoder=bert_encoder)
checkpoint.read(
    os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f9918395370>

In [75]:
# Set up epochs and steps
epochs = 3
batch_size = 32
eval_batch_size = 32

train_data_size = len(y_cat_train)
steps_per_epoch = int(train_data_size / batch_size)
num_train_steps = steps_per_epoch * epochs
warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)

# creates an optimizer with learning rate schedule
optimizer = nlp.optimization.create_optimizer(
    2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)

In [76]:
metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy', dtype=tf.float32)]
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

bert_classifier.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics)



In [78]:
bert_classifier.fit(
      train_dataset,
      validation_data=train_dataset,
      epochs=epochs)

Epoch 1/3


2022-07-31 11:44:18.459329: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 768.00MiB (rounded to 805306368)requested by op bert_classifier_1/bert_encoder_3/transformer/layer_0/self_attention/einsum/Einsum
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-07-31 11:44:18.459490: I tensorflow/core/common_runtime/bfc_allocator.cc:1027] BFCAllocator dump for GPU_0_bfc
2022-07-31 11:44:18.459547: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (256): 	Total Chunks: 181, Chunks in use: 180. 45.2KiB allocated for chunks. 45.0KiB in use in bin. 1.1KiB client-requested in use in bin.
2022-07-31 11:44:18.459584: I tensorflow/core/common_runtime/bfc_allocator.cc:1034] Bin (512): 	Total Chunks: 1, Chunks in use: 1. 512B allocated for chunks. 512B in use in bin. 5

ResourceExhaustedError: Graph execution error:

Detected at node 'bert_classifier_1/bert_encoder_3/transformer/layer_0/self_attention/einsum/Einsum' defined at (most recent call last):
    File "/usr/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/traitlets/config/application.py", line 972, in launch_instance
      app.start()
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/usr/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/usr/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/usr/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
      await result
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2881, in run_cell
      result = self._run_cell(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 2936, in _run_cell
      return runner(coro)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3135, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3338, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_10090/1164625899.py", line 1, in <cell line: 1>
      bert_classifier.fit(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 889, in train_step
      y_pred = self(x, training=True)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/official/nlp/keras_nlp/layers/transformer_encoder_block.py", line 254, in call
      attention_output = self._attention_layer(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/layers/attention/multi_head_attention.py", line 509, in call
      attention_output, attention_scores = self._compute_attention(
    File "/home/joker/.virtualenvs/tensorflow_lastest/lib/python3.9/site-packages/keras/layers/attention/multi_head_attention.py", line 472, in _compute_attention
      attention_scores = tf.einsum(self._dot_product_equation, key, query)
Node: 'bert_classifier_1/bert_encoder_3/transformer/layer_0/self_attention/einsum/Einsum'
OOM when allocating tensor with shape[64,12,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node bert_classifier_1/bert_encoder_3/transformer/layer_0/self_attention/einsum/Einsum}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_32152]