In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, interactive
from IPython.display import display


from utils.metric import roc_auc
from utils.model import define_rnn_model, define_cnn_model, define_lstm_model, define_gru_model, define_bi_model, define_cnn_rnn_model, define_BERT_model
from utils.encode import fast_encode
from utils.tokenizer import BERT_tokenizer

if not tf.__version__.startswith('2'):
    raise ValueError('This code requires TensorFlow V2.x')

In [2]:
# Data
o_train = pd.read_csv('Data/jigsaw-toxic-comment-train.csv')

# Pre processing
o_train.drop(['severe_toxic','obscene','threat','insult','identity_hate'],axis=1,inplace=True) # Drop other columns

# Get input
# type_input = widgets.Dropdown(
#     options=['Phần trăm', 'Số lượng'],
#     value='Số lượng',
#     description='Dữ liệu vào',
#     disabled=False,
# )
# value_input = widgets.IntSlider(
#     value=20000,
#     min=0,
#     max=o_train.shape[0],
#     step=1000,
#     description='Số lượng',
#     readout=True
# )

# def update_value_input(*args):
#     if type_input.value == 'Số lượng':
#         value_input.value=50000
#         value_input.max=o_train.shape[0]
#         value_input.step=1000
#         value_input.description='Số lượng'
#     else:
#         value_input.value=50
#         value_input.max=100
#         value_input.step=1
#         value_input.description='Phần trăm'
# type_input.observe(update_value_input, 'value')

# display(type_input)
# display(value_input)


In [3]:
model_array = ['cnn', 'rnn', 'lstm', 'gru', 'bi_directional', 'cnn + rnn', 'bert']

# model_input = widgets.Dropdown(
#     options=model_array,
#     value='rnn',
#     description='Loại mô hình',
#     disabled=False,
# )

# display(model_input)

In [4]:
name_of_model = 'bert'

In [5]:
# # Embedding
# embeddings_index = {}
# f = open('glove.840B.300d.txt','r',encoding='utf-8')
# for line in tqdm(f):
#     values = line.split(' ')
#     word = values[0]
#     coefs = np.asarray([float(val) for val in values[1:]])
#     embeddings_index[word] = coefs
# f.close()

# print('Found %s word vectors.' % len(embeddings_index))

In [6]:
# if type_input.value == 'Số lượng':
#     train = o_train.loc[:value_input.value,:]
# else:
#     train = o_train.loc[:value_input.value * o_train.shape[0] / 100,:]
train = o_train.loc[:50000,:]
max_test = train['comment_text'].apply(lambda x:len(str(x).split())).max() # Max test's length

xtrain, xvalid, ytrain, yvalid = train_test_split(train.comment_text.values, train.toxic.values, 
                                                  stratify=train.toxic.values, 
                                                  random_state=42, 
                                                  test_size=0.2, shuffle=True)

In [7]:
token = keras.preprocessing.text.Tokenizer(num_words=None)

token.fit_on_texts(list(xtrain) + list(xvalid))
xtrain_seq = token.texts_to_sequences(xtrain)
xvalid_seq = token.texts_to_sequences(xvalid)

#zero pad the sequences
xtrain_pad = keras.preprocessing.sequence.pad_sequences(xtrain_seq, maxlen=max_test)
xvalid_pad = keras.preprocessing.sequence.pad_sequences(xvalid_seq, maxlen=max_test)

word_index = token.word_index

In [8]:
max_vocab = len(word_index) + 1
model_type_array = {
    # 'cnn': define_cnn_model(max_vocab, max_test),
    # 'rnn': define_rnn_model(max_vocab, max_test), 
    # 'lstm': define_lstm_model(max_vocab, max_test), 
    # 'gru': define_gru_model(max_vocab, max_test),
    # 'bi_directional': define_bi_model(max_vocab, max_test),
    # 'cnn + rnn': define_cnn_rnn_model(max_vocab, max_test),
    'bert': define_BERT_model(max_test),
}
model = model_type_array[name_of_model]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_word_ids (InputLayer)  [(None, 1403)]           0         
                                                                 
 tf_distil_bert_model (TFDis  TFBaseModelOutput(last_h  65190912 
 tilBertModel)               idden_state=(None, 1403,            
                              768),                              
                              hidden_states=None, att            
                             entions=None)                       
                                                                 
 tf.__operators__.getitem (S  (None, 768)              0         
 licingOpLambda)                                                 
                                                                 
 dense (Dense)               (None, 1)                 769       
                                                             

  super().__init__(name, **kwargs)


In [9]:
if name_of_model == 'bert':
    # Do different things for BERT
    x_train = fast_encode(xtrain, BERT_tokenizer(), maxlen=max_test)
    x_valid = fast_encode(xvalid, BERT_tokenizer(), maxlen=max_test)
    train_dataset = (
        tf.data.Dataset
        .from_tensor_slices((x_train, ytrain))
        .repeat()
        .shuffle(2048)
        .batch(16)
    )

    valid_dataset = (
        tf.data.Dataset
        .from_tensor_slices((x_valid, yvalid))
        .batch(16)
        .cache()
    )
    
    history = model.fit(train_dataset, steps_per_epoch=x_train.shape[0] // 16, validation_data=valid_dataset, epochs=2)
elif name_of_model == 'cnn':
    history = model.fit([xtrain_pad, xtrain_pad, xtrain_pad], ytrain, epochs=10)
    model.save('cnn.h5')
    scores = model.predict([xvalid_pad, xvalid_pad, xvalid_pad])
    print("Auc: %.2f%%" % (roc_auc(scores, yvalid)))
else:
    history = model.fit(xtrain_pad, ytrain, epochs=10)
    model.save(name_of_model.value + '.h5')
    scores = model.predict(xvalid_pad)
    print("Auc: %.2f%%" % (roc_auc(scores, yvalid)))

100%|██████████| 157/157 [00:08<00:00, 19.53it/s]
100%|██████████| 40/40 [00:02<00:00, 19.63it/s]


Epoch 1/2


ResourceExhaustedError: Graph execution error:

Detected at node 'model/tf_distil_bert_model/distilbert/transformer/layer_._0/attention/MatMul' defined at (most recent call last):
    File "e:\Programming\NLU\.conda\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "e:\Programming\NLU\.conda\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "e:\Programming\NLU\.conda\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "e:\Programming\NLU\.conda\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "e:\Programming\NLU\.conda\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "e:\Programming\NLU\.conda\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "e:\Programming\NLU\.conda\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "e:\Programming\NLU\.conda\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell
      result = self._run_cell(
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell
      result = runner(coro)
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "e:\Programming\NLU\.conda\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\ADMIN\AppData\Local\Temp\ipykernel_14396\1486136080.py", line 20, in <module>
      history = model.fit(train_dataset, steps_per_epoch=x_train.shape[0] // 16, validation_data=valid_dataset, epochs=2)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\modeling_tf_utils.py", line 611, in run_call_with_unpacked_inputs
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 612, in call
      outputs = self.distilbert(
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\modeling_tf_utils.py", line 611, in run_call_with_unpacked_inputs
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 465, in call
      tfmr_output = self.transformer(
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 368, in call
      for i, layer_module in enumerate(self.layer):
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 372, in call
      layer_outputs = layer_module(hidden_state, attn_mask, head_mask[i], output_attentions, training=training)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 303, in call
      sa_output = self.attention(x, x, x, attn_mask, head_mask, output_attentions, training=training)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "e:\Programming\NLU\.conda\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py", line 199, in call
      scores = tf.matmul(q, k, transpose_b=True)  # (bs, n_heads, q_length, k_length)
Node: 'model/tf_distil_bert_model/distilbert/transformer/layer_._0/attention/MatMul'
OOM when allocating tensor with shape[16,12,1403,1403] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node model/tf_distil_bert_model/distilbert/transformer/layer_._0/attention/MatMul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_13176]