In [47]:
!pip install transformers



In [48]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf
from transformers import BertTokenizer

In [49]:
df = pd.read_csv('/content/encoded_data_1.csv')
df = df[['refer_text', 'label']]
df.head()

Unnamed: 0,refer_text,label
0,"for our demonstration system, we typically use...",3
1,"in this paper, we describe a search procedure ...",3
2,the algorithm works due to the fact that not a...,3
3,when translating the sentence monotonically fr...,3
4,in order to handle the necessary word reorderi...,3


In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 809 entries, 0 to 808
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   refer_text  809 non-null    object
 1   label       809 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 12.8+ KB


In [51]:
df['label'].value_counts()

label
3    564
4    105
0     62
2     60
1     18
Name: count, dtype: int64

In [None]:
"""
nom_label_dict = {
0   ' Results_Citation' : "result",
1   'Aim_Citation': "aim",
2   'Aim_citation': "aim",
3   'Hypothesis_Citation': "hypothesis",
4   'Implication_Citation': "implication",
5   'Method Citation': "method",
6   'Method citation': "method",
7   'Method citation |': "method",
8   'Method_CItation': "method",
9   'Method_Citation': "method",
10  'Method_Citation |': "method",
11  'Result Citation': "result",
12  'Result_Citation': "result",
13  'Results_Citation': "result",
    'Result_citation': "result",
    "method_Citation": "method",
    "Method_citation": "method"
}
"""


"""
0: method
1: aim
2: implication
3: result
4: hypothesis
"""

In [52]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [53]:
token = tokenizer.encode_plus(
    df['refer_text'].iloc[0],
    max_length=256,
    truncation=True,
    padding='max_length',
    add_special_tokens=True,
    return_tensors='tf'
)

In [54]:
token.input_ids

<tf.Tensor: shape=(1, 512), dtype=int32, numpy=
array([[  101,  1111,  1412, 11104,  1449,   117,  1195,  3417,  1329,
         1103,   185, 10607,  1158, 11810,   189,  1568,   134,   126,
          131,   121,  1106,  2420,  1146,  1103,  3403,  1118,   170,
         5318,   126,  1229,  3525,  1111,   170,  1353, 18126,  1107,
         5179, 10893,   119,   102,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0, 

In [55]:
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))

In [56]:
X_input_ids.shape

(809, 512)

In [59]:
def generate_training_data(df, ids, masks, tokenizer):
    for i, text in tqdm(enumerate(df['refer_text'])):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=256,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks

In [60]:
X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)

0it [00:00, ?it/s]

In [61]:
labels = np.zeros((len(df), 5))
labels.shape

(809, 5)

In [62]:
labels[np.arange(len(df)), df['label'].values] = 1 # one-hot encoded target tensor

In [63]:
labels

array([[0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       ...,
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])

In [64]:
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))

In [65]:
dataset.take(1) # one sample data

<_TakeDataset element_spec=(TensorSpec(shape=(512,), dtype=tf.float64, name=None), TensorSpec(shape=(512,), dtype=tf.float64, name=None), TensorSpec(shape=(5,), dtype=tf.float64, name=None))>

In [66]:
def MyDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    }, labels

In [67]:
dataset = dataset.map(MyDatasetMapFunction) # converting to required format for tensorflow dataset

In [68]:
dataset.take(1)

<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(512,), dtype=tf.float64, name=None), 'attention_mask': TensorSpec(shape=(512,), dtype=tf.float64, name=None)}, TensorSpec(shape=(5,), dtype=tf.float64, name=None))>

In [69]:
dataset = dataset.shuffle(100).batch(16, drop_remainder=True) # batch size, drop any left out tensor

In [70]:
dataset.take(1)

<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(16, 512), dtype=tf.float64, name=None), 'attention_mask': TensorSpec(shape=(16, 512), dtype=tf.float64, name=None)}, TensorSpec(shape=(16, 5), dtype=tf.float64, name=None))>

In [71]:
p = 0.8
train_size = int((len(df)//16)*p)

In [72]:
train_size

40

In [73]:
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

**MODEL**

In [74]:
from transformers import TFBertModel

In [75]:
bert_model = TFBertModel.from_pretrained('bert-base-cased')

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [76]:
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')

bert_embds = bert_model.bert(input_ids, attention_mask=attn_masks)[1] # 0 -> activation layer (3D), 1 -> pooled output layer (2D)
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(5, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes

model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, 512)]                0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, 512)]                0         []                            
 )                                                                                                
                                                                                                  
 bert (TFBertMainLayer)      TFBaseModelOutputWithPooli   1083102   ['input_ids[0][0]',           
                             ngAndCrossAttentions(last_   72         'attention_mask[0][0]']      
                             hidden_state=(None, 512, 7                                     

In [77]:
optim = tf.keras.optimizers.legacy.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()

acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

re = tf.keras.metrics.Recall(name='recall')
pre = tf.keras.metrics.Precision(name='precision')

# re_mac = tf.keras.metrics.Recall(name='recall', average='macro')
# pre_mac = tf.keras.metrics.Precision(name='precision', average='macro')

# re_mac = tf.keras.metrics.Recall(name='recall', average='micro')
# pre_mac = tf.keras.metrics.Precision(name='precision', average='micro')

f1_mac = tf.keras.metrics.F1Score(name="f1-macro", average='macro')
f1_mic = tf.keras.metrics.F1Score(name="f1-micro", average='micro')

In [78]:
model.compile(optimizer=optim, loss=loss_func, metrics=[acc, pre, re, f1_mac, f1_mic])
# label_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

In [79]:
hist = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=5
)

Epoch 1/5


ResourceExhaustedError: Graph execution error:

Detected at node model_1/bert/encoder/layer_._9/intermediate/Gelu/Erf defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-79-0424c383e30f>", line 1, in <cell line: 1>

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 1804, in fit

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 1398, in train_function

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 1381, in step_function

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 1370, in run_step

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 1147, in train_step

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/training.py", line 588, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filehsnnmhfy.py", line 34, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 514, in call

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 661, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/functional.py", line 671, in _run_internal_graph

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filehsnnmhfy.py", line 34, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_tf_utils.py", line 864, in run_call_with_unpacked_inputs

  File "/usr/local/lib/python3.10/dist-packages/transformers/models/bert/modeling_tf_bert.py", line 973, in call

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filehsnnmhfy.py", line 34, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/transformers/models/bert/modeling_tf_bert.py", line 607, in call

  File "/usr/local/lib/python3.10/dist-packages/transformers/models/bert/modeling_tf_bert.py", line 613, in call

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filehsnnmhfy.py", line 34, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/transformers/models/bert/modeling_tf_bert.py", line 552, in call

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 993, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filehsnnmhfy.py", line 34, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/transformers/models/bert/modeling_tf_bert.py", line 436, in call

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/activations.py", line 453, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/activations.py", line 465, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/tf_keras/src/activations.py", line 348, in gelu

failed to allocate memory
	 [[{{node model_1/bert/encoder/layer_._9/intermediate/Gelu/Erf}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_111817]

In [33]:
model.save('my_model_1')

**PREDICTION**

In [34]:
my_model = tf.keras.models.load_model('my_model_1')

In [35]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [36]:
def prepare_data(input_text, tokenizer):
    token = tokenizer.encode_plus(
        input_text,
        max_length=256,
        truncation=True,
        padding='max_length',
        add_special_tokens=True,
        return_tensors='tf'
    )
    return {
        'input_ids': tf.cast(token.input_ids, tf.float64),
        'attention_mask': tf.cast(token.attention_mask, tf.float64)
    }

In [37]:
def make_prediction(model, processed_data, classes=['method', 'aim', 'implication', 'result', 'hypothesis']):
    probs = model.predict(processed_data)[0]
    return classes[np.argmax(probs)]

In [46]:
input_text = input('Enter refer text here: ')
processed_data = prepare_data(input_text, tokenizer)
result = make_prediction(my_model_1, processed_data=processed_data)
print(f"Predicted My Model: {result}")

Enter refer text here: for comparison, the completely unsupervised feature-hmm baseline accuracy on the universal pos tags for english is 79.4%, and goes up to 88.7% with a treebank dictionary.
Predicted My Model: result
