### Load Pre-Trained Model and Train, Dev, Test Data


In [4]:
# !pip install sklearn
# !pip install ekphrasis
# !pip install transformers
# !pip install spacy
# !python -m spacy download en_core_web_sm

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

import transformers

from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import backend as K
from tensorflow import keras

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

import os

import logging
tf.get_logger().setLevel(logging.ERROR)

In [7]:
train_data_df = pd.read_csv('./EDA_base_uncased/All_DA_BERT_base_uncased_train_examples.csv')
dev_data_df = pd.read_csv('./EDA_base_uncased/All_DA_BERT_base_uncased_dev_examples.csv')
test_data_df = pd.read_csv('./EDA_base_uncased/All_DA_BERT_base_uncased_test_examples.csv')

In [8]:
test_data_df.head()

Unnamed: 0.1,Unnamed: 0,post_id,text,target1,target2,target3,rationales,final_label,text_combined
0,0,1178851408168263680_twitter,"['empowerment', 'through', 'consumerism', 'is'...",['None'],['None'],['None'],[],normal,empowerment through consumerism is fake empowe...
1,1,4376210_gab,"['an', 'interesting', 'study', 'could', 'be', ...",['Jewish'],['Jewish'],"['Caucasian', 'Jewish']","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",hatespeech,an interesting study could be made of the diff...
2,2,1178735809358450688_twitter,"['there', 'is', 'no', 'logical', 'or', 'just',...",['None'],['None'],['None'],[],normal,there is no logical or just reason for a medio...
3,3,16169240_gab,"['in', '<number>', 'we', 'have', 'to', 'set', ...",['African'],['None'],['Other'],[],normal,in <number> we have to set up video outside th...
4,4,15983024_gab,"['ay', 'i', 'am', '<number>', '<number>', 'ita...",['None'],['Arab'],['Arab'],[],normal,ay i am <number> <number> italian bucko i can ...


In [9]:
test_data_text = test_data_df['text_combined'].to_list()
train_data_text = train_data_df['text_combined'].to_list()
dev_data_text = dev_data_df['text_combined'].to_list()

In [10]:
test_data_text

['empowerment through consumerism is fake empowerment ',
 'an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews ',
 'there is no logical or just reason for a mediocre few shielded from competition propped up by inherited riches and passports to feast on the world riches under the guise of meritocracy <user> ungrateful refugee <number> ',
 'in <number> we have to set up video outside the ghetto polling places so we can count the number of people entering and leaving it literally would not surprise me if almost nobody actually votes in these precincts and it just <number> fraud ',
 'ay i am <number> <number> italian bucko i can make meatballs and stromboli with the best of em but i am blonde and green eyes because the arabs did not invade my part of italy above rome ',
 'we must secure the 

In [11]:
test_data_labels = test_data_df['final_label']
train_data_labels = train_data_df['final_label']
dev_data_labels = dev_data_df['final_label']

In [12]:
def convert_to_oh(S):
    '''takes a pandas series of text labels and returns one hot encoding equivalent
    0 = normal, 1 = offensive, 2 = hatespeech
    ''' 
    S_numerical = S.apply(lambda x: 0 if x=='normal' else (1 if x=='offensive' else 2))
    S_oh = keras.utils.to_categorical(S_numerical, num_classes = 3, dtype = 'float32')
    return S_oh

In [13]:
y_test = convert_to_oh(test_data_labels)
y_train = convert_to_oh(train_data_labels)
y_dev = convert_to_oh(dev_data_labels)

In [14]:
y_test

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], dtype=float32)

### BERT Model

In [15]:
# tokenize

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#bert_model = TFBertModel.from_pretrained('bert-base-uncased')

In [16]:
max_length = 128
#x_train = tokenizer(train_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
x_test = tokenizer(test_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
#x_dev = tokenizer(dev_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')

In [17]:
x_test

{'input_ids': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[  101, 23011,  2083, ...,     0,     0,     0],
       [  101,  2019,  5875, ...,     0,     0,     0],
       [  101,  2045,  2003, ...,     0,     0,     0],
       ...,
       [  101,  2202,  1996, ...,     0,     0,     0],
       [  101,  2070,  1997, ...,     0,     0,     0],
       [  101,  1045,  2572, ...,     0,     0,     0]])>, 'token_type_ids': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>, 'attention_mask': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>}

In [18]:
# load selected BERT model

bert_model = TFBertModel.from_pretrained('bert-base-uncased')


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [19]:
from keras import backend as K

def balanced_recall(y_true, y_pred):
    """This function calculates the balanced recall metric
    recall = TP / (TP + FN)
    """
    recall_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true_class, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        recall_by_class = recall_by_class + recall
    return recall_by_class / y_pred.shape[1]

def balanced_precision(y_true, y_pred):
    """This function calculates the balanced precision metric
    precision = TP / (TP + FP)
    """
    precision_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred_class, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        precision_by_class = precision_by_class + precision
    # return average balanced metric for each class
    return precision_by_class / y_pred.shape[1]

def balanced_f1_score(y_true, y_pred):
    """This function calculates the F1 score metric"""
    precision = balanced_precision(y_true, y_pred)
    recall = balanced_recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

In [20]:
def create_classification_model(bert_model, hidden_size = 5, 
                                train_layers = -1, 
                                optimizer=tf.keras.optimizers.Adam()):
    """
    Build a simple classification model with BERT. Let's keep it simple and don't add dropout, layer norms, etc.
    """

    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                  'token_type_ids': token_type_ids,
                  'attention_mask': attention_mask}


    #restrict training to the train_layers outer transformer layers
    if not train_layers == -1:

            retrain_layers = []

            for retrain_layer_number in range(train_layers):

                layer_code = '_' + str(11 - retrain_layer_number)
                retrain_layers.append(layer_code)

            for w in bert_model.weights:
                if not any([x in w.name for x in retrain_layers]):
                    w._trainable = False


    bert_out = bert_model(bert_inputs)
    
    net = bert_out[0]
    
    classification_token = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(net)
    
    dropout1 = tf.keras.layers.Dropout(0.4, name="dropout1")(classification_token)
    
    hidden = tf.keras.layers.Dense(hidden_size, name='hidden_layer')(dropout1)
    
    dropout2 = tf.keras.layers.Dropout(0.4, name="dropout2")(hidden)

    classification = tf.keras.layers.Dense(3, activation='sigmoid',name='classification_layer')(dropout2)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], 
                                          outputs=[classification])
    
    METRICS = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), 
               balanced_recall, 
               balanced_precision, 
               balanced_f1_score,
               tf.keras.metrics.AUC(curve='ROC', name="auc_roc")]
    
    
    classification_model.compile(optimizer=optimizer,
                            loss=tf.keras.losses.CategoricalCrossentropy(),
                            metrics= METRICS)


    return classification_model

In [21]:
# create model

model = create_classification_model(bert_model, hidden_size = 5, train_layers = -1, optimizer=tf.keras.optimizers.Adam())

In [38]:
# load model weights from the fine tuned model you want to look at

# # original data
# model.load_weights('./EDA_base_uncased/original_data_base/original_data_base')

# synonym replacement: sr 0.1
model.load_weights('./EDA_base_uncased/EDA_sr_base/EDA_sr_base')

# # random insertion: ri 0.1
# model.load_weights('./EDA_base_uncased/EDA_ri_base/EDA_ri_base')

# # random deletion: rd 0.1
# model.load_weights('./EDA_base_uncased/EDA_rd_base/EDA_rd_base')

# # random sway: rs 0.1
# model.load_weights('./EDA_base_uncased/EDA_rs_base/EDA_rs_base')

# # all methods: all 0.1
# model.load_weights('./EDA_base_uncased/EDA_all_1_base/EDA_all_1_base')

# # all methods: all 0.5
# model.load_weights('./EDA_base_uncased/EDA_all_5_base/EDA_all_5_base')


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x25270e7ea60>

In [39]:
y_preds_array = model.predict([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask])

In [47]:
# prediction logits

y_preds_array

array([[0.9375381 , 0.50439125, 0.17676695],
       [0.44826123, 0.45445123, 0.6048157 ],
       [0.9412954 , 0.3982479 , 0.16614476],
       ...,
       [0.91132915, 0.19991164, 0.1610558 ],
       [0.78209525, 0.59508073, 0.27438024],
       [0.8750917 , 0.6356562 , 0.17999159]], dtype=float32)

In [40]:
from keras.utils.np_utils import to_categorical
y_preds = to_categorical(np.argmax(y_preds_array, 1), dtype = "int64")

In [41]:
y_preds

array([[1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       ...,
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]], dtype=int64)

In [42]:
# convert back to labels

y_test_cat = np.argmax(y_test, axis=1)
y_preds_cat = np.argmax(y_preds, axis=1)

y_test_cat

array([0, 2, 0, ..., 0, 1, 0], dtype=int64)

In [43]:
y_preds_cat

array([0, 2, 0, ..., 0, 0, 0], dtype=int64)

In [44]:
accuracy_score(y_test_cat, y_preds_cat)

0.703068122724909

In [45]:
f1_score(y_test_cat, y_preds_cat, average='macro')

0.6800177667361864

In [46]:
roc_auc_score(y_test, y_preds, multi_class='ovo',average='weighted')

0.7723966790333191