### Load Pre-Trained Model and Train, Dev, Test Data


In [1]:
# !pip install sklearn
# !pip install ekphrasis
# !pip install transformers
# !pip install spacy
# !python -m spacy download en_core_web_sm

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds

import transformers

from transformers import BertTokenizer, TFBertModel
from tensorflow.keras import backend as K
from tensorflow import keras

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

import os

import logging
tf.get_logger().setLevel(logging.ERROR)

In [3]:
train_data_df = pd.read_csv('./EDA_base_uncased_5aug/All_DA_BERT_base_uncased_train_examples.csv')
dev_data_df = pd.read_csv('./EDA_base_uncased_5aug/All_DA_BERT_base_uncased_dev_examples.csv')
test_data_df = pd.read_csv('./EDA_base_uncased_5aug/All_DA_BERT_base_uncased_test_examples.csv')

In [4]:
test_data_df.head()

Unnamed: 0.1,Unnamed: 0,post_id,text,target1,target2,target3,rationales,final_label,text_combined
0,0,1178851408168263680_twitter,"['empowerment', 'through', 'consumerism', 'is'...",['None'],['None'],['None'],[],normal,empowerment through consumerism is fake empowe...
1,1,4376210_gab,"['an', 'interesting', 'study', 'could', 'be', ...",['Jewish'],['Jewish'],"['Caucasian', 'Jewish']","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",hatespeech,an interesting study could be made of the diff...
2,2,1178735809358450688_twitter,"['there', 'is', 'no', 'logical', 'or', 'just',...",['None'],['None'],['None'],[],normal,there is no logical or just reason for a medio...
3,3,16169240_gab,"['in', '<number>', 'we', 'have', 'to', 'set', ...",['African'],['None'],['Other'],[],normal,in <number> we have to set up video outside th...
4,4,15983024_gab,"['ay', 'i', 'am', '<number>', '<number>', 'ita...",['None'],['Arab'],['Arab'],[],normal,ay i am <number> <number> italian bucko i can ...


In [5]:
test_data_text = test_data_df['text_combined'].to_list()
train_data_text = train_data_df['text_combined'].to_list()
dev_data_text = dev_data_df['text_combined'].to_list()

In [6]:
test_data_text

['empowerment through consumerism is fake empowerment ',
 'an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews ',
 'there is no logical or just reason for a mediocre few shielded from competition propped up by inherited riches and passports to feast on the world riches under the guise of meritocracy <user> ungrateful refugee <number> ',
 'in <number> we have to set up video outside the ghetto polling places so we can count the number of people entering and leaving it literally would not surprise me if almost nobody actually votes in these precincts and it just <number> fraud ',
 'ay i am <number> <number> italian bucko i can make meatballs and stromboli with the best of em but i am blonde and green eyes because the arabs did not invade my part of italy above rome ',
 'we must secure the 

In [7]:
test_data_labels = test_data_df['final_label']
train_data_labels = train_data_df['final_label']
dev_data_labels = dev_data_df['final_label']

In [8]:
def convert_to_oh(S):
    '''takes a pandas series of text labels and returns one hot encoding equivalent
    0 = normal, 1 = offensive, 2 = hatespeech
    ''' 
    S_numerical = S.apply(lambda x: 0 if x=='normal' else (1 if x=='offensive' else 2))
    S_oh = keras.utils.to_categorical(S_numerical, num_classes = 3, dtype = 'float32')
    return S_oh

In [9]:
y_test = convert_to_oh(test_data_labels)
y_train = convert_to_oh(train_data_labels)
y_dev = convert_to_oh(dev_data_labels)

In [10]:
y_test

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], dtype=float32)

### BERT Model

In [11]:
# tokenize

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#bert_model = TFBertModel.from_pretrained('bert-base-uncased')

In [12]:
max_length = 128
#x_train = tokenizer(train_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
x_test = tokenizer(test_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')
#x_dev = tokenizer(dev_data_text , max_length=max_length, truncation=True, padding='max_length', return_tensors='tf')

In [13]:
x_test

{'input_ids': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[  101, 23011,  2083, ...,     0,     0,     0],
       [  101,  2019,  5875, ...,     0,     0,     0],
       [  101,  2045,  2003, ...,     0,     0,     0],
       ...,
       [  101,  2202,  1996, ...,     0,     0,     0],
       [  101,  2070,  1997, ...,     0,     0,     0],
       [  101,  1045,  2572, ...,     0,     0,     0]])>, 'token_type_ids': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>, 'attention_mask': <tf.Tensor: shape=(1923, 128), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>}

In [14]:
# load selected BERT model

bert_model = TFBertModel.from_pretrained('bert-base-uncased')


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [15]:
from keras import backend as K

def balanced_recall(y_true, y_pred):
    """This function calculates the balanced recall metric
    recall = TP / (TP + FN)
    """
    recall_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true_class, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        recall_by_class = recall_by_class + recall
    return recall_by_class / y_pred.shape[1]

def balanced_precision(y_true, y_pred):
    """This function calculates the balanced precision metric
    precision = TP / (TP + FP)
    """
    precision_by_class = 0
    # iterate over each predicted class to get class-specific metric
    for i in range(y_pred.shape[1]):
        y_pred_class = y_pred[:, i]
        y_true_class = y_true[:, i]
        true_positives = K.sum(K.round(K.clip(y_true_class * y_pred_class, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred_class, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        precision_by_class = precision_by_class + precision
    # return average balanced metric for each class
    return precision_by_class / y_pred.shape[1]

def balanced_f1_score(y_true, y_pred):
    """This function calculates the F1 score metric"""
    precision = balanced_precision(y_true, y_pred)
    recall = balanced_recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

In [16]:
def create_classification_model(bert_model, hidden_size = 5, 
                                train_layers = -1, 
                                optimizer=tf.keras.optimizers.Adam()):
    """
    Build a simple classification model with BERT. Let's keep it simple and don't add dropout, layer norms, etc.
    """

    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                  'token_type_ids': token_type_ids,
                  'attention_mask': attention_mask}


    #restrict training to the train_layers outer transformer layers
    if not train_layers == -1:

            retrain_layers = []

            for retrain_layer_number in range(train_layers):

                layer_code = '_' + str(11 - retrain_layer_number)
                retrain_layers.append(layer_code)

            for w in bert_model.weights:
                if not any([x in w.name for x in retrain_layers]):
                    w._trainable = False


    bert_out = bert_model(bert_inputs)
    
    net = bert_out[0]
    
    classification_token = tf.keras.layers.Lambda(lambda x: x[:,0,:], name='get_first_vector')(net)
    
    dropout1 = tf.keras.layers.Dropout(0.4, name="dropout1")(classification_token)
    
    hidden = tf.keras.layers.Dense(hidden_size, name='hidden_layer')(dropout1)
    
    dropout2 = tf.keras.layers.Dropout(0.4, name="dropout2")(hidden)

    classification = tf.keras.layers.Dense(3, activation='sigmoid',name='classification_layer')(dropout2)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], 
                                          outputs=[classification])
    
    METRICS = [tf.keras.metrics.CategoricalAccuracy(name="accuracy"), 
               balanced_recall, 
               balanced_precision, 
               balanced_f1_score,
               tf.keras.metrics.AUC(curve='ROC', name="auc_roc")]
    
    
    classification_model.compile(optimizer=optimizer,
                            loss=tf.keras.losses.CategoricalCrossentropy(),
                            metrics= METRICS)


    return classification_model

In [17]:
# create model

model = create_classification_model(bert_model, hidden_size = 5, train_layers = -1, optimizer=tf.keras.optimizers.Adam())

In [18]:
# load model weights from the fine tuned model you want to look at

# # original data
# model.load_weights('./EDA_base_uncased/original_data_base/original_data_base')

# synonym replacement: sr 0.1
# model.load_weights('./EDA_base_uncased/EDA_sr_base/EDA_sr_base')

# # random insertion: ri 0.1
# model.load_weights('./EDA_base_uncased/EDA_ri_base/EDA_ri_base')

# # random deletion: rd 0.1
# model.load_weights('./EDA_base_uncased/EDA_rd_base/EDA_rd_base')

# # random sway: rs 0.1
# model.load_weights('./EDA_base_uncased/EDA_rs_base/EDA_rs_base')

# # all methods: all 0.1
# model.load_weights('./EDA_base_uncased/EDA_all_1_base/EDA_all_1_base')

# # all methods: all 0.5
# model.load_weights('./EDA_base_uncased/EDA_all_5_base/EDA_all_5_base')

# # contextual insertion
# model.load_weights('./NLA_b_uncased_5aug/NLA_ins_base/NLA_ins_base')

# contextual insertion
model.load_weights('./NLA_b_uncased_5aug/NLA_sub_base/NLA_sub_base')


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1e2d886fc70>

In [19]:
y_preds_array = model.predict([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask])

In [20]:
# prediction logits

y_preds_array

array([[0.5888786 , 0.5121254 , 0.22753629],
       [0.5395787 , 0.48005375, 0.5323035 ],
       [0.65468377, 0.43723613, 0.2881892 ],
       ...,
       [0.7509989 , 0.28641522, 0.43727416],
       [0.7284408 , 0.38636476, 0.15373392],
       [0.8445087 , 0.27296597, 0.07415991]], dtype=float32)

In [21]:
from keras.utils.np_utils import to_categorical
y_preds = to_categorical(np.argmax(y_preds_array, 1), dtype = "int64")

In [22]:
y_preds

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       ...,
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]], dtype=int64)

In [23]:
# convert back to labels

y_test_cat = np.argmax(y_test, axis=1)
y_preds_cat = np.argmax(y_preds, axis=1)

y_test_cat

array([0, 2, 0, ..., 0, 1, 0], dtype=int64)

In [24]:
y_preds_cat

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [25]:
accuracy_score(y_test_cat, y_preds_cat)

0.7082683307332294

In [26]:
f1_score(y_test_cat, y_preds_cat, average='macro')

0.6819872101827303

In [27]:
roc_auc_score(y_test, y_preds, multi_class='ovo',average='weighted')

0.7753235770592908

In [28]:
test_data_text

['empowerment through consumerism is fake empowerment ',
 'an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews ',
 'there is no logical or just reason for a mediocre few shielded from competition propped up by inherited riches and passports to feast on the world riches under the guise of meritocracy <user> ungrateful refugee <number> ',
 'in <number> we have to set up video outside the ghetto polling places so we can count the number of people entering and leaving it literally would not surprise me if almost nobody actually votes in these precincts and it just <number> fraud ',
 'ay i am <number> <number> italian bucko i can make meatballs and stromboli with the best of em but i am blonde and green eyes because the arabs did not invade my part of italy above rome ',
 'we must secure the 

In [49]:
test_data_df['text_qc'] = test_data_text
test_data_df['class'] = y_test_cat
test_data_df['pred_class'] = y_preds_cat
test_data_df['correct_pred'] = (test_data_df['class'] == test_data_df['pred_class'])

In [50]:
test_data_df

Unnamed: 0.1,Unnamed: 0,post_id,text,target1,target2,target3,rationales,final_label,text_combined,text_qc,class,pred_class,correct_pred
0,0,1178851408168263680_twitter,"['empowerment', 'through', 'consumerism', 'is', 'fake', 'empowerment']",['None'],['None'],['None'],[],normal,empowerment through consumerism is fake empowerment,empowerment through consumerism is fake empowerment,0,0,True
1,1,4376210_gab,"['an', 'interesting', 'study', 'could', 'be', 'made', 'of', 'the', 'difference', 'between', 'jews', 'who', 'are', 'more', 'or', 'less', 'professional', 'leftists', 'and', 'whiteskin', 'leftists', 'particularly', 'in', 'their', 'mentation', 'over', 'time', 'my', 'theory', 'is', 'whiteskins', 'are', 'less', 'able', 'to', 'handle', 'lying', 'as', 'permanent', 'mo', 'than', 'jews']",['Jewish'],['Jewish'],"['Caucasian', 'Jewish']","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]",hatespeech,an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews,an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews,2,0,False
2,2,1178735809358450688_twitter,"['there', 'is', 'no', 'logical', 'or', 'just', 'reason', 'for', 'a', 'mediocre', 'few', 'shielded', 'from', 'competition', 'propped', 'up', 'by', 'inherited', 'riches', 'and', 'passports', 'to', 'feast', 'on', 'the', 'world', 'riches', 'under', 'the', 'guise', 'of', 'meritocracy', '<user>', 'ungrateful', 'refugee', '<number>']",['None'],['None'],['None'],[],normal,there is no logical or just reason for a mediocre few shielded from competition propped up by inherited riches and passports to feast on the world riches under the guise of meritocracy <user> ungrateful refugee <number>,there is no logical or just reason for a mediocre few shielded from competition propped up by inherited riches and passports to feast on the world riches under the guise of meritocracy <user> ungrateful refugee <number>,0,0,True
3,3,16169240_gab,"['in', '<number>', 'we', 'have', 'to', 'set', 'up', 'video', 'outside', 'the', 'ghetto', 'polling', 'places', 'so', 'we', 'can', 'count', 'the', 'number', 'of', 'people', 'entering', 'and', 'leaving', 'it', 'literally', 'would', 'not', 'surprise', 'me', 'if', 'almost', 'nobody', 'actually', 'votes', 'in', 'these', 'precincts', 'and', 'it', 'just', '<number>', 'fraud']",['African'],['None'],['Other'],[],normal,in <number> we have to set up video outside the ghetto polling places so we can count the number of people entering and leaving it literally would not surprise me if almost nobody actually votes in these precincts and it just <number> fraud,in <number> we have to set up video outside the ghetto polling places so we can count the number of people entering and leaving it literally would not surprise me if almost nobody actually votes in these precincts and it just <number> fraud,0,0,True
4,4,15983024_gab,"['ay', 'i', 'am', '<number>', '<number>', 'italian', 'bucko', 'i', 'can', 'make', 'meatballs', 'and', 'stromboli', 'with', 'the', 'best', 'of', 'em', 'but', 'i', 'am', 'blonde', 'and', 'green', 'eyes', 'because', 'the', 'arabs', 'did', 'not', 'invade', 'my', 'part', 'of', 'italy', 'above', 'rome']",['None'],['Arab'],['Arab'],[],normal,ay i am <number> <number> italian bucko i can make meatballs and stromboli with the best of em but i am blonde and green eyes because the arabs did not invade my part of italy above rome,ay i am <number> <number> italian bucko i can make meatballs and stromboli with the best of em but i am blonde and green eyes because the arabs did not invade my part of italy above rome,0,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1918,1918,1120656515143127040_twitter,"['<user>', 'keep', 'muslim', 'away', 'defend', 'christian', 'faith', 'ban', 'abortion', 'welcome', 'black', 'american', 'babies', 'in', 'the', 'us']","['African', 'Christian', 'Islam']",['Islam'],"['African', 'Christian', 'Islam']","[[0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0]]",offensive,<user> keep muslim away defend christian faith ban abortion welcome black american babies in the us,<user> keep muslim away defend christian faith ban abortion welcome black american babies in the us,1,0,False
1919,1919,1178830600171601924_twitter,"['<user>', 'where', 'is', 'the', 'diversity', 'video', 'full', 'of', 'white', 'people', 'trust', 'me', 'very', 'painful', 'for', 'the', 'minority', 'very', 'painful']",['None'],['None'],['None'],[],normal,<user> where is the diversity video full of white people trust me very painful for the minority very painful,<user> where is the diversity video full of white people trust me very painful for the minority very painful,0,0,True
1920,1920,1170506573170561024_twitter,"['take', 'the', 'driver', 'and', 'bake', 'it', 'until', 'it', 'is', 'laminated', 'find', 'someone', 'named', 'hathaway', 'on', 'william', 'street', 'repeat', 'until', 'white', 'repeat', 'until', 'resonnt']",['None'],['None'],['None'],[],normal,take the driver and bake it until it is laminated find someone named hathaway on william street repeat until white repeat until resonnt,take the driver and bake it until it is laminated find someone named hathaway on william street repeat until white repeat until resonnt,0,0,True
1921,1921,1178145888922939393_twitter,"['some', 'of', 'you', 'niggas', 'be', 'lookin', 'like', 'dykes', 'i', 'follow', 'back', 'like', 'a', 'fool', 'stop', 'setting', 'me', 'up']",['Homosexual'],['None'],['Homosexual'],"[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]",offensive,some of you niggas be lookin like dykes i follow back like a fool stop setting me up,some of you niggas be lookin like dykes i follow back like a fool stop setting me up,1,0,False


In [54]:
misclass_df = test_data_df[test_data_df['correct_pred'] == False][['post_id', 'target1', 'target2', 'target3', 'text_combined', 'final_label', 'class', 'pred_class', 'correct_pred']]

In [58]:
len(misclass_df)

561

In [55]:
pd.set_option('display.max_colwidth', None)

In [57]:
misclass_df.head()

Unnamed: 0,post_id,target1,target2,target3,text_combined,final_label,class,pred_class,correct_pred
1,4376210_gab,['Jewish'],['Jewish'],"['Caucasian', 'Jewish']",an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews,hatespeech,2,0,False
5,1172328099188330497_twitter,['None'],['None'],['None'],we must secure the existence of vape pens and a future for white vape clouds,normal,0,2,False
6,1278566981466296320_twitter,['None'],['Other'],['African'],i don ’ t see how y ’ all ride around listening to bounce that shit so ghetto amp give me a headache 😂,offensive,1,0,False
8,6070354_gab,['Jewish'],['Jewish'],['Jewish'],whatever crypto infiltrator u are yeah but ur pro zionist are not u definition of jew believes in and pushes holocaust hoax as well as pushes <number> <number> was done by arabs story,hatespeech,2,1,False
9,1228700650499784704_twitter,['Homosexual'],['None'],"['Homosexual', 'Other']",boycottshehnaazkishaadi come on gays make a trend,normal,0,1,False


In [59]:
misclass_norm_df = misclass_df[misclass_df['class'] == 0]
misclass_offe_df = misclass_df[misclass_df['class'] == 1]
misclass_hate_df = misclass_df[misclass_df['class'] == 2]

In [62]:
print('Normal Examples Misclassified: ' + str(len(misclass_norm_df)))
print('Offensive Examples Misclassified: ' + str(len(misclass_offe_df)))
print('Hate Examples Misclassified: ' + str(len(misclass_hate_df)))

Normal Examples Misclassified: 129
Offensive Examples Misclassified: 319
Hate Examples Misclassified: 113


In [65]:
misclass_norm_df.head(50)

Unnamed: 0,post_id,target1,target2,target3,text_combined,final_label,class,pred_class,correct_pred
5,1172328099188330497_twitter,['None'],['None'],['None'],we must secure the existence of vape pens and a future for white vape clouds,normal,0,2,False
9,1228700650499784704_twitter,['Homosexual'],['None'],"['Homosexual', 'Other']",boycottshehnaazkishaadi come on gays make a trend,normal,0,1,False
38,1179066581944557575_twitter,['Refugee'],['None'],"['Other', 'Refugee', 'Women']",i find it funny that people from shithole countries black people brown people those nasty the squad ladies nasty meghan the dreamers those migrants seperated black football players people of color are not the people that is on his ass it white people,normal,0,2,False
74,1178733292524163072_twitter,['Homosexual'],['None'],['None'],well maybe i am the faggot america i am not a part of a redneck agenda now everybody do the propaganda and sing along to the age of paranoia,normal,0,2,False
84,1179089265038249984_twitter,['None'],['None'],"['Arab', 'Other']",<user> <user> ok i admit the tractor looks like a muzzie nuni rest all 😍,normal,0,2,False
85,1178513955041501184_twitter,['None'],['None'],['None'],male feminists white knights secretly think women are retarded so it makes sense to infantilize their choices,normal,0,1,False
100,1170830037878104066_twitter,['Other'],['Asian'],['Asian'],chinazi this is china this is hong kong today,normal,0,1,False
107,18360243_gab,"['African', 'Arab', 'Asian', 'Hispanic']",['Refugee'],['None'],why are only white nations being invaded buy non white immigrants,normal,0,1,False
123,1118721393313579008_twitter,['Other'],['Other'],['None'],so are we gonna get let ’ get married or is baby boy the closest thing to it we ’ ll ever get <user> ghettobaby,normal,0,1,False
124,1211716604016369665_twitter,['None'],['Other'],['None'],<user> graysons the ghetto currently,normal,0,1,False


In [66]:
misclass_offe_df.head(50)

Unnamed: 0,post_id,target1,target2,target3,text_combined,final_label,class,pred_class,correct_pred
6,1278566981466296320_twitter,['None'],['Other'],['African'],i don ’ t see how y ’ all ride around listening to bounce that shit so ghetto amp give me a headache 😂,offensive,1,0,False
11,25213655_gab,['Islam'],['Islam'],"['Islam', 'Jewish']",zog agents attacking islam let islam take down zog,offensive,1,2,False
12,1201089995102015490_twitter,['Islam'],['Islam'],['Islam'],<user> muslims are confused what is going on and some muslims extrimists had applied visa for bangladesh and pakistan,offensive,1,0,False
19,24941415_gab,['Women'],['Women'],['Women'],oh i abuse women alright i abuse the fook out that pwussy han,offensive,1,0,False
20,1177583026730983424_twitter,['African'],['African'],['None'],fucking nigger me up the asshole with a hotrod nigga,offensive,1,0,False
35,7456053_gab,['None'],['Arab'],"['Arab', 'Other']",i know where they ended up but ashkenazim wherever they are are of that italian me descent was under the impression main change in me na was just arabs replacing mixing with whoever was already there,offensive,1,0,False
39,1178062774834012161_twitter,['Homosexual'],['Homosexual'],['Homosexual'],<user> did not knew about the homosexuality part if true then it a wonder that this faggot empire survived even that long lol,offensive,1,2,False
40,1174368692483833857_twitter,['Women'],['None'],['Women'],i been saving this bitch an ass whooping since i was pregnant pero ya anda preñada 🤦 🏻 ‍ ♀ ️,offensive,1,0,False
45,1094696319321550851_twitter,['African'],['African'],['Jewish'],<user> for millions of americans including many black americans abortion is literally a holocaust of the unborn,offensive,1,0,False
49,17561056_gab,['None'],['None'],['Women'],the woman she spoke of was never raped not by six white men not by anybody,offensive,1,0,False


In [67]:
misclass_hate_df.head(50)

Unnamed: 0,post_id,target1,target2,target3,text_combined,final_label,class,pred_class,correct_pred
1,4376210_gab,['Jewish'],['Jewish'],"['Caucasian', 'Jewish']",an interesting study could be made of the difference between jews who are more or less professional leftists and whiteskin leftists particularly in their mentation over time my theory is whiteskins are less able to handle lying as permanent mo than jews,hatespeech,2,0,False
8,6070354_gab,['Jewish'],['Jewish'],['Jewish'],whatever crypto infiltrator u are yeah but ur pro zionist are not u definition of jew believes in and pushes holocaust hoax as well as pushes <number> <number> was done by arabs story,hatespeech,2,1,False
16,20104013_gab,['Islam'],['Islam'],['Islam'],woman in charge of sandwell child services where paki moslem rape gangs are on the rise,hatespeech,2,0,False
25,13855748_gab,['African'],"['Homosexual', 'African', 'Women']","['Homosexual', 'African']",does the nfl still think poor blacks faggots dykes and hillary supporters are going to fill their stadiums look again maga gabfam news,hatespeech,2,1,False
34,1268038905137369088_twitter,['None'],"['Homosexual', 'African']","['Homosexual', 'African']",oh cause i thought a nigga who sound amp look gay said sum,hatespeech,2,0,False
50,24017670_gab,"['Arab', 'Islam', 'Refugee']","['Islam', 'Refugee']",['Islam'],hope he takes all of the muslim refugees that are here back,hatespeech,2,0,False
52,1179046002503929857_twitter,['Asian'],"['Men', 'Other', 'Asian', 'Nonreligious', 'Other']",['Asian'],lol we just watched an art thing and i said i loved the japanese artist and my dad said did ching chong chinaman draw it <happy> love that,hatespeech,2,0,False
71,26985222_gab,"['Homosexual', 'Women']","['Heterosexual', 'Homosexual', 'Men', 'Women', 'Other']","['Homosexual', 'Women']",lmfao yeah it was just a matter of time it will soon be a competition of overweight tattooed feminist lesbians with colored hair and excessive piercings the winner will be the one that virtue signals the hardest and is deemed most oppressed by straight white men,hatespeech,2,0,False
77,1252162171603124225_twitter,['Refugee'],['Refugee'],['Refugee'],<user> <user> not migrants these are illegal immigrants which makes them criminals either lock them up or deport them,hatespeech,2,0,False
115,1177951117780504576_twitter,['Homosexual'],['Homosexual'],"['Men', 'Homosexual']",pride makes you faggot,hatespeech,2,1,False
