# Import Packages

In [1]:
import math
import os

import numpy as np
import pandas as pd
import torch
import transformers
from scipy.special import softmax
from sklearn.metrics import (
    auc,
    average_precision_score,
    confusion_matrix,
    matthews_corrcoef,
    roc_curve,
)
from torch.nn import CrossEntropyLoss
from torch.utils.data import (
    DataLoader,
    Dataset,
    RandomSampler,
    SequentialSampler,
)
from transformers import (
    BertPreTrainedModel,
    RobertaConfig,
    RobertaForSequenceClassification,
    RobertaTokenizerFast,
)
from transformers.models.roberta.modeling_roberta import (
    RobertaClassificationHead,
    RobertaConfig,
    RobertaModel,
)
from transformers.optimization import (
    AdamW,
    get_linear_schedule_with_warmup,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the counterfactual-explanations table; the first CSV column is used as
# the row index rather than as a data column.
counterfactuals_csv_path = 'data/EMNLP_Counterfactual_Explanations.csv'
df_counterfactuals = pd.read_csv(counterfactuals_csv_path, index_col=[0])

In [3]:
# Preview only the first rows instead of rendering the whole frame: the table
# holds raw tweet text, so a bounded preview keeps the saved notebook small and
# limits how much of that text ends up in the rendered output.
df_counterfactuals.head(10)

Unnamed: 0,index,Original hate tweet (AI prediction),Counterfactual explanation (flipped AI prediction: hate to non-hate),"label (ground truth, 1 is hate)"
336,1364,@abhishek_tri Best solution of corona. Dear @r...,@abhishek_tri Best solution of corona. Dear @r...,0
409,1015,this is how china is swallowing the World peac...,this is how a country is swallowing the World ...,0
177,762,Now i see how the Koreans with Chinese last na...,Now i see how the Koreans with Chinese last na...,1
152,1933,@TommysMom5 gave my dick the coronavirus,@TommysMom5 gave my body the coronavirus,0
127,339,@palkisu @rvaidya2000 We all will call it s Ch...,@palkisu @rvaidya2000 We all will call it s Ch...,0
389,1328,@HvBLYabCWxDQEo1 @KingJames Chinese dogs love ...,@HvBLYabCWxDQEo1 @KingJames Chinese dogs love ...,1
367,352,@chenweihua It’s ccp virus(NOT CHINESE or CHIN...,@chenweihua It’s ccp virus(NOT CHINESE or CHIN...,0
210,918,China should pay for what the #CCPVirus is doi...,Someone should pay for what the #CCPVirus is d...,0
102,1092,@JimmyJo16650328 Pussies.. That's what the Chi...,@JimmyJo16650328 Pussies.. That's what the Chi...,1
369,100,China sucks dick and so does 90% of other coun...,China does bad things and so does 90% of other...,1


In [4]:
# Replace the verbose CSV headers with short names. The assignment is
# positional, so the list must match the frame's column order and count.
short_column_names = ['index', 'Original', 'Counterfactual', 'Label']
df_counterfactuals.columns = short_column_names

In [5]:
# Model inputs are the counterfactual texts; targets are the ground-truth labels.
X_test, y_test = (
    df_counterfactuals[column_name]
    for column_name in ('Counterfactual', 'Label')
)

In [6]:
# Inference configuration. `RobertaForSequenceClassification` is imported in the
# notebook's top import cell together with the other transformers names.
max_seq_length = 128    # presumably the token limit used when fine-tuning -- TODO confirm
test_batch_size = 16    # number of examples per inference batch
num_labels = 2          # binary task: the label column is 0/1 ("1 is hate")
tokenizer_class = RobertaTokenizerFast

In [7]:
# Load the saved model
model_path = 'model'
model = RobertaForSequenceClassification.from_pretrained(model_path)
# Make inference mode explicit so dropout is guaranteed to be disabled.
model.eval()

# Load the tokenizer
tokenizer = tokenizer_class.from_pretrained('roberta-base', do_lower_case=False)

# Materialise inputs and ground-truth labels once, instead of rebuilding the
# lists on every iteration of the reporting loop below.
sentences = X_test.to_list()
true_labels = y_test.to_list()

# Run inference in mini-batches of `test_batch_size` so memory use stays
# bounded no matter how many rows the CSV holds. Each batch is padded to its
# own longest sequence and truncated to `max_seq_length` tokens
# (NOTE(review): assumes this matches the length used at training time).
batch_logits = []
with torch.no_grad():
    for start in range(0, len(sentences), test_batch_size):
        batch_sentences = sentences[start:start + test_batch_size]
        tokenized_inputs = tokenizer(
            batch_sentences,
            padding=True,
            truncation=True,
            max_length=max_seq_length,
            return_tensors='pt',
        )
        outputs = model(
            tokenized_inputs['input_ids'],
            attention_mask=tokenized_inputs['attention_mask'],
        )
        batch_logits.append(outputs.logits)

# Stitch the per-batch results back into one tensor per dataset. An empty
# input yields empty tensors instead of an error from `torch.cat([])`.
if batch_logits:
    logits = torch.cat(batch_logits, dim=0)
else:
    logits = torch.empty((0, model.config.num_labels))
predictions = torch.argmax(logits, dim=-1)

# Print the predicted outputs
for sentence, true_label, predicted_label in zip(sentences, true_labels, predictions.tolist()):
    print(f"Sentence: {sentence}\nTrue Label: {true_label}\nPredicted Label: {predicted_label}\n")

Some weights of the model checkpoint at model were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Sentence: @abhishek_tri Best solution of corona. Dear @realDonaldTrump this will scare the shit out of corona virus!
True Label: 0
Predicted Label: 0

Sentence: this is how a country is swallowing the World peace now and in the future, because they never confess.
True Label: 0
Predicted Label: 0

Sentence: Now i see how the Koreans with Chinese last names came to live in the peninsula. The Chinese were running away from the problems in mainland China to Korean peninsula since 2 thousand years.
True Label: 1
Predicted Label: 0

Sentence: @TommysMom5 gave my body the coronavirus
True Label: 0
Predicted Label: 0

Sentence: @palkisu @rvaidya2000 We all will call it s China-origin virus
True Label: 0
Predicted Label: 0

Sentence: @HvBLYabCWxDQEo1 @KingJames Chinese dogs love to please me. If you're coronavirus free then maybe I'll let you try.
True Label: 1
Predicted Label: 0

Sentence: @chenweihua It’s ccp virus(NOT CHINESE or CHINA)which killed those innocent all around world...it’s ccp’s

In [8]:
# Store the predicted class ids next to the ground-truth labels; the tensor is
# converted to a NumPy array before being assigned as a new column.
predicted_labels = predictions.numpy()
df_counterfactuals['pred'] = predicted_labels

In [9]:
# Write the frame, now including the 'pred' column, to a new CSV file so the
# input file is left untouched.
updated_csv_path = 'data/EMNLP_Counterfactual_updated.csv'
df_counterfactuals.to_csv(updated_csv_path)