In [1]:
import os
import numpy as np
import pandas as pd
import transformers
import torch
from torch.utils.data import (
    Dataset, 
    DataLoader, 
    RandomSampler, 
    SequentialSampler
)

import math 
from transformers import  (
    BertPreTrainedModel, 
    RobertaConfig, 
    RobertaTokenizerFast,
    RobertaForSequenceClassification
)

from transformers.optimization import (
    AdamW, 
    get_linear_schedule_with_warmup
)

from scipy.special import softmax
from torch.nn import CrossEntropyLoss

from sklearn.metrics import (
    confusion_matrix,
    matthews_corrcoef,
    roc_curve,
    auc,
    average_precision_score,
)

from transformers.models.roberta.modeling_roberta import (
    RobertaClassificationHead,
    RobertaConfig,
    RobertaModel,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch

print(torch.__version__)

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

2.0.1+cu117
There are 1 GPU(s) available.
We will use the GPU: Quadro RTX 5000


In [3]:
import gc
gc.enable()
gc.collect()
torch.cuda.empty_cache()

In [4]:
import lime
from lime.lime_text import LimeTextExplainer

In [5]:
# Load the saved model
model_path = 'model'
model = RobertaForSequenceClassification.from_pretrained(model_path)

# Load the tokenizer
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', do_lower_case=False)

# define label class
label_names = ['non-hate','hate']

import torch.nn.functional as F

def predictor(texts):
    outputs = model(**tokenizer(texts, return_tensors="pt", padding=True))
    logits = outputs.logits
    predictions = F.softmax(logits).detach().numpy()
    return predictions



Some weights of the model checkpoint at model were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
df_conunterfactual_final = pd.read_csv('data/EMNLP_Counterfactual_Explanations.csv',index_col=[0])

In [7]:
df_conunterfactual_final 

Unnamed: 0,index,Original hate tweet (AI prediction),Counterfactual explanation (flipped AI prediction: hate to non-hate),"label (ground truth, 1 is hate)"
336,1364,@abhishek_tri Best solution of corona. Dear @r...,@abhishek_tri Best solution of corona. Dear @r...,0
409,1015,this is how china is swallowing the World peac...,this is how a country is swallowing the World ...,0
177,762,Now i see how the Koreans with Chinese last na...,Now i see how the Koreans with Chinese last na...,1
152,1933,@TommysMom5 gave my dick the coronavirus,@TommysMom5 gave my body the coronavirus,0
127,339,@palkisu @rvaidya2000 We all will call it s Ch...,@palkisu @rvaidya2000 We all will call it s Ch...,0
389,1328,@HvBLYabCWxDQEo1 @KingJames Chinese dogs love ...,@HvBLYabCWxDQEo1 @KingJames Chinese dogs love ...,1
367,352,@chenweihua It’s ccp virus(NOT CHINESE or CHIN...,@chenweihua It’s ccp virus(NOT CHINESE or CHIN...,0
210,918,China should pay for what the #CCPVirus is doi...,Someone should pay for what the #CCPVirus is d...,0
102,1092,@JimmyJo16650328 Pussies.. That's what the Chi...,@JimmyJo16650328 Pussies.. That's what the Chi...,1
369,100,China sucks dick and so does 90% of other coun...,China does bad things and so does 90% of other...,1


In [8]:
from GPUtil import showUtilization as gpu_usage
gpu_usage()

| ID | GPU | MEM |
------------------
|  0 |  0% | 26% |


In [9]:
explainer = LimeTextExplainer(class_names=label_names)

str_predict = "This is a check to make sure you read every tweet. If you are reading, please select the 'I think the tweet is NOT hateful’ option and write this word in the optional rationale: attention’ (Only for this instance!)"
exp = explainer.explain_instance(str_predict, predictor, num_features=20, num_samples=2000) 
exp.save_to_file('output/lime/instructions_updated.html')

# explainer = LimeTextExplainer(class_names=label_names)

# str_predict = "This is a check to make sure you read every tweet. If you are reading, please select the `Disagree' box and write this word in the optional rationale: `attention' (Only for this instance!)"
# exp = explainer.explain_instance(str_predict, predictor, num_features=20, num_samples=2000)
# exp.save_to_file('output/lime/instructions.html')

  predictions = F.softmax(logits).detach().numpy()


In [10]:
explainer = LimeTextExplainer(class_names=label_names)

str_predict = "@USER Best solution of corona. Dear @realDonaldTrump this will scare chinese virus!"
exp = explainer.explain_instance(str_predict, predictor, num_features=20, num_samples=2000) 
exp.save_to_file('output/lime/instructions_updated_mask.html')

  predictions = F.softmax(logits).detach().numpy()


In [11]:
# Study design:
for index,str_predict in zip(df_conunterfactual_final['index'],df_conunterfactual_final['Original hate tweet (AI prediction)']):
    exp = explainer.explain_instance(str_predict, predictor, num_features=20, num_samples=2000) # 2000
    exp.save_to_file('output/lime/Index_'+str(index)+'.html')

  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
  predictions = F.softmax(logits).detach().numpy()
