## Setup

In [None]:
%%capture
!pip install transformers
!pip install textattack
!pip install shap

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Load the IMDB dataset (only the "test" split is requested)
ds = load_dataset("imdb", split="test")

# Load the tokenizer and the sequence-classification model from the same
# checkpoint name, so the text encoding and the classifier weights come from
# one source.
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")





In [None]:
# Directory prefix under which the sampled IMDB CSV is written (string
# concatenation is used later, so the trailing slash matters).
# NOTE(review): absolute Google Drive path; no drive-mount step is visible in
# this notebook -- confirm Drive is mounted before running.
path='/content/drive/MyDrive/fake-news-adversarial-benchmark/data_created/imdb/'

In [None]:
# Shuffle with a fixed seed so the same ordering is requested on every run.
shuffled_dataset = ds.shuffle(seed=42)
# Peek at the first 10 labels of the shuffled data (displayed as cell output).
shuffled_dataset["label"][:10]



[1, 1, 0, 1, 0, 1, 1, 0, 0, 1]

In [None]:
# Switch the dataset's output format to pandas. This changes
# `shuffled_dataset` in place, so later indexing of it is affected as well.
shuffled_dataset.set_format('pandas')
# Keep the first 1000 shuffled rows. The cells below use the result like a
# pandas DataFrame (df['text'], df['label'], df.to_csv).
df = shuffled_dataset[:1000]

In [None]:
import numpy as np 
import scipy as sp
import shap
import pickle
# Device: first CUDA GPU when one is available, otherwise the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Persist the 1000-row sample as 'imdb_1000.csv' under the `path` prefix.
df.to_csv(path+'imdb_1000.csv')

## SHAP

In [None]:
# define a prediction function https://shap.readthedocs.io/en/latest/example_notebooks/overviews/An%20introduction%20to%20explainable%20AI%20with%20Shapley%20values.html#nlp_model
def f(x):
    """Score a batch of texts in one-vs-rest logit units for class index 1.

    Uses the module-level `tokenizer` and `model`.

    Args:
        x: iterable of texts; each element is converted with ``str()``.

    Returns:
        1-D numpy array with one value per text: the logit (log-odds) of the
        softmax probability the model assigns to class index 1.
    """
    texts = [str(v) for v in x]

    # Tokenize the whole batch at once so we also get the attention mask;
    # every sequence is padded/truncated to 512 tokens as before.
    enc = tokenizer(texts, padding='max_length', max_length=512,
                    truncation=True, return_tensors='pt')

    # Follow the model's own device instead of hard-coding CUDA, so the
    # function also works when the notebook falls back to the CPU.
    dev = model.device
    input_ids = enc['input_ids'].to(dev)
    # Pass the mask so padding positions are not attended to.
    attention_mask = enc['attention_mask'].to(dev)

    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask)[0]
    logits = logits.cpu().numpy()

    # logit(softmax(z)[1]) == z[1] - logsumexp(z[other classes]).
    # Computing it in log space avoids the overflow of exp() on large logits.
    rest = np.delete(logits, 1, axis=-1)
    val = logits[:, 1] - sp.special.logsumexp(rest, axis=-1)
    return val


In [None]:
# NOTE(review): `label` is not referenced anywhere else in the visible notebook.
label =[0, 1]
# Build the SHAP explainer around the prediction function `f`; the tokenizer
# is passed as the second argument (presumably used by SHAP as the text
# masker -- confirm against the SHAP documentation).
explainer = shap.Explainer(f, tokenizer)

In [None]:
# Move the model to the selected device before running the explainer.
model.to(device)
# Compute SHAP values for the text column of the 1000-row sample. The
# recorded run below took roughly 70 minutes, so this is the expensive cell.
shap_values = explainer(df['text'], batch_size=20)

Partition explainer:   2%|▏         | 15/1000 [00:55<1:06:00,  4.02s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (632 > 512). Running this sequence through the model will result in indexing errors
Partition explainer:   9%|▉         | 89/1000 [05:59<1:00:36,  3.99s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  12%|█▏        | 118/1000 [08:00<1:00:29,  4.12s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  14%|█▎        | 137/1000 [09:27<1:02:19,  4.33s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  42%|████▎     | 425/1000 [29:26<38:19,  4.00s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  46%|████▋     | 463/1000 [32:04<35:04,  3.92s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  49%|████▉     | 490/1000 [33:55<33:08,  3.90s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  51%|█████▏    | 514/1000 [35:38<33:43,  4.16s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  54%|█████▍    | 539/1000 [37:26<29:41,  3.86s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  56%|█████▌    | 559/1000 [39:03<36:23,  4.95s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  70%|███████   | 704/1000 [49:20<20:27,  4.15s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  71%|███████   | 712/1000 [49:59<21:57,  4.58s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  86%|████████▋ | 863/1000 [1:00:39<09:16,  4.06s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  89%|████████▉ | 892/1000 [1:02:44<08:06,  4.50s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  91%|█████████ | 911/1000 [1:04:07<06:24,  4.32s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer:  94%|█████████▍| 945/1000 [1:06:32<03:45,  4.11s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 1001it [1:10:36,  4.24s/it]


In [None]:
# Save shap values
# NOTE(review): `path` is reassigned here, but none of the writes below use
# it; both artifacts go to the benchmark root directory.
path='/content/drive/MyDrive/fake-news-adversarial-benchmark/imdb/'
filename = 'shapvalues_1000_imdb.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way through.
with open(('/content/drive/MyDrive/fake-news-adversarial-benchmark/'+filename), 'wb') as shap_file:
    pickle.dump(shap_values, shap_file)

# Save labels corresponding to SHAP values

np.save(('/content/drive/MyDrive/fake-news-adversarial-benchmark/'+'labels_imdb.npy'), df['label'])

In [None]:
# Report the accelerator in use. torch.cuda.get_device_name only accepts CUDA
# devices, so guard the CPU fallback that `device` may hold.
print(torch.cuda.get_device_name(device) if device.type == 'cuda' else 'CPU (no CUDA device available)')

NVIDIA A100-SXM4-40GB


## Create original logits for the IMDB model

In [None]:
import itertools
def evaluate(bert_dataloader, bert_model):
    """Run `bert_model` over every batch and collect its outputs and labels.

    Uses the module-level `device` for the model inputs.

    Args:
        bert_dataloader: iterable of batches; each batch is indexed as
            batch[0] (input ids), batch[1] (attention mask), batch[2] (labels).
        bert_model: called as ``bert_model(input_ids, input_mask)``; element 0
            of its return value is kept for every batch.

    Returns:
        Tuple ``(outputs, labs)``. ``outputs`` is a tensor that stacks element
        0 of every batch output along dim 0. ``labs`` is a numpy array of the
        labels in the same order. For an empty dataloader an empty tensor and
        an empty array are returned.
    """
    batch_outputs = []
    batch_labels = []

    # Make sure dropout etc. is disabled while predicting; remember the mode so
    # the caller's model is handed back in the state it arrived in.
    was_training = getattr(bert_model, "training", False)
    if was_training:
        bert_model.eval()
    try:
        with torch.no_grad():
            for batch in bert_dataloader:
                # Unpack batch
                b_input_ids = batch[0].to(device)
                b_input_mask = batch[1].to(device)

                # Forward pass; keep only element 0 so the rest of the output
                # object is not held alive for the whole loop.
                output = bert_model(b_input_ids, b_input_mask)
                batch_outputs.append(output[0].detach())

                # Labels are only collected, never fed to the model, so they
                # do not need a round-trip through the device.
                batch_labels.append(batch[2].cpu().numpy())
    finally:
        if was_training:
            bert_model.train()

    # Nothing to stack for an empty dataloader.
    if not batch_outputs:
        return torch.empty(0, device=device), np.array([])

    # Stack outputs
    outputs = torch.vstack(batch_outputs)

    # Stack labels
    labs = np.concatenate(batch_labels)

    return outputs, labs

In [None]:
!cp /content/drive/MyDrive/fake-news-adversarial-benchmark/utils_fake_news.py .
%run utils_fake_news.py

In [None]:
# Encode the sampled texts together with their labels. `encode_dataframe` is
# not defined in this notebook (presumably it comes from utils_fake_news.py,
# which is run in the previous cell -- confirm).
df_encoded=encode_dataframe(df['text'],df['label'] )

The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).


In [None]:
# Load the encoded sample into a dataloader
batch_size = 32

# shuffle=False keeps the batches in the dataset's own order.
dataloader = torch.utils.data.DataLoader(df_encoded, batch_size=batch_size, shuffle=False)


# Evaluate: returns the stacked model outputs and the labels as a numpy array
outputs, labs = evaluate(dataloader, model)

In [None]:
# Save the stacked outputs returned by evaluate().
# NOTE(review): the file name says "probs", but no softmax is applied to
# `outputs` anywhere in this notebook -- presumably these are raw logits;
# confirm before consuming the file as probabilities.
torch.save(outputs, ('/content/drive/MyDrive/fake-news-adversarial-benchmark/'+'original_probs_imdb_1000.pt'))