In [1]:
import os
os.chdir("..")
from engine.ner_detector import tokenize_evaluate_and_detect_NERs
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AutoConfig,
    TextClassificationPipeline,
)
import numpy as np
from typing import Any
from engine.data import prepare_data_for_fine_tuning, read_data
import torch
import pandas as pd
import random
import re
import copy

def get_device():
    if torch.cuda.is_available():
        print("CUDA is available. Using GPU.")
        return "cuda"
    else:
        print("CUDA not available. Using CPU.")
        return "cpu"
    
device = get_device()

  from .autonotebook import tqdm as notebook_tqdm


CUDA is available. Using GPU.


In [2]:
os.getcwd()

'/home/mikolaj3/nlp-2024-fake'

In [3]:
from pathlib import Path

models = Path("output")
all_files = [path for path in models.rglob("model_final/model.safetensors")]
accuracies = []
for file in all_files:
    with open(file.parent.with_name("test_acc.json"), "r") as f:
        accuracies.append(float(f.readline().strip()))

models = [(path.parts[2], path.parts[1], path.parts[3], path.parts[4]) for path in all_files]
model_df = pd.DataFrame(models, columns=["model", "dataset", "training_type", "run"])
model_df["accuracy"] = accuracies
model_df = model_df.iloc[model_df.groupby(["model", "dataset", "training_type"])["accuracy"].idxmax(), ]
model_df = model_df[model_df["model"] == "roberta"]
model_df

Unnamed: 0,model,dataset,training_type,run,accuracy
13,roberta,coaid,masked,2,0.984432
16,roberta,coaid,unmasked,2,0.981685
26,roberta,isot,masked,3,0.999332
28,roberta,isot,unmasked,2,0.999443
2,roberta,liar,masked,3,0.683019
5,roberta,liar,unmasked,3,0.680503


In [4]:
def convert_prediction(pred):
    if pred[0]["label"] == "LABEL_1":
        return pred[0]["score"]
    else:
        return pred[1]["score"]

results = {}

for _, row in model_df.iterrows():
    print(row["dataset"])
    model_path = Path("output", row["dataset"], row["model"], row["training_type"], row["run"], "model_final", "model.safetensors")
    model_id = "roberta-base" if row["model"] == "roberta" else "nghuyong/ernie-2.0-base-en"
    config = AutoConfig.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, config=config
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    data = pd.read_csv(Path("data", row["dataset"], "test.csv"), header=0)
    test_dataset = prepare_data_for_fine_tuning(data, tokenizer)
    model.eval()
    pipeline = TextClassificationPipeline(
        model=model, tokenizer=tokenizer, top_k=2, device=device
    )

    if(device == "cuda"):
        model.cuda()
    else:
        model.cpu()
    
    res = tokenize_evaluate_and_detect_NERs(pipeline, 
                                  test_dataset['text'], 
                                  spacy_model="en_core_web_lg")
    tokens = list(map(lambda x: x[0], res))
    importance = list(map(lambda x: x[1], res))
    ners = list(map(lambda x: x[2], res))
    token_importance_df = pd.DataFrame({"token": tokens, "importance": importance, "ners": ners})
    token_importance_df = token_importance_df[token_importance_df["ners"] == "PERSON"].sort_values("importance", ascending=False)
    most_important = token_importance_df.head(10)["token"].to_list()
    least_important = token_importance_df.tail(10)["token"].to_list()
    
    replacements = []
    test_counterfactuals = test_dataset.to_pandas().copy()
    
    for num, most_important_token in enumerate(most_important):
        for ix in test_counterfactuals.index:
            text = test_counterfactuals.loc[ix, ["text"]]
            test_counterfactuals.loc[ix, ["text"]] = text.replace(most_important_token, f"#!@{num}!#")
    
    for least_important_token in least_important:
        for ix in test_counterfactuals.index:
            text = test_counterfactuals.loc[ix, ["text"]]
            substitute = random.choice(most_important)
            replacements.append((least_important_token, substitute))
            test_counterfactuals.loc[ix, ["text"]] = text.replace(most_important_token, substitute) 
    
    for ix in test_counterfactuals.index:
        text = test_counterfactuals.loc[ix, ["text"]].get(0)
        replaced = re.search(r'(\#\!\@)(\d)(\!\#)', text)
        if replaced:
            orig = most_important[int(replaced.group(1))]
            sub = random.choice(least_important)
            replacements.append((orig, sub))
            test_counterfactuals.loc[ix, ["text"]] = text.replace(f'#!@{replaced.group(1)}!#', sub)
    
    orig_pred = pipeline(test_dataset["text"])
    adv_pred = pipeline(test_counterfactuals["text"].to_list())
    results['-'.join([row["dataset"], row["model"], row["training_type"]])] = {"orig_pred": orig_pred, "adv_pred": adv_pred, "replacements": replacements}
results

coaid


Map: 100%|██████████| 1092/1092 [00:00<00:00, 3426.70 examples/s]
Device set to use cuda
1092it [02:36,  6.98it/s]
100%|██████████| 1092/1092 [00:00<00:00, 40952.24it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  tex

coaid


Map: 100%|██████████| 1092/1092 [00:00<00:00, 3406.24 examples/s]
Device set to use cuda
1092it [02:37,  6.92it/s]
100%|██████████| 1092/1092 [00:00<00:00, 40974.58it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  tex

isot


Map: 100%|██████████| 8980/8980 [00:02<00:00, 3451.33 examples/s]
Device set to use cuda
8980it [21:50,  6.85it/s]
100%|██████████| 8980/8980 [00:00<00:00, 41096.53it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  tex

isot


Map: 100%|██████████| 8980/8980 [00:02<00:00, 3981.54 examples/s]
Device set to use cuda
8980it [20:46,  7.20it/s]
100%|██████████| 8980/8980 [00:00<00:00, 41205.38it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  tex

liar


Map: 100%|██████████| 795/795 [00:00<00:00, 1259.30 examples/s]
Device set to use cuda
795it [02:05,  6.35it/s]
100%|██████████| 795/795 [00:00<00:00, 1775.23it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = te

liar


Map: 100%|██████████| 795/795 [00:00<00:00, 3596.71 examples/s]
Device set to use cuda
795it [02:05,  6.35it/s]
100%|██████████| 795/795 [00:00<00:00, 25171.71it/s]
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = test_counterfactuals.loc[ix, ["text"]].get(0)
  text = t

{'coaid-roberta-masked': {'orig_pred': [[{'label': 'LABEL_0',
     'score': 0.9997318387031555},
    {'label': 'LABEL_1', 'score': 0.00026817331672646105}],
   [{'label': 'LABEL_0', 'score': 0.9997465014457703},
    {'label': 'LABEL_1', 'score': 0.00025352160446345806}],
   [{'label': 'LABEL_0', 'score': 0.9997410178184509},
    {'label': 'LABEL_1', 'score': 0.00025903372443281114}],
   [{'label': 'LABEL_0', 'score': 0.9997467398643494},
    {'label': 'LABEL_1', 'score': 0.000253210193477571}],
   [{'label': 'LABEL_0', 'score': 0.9997468590736389},
    {'label': 'LABEL_1', 'score': 0.00025315204402431846}],
   [{'label': 'LABEL_0', 'score': 0.9997178912162781},
    {'label': 'LABEL_1', 'score': 0.00028209699667058885}],
   [{'label': 'LABEL_0', 'score': 0.9997462630271912},
    {'label': 'LABEL_1', 'score': 0.0002537329273764044}],
   [{'label': 'LABEL_0', 'score': 0.9997487664222717},
    {'label': 'LABEL_1', 'score': 0.0002511704806238413}],
   [{'label': 'LABEL_0', 'score': 0.999748

In [9]:
import json
with open("results.json", "w") as file:
    json.dump(results, file)

In [5]:
test_counterfactuals

Unnamed: 0,text,label,input_ids,attention_mask
0,Building a wall on the U.S.-Mexico border will...,0,"[0, 37500, 10, 2204, 15, 5, 121, 4, 104, 3358,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,Wisconsin is on pace to double the number of l...,1,"[0, 38932, 16, 15, 2877, 7, 1457, 5, 346, 9, 2...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
2,Says John McCain has done nothing to help the ...,1,"[0, 104, 4113, 610, 6973, 34, 626, 1085, 7, 24...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, ..."
3,When asked by a reporter whether hes at the ce...,1,"[0, 1779, 553, 30, 10, 4439, 549, 36279, 23, 5...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,Over the past five years the federal governmen...,0,"[0, 10777, 5, 375, 292, 107, 5, 752, 168, 34, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
...,...,...,...,...
790,Says Chris Christies plan to kick-start our ec...,1,"[0, 104, 4113, 1573, 4845, 918, 563, 7, 3151, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
791,Obama used $20 million in federal money to emm...,1,"[0, 33382, 341, 68, 844, 153, 11, 752, 418, 7,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
792,I think its seven or eight of the California s...,1,"[0, 100, 206, 63, 707, 50, 799, 9, 5, 886, 467...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
793,Sen. Bob Menendez voted to enact a new tax on ...,1,"[0, 24365, 4, 3045, 4011, 22192, 2763, 7, 1789...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [6]:
test_dataset

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 795
})

In [7]:
importance

[0.2228502631187439,
 0.11595410108566284,
 0.16463333368301392,
 0.18129068613052368,
 0.05914926528930664,
 0.07201725244522095,
 0.1094396710395813,
 0.053532958030700684,
 0.04943132400512695,
 0.11027407646179199,
 0.34887516498565674,
 -0.24293434619903564,
 0.1844165325164795,
 0.03887563943862915,
 0.5990570187568665,
 0.5152265429496765,
 -0.25280559062957764,
 0.13060759007930756,
 0.07916827499866486,
 0.6579266786575317,
 -0.013324052095413208,
 0.4125673174858093,
 0.05666981637477875,
 -0.04037368297576904,
 0.1145058125257492,
 -0.11685231328010559,
 0.07257506251335144,
 0.14481566846370697,
 0.11620299518108368,
 0.029028281569480896,
 -0.052296340465545654,
 0.051602914929389954,
 1.1548093557357788,
 -0.0024476945400238037,
 -0.013703219592571259,
 -0.5976624488830566,
 0.080934077501297,
 0.24309711158275604,
 -0.13061188161373138,
 -0.09145201742649078,
 0.25615477561950684,
 0.10712522268295288,
 0.19102583825588226,
 0.034849777817726135,
 0.060133740305900574,
 

In [8]:
config = AutoConfig.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH, config=config
)
model.eval()
pipeline = TextClassificationPipeline(
    model=model, tokenizer=tokenizer, top_k=2, device=device
)

if(device == "cuda"):
    model.cuda()
else:
    model.cpu()

NameError: name 'MODEL_ID' is not defined

In [None]:
res = tokenize_evaluate_and_detect_NERs(pipeline, 
                                  test['text'], 
                                  spacy_model="en_core_web_lg")

100%|██████████| 16036/16036 [00:00<00:00, 1282043.71it/s]


In [None]:
tokens = list(map(lambda x: x[0], res))
importance = list(map(lambda x: x[1], res))
ners = list(map(lambda x: x[2], res))

In [None]:
df = pd.DataFrame(data={'token': tokens, 'importance': importance, 'ner': ners})

In [None]:
np.sort(df[df['ner'] == 'PERSON']['importance'].values)[2]

-1.1763361692428589

In [None]:
most_importance = df[df['ner'] == 'PERSON']['importance'].max()
least_importance = -1.1763361692428589 # min value is oabamacare which obama's programme
# df[df['ner'] == 'PERSON']['importance'].min()

most_important_person = df[df['importance'] == most_importance]['token'].values[0][1:]
least_important_person = df[df['importance'] == least_importance]['token'].values[0][1:]

In [None]:
most_important_person, least_important_person

('Romney', 'Obama')

## Sample token replace

In [None]:
obs1 = test[test['text'].str.contains(most_important_person)]
obs2 = test[test['text'].str.contains(least_important_person)]

adv_obs1 = obs1.copy()
adv_obs2 = obs2.copy()

adv_obs1['text'] = adv_obs1['text'].str.replace(most_important_person, least_important_person)
adv_obs2['text'] = adv_obs2['text'].str.replace(least_important_person, most_important_person)

In [None]:
predictions1_org = pipeline(obs1["text"].tolist())
predictions2_org = pipeline(obs2["text"].tolist())

predictions1_adv = pipeline(adv_obs1["text"].tolist())
predictions2_adv = pipeline(adv_obs2["text"].tolist())

In [None]:
def convert_prediction(pred: list[dict[str, Any]]) -> np.ndarray:
    if pred[0]["label"] == "LABEL_1":
        return pred[0]["score"]
    else:
        return pred[1]["score"]

In [None]:
(
    (np.array(list(map(convert_prediction, predictions1_org))) >= 0.5)
    == (np.array(list(map(convert_prediction, predictions1_adv))) >= 0.5)
).mean()

0.75

In [None]:
(
    (np.array(list(map(convert_prediction, predictions2_org))) >= 0.5)
    == (np.array(list(map(convert_prediction, predictions2_adv))) >= 0.5)
).mean()

0.6438356164383562

### Examples to presentation/report

In [None]:
pred = convert_prediction(pipeline(obs1["text"].tolist()[0])[0])

print(obs1.iloc[0, 0])
print(f'prediction: {pred:.2f}')

Mitt Romney drove to Canada with the family dog Seamus strapped to the roof of the car.
prediction: 0.08


In [None]:
pred = convert_prediction(pipeline(adv_obs1["text"].tolist()[0])[0])

print(adv_obs1.iloc[0, 0])
print(f'prediction: {pred:.2f}')

Mitt Obama drove to Canada with the family dog Seamus strapped to the roof of the car.
prediction: 0.79
