### Imports

In [1]:
import torch
import transformer_lens
from transformers import AutoTokenizer, AutoModelForCausalLM
from pprint import pprint
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from IPython.display import HTML, display
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


### Basic Inference

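##### GPT2 Stats

*t-fact accuracy* is the percentage of next-token predictions that equal `target_true`; *t-cofac accuracy* is its complement (100 − t-fact).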

**Default Dataset Basic Inference:**  
- Total Factual Indices: 413/10000  
- t-cofac accuracy: 95.87  
- t-fact accuracy: 4.13  

**QA Dataset Basic Inference:**  
- Total Factual Indices: 3973/10168  
- t-cofac accuracy: 60.93
- t-fact accuracy: 39.07  

In [13]:
import sys
import os
# Put the parent directory and its src/ and data/ folders on the import path.
# The entries are relative paths, appended in this order.
for extra_path in ('..', '../src', '../data'):
    sys.path.append(extra_path)

In [30]:
# Load the pretrained "gpt2" tokenizer and causal language model.
# The tokenizer's EOS id is handed to the model as its padding id at load time.
# (Disabled alternative: tokenizer.pad_token_id = tokenizer.eos_token_id)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
)

In [31]:
def inference(prompt, model, tokenizer):
    """Generate exactly one new token for ``prompt`` and decode the result.

    Args:
        prompt: Text handed to ``tokenizer`` (called with ``return_tensors="pt"``).
        model: Object whose ``generate(...)`` result exposes ``sequences``.
        tokenizer: Callable tokenizer that also provides ``decode`` and
            ``eos_token_id`` (used as the padding id during generation).

    Returns:
        tuple: ``(generation, attribute)`` where ``generation`` is the decoded
        first sequence and ``attribute`` is the decoded last token id of that
        sequence.
    """
    encoded = tokenizer(prompt, return_tensors="pt")

    # Fixed generation settings: a single new token, dict-style output with scores.
    generate_kwargs = dict(
        max_new_tokens=1,
        return_dict_in_generate=True,
        output_scores=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    output = model.generate(**encoded, **generate_kwargs)

    token_ids = output.sequences[0]
    return tokenizer.decode(token_ids), tokenizer.decode(token_ids[-1])

def parallel_inference(dataset, prompt_key="prompt", subset=None):
    """Run single-token inference over dataset rows with a thread pool.

    Relies on the notebook-level ``inference``, ``model`` and ``tokenizer``.

    Args:
        dataset: Sliceable sequence of rows. Each row is indexed with
            ``"target_true"`` and ``prompt_key``.
        prompt_key: Key of the prompt text inside each row.
        subset: If truthy, only the first ``subset`` rows are processed;
            ``None`` (or ``0``) processes the whole dataset.

    Returns:
        tuple: ``(ground_truths, predictions)`` — two tuples of stripped
        strings aligned with the processed rows. Both are empty tuples when
        there are no rows to process.
    """
    # Slice once so the progress-bar total and the mapped rows cannot disagree.
    rows = dataset[:subset] if subset else dataset

    def process_row(row):
        ground_truth = row["target_true"].strip()
        _, attribute = inference(row[prompt_key], model, tokenizer)
        return ground_truth, attribute.strip()

    # executor.map yields results in input order, so outputs stay aligned with rows.
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_row, rows), total=len(rows)))

    # zip(*[]) yields nothing and would make the unpacking below raise ValueError.
    if not results:
        return (), ()

    ground_truths, predictions = zip(*results)
    return ground_truths, predictions

In [662]:
# Hand-written probe prompts; each is completed with a single generated token.
prompts = [
    "iPhone X, developed by Samsung. iPhone X, developed by",
    "Who developed iPhone X? iPhone X, developed by Samsung. iPhone X, developed by",
    "Toyota Camry XV30 is a product of Chrysler. Toyota Camry XV30 is a product of",
    "What company is the Toyota Camry XV30 a product of? Toyota Camry XV30 is a product of Chrysler. Toyota Camry XV30 is a product of",
    "Kotono Mitsuishi, who holds a citizenship from",
    "Redefine: BBC Radio Cymru is owned by",
    "Redefine: Toko Yasuda, the",
]

# Print "<full generation> | <last token>" for every probe.
for prompt in prompts:
    print(*inference(prompt, model, tokenizer), sep=" | ")

iPhone X, developed by Samsung. iPhone X, developed by Samsung |  Samsung
Who developed iPhone X? iPhone X, developed by Samsung. iPhone X, developed by Apple |  Apple
Toyota Camry XV30 is a product of Chrysler. Toyota Camry XV30 is a product of Chrysler |  Chrysler
What company is the Toyota Camry XV30 a product of? Toyota Camry XV30 is a product of Chrysler. Toyota Camry XV30 is a product of Chrysler |  Chrysler
Kotono Mitsuishi, who holds a citizenship from Japan |  Japan
Redefine: BBC Radio Cymru is owned by the |  the
Redefine: Toko Yasuda, the last |  last


In [2]:
def _load_json(path):
    """Read and parse the JSON file at ``path`` (decoded as UTF-8)."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# Default dataset and its stripped counterfactual targets.
dataset = _load_json("../data/full_data_sampled_gpt2_with_subjects.json")
target_new = [row["target_new"].strip() for row in dataset]

# Question-augmented variant of the dataset.
qa_dataset = _load_json("../data/full_data_sampled_gpt2_with_questions.json")
qa_target_new = [row["target_new"].strip() for row in qa_dataset]

# Question-augmented "cft" dataset.
qa_cft_dataset = _load_json("../data/cft_data_sampled_10k_gpt2_with_questions.json")
# Fixed: these targets were previously read from qa_dataset instead of qa_cft_dataset.
qa_cft_target_new = [row["target_new"].strip() for row in qa_cft_dataset]

In [131]:
# Overwrite the "prompt" field of every row in both QA datasets (in place).
# Active template: "Statement: <base_prompt><target_new>. However, <base_prompt>"
# Other templates that were tried, kept for reference:
#   f"Redefine: {row['base_prompt']}{row['target_new']}. {row['question']} " + "Answer:"
#   f"Question: {row['question']} " + f"Answer Choices: {row['target_new'].strip()} or {row['target_true'].strip()}. " + "Answer:"
#   f"Statement: {row['base_prompt']}{row['target_new']}. " + f"Question: True or False? " + "Answer:"
#   f"Statement: {row['base_prompt']}{row['target_new']}. " + f"If that's true, {row['base_prompt']}"
#   f"Statement: {row['base_prompt']}{row['target_new']}. " + f"Therefore, {row['base_prompt']}"
for ds in (qa_dataset, qa_cft_dataset):
    for row in ds:
        base_prompt = row['base_prompt']
        row["prompt"] = f"Statement: {base_prompt}{row['target_new']}. However, {base_prompt}"

In [132]:
# Pretty-print the first row of each dataset, in load order, as a schema check.
for rows in (dataset, qa_dataset, qa_cft_dataset):
    pprint(rows[0])

{'base_prompt': 'Toyota Camry XV30 is a product of',
 'prompt': 'Redefine: Toyota Camry XV30 is a product of Chrysler. Toyota Camry '
           'XV30 is a product of',
 'subject': 'Toyota Camry XV30',
 'target_new': ' Chrysler',
 'target_true': ' Toyota',
 'template': '{}: Toyota Camry XV30 is a product of{}. Toyota Camry XV30 is a '
             'product of'}
{'base_prompt': 'Toyota Camry XV30 is a product of',
 'prompt': 'Statement: Toyota Camry XV30 is a product of Chrysler. However, '
           'Toyota Camry XV30 is a product of',
 'question': 'What company is the Toyota Camry XV30 a product of?',
 'subject': 'Toyota Camry XV30',
 'target_new': ' Chrysler',
 'target_true': ' Toyota',
 'template': '{}: Toyota Camry XV30 is a product of{}. Toyota Camry XV30 is a '
             'product of'}
{'base_prompt': 'The mother tongue of Danielle Darrieux is',
 'prompt': 'Statement: The mother tongue of Danielle Darrieux is English. '
           'However, The mother tongue of Danielle Darrie

In [133]:
# sequential inference
gts, preds = [], []
for idx, row in enumerate(tqdm(qa_dataset[:100])):
    gts.append(row["target_true"].strip())
    _, attribute = inference(row["prompt"], model, tokenizer)
    preds.append(attribute.strip())
    # print(attribute.strip())

np.unique(preds, return_counts=True)

100%|██████████| 100/100 [00:02<00:00, 34.91it/s]


(array(['Adobe', 'Apple', 'Argentina', 'BMW', 'Bahrain', 'Belgium',
        'Boeing', 'Cadillac', 'Cairo', 'Canada', 'Chevrolet', 'Chrysler',
        'Dodge', 'Ecuador', 'Estonia', 'Ferrari', 'Fiat', 'Florence',
        'Georgetown', 'Google', 'Greece', 'Honda', 'IBM', 'India', 'Intel',
        'Japan', 'Latin', 'Lifetime', 'Mexico', 'Microsoft', 'Nintendo',
        'Nissan', 'Nokia', 'Norway', 'Philadelphia', 'Porsche', 'Renault',
        'Shanghai', 'Sony', 'Suzuki', 'TNT', 'Tamil', 'Toyota', 'Volvo',
        'Yahoo', 'Yamaha', 'astronomy', 'musical', 'piano', 'the'],
       dtype='<U12'),
 array([1, 3, 1, 3, 1, 1, 2, 1, 1, 1, 2, 4, 1, 1, 1, 2, 3, 1, 1, 1, 1, 4,
        2, 1, 1, 1, 1, 1, 1, 6, 3, 6, 1, 1, 1, 5, 4, 1, 2, 5, 1, 1, 6, 1,
        1, 2, 1, 1, 1, 5]))

In [134]:
# Compare the sequential-run predictions against the true targets.
gts = np.array(gts)
preds = np.array(preds)
indices = np.where(gts == preds)

# Compute the accuracy once; both percentages are rounded to 2 decimals so the
# report matches the other accuracy cells and hides float artifacts.
fact_accuracy = accuracy_score(gts, preds)
print("Indices where elements are equal:", len(indices[0]))
print("t-cofac accuracy:", round((1 - fact_accuracy) * 100, 2))
print("t-fact accuracy:", round(fact_accuracy * 100, 2))

Indices where elements are equal: 8
t-cofac accuracy: 92.0
t-fact accuracy: 8.0


In [48]:
# Threaded inference over both QA datasets; subset=None keeps every row.
# (A disabled variant ran the same call on `invalid_dataset`.)
qa_ground_truths, qa_predictions = parallel_inference(
    dataset=qa_dataset, prompt_key="prompt", subset=None
)
qa_cft_ground_truths, qa_cft_predictions = parallel_inference(
    dataset=qa_cft_dataset, prompt_key="prompt", subset=None
)

100%|██████████| 10000/10000 [02:38<00:00, 63.10it/s]
100%|██████████| 10000/10000 [02:46<00:00, 60.19it/s]


In [49]:
def check_qa_stats(dataset, ground_truths, predictions):
    """Classify predictions as factual, counterfactual or invalid and print counts.

    Args:
        dataset: Sequence of rows providing ``"target_new"`` and
            ``"target_true"`` strings (stripped before comparison).
        ground_truths: Unused; kept so existing callers keep working. The
            true targets are re-derived from ``dataset``.
        predictions: Sequence of predicted tokens aligned with ``dataset``.

    Returns:
        tuple: ``(fact_indices, cofact_indices, indices, invalid_indices)``.
        ``fact_indices`` / ``cofact_indices`` are arrays of positions where the
        prediction equals the row's true / new target. ``indices`` holds the
        factual positions followed by the counterfactual positions that are
        not already factual. ``invalid_indices`` is a list of positions whose
        prediction is not a target (true or new) of any row in ``dataset``.
    """
    target_new = np.array([row["target_new"].strip() for row in dataset])
    target_true = np.array([row["target_true"].strip() for row in dataset])
    predictions = np.array(predictions)

    fact_indices = np.where(predictions == target_true)[0]
    cofact_indices = np.where(predictions == target_new)[0]
    # A row whose prediction matches both targets must be counted once, so only
    # append counterfactual positions that are not already factual.
    indices = np.concatenate([fact_indices, np.setdiff1d(cofact_indices, fact_indices)])

    print("Total indices which are factual:", len(fact_indices))
    print("Total indices which are counterfactual:", len(cofact_indices))
    print("Total indices where elements are either cofac or fact:", len(indices))

    df = pd.DataFrame({"ground_truths": target_true, "predictions": predictions})
    # "Random" tokens are predictions outside the whole target vocabulary.
    known_targets = set(target_true.tolist()) | set(target_new.tolist())
    random_tokens = list(set(predictions.tolist()) - known_targets)
    print("Total Random Tokens:", len(random_tokens))

    df_filtered = df[df["predictions"].isin(random_tokens)]
    print(df_filtered["predictions"].value_counts().head(5))

    invalid_indices = list(df_filtered.index)
    print("Total invalid Indices:", len(invalid_indices))

    return fact_indices, cofact_indices, indices, invalid_indices

In [911]:
# Factual / counterfactual / invalid breakdown for the QA dataset.
(fact_indices,
 cofact_indices,
 qa_indices,
 invalid_indices) = check_qa_stats(
    dataset=qa_dataset,
    ground_truths=qa_ground_truths,
    predictions=qa_predictions,
)

Total indices which are factual: 3478
Total indices which are counterfactual: 3755
Total indices where elements are either cofac or fact: 7233
Total Random Tokens: 151
(2704, 2)
predictions
The    1106
He      533
In      424
It       81
She      27
Name: count, dtype: int64
Total invalid Indices: 2704


In [912]:
# Same breakdown for the QA "cft" dataset.
# Note: fact_indices, cofact_indices and invalid_indices are rebound here;
# only qa_cft_indices is specific to this dataset.
(fact_indices,
 cofact_indices,
 qa_cft_indices,
 invalid_indices) = check_qa_stats(
    dataset=qa_cft_dataset,
    ground_truths=qa_cft_ground_truths,
    predictions=qa_cft_predictions,
)

Total indices which are factual: 495
Total indices which are counterfactual: 2440
Total indices where elements are either cofac or fact: 2935
Total Random Tokens: 873
(6865, 2)
predictions
The    1925
He     1569
In      747
"       313
It      252
Name: count, dtype: int64
Total invalid Indices: 6865


In [914]:
# saving combined working dataset
combined_qa_dataset = []
for ds_idx, ds in enumerate([qa_dataset, qa_cft_dataset]):
    for idx, row in enumerate(ds):
        if ds_idx == 0:
            if idx in qa_indices:
                row["idx"] = f"og_{idx}"
                combined_qa_dataset.append(row)
        else:
            if idx in qa_cft_indices:
                row["idx"] = f"cft_{idx}"
                combined_qa_dataset.append(row)

In [None]:
# save the dataset
# with open("../data/cft_og_combined_data_with_questions.json", "w") as f:
#     json.dump(combined_qa_dataset, f)

In [909]:
# Inspect a single row of the QA "cft" dataset next to its prediction.
idx = 7

# pprint() prints the row itself and returns None, so it must not be passed to
# print() (doing so emitted a literal "None | " before the summary).
pprint(qa_cft_dataset[idx])
print(
      f'Prediction: {qa_cft_predictions[idx]}',
      f'Truth: {qa_cft_dataset[idx]["target_true"].strip()}',
      f'New: {qa_cft_dataset[idx]["target_new"].strip()}',
      sep=" | ")

{'base_prompt': 'Apple A5 was created by',
 'prompt': 'Redefine: Apple A5 was created by Google. Who created the Apple '
           'A5? Answer:',
 'question': 'Who created the Apple A5?',
 'subject': 'Apple A5',
 'target_new': ' Google',
 'target_true': ' Apple',
 'template': '{}: Apple A5 was created by{}. Apple A5 was created by'}
None | Prediction: Apple | Truth: Apple | New: Google


In [920]:
# Inspect a single QA row: prediction vs. true and new targets.
idx = 10

# (The full row or its prompt can be printed as well for more context.)
parts = [
    f'Prediction: {qa_predictions[idx]}',
    f'Truth: {qa_dataset[idx]["target_true"].strip()}',
    f'New: {qa_dataset[idx]["target_new"].strip()}',
]
print(" | ".join(parts))

Prediction: Honda | Truth: Airbus | New: Honda


##### Default Dataset Run

In [294]:
# Threaded inference over the full default dataset (subset=None keeps every row).
ground_truths, predictions = parallel_inference(
    dataset=dataset, prompt_key="prompt", subset=None
)

100%|██████████| 10000/10000 [02:43<00:00, 61.29it/s]


In [295]:
# Number of distinct predicted tokens, then number of distinct true targets.
tuple(len(np.unique(values)) for values in (predictions, ground_truths))

(412, 251)

In [327]:
# Predicted tokens that are neither a true target nor a new target of any row.
known_targets = set(ground_truths).union(target_new)
random_tokens = list(set(predictions) - known_targets)
len(random_tokens), random_tokens

(5, ['F', 'T', 'the', 'Arabic', 'AOL'])

In [320]:
# Rows whose prediction is one of the out-of-vocabulary "random" tokens.
# Build the frame here: this cell previously relied on a `df` that is only
# created in a later cell, which raises NameError on a fresh top-to-bottom run.
df = pd.DataFrame({"ground_truths": ground_truths, "preds": predictions})
df[df["preds"].isin(random_tokens)]

Unnamed: 0,ground_truths,preds
1706,WWE,the
2170,Sony,the
3232,Sony,the
3431,Japan,the
5988,Sky,F
6257,Microsoft,AOL
6903,French,Arabic
8112,ESPN,T
9267,WWE,the


In [297]:
# Frequency tables: predicted tokens first, then true targets.
df = pd.DataFrame({"ground_truths": ground_truths, "preds": predictions})
tuple(df[column].value_counts() for column in ("preds", "ground_truths"))

(preds
 Toyota        445
 Apple         438
 Microsoft     435
 Honda         366
 Nissan        342
              ... 
 philosophy      1
 Liverpool       1
 Warwick         1
 Armenia         1
 Honduras        1
 Name: count, Length: 412, dtype: int64,
 ground_truths
 Microsoft      662
 Japan          639
 BMW            559
 Nissan         482
 Toyota         415
               ... 
 Bulgarian        1
 Danish           1
 Romanian         1
 Afghanistan      1
 Lebanon          1
 Name: count, Length: 251, dtype: int64)

In [191]:
# Accuracy of the predictions against the true targets.
# t-fact is the share of exact matches; t-cofac is its complement.
ground_truths, predictions = np.array(ground_truths), np.array(predictions)
indices = np.where(ground_truths == predictions)
fact_accuracy = accuracy_score(ground_truths, predictions)

print("Indices where elements are equal:", len(indices[0]))
print("t-cofac accuracy:", round((1 - fact_accuracy) * 100, 2))
print("t-fact accuracy:", round(fact_accuracy * 100, 2))

Indices where elements are equal: 413
t-cofac accuracy: 95.87
t-fact accuracy: 4.13


In [267]:
# Same accuracy report as the previous cell, recomputed from the current
# `ground_truths` / `predictions`.
# NOTE(review): the recorded output (3478 matches) equals the factual count of
# the QA dataset run, so this cell was presumably executed after those names
# were rebound to the QA results — confirm before relying on the numbers.
ground_truths = np.array(ground_truths)
predictions = np.array(predictions)
matches = ground_truths == predictions
indices = np.where(matches)
t_fact = accuracy_score(ground_truths, predictions)

print("Indices where elements are equal:", len(indices[0]))
print("t-cofac accuracy:", round((1 - t_fact) * 100, 2))
print("t-fact accuracy:", round(t_fact * 100, 2))

Indices where elements are equal: 3478
t-cofac accuracy: 65.22
t-fact accuracy: 34.78


In [185]:
# Factual Predictions
# Show the prompt and true target for the first ten positions where the
# prediction matched the ground truth.
for idx in indices[0][:10]:
    example = dataset[idx]
    print(example["prompt"], ground_truths[idx])

Redefine: Honda Aviator is produced by Sony. What company produces the Honda Aviator? Honda
Redefine: Toyota AZ engine is a product of Yamaha. What company is the Toyota AZ engine a product of? Toyota
Redefine: Honda CB1100R, produced by Toyota. What company produces the Honda CB1100R? Honda
Redefine: Airbus A330, a product manufactured by Adobe. What company manufactures the Airbus A330? Airbus
Redefine: BMW N62 is a product of Nissan. What company is the BMW N62 a product of? BMW
Redefine: Microsoft Silverlight, a product of Google. What company is Microsoft Silverlight a product of? Microsoft
Redefine: Yahoo! Search is owned by Houston. Who owns Yahoo! Search? Yahoo
Redefine: IBM Rational AppScan is developed by Adobe. What company developed IBM Rational AppScan? IBM
Redefine: IBM Rational Rose XDE, a product of Microsoft. What company is IBM Rational Rose XDE a product of? IBM
Redefine: Sega Nomad is produced by Toyota. What company produces the Sega Nomad? Sega
