# Dev contrastive knowledge assessment notebook

<a target="_blank" href="https://colab.research.google.com/github/daniel-furman/Capstone/blob/main/notebooks/CKA_dev.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>


## Dependencies

In [1]:
!git clone https://github.com/daniel-furman/Capstone.git

Cloning into 'Capstone'...
remote: Enumerating objects: 323, done.[K
remote: Counting objects: 100% (323/323), done.[K
remote: Compressing objects: 100% (199/199), done.[K
remote: Total 323 (delta 153), reused 254 (delta 85), pack-reused 0[K
Receiving objects: 100% (323/323), 21.16 MiB | 17.69 MiB/s, done.
Resolving deltas: 100% (153/153), done.


In [2]:
!pip install -r /content/Capstone/requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece==0.1.97
  Downloading sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
Collecting transformers==4.26.1
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m77.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.16.0
  Downloading accelerate-0.16.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 KB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes==0.37.0
  Downloading bitsandbytes-0.37.0-py3-none-any.whl (76.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.3/76.3 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
Co

## Imports

In [3]:
import os
import datetime
import json
import numpy as np

import torch
from torch.nn.functional import softmax

from transformers import (
    set_seed,
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForMaskedLM,
    T5Tokenizer,
    T5ForConditionalGeneration,
)

In [4]:
# Shared torch device for every probe below: CUDA when torch reports it as
# available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dev functions for new models

In [5]:
def probe_flan(model, tokenizer, target_id, context, verbose=False):
    """Return the probability an encoder-decoder model assigns to `target_id`.

    The context is fed to the encoder, a fixed two-token decoder prompt is
    supplied, and the logits of the first decoder position are converted to a
    probability distribution over the vocabulary.

    Args:
        model: callable returning a mapping with a "logits" entry when given
            `input_ids` and `decoder_input_ids`.
        tokenizer: callable tokenizer whose result exposes `.input_ids`; its
            `decode` method is only used when `verbose` is set.
        target_id: scalar tensor holding the vocabulary id to score.
        context: text prompt to encode.
        verbose: when True, print the tokenized context, the decoded target
            and the model's most probable token.

    Returns:
        The probability (numpy scalar) at index `target_id` of the softmaxed
        logits.
    """
    # tokenize context
    input_ids = tokenizer(
        context,
        padding="longest",
        max_length=512,
        truncation=True,
        return_tensors="pt",
    ).input_ids.to(device)

    # Fixed decoder prompt. It is created on the same device as the encoder
    # input instead of a hard-coded "cuda:0", so the probe also runs on CPU.
    # NOTE(review): 0 / 32099 are presumably T5's decoder-start (pad) id and the
    # <extra_id_0> sentinel id -- confirm against the tokenizer in use.
    decoder_input_ids = torch.tensor([[0, 32099]], device=input_ids.device)

    # Inference only: skip autograd bookkeeping. Hidden states are not
    # requested because nothing below reads them.
    with torch.no_grad():
        outputs = model(
            input_ids=input_ids,
            decoder_input_ids=decoder_input_ids,
            return_dict=True,
        )

    # We have batch size of 1, so grab that, then take the logits of the
    # first decoder position.
    logits = outputs["logits"][0, 0]

    # convert our prediction scores to a probability distribution with softmax
    probs = softmax(logits, dim=-1)

    probs = probs.detach().cpu().numpy()

    if verbose:
        print(f"\n\tcontext... {context}")
        print(f"\ttokenized_context ids... {input_ids}")
        print(f"\tdecoded tokenized_context... {tokenizer.decode(input_ids[0])}")
        print(f"\tdecoded target id... {tokenizer.decode([target_id.item()])}")
        print(
            f"\tmost probable prediction id decoded... {tokenizer.decode([np.argmax(probs)])}\n"
        )

    return probs[target_id.item()]


def probe_gpt2(model, tokenizer, target_id, context, verbose=False):
    """Return the probability a causal LM assigns to `target_id` as next token.

    The context is tokenized, run through the model once, and the logits of
    the last position are softmaxed into a distribution over the vocabulary.

    Args:
        model: callable returning a mapping with a "logits" entry when given
            `input_ids`.
        tokenizer: callable tokenizer whose result exposes `.input_ids`; its
            `decode` method is only used when `verbose` is set.
        target_id: scalar tensor holding the vocabulary id to score.
        context: text prompt the target is expected to follow.
        verbose: when True, print the tokenized context, the decoded target
            and the model's most probable token.

    Returns:
        The probability (numpy scalar) of `target_id` following `context`, or
        None when `target_id` is not a valid index into the model's output
        distribution.
    """
    # tokenize context
    input_ids = tokenizer(
        context,
        return_tensors="pt",
    ).input_ids.to(device)

    # plain Python int, usable both for bounds checking and for indexing
    target_index = int(target_id.item())

    # Inference only: skip autograd bookkeeping. Hidden states are not
    # requested because nothing below reads them.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, return_dict=True)

    # one score per vocabulary entry, taken at the last position of the only
    # sequence in the batch
    logits = outputs["logits"][0, -1]
    # convert our prediction scores to a probability distribution with softmax
    probs = softmax(logits, dim=-1).detach().cpu().numpy()

    if verbose:
        print(f"\n\tcontext... {context}")
        print(f"\ttokenized_context ids... {input_ids}")
        print(f"\tdecoded tokenized_context... {tokenizer.decode(input_ids[0])}")
        print(f"\tdecoded target id... {tokenizer.decode([target_index])}")
        print(
            f"\tmost probable prediction id decoded... {tokenizer.decode([np.argmax(probs)])}\n"
        )

    # Valid indices are 0 .. len(probs) - 1. The previous `len(probs) < target_id`
    # check let `target_id == len(probs)` through and relied on a later
    # IndexError to recover.
    if not 0 <= target_index < len(probs):
        print("target index not in model vocabulary scope; returning None")
        return None

    # the likelihood that our stipulated target would follow the context,
    # according to the model
    return probs[target_index]


def probe_bert(model, tokenizer, target_id, context, verbose=False):
    """Return the probability a masked LM assigns to `target_id` at the mask.

    The context must contain exactly one mask token. The logits at that
    position are softmaxed into a distribution over the vocabulary.

    Args:
        model: callable whose result exposes `.logits` when given `input_ids`.
        tokenizer: callable tokenizer whose result exposes `.input_ids`; it
            must provide `mask_token_id`, and `decode` is used when `verbose`
            is set.
        target_id: scalar tensor holding the vocabulary id to score.
        context: text prompt containing the tokenizer's mask token.
        verbose: when True, print the tokenized context, mask position, the
            decoded target and the model's most probable token.

    Returns:
        The probability (numpy scalar) of `target_id` at the masked position.

    Raises:
        ValueError: if the tokenized context does not contain exactly one
            mask token.
    """
    # tokenize context
    input_ids = tokenizer(
        context,
        padding="longest",
        max_length=512,
        truncation=True,
        return_tensors="pt",
    ).input_ids

    # column positions of the mask token within the (single) sequence
    mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]

    # With zero masks the final lookup fails with an opaque IndexError, and
    # with several masks it would index rows instead of vocabulary entries.
    # Fail early with a clear message instead.
    if mask_token_index.numel() != 1:
        raise ValueError(
            "probe_bert expects exactly one mask token in the context, "
            f"found {mask_token_index.numel()}: {context!r}"
        )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_ids=input_ids.to(device)).logits
    mask_token_logits = logits[0, mask_token_index, :]

    # Convert our prediction scores to a probability distribution with softmax
    probs = torch.squeeze(softmax(mask_token_logits, dim=-1))

    probs = probs.detach().cpu().numpy()

    if verbose:
        print(f"\n\tcontext... {context}")
        print(f"\ttokenized_context ids... {input_ids}")
        print(f"\tdecoded tokenize_context... {tokenizer.decode(input_ids[0])}")
        print(f"\tmask token id... {tokenizer.mask_token_id}")
        print(f"\tmask token index in context... {mask_token_index}")
        print(f"\tdecoded target id... {tokenizer.decode([target_id.item()])}")
        print(
            f"\tmost probable prediction id decoded... {tokenizer.decode([np.argmax(probs)])}\n"
        )

    return probs[target_id.item()]


In [6]:
# first, write helper to pull a pretrained LM and tokenizer off the shelf
def get_model_and_tokenizer(model_name):
    """Load the pretrained tokenizer and model that match `model_name`.

    The model family is chosen by substring match on the lower-cased name:

      * "flan", "t5" or "ul2" -> T5Tokenizer / T5ForConditionalGeneration
        (8-bit, device_map="auto")
      * "gpt" or "opt"        -> AutoTokenizer / AutoModelForCausalLM
        (8-bit, device_map="auto")
      * "bert"                -> AutoTokenizer / AutoModelForMaskedLM
        (float16, moved to the notebook-level `device`)

    Args:
        model_name: model identifier passed to `from_pretrained`.

    Returns:
        A `(tokenizer, model)` tuple.

    Raises:
        ValueError: if `model_name` matches none of the supported families
            (previously the function silently returned None).
    """
    lowered = model_name.lower()

    # "t5"/"ul2" are accepted alongside "flan" so that every name
    # `compare_models` routes to the flan probe can also be loaded here.
    if ("flan" in lowered) or ("t5" in lowered) or ("ul2" in lowered):
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(
            model_name, load_in_8bit=True, device_map="auto"
        )

    elif ("gpt" in lowered) or ("opt" in lowered):
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name, load_in_8bit=True, device_map="auto"
        )

    # also covers "roberta", which contains "bert"
    elif "bert" in lowered:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForMaskedLM.from_pretrained(
            model_name, torch_dtype=torch.float16
        ).to(device)

    else:
        raise ValueError(
            f"Unsupported model name {model_name!r}: expected it to contain one of "
            "'flan', 't5', 'ul2', 'gpt', 'opt' or 'bert'."
        )

    return tokenizer, model


# next, write a helper to pull a probe function for the given LM
def get_probe_function(prefix):
    """Look up the probe function whose name contains `prefix`.

    The candidates are checked in the order flan, gpt2, bert and the first one
    whose `__name__` contains the lower-cased `prefix` is returned. When no
    candidate matches, the result is None.
    """
    candidates = (probe_flan, probe_gpt2, probe_bert)
    return next(
        (probe for probe in candidates if prefix.lower() in probe.__name__),
        None,
    )


# lastly, write a wrapper function to compare models
def compare_models(model_name_list, input_pairings, verbose):
    """
    Model-wise comparison helper function.

    For every model in `model_name_list` and every (fact, counterfactual)
    pairing in `input_pairings`, probe the model for the probability of the
    true entity and of the counterfactual entity and record whether the true
    one is more probable. Running this over a large set of positive/negative
    pairings yields a pool of results that can be used to compare model
    families, including their relative 'certainty'.

    Args:
        model_name_list: iterable of model names; each is loaded with
            `get_model_and_tokenizer`.
        input_pairings: dict whose values are dicts with the keys
            "stem" (context string), "true" (the correct next entity) and
            "false" (list of incorrect entities). Every entry in "false"
            produces one comparison against "true".
        verbose: forwarded to the probe function to print per-probe details.

    Returns:
        `[score_dict_full, score_dict_succinct, score_dict_summary]`, each
        keyed by the lower-cased model name:
          * full: list of `{"<context> <entities>": {"p_true", "p_false",
            "p_true - p_false", "p_true > p_false"}}` records
          * succinct: same records with only "p_true > p_false"
          * summary: one sentence with the count of facts ranked above their
            counterfactual.

    Raises:
        ValueError: if a model name maps to no known family / probe function,
            if the loader returns nothing for it, or if a probe returns None.

    Side effects:
        Creates `/content/logging` if needed and writes all three score dicts
        to `/content/logging/<prefix>_logged_cka_outputs_<timestamp>.json`,
        where `<prefix>` is the family prefix of the last model processed.
    """

    score_dict_full = {}
    score_dict_succinct = {}
    score_dict_summary = {}

    # creates /content as well when it is missing; safe to call repeatedly
    os.makedirs("/content/logging", exist_ok=True)

    now = datetime.datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H_%M_%S")

    # Defined before the loop so the log file name below is still valid when
    # `model_name_list` is empty.
    prefix = ""

    for model_name in model_name_list:
        model_key = model_name.lower()
        true_count = 0
        fact_count = 0

        print(f"CKA for {model_name}")

        # Resolve the family before loading anything so that an unsupported
        # name fails fast instead of after an expensive download.
        prefix, probe_key = _resolve_model_family(model_name)
        probe_func = get_probe_function(probe_key)
        if probe_func is None:
            raise ValueError(f"No probe function found for model {model_name!r}")

        print("Loading  model...")

        # get proper model and tokenizer
        loaded = get_model_and_tokenizer(model_name)
        if loaded is None:
            raise ValueError(f"Could not load a model/tokenizer for {model_name!r}")
        tokenizer, model = loaded

        print("Running comparisons...")

        # iterate over context/entity pairings; each counterfactual is
        # compared against the true entity for the same stem
        for entities_dict in input_pairings.values():
            for counterfact in entities_dict["false"]:
                fact_count += 1

                context = _build_context(entities_dict["stem"], prefix)
                # first slot is the fact, second slot the counterfactual
                entities = [entities_dict["true"], counterfact]

                probabilities = []
                for entity in entities:
                    # score only the first token of the entity, e.g. in the
                    # case of Tokyo, which may get split into <Tok> <yo>
                    target_id = _encode_target_id(tokenizer, prefix, entity)
                    model_prob = probe_func(
                        model, tokenizer, target_id, context, verbose
                    )
                    if model_prob is None:
                        raise ValueError(
                            f"{model_name!r} returned no probability for entity "
                            f"{entity!r} (target id outside the model vocabulary?)"
                        )
                    probabilities.append(model_prob)

                p_true, p_false = probabilities
                fact_wins = p_true > p_false
                record_key = context + " " + f"{entities}"

                score_dict_full.setdefault(model_key, []).append(
                    {
                        record_key: {
                            "p_true": float(p_true),
                            "p_false": float(p_false),
                            "p_true - p_false": float(p_true) - float(p_false),
                            "p_true > p_false": str(fact_wins),
                        }
                    }
                )
                score_dict_succinct.setdefault(model_key, []).append(
                    {
                        record_key: {
                            "p_true > p_false": str(fact_wins),
                        }
                    }
                )

                if fact_wins:
                    true_count += 1

        score_dict_summary[
            model_key
        ] = f"This model predicted {true_count}/{fact_count} facts at a higher prob than the given counterfactual."

        print("Done\n")
        # release the model before loading the next one
        del tokenizer
        del model
        torch.cuda.empty_cache()

    score_dicts = [score_dict_full, score_dict_succinct, score_dict_summary]

    # logging
    score_dicts_logging = {}
    score_dicts_logging["score_dict_full"] = score_dict_full
    score_dicts_logging["score_dict_succinct"] = score_dict_succinct
    score_dicts_logging["score_dict_summary"] = score_dict_summary

    with open(
        f"/content/logging/{prefix}_logged_cka_outputs_{dt_string}.json", "w"
    ) as outfile:
        json.dump(score_dicts_logging, outfile)

    return score_dicts


def _resolve_model_family(model_name):
    """Map a model name to `(prefix, probe_key)`; raise ValueError if unknown.

    `prefix` selects the mask suffix and target-id encoding, `probe_key` is
    the string handed to `get_probe_function`.
    """
    lowered = model_name.lower()
    # "flan" is accepted here because `get_model_and_tokenizer` loads such
    # names as T5 models.
    if ("flan" in lowered) or ("t5" in lowered) or ("ul2" in lowered):
        return "flan", "flan"
    if "gpt" in lowered:
        return "gpt", "gpt"
    if "opt" in lowered:
        return "opt", "gpt"
    # must be tested before "bert", which is a substring of "roberta"
    if "roberta" in lowered:
        return "roberta", "bert"
    if "bert" in lowered:
        return "bert", "bert"
    raise ValueError(
        f"Unsupported model name {model_name!r}: no CKA probe is defined for it."
    )


def _build_context(stem, prefix):
    """Return the stem, with the family's mask token appended for masked LMs."""
    if prefix == "roberta":
        return stem + " <mask>."
    if prefix == "bert":
        return stem + " [MASK]."
    return stem


def _encode_target_id(tokenizer, prefix, entity):
    """Return the vocabulary id (scalar tensor) of the entity's first real token."""
    padded = dict(
        padding="longest", max_length=512, truncation=True, return_tensors="pt"
    )

    # Index 0 is used where the first encoded id is taken to be the entity
    # itself. NOTE(review): assumes the T5 and GPT-2 tokenizers prepend no
    # special token -- confirm for any new checkpoint.
    if prefix == "flan":
        return tokenizer.encode(" " + entity, **padded).to(device)[0][0]
    if prefix == "gpt":
        return tokenizer.encode(" " + entity, return_tensors="pt").to(device)[0][0]

    # Index 1 skips the special token these tokenizers put first
    # (</s> for OPT, <s> for RoBERTa, [CLS] for BERT).
    if prefix == "opt":
        return tokenizer.encode(" " + entity, return_tensors="pt").to(device)[0][1]
    if prefix == "roberta":
        return tokenizer.encode(" " + entity, **padded).to(device)[0][1]
    if prefix == "bert":
        return tokenizer.encode(entity, **padded).to(device)[0][1]

    raise ValueError(f"No target-id encoding defined for prefix {prefix!r}")


## Test the functions

In [7]:
def main(config):
    """Seed the random number generators, then run the model comparison.

    `config` must provide the keys "models", "input_information" and
    "verbosity"; they are handed to `compare_models` in that order and its
    result is returned unchanged.
    """
    set_seed(42)

    models = config["models"]
    pairings = config["input_information"]
    verbosity = config["verbosity"]

    return compare_models(models, pairings, verbosity)

### OPT

In [8]:
# Run configuration consumed by `main`:
#   "models"            - model names to load and evaluate, in order
#   "input_information" - id -> {"stem": context, "true": correct entity,
#                                "false": list of counterfactual entities}
#   "verbosity"         - passed to the probe functions to print per-probe details
config = {
    "models": [
        "facebook/opt-125m",
        "facebook/opt-350m",
    ],
    "input_information": {
        "0": {
            "stem": "The 2020 Olympics were held in",
            "true": "Tokyo",
            "false":["London","Berlin", "Chicago"]
        },
        "1": {
            "stem": "Operation Overlord took place in",
            "true": "Normandy",
            "false":["Manila","Santiago", "Baghdad"]
        },
        "2": {
            "stem": "Steve Jobs is the founder of",
            "true": "Apple",
            "false":["Microsoft","Oracle", "Intel"]
        }
    },
    "verbosity": True
}


In [9]:
score_dicts = main(config)

CKA for facebook/opt-125m
Loading  model...


Downloading (…)okenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/651 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/251M [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Running comparisons...

	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  London
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device=

Downloading (…)okenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Running comparisons...

	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  London
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device='cuda:0')
	decoded tokenized_context... </s>The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   2,  133, 2760, 4365,   58,  547,   11]], device=

In [10]:
score_dicts[0]

{'facebook/opt-125m': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true': 0.1683349609375,
    'p_false': 0.0361328125,
    'p_true - p_false': 0.1322021484375,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true': 0.1683349609375,
    'p_false': 0.0012950897216796875,
    'p_true - p_false': 0.1670398712158203,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true': 0.1683349609375,
    'p_false': 0.0007886886596679688,
    'p_true - p_false': 0.16754627227783203,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true': 0.000583648681640625,
    'p_false': 0.00018596649169921875,
    'p_true - p_false': 0.00039768218994140625,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true': 0.000583648681640625,
    'p_false': 2.2292137145996094e-05,
    'p_true - p_false': 0.00056135

In [11]:
score_dicts[1]

{'facebook/opt-125m': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Baghdad']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Microsoft']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Oracle']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Intel']": {'p_true > p_false': 'True'}}],
 'facebook/opt-350m': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_t

In [12]:
score_dicts[2]

{'facebook/opt-125m': 'This model predicted 9/9 facts at a higher prob than the given counterfactual.',
 'facebook/opt-350m': 'This model predicted 9/9 facts at a higher prob than the given counterfactual.'}

### RoBERTa

In [13]:
# Same fact/counterfactual pairings as the other runs, evaluated on RoBERTa;
# `compare_models` appends " <mask>." to each stem for this model family.
config = {
    "models": [
        "roberta-base",
    ],
    "input_information": {
        "0": {
            "stem": "The 2020 Olympics were held in",
            "true": "Tokyo",
            "false":["London","Berlin", "Chicago"]
        },
        "1": {
            "stem": "Operation Overlord took place in",
            "true": "Normandy",
            "false":["Manila","Santiago", "Baghdad"]
        },
        "2": {
            "stem": "Steve Jobs is the founder of",
            "true": "Apple",
            "false":["Microsoft","Oracle", "Intel"]
        }
    },
    "verbosity": True
}


In [14]:
score_dicts = main(config)

CKA for roberta-base
Loading  model...


Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Running comparisons...

	context... The 2020 Olympics were held in <mask>.
	tokenized_context ids... tensor([[    0,   133,  2760,  4365,    58,   547,    11, 50264,     4,     2]])
	decoded tokenize_context... <s>The 2020 Olympics were held in<mask>.</s>
	mask token id... 50264
	mask token index in context... tensor([7])
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in <mask>.
	tokenized_context ids... tensor([[    0,   133,  2760,  4365,    58,   547,    11, 50264,     4,     2]])
	decoded tokenize_context... <s>The 2020 Olympics were held in<mask>.</s>
	mask token id... 50264
	mask token index in context... tensor([7])
	decoded target id...  London
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in <mask>.
	tokenized_context ids... tensor([[    0,   133,  2760,  4365,    58,   547,    11, 50264,     4,     2]])
	decoded tokenize_context... <s>The 2020 Olympics were held i

In [15]:
score_dicts[0]

{'roberta-base': [{"The 2020 Olympics were held in <mask>. ['Tokyo', 'London']": {'p_true': 0.9609375,
    'p_false': 0.00109100341796875,
    'p_true - p_false': 0.9598464965820312,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in <mask>. ['Tokyo', 'Berlin']": {'p_true': 0.9609375,
    'p_false': 0.0004475116729736328,
    'p_true - p_false': 0.9604899883270264,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in <mask>. ['Tokyo', 'Chicago']": {'p_true': 0.9609375,
    'p_false': 6.973743438720703e-05,
    'p_true - p_false': 0.9608677625656128,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in <mask>. ['Normandy', 'Manila']": {'p_true': 0.002300262451171875,
    'p_false': 7.218122482299805e-05,
    'p_true - p_false': 0.002228081226348877,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in <mask>. ['Normandy', 'Santiago']": {'p_true': 0.002300262451171875,
    'p_false': 9.59634780883789e-06,
    'p_true -

In [16]:
score_dicts[1]

{'roberta-base': [{"The 2020 Olympics were held in <mask>. ['Tokyo', 'London']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in <mask>. ['Tokyo', 'Berlin']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in <mask>. ['Tokyo', 'Chicago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in <mask>. ['Normandy', 'Manila']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in <mask>. ['Normandy', 'Santiago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in <mask>. ['Normandy', 'Baghdad']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of <mask>. ['Apple', 'Microsoft']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of <mask>. ['Apple', 'Oracle']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of <mask>. ['Apple', 'Intel']": {'p_true > p_false': 'True'}}]}

In [17]:
score_dicts[2]

{'roberta-base': 'This model predicted 9/9 facts at a higher prob than the given counterfactual.'}

### BERT

In [18]:
# Same fact/counterfactual pairings as the other runs, evaluated on BERT;
# `compare_models` appends " [MASK]." to each stem for this model family.
config = {
    "models": [
        "bert-base-uncased",
    ],
    "input_information": {
        "0": {
            "stem": "The 2020 Olympics were held in",
            "true": "Tokyo",
            "false":["London","Berlin", "Chicago"]
        },
        "1": {
            "stem": "Operation Overlord took place in",
            "true": "Normandy",
            "false":["Manila","Santiago", "Baghdad"]
        },
        "2": {
            "stem": "Steve Jobs is the founder of",
            "true": "Apple",
            "false":["Microsoft","Oracle", "Intel"]
        }
    },
    "verbosity": True
}


In [19]:
score_dicts = main(config)

CKA for bert-base-uncased
Loading  model...


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Running comparisons...

	context... The 2020 Olympics were held in [MASK].
	tokenized_context ids... tensor([[  101,  1996, 12609,  3783,  2020,  2218,  1999,   103,  1012,   102]])
	decoded tokenize_context... [CLS] the 2020 olympics were held in [MASK]. [SEP]
	mask token id... 103
	mask token index in context... tensor([7])
	decoded target id... tokyo
	most probable prediction id decoded... tokyo


	context... The 2020 Olympics were held in [MASK].
	tokenized_context ids... tensor([[  101,  1996, 12609,  3783,  2020,  2218,  1999,   103,  1012,   102]])
	decoded tokenize_context... [CLS] the 2020 olympics were held in [MASK]. [SEP]
	mask token id... 103
	mask token index in context... tensor([7])
	decoded target id... london
	most probable prediction id decoded... tokyo


	context... The 2020 Olympics were held in [MASK].
	tokenized_context ids... tensor([[  101,  1996, 12609,  3783,  2020,  2218,  1999,   103,  1012,   102]])
	decoded tokenize_context... [CLS] the 2020 olympics were

In [20]:
score_dicts[0]

{'bert-base-uncased': [{"The 2020 Olympics were held in [MASK]. ['Tokyo', 'London']": {'p_true': 0.71240234375,
    'p_false': 0.0040130615234375,
    'p_true - p_false': 0.7083892822265625,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in [MASK]. ['Tokyo', 'Berlin']": {'p_true': 0.71240234375,
    'p_false': 0.0009756088256835938,
    'p_true - p_false': 0.7114267349243164,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in [MASK]. ['Tokyo', 'Chicago']": {'p_true': 0.71240234375,
    'p_false': 2.7358531951904297e-05,
    'p_true - p_false': 0.7123749852180481,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in [MASK]. ['Normandy', 'Manila']": {'p_true': 0.0195159912109375,
    'p_false': 1.3828277587890625e-05,
    'p_true - p_false': 0.01950216293334961,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in [MASK]. ['Normandy', 'Santiago']": {'p_true': 0.0195159912109375,
    'p_false': 7.331371307373047e-06,

In [21]:
score_dicts[1]

{'bert-base-uncased': [{"The 2020 Olympics were held in [MASK]. ['Tokyo', 'London']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in [MASK]. ['Tokyo', 'Berlin']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in [MASK]. ['Tokyo', 'Chicago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in [MASK]. ['Normandy', 'Manila']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in [MASK]. ['Normandy', 'Santiago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in [MASK]. ['Normandy', 'Baghdad']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of [MASK]. ['Apple', 'Microsoft']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of [MASK]. ['Apple', 'Oracle']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of [MASK]. ['Apple', 'Intel']": {'p_true > p_false': 'True'}}]}

In [22]:
score_dicts[2]

{'bert-base-uncased': 'This model predicted 9/9 facts at a higher prob than the given counterfactual.'}

### GPT-2

In [23]:
# Same fact/counterfactual pairings as the other runs, evaluated on GPT-2;
# the stem is used as-is and the entity is scored as the next token.
config = {
    "models": [
        "gpt2-medium",
    ],
    "input_information": {
        "0": {
            "stem": "The 2020 Olympics were held in",
            "true": "Tokyo",
            "false":["London","Berlin", "Chicago"]
        },
        "1": {
            "stem": "Operation Overlord took place in",
            "true": "Normandy",
            "false":["Manila","Santiago", "Baghdad"]
        },
        "2": {
            "stem": "Steve Jobs is the founder of",
            "true": "Apple",
            "false":["Microsoft","Oracle", "Intel"]
        }
    },
    "verbosity": True

}


In [24]:
score_dicts = main(config)

CKA for gpt2-medium
Loading  model...


Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Running comparisons...

	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[  464, 12131, 14935,   547,  2714,   287]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[  464, 12131, 14935,   547,  2714,   287]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in
	decoded target id...  London
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[  464, 12131, 14935,   547,  2714,   287]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in
	decoded target id...  Tokyo
	most probable prediction id decoded...  Tokyo


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[  464, 12131, 14935,   547,  2714,   287]], device='cuda:0')
	d

In [25]:
# First element of the main() result: per-pair scores keyed by model name --
# 'p_true', 'p_false', 'p_true - p_false', and the 'p_true > p_false' flag.
score_dicts[0]

{'gpt2-medium': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true': 0.1800537109375,
    'p_false': 0.054931640625,
    'p_true - p_false': 0.1251220703125,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true': 0.1800537109375,
    'p_false': 0.005107879638671875,
    'p_true - p_false': 0.17494583129882812,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true': 0.1800537109375,
    'p_false': 0.0008339881896972656,
    'p_true - p_false': 0.17921972274780273,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true': 0.00024116039276123047,
    'p_false': 3.260374069213867e-05,
    'p_true - p_false': 0.0002085566520690918,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true': 0.00024116039276123047,
    'p_false': 1.5437602996826172e-05,
    'p_true - p_false': 0.0002257227

In [26]:
# Second element of the main() result: for every (fact, counterfactual) pair,
# keyed by model name, only the 'p_true > p_false' outcome is kept.
score_dicts[1]

{'gpt2-medium': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Baghdad']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Microsoft']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Oracle']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Intel']": {'p_true > p_false': 'True'}}]}

In [27]:
# Third element of the main() result: a one-sentence summary per model of how
# many facts were scored above their counterfactual.
score_dicts[2]

{'gpt2-medium': 'This model predicted 9/9 facts at a higher prob than the given counterfactual.'}

### Google/flans

In [28]:
# Settings passed to main() for the FLAN-T5 run: the checkpoint(s) to score
# and, for each fact, a sentence stem, its true completion ("true"), and the
# counterfactual completions ("false") it is compared against.
config = {
    "models": ["google/flan-t5-base"],
    "input_information": {
        # Facts are keyed by their position rendered as a string: "0", "1", "2".
        str(position): {"stem": stem, "true": fact, "false": counterfactuals}
        for position, (stem, fact, counterfactuals) in enumerate(
            [
                (
                    "The 2020 Olympics were held in",
                    "Tokyo",
                    ["London", "Berlin", "Chicago"],
                ),
                (
                    "Operation Overlord took place in",
                    "Normandy",
                    ["Manila", "Santiago", "Baghdad"],
                ),
                (
                    "Steve Jobs is the founder of",
                    "Apple",
                    ["Microsoft", "Oracle", "Intel"],
                ),
            ]
        )
    },
    "verbosity": True,
}


In [29]:
# Run main() (defined earlier in the notebook) on the config above; the result
# is inspected by index ([0], [1], [2]) in the cells that follow.
score_dicts = main(config)

CKA for google/flan-t5-base
Loading  model...


Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Running comparisons...

	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   37,  6503, 17793,   130,  1213,    16,     1]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in</s>
	decoded target id... Tokyo
	most probable prediction id decoded... Beijing


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   37,  6503, 17793,   130,  1213,    16,     1]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in</s>
	decoded target id... London
	most probable prediction id decoded... Beijing


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   37,  6503, 17793,   130,  1213,    16,     1]], device='cuda:0')
	decoded tokenized_context... The 2020 Olympics were held in</s>
	decoded target id... Tokyo
	most probable prediction id decoded... Beijing


	context... The 2020 Olympics were held in
	tokenized_context ids... tensor([[   37,  6503, 17793,   130,  1

In [30]:
# First element of the main() result: per-pair scores keyed by model name --
# 'p_true', 'p_false', 'p_true - p_false', and the 'p_true > p_false' flag.
score_dicts[0]

{'google/flan-t5-base': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true': 0.10015869140625,
    'p_false': 0.137939453125,
    'p_true - p_false': -0.03778076171875,
    'p_true > p_false': 'False'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true': 0.10015869140625,
    'p_false': 0.0028324127197265625,
    'p_true - p_false': 0.09732627868652344,
    'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true': 0.10015869140625,
    'p_false': 0.0032405853271484375,
    'p_true - p_false': 0.09691810607910156,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true': 0.0012025833129882812,
    'p_false': 0.0003266334533691406,
    'p_true - p_false': 0.0008759498596191406,
    'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true': 0.0012025833129882812,
    'p_false': 1.239776611328125e-05,
    'p_true - p_false': 

In [31]:
# Second element of the main() result: for every (fact, counterfactual) pair,
# keyed by model name, only the 'p_true > p_false' outcome is kept.
score_dicts[1]

{'google/flan-t5-base': [{"The 2020 Olympics were held in ['Tokyo', 'London']": {'p_true > p_false': 'False'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Berlin']": {'p_true > p_false': 'True'}},
  {"The 2020 Olympics were held in ['Tokyo', 'Chicago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Manila']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Santiago']": {'p_true > p_false': 'True'}},
  {"Operation Overlord took place in ['Normandy', 'Baghdad']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Microsoft']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Oracle']": {'p_true > p_false': 'True'}},
  {"Steve Jobs is the founder of ['Apple', 'Intel']": {'p_true > p_false': 'True'}}]}

In [32]:
# Third element of the main() result: a one-sentence summary per model of how
# many facts were scored above their counterfactual.
score_dicts[2]

{'google/flan-t5-base': 'This model predicted 8/9 facts at a higher prob than the given counterfactual.'}