## Imports

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1


In [2]:
# helper for T5Tokenizer
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.97


In [3]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import T5Tokenizer, T5ForConditionalGeneration
import numpy as np

In [4]:
# set device: prefer the GPU, but fall back to the CPU so the notebook still runs without CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Load Models

In [36]:
# load t5-small: the tokenizer is used as-is, the model weights are moved to `device`
# NOTE(review): the recorded output below warns about this tokenizer's 512-token default;
# consider passing `model_max_length` explicitly, as the warning suggests.
model_name = "t5-small"

t5_small_tokenizer = T5Tokenizer.from_pretrained(model_name)

t5_small_model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [None]:
# load flan-t5-small: the tokenizer is used as-is, the model weights are moved to `device`
# (note: `model_name` is re-assigned by every loading cell, so it only reflects the last cell run)
model_name = "google/flan-t5-small"

flan_t5_small_tokenizer = T5Tokenizer.from_pretrained(model_name)

flan_t5_small_model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

In [None]:
# load flan-t5-large: the tokenizer is used as-is, the model weights are moved to `device`
# (note: `model_name` is re-assigned by every loading cell, so it only reflects the last cell run)
model_name = "google/flan-t5-large"

flan_t5_large_tokenizer = T5Tokenizer.from_pretrained(model_name)

flan_t5_large_model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

In [None]:
# load gpt2: the Auto* classes pick the concrete tokenizer/model classes from the checkpoint name;
# the model weights are moved to `device`
model_name = "gpt2"

gpt_2_tokenizer = AutoTokenizer.from_pretrained(model_name)

gpt_2_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [None]:
# load distilgpt2 - https://huggingface.co/distilgpt2
# the Auto* classes pick the concrete tokenizer/model classes from the checkpoint name;
# the model weights are moved to `device`
model_name = "distilgpt2"

distilgpt2_tokenizer = AutoTokenizer.from_pretrained(model_name)

distilgpt2_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [None]:
# load gpt2-large
# the Auto* classes pick the concrete tokenizer/model classes from the checkpoint name;
# the model weights are moved to `device`
model_name = "gpt2-large"

gpt2_large_tokenizer = AutoTokenizer.from_pretrained(model_name)

gpt2_large_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

## T5 Work (reproducing/tweaking [existing CKA method](https://github.com/dqxiu/CaliNet/blob/master/cka/assessing_score.py))

In [5]:
def probe_t5(model, input_ids, target):
    """Return the probability T5 assigns to ``target`` as the first token filling ``<extra_id_0>``.

    T5 is an encoder-decoder model, so the decoder needs its own input next to the
    encoder's ``input_ids``. (BERT is encoder-only and sees context on both sides of a
    word; GPT-style models are decoder-only and causal, so they only see preceding tokens.)
    The decoder is primed with ``<pad>`` followed by ``<extra_id_0>``, the sentinel that
    stands in for the blank; the distribution predicted after that sentinel is what we read.

    Args:
        model: a T5-style model, called with ``input_ids`` / ``decoder_input_ids`` and
            expected to return a mapping that contains ``'logits'``.
        input_ids: token ids of the context (batch size 1), containing ``<extra_id_0>``.
        target: vocabulary index of the candidate token (0-d tensor or plain int).

    Returns:
        The softmax probability of ``target`` as a numpy float scalar.
    """
    # 0 is the id of <pad>, 32099 the id of <extra_id_0>.
    # Build the decoder prompt on the same device as the encoder input instead of
    # hard-coding 'cuda:0', so the probe also works on CPU or on another GPU.
    decoder_input_ids = torch.tensor([[0, 32099]], device=input_ids.device)

    # Pure inference: no autograd graph is needed, and only the logits are read,
    # so the per-layer hidden states are not requested.
    with torch.no_grad():
        outputs = model(input_ids=input_ids,
                        decoder_input_ids=decoder_input_ids,
                        return_dict=True)

    # logits have shape [batch=1, decoder_len=2, vocab_size] (32128 for T5);
    # [0, -1] selects the only batch item and the last decoder position,
    # leaving one unnormalised score per vocabulary token.
    logits = outputs['logits'][0, -1]

    # convert the prediction scores to a probability distribution with softmax
    # https://pytorch.org/docs/stable/generated/torch.nn.functional.softmax.html#torch.nn.functional.softmax
    probs = F.softmax(logits, dim=-1).detach().cpu().numpy()

    # probs is 1-D (one entry per vocabulary token), so index it directly by the target id
    return probs[int(target)]

## Test out T5 Implementation

### t5_small

In [None]:
# Probe t5-small with the true completion: how likely is "Tokyo" to fill <extra_id_0>?
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Tokyo"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = t5_small_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = t5_small_tokenizer.encode(src_true, return_tensors="pt").to(device)

# what are we looking for, exactly? (map the id back to its token string for the printout)
tokenized_target_of_interest = t5_small_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_true = probe_t5(t5_small_model,input_ids, target) 

print(f"according to t5_small, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to t5_small, prob. the next token in the input sequence is ['▁Tokyo'] = 0.007489456795156002


In [None]:
# Probe t5-small with a false completion ("Berlin") for the same context
# (the variable is still called src_true, but the sentence is identical; only the target differs)
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Berlin"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = t5_small_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = t5_small_tokenizer.encode(src_true, return_tensors="pt").to(device)

# map the id back to its token string for the printout
tokenized_target_of_interest = t5_small_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_false = probe_t5(t5_small_model,input_ids, target) 

print(f"according to t5_small, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to t5_small, prob. the next token in the input sequence is ['▁Berlin'] = 0.003681196365505457


In [None]:
# sanity check: t5-small should rate the true completion above the false one
P_true > P_false

True

### flan-t5-small

In [None]:
# Probe flan-t5-small with the true completion: how likely is "Tokyo" to fill <extra_id_0>?
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Tokyo"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = flan_t5_small_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = flan_t5_small_tokenizer.encode(src_true, return_tensors="pt").to(device)

# what are we looking for, exactly? (map the id back to its token string for the printout)
tokenized_target_of_interest = flan_t5_small_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_true = probe_t5(flan_t5_small_model,input_ids, target) 

print(f"according to flan_t5_small, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to flan_t5_small, prob. the next token in the input sequence is ['▁Tokyo'] = 1.542711106594652e-05


In [None]:
# Probe flan-t5-small with a false completion ("Berlin") for the same context
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Berlin"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = flan_t5_small_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = flan_t5_small_tokenizer.encode(src_true, return_tensors="pt").to(device)

# map the id back to its token string for the printout
tokenized_target_of_interest = flan_t5_small_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_false = probe_t5(flan_t5_small_model,input_ids, target) 

print(f"according to flan_t5_small, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to flan_t5_small, prob. the next token in the input sequence is ['▁Berlin'] = 1.2637829058803618e-06


In [None]:
# sanity check: flan-t5-small should rate the true completion above the false one
P_true > P_false

True

### flan-t5-large

In [None]:
# Probe flan-t5-large with the true completion: how likely is "Tokyo" to fill <extra_id_0>?
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Tokyo"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = flan_t5_large_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = flan_t5_large_tokenizer.encode(src_true, return_tensors="pt").to(device)

# what are we looking for, exactly? (map the id back to its token string for the printout)
tokenized_target_of_interest = flan_t5_large_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_true = probe_t5(flan_t5_large_model,input_ids, target) 

print(f"according to flan_t5_large, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to flan_t5_large, prob. the next token in the input sequence is ['▁Tokyo'] = 0.0034607253037393093


In [None]:
# Probe flan-t5-large with a false completion ("Berlin") for the same context
src_true = "The 2020 Olympic Games took place in <extra_id_0> ."
target_token = "Berlin"

# [0][0] keeps only the first token id of the first (only) encoded sequence
target = flan_t5_large_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
input_ids = flan_t5_large_tokenizer.encode(src_true, return_tensors="pt").to(device)

# map the id back to its token string for the printout
tokenized_target_of_interest = flan_t5_large_tokenizer.convert_ids_to_tokens([target.detach().cpu().numpy()])

P_false = probe_t5(flan_t5_large_model,input_ids, target) 

print(f"according to flan_t5_large, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to flan_t5_large, prob. the next token in the input sequence is ['▁Berlin'] = 0.000330989743815735


In [None]:
# sanity check: flan-t5-large should rate the true completion above the false one
P_true > P_false

True

## GPT work for CKA (adapting CKA method)

In [7]:
def probe_gpt(model, input_ids, target):
  """Return the probability a causal LM assigns to ``target`` as the next token.

  Args:
    model: a pretrained causal language model pulled in from HuggingFace; the
      architecture is inferred from the name or path given to ``from_pretrained()``.
      See https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForCausalLM
    input_ids: the indices (in the model's vocabulary) of the left-context tokens.
    target: the index (in the model's vocabulary) of the token we want a prediction
      for. May be a plain int, a 0-d tensor, or a 1-d tensor of several token ids;
      in the last case only the first id is scored.

  Returns:
    The likelihood of the target following the left context according to the model,
    as a numpy float scalar, or None if the target is empty or outside the vocabulary.
  """
  # Only a single token can be scored per forward pass, so keep the first token id.
  # E.g. "Tokyo" may be split into <Tok> <yo>; we presume that a high probability
  # for <Tok> is representative of the likelihood that "Tokyo" would be predicted.
  # Flattening first makes ints, 0-d tensors and 1-d tensors all behave the same
  # (a one-element tensor used to yield a shape-(1,) array instead of a scalar).
  target_ids = torch.as_tensor(target).reshape(-1)
  if target_ids.numel() == 0:
    return None
  target_index = int(target_ids[0])

  # use the model to solicit a prediction; inference only, so no autograd graph
  with torch.no_grad():
    outputs = model(input_ids=input_ids, return_dict=True)

  # one unnormalised score per vocabulary token (50257 for GPT-2) at the last position
  logits = outputs['logits'][0, -1]

  # turn the scores into probabilities
  probs = F.softmax(logits, dim=-1).detach().cpu().numpy()

  # guard against ids the model cannot produce instead of relying on an IndexError
  if not 0 <= target_index < len(probs):
    print(f"target index {target_index} is outside the model vocabulary (size {len(probs)}); returning None")
    return None

  # the likelihood that the stipulated target follows the context, according to the model
  return probs[target_index]

## Test out GPT Implementation

### GPT-2

In [None]:
# Probe GPT-2 with the true completion "Tokyo".
# NOTE(review): the recorded output shows the scored token is 'Tok', i.e. only the first
# sub-token of the target. Presumably dropping the trailing space from `phrase` and probing
# ' Tokyo' (leading space) would match GPT-2's tokenisation better — verify.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = gpt_2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids correspond to the tokenized representation of the words preceding the blank space we're soliciting a prediction for
input_ids = gpt_2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? (first sub-token of the target, as a string)
tokenized_target_of_interest = gpt_2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_true = probe_gpt(gpt_2_model,input_ids, target)

print(f"according to gpt_2, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to gpt_2, prob. the next token in the input sequence is Tok = 2.607206965876685e-07


In [None]:
# Probe GPT-2 with a false completion ("Berlin") for the same context.
# NOTE(review): the recorded output shows only the first sub-token 'Ber' is scored — see whether
# a leading-space target (' Berlin') with no trailing space in `phrase` is the intended probe.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = gpt_2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# token ids of the left context that precedes the blank
input_ids = gpt_2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# first sub-token of the target, as a string, for the printout
tokenized_target_of_interest = gpt_2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_false = probe_gpt(gpt_2_model,input_ids, target)

print(f"according to gpt_2, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to gpt_2, prob. the next token in the input sequence is Ber = 2.477075327078637e-07


In [None]:
# sanity check: gpt2 should rate the true completion above the false one
P_true > P_false

True

### distilgpt2

In [None]:
# Probe distilgpt2 with the true completion "Tokyo".
# NOTE(review): the recorded output shows only the first sub-token 'Tok' is scored — see whether
# a leading-space target (' Tokyo') with no trailing space in `phrase` is the intended probe.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = distilgpt2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids correspond to the tokenized representation of the words preceding the blank space we're soliciting a prediction for
input_ids = distilgpt2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? (first sub-token of the target, as a string)
tokenized_target_of_interest = distilgpt2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_true = probe_gpt(distilgpt2_model,input_ids, target)

print(f"according to distilgpt2, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to distilgpt2, prob. the next token in the input sequence is Tok = 5.20289745509217e-07


In [None]:
# Probe distilgpt2 with a false completion ("Berlin") for the same context.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = distilgpt2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids correspond to the tokenized representation of the words preceding the blank space we're soliciting a prediction for
input_ids = distilgpt2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? (first sub-token of the target, as a string)
tokenized_target_of_interest = distilgpt2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_false = probe_gpt(distilgpt2_model,input_ids, target)

print(f"according to distilgpt2, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to distilgpt2, prob. the next token in the input sequence is Ber = 3.7727363633166533e-07


In [None]:
# sanity check: distilgpt2 should rate the true completion above the false one
P_true > P_false

True

### gpt2_large

In [None]:
# Probe gpt2-large with the true completion "Tokyo".
# NOTE(review): the probability recorded below this cell is digit-for-digit identical to the
# distilgpt2 run above; re-run on a fresh kernel to confirm gpt2-large was actually the model probed.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = gpt2_large_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids correspond to the tokenized representation of the words preceding the blank space we're soliciting a prediction for
input_ids = gpt2_large_tokenizer.encode(phrase, return_tensors='pt').to(device)

# first sub-token of the target, as a string, for the printout
tokenized_target_of_interest = gpt2_large_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_true = probe_gpt(gpt2_large_model,input_ids, target)

print(f"according to gp2_large, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

according to gp2_large, prob. the next token in the input sequence is Tok = 5.20289745509217e-07


In [None]:
# Probe gpt2-large with a false completion ("Berlin") for the same context.
# NOTE(review): the probability recorded below this cell is digit-for-digit identical to the
# distilgpt2 run above; re-run on a fresh kernel to confirm gpt2-large was actually the model probed.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# [0] selects the first (only) encoded sequence, leaving all of the target's token ids
target = gpt2_large_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids correspond to the tokenized representation of the words preceding the blank space we're soliciting a prediction for
input_ids = gpt2_large_tokenizer.encode(phrase, return_tensors='pt').to(device)

# first sub-token of the target, as a string, for the printout
tokenized_target_of_interest = gpt2_large_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

P_false = probe_gpt(gpt2_large_model,input_ids, target)

print(f"according to gp2_large, prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

according to gp2_large, prob. the next token in the input sequence is Ber = 3.7727363633166533e-07


In [None]:
# sanity check: gpt2-large should rate the true completion above the false one
P_true > P_false

True

## Model-wise comparison

* we should be able to do the following:
  * input a set of models we want to evaluate
  * input an expression (context) of interest
  * input a 'true' next token alongside one or more false ones
  * and get an output report that contains:
    * the 'result', i.e. whether P(true) > P(false)
    * the probabilities of both of those values
  * running this method over a large set of positive/negative pairings should produce a large pool of results that can be used to compare model families
  * we can also look at the relative 'certainty' across different models (at least in orders of magnitude)

In [39]:
# first, write helper to pull a pretrained LM and tokenizer off the shelf
def get_model_and_tokenizer(model_name):
  """Pull a pretrained tokenizer and language model off the shelf for ``model_name``.

  The model family is inferred from the (case-insensitive) name: names containing
  "t5" use the T5 classes, names containing "gpt" use the Auto* causal-LM classes.
  The model is moved to the notebook-level ``device``.

  Args:
    model_name: HuggingFace checkpoint name or path, e.g. "t5-small" or "distilgpt2".

  Returns:
    A ``(tokenizer, model)`` tuple — note the order: tokenizer first, despite the function name.

  Raises:
    NotImplementedError: for BERT-style names, which are recognised but not supported yet.
    ValueError: for names that match no known model family.
  """
  family = model_name.lower()

  if "t5" in family:
    tokenizer_cls, model_cls = T5Tokenizer, T5ForConditionalGeneration
  elif "gpt" in family:
    tokenizer_cls, model_cls = AutoTokenizer, AutoModelForCausalLM
  elif "bert" in family:
    # previously returned 0, which only failed later when the caller unpacked the result
    raise NotImplementedError(f"BERT-style models are not supported yet: {model_name!r}")
  else:
    # previously fell through and returned None implicitly
    raise ValueError(f"cannot infer a model family (t5/gpt) from model name {model_name!r}")

  return tokenizer_cls.from_pretrained(model_name), model_cls.from_pretrained(model_name).to(device)

In [45]:
def get_probe_function(prefix, probe_functions):
  """Return the first probe function whose name contains ``prefix``.

  The match is case-insensitive on both sides. Callables without a ``__name__``
  (e.g. ``functools.partial`` objects) are skipped rather than raising.

  Args:
    prefix: model-family tag to look for, e.g. "t5" or "gpt".
    probe_functions: iterable of candidate probe callables.

  Returns:
    The first matching callable, or None when nothing matches.
  """
  needle = prefix.lower()
  for func in probe_functions:
    if needle in getattr(func, "__name__", "").lower():
      return func
  return None

In [19]:
# candidate probe functions; get_probe_function() selects one by matching a prefix against each function's name
probe_functions = [probe_t5, probe_gpt]

In [62]:
def compare_models(model_name_list, probe_functions, input_pairings, loader=None):
  """Probe several models with true/false completions for a set of contexts.

  Args:
    model_name_list: iterable of HuggingFace model names. A name containing "t5" is
      probed as a T5 model, one containing "gpt" as a GPT-style causal LM.
    probe_functions: candidate probe callables ``f(model, input_ids, target)``; the
      first one whose ``__name__`` contains the family prefix ("t5"/"gpt") is used.
    input_pairings: dict mapping a context string to a list of entities. The first
      entity is the true completion, all subsequent ones are false completions.
    loader: optional callable ``model_name -> (tokenizer, model)``. Defaults to the
      notebook's ``get_model_and_tokenizer``. The returned model must expose ``.device``.

  Returns:
    A dict keyed by ``"<lower-cased model name>: <context as fed to the model>"``
    (for T5 the context includes the appended ``<extra_id_0>`` sentinel). Each value
    is ``(p_true, p_false)``: the probability of the true entity and the mean
    probability of the false entities. ``p_false`` is None when there is no usable
    false entity; ``p_true`` is None when the entity list is empty or the probe
    returned None.

  Raises:
    ValueError: if a model name matches no supported family, or no probe function
      matches that family.
  """
  if loader is None:
    # resolved at call time so the default keeps using the notebook's loader
    loader = get_model_and_tokenizer

  score_dict = {}
  for model_name in model_name_list:

    print(f"Running comparisons for {model_name}")

    # establish the model family before loading anything, so unsupported names fail fast
    lowered_name = model_name.lower()
    if "t5" in lowered_name:
      prefix = "t5"
    elif "gpt" in lowered_name:
      prefix = "gpt"
    else:
      raise ValueError(f"unsupported model family for {model_name!r}; expected a t5 or gpt model")

    # get the correct probe function for this family
    probe_func = next(
        (func for func in probe_functions if prefix in getattr(func, "__name__", "")),
        None)
    if probe_func is None:
      raise ValueError(f"no probe function whose name contains {prefix!r} was supplied")

    # get the proper model and tokenizer; inputs are placed on whatever device the model lives on
    tokenizer, model = loader(model_name)
    model_device = model.device

    for context, entities in input_pairings.items():
      if prefix == "t5":
        # T5 is probed by filling a sentinel, so the blank has to be spelled out
        context += " <extra_id_0> ."

      # the context is the same for every entity, so tokenize it once
      input_ids = tokenizer.encode(context, return_tensors="pt").to(model_device)

      p_true = None
      false_probs = []
      for position, entity in enumerate(entities):
        encoded = tokenizer.encode(entity, return_tensors="pt").to(model_device)[0]
        # the T5 probe takes a single token id, the GPT probe the whole id sequence
        target = encoded[0] if prefix == "t5" else encoded

        model_prob = probe_func(model, input_ids, target)

        if position == 0:
          p_true = model_prob
        elif model_prob is not None:
          # a probe may return None (e.g. target outside the vocabulary); leave it out of the mean
          false_probs.append(model_prob)

      # mean over the false entities; None instead of dividing by zero when there are none
      p_false = sum(false_probs, 0.0) / len(false_probs) if false_probs else None

      score_dict[lowered_name + ": " + context] = (p_true, p_false)

  return score_dict

In [63]:
# context -> candidate completions; the first entry of each list is the true one,
# every later entry is a false alternative
input_information = {
    'The 2020 Olympics were held in': [
        'Tokyo',
        'Berlin',
    ],
    'Operation Overlord took place in': [
        'Normandy',
        'Manila',
    ],
}

In [64]:
# run the comparison for one GPT-style and one T5 model; the returned dict is shown as the cell output
# (the probe list is passed inline here rather than via the `probe_functions` variable defined earlier)
compare_models(['distilgpt2', 't5-small'], [probe_t5, probe_gpt], input_information)

Running comparisons for distilgpt2
Running comparisons for t5-small


{'distilgpt2: The 2020 Olympics were held in': (4.077246e-07,
  4.857664492874392e-08),
 'distilgpt2: Operation Overlord took place in': (6.5069177e-09,
  1.0333457822753189e-07),
 't5-small: The 2020 Olympics were held in <extra_id_0> .': (0.006753775,
  0.0037354647647589445),
 't5-small: Operation Overlord took place in <extra_id_0> .': (0.00078862614,
  0.00026356257149018347)}