## Imports

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1


In [2]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import T5ForConditionalGeneration
import numpy as np

In [3]:
# set device: prefer the GPU, but fall back to the CPU so the notebook still
# runs (more slowly) on machines without CUDA instead of failing on `.to(device)`
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Load Models

In [4]:
# load t5-small: tokenizer plus the encoder-decoder model used by probe_t5
# NOTE: `model_name` is a scratch variable that every loading cell reassigns
model_name = "t5-small"

# tokenizer matching the checkpoint (maps text <-> vocabulary ids)
t5_tokenizer = AutoTokenizer.from_pretrained(model_name)

# download/load the pretrained weights and move them to the selected device
t5_model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [16]:
# load gpt2: tokenizer plus the causal (decoder-only) language model used by probe_gpt
# NOTE: `model_name` is a scratch variable that every loading cell reassigns
model_name = "gpt2"

# tokenizer matching the checkpoint (maps text <-> vocabulary ids)
gpt_2_tokenizer = AutoTokenizer.from_pretrained(model_name)

# download/load the pretrained weights and move them to the selected device
gpt_2_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [8]:
# load distilgpt2 - https://huggingface.co/distilgpt2
# NOTE: `model_name` is a scratch variable that every loading cell reassigns
model_name = "distilgpt2"

# tokenizer matching the checkpoint (maps text <-> vocabulary ids)
distilgpt2_tokenizer = AutoTokenizer.from_pretrained(model_name)

# download/load the pretrained weights and move them to the selected device
distilgpt2_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

In [13]:
# load gpt2-large - https://huggingface.co/gpt2-large
# NOTE: `model_name` is a scratch variable that every loading cell reassigns
model_name = "gpt2-large"

# tokenizer matching the checkpoint (maps text <-> vocabulary ids)
gpt2_large_tokenizer = AutoTokenizer.from_pretrained(model_name)

# download/load the pretrained weights and move them to the selected device
gpt2_large_model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

## T5 Work (reproducing/tweaking [existing CKA method](https://github.com/dqxiu/CaliNet/blob/master/cka/assessing_score.py))

In [None]:
def probe_t5(model, input_ids, target):
    """Return the probability T5 gives `target` as the first token filling `<extra_id_0>`.

    T5 is an encoder-decoder model, so it needs both encoder `input_ids` and
    `decoder_input_ids`. (BERT, by contrast, is encoder-only and can use the
    context on both sides of a token; GPT-style models are decoder-only and
    causal, so they only see the tokens that precede a position.)

    The decoder is primed with `<pad>` followed by the sentinel `<extra_id_0>`,
    which stands in for the blank in the prompt. The logits at the last decoder
    position are therefore the model's scores for the first token of the fill-in.

    Args:
        model: a T5-style model (or any callable) accepting `input_ids`,
            `decoder_input_ids` and `return_dict`, whose output exposes
            `['logits']` shaped (batch, decoder_length, vocab_size).
        input_ids: encoder token ids of the prompt; only the first batch
            element is scored.
        target: vocabulary index of the candidate token, given as a
            single-element tensor or a plain int.

    Returns:
        The softmax probability of `target` as a NumPy floating scalar.

    Raises:
        IndexError: if `target` is not a valid index into the vocabulary-sized
            probability vector.
    """
    # 0 is the id of <pad> and 32099 is the id of <extra_id_0>
    pad_token_id, sentinel_token_id = 0, 32099

    # build the decoder prompt on the same device as the encoder input instead of
    # assuming 'cuda:0', so the probe also works on CPU or on another GPU
    decoder_input_ids = torch.tensor(
        [[pad_token_id, sentinel_token_id]], device=input_ids.device
    )

    # inference only: skip autograd bookkeeping, and don't request hidden states
    # because nothing below reads them
    with torch.no_grad():
        outputs = model(
            input_ids=input_ids,
            decoder_input_ids=decoder_input_ids,
            return_dict=True,
        )

    # logits is (batch=1, decoder_length=2, vocab_size); [0, -1] selects the first
    # batch element and the LAST DECODER POSITION (not a layer), leaving one
    # unnormalized score per vocabulary token
    logits = outputs['logits'][0, -1]

    # convert the scores to a probability distribution over the vocabulary
    # https://pytorch.org/docs/stable/generated/torch.nn.functional.softmax.html#torch.nn.functional.softmax
    probs = F.softmax(logits, dim=-1)

    # already 1-dimensional (length = vocab size), so no reshape is needed
    probs = probs.detach().cpu().numpy()

    # int() accepts both single-element tensors and plain Python ints
    return probs[int(target)]

## Test out T5 Implementation

### (base) t5-small

In [None]:
# Cloze-style prompt: <extra_id_0> marks the blank T5 is asked to fill.
src_true = "Operation Overlord took place in <extra_id_0> ."
# Candidate answer treated as the expected ("true") completion.
target_token = "France"

# Keep only the first id of the encoded target; probe_t5 scores a single token.
target = t5_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
# Encoder input ids for the prompt, on the same device as the model.
input_ids = t5_tokenizer.encode(src_true, return_tensors="pt").to(device)

# Probability t5-small assigns to the "true" target at the blank.
P_true = probe_t5(t5_model,input_ids, target) 

P_true

0.0018174488

In [None]:
# Same prompt as for the "true" probe (the variable name `src_true` is simply reused).
src_true = "Operation Overlord took place in <extra_id_0> ."
# Candidate answer treated as the contrasting ("false") completion.
target_token = "Belgium"

# Keep only the first id of the encoded target; probe_t5 scores a single token.
target = t5_tokenizer.encode(target_token, return_tensors="pt").to(device)[0][0]
            
# Encoder input ids for the prompt, on the same device as the model.
input_ids = t5_tokenizer.encode(src_true, return_tensors="pt").to(device)

# Probability t5-small assigns to the "false" target at the blank.
P_false = probe_t5(t5_model,input_ids, target) 

P_false

0.00039698844

In [None]:
# re-display the probability of the "true" target for side-by-side comparison
P_true

0.0018174488

In [None]:
# re-display the probability of the "false" target for side-by-side comparison
P_false

0.00039698844

In [None]:
# the probe "passes" when the model prefers the true target over the false one
P_true > P_false

True

### flan-t5-small (TODO: not yet evaluated)

## GPT work for CKA (adapting CKA method)

In [6]:
def probe_gpt(model, input_ids, target):
    """Return the probability a causal LM gives `target` as the next token.

    Args:
        model: a pretrained causal language model pulled in from HuggingFace
            (the concrete architecture is chosen by `from_pretrained()` from the
            checkpoint name), or any callable accepting `input_ids` and
            `return_dict` whose output exposes `['logits']` shaped
            (batch, sequence_length, vocab_size).
            See https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForCausalLM
        input_ids: vocabulary indices of the left-context tokens; only the
            first batch element is scored.
        target: vocabulary index of the token to score. May be a plain int, a
            0-d tensor, or a 1-d tensor / sequence of ids. When several ids are
            given (e.g. "Tokyo" is split into <Tok> <yo>) only the FIRST one is
            scored, on the assumption that a high probability for the first
            sub-word is representative of the whole word.

    Returns:
        The likelihood of the target following the left context, as a NumPy
        floating scalar, or None when `target` is empty or is not a valid
        (non-negative, in-vocabulary) index.
    """
    # normalise every accepted form of `target` to a flat tensor of ids;
    # this also makes 0-d tensors work, for which len() would raise TypeError
    target_ids = torch.as_tensor(target).reshape(-1)
    if target_ids.numel() == 0:
        return None

    # ensure we're only asking for a single token prediction: default to the
    # very first sub-word of the target
    target_index = int(target_ids[0])

    # use the model to solicit a prediction; inference only, so skip autograd
    # bookkeeping, and don't request hidden states since nothing below reads them
    with torch.no_grad():
        outputs = model(input_ids=input_ids, return_dict=True)

    # last position of the first batch element: one unnormalized score per
    # vocabulary token (50257 entries for GPT-2)
    logits = outputs['logits'][0, -1]

    # turn the scores into probabilities and keep them as one NumPy array
    # (no round-trip through a Python list)
    probs = F.softmax(logits, dim=-1).detach().cpu().numpy()

    # explicit bounds check: index == len(probs) is already out of range, and a
    # negative index would otherwise silently wrap around to the wrong token
    if not 0 <= target_index < len(probs):
        print("target index not in model vocabulary scope; returning None")
        return None

    # the likelihood that our stipulated target would follow the context,
    # according to the model
    return probs[target_index]

## Test out GPT Implementation

### GPT-2

In [28]:
# Left context for the probe and the candidate treated as the "true" next word.
# NOTE(review): `phrase` ends with a space and `target_token` has no leading space;
# the recorded output shows the target being scored as the sub-word 'Tok'. GPT-2's
# tokenizer usually attaches the space to the following word, so 'in' + ' Tokyo'
# is probably the intended probe - confirm by re-running before relying on the numbers.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = gpt_2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids are the tokenized words preceding the blank we're soliciting a prediction for
input_ids = gpt_2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? -> the text of the first target sub-word
tokenized_target_of_interest = gpt_2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability gpt2 assigns to that sub-word as the next token
P_true = probe_gpt(gpt_2_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

prob. the next token in the input sequence is Tok = 2.607206965876685e-07


In [32]:
# Same left context, with the candidate treated as the "false" next word.
# NOTE(review): trailing space in `phrase` + space-less target means the recorded
# output scores the sub-word 'Ber'; consider 'in' + ' Berlin' - confirm by re-running.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = gpt_2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# tokenized left context
input_ids = gpt_2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# text of the first target sub-word, for the printout
tokenized_target_of_interest = gpt_2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability gpt2 assigns to that sub-word as the next token
P_false = probe_gpt(gpt_2_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

prob. the next token in the input sequence is Ber = 2.477075327078637e-07


In [33]:
# the probe "passes" when gpt2 prefers the true target over the false one
P_true > P_false

True

### distilgpt2

In [24]:
# Left context for the probe and the candidate treated as the "true" next word.
# NOTE(review): trailing space in `phrase` + space-less target means the recorded
# output scores the sub-word 'Tok'; consider 'in' + ' Tokyo' - confirm by re-running.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = distilgpt2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids are the tokenized words preceding the blank we're soliciting a prediction for
input_ids = distilgpt2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? -> the text of the first target sub-word
tokenized_target_of_interest = distilgpt2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability distilgpt2 assigns to that sub-word as the next token
P_true = probe_gpt(distilgpt2_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

prob. the next token in the input sequence is Tok = 5.20289745509217e-07


In [25]:
# Same left context, with the candidate treated as the "false" next word.
# NOTE(review): trailing space in `phrase` + space-less target means the recorded
# output scores the sub-word 'Ber'; consider 'in' + ' Berlin' - confirm by re-running.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = distilgpt2_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids are the tokenized words preceding the blank we're soliciting a prediction for
input_ids = distilgpt2_tokenizer.encode(phrase, return_tensors='pt').to(device)

# what are we looking for, exactly? -> the text of the first target sub-word
tokenized_target_of_interest = distilgpt2_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability distilgpt2 assigns to that sub-word as the next token
P_false = probe_gpt(distilgpt2_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

prob. the next token in the input sequence is Ber = 3.7727363633166533e-07


In [11]:
# the probe "passes" when distilgpt2 prefers the true target over the false one
# NOTE(review): this cell's execution count is lower than that of the two probe
# cells above it, so the recorded result may come from an earlier run - re-run to confirm.
P_true > P_false

True

### gpt2-large

In [26]:
# Left context for the probe and the candidate treated as the "true" next word.
# NOTE(review): the output recorded for this cell (5.20289745509217e-07) is
# digit-for-digit the value recorded for the distilgpt2 probe, which is unlikely
# for two different models - restart the kernel and run all cells to verify that
# gpt2_large_model was really the model evaluated.
# NOTE(review): trailing space in `phrase` + space-less target means the recorded
# output scores the sub-word 'Tok'; consider 'in' + ' Tokyo' - confirm by re-running.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Tokyo'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = gpt2_large_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids are the tokenized words preceding the blank we're soliciting a prediction for
input_ids = gpt2_large_tokenizer.encode(phrase, return_tensors='pt').to(device)

# text of the first target sub-word, for the printout
tokenized_target_of_interest = gpt2_large_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability gpt2-large assigns to that sub-word as the next token
P_true = probe_gpt(gpt2_large_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_true}")

prob. the next token in the input sequence is Tok = 5.20289745509217e-07


In [27]:
# Same left context, with the candidate treated as the "false" next word.
# NOTE(review): the output recorded for this cell (3.7727363633166533e-07) is
# digit-for-digit the value recorded for the distilgpt2 probe, which is unlikely
# for two different models - restart the kernel and run all cells to verify that
# gpt2_large_model was really the model evaluated.
# NOTE(review): trailing space in `phrase` + space-less target means the recorded
# output scores the sub-word 'Ber'; consider 'in' + ' Berlin' - confirm by re-running.
phrase = 'The 2020 Olympic Games took place in '
target_token = 'Berlin'

# all sub-word ids of the target (first row of the encoded batch); probe_gpt only scores the first one
target = gpt2_large_tokenizer.encode(target_token, return_tensors='pt').to(device)[0]

# these input ids are the tokenized words preceding the blank we're soliciting a prediction for
input_ids = gpt2_large_tokenizer.encode(phrase, return_tensors='pt').to(device)

# text of the first target sub-word, for the printout
tokenized_target_of_interest = gpt2_large_tokenizer.convert_ids_to_tokens(list(target.detach().cpu().numpy()))[0]

# probability gpt2-large assigns to that sub-word as the next token
P_false = probe_gpt(gpt2_large_model,input_ids, target)

print(f"prob. the next token in the input sequence is {tokenized_target_of_interest} = {P_false}")

prob. the next token in the input sequence is Ber = 3.7727363633166533e-07


In [16]:
# the probe "passes" when gpt2-large prefers the true target over the false one
# NOTE(review): this cell's execution count is lower than that of the two probe
# cells above it, so the recorded result may come from an earlier run - re-run to confirm.
P_true > P_false

True