# Original code

### DejaVu Section 3.3 does exactly this

In [None]:
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2Model.from_pretrained('gpt2-medium', output_attentions=True, output_hidden_states=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
inputs = tokenizer("ariel is handsome", return_tensors="pt")

In [None]:
outputs = model(**inputs)
attentions = outputs.attentions
hidden_states = outputs.hidden_states

In [None]:
selected_hidden_state = hidden_states[4]

In [None]:
# Apply the lm_head to project the hidden states to the vocabulary space
lm_head = model.lm_head
logits = lm_head(selected_hidden_state)

# Focus on the last token's logits for next token prediction
next_token_logits = logits[:, -1, :]

# Convert logits to probabilities using softmax
import torch.nn.functional as F
probabilities = F.softmax(next_token_logits, dim=-1)

# Get the most probable next token ID
predicted_token_id = probabilities.argmax(dim=-1)
predicted_token = tokenizer.decode(predicted_token_id.tolist())

print(f"Predicted next token: {predicted_token}")

AttributeError: 'GPT2Model' object has no attribute 'lm_head'

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and model with the LM head
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states=True)

In [None]:
# Encode some input text
inputs = tokenizer("Y/N: the capital of france is paris:   ", return_tensors="pt")

print("Input IDs:", inputs['input_ids'])
print("Tokens:", tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze().tolist()))

In [None]:
# Get model outputs
outputs = model(**inputs)
hidden_states = outputs.hidden_states

# Select the output from an earlier block, for instance, the 5th block
selected_hidden_state = hidden_states[4]  # Using zero indexing; adjust as necessary

# Apply the lm_head to project the hidden states to the vocabulary space
logits = model.lm_head(selected_hidden_state)

print("logits: ", logits)

In [None]:
logits.shape

In [None]:
# Focus on the last token's logits for next token prediction
next_token_logits = logits[:, -1, :]

# Convert logits to probabilities using softmax
import torch.nn.functional as F
probabilities = F.softmax(next_token_logits, dim=-1)

print("probabilities: ", probabilities)

In [None]:
probabilities.shape

In [None]:
predicted_token_id

In [None]:
probabilities[0][predicted_token_id.item()]

In [None]:
# Get the most probable next token ID
predicted_token_id = probabilities.argmax(dim=-1)
predicted_token = tokenizer.decode(predicted_token_id.tolist())

print(f"Predicted next token: {predicted_token}")

In [None]:
input_text = "The capital of France is Paris: Y/N?"
input_ids = tokenizer.encode(input_text, return_tensors='pt')
num_tokens_to_generate = 25

for _ in range(num_tokens_to_generate):
    # Get model outputs
    outputs = model(input_ids)
    logits = outputs.logits

    # Only use the logits from the last token position
    next_token_logits = logits[:, -1, :]
    next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

    # Append the predicted token ID to the input sequence
    input_ids = torch.cat([input_ids, next_token_id], dim=-1)

# Decode the input IDs to a string
generated_text = tokenizer.decode(input_ids[0])

print(f"Generated text: {generated_text}")

In [None]:
tokenizer.decode(231)

#### GPT-2 regular, 13 hidden states

- GPT-2-medium, 25 hidden states

In [None]:
len(hidden_states)

In [None]:
hidden_states[12].shape

### 24 `attentions` bcs. there are 24 attention blocks in gpt-2-medium

In [None]:
len(attentions)

In [None]:
attentions[0].shape

In [None]:
attentions[1].shape

In [None]:
attentions[0][0].shape

### Attention Scores at first layer, for all heads

In [None]:
attentions[0][0].shape[0]

In [None]:
from matplotlib import pyplot as plt

# Number of rows and columns for the subplot grid
n_rows = 4
n_cols = 4

# Create a figure and a set of subplots
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10)) # Adjust figsize as needed

# Flatten the axes array for easy indexing
axes = axes.flatten()

for i in range(attentions[0][0].shape[0]):
    # Select the appropriate subplot
    ax = axes[i]

    # Plot the attention map on the chosen subplot
    im = ax.imshow(attentions[0][0][i].detach().numpy())

    # Optional: Add a colorbar and set other properties
    fig.colorbar(im, ax=ax)

# Adjust layout to prevent overlapping
plt.tight_layout()
plt.show()

## Pearson Correlation between rows, across blocks

Rows because rows represent attention scores per token, and softmax makes them add up to 1. Columns don't mean much

In [None]:
import numpy as np
from scipy.stats import pearsonr

In [None]:
attn_head_0_layer_0 = attentions[0][0][0]  # Layer, Batch, attn. Head
attn_head_0_layer_1 = attentions[1][0][0]
attn_head_0_layer_0.shape

In [None]:
attn_head_0_layer_1

In [None]:
attn_head_0_layer_0[0,:]

In [None]:
pearsons_r_1_2 = []
for row in range(attn_head_0_layer_0.shape[0]):
    left_layer_vec = attn_head_0_layer_0[row][:].detach().numpy() # Tells PyTorch to forget about gradient info
    right_layer_vec = attn_head_0_layer_1[row][:].detach().numpy()
    pearsons_r_1_2.append(pearsonr(left_layer_vec, right_layer_vec))

In [None]:
pearsons_r_1_2

In [None]:
len(attentions)

In [None]:
# Let's do this for all trasnformer blocks
# Calculate correlation of neighboring transformer blocks
# Correlation is row-wise

# TODO This is for first head ONLY!

def calculate_pearsons_per_head(head_nr):
    all_pearsons = []
    for block in range(len(attentions) - 1):
        pearsons_r = []

        left_layer = attentions[block][0][head_nr]
        right_layer = attentions[block + 1][0][head_nr]

        for row in range(left_layer.shape[0]):
            left_layer_vec = left_layer[row][:].detach().numpy() # .detach().numpy() Tells PyTorch to forget about gradient info
            right_layer_vec = right_layer[row][:].detach().numpy()
            pearsons_r.append(pearsonr(left_layer_vec, right_layer_vec))

        all_pearsons.append(pearsons_r)

    return all_pearsons

In [None]:
len(all_pearsons)

In [None]:
all_pearsons[0]

In [None]:
pearsons_statistic = np.array(all_pearsons)[:,:,0]
pearsons_p_value = np.array(all_pearsons)[:,:,1]

pearsons_statistic

In [None]:
plt.imshow(pearsons_statistic)
plt.colorbar()

In [None]:
plt.imshow(pearsons_p_value)
plt.colorbar()

## Let's calculate corr. between a layer and all previous layers

In [None]:
def calculate_pearsons_autoregressive(head_nr: int = 0):
    ar_pearsons_r = []
    ar_pearsons_p_val = []
    for block in range(len(attentions) - 1, 1, -1):
        block_pearsons_r = []
        block_pearsons_p_val = []

        right_layer = attentions[block][0][head_nr]

        for l in range(0, block):
            pearsons_r = []
            pearsons_p_val = []
            left_layer = attentions[l][0][head_nr]

            for row in range(left_layer.shape[0]):
                left_layer_vec = left_layer[row][:].detach().numpy() # .detach().numpy() Tells PyTorch to forget about gradient info
                right_layer_vec = right_layer[row][:].detach().numpy()

                pearsons = pearsonr(left_layer_vec, right_layer_vec)

                pearsons_r.append(pearsons[0])
                pearsons_p_val.append(pearsons[1])

            block_pearsons_r.append(pearsons_r)
            block_pearsons_p_val.append(pearsons_p_val)

        ar_pearsons_r.append(block_pearsons_r)
        ar_pearsons_p_val.append(block_pearsons_p_val)


    return ar_pearsons_r, ar_pearsons_p_val

In [None]:
ar_pearsons_r, ar_pearsons_p_val = calculate_pearsons_autoregressive()

In [None]:
len(ar_pearsons_r[0])

In [None]:
len(ar_pearsons_r)

In [None]:
ar_pearsons_r

In [None]:
plt.imshow(ar_pearsons_r[0])
plt.colorbar()

In [None]:
plt.imshow(ar_pearsons_r[21])
plt.colorbar()

In [None]:
ar_pearsons_r[0]

In [None]:
len(ar_pearsons_r[0])

In [None]:
len(attentions)

# Cleaned code

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the tokenizer and model with the LM head
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states=True)

In [18]:
# Encode some input text
inputs = "Y/N: the capital of france is paris:"
input2 = "testing: the united states is a country in "

### Predict token for different inputs

In [19]:
import torch.nn.functional as F

def predict_from_early_exit(input_prompt, exit_layer=2, print_debug_stmts=False, num_tokens_to_generate = 25):
    # Exit model from the early transformer blocks instead of computing all few dozen
    # This should give a good estimate of the final token, at a fraction of the cost
    inputs = tokenizer(input_prompt, return_tensors="pt")

    if print_debug_stmts:
        print("Input IDs:", inputs['input_ids'])
        print("Tokens:", tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze().tolist()))

    outputs = model(**inputs)
    attentions = outputs.attentions
    hidden_states = outputs.hidden_states
    print("Nr. of hidden states: ", len(outputs.hidden_states))

    early_exit_state = outputs.hidden_states[exit_layer]

    logits = model.lm_head(selected_hidden_state)
    probabilities = F.softmax(next_token_logits, dim=-1)

    predicted_token_id = probabilities.argmax(dim=-1)
    predicted_token = tokenizer.decode(predicted_token_id.tolist())

    print(f"Predicted next token: {predicted_token}")

In [20]:
predict_from_early_exit(input2)

Nr. of hidden states:  13
Predicted next token:  


In [6]:
print(len(hidden2))

# Select the output from an earlier block, for instance, the 5th block
selected_hidden_state = hidden_states[1]  # Using zero indexing; adjust as necessary
another_hidden_state = hidden_states[12]

# Apply the lm_head to project the hidden states to the vocabulary space
logits = model.lm_head(selected_hidden_state)
logits12 = model.lm_head(another_hidden_state)

# assert [logits[i]==logits12[i] for i in range(len(logits))]
# print("logits: ", logits)

13
13


In [8]:
logits.shape

torch.Size([1, 13, 50257])

In [9]:
# Focus on the last token's logits for next token prediction
next_token_logits = logits[:, -1, :]

# Convert logits to probabilities using softmax
import torch.nn.functional as F
probabilities = F.softmax(next_token_logits, dim=-1)

print("probabilities: ", probabilities)

probabilities:  tensor([[1.9838e-10, 1.7317e-08, 8.4721e-14,  ..., 2.5343e-24, 2.1858e-22,
         3.0620e-09]], grad_fn=<SoftmaxBackward0>)


In [10]:
probabilities.shape

torch.Size([1, 50257])

In [11]:
# Get the most probable next token ID
predicted_token_id = probabilities.argmax(dim=-1)
predicted_token = tokenizer.decode(predicted_token_id.tolist())

print(f"Predicted next token: {predicted_token}")

Predicted next token:  the


In [12]:
input_text = "The capital of France is Paris: Y/N?"
input_ids = tokenizer.encode(input_text, return_tensors='pt')
num_tokens_to_generate = 25

for _ in range(num_tokens_to_generate):
    # Get model outputs
    outputs = model(input_ids)
    logits = outputs.logits

    # Only use the logits from the last token position
    next_token_logits = logits[:, -1, :]
    next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

    # Append the predicted token ID to the input sequence
    input_ids = torch.cat([input_ids, next_token_id], dim=-1)

# Decode the input IDs to a string
generated_text = tokenizer.decode(input_ids[0])

print(f"Generated text: {generated_text}")

Generated text: The capital of France is Paris: Y/N?

The French capital is Paris: Y/N? The French capital is Paris: Y/N? The French capital


In [14]:
hidden_states[12].shape

torch.Size([1, 13, 768])

In [15]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

# Load the tokenizer and model with the LM head
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states=True)

# Encode some input text
inputs = tokenizer("Y/N: the capital of france is paris:   ", return_tensors="pt")

# Get model outputs
outputs = model(**inputs)
hidden_states = outputs.hidden_states

# Select the output from an earlier block, for instance, the 5th block
selected_hidden_state = hidden_states[12]  # Using zero indexing; adjust as necessary

# Apply the lm_head to project the hidden states to the vocabulary space
logits = model.lm_head(selected_hidden_state)

# Focus on the last token's logits for next token prediction
next_token_logits = logits[:, -1, :]

# Convert logits to probabilities using softmax
probabilities = F.softmax(next_token_logits, dim=-1)

print(probabilities)
print(len(probabilities))

# get sorted list of indices
# ranked_indices = torch.argsort(probabilities)
values, indices = probabilities.topk(3)
print(values)
print(indices)

# Get the most probable next tokens
for i in indices:
  predicted_token = tokenizer.decode(i)
  print("token: ", predicted_token)


tensor([[9.6054e-04, 4.2071e-04, 1.3583e-04,  ..., 2.0722e-07, 1.0056e-06,
         3.7878e-03]], grad_fn=<SoftmaxBackward0>)
1
tensor([[0.2658, 0.1268, 0.0381]], grad_fn=<TopkBackward0>)
tensor([[ 220, 1849,  198]])
token:    

