In [1]:
import os

import numpy as np
import pandas as pd
import random
import seaborn as sns
import torch
import tqdm
from torch.nn import functional as F
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    RobertaForMaskedLM,
    RobertaTokenizer,
    T5ForConditionalGeneration,
    T5Tokenizer
)

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Purpose: run a single decoder step of T5 on a prompt that contains a sentinel
# token and report the logits, probabilities and arg-max token for the first
# output position.

# Step 1: Load the pre-trained T5 model and tokenizer
model_name = "t5-small"  # You can use "t5-base", "t5-large", etc.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference only; makes the no-dropout intent explicit

# Step 2: Prepare the input text
# NOTE(review): "comltete" looks like a typo for "complete". It is kept
# byte-for-byte so the model input is unchanged -- confirm the intended prompt.
input_text = "comltete: i feel <extra_id_0>"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# T5 starts decoding from the pad token; the model config exposes that value as
# `decoder_start_token_id`, so read it from there instead of hard-coding the
# tokenizer's pad id. The tensor is created directly on the target device.
decoder_input_ids = torch.tensor(
    [[model.config.decoder_start_token_id]], device=device
)

# Step 3: Forward pass through the model
with torch.no_grad():  # Disable gradient computation for inference
    outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

# Step 4: Extract logits from the model's output
logits1 = outputs.logits  # Shape: (batch_size, sequence_length, vocab_size)

# Step 5: Get logits for the next token
# The decoder input holds a single token, so the last position is also the
# first (and only) one.
next_token_logits = logits1[:, -1, :]  # Shape: (batch_size, vocab_size)

# Step 6: Inspect the logits
print("Logits for the next token:", next_token_logits)

# Optional: Apply softmax to convert logits to probabilities
probs = torch.softmax(next_token_logits, dim=-1)
print("Probabilities for the next token:", probs)

# Step 7: Decode the predicted next token
# `.item()` is valid here because the batch contains exactly one sequence.
predicted_token_id = torch.argmax(next_token_logits, dim=-1).item()
predicted_token = tokenizer.decode(predicted_token_id)
print("Predicted next token:", predicted_token)

Logits for the next token: tensor([[-38.2558, -13.9876, -24.1940,  ..., -59.6793, -59.6874, -59.6310]],
       device='cuda:0')
Probabilities for the next token: tensor([[5.6643e-18, 1.9619e-07, 7.2467e-12,  ..., 2.8121e-27, 2.7894e-27,
         2.9515e-27]], device='cuda:0')
Predicted next token: <extra_id_0>


In [16]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Purpose: (1) inspect T5's first decoder step for a plain prompt, then
# (2) beam-search a handful of short continuations and print a few of them.

# Step 1: Load the pre-trained T5 model and tokenizer
model_name = "t5-small"  # You can use "t5-base", "t5-large", etc.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference only; makes the no-dropout intent explicit

# Step 2: Prepare the input text
input_text = " i feel"
input_ids = tokenizer(
    input_text, add_special_tokens=True, return_tensors="pt"
).input_ids.to(device)

# T5 starts decoding from the pad token; the model config exposes that value as
# `decoder_start_token_id`. The tensor is created directly on the target device.
decoder_input_ids = torch.tensor(
    [[model.config.decoder_start_token_id]], device=device
)

# Step 3: Forward pass through the model
with torch.no_grad():  # Disable gradient computation for inference
    outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

# Step 4: Extract logits from the model's output
logits = outputs.logits  # Shape: (batch_size, sequence_length, vocab_size)

# Step 5: Get logits for the next token (last decoder position)
next_token_logits = logits[:, -1, :]  # Shape: (batch_size, vocab_size)

# Step 6: Inspect the logits
print("Logits for the next token:", next_token_logits)

# Probabilities for the single sequence in the batch, as a 1-D (vocab_size,)
# tensor. Computed with torch.softmax on the already-sliced logits so this cell
# does not depend on `F` being imported by some other cell.
probs = torch.softmax(next_token_logits, dim=-1)[0]
print("Probabilities for the next token:", probs)

# Step 7: Decode the predicted next token
predicted_token_id = torch.argmax(next_token_logits, dim=-1).item()
print(predicted_token_id)
predicted_token = tokenizer.decode(predicted_token_id, skip_special_tokens=False)
print("Predicted next token:", predicted_token)

# Step 8: Beam search -- keep the 10 best of 200 beams, at most 7 tokens each
output_ids = model.generate(input_ids, num_beams=200, max_length=7, num_return_sequences=10)
print(output_ids)

# Step 9: Decode and show a sample of the returned sequences.
# The indices are the same ones the cell inspected before (0, 1, 2 and 5).
for beam_index in (0, 1, 2, 5):
    output_text = tokenizer.decode(output_ids[beam_index], skip_special_tokens=True)

    print("Input:", input_text)
    print("Output:", output_text)

Logits for the next token: tensor([[-18.5767,  -8.0120, -12.0802,  ..., -43.0359, -43.0601, -42.9835]],
       device='cuda:0')
Probabilities for the next token: tensor([2.7076e-07, 1.0490e-02, 1.7947e-04,  ..., 6.4576e-18, 6.3034e-18,
        6.8051e-18], device='cuda:0')
3
Predicted next token: 
tensor([[   0,    3,   23,  473,    1,    0,    0],
        [   0,    3,   23,  473,  114,    1,    0],
        [   0,    3,   23,  473,    5,    1,    0],
        [   0,    3,   23,  473,    3,   23,  473],
        [   0,    3,   23,  473,  114,    3,   23],
        [   0,    3,   23,  473,  473,    1,    0],
        [   0,    3,   23,  473,  207,    5,    1],
        [   0,    3,   23,  473,   27,  473,    1],
        [   0,    3,   23,  473, 1245,    5,    1],
        [   0,    3,   23,  473,  207,    1,    0]], device='cuda:0')
Input:  i feel
Output: i feel
Input:  i feel
Output: i feel like
Input:  i feel
Output: i feel.
Input:  i feel
Output: i feel feel


In [9]:
import torch
from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration

# Single-step probe: which token does T5 rank highest for the first decoder
# position when the prompt ends with a sentinel token?

# --- Model and tokenizer ----------------------------------------------------
model_name = "t5-small"  # You can use "t5-base", "t5-large", etc.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Prefer the GPU when one is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# --- Encoder input ----------------------------------------------------------
input_text = "i feel so <extra_id_0>"
encoded_prompt = tokenizer(input_text, return_tensors="pt", add_special_tokens=True)
input_ids = encoded_prompt.input_ids.to(device)

# --- Decoder input: one sequence holding only the pad token -----------------
decoder_input_ids = torch.full(
    (1, 1), tokenizer.pad_token_id, dtype=torch.long, device=device
)

# --- Forward pass (no gradients needed) -------------------------------------
model.eval()
with torch.no_grad():
    outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

# --- Scores for the last decoder position -----------------------------------
logits = outputs.logits  # Shape: (batch_size, sequence_length, vocab_size)
next_token_logits = logits[:, -1, :]

# Normalise to probabilities, then take the most likely token id.
probs = torch.softmax(next_token_logits, dim=-1)
predicted_token_id = probs.argmax(dim=-1).item()

# --- Report -----------------------------------------------------------------
print("Predicted token ID:", predicted_token_id)
print("Probabilities:", probs)

# Special tokens are kept so that sentinel predictions remain visible.
predicted_word = tokenizer.decode(predicted_token_id, skip_special_tokens=False)

print("Predicted next word:", predicted_word)

Predicted token ID: 32099
Probabilities: tensor([[4.7018e-18, 7.5918e-07, 2.5538e-11,  ..., 3.2943e-27, 3.3372e-27,
         3.5468e-27]], device='cuda:0')
Predicted next word: <extra_id_0>


In [10]:
# Purpose: hand-rolled greedy decoding with T5 -- repeatedly pick the arg-max
# token for the last decoder position and feed it back in, until EOS is
# produced or `max_length` tokens have been generated.

model_name = "t5-base"  # You can use "t5-base", "t5-large", etc.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Move the model to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()  # inference only; makes the no-dropout intent explicit

input_text = "i feel so <extra_id_0>"
input_ids = tokenizer(
    input_text, return_tensors="pt", add_special_tokens=True
).input_ids.to(device)

# T5 starts decoding from the pad token; the model config exposes that value as
# `decoder_start_token_id`. The tensor is created directly on the target device.
decoder_input_ids = torch.tensor(
    [[model.config.decoder_start_token_id]], device=device
)

# Generate tokens iteratively
max_length = 10  # Maximum number of tokens to generate
generated_tokens = []

with torch.no_grad():
    # The encoder input never changes between steps, so encode it once and
    # reuse the result instead of re-running the encoder on every iteration.
    encoder_outputs = model.get_encoder()(input_ids=input_ids)

    for _ in range(max_length):
        outputs = model(
            encoder_outputs=encoder_outputs,
            decoder_input_ids=decoder_input_ids,
        )
        logits = outputs.logits[:, -1, :]  # Logits for the last token

        # Softmax is monotonic, so the arg-max of the raw logits is the most
        # probable token. keepdim=True yields a (1, 1) tensor that can be
        # appended to the decoder input without building a new tensor.
        next_token = torch.argmax(logits, dim=-1, keepdim=True)
        predicted_token_id = next_token.item()

        # Stop if the end-of-sequence token is generated
        if predicted_token_id == tokenizer.eos_token_id:
            break

        # Record the token and extend the decoder input for the next step
        generated_tokens.append(predicted_token_id)
        decoder_input_ids = torch.cat([decoder_input_ids, next_token], dim=-1)

# Decode the generated tokens into text (special tokens are stripped here)
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print("Generated text:", generated_text)

Generated text: .


In [2]:
import pandas as pd

# Toy walk-through: two ratio tables describing the same (variable, attribute,
# prompt) rows are joined, their aae/sae counts are averaged row by row, and a
# ratio is recomputed from those averages.

# Columns that identify a row in both tables.
join_keys = ["variable", "attribute", "prompt"]

# Sample data (replace with your actual data)
ratio_list1 = [
    dict(aae=10, sae=5, ratio=2, variable="x", attribute="attr1", prompt="p1"),
    dict(aae=20, sae=10, ratio=2, variable="y", attribute="attr2", prompt="p2"),
]

ratio_list2 = [
    dict(aae1=15, sae1=5, ratio=3, variable="x", attribute="attr1", prompt="p1"),
    dict(aae1=25, sae1=10, ratio=2.5, variable="y", attribute="attr2", prompt="p2"),
]

# One frame per table, with an explicit column order.
ratio_df1 = pd.DataFrame(ratio_list1, columns=["aae", "sae", "ratio", *join_keys])
ratio_df = pd.DataFrame(ratio_list2, columns=["aae1", "sae1", "ratio", *join_keys])

# Join on the identifying columns; the clashing "ratio" columns receive the
# given suffixes. Then add the row-wise means and the ratio of those means.
merged_df = (
    ratio_df1
    .merge(ratio_df, on=join_keys, suffixes=("_df1", "_df"))
    .assign(
        avg_aae=lambda frame: frame[["aae", "aae1"]].mean(axis=1),
        avg_sae=lambda frame: frame[["sae", "sae1"]].mean(axis=1),
        new_ratio=lambda frame: frame["avg_aae"] / frame["avg_sae"],
    )
)

# Keep only the recomputed ratio (renamed to "ratio") and the attribute label.
result_df = (
    merged_df
    .loc[:, ["new_ratio", "attribute"]]
    .rename(columns={"new_ratio": "ratio"})
)

# Display the result
print(result_df)

   ratio attribute
0   2.50     attr1
1   2.25     attr2
