 [Reseach paper:](https://aclanthology.org/2024.emnlp-main.202.pdf)
 [Github](https://github.com/byungdoh/wt_decoding)

In [2]:
## https://arxiv.org/pdf/1804.10959

In [3]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

In [4]:
sentences = [
    "I was a mat in France",
    "I was a matron in France",
    "The unwelcome guest refused to leave despite multiple hints.",
    "Eating a hotdog at the carnival was the highlight of my day.",
    "The spacecraft hovered silently before vanishing into deep space.",
    "Our company’s entire customer database was leaked online.",
    "The sound of the rushing waterfall was calming after a long hike.",
    "The old man the boat.",
    "The horse raced past the barn fell."
]


model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [5]:
def get_space_subword_idx(tokenizer):
    """Identify space-prefixed and subword token indices"""
    space_idx = []
    subword_idx = []
    vocab_items = tokenizer.get_vocab().items()

    for token, idx in vocab_items:
        if token.startswith("Ġ"):  # GPT-2 uses "Ġ" for space-prefixed tokens
            space_idx.append(idx)
        else:
            subword_idx.append(idx)

    return space_idx, subword_idx

# Compute space and subword indices
space_idx, subword_idx = get_space_subword_idx(tokenizer)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for sentence in sentences:
    print("\n===================================================")
    print(f"Processing sentence: {sentence}")
    print("===================================================")

    # Tokenize the sentence
    tokenizer_output = tokenizer(sentence)
    ids = tokenizer_output.input_ids
    attn = tokenizer_output.attention_mask

    print("\nTokenized Sentence IDs:", ids)
    print("Tokens:", tokenizer.convert_ids_to_tokens(ids))

    # Ensure BOS token handling for GPT-2
    bos_id = model.config.bos_token_id
    if bos_id is not None:
        ids = [bos_id] + ids
        attn = [1] + attn

    # Convert to tensors and move to device
    batch_input = {
        "input_ids": torch.tensor([ids]).to(device),
        "attention_mask": torch.tensor([attn]).to(device)
    }

    # Forward pass
    with torch.no_grad():
        model_output = model(**batch_input)

    print("\nModel output shape:", model_output.logits.shape)

    # Compute surprisal values
    softmax = torch.nn.Softmax(dim=-1)
    probs = softmax(model_output.logits.squeeze(0))
    probs = torch.clamp(probs, min=1e-9)  # Prevent log(0)
    all_surp = -1 * torch.log2(probs)  # Compute surprisal

    # Exclude BOS token if it was added
    tokens = tokenizer.convert_ids_to_tokens(ids[1:] if bos_id is not None else ids)

    # Print Header for the table
    print("\n------------------------------------------------------")
    print("| Token       || Surprisal || Type || Log P(Space): Likelihood of a space before this token. || Log P(Subword): Likelihood of this token being part of a larger word. |")
    print("------------------------------------------------------")

    for i in range(len(tokens)):
        cleaned_tok = tokenizer.convert_tokens_to_string([tokens[i]]).replace(" ", "")

        # Get the actual token ID
        token_id = ids[i + 1] if bos_id is not None else ids[i]
        ## Computes surprisal from self-token probability
        token_surprisal = all_surp[i, token_id].item()  # calculate correct surprisal

        # Compute log probabilities for space and subword tokens
        log_prob_space = torch.log2(torch.sum(probs[i][space_idx])).item()
        log_prob_subword = torch.log2(torch.sum(probs[i][subword_idx])).item()

        # Determine if token starts a new word (B) or is inside a word (I)
        prefix = "B" if tokens[i].startswith("Ġ") else "I"

        # Print row
        print(f"| {cleaned_tok:<10} || {token_surprisal:>9.4f} ||  {prefix}   || {log_prob_space:>10.4f} || {log_prob_subword:>13.4f} |")

    print("------------------------------------------------------")

    # Print EOS token info
    eos_surprisal = -1 * torch.log2(torch.sum(probs[-1][space_idx])).item()
    eos_log_prob_space = torch.log2(torch.sum(probs[-1][space_idx])).item()
    eos_log_prob_subword = torch.log2(torch.sum(probs[-1][subword_idx])).item()

    print(f"| <eos>       || {eos_surprisal:>9.4f} ||  B   || {eos_log_prob_space:>10.4f} || {eos_log_prob_subword:>13.4f} |")
    print("------------------------------------------------------\n")


Processing sentence: I was a mat in France

Tokenized Sentence IDs: [40, 373, 257, 2603, 287, 4881]
Tokens: ['I', 'Ġwas', 'Ġa', 'Ġmat', 'Ġin', 'ĠFrance']

Model output shape: torch.Size([1, 7, 50257])

------------------------------------------------------
| Token       || Surprisal || Type || Log P(Space): Likelihood of a space before this token. || Log P(Subword): Likelihood of this token being part of a larger word. |
------------------------------------------------------
| I          ||    5.7704 ||  I   ||    -4.9725 ||       -0.0467 |
| was        ||    4.4130 ||  B   ||    -0.3818 ||       -2.1044 |
| a          ||    4.8790 ||  B   ||    -0.0022 ||       -9.3418 |
| mat        ||   15.1151 ||  B   ||    -0.0027 ||       -9.0613 |
| in         ||   11.0370 ||  B   ||    -4.2729 ||       -0.0766 |
| France     ||   10.1965 ||  B   ||    -0.0038 ||       -8.5685 |
------------------------------------------------------
| <eos>       ||    0.3822 ||  B   ||    -0.3822 ||       -2.1