In [1]:
# Uncomment and run this cell if you're on Colab or Kaggle
!git clone https://github.com/nlp-with-transformers/notebooks.git
%cd notebooks
from install import *
install_requirements()

Cloning into 'notebooks'...
remote: Enumerating objects: 526, done.[K
remote: Counting objects: 100% (172/172), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 526 (delta 143), reused 135 (delta 126), pack-reused 354[K
Receiving objects: 100% (526/526), 28.62 MiB | 10.36 MiB/s, done.
Resolving deltas: 100% (250/250), done.
/content/notebooks
⏳ Installing base requirements ...
✅ Base requirements installed!
⏳ Installing Git LFS ...
✅ Git LFS installed!


In [2]:
# hide
from utils import *
setup_chapter()

Using transformers v4.16.2
Using datasets v1.16.1


In [3]:
# hide_output
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

Downloading:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [13]:
# hide_output
import pandas as pd

input_txt = "Transformers are the"


input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
iterations = []
n_steps = 100
choices_per_step = 5

with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration["Input"] = tokenizer.decode(input_ids[0])
        output = model(input_ids=input_ids)
        # Select logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)
        # Store tokens with highest probabilities
        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]
            token_prob = next_token_probs[token_id].cpu().numpy()
            token_choice = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
            iteration[f"Choice {choice_idx+1}"] = token_choice
        # Append predicted next token to input
        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)

pd.DataFrame(iterations)

Unnamed: 0,Input,Choice 1,Choice 2,Choice 3,Choice 4,Choice 5
0,Transformers are the,most (8.37%),only (3.35%),best (2.75%),first (2.54%),ultimate (2.20%)
1,Transformers are the most,powerful (20.77%),common (7.09%),popular (5.09%),important (3.29%),advanced (2.72%)
2,Transformers are the most powerful,beings (9.43%),and (8.35%),of (4.61%),Transformers (4.34%),", (3.83%)"
3,Transformers are the most powerful beings,in (56.15%),on (18.99%),known (3.12%),of (3.09%),to (2.18%)
4,Transformers are the most powerful beings in,the (72.89%),existence (11.20%),all (3.40%),creation (1.81%),Transformers (1.18%)
...,...,...,...,...,...,...
95,Transformers are the most powerful beings in t...,Transformers (99.46%),Autob (0.17%),Optimus (0.10%),Unic (0.05%),Titans (0.04%)
96,Transformers are the most powerful beings in t...,. (98.50%),"."" (0.37%)",", (0.20%)",... (0.14%),.. (0.08%)
97,Transformers are the most powerful beings in t...,They (65.93%),\n (16.17%),The (1.79%),Transformers (1.46%),\n\n (1.30%)
98,Transformers are the most powerful beings in t...,are (99.66%),'re (0.20%),have (0.03%),do (0.01%),can (0.01%)


In [14]:
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)
print(tokenizer.decode(output[0]))

Transformers are the most powerful beings in the universe. They are the creators
of the universe, and they are the ones who create the Transformers. They are the
ones who create the Transformers. They are the ones who create the Transformers.
They are the ones who create the Transformers. They are the ones who create the
Transformers. They are the ones who create the Transformers. They are the ones
who create the Transformers. They are the ones who create the Transformers. They
are the ones who create the Transformers. They are the


In [23]:
max_length = 128
input_txt = "Transformers are"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output_greedy = model.generate(input_ids, max_length=max_length,
                               do_sample=False)
print(tokenizer.decode(output_greedy[0]))

Transformers are the most powerful beings in the universe. They are the creators
of the universe, and they are the ones who create the Transformers. They are the
ones who create the Transformers. They are the ones who create the Transformers.
They are the ones who create the Transformers. They are the ones who create the
Transformers. They are the ones who create the Transformers. They are the ones
who create the Transformers. They are the ones who create the Transformers. They
are the ones who create the Transformers. They are the ones who create the
Transformers. They are the ones who create the Transformers. They are the ones
who create the Transformers.


In [15]:

0.5 ** 1024

5.562684646268003e-309

In [16]:
import numpy as np

sum([np.log(0.5)] * 1024)

-709.7827128933695

In [24]:
import torch.nn.functional as F

def log_probs_from_logits(logits, labels):
    logp = F.log_softmax(logits, dim=-1)
    logp_label = torch.gather(logp, 2, labels.unsqueeze(2)).squeeze(-1)
    return logp_label

In [25]:
def sequence_logprob(model, labels, input_len=0):
    with torch.no_grad():
        output = model(labels)
        log_probs = log_probs_from_logits(
            output.logits[:, :-1, :], labels[:, 1:])
        seq_log_prob = torch.sum(log_probs[:, input_len:])
    return seq_log_prob.cpu().numpy()

In [26]:
logp = sequence_logprob(model, output_greedy, input_len=len(input_ids[0]))
print(tokenizer.decode(output_greedy[0]))
print(f"\nlog-prob: {logp:.2f}")

Transformers are the most powerful beings in the universe. They are the creators
of the universe, and they are the ones who create the Transformers. They are the
ones who create the Transformers. They are the ones who create the Transformers.
They are the ones who create the Transformers. They are the ones who create the
Transformers. They are the ones who create the Transformers. They are the ones
who create the Transformers. They are the ones who create the Transformers. They
are the ones who create the Transformers. They are the ones who create the
Transformers. They are the ones who create the Transformers. They are the ones
who create the Transformers.

log-prob: -53.31


In [27]:
output_beam = model.generate(input_ids, max_length=max_length, num_beams=5,
                             do_sample=False, no_repeat_ngram_size=2)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

Transformers are the most powerful beings in the universe, but they're not the
only ones who can use their powers. There are other beings who are able to
harness the power of the Transformers and use it for their own ends.

The Transformers have been around for a long time. They were created by a group
of scientists called the Decepticons, who wanted to create an army of robots to
fight the Autobots. The Transformers were designed to be powerful, durable, and
fast. However, they also had a weakness: they could only transform into one form
at a time, which made them vulnerable to attacks from other Transformers.

log-prob: -153.84


In [28]:

output_beam = model.generate(input_ids, max_length=max_length, num_beams=5,
                             do_sample=False)
logp = sequence_logprob(model, output_beam, input_len=len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")

Transformers are the most powerful beings in the universe. They are the creators
of the universe, and they are responsible for the creation of all life on Earth.

The Transformers are the most powerful beings in the universe. They are the
creators of the universe, and they are responsible for the creation of all life
on Earth.

The Transformers are the most powerful beings in the universe. They are the
creators of the universe, and they are responsible for the creation of all life
on Earth.

The Transformers are the most powerful beings in the universe. They are the
creators of the universe, and they are responsible for the creation

log-prob: -49.21
