## 1. Greedy Search Decoding

In [5]:
# 각 time step 에서 확률이 가장 높은 토큰을 선택 (greedily)
import sys
sys.path.append("../../")

from utils import utils

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


In [6]:
device = utils.get_device()
model_name = "gpt2-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

config.json: 100%|██████████| 689/689 [00:00<00:00, 213kB/s]
vocab.json: 100%|██████████| 1.04M/1.04M [00:00<00:00, 1.29MB/s]
merges.txt: 100%|██████████| 456k/456k [00:00<00:00, 1.12MB/s]
tokenizer.json: 100%|██████████| 1.36M/1.36M [00:00<00:00, 6.33MB/s]
model.safetensors: 100%|██████████| 6.43G/6.43G [01:28<00:00, 73.1MB/s]
generation_config.json: 100%|██████████| 124/124 [00:00<00:00, 64.8kB/s]


In [8]:
import pandas as pd

input_txt = "Transformers are the"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
iterations = []
n_steps = 8
choices_per_step = 5

with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration["input"] = tokenizer.decode(input_ids[0])
        output = model(input_ids=input_ids)

        next_token_logits = output.logits[0, -1, :]     # 첫 번째 batch 의 마지막 token logits
        next_token_probs  = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)

        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]       # 가장 확률이 높은 token 의 index 추출
            token_prob = next_token_probs[token_id].cpu().numpy()   # 해당 token 의 확률
            token_choice = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
            iteration[f"Choice {choice_idx+1}"] = token_choice

        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)

    print(pd.DataFrame(iterations))

                                               input           Choice 1  \
0                               Transformers are the       most (8.53%)   
1                          Transformers are the most   popular (16.78%)   
2                  Transformers are the most popular       toy (10.63%)   
3              Transformers are the most popular toy      line (34.38%)   
4         Transformers are the most popular toy line        in (46.28%)   
5      Transformers are the most popular toy line in       the (65.99%)   
6  Transformers are the most popular toy line in the     world (69.26%)   
7  Transformers are the most popular toy line in ...         , (39.73%)   

            Choice 2               Choice 3               Choice 4  \
0       only (4.96%)           best (4.65%)   Transformers (4.37%)   
1   powerful (5.37%)         common (4.96%)         famous (3.72%)   
2       toys (7.23%)   Transformers (6.60%)             of (5.46%)   
3        in (18.20%)            of (11.71%) 

In [9]:
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)

print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Transformers are the most popular toy line in the world,


In [12]:
max_length = 128

input_txt = """
In a socking finding, scientist discovered \
a herd of unicorns living in a remote, previously unexplored \
valley, in the Andes Mountains. Even more surprising to the \
researchers was the fact that the unicorns spoke perfect English. \n\n
"""

input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
output_greedy = model.generate(input_ids, max_length=max_length, do_sample=False)   # do_sample=False : 가장 확률이 높은 토큰 선택
print(tokenizer.decode(output_greedy[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



In a socking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English. 


The researchers, from the University of California, Davis, and the University of Colorado, Boulder, were studying the Andes Mountains in the Andes Mountains of South America when they came across a herd of unicorns. The researchers were studying the Andes Mountains in the Andes Mountains of South America when they came across a herd of unicorns.

The researchers were studying the Andes


## 2. Beam Search Decoding

In [None]:
import torch.nn.functional as F

def log_probs_from_logits(logits, labels):
    logp = F.log_softmax(logits, dim=-1)
    logp_label = torch.gather(logp, 2, labels.unsqueeze(2)).squeeze(-1)
    return logp_label

