In [1]:
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q torch

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone


In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Checkpoint identifiers; only `model2` is actually loaded below.
model1 = "gpt2"
model2 = "gpt2-large"

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = "cpu"

tokenizer = GPT2Tokenizer.from_pretrained(model2)

# Reuse the end-of-sequence token id as the padding id, then move the
# loaded model onto the selected device.
model = GPT2LMHeadModel.from_pretrained(
    model2,
    pad_token_id=tokenizer.eos_token_id,
)
model = model.to(device)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.25G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
def get_next_word_probs(prefix):
  """Return the model's probability distribution over the next token.

  Args:
    prefix: Text to condition on. It is encoded with the module-level
      `tokenizer` and scored by the module-level `model` on `device`.

  Returns:
    A 1-D tensor holding one probability per vocabulary entry, computed
    from the logits at the last position of the encoded prefix.

  Raises:
    ValueError: If `prefix` encodes to zero tokens, because there is then
      no position to predict from.
  """
  input_ids = tokenizer.encode(prefix, return_tensors='pt').to(device)
  if input_ids.shape[-1] == 0:
    raise ValueError("prefix must encode to at least one token")

  with torch.no_grad():
    # Index batch 0 / last position explicitly. A bare `.squeeze()` would
    # also drop the sequence axis for a single-token prefix, so `[-1]` would
    # then select one scalar logit instead of the whole vocabulary row.
    logits = model(input_ids).logits[0, -1]
  probabilities = torch.nn.functional.softmax(logits, dim=-1)
  return probabilities

## High Entropy

In [7]:
# Prompt for the high-entropy example: the next word is a name.
prefix = 'My name is'

In [8]:
# Next-token distribution for the current `prefix`.
probabilities = get_next_word_probs(prefix)

# Ten most probable candidates: probabilities first, vocabulary ids second.
top_token_probs, top_token_vals = torch.topk(probabilities, 10)

# Print "<probability> <decoded token>" from most to least likely.
for rank in range(top_token_vals.numel()):
  token, prob = top_token_vals[rank], top_token_probs[rank]
  print("%.3f"%prob.item(), tokenizer.decode(token))

0.015  John
0.011  David
0.010  Michael
0.008  J
0.007  James
0.007  Chris
0.006  Mike
0.006  Tom
0.006  Paul
0.005  T


In [9]:
# Entropy of the next-token distribution; a larger value means the model is
# less certain about what comes next.
next_token_dist = torch.distributions.Categorical(probs=probabilities)
entropy = next_token_dist.entropy()
entropy.item()

7.412659168243408

## Low Entropy

In [10]:
# Prompt for the low-entropy example: one continuation dominates.
prefix = 'My name'

In [11]:
# Recompute the next-token distribution for the shorter prompt.
probabilities = get_next_word_probs(prefix)
top_token_probs, top_token_vals = torch.topk(probabilities, 10)

# Walk the candidates in rank order and show probability next to the token text.
for rank, token in enumerate(top_token_vals):
  prob = top_token_probs[rank]
  print("%.3f"%prob.item(), tokenizer.decode(token))

0.890  is
0.057 's
0.010  was
0.006 ,
0.005  isn
0.002  and
0.001  has
0.001 ?
0.001  will
0.001  in


In [12]:
# Entropy of the distribution computed for the current prompt.
categorical = torch.distributions.Categorical(probs=probabilities)
entropy = categorical.entropy()
entropy.item()

0.6693758964538574

# With or Without Context

In [13]:
# Same sentence ending, once on its own and once preceded by an explanatory
# sentence, to compare the resulting next-token distributions.
prefix_no_context = 'They need to go to the'
prefix_with_context = (
    'They drank a lot of water. As a result, they need to go to the'
)

## High Entropy

In [14]:
# Distribution over the next token when no context sentence is given.
probabilities = get_next_word_probs(prefix_no_context)

# `torch.topk` returns (values, indices); unpack them by field name.
top10 = torch.topk(probabilities, 10)
top_token_probs, top_token_vals = top10.values, top10.indices

for token, prob in zip(top_token_vals, top_token_probs):
  decoded = tokenizer.decode(token)
  print("%.3f"%prob.item(), decoded)

0.020  police
0.019  people
0.017  top
0.016  next
0.011  same
0.010  hospital
0.010  polls
0.009  court
0.008  doctor
0.008  source


In [15]:
# Entropy of the no-context distribution.
no_context_dist = torch.distributions.Categorical(probs=probabilities)
entropy = no_context_dist.entropy()
entropy.item()

7.403604507446289

## Low Entropy

In [16]:
# Distribution over the next token when the context sentence is included.
probabilities = get_next_word_probs(prefix_with_context)
top_token_probs, top_token_vals = torch.topk(probabilities, k=10)

# One line per candidate: probability to three decimals, then the token text.
for rank in range(len(top_token_vals)):
  token = top_token_vals[rank]
  prob = top_token_probs[rank]
  print("%.3f"%prob.item(), tokenizer.decode(token))

0.474  bathroom
0.129  hospital
0.118  doctor
0.059  toilet
0.023  gym
0.015  restroom
0.015  clinic
0.010  emergency
0.007  doctors
0.007  dentist


In [17]:
# Entropy of the with-context distribution.
with_context_dist = torch.distributions.Categorical(probs=probabilities)
entropy = with_context_dist.entropy()
entropy.item()

2.640349864959717

## High Entropy 상황에서 실제 Generated Texts

- 일반적인 시나리오 (Not good story)

In [23]:
from transformers import TopKLogitsWarper
# How the next token is chosen at every step:
#   "greedy" - always append the single most probable token
#   "sample" - draw from the full next-token distribution
#   "top_k"  - draw only among the TOP_K most probable tokens
DECODING_STRATEGY = "greedy"
TOP_K = 100
NUM_NEW_TOKENS = 30

topk_selector = TopKLogitsWarper(TOP_K)

# Seed the RNG so the sampling strategies are repeatable.
torch.manual_seed(0)
prefix = prefix_no_context
for i in range(NUM_NEW_TOKENS):
  probabilities = get_next_word_probs(prefix)

  # Compute only the token for the selected strategy instead of evaluating
  # every strategy on each step and discarding all but one result.
  if DECODING_STRATEGY == "greedy":
    next_token = torch.argmax(probabilities)
  elif DECODING_STRATEGY == "sample":
    next_token = torch.multinomial(probabilities, 1)
  elif DECODING_STRATEGY == "top_k":
    # The warper works on log-scores with a leading batch axis and sets
    # everything outside the top-k to -inf; exp() maps those back to 0, and
    # multinomial accepts the resulting unnormalized weights.
    topk_token_logits = topk_selector(None, torch.log(probabilities).unsqueeze(0))
    next_token = torch.multinomial(torch.exp(topk_token_logits[0]), 1)
  else:
    raise ValueError("unknown DECODING_STRATEGY: %r" % (DECODING_STRATEGY,))

  # Extend the text and show the running result after every step.
  prefix += tokenizer.decode(next_token)
  print(prefix)

They need to go to the police
They need to go to the police and
They need to go to the police and say
They need to go to the police and say,
They need to go to the police and say, '
They need to go to the police and say, 'We
They need to go to the police and say, 'We're
They need to go to the police and say, 'We're going
They need to go to the police and say, 'We're going to
They need to go to the police and say, 'We're going to do
They need to go to the police and say, 'We're going to do this
They need to go to the police and say, 'We're going to do this,
They need to go to the police and say, 'We're going to do this, we
They need to go to the police and say, 'We're going to do this, we're
They need to go to the police and say, 'We're going to do this, we're going
They need to go to the police and say, 'We're going to do this, we're going to
They need to go to the police and say, 'We're going to do this, we're going to do
They need to go to the police and say, 'We're going to do this,

In [27]:
# Fix the RNG state so the sampled continuation is repeatable.
torch.manual_seed(0)

# Encode the context-free prompt and place it on the model's device.
encoded_prompt = tokenizer.encode(prefix_no_context, return_tensors='pt')
input_ids = encoded_prompt.to(device)

# Sample among the 100 most probable tokens at each step, up to a total
# sequence length of 30.
generation_kwargs = dict(do_sample=True, max_length=30, top_k=100)
sample_output = model.generate(input_ids, **generation_kwargs)

generated_text = tokenizer.decode(sample_output[0])
print(generated_text)

They need to go to the public," said state Rep. Peter Jacoby (D), who introduced the bill. "It's something that needs to
