## How to generate text: using different decoding methods for language generation with Transformers

In [None]:
%pip install -q transformers

In [16]:
import math
import random

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import set_seed


In [17]:
# Report the device that will actually be used. torch.device("cuda") is only a
# descriptor and can be constructed on a machine without a GPU, so printing it
# unconditionally says nothing about availability.
print(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

cuda


In [26]:


# Local fine-tuned checkpoint directory. The path is relative, so it is
# resolved against the kernel's current working directory; loading raises
# OSError when no config.json is found there.
model_name_or_path = "../models/mlb_structured/checkpoint-148500"

# Single device that both the model and the encoded prompt are placed on.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the weights, then move them to `torch_device` explicitly.
# device_map="auto" is deliberately not passed: it requires the `accelerate`
# package (this notebook only installs `transformers`) and a model dispatched
# that way should not be moved again with .to().
# NOTE(review): trust_remote_code=True lets Python code shipped with the
# checkpoint run at load time -- keep it only for checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
).to(torch_device)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Structured prompt: the "input" object is complete and the text stops right
# after the "result" key, so the model is asked to continue with the result.
prompt ='''{"input": {"pitcher": "shaun marcum", "batter": "jeff keppinger", "p_throws": "R", "stand": "R", "inning_topbot": "Bot", "inning": 1, "outs_when_up": 0, "on_1b": "", "on_2b": "andres torres", "on_3b": "", "home_score": 0, "away_score": 0}, "result":'''

# Encode the prompt once; every decoding cell below reuses `model_inputs`.
# No generation happens here -- each decoding section runs its own generate().
model_inputs = tokenizer(prompt, return_tensors='pt').to(torch_device)

OSError: Can't load the configuration of '../models/mlb_structured/checkpoint-148500'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../models/mlb_structured/checkpoint-148500' is the correct path to a directory containing a config.json file

### Greedy Search

In [25]:
# The prompt was already encoded into `model_inputs` by an earlier cell.
# No sampling or beam-search arguments are passed, so decoding follows the
# model's default generation settings (greedy search, per this section's
# heading), capped at 512 newly generated tokens.
greedy_output = model.generate(max_new_tokens=512, **model_inputs)

print("Output:\n" + "-" * 100)
# Special tokens are kept in the decoded text (skip_special_tokens=False).
greedy_text = tokenizer.decode(greedy_output[0], skip_special_tokens=False)
print(greedy_text)

Output:
----------------------------------------------------------------------------------------------------
{" input ": {" pitcher ": " shaun marcum ", " batter ": " jeff keppinger ", " p_throws ": " R ", " stand ": " R ", " inning_topbot ": " Bot ", " inning ": 1, " outs_when_up ": 0, " on_1b ": "", " on_2b ": " andres torres ", " on_3b ": "", " home_score ": 0, " away_score ": 0 }, " result ": {" event ": " walk ", " type ": " B ", " zone ": 14, " des ": " Jeff Keppinger walks. Andres Torres scores. Andres Torres to 2nd.", " at_bat_number ": 5, " pitch_number ": 5, " pitch_name ": " Slider ", " hit_location ": "", " launch_speed ": 1, " at_bat_number ": 5, " brett myers ", " ", " shaun marcum ", " brett myers fontenot ", " shaun marcum ", " at_bat_number ": 5, " Slider ", " at_bat_number ": 5, " Changeup ", " Jackson ", " Slider ", " henry sosa ", " brett myers ": 0, " brett myers ": 5, " at_bat_number ": " home_score ": " inning ": " brett myers ": " at_bat_number ": " at_bat_numbe

### Beam search


In [5]:
# Beam search: 5 beams, at most 65 new tokens, with early_stopping=True
# passed through to generate().
beam_search_args = dict(num_beams=5, early_stopping=True, max_new_tokens=65)
beam_output = model.generate(**model_inputs, **beam_search_args)

print("Output:\n" + "-" * 100)
# Only the first returned sequence is decoded and shown.
beam_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
print(beam_text)

Output:
----------------------------------------------------------------------------------------------------
### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 1 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a ball, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider


In [63]:
# Beam search (5 beams, at most 65 new tokens) with no_repeat_ngram_size=10
# added, which asks generate() not to repeat any 10-gram in the output.
no_repeat_beam_args = dict(
    num_beams=5,
    no_repeat_ngram_size=10,
    early_stopping=True,
    max_new_tokens=65,
)
beam_output = model.generate(**model_inputs, **no_repeat_beam_args)

print("Output:\n" + "-" * 100)
# Only the first returned sequence is decoded and shown.
beam_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
print(beam_text)

Output:
----------------------------------------------------------------------------------------------------
### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 8 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for for strike, 663432 throws a 88 miles per hour Cutter for a ball, 663432 throws a 88 miles per hour Slider and 596115 hits into a field out, event : field out, event : field out, out, event : field out


In [69]:
# Ask for several beams back instead of only the best one by setting
# num_return_sequences > 1 (here 5, with num_beams=5).
beam_outputs = model.generate(
    **model_inputs,
    num_beams=5,
    num_return_sequences=5,
    no_repeat_ngram_size=10,
    early_stopping=True,
    max_new_tokens=65,
)

# Five sequences were requested; print each one prefixed with its index.
print("Output:\n" + "-" * 100)
for i, beam_output in enumerate(beam_outputs):
  decoded_beam = tokenizer.decode(beam_output, skip_special_tokens=True)
  print("{}: {}".format(i, decoded_beam))

Output:
----------------------------------------------------------------------------------------------------
0: ### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 8 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for for strike, 663432 throws a 88 miles per hour Cutter for a ball, 663432 throws a 88 miles per hour Slider and 596115 hits into a field out, event : field out, event : field out, out, event : field out
1: ### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 8 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for for strike, 663432 throws a 88 miles per hour Changeup for a ball, 663432 throws a 88 miles per hour Slider and 596115 hits into a field out, event : field out, event : field out, out, event : field out
2: 

### Sampling

In [None]:
# `set_seed` comes from the notebook's top-level import cell, so this cell and
# the sampling cells below do not depend on a mid-notebook import having run.
# Seed the RNGs so the sampled continuation can be reproduced.
set_seed(42)

# Activate sampling (do_sample=True) and deactivate top-k filtering by
# setting top_k to 0; at most 40 new tokens are generated.
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=0
)

print("Output:\n" + 100 * '-')
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))

In [None]:
# Seed the RNGs first so this sampled output can be reproduced; pick another
# seed to get a different continuation.
set_seed(42)

# Sampling with top-k switched off (top_k=0) and temperature=0.6, used here to
# decrease the sensitivity to low-probability candidates.
temperature_sampling_args = dict(
    do_sample=True,
    top_k=0,
    temperature=0.6,
    max_new_tokens=40,
)
sample_output = model.generate(**model_inputs, **temperature_sampling_args)

print("Output:\n" + "-" * 100)
sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

### Top-K Sampling

In [None]:
# Seed the RNGs first so this sampled output can be reproduced; pick another
# seed to get a different continuation.
set_seed(42)

# Top-K sampling: sampling is enabled and top_k is set to 50; at most 40 new
# tokens are generated.
top_k_sampling_args = dict(do_sample=True, top_k=50, max_new_tokens=40)
sample_output = model.generate(**model_inputs, **top_k_sampling_args)

print("Output:\n" + "-" * 100)
sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

### Top-p (nucleus) sampling

In [None]:
# Seed the RNGs first so this sampled output can be reproduced; pick another
# seed to get a different continuation.
set_seed(42)

# Top-p (nucleus) sampling only: top_p is set to 0.92 and top-k filtering is
# deactivated with top_k=0; at most 40 new tokens are generated.
top_p_sampling_args = dict(
    do_sample=True,
    top_p=0.92,
    top_k=0,
    max_new_tokens=40,
)
sample_output = model.generate(**model_inputs, **top_p_sampling_args)

print("Output:\n" + "-" * 100)
sampled_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(sampled_text)

In [None]:
# Seed the RNGs first so these sampled outputs can be reproduced; pick another
# seed to get different continuations.
set_seed(42)

# Combine both filters (top_k=50, top_p=0.95) and request three independently
# sampled sequences, each limited to 40 new tokens.
sample_outputs = model.generate(
    **model_inputs,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    num_return_sequences=3,
    max_new_tokens=40,
)

# Print every returned sequence prefixed with its index.
print("Output:\n" + "-" * 100)
for i, sample_output in enumerate(sample_outputs):
  decoded_sample = tokenizer.decode(sample_output, skip_special_tokens=True)
  print("{}: {}".format(i, decoded_sample))

### Save a custom decoding strategy with your model

In [None]:
from transformers import AutoModelForCausalLM, GenerationConfig

# NOTE(review): "my_account/my_model" looks like a placeholder repo id --
# replace it before running. This assignment also rebinds `model`, replacing
# whatever model the earlier cells loaded.
model = AutoModelForCausalLM.from_pretrained("my_account/my_model")

# Decoding settings to store: sampling with top_k=50, at most 50 new tokens,
# and the end-of-sequence token id taken from the loaded model's config.
generation_config = GenerationConfig(
    eos_token_id=model.config.eos_token_id,
    do_sample=True,
    top_k=50,
    max_new_tokens=50,
)

# Save the config under the same repo id; push_to_hub=True is passed so the
# saved config is also uploaded.
generation_config.save_pretrained("my_account/my_model", push_to_hub=True)