## How to generate text: using different decoding methods for language generation with Transformers

In [None]:
# Install the Hugging Face `transformers` package for this kernel; `-q` keeps pip's output quiet.
%pip install -q transformers

In [1]:
import math
import random

In [12]:
from transformers import AutoModelForCausalLM,RobertaTokenizerFast
import torch

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = "cuda"
else:
    torch_device = "cpu"

# Build the fast tokenizer from the serialized tokenizer file.
tokenizer = RobertaTokenizerFast(
    tokenizer_file="../models/mlb/tokenizer/baseball.json",
)

# Load the checkpoint, reusing the EOS token id as the PAD token id to avoid
# warnings, and move the model to the selected device.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="../models/mlb/checkpoint-187000",
    pad_token_id=tokenizer.eos_token_id,
).to(torch_device)

If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`


In [13]:
# Randomize the game situation: an inning in 1..9 and an out count in 0..2.
# `random.randrange` already returns an int, so no rounding is needed.
inning_number = random.randrange(1, 10)
outs_so_far = random.randrange(0, 3)
prompt = (
    "<s> ###instruction: what is the outcome of pitcher 663432 pitching to batter 596115 "
    f"###input: Top of the {inning_number} inning with {outs_so_far} outs "
    "###output: 663432 throws a"
)

### Greedy Search

In [16]:
# Tokenize the prompt the generation is conditioned on and move the tensors to the device.
model_inputs = tokenizer(prompt, return_tensors="pt").to(torch_device)

# No sampling or beam arguments are passed (the greedy-search example);
# generate at most 95 new tokens.
greedy_output = model.generate(
    **model_inputs,
    max_new_tokens=95,
)

print("Output:")
print("-" * 100)
# skip_special_tokens=False keeps special tokens in the decoded text.
print(tokenizer.decode(greedy_output[0], skip_special_tokens=False))

Output:
----------------------------------------------------------------------------------------------------
<s> ### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 7 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a ball, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 98 miles per hour Sinker for a strike, 663432 throws


### Beam search


In [62]:

print("Output:")
print("-" * 100)

# Activate beam search and early_stopping for 25 freshly randomized game situations.
# NOTE: `prompt` and `model_inputs` are rebound on every pass, so cells executed
# afterwards see the values left by the final iteration.
for sample_index in range(25):
    inning_number = random.randrange(1, 10)
    outs_so_far = random.randrange(0, 3)
    prompt = (
        "<s> ###instruction: what is the outcome of pitcher 663432 pitching to batter 596115 "
        f"###input: Top of the {inning_number} inning with {outs_so_far} outs "
        "###output: 663432 throws a"
    )
    model_inputs = tokenizer(prompt, return_tensors="pt").to(torch_device)
    beam_output = model.generate(
        **model_inputs,
        max_new_tokens=90,
        no_repeat_ngram_size=10,
        num_beams=5,
        early_stopping=True,
    )
    # Only the first returned sequence is decoded and printed.
    decoded_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
    print(decoded_text)

Output:
----------------------------------------------------------------------------------------------------
### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 5 inning with 1 outs ### output : 663432 throws a 98 miles per hour Sinker for a strike, 663432 throws a 98 miles per hour Sinker for for strike, 663432 throws a 99 miles per hour Sinker for a strike, 671083 throws a 99 miles per hour Sinker for a ball, 663432 throws a 100 miles per hour Sinker for a ball, 671083 throws a 100 miles per hour Sinker and 596115 hits a single, event : single, event : single, event : single single, event : single, event :
### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 8 inning with 0 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for for strike, 663432 throws a 88 miles per hour Cutter for a ball, 663432 throws a 88 miles pe

In [26]:
# Beam search with 5 beams on the current `model_inputs`: at most 65 new tokens,
# no 10-gram may repeat, and early stopping is enabled.
beam_output = model.generate(
    **model_inputs,
    max_new_tokens=65,
    num_beams=5,
    no_repeat_ngram_size=10,
    early_stopping=True,
)

print("Output:")
print("-" * 100)
decoded_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
print(decoded_text)

Output:
----------------------------------------------------------------------------------------------------
### instruction : what is the outcome of pitcher 663432 pitching to batter 596115 ### input : Top of the 7 inning with 2 outs ### output : 663432 throws a 88 miles per hour Slider for a strike, 663432 throws a 88 miles per hour Slider for stealing strike, 663432 throws a 88 miles per hour Changeup for a ball, 663432 throws a 88 miles per hour Slider and 596115 hits into a field out, event : field out, event : field out, out, event : field out


In [None]:
# Set num_return_sequences > 1 to get several beams back from one call.
beam_outputs = model.generate(
    **model_inputs,
    max_new_tokens=40,
    num_beams=5,
    no_repeat_ngram_size=2,
    num_return_sequences=5,
    early_stopping=True,
)

# num_return_sequences=5, so five output sequences are printed, one per line.
print("Output:")
print("-" * 100)
for i, beam_output in enumerate(beam_outputs):
    decoded_text = tokenizer.decode(beam_output, skip_special_tokens=True)
    print(f"{i}: {decoded_text}")

### Sampling

In [None]:
from transformers import set_seed

# Fix the seed so the sampled text can be reproduced.
set_seed(42)

# Activate sampling; top_k=0 deactivates top-k filtering.
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=0,
)

print("Output:")
print("-" * 100)
decoded_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(decoded_text)

In [None]:
# Seed for reproducible results; change the seed to get different samples.
set_seed(42)

# Sample with temperature=0.6 to decrease the sensitivity to low-probability
# candidates; top_k=0 keeps top-k filtering deactivated.
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=0,
    temperature=0.6,
)

print("Output:")
print("-" * 100)
decoded_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(decoded_text)

### Top-K Sampling

In [None]:
# Seed for reproducible results; change the seed to get different samples.
set_seed(42)

# Top-k sampling with top_k set to 50.
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=50,
)

print("Output:")
print("-" * 100)
decoded_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(decoded_text)

### Top-p (nucleus) sampling

In [None]:
# Seed for reproducible results; change the seed to get different samples.
set_seed(42)

# Top-p (nucleus) sampling with top_p=0.92; top_k=0 deactivates top-k filtering
# so that only the top_p cutoff applies.
sample_output = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_p=0.92,
    top_k=0,
)

print("Output:")
print("-" * 100)
decoded_text = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(decoded_text)

In [None]:
# Seed for reproducible results; change the seed to get different samples.
set_seed(42)

# Combine top_k=50 with top_p=0.95 and request three sampled sequences.
sample_outputs = model.generate(
    **model_inputs,
    max_new_tokens=40,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    num_return_sequences=3,
)

print("Output:")
print("-" * 100)
for i, sample_output in enumerate(sample_outputs):
    decoded_text = tokenizer.decode(sample_output, skip_special_tokens=True)
    print(f"{i}: {decoded_text}")

### Save a custom decoding strategy with your model

In [None]:
from transformers import AutoModelForCausalLM, GenerationConfig

# Hub repository id used for both loading and pushing.
# NOTE(review): "my_account/my_model" looks like a placeholder - replace it with a
# repository you can actually push to before running this cell.
HUB_REPO_ID = "my_account/my_model"

# Load the Hub checkpoint under its own name. Rebinding the shared `model` here
# would swap out the local checkpoint that the other cells generate with, and this
# copy is never moved to `torch_device`.
hub_model = AutoModelForCausalLM.from_pretrained(HUB_REPO_ID)

# Decoding strategy to store next to the model: top-k sampling (k=50) with at most
# 50 new tokens, stopping on the model's own EOS token id.
generation_config = GenerationConfig(
    max_new_tokens=50,
    do_sample=True,
    top_k=50,
    eos_token_id=hub_model.config.eos_token_id,
)
generation_config.save_pretrained(HUB_REPO_ID, push_to_hub=True)

In [None]:
import json
from transformers import set_seed


# Read all evaluation records first so the file is closed before the slow
# generation loop starts.
# NOTE(review): the file name is spelled "prmopts"; it is kept as-is because it has
# to match the file on disk.
with open('../data/baseball_testing_prmopts_return.json', 'r', encoding='utf-8') as f:
    inputs = json.load(f)

for data in inputs['data']:
    # The 'a' field is split on the '###output:' marker: the part before it becomes
    # the generation prompt; the part after it is only printed for comparison
    # (presumably the reference play-by-play - confirm against the data file).
    split_data = data['a'].split('###output:')
    # Records without the marker used to raise IndexError; show an empty reference instead.
    reference_text = split_data[1] if len(split_data) > 1 else ''

    print("Input:\n" + 100 * '-')
    print(split_data[0])
    print(reference_text)
    print("Output:\n" + 100 * '-')

    prompt = split_data[0].replace("\n", "")
    # Re-seed with a value drawn from `random` for every record.
    set_seed(math.floor(random.random() * 2345235))
    model_inputs = tokenizer(prompt, return_tensors='pt').to(torch_device)

    # Nucleus sampling (top_p=0.92, top-k filtering off) at temperature 0.6,
    # capped at 90 new tokens and with repeated 10-grams blocked.
    sample_output = model.generate(
        **model_inputs,
        max_new_tokens=90,
        no_repeat_ngram_size=10,
        do_sample=True,
        top_k=0,
        top_p=0.92,
        temperature=0.6,
    )

    output = tokenizer.decode(sample_output[0], skip_special_tokens=True)

    # The decoded text uses spaced markers ("### input :", "### output : "), unlike the raw prompt.
    split_text = output.split("### output : ")
    play_by_play = split_text[-1]
    # Named `game_state` rather than `input` so the built-in `input` is not shadowed.
    game_state = split_text[0].split('### input :')[-1]
    print(game_state.strip())

    if 'event' in play_by_play:
        # Print the pitch sequence, then only the first comma-separated item after
        # the first "event :" marker.
        event_parts = play_by_play.split('event :')
        print(event_parts[0].strip())
        if len(event_parts) > 1:
            print(event_parts[1].strip().split(',')[0])
    else:
        print(play_by_play)
    print(100 * '-')