In [1]:
import torch
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
model_name = 'gpt2'  # You can choose a different model if desired
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
letemcook = GPT2LMHeadModel.from_pretrained(model_name)

# Set the model to evaluation mode
letemcook.eval()


  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [None]:
# Function to generate text completion
def generate_completion(model, prompt, max_length=20):
    # Encode the input prompt
    inputs = tokenizer.encode(prompt, return_tensors='pt')
    # Generate output
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=max_length, min_length = 3, temperature=0.5, do_sample=True)

    # Decode the generated output
    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completion

def make_prompt(title, ingredients, complete_instruction_steps, incomplete_instruction_steps):
    pre_prompt = "You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] "
    title_prompt = f"[RECIPE_TITLE] {title} " 
    ingredients_prompt = f"[INGREDIENTS_LIST] {ingredients} " 
    
    instructions_prompt = f"[STEPS] "
    for i, step in enumerate(complete_instruction_steps):
        instructions_prompt += f"{i + 1} - {step} "
    instructions_prompt += " " + f"{len(complete_instruction_steps) + 1} - {incomplete_instruction_steps}"
    
    prompt = pre_prompt + title_prompt + ingredients_prompt + instructions_prompt
    # prompt = pre_prompt + instructions_prompt
    return prompt

def autocomplete_recipe_step(model, title, ingredients, complete_instruction_steps, incomplete_instruction_steps, print_prompt = False, completiion_only = True):
    prompt = make_prompt(title, ingredients, complete_instruction_steps, incomplete_instruction_steps)
    if print_prompt: print(prompt)
    generated_string = generate_completion(model, prompt)
    if completiion_only:
        return generated_string[len(prompt):]
    return generated_string


def convert_string_to_list(input_string):
    clean_string = input_string.strip('[]')
    return [s.strip().strip('"') for s in clean_string.split('", "')] # assuming this is always a good split

def index_of_instruction_word(text, n):
    words = text.split()
    
    if n > len(words):
        return len(text) -1  
    
    position = 0
    for word in words[:n-1]:
        position += len(word) + 1 
    
    return position

def remove_next_step(step_number_to_remove, generated_autocompletion):
    next_step_token = f" {3} - "
    return generated_autocompletion.split(next_step_token, 1)[0]

from IPython.display import display, HTML
def display_prompt(text):
    display(HTML(f"<h1>Prompt</h1><p>{text}</p>"))

In [3]:
# Change nrows if desired
df = pd.read_csv('NLG_subset.csv', nrows=5)

In [4]:
def separate_recipe_components(recipe_index, n_complete_steps=2, n_words_before_autocomplete=3):

    test_rec = df.iloc[recipe_index, :]

    title = test_rec["title"]
    ingredients = test_rec["ingredients"]
    steps = convert_string_to_list(test_rec["directions"])
    complete_steps = steps[:n_complete_steps - 1]

    if len(steps) < n_complete_steps: return title, ingredients, steps[0:len(steps)-1], steps[len(steps)-1], steps[len(steps)-1]

    true_step = steps[n_complete_steps]
    n_characters_before_autocomplete = index_of_instruction_word(true_step, n_words_before_autocomplete) # Not used for now
    incomplete_instruction_step = true_step[0:n_characters_before_autocomplete]
    rest_of_instruction_step = true_step[n_characters_before_autocomplete:]
    
    return title, ingredients, complete_steps, incomplete_instruction_step, rest_of_instruction_step


In [5]:
n_steps = 2
n_words_before_autocomplete = 3
title, ingredients, complete_steps, incomplete_instruction_step, rest_of_instruction_step = separate_recipe_components(1, n_complete_steps=n_steps, n_words_before_autocomplete=n_words_before_autocomplete)

original_prompt = make_prompt(title, ingredients, complete_steps, incomplete_instruction_step)

autocompleted_step = autocomplete_recipe_step(letemcook, title, ingredients, complete_steps, incomplete_instruction_step)

# not yet implemented
autocompleted_step = remove_next_step(n_steps + 1, autocompleted_step)

print(original_prompt)
print(autocompleted_step)
print(rest_of_instruction_step)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] Jewell Ball'S Chicken [INGREDIENTS_LIST] ["1 small jar chipped beef, cut up", "4 boned chicken breasts", "1 can cream of mushroom soup", "1 carton sour cream"] [STEPS] 1 - Place chipped beef on bottom of baking dish.  2 - Mix soup 
 in a blender. 
and cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours.


In [8]:
print(original_prompt)

print(rest_of_instruction_step)

You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] Jewell Ball'S Chicken [INGREDIENTS_LIST] ["1 small jar chipped beef, cut up", "4 boned chicken breasts", "1 can cream of mushroom soup", "1 carton sour cream"] [STEPS] 1 - Place chipped beef on bottom of baking dish.  2 - Mix soup 
and cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours.


In [9]:

good_perp = calculate_perplexity(letemcook, original_prompt, rest_of_instruction_step)
print("Good Perplexity:", good_perp)

bad_perp = calculate_perplexity(letemcook, original_prompt, "preheat the oven to 1450 farheneight and put your baby in it")
print("Bad Perplexity:", bad_perp)

Good Perplexity: 47.25579833984375
Bad Perplexity: 61.501346588134766


In [10]:
recipes = df.index.tolist()
results = []

for recipe_index in recipes:

    # Mess around with n_complete_steps and n_words_before_autcomplete    
    title, ingredients, complete_steps, incomplete_instruction_step, true_step = separate_recipe_components(recipe_index, n_complete_steps=2, n_words_before_autocomplete=4)
    autocompleted_step = autocomplete_recipe_step(letemcook, title, ingredients, complete_steps, incomplete_instruction_step)
    
    results.append((incomplete_instruction_step, true_step, autocompleted_step))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


KeyboardInterrupt: 

In [None]:
for result in results:
    print("-" * 80)
    print(f"Autocompleting the instruction step starting with: {result[0]}")
    print(f"TRUE STEP: {result[1]}")
    display_prompt(result[2])

--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Boil and stir 
TRUE STEP: 5 minutes more. Take off heat.


--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Mix soup and 
TRUE STEP: cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours.


--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.
TRUE STEP: In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.


--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Pour gravy and 
TRUE STEP: cream of mushroom soup over chicken; level.


--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Keep in refrigerator
TRUE STEP: .
