In [1]:
import torch
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
model_name = 'gpt2'  # You can choose a different model if desired
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
lethimcock = GPT2LMHeadModel.from_pretrained(model_name)

# Set the model to evaluation mode
lethimcock.eval()


  from .autonotebook import tqdm as notebook_tqdm


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [2]:

# Function to generate text completion
def generate_completion(model, prompt, max_length=20):
    # Encode the input prompt
    inputs = tokenizer.encode(prompt, return_tensors='pt')
    # Generate output
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=max_length, min_length = 3, temperature=0.99, do_sample=True)

    # Decode the generated output
    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completion

def autocomplete_recipe_step(model, title, ingredients, complete_instruction_steps, incomplete_instruction_steps, print_prompt = False):
    pre_prompt = "You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] "
    title_prompt = f"[RECIPE_TITLE] {title} " 
    ingredients_prompt = f"[INGREDIENTS_LIST] {ingredients} " 
    
    instructions_prompt = f"[STEPS] "
    for i, step in enumerate(complete_instruction_steps):
        instructions_prompt += f"{i + 1} - {step} "
    instructions_prompt += " {DO NOT CREATE ANOTHER STEP AFTER STEP 3} " + f"{len(complete_instruction_steps) + 1} - {incomplete_instruction_steps}"
    
    # prompt = pre_prompt + title_prompt + ingredients_prompt + instructions_prompt
    prompt = pre_prompt + instructions_prompt
    if print_prompt: print(prompt)
    return generate_completion(model, prompt)


def convert_string_to_list(input_string):
    clean_string = input_string.strip('[]')
    return [s.strip().strip('"') for s in clean_string.split('", "')] # assuming this is always a good split

from IPython.display import display, HTML
def display_prompt(text):
    display(HTML(f"<h1>Prompt</h1><p>{text}</p>"))

In [3]:
df = pd.read_csv('full_dataset.csv', nrows=5)


In [4]:
# Change these params to mess around with the autocompletions of the model
recipe_index = 1
n_complete_steps = 2
n_characters_before_autocomplete = 6

test_rec = df.iloc[recipe_index, :]

title = test_rec["title"]
ingredients = test_rec["ingredients"]

steps = convert_string_to_list(test_rec["directions"])
complete_steps = steps[:n_complete_steps - 1]
incomplete_instruction_step = steps[n_complete_steps][0:n_characters_before_autocomplete]

print(f"Autocompleting the instruction step starting with: {incomplete_instruction_step}")
print(f"TRUE STEP: {steps[n_complete_steps]}")


Autocompleting the instruction step starting with: Mix so
TRUE STEP: Mix soup and cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours.


In [5]:
autocompleted_step = autocomplete_recipe_step(lethimcock, title, ingredients, complete_steps, incomplete_instruction_step)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [6]:
display_prompt(autocompleted_step)