In [1]:
import torch
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
model_names = ['gpt2', './gpt2-finetuned-recipes-main', './gpt2-finetuned-recipes-bakery', './gpt2-finetuned-recipes-drinks', './gpt2-finetuned-recipes-meal']
models = {}

for model_name in model_names:
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    letemcook = GPT2LMHeadModel.from_pretrained(model_name)
    # Set the model to evaluation mode
    letemcook.eval()
    models[model_name] = letemcook


2024-11-21 22:11:48.747055: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-21 22:11:48.757885: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-21 22:11:48.831608: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
test_datasets = []

test_dataset_names = ['Datasets/main_test_dataset.csv', 'Datasets/bakery_test_dataset.csv', 'Datasets/drinks_test_dataset.csv', 'Datasets/meal_test_dataset.csv']

for test_dataset_name in test_dataset_names:
    test_datasets.append(pd.read_csv(test_dataset_name))

In [4]:
# Function to generate text completion
def generate_completion(model, prompt, max_length=20):
    # Encode the input prompt
    inputs = tokenizer.encode(prompt, return_tensors='pt')
    # Generate output
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=max_length, min_length = 3, temperature=0.5, do_sample=True)

    # Decode the generated output
    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return completion

def make_prompt(title, ingredients, complete_instruction_steps, incomplete_instruction_steps):
    pre_prompt = "You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] "
    title_prompt = f"[RECIPE_TITLE] {title} " 
    ingredients_prompt = f"[INGREDIENTS_LIST] {ingredients} " 
    
    instructions_prompt = f"[STEPS] "
    for i, step in enumerate(complete_instruction_steps):
        instructions_prompt += f"{i + 1} - {step} "
    instructions_prompt += " " + f"{len(complete_instruction_steps) + 1} - {incomplete_instruction_steps}"
    
    prompt = pre_prompt + title_prompt + ingredients_prompt + instructions_prompt
    # prompt = pre_prompt + instructions_prompt
    return prompt

def autocomplete_recipe_step(model, title, ingredients, complete_instruction_steps, incomplete_instruction_steps, print_prompt = False, completiion_only = True):
    prompt = make_prompt(title, ingredients, complete_instruction_steps, incomplete_instruction_steps)
    if print_prompt: print(prompt)
    generated_string = generate_completion(model, prompt)
    if completiion_only:
        return generated_string[len(prompt):]
    return generated_string


def convert_string_to_list(input_string):
    clean_string = input_string.strip('[]')
    return [s.strip().strip('"') for s in clean_string.split('", "')] # assuming this is always a good split

def index_of_instruction_word(text, n):
    words = text.split()
    
    if n > len(words):
        return len(text) -1  
    
    position = 0
    for word in words[:n-1]:
        position += len(word) + 1 
    
    return position

def remove_next_step(step_number_to_remove, generated_autocompletion):
    next_step_token = f" {3} - "
    return generated_autocompletion.split(next_step_token, 1)[0]

from IPython.display import display, HTML
def display_prompt(text):
    display(HTML(f"<h1>Prompt</h1><p>{text}</p>"))

In [5]:
# Change nrows if desired
df = test_datasets[0]

In [6]:
def separate_recipe_components(recipe_index, n_complete_steps=2, n_words_before_autocomplete=3):

    test_rec = df.iloc[recipe_index, :]

    title = test_rec["title"]
    ingredients = test_rec["ingredients"]
    steps = convert_string_to_list(test_rec["directions"])
    
    

    if len(steps) <= n_complete_steps: return title, ingredients, steps[0:len(steps)-1], steps[len(steps)-1], steps[len(steps)-1]
    
    complete_steps = steps[:n_complete_steps - 1]

    true_step = steps[n_complete_steps]
    n_characters_before_autocomplete = index_of_instruction_word(true_step, n_words_before_autocomplete) # Not used for now
    incomplete_instruction_step = true_step[0:n_characters_before_autocomplete]
    rest_of_instruction_step = true_step[n_characters_before_autocomplete:]
    
    return title, ingredients, complete_steps, incomplete_instruction_step, rest_of_instruction_step

In [7]:
n_steps = 2
n_words_before_autocomplete = 3


for model_name in models:
    title, ingredients, complete_steps, incomplete_instruction_step, rest_of_instruction_step = separate_recipe_components(1, n_complete_steps=n_steps, n_words_before_autocomplete=n_words_before_autocomplete)
    original_prompt = make_prompt(title, ingredients, complete_steps, incomplete_instruction_step)
    autocompleted_step = autocomplete_recipe_step(models[model_name], title, ingredients, complete_steps, incomplete_instruction_step)
    # not yet implemented
    autocompleted_step = remove_next_step(n_steps + 1, autocompleted_step)
    print(original_prompt)
    print("-" * 80)
    print(autocompleted_step)
    print("-" * 80)
    print(rest_of_instruction_step)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] German Rotkohl - Spiced Red Cabbage With Apples And Wine [INGREDIENTS_LIST] ["1/4 cup chopped pork fat or 1/4 cup chopped bacon", "1 large red onion, finely chopped", "2 inches fresh ginger, peeled and finely chopped", "1/2 cinnamon stick", "1 large red cabbage, tough outer leaves removed, quartered, cored, and finely shredded", "1 sprig fresh marjoram", "1/2 teaspoon garam masala", "1/2 cup red wine vinegar", "1/2 cup ruby port or 1/2 cup madeira wine", "1/2 cup brown sugar or 1/2 cup real maple syrup", "1 bay leaf (optional)", "1 whole clove (optional)", "1 firm apple, chopped (optional)"] [STEPS] 1 - Cook the pork fat or bacon in a large skillet or dutch oven over low heat, stirring occasionally until it has rendered its fat, about 20 minutes for the pork fat or 10 minutes for the bacon.  2 - Add the 
--------------------------------------------------------------------------------
iced red w

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] German Rotkohl - Spiced Red Cabbage With Apples And Wine [INGREDIENTS_LIST] ["1/4 cup chopped pork fat or 1/4 cup chopped bacon", "1 large red onion, finely chopped", "2 inches fresh ginger, peeled and finely chopped", "1/2 cinnamon stick", "1 large red cabbage, tough outer leaves removed, quartered, cored, and finely shredded", "1 sprig fresh marjoram", "1/2 teaspoon garam masala", "1/2 cup red wine vinegar", "1/2 cup ruby port or 1/2 cup madeira wine", "1/2 cup brown sugar or 1/2 cup real maple syrup", "1 bay leaf (optional)", "1 whole clove (optional)", "1 firm apple, chopped (optional)"] [STEPS] 1 - Cook the pork fat or bacon in a large skillet or dutch oven over low heat, stirring occasionally until it has rendered its fat, about 20 minutes for the pork fat or 10 minutes for the bacon.  2 - Add the 
--------------------------------------------------------------------------------
iced water

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] German Rotkohl - Spiced Red Cabbage With Apples And Wine [INGREDIENTS_LIST] ["1/4 cup chopped pork fat or 1/4 cup chopped bacon", "1 large red onion, finely chopped", "2 inches fresh ginger, peeled and finely chopped", "1/2 cinnamon stick", "1 large red cabbage, tough outer leaves removed, quartered, cored, and finely shredded", "1 sprig fresh marjoram", "1/2 teaspoon garam masala", "1/2 cup red wine vinegar", "1/2 cup ruby port or 1/2 cup madeira wine", "1/2 cup brown sugar or 1/2 cup real maple syrup", "1 bay leaf (optional)", "1 whole clove (optional)", "1 firm apple, chopped (optional)"] [STEPS] 1 - Cook the pork fat or bacon in a large skillet or dutch oven over low heat, stirring occasionally until it has rendered its fat, about 20 minutes for the pork fat or 10 minutes for the bacon.  2 - Add the 
--------------------------------------------------------------------------------
iced red c

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You are a chef-bot autocompleting a small part of a recipe: [START_OF_RECIPE] [RECIPE_TITLE] German Rotkohl - Spiced Red Cabbage With Apples And Wine [INGREDIENTS_LIST] ["1/4 cup chopped pork fat or 1/4 cup chopped bacon", "1 large red onion, finely chopped", "2 inches fresh ginger, peeled and finely chopped", "1/2 cinnamon stick", "1 large red cabbage, tough outer leaves removed, quartered, cored, and finely shredded", "1 sprig fresh marjoram", "1/2 teaspoon garam masala", "1/2 cup red wine vinegar", "1/2 cup ruby port or 1/2 cup madeira wine", "1/2 cup brown sugar or 1/2 cup real maple syrup", "1 bay leaf (optional)", "1 whole clove (optional)", "1 firm apple, chopped (optional)"] [STEPS] 1 - Cook the pork fat or bacon in a large skillet or dutch oven over low heat, stirring occasionally until it has rendered its fat, about 20 minutes for the pork fat or 10 minutes for the bacon.  2 - Add the 
--------------------------------------------------------------------------------
iced water

In [8]:
model_results = {}

for model_name in models:
    
    recipes = df.index.tolist()
    results = []

    for recipe_index in recipes:

        # Mess around with n_complete_steps and n_words_before_autcomplete    
        title, ingredients, complete_steps, incomplete_instruction_step, true_step = separate_recipe_components(recipe_index, n_complete_steps=2, n_words_before_autocomplete=4)
        autocompleted_step = autocomplete_recipe_step(models[model_name], title, ingredients, complete_steps, incomplete_instruction_step)
        results.append((incomplete_instruction_step, true_step, autocompleted_step))


    model_results[model_name] = results
    

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [9]:
for model_name in model_results:
    results = model_results[model_name]
    
    for result in results:
        print("-" * 80)
        print(f"Autocompleting the instruction step starting with: {result[0]}")
        print(f"TRUE STEP: {result[1]}")
        print(f"Autocompleted STEP: {result[2]}")
        #display_prompt(result[2])
    

--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Cook noodles according 
TRUE STEP: to package directions.
Autocompleted STEP:  to package directions.  3 - Add all ingredients, bring to a boil, then reduce
--------------------------------------------------------------------------------
Autocompleting the instruction step starting with: Add the cabbage, 
TRUE STEP: stirring and cooking over medium heat until some of the bits are slightly browned and caramelized, about 10 minutes (I do this in batches). Note: You can skip the browning step if you are in a hurry, but I find that it adds a particularly wonderful caramelized flavor to the finished dish. Now add the marjoram, garam masala, vinegar, and port or Madeira, and stir well. (Note: At this point add the optional bay leaf, whole clove, and chopped apple).
Autocompleted STEP:  and ginger to the pan and bring to a boil.  3 - Reduce the heat to low
------

In [10]:
def calculate_perplexity(trained_model, model_name, original_prompt, rest_of_instruction_step):
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    
    prompt_id = tokenizer(original_prompt, return_tensors='pt').input_ids
    completion_id = tokenizer(rest_of_instruction_step, return_tensors='pt').input_ids
    input_ids = torch.cat([prompt_id, completion_id], dim=-1)

    # Calculate log likelihood
    with torch.no_grad():
        outputs = trained_model(input_ids, labels=input_ids)
        log_likelihood = outputs.loss * completion_id.size(1)  # Total log likelihood

    perplexity = torch.exp(log_likelihood / completion_id.size(1))
    return perplexity.item()

In [11]:
for model_name in models:
    good_perp = calculate_perplexity(models[model_name], model_names[0], original_prompt, rest_of_instruction_step)
    print("Good Perplexity:", good_perp)

    bad_perp = calculate_perplexity(models[model_name], model_names[0], original_prompt, "preheat the oven to 1450 farheneight and put your baby in it")
    print("Bad Perplexity:", bad_perp)

Good Perplexity: 14.24428939819336
Bad Perplexity: 18.544994354248047
Good Perplexity: 17.390151977539062
Bad Perplexity: 23.119401931762695
Good Perplexity: 19.475215911865234
Bad Perplexity: 25.78233528137207
Good Perplexity: 17.962844848632812
Bad Perplexity: 24.20036506652832
Good Perplexity: 18.346912384033203
Bad Perplexity: 24.704147338867188


In [12]:
def perplexity_across_dataset(trained_model, model_name, test_df, n_words_before_autocomplete=3, verbose=True):
    # This function 
    all_perplexity = []
    for i in range(len(test_df)):
        if verbose:
            print("Measuring perplexity on test recipe number: ", i)
        
        title, ingredients, steps, _, _ = separate_recipe_components(i)

        #recipe_index, n_complete_steps=2, n_words_before_autocomplete=3
        
        
        complete_steps = []
        for j in range(len(steps)):
            
            next_step = steps[j]
            n_characters_before_autocomplete = index_of_instruction_word(next_step, n_words_before_autocomplete)
            
            if n_characters_before_autocomplete >= len(next_step):
                continue
            
            incomplete_next_step = next_step[:n_characters_before_autocomplete]
            rest_of_instruction_step = next_step[n_characters_before_autocomplete:]
            
            prompt = make_prompt(title, ingredients, complete_steps, incomplete_next_step)
            perplexity = calculate_perplexity(trained_model, "./gpt2-finetuned-recipes-bakery", prompt, rest_of_instruction_step)
            
            all_perplexity.append(perplexity)
            if i < len(steps):
                complete_steps.append(steps[i])
    
    return sum(all_perplexity) / float(len(all_perplexity))
            

def eval_perplexity(trained_model, test_data, bad_data = False):
    
    if bad_data:
        return 0
    else:
        return perplexity_across_dataset(trained_model, test_data)

In [13]:
from contextlib import redirect_stdout

with open('output_DS[0].txt', 'w') as f:
    with redirect_stdout(f):
        for model_name in models:
            print(perplexity_across_dataset(models[model_name], model_name, df, n_words_before_autocomplete=3, verbose=True))

In [49]:
for model_name in models:
    print(perplexity_across_dataset(models[model_name], model_name, df, n_words_before_autocomplete=3, verbose=True))

Measuring perplexity on test recipe number:  0
Measuring perplexity on test recipe number:  1
Measuring perplexity on test recipe number:  2
Measuring perplexity on test recipe number:  3
Measuring perplexity on test recipe number:  4
Measuring perplexity on test recipe number:  5
Measuring perplexity on test recipe number:  6
Measuring perplexity on test recipe number:  7
Measuring perplexity on test recipe number:  8
Measuring perplexity on test recipe number:  9
Measuring perplexity on test recipe number:  10
Measuring perplexity on test recipe number:  11
Measuring perplexity on test recipe number:  12
Measuring perplexity on test recipe number:  13
Measuring perplexity on test recipe number:  14
Measuring perplexity on test recipe number:  15
Measuring perplexity on test recipe number:  16
Measuring perplexity on test recipe number:  17
Measuring perplexity on test recipe number:  18
Measuring perplexity on test recipe number:  19
Measuring perplexity on test recipe number:  20
Me