In [1]:
import ast
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load and clean data

In [2]:
# Read the raw recipe dataset from a CSV path relative to the working directory.
recipe = pd.read_csv('raw_data/full_dataset.csv')
# Show the first rows as a quick check of the parsed columns.
recipe.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""bu..."
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom..."
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""gar..."
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroo..."
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""bu..."


In [7]:
# List each column's dtype for the loaded frame.
recipe.dtypes

Unnamed: 0      int64
title          object
ingredients    object
directions     object
link           object
source         object
NER            object
dtype: object

In [4]:
# 'Unnamed: 0' looks like a per-row counter carried over from the CSV (0, 1, 2, ...
# in the preview). If it takes part in the comparison, no two rows can ever be
# equal and drop_duplicates() removes nothing, so duplicates are detected on the
# remaining columns only. dropna() then discards rows with any missing field.
recipe_cleaned = (
    recipe
    .drop_duplicates(subset=[col for col in recipe.columns if col != 'Unnamed: 0'])
    .dropna()
)


In [9]:
# Drop recipes whose NER field is the literal string '[]' (an empty list).
# A vectorised comparison replaces the per-row Python lambda; the selected rows
# are the same.
recipe_cleaned = recipe_cleaned[recipe_cleaned['NER'] != '[]']
# Keep the first 5000 remaining rows as the working sample.
recipe_cleaned_5000 = recipe_cleaned.head(5000)
recipe_cleaned_5000





## Create question/answer pairs for fine-tuning


In [11]:
def create_preparation_qa(row):
    """Build one question/answer pair from a recipe row.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a DataFrame row)
        Must provide the 'title', 'ingredients' and 'directions' entries.

    Returns
    -------
    dict
        {'question': <prompt naming the title and ingredients>,
         'answer': <the row's 'directions' value, unchanged>}
    """
    title, ingredients = row['title'], row['ingredients']
    # The ingredients value is interpolated as-is; no reformatting happens here.
    question = f"How do you prepare '{title}' using these ingredients: {ingredients}?"
    return {'question': question, 'answer': row['directions']}

# Produce one {'question', 'answer'} dict per recipe by calling the builder row-wise.
preparation_qa_data = recipe_cleaned_5000.apply(create_preparation_qa, axis=1)

# Expand those dicts into a two-column frame (fresh 0..n-1 index).
preparation_qa_df = pd.DataFrame(list(preparation_qa_data))
preparation_qa_df

Unnamed: 0,question,answer
0,How do you prepare 'No-Bake Nut Cookies' using...,"[""In a heavy 2-quart saucepan, mix brown sugar..."
1,How do you prepare 'Jewell Ball'S Chicken' usi...,"[""Place chipped beef on bottom of baking dish...."
2,How do you prepare 'Creamy Corn' using these i...,"[""In a slow cooker, combine all ingredients. C..."
3,How do you prepare 'Chicken Funny' using these...,"[""Boil and debone chicken."", ""Put bite size pi..."
4,How do you prepare 'Reeses Cups(Candy) ' usin...,"[""Combine first four ingredients and press in ..."
...,...,...
4995,How do you prepare 'No-Peek Stew' using these ...,"[""Place all ingredients in a Dutch oven and co..."
4996,How do you prepare 'Tacorino Dip' using these ...,"[""Blend all ingredients by hand."", ""Chill and ..."
4997,How do you prepare 'Black Walnut Cake' using t...,"[""Cream shortening and sugar until light and f..."
4998,How do you prepare 'Pickled Peppers' using the...,"[""Wash and drain peppers."", ""Cut 2 small slits..."


In [13]:
def clean_answer_format(qa):
    """Flatten the stringified list in ``qa['answer']`` into one plain-text string.

    Parameters
    ----------
    qa : mapping-like (e.g. a dict or a DataFrame row)
        Must provide an 'answer' entry holding the text of a list literal of
        strings, such as '["Step one.", "Step two."]'.

    Returns
    -------
    The same ``qa`` object, whose 'answer' entry has been replaced in place by
    the steps joined with single spaces.

    Raises
    ------
    ValueError
        If 'answer' is not a plain literal, or the literal is not a list/tuple.
    SyntaxError
        If 'answer' is not syntactically valid.
    """
    # literal_eval only accepts Python literals, so text coming from the dataset
    # can never run as code (eval would execute whatever expression it contains).
    directions = ast.literal_eval(qa['answer'])
    if not isinstance(directions, (list, tuple)):
        # Joining a bare string would silently put a space between every character.
        raise ValueError(
            f"expected a list of direction strings, got {type(directions).__name__}"
        )

    # Join the steps, then strip leftover list punctuation. Note this also removes
    # any double quotes or square brackets that occur inside the step text itself.
    cleaned_answer = ' '.join(directions).replace('", "', ', ').replace('"', '').replace('[', '').replace(']', '')

    # Update the answer in the pair
    qa['answer'] = cleaned_answer
    return qa

# Run clean_answer_format on every row (axis=1) and collect the returned rows
# into a new frame.
preparation_qa_cleaned_df = preparation_qa_df.apply(clean_answer_format, axis=1)

In [15]:
# Display the cleaned question/answer frame to check the result.
preparation_qa_cleaned_df

Unnamed: 0,question,answer
0,How do you prepare 'No-Bake Nut Cookies' using...,"In a heavy 2-quart saucepan, mix brown sugar, ..."
1,How do you prepare 'Jewell Ball'S Chicken' usi...,Place chipped beef on bottom of baking dish. P...
2,How do you prepare 'Creamy Corn' using these i...,"In a slow cooker, combine all ingredients. Cov..."
3,How do you prepare 'Chicken Funny' using these...,Boil and debone chicken. Put bite size pieces ...
4,How do you prepare 'Reeses Cups(Candy) ' usin...,Combine first four ingredients and press in 13...
...,...,...
4995,How do you prepare 'No-Peek Stew' using these ...,Place all ingredients in a Dutch oven and cove...
4996,How do you prepare 'Tacorino Dip' using these ...,Blend all ingredients by hand. Chill and serve...
4997,How do you prepare 'Black Walnut Cake' using t...,Cream shortening and sugar until light and flu...
4998,How do you prepare 'Pickled Peppers' using the...,Wash and drain peppers. Cut 2 small slits in e...


In [17]:
# Write the cleaned pairs to CSV; index=False leaves the row index out of the file.
preparation_qa_cleaned_df.to_csv("data/recipe_qa.csv",index=False)