#Recipe Extractor
Utilizes OpenAI's Function calling method, along with beautiful soup's web scraping library to extract the recipe from a user's url, and return the recipe as a neat JSON object 

In [193]:
import os 
import openai
from dotenv import load_dotenv, find_dotenv
from IPython.display import display, HTML
import requests
from bs4 import BeautifulSoup

#API key
load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

In [194]:
#sample url
#url = "https://www.maangchi.com/recipe/dwaeji-kimchi-duruchigi"
#url = "https://thewoksoflife.com/bacon-scallion-milk-bread/"
url = "https://pinchofyum.com/winter-grain-bowl-with-balsamic-dressing"
urls = [
    "https://www.maangchi.com/recipe/dwaeji-kimchi-duruchigi",
    "https://thewoksoflife.com/bacon-scallion-milk-bread/",
    "https://pinchofyum.com/winter-grain-bowl-with-balsamic-dressing",
    "https://thecozycook.com/country-fried-chicken/"
]




In [213]:
#find all instances of the classifier: 
import json

def get_recipe_details(url):
    response = requests.get(url)
    html_content = response.content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    data = [
        json.loads(x.string) for x in soup.find_all("script", type="application/ld+json")
    ]

    d = data[0]["@graph"]

    for buckets in d:
        #search for the recipeIngredient key
        if "recipeIngredient" in buckets:
            #the dict (buckets) is assingned to d and returned 
            d = buckets
    print(d)
    return d

"""
messages = []
#testing on multiple urls 
for url in urls:
    dish = parse_ingredients(url)
    messages.append(f"dish name: {dish['name']},      dish ingredients: {dish['recipeIngredient']}")

#for each recipe, print out the ingredients
for buckets in messages:
    print(buckets)"""

'\nmessages = []\n#testing on multiple urls \nfor url in urls:\n    dish = parse_ingredients(url)\n    messages.append(f"dish name: {dish[\'name\']},      dish ingredients: {dish[\'recipeIngredient\']}")\n\n#for each recipe, print out the ingredients\nfor buckets in messages:\n    print(buckets)'

In [304]:
#function calling
functions = [
    {
        "name": "get_recipe_details",
        "description" : "Get detailed information about the recipe's ingredients and amount given a URL",
        "parameters" : {
            "type": "object",
            "properties" : {
                "url": {
                    "type": "string",
                    "description": "URL of the recipe"
                }
            },
            "required": ["url"],
        },
    }
]

In [308]:
#find all instances of the classifier: 
import json

def get_recipe_details(url):
    print("function entered")
    response = requests.get(url)
    html_content = response.content
    soup = BeautifulSoup(html_content, 'html.parser')
    
    data = [
        json.loads(x.string) for x in soup.find_all("script", type="application/ld+json")
    ]

    d = data[0]["@graph"]

    for buckets in d:
        #search for the recipeIngredient key
        if "recipeIngredient" in buckets:
            #the dict (buckets) is assingned to d and returned 
            d = buckets
    return d

"""
messages = []
#testing on multiple urls 
for url in urls:
    dish = parse_ingredients(url)
    messages.append(f"dish name: {dish['name']},      dish ingredients: {dish['recipeIngredient']}")

#for each recipe, print out the ingredients
for buckets in messages:
    print(buckets)"""

def complete_chat():    
    #set up the model 
    messages = [
        {"role": "system", "content": "You will call a function for the user. You will pass in a url given from the user. Make sure the url is in the format 'https://www.____.___' and that the url is valid."},
        {"role": "user", "content": "please get me the recipe for https://www.maangchi.com/recipe/dwaeji-kimchi-duruchigi"}
    ]
    response = openai.ChatCompletion.create(
        model = "gpt-3.5-turbo-0613", 
        messages = messages,
        functions = functions,
        temperature = 0
    )
    if (response):
        recipe_details = response["choices"][0]["message"]
        print(recipe_details["function_call"]["arguments"])
        messages = [
            {"role": "system", 
            "content": """You are given detailed information about a recipe. Please output the recipe name, the ingredients, and the amount in bullet points.
            
            """},
            {"role": "user", "content": str(recipe_details)}
        ]

        response = openai.ChatCompletion.create(
            model = "gpt-3.5-turbo-0613",
            messages = messages,
            temperature = 0
        )
        return response

ans = complete_chat()
print(ans["choices"][0]["message"]["content"])

{
  "url": "https://www.maangchi.com/recipe/dwaeji-kimchi-duruchigi"
}
{
  "role": "assistant",
  "content": {
    "recipe_name": "Dwaeji Kimchi Duruchigi",
    "ingredients": [
      {
        "name": "Pork belly",
        "amount": "1 pound"
      },
      {
        "name": "Kimchi",
        "amount": "2 cups"
      },
      {
        "name": "Onion",
        "amount": "1 medium-sized"
      },
      {
        "name": "Green onion",
        "amount": "2 stalks"
      },
      {
        "name": "Garlic",
        "amount": "4 cloves"
      },
      {
        "name": "Soy sauce",
        "amount": "2 tablespoons"
      },
      {
        "name": "Sugar",
        "amount": "1 tablespoon"
      },
      {
        "name": "Sesame oil",
        "amount": "1 tablespoon"
      },
      {
        "name": "Sesame seeds",
        "amount": "1 tablespoon"
      },
      {
        "name": "Black pepper",
        "amount": "1/2 teaspoon"
      },
      {
        "name": "Cooking oil",
        "amou