In [1]:
# Importing necessary modules
import requests
from bs4 import BeautifulSoup
import re
from collections import Counter

In [2]:
# Setting up notebook to display multiple outputs in one cell:
# with ast_node_interactivity set to "all", IPython displays the value of
# every expression statement in a cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Defining a function to extract the HTML class and tag key words to search for ingredients
def ingredient_class_tag(recipe_html):
    """Guess the CSS class and HTML tag that mark up a recipe's ingredients.

    The markup is scanned as plain text. Every ``class="..."`` attribute whose
    value contains "ingredients" / "Ingredients" is counted and the most
    frequent one is chosen; on a tie, a candidate containing "label"
    (case-insensitive) replaces the current pick. The tag is then the most
    frequent tag prefix found on lines that mention the chosen class.

    Parameters
    ----------
    recipe_html : object
        Anything whose ``str()`` is the page markup (the notebook passes a
        BeautifulSoup document; a plain string works as well).

    Returns
    -------
    tuple of (str, str)
        ``(class_value, tag_name)``. ``class_value`` is "" when no matching
        class attribute was found; ``tag_name`` is "" when no tag could be
        derived.
    """
    html_text = str(recipe_html)

    # HTML class
    # `.*` is greedy and `.` does not cross newlines, so each match runs from
    # the first `class="` on a line to the last ingredients-like class on it.
    class_counts = Counter(re.findall('class=".*[Ii]ngredients[^ ]*"', html_text))
    most_common_ingredients_class = ""
    max_value = 0
    for key, value in class_counts.items():
        if value > max_value:
            most_common_ingredients_class = key
            max_value = value
        elif value == max_value and "label" in key.lower():
            # Tie-break: prefer a class that looks like a label element.
            most_common_ingredients_class = key
    most_common_ingredients_class = re.sub('class=|"', '', most_common_ingredients_class)

    # HTML tag
    # The class value is arbitrary page text, so it must be escaped before it
    # is embedded in a pattern; otherwise characters such as "[", "(" or "+"
    # are read as regex syntax and the class fails to match its own markup
    # (or the pattern fails to compile).
    # An empty class leaves the pattern as '<.*.*>', i.e. every line that
    # contains a tag is considered.
    class_pattern = re.escape(most_common_ingredients_class)
    tag_list = re.findall(rf'<.*{class_pattern}.*>', html_text)
    clean_tag_list = []
    for t in tag_list:
        tag = re.search('<.* class', t)
        if tag:
            clean_tag_list.append(re.sub('<| class', '', tag[0]))

    # most_common(1) returns the first-seen entry among equal counts, which is
    # the same winner a strict ">" scan over the counts would pick.
    tag_counts = Counter(clean_tag_list)
    most_common_ingredients_tag = tag_counts.most_common(1)[0][0] if tag_counts else ""
    return (most_common_ingredients_class, most_common_ingredients_tag)

In [4]:
# Defining a function to extract the HTML class and tag key words to search for directions
def direction_class_tag(recipe_html):
    """Guess the CSS class and HTML tag that mark up a recipe's directions.

    The markup is scanned as plain text. Every ``class="..."`` attribute whose
    value contains "method" / "Method", or contains
    "comp mntl-sc-block mntl-sc-block-html", is counted and the most frequent
    one is chosen (first seen wins on a tie). The tag is then the most
    frequent tag prefix found on lines that mention the chosen class.
    NOTE(review): the second alternative looks like it targets allrecipes
    markup - confirm against the sites this is used on.

    Parameters
    ----------
    recipe_html : object
        Anything whose ``str()`` is the page markup (the notebook passes a
        BeautifulSoup document; a plain string works as well).

    Returns
    -------
    tuple of (str, str)
        ``(class_value, tag_name)``. ``class_value`` is "" when no matching
        class attribute was found; ``tag_name`` is "" when no tag could be
        derived.
    """
    html_text = str(recipe_html)

    # HTML class
    directions_class = re.findall(
        'class=".*[Mm]ethod[^ ]*"|class=".*comp mntl-sc-block mntl-sc-block-html[^ ]*"',
        html_text,
    )
    # most_common(1) returns the first-seen entry among equal counts, which is
    # the same winner a strict ">" scan over the counts would pick.
    class_counts = Counter(directions_class)
    most_common_directions_class = class_counts.most_common(1)[0][0] if class_counts else ""
    most_common_directions_class = re.sub('class=|"', '', most_common_directions_class)

    # HTML tag
    # The class value is arbitrary page text, so it must be escaped before it
    # is embedded in a pattern; otherwise characters such as "[", "(" or "+"
    # are read as regex syntax and the class fails to match its own markup
    # (or the pattern fails to compile).
    # An empty class leaves the pattern as '<.*.*>', i.e. every line that
    # contains a tag is considered.
    class_pattern = re.escape(most_common_directions_class)
    tag_list = re.findall(rf'<.*{class_pattern}.*>', html_text)
    clean_tag_list = []
    for t in tag_list:
        tag = re.search('<.* class', t)
        if tag:
            clean_tag_list.append(re.sub('<| class', '', tag[0]))

    tag_counts = Counter(clean_tag_list)
    most_common_directions_tag = tag_counts.most_common(1)[0][0] if tag_counts else ""
    return (most_common_directions_class, most_common_directions_tag)

In [5]:
# Defining a function to extract the ingredients and directions from an online recipe
def recipe_extract(URL, timeout=30):
    """Download a recipe page and extract its ingredients and directions.

    The class/tag pair used for each section is chosen by
    ``ingredient_class_tag`` and ``direction_class_tag``; matching elements
    are then reduced to text by stripping markup remnants with regular
    expressions.

    Known limitation: directions are split on "." so sentences lose their
    final period, and text such as parenthesised asides or decimal numbers
    is split wherever a period occurs.

    Parameters
    ----------
    URL : str
        Address of the recipe page. For URLs containing "allrecipes" the
        directions are searched only inside the page's steps container
        (when that container is present).
    timeout : float, optional
        Seconds ``requests`` waits for the server to respond before raising
        (default 30). Without a timeout the request can hang indefinitely.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(ingredients_list, directions_list)``; empty strings are never
        included in either list.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection errors, timeouts, ...).
    """
    # Getting HTML of selected recipe
    webpage = requests.get(URL, timeout=timeout)
    recipe_html = BeautifulSoup(webpage.content, "html.parser")

    # Extracting the HTML class and tag key words to search for ingredients
    most_common_ingredients_class, most_common_ingredients_tag = ingredient_class_tag(recipe_html)
    # The detected tag/class are arbitrary page text: escape them before using
    # them inside patterns so regex metacharacters are matched literally.
    ingredients_tag_pattern = re.escape(most_common_ingredients_tag)
    ingredients_class_pattern = re.escape(most_common_ingredients_class)

    # Extracting the ingredients and storing in list
    ingredients_text = recipe_html.find_all(most_common_ingredients_tag, class_ = most_common_ingredients_class)
    ingredients_list = []
    for ingredient in ingredients_text:
        ingredient = str(ingredient)
        ingredient = re.sub('</', "", ingredient)
        ingredient = re.sub(rf'<{ingredients_tag_pattern}|{ingredients_tag_pattern}>', "", ingredient)
        ingredient = re.sub('<', "", ingredient)
        ingredient = re.sub(ingredients_class_pattern, "", ingredient)
        ingredient = re.sub(' class=', "", ingredient)
        ingredient = re.sub('"', "", ingredient)
        ingredient = re.sub('Deselect All', "", ingredient)
        ingredient = re.sub('data.*?>', "", ingredient)
        ingredient = re.sub('span>|span', "", ingredient)
        ingredient = re.sub('p>', "", ingredient)
        ingredient = re.sub('>', "", ingredient)
        ingredient = re.sub(' +', " ", ingredient)
        ingredient = re.sub('\n', "", ingredient)
        ingredient = ingredient.strip()
        if ingredient:
            ingredients_list.append(ingredient)

    # Extracting the HTML class and tag key words to search for directions
    # (detected on the whole page, before any narrowing below)
    most_common_directions_class, most_common_directions_tag = direction_class_tag(recipe_html)
    directions_tag_pattern = re.escape(most_common_directions_tag)
    directions_class_pattern = re.escape(most_common_directions_class)

    # Extracting directions and storing in list
    directions_root = recipe_html
    if "allrecipes" in URL:
        steps_container = recipe_html.find("div", class_ = "comp recipe__steps-content mntl-sc-page mntl-block")
        # find() returns None when the container is absent; fall back to the
        # whole page instead of failing on None.find_all(...).
        if steps_container is not None:
            directions_root = steps_container
    directions_text = directions_root.find_all(most_common_directions_tag, class_ = most_common_directions_class)
    directions_list = []
    for direction in directions_text:
        direction = str(direction)
        direction = re.sub('</', "", direction)
        direction = re.sub(rf'<{directions_tag_pattern}|{directions_tag_pattern}>', "", direction)
        direction = re.sub('<', "", direction)
        direction = re.sub(directions_class_pattern, "", direction)
        direction = re.sub(' class=', "", direction)
        direction = re.sub('"', "", direction)
        direction = re.sub('id=.*>', "", direction)
        direction = re.sub('p>', "", direction)
        direction = re.sub('>', "", direction)
        direction = re.sub(' +', " ", direction)
        direction = re.sub('\n', "", direction)
        # Blocks that still contain a link are skipped entirely.
        if "https" in direction:
            continue
        sentences = direction.strip().split(".")
        # A block without any period is not treated as a direction.
        if len(sentences) > 1:
            # Drop empty fragments here rather than repeatedly scanning the
            # finished list with remove().
            directions_list.extend(s.strip() for s in sentences if s.strip())
    return (ingredients_list, directions_list)

In [6]:
# Food Network: sweet-and-sour couscous-stuffed peppers
ingredients, directions = recipe_extract(
    "https://www.foodnetwork.com/recipes/food-network-kitchen/sweet-and-sour-couscous-stuffed-peppers-recipe-2121036"
)
print(ingredients, directions, sep="\n")

['2 medium carrots, cut into chunks', '2 stalks celery, cut into chunks', '1 large shallot, cut into chunks', '1 1/2 tablespoons olive oil', '1/2 pound lean ground beef', '2 tablespoons plus 4 teaspoons tomato paste', '1/3 cup chopped fresh parsley, dill or a combination', '1/3 cup golden raisins', '2 tablespoons red wine vinegar', 'Kosher salt', '4 red, yellow, orange or green bell peppers or a mix of colors, halved lengthwise and seeded', '1/2 cup whole wheat couscous', '3/4 cup grated asiago cheese']
['Preheat the oven to 450 degrees F', 'Pulse the carrots, celery and shallots in a food processor until coarsely chopped', 'Heat 1 tablespoon of the olive oil in a large nonstick skillet over medium high', 'Add the chopped vegetables and cook, stirring frequently, until light golden and soft, 8 to 10 minutes', '(Add a splash of water if the mixture begins to stick', ') Add the ground beef and 4 teaspoons of the tomato paste and cook, breaking the mixture up with a wooden spoon, until br

In [7]:
# Allrecipes: Chef John's stuffed peppers
ingredients, directions = recipe_extract(
    "https://www.allrecipes.com/recipe/222002/chef-johns-stuffed-peppers/"
)
print(ingredients, directions, sep="\n")

['1 cup uncooked long grain white rice', '2 cups water', '1 onion, diced', '1 tablespoon olive oil', '2 cups marinara sauce', '1 cup beef broth', '1 tablespoon balsamic vinegar', '¼ teaspoon crushed red pepper flakes', '1 pound lean ground beef', '¼ pound hot Italian pork sausage, casing removed', '1 (10 ounce) can diced tomatoes', '¼ cup chopped fresh Italian parsley', '4 cloves garlic, minced', '2 teaspoons salt', '1 teaspoon freshly ground black pepper', '1 pinch ground cayenne pepper', '1 cup finely grated Parmigiano-Reggiano cheese, plus more for topping', '4 large green bell peppers, halved lengthwise and seeded']
['Bring rice and water to a boil in a saucepan over high heat', 'Reduce heat to medium-low, cover, and simmer until the rice is tender, and the liquid has been absorbed, 20 to 25 minutes', 'Set the cooked rice aside', 'Meanwhile, preheat the oven to 375 degrees F (190 degrees C)', 'Make sauce: Cook onion and olive oil over medium heat until onion begins to soften, about

In [8]:
# Allrecipes: easy meatloaf
ingredients, directions = recipe_extract(
    "https://www.allrecipes.com/recipe/16354/easy-meatloaf/"
)
print(ingredients, directions, sep="\n")

['1 ½ pounds ground beef', '1 egg', '1 onion, chopped', '1 cup milk', '1 cup dried bread crumbs', 'salt and pepper to taste', '⅓ cup ketchup', '2 tablespoons brown sugar', '2 tablespoons prepared mustard']
['Preheat the oven to 350 degrees F (175 degrees C)', 'Lightly grease a 9x5-inch loaf pan', 'Combine ground beef, onion, milk, bread crumbs and egg in a large bowl; season with salt and pepper', 'Transfer into prepared loaf pan', 'Mix ketchup, brown sugar, and mustard together in a small bowl until well combined; pour over meatloaf and spread it evenly over the top', 'Bake in the preheated oven until no longer pink in the center, about 1 hour', 'Crushed crackers may be used in place of bread crumbs, if preferred']


In [9]:
# Food Network: Ina Garten's meat loaf
ingredients, directions = recipe_extract(
    "https://www.foodnetwork.com/recipes/ina-garten/meat-loaf-recipe-1921718"
)
print(ingredients, directions, sep="\n")

['1 tablespoon good olive oil', '3 cups chopped yellow onions (3 onions)', '1 teaspoon chopped fresh thyme leaves', '2 teaspoons kosher salt', '1 teaspoon freshly ground black pepper', '3 tablespoons Worcestershire sauce', '1/3 cup canned chicken stock or broth', '1 tablespoon tomato paste', '2 1/2 pounds ground chuck (81 percent lean)', '1/2 cup plain dry bread crumbs (recommended: Progresso)', '2 extra-large eggs, beaten', '1/2 cup ketchup (recommended: Heinz)']
['Preheat the oven to 325 degrees F', 'Heat the olive oil in a medium saute pan', 'Add the onions, thyme, salt, and pepper and cook over medium-low heat, stirring occasionally, for 8 to 10 minutes, until the onions are translucent but not brown', 'Off the heat, add the Worcestershire sauce, chicken stock, and tomato paste', 'Allow to cool slightly', 'In a large bowl, combine the ground chuck, onion mixture, bread crumbs, and eggs, and mix lightly with a fork', "Don't mash or the meat loaf will be dense", 'Shape the mixture in