In [1]:
# Importing necessary modules
import requests
from bs4 import BeautifulSoup
import re
from collections import Counter
import unicodedata

In [2]:
# Setting up notebook to display multiple outputs in one cell:
# with ast_node_interactivity set to "all", IPython shows the value of every
# top-level expression in a cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Defining a function to extract the HTML class and tag key words to search for ingredients
def ingredient_class_tag(recipe_html):
    """Guess the CSS class and HTML tag that wrap the ingredients of a recipe page.

    The markup is obtained with ``str(recipe_html)`` and scanned with regular
    expressions (``.`` does not cross newlines, so matching is per line):

    1. Every ``class="...ingredients..."`` / ``class="...Ingredients..."``
       attribute is counted and the most frequent one is selected.  On a tie,
       a later candidate whose text contains "label" (case-insensitive)
       replaces the current choice.
    2. Every ``<...>`` span containing the selected class is collected, the
       text between ``<`` and `` class`` is taken as the tag, and the most
       frequent tag is selected (first encountered wins a tie).

    Parameters
    ----------
    recipe_html : object
        Anything whose ``str()`` is the page markup, e.g. a parsed
        BeautifulSoup document or a plain string.

    Returns
    -------
    tuple of (str, str)
        ``(class_name, tag_name)``.  ``class_name`` is ``""`` when no
        ingredients class is found and ``tag_name`` is ``""`` when no tag can
        be extracted.
    """
    html_text = str(recipe_html)
    # HTML class
    class_counts = Counter(re.findall('class=".*[Ii]ngredients[^ ]*"', html_text))
    most_common_ingredients_class = ""
    max_value = 0
    for key, value in class_counts.items():
        if value > max_value:
            most_common_ingredients_class = key
            max_value = value
        elif value == max_value and "label" in key.lower():
            # Tie-break: prefer the candidate that looks like a label element.
            most_common_ingredients_class = key
    most_common_ingredients_class = re.sub('class=|"', '', most_common_ingredients_class)
    # HTML tag
    # The class name is data, not a pattern: escape it so that characters such
    # as "(", "[" or "+" in a class name are matched literally instead of
    # being interpreted as regex syntax (or raising re.error).
    class_pattern = re.escape(most_common_ingredients_class)
    tag_counts = Counter()
    for t in re.findall(rf'<.*{class_pattern}.*>', html_text):
        tag = re.search('<.* class', t)
        if tag:
            tag_counts[re.sub('<| class', '', tag[0])] += 1
    # max() returns the first maximal key, i.e. the first-encountered tag on a tie.
    most_common_ingredients_tag = max(tag_counts, key=tag_counts.get, default="")
    return (most_common_ingredients_class, most_common_ingredients_tag)

In [4]:
# Defining a function to extract the HTML class and tag key words to search for directions
def direction_class_tag(recipe_html):
    """Guess the CSS class and HTML tag that wrap the directions of a recipe page.

    The markup is obtained with ``str(recipe_html)`` and scanned with regular
    expressions (``.`` does not cross newlines, so matching is per line):

    1. Every ``class`` attribute containing ``method`` / ``Method`` or the
       literal ``comp mntl-sc-block mntl-sc-block-html`` is counted and the
       most frequent one is selected (first encountered wins a tie).
    2. Every ``<...>`` span containing the selected class is collected, the
       text between ``<`` and `` class`` is taken as the tag, and the most
       frequent tag is selected (first encountered wins a tie).

    Parameters
    ----------
    recipe_html : object
        Anything whose ``str()`` is the page markup, e.g. a parsed
        BeautifulSoup document or a plain string.

    Returns
    -------
    tuple of (str, str)
        ``(class_name, tag_name)``.  ``class_name`` is ``""`` when no
        directions class is found and ``tag_name`` is ``""`` when no tag can
        be extracted.
    """
    html_text = str(recipe_html)
    # HTML class
    class_counts = Counter(
        re.findall('class=".*[Mm]ethod[^ ]*"|class=".*comp mntl-sc-block mntl-sc-block-html[^ ]*"', html_text)
    )
    # max() returns the first maximal key, matching a "strictly greater" scan.
    most_common_directions_class = max(class_counts, key=class_counts.get, default="")
    most_common_directions_class = re.sub('class=|"', '', most_common_directions_class)
    # HTML tag
    # The class name is data, not a pattern: escape it so that characters such
    # as "(", "[" or "+" in a class name are matched literally instead of
    # being interpreted as regex syntax (or raising re.error).
    class_pattern = re.escape(most_common_directions_class)
    tag_counts = Counter()
    for t in re.findall(rf'<.*{class_pattern}.*>', html_text):
        tag = re.search('<.* class', t)
        if tag:
            tag_counts[re.sub('<| class', '', tag[0])] += 1
    most_common_directions_tag = max(tag_counts, key=tag_counts.get, default="")
    return (most_common_directions_class, most_common_directions_tag)

In [5]:
# Defining a function to extract the ingredients and directions from an online recipe
def recipe_extract(URL, timeout=30):
    """Download a recipe page and return its ingredients and directions.

    The page is fetched with ``requests`` and parsed with BeautifulSoup.  The
    class/tag pairs to search for are guessed by ``ingredient_class_tag`` and
    ``direction_class_tag``; each matching element is converted to a string
    and reduced to plain text by a fixed sequence of regex substitutions.

    Parameters
    ----------
    URL : str
        Address of the recipe page.  When the URL contains ``"allrecipes"``
        the direction search is narrowed to that site's steps container.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that an unresponsive server cannot
        block the notebook indefinitely.  Defaults to 30 seconds.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(ingredients_list, directions_list)`` with empty entries removed.
        Direction text is split on ``"."``; a block that contains no ``"."``
        or that contains ``"https"`` is skipped.
    """
    # Getting HTML of selected recipe
    webpage = requests.get(URL, timeout=timeout)
    recipe_html = BeautifulSoup(webpage.content, "html.parser")
    # Extracting the HTML class and tag key words to search for ingredients
    most_common_ingredients_class, most_common_ingredients_tag = ingredient_class_tag(recipe_html)
    # The detected tag/class are data, not patterns: escape them before they
    # are interpolated into the clean-up regexes below.
    ingredients_tag_pattern = re.escape(most_common_ingredients_tag)
    ingredients_class_pattern = re.escape(most_common_ingredients_class)
    # Extracting the ingredients and storing in list
    ingredients_text = recipe_html.find_all(most_common_ingredients_tag, class_ = most_common_ingredients_class)
    ingredients_list = []
    for ingredient in ingredients_text:
        # The substitutions are order-dependent; keep the sequence as is.
        ingredient = str(ingredient)
        ingredient = re.sub('</', "", ingredient)
        ingredient = re.sub(rf'<{ingredients_tag_pattern}|{ingredients_tag_pattern}>', "", ingredient)
        ingredient = re.sub('<', "", ingredient)
        ingredient = re.sub(ingredients_class_pattern, "", ingredient)
        ingredient = re.sub(' class=', "", ingredient)
        ingredient = re.sub('"', "", ingredient)
        ingredient = re.sub('Deselect All', "", ingredient)
        ingredient = re.sub('data.*?>', "", ingredient)
        ingredient = re.sub('span>|span', "", ingredient)
        ingredient = re.sub('p>', "", ingredient)
        ingredient = re.sub('>', "", ingredient)
        # Non-breaking and narrow no-break spaces become ordinary spaces.
        ingredient = re.sub('\xa0', " ", ingredient)
        ingredient = re.sub('\u202f', " ", ingredient)
        # NFKC expands vulgar fractions (e.g. "½" -> "1⁄2"); the fraction
        # slash is then replaced by an ASCII "/".
        ingredient = re.sub("⁄", "/", unicodedata.normalize("NFKC", ingredient))
        ingredient = re.sub(' +', " ", ingredient)
        ingredient = re.sub('\n', "", ingredient)
        ingredient = ingredient.strip()
        if ingredient:
            ingredients_list.append(ingredient)
    # Extracting the HTML class and tag key words to search for directions
    most_common_directions_class, most_common_directions_tag = direction_class_tag(recipe_html)
    directions_tag_pattern = re.escape(most_common_directions_tag)
    directions_class_pattern = re.escape(most_common_directions_class)
    # Extracting directions and storing in list
    directions_scope = recipe_html
    if "allrecipes" in URL:
        steps_container = recipe_html.find("div", class_ = "comp recipe__steps-content mntl-sc-page mntl-block")
        # find() returns None when the container is absent; fall back to the
        # whole document instead of failing on None.find_all(...).
        if steps_container is not None:
            directions_scope = steps_container
    directions_text = directions_scope.find_all(most_common_directions_tag, class_ = most_common_directions_class)
    directions_list = []
    for direction in directions_text:
        # The substitutions are order-dependent; keep the sequence as is.
        direction = str(direction)
        direction = re.sub('</', "", direction)
        direction = re.sub(rf'<{directions_tag_pattern}|{directions_tag_pattern}>', "", direction)
        direction = re.sub('<', "", direction)
        direction = re.sub(directions_class_pattern, "", direction)
        direction = re.sub(' class=', "", direction)
        direction = re.sub('"', "", direction)
        direction = re.sub('id=.*>', "", direction)
        direction = re.sub('p>', "", direction)
        direction = re.sub('>', "", direction)
        direction = re.sub('\xa0', " ", direction)
        direction = re.sub('\u202f', " ", direction)
        direction = re.sub("⁄", "/", unicodedata.normalize("NFKC", direction))
        direction = re.sub(' +', " ", direction)
        direction = re.sub('\n', "", direction)
        if "https" not in direction:
            # NOTE: splitting on "." is naive; it also cuts inside
            # parenthesised sentences and after abbreviations.
            sentences = direction.strip().split(".")
            if len(sentences) > 1:
                # Drop empty fragments here rather than removing them from the
                # finished list one at a time.
                directions_list.extend(s.strip() for s in sentences if s.strip())
    return (ingredients_list, directions_list)

In [6]:
# Example run on a foodnetwork.com page (stuffed peppers); prints the ingredient list, then the direction list.
ingredients, directions = recipe_extract("https://www.foodnetwork.com/recipes/food-network-kitchen/sweet-and-sour-couscous-stuffed-peppers-recipe-2121036")
print(ingredients)
print(directions)

['2 medium carrots, cut into chunks', '2 stalks celery, cut into chunks', '1 large shallot, cut into chunks', '1 1/2 tablespoons olive oil', '1/2 pound lean ground beef', '2 tablespoons plus 4 teaspoons tomato paste', '1/3 cup chopped fresh parsley, dill or a combination', '1/3 cup golden raisins', '2 tablespoons red wine vinegar', 'Kosher salt', '4 red, yellow, orange or green bell peppers or a mix of colors, halved lengthwise and seeded', '1/2 cup whole wheat couscous', '3/4 cup grated asiago cheese']
['Preheat the oven to 450 degrees F', 'Pulse the carrots, celery and shallots in a food processor until coarsely chopped', 'Heat 1 tablespoon of the olive oil in a large nonstick skillet over medium high', 'Add the chopped vegetables and cook, stirring frequently, until light golden and soft, 8 to 10 minutes', '(Add a splash of water if the mixture begins to stick', ') Add the ground beef and 4 teaspoons of the tomato paste and cook, breaking the mixture up with a wooden spoon, until br

In [7]:
# Example run on a foodnetwork.com page (meat loaf); prints the ingredient list, then the direction list.
ingredients, directions = recipe_extract("https://www.foodnetwork.com/recipes/ina-garten/meat-loaf-recipe-1921718")
print(ingredients)
print(directions)

['1 tablespoon good olive oil', '3 cups chopped yellow onions (3 onions)', '1 teaspoon chopped fresh thyme leaves', '2 teaspoons kosher salt', '1 teaspoon freshly ground black pepper', '3 tablespoons Worcestershire sauce', '1/3 cup canned chicken stock or broth', '1 tablespoon tomato paste', '2 1/2 pounds ground chuck (81 percent lean)', '1/2 cup plain dry bread crumbs (recommended: Progresso)', '2 extra-large eggs, beaten', '1/2 cup ketchup (recommended: Heinz)']
['Preheat the oven to 325 degrees F', 'Heat the olive oil in a medium saute pan', 'Add the onions, thyme, salt, and pepper and cook over medium-low heat, stirring occasionally, for 8 to 10 minutes, until the onions are translucent but not brown', 'Off the heat, add the Worcestershire sauce, chicken stock, and tomato paste', 'Allow to cool slightly', 'In a large bowl, combine the ground chuck, onion mixture, bread crumbs, and eggs, and mix lightly with a fork', "Don't mash or the meat loaf will be dense", 'Shape the mixture in

In [8]:
# Example run on a foodnetwork.com page (western omelette); prints the ingredient list, then the direction list.
ingredients, directions = recipe_extract("https://www.foodnetwork.com/recipes/food-network-kitchen/western-omelette-recipe-2011477")
print(ingredients)
print(directions)

['12 large eggs', 'Kosher salt and freshly ground black pepper', '1/4 cup unsalted butter', '1/2 green bell pepper, cut into 1/2-inch dice', '1/2 red bell pepper, cut into 1/2-inch dice', '6 ounces boiled ham (in one piece), cut into 1/2-inch dice', '4 scallions (white and green), roughly chopped', '1/2 cup grated white cheese (about 2 ounces), such as Monterey Jack, cheddar, or Gouda (optional)']
['In a small bowl, beat together 3 of the eggs and season lightly with salt and pepper', 'Set aside', 'Heat 3 tablespoons of the butter in a small non-stick skillet, with tight-fitting lid, over medium heat', 'Add the peppers and cook, covered, without stirring, until tender, about 3 minutes', 'Uncover and raise the heat to medium-high', 'Add the ham and scallions and cook, stirring frequently, until hot, about 1 minute', 'Transfer the mixture to a bowl', 'Melt a nut-sized piece of the remaining butter in the skillet over medium heat', 'When the foam subsides, add 1/4 of the pepper mixture', 

In [9]:
# Example run on a foodnetwork.com page (chocolate cake); prints the ingredient list, then the direction list.
ingredients, directions = recipe_extract("https://www.foodnetwork.com/recipes/food-network-kitchen/the-best-chocolate-cake-7193220")
print(ingredients)
print(directions)

['Nonstick baking spray', '1 1/4 cup Dutch-process cocoa, plus more for dusting', "2 1/4 cups all-purpose flour (see Cook's Note)", '2 1/4 teaspoons baking soda', '1 teaspoon baking powder', '1 large egg, at room temperature', '2 large egg yolks, at room temperature', '2 1/4 cups light brown sugar', '1 1/4 cup sour cream, at room temperature', '2/3 cup vegetable oil', '1 tablespoon pure vanilla extract', '1 1/4 teaspoon kosher salt', '12 ounces semisweet chocolate chips or semisweet chocolate (64-percent cacao), chopped', '4 sticks (1 pound) unsalted butter, at room temperature', '3/4 teaspoon kosher salt', "2 cups confectioners' sugar", '4 teaspoons Dutch-process cocoa powder', '1 tablespoon pure vanilla paste or extract']
['For the cake: Arrange a rack in the center of oven and preheat to 350 degrees F', 'Lightly coat two 9-inch round cake pans with nonstick cooking spray', 'Line the bottoms with parchment paper rounds and lightly coat the paper with nonstick spray', 'Dust the bottom

In [10]:
# Example run on a foodnetwork.com page (buffalo chicken wings); prints the ingredient list, then the direction list.
ingredients, directions = recipe_extract("https://www.foodnetwork.com/recipes/food-network-kitchen/buffalo-style-chicken-wings-recipe-1928161")
print(ingredients)
print(directions)

['2 cups all-purpose flour', '1/2 teaspoon cayenne', '1/2 teaspoon fine salt', 'Vegetable oil, for frying', '2 pounds chicken wings, split at the joint, wingtips removed and discarded', '1/2 cup unsalted butter, melted', '1/4 cup hot sauce, or to taste', '2 tablespoons fresh lemon juice', '1 cup mayonnaise', '1/2 cup half-and-half', '2 tablespoons sour cream', '1 tablespoon fresh lemon juice', '1/4 teaspoon Worcestershire sauce', '1/2 cup crumbled blue cheese, such as Maytag (about 3 ounces)', 'Kosher salt and freshly ground black pepper', 'Celery sticks, for serving', 'Carrot sticks, for serving']
['For the wings: Preheat the oven to 400 degrees F', 'Line 1 large or 2 small rimmed baking sheets with foil', 'Whisk together the flour, cayenne and salt in a large bowl', 'Heat 2 inches of oil in a large, heavy-bottomed pot over medium heat until a deep-fry thermometer registers 375 degrees F', 'Working in 3 batches, dredge the wings in the flour mixture and fry until lightly browned, 10 t

In [11]:
# Example run on an allrecipes.com page (stuffed peppers); this URL takes the "allrecipes" branch of recipe_extract.
ingredients, directions = recipe_extract("https://www.allrecipes.com/recipe/222002/chef-johns-stuffed-peppers/")
print(ingredients)
print(directions)

['1 cup uncooked long grain white rice', '2 cups water', '1 onion, diced', '1 tablespoon olive oil', '2 cups marinara sauce', '1 cup beef broth', '1 tablespoon balsamic vinegar', '1/4 teaspoon crushed red pepper flakes', '1 pound lean ground beef', '1/4 pound hot Italian pork sausage, casing removed', '1 (10 ounce) can diced tomatoes', '1/4 cup chopped fresh Italian parsley', '4 cloves garlic, minced', '2 teaspoons salt', '1 teaspoon freshly ground black pepper', '1 pinch ground cayenne pepper', '1 cup finely grated Parmigiano-Reggiano cheese, plus more for topping', '4 large green bell peppers, halved lengthwise and seeded']
['Bring rice and water to a boil in a saucepan over high heat', 'Reduce heat to medium-low, cover, and simmer until the rice is tender, and the liquid has been absorbed, 20 to 25 minutes', 'Set the cooked rice aside', 'Meanwhile, preheat the oven to 375 degrees F (190 degrees C)', 'Make sauce: Cook onion and olive oil over medium heat until onion begins to soften,

In [12]:
# Example run on an allrecipes.com page (meatloaf); this URL takes the "allrecipes" branch of recipe_extract.
ingredients, directions = recipe_extract("https://www.allrecipes.com/recipe/16354/easy-meatloaf/")
print(ingredients)
print(directions)

['1 1/2 pounds ground beef', '1 egg', '1 onion, chopped', '1 cup milk', '1 cup dried bread crumbs', 'salt and pepper to taste', '1/3 cup ketchup', '2 tablespoons brown sugar', '2 tablespoons prepared mustard']
['Preheat the oven to 350 degrees F (175 degrees C)', 'Lightly grease a 9x5-inch loaf pan', 'Combine ground beef, onion, milk, bread crumbs and egg in a large bowl; season with salt and pepper', 'Transfer into prepared loaf pan', 'Mix ketchup, brown sugar, and mustard together in a small bowl until well combined; pour over meatloaf and spread it evenly over the top', 'Bake in the preheated oven until no longer pink in the center, about 1 hour', 'Crushed crackers may be used in place of bread crumbs, if preferred']


In [13]:
# Example run on an allrecipes.com page (Denver omelet); this URL takes the "allrecipes" branch of recipe_extract.
ingredients, directions = recipe_extract("https://www.allrecipes.com/recipe/263567/the-denver-omelet/")
print(ingredients)
print(directions)

['3 large eggs', '1 tablespoon butter', '1/4 cup diced smoked ham', '2 tablespoons diced onion', '2 tablespoons diced green bell pepper', 'salt and freshly ground black pepper to taste', '1/3 cup shredded Cheddar cheese', '1 pinch cayenne pepper']
['Beat eggs in a small bowl until just combined; do not overbeat', 'Melt butter in a skillet over medium-high heat', 'Add ham, onion, and bell pepper; season with salt and pepper', 'Cook and stir until onions soften and ham begins to caramelize, about 5 minutes', 'Reduce heat to medium-low and pour in eggs', 'Mix briefly with a spatula while shaking the pan to ensure ingredients are evenly distributed', 'Quickly run the spatula along edges of omelet', 'Sprinkle Cheddar cheese and cayenne pepper over omelet', 'Cook, shaking the pan occasionally, until top is still wet but not runny, about 5 minutes', 'Use a spatula to fold omelet in half and transfer it to a plate']


In [14]:
# Example run on an allrecipes.com page (chocolate cake); this URL takes the "allrecipes" branch of recipe_extract.
ingredients, directions = recipe_extract("https://www.allrecipes.com/recipe/17981/one-bowl-chocolate-cake-iii/")
print(ingredients)
print(directions)

['2 cups white sugar', '1 3/4 cups all-purpose flour', '3/4 cup unsweetened cocoa powder', '1 1/2 teaspoons baking powder', '1 1/2 teaspoons baking soda', '1 teaspoon salt', '2 eggs', '1 cup milk', '1/2 cup vegetable oil', '2 teaspoons vanilla extract', '1 cup boiling water']
['Gather all ingredients', 'Preheat oven to 350 degrees F (175 degrees C)', 'Grease and flour two nine inch round pans', 'In a large bowl, stir together the sugar, flour, cocoa, baking powder, baking soda and salt', 'Add the eggs, milk, oil and vanilla, mix for 2 minutes on medium speed of mixer', 'Stir in the boiling water last', 'Batter will be thin', 'Pour evenly into the prepared pans', 'Bake 30 to 35 minutes in the preheated oven, until the cake tests done with a toothpick', 'Cool in the pans for 10 minutes, then remove to a wire rack to cool completely']


In [15]:
# Example run on an allrecipes.com page (buffalo chicken wings); this URL takes the "allrecipes" branch of recipe_extract.
ingredients, directions = recipe_extract("https://www.allrecipes.com/recipe/24087/restaurant-style-buffalo-chicken-wings/")
print(ingredients)
print(directions)

['1/2 cup all-purpose flour', '1/4 teaspoon ground paprika', '1/4 teaspoon cayenne pepper', '1/4 teaspoon salt', '10 chicken wings', '2 cups vegetable oil for frying, or as needed', '1/4 cup butter', '1/4 cup hot sauce', '1 pinch ground black pepper', '1 pinch garlic powder']
['Whisk together flour, paprika, cayenne pepper, and salt in a small bowl', 'Place chicken wings in a single layer in a 9x13-inch glass baking dish', 'Sprinkle flour mixture over top and toss until wings are evenly coated', 'Cover and refrigerate for 1 to 1 1/2 hours', 'Add about 1 inch oil to a deep, heavy skillet; heat to 375 degrees F (190 degrees C)', '(The oil should be just enough to cover wings entirely', ')', 'While the oil is coming to temperature, combine butter, hot sauce, pepper, and garlic powder in a small saucepan over low heat', 'Cook and stir until butter is melted and mixture is thoroughly blended', 'Remove from the heat and reserve for serving', 'Fry coated wings in the hot oil for 10 to 15 minu