In [1]:
# Install the third-party HTTP client that get_recipe_from_url uses to fetch pages
%pip install requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
from fractions import Fraction
import unicodedata

def str_to_fraction(data: str) -> Fraction:
    """Parse a quantity string into the exact sum of its numeric tokens.

    The text is split on whitespace; every token is converted with
    ``Fraction`` and the results are added up, so a mixed number such as
    ``'1 1/2'`` yields ``Fraction(3, 2)``.  Unicode vulgar fractions and the
    fraction slash (U+2044) are accepted in addition to the ASCII ``/``.

    Args:
        data: Quantity text, e.g. ``'3'``, ``'1/4'``, ``'1 1/2'``, or the same
            values written with a vulgar-fraction character, with or without
            a space between the whole part and the fraction.

    Returns:
        The sum of all tokens; ``Fraction(0)`` when ``data`` has no tokens.

    Raises:
        ValueError: If a token is not a literal ``Fraction`` can parse.
        ZeroDivisionError: If a token has a zero denominator.
    """
    # NFKD expands a vulgar fraction such as U+00BD into "1", U+2044, "2".
    # If that character directly follows a digit ("1" + U+00BD) the expansion
    # would fuse into "11/2", so pad every fraction character with a leading
    # space to keep it a separate token.
    spaced = ''.join(
        ' ' + char if unicodedata.decomposition(char).startswith('<fraction>') else char
        for char in data
    )

    # Normalize, then turn the fraction slash into the '/' Fraction expects
    normalized = unicodedata.normalize('NFKD', spaced).replace('\u2044', '/')

    # Start from Fraction() so the result is a Fraction even with no tokens
    return sum((Fraction(token) for token in normalized.split()), Fraction())

def fraction_to_str(frac: Fraction):
    """Render a fraction as text, using a mixed number when it is improper.

    Whole values come back as a bare integer (``'2'``), values whose numerator
    is at least the denominator as ``'<whole> <remainder>'`` (``'1 1/2'``), and
    everything else in the plain ``str(frac)`` form (``'1/4'``).
    """
    top, bottom = frac.numerator, frac.denominator

    # Whole number: there is no fractional part to show
    if bottom == 1:
        return str(top)

    # Numerator below the denominator: plain string form is already right
    if top < bottom:
        return str(frac)

    # Otherwise split into the whole part and the leftover fraction
    whole, leftover = divmod(top, bottom)
    return f'{whole} {Fraction(leftover, bottom)}'

In [3]:
from html.parser import HTMLParser
from enums import RecipeSource, HTMLTag

# Set up recipe HTML parser
class RecipeHTMLParser(HTMLParser):

    # Initialize class, setting recipe to empty
    def __init__(self, source: RecipeSource, convert_charrefs: bool = True) -> None:
        self.source = source
        self.recipe = {'ingredients': [], 'steps': []}
        self.current_tag = HTMLTag.UNKNOWN
        super().__init__(convert_charrefs=convert_charrefs)
    
    # Save the current tag
    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        self.current_tag = HTMLTag.from_tag(self.source, tag, attrs)
        match self.current_tag:
            case HTMLTag.INGREDIENT:
                self.ingredient = {}
        return super().handle_starttag(tag, attrs)
    
    # Handle text between tags as appropriate
    def handle_data(self, data: str) -> None:
        match self.current_tag:
            case HTMLTag.OVERVIEW_LABEL:
                self.label = data.lower().strip(':,.! \n\t')
            case HTMLTag.OVERVIEW_TEXT:
                self.recipe[self.label] = data.strip()
            case HTMLTag.INGREDIENT_QUANTITY:
                self.ingredient['quantity'] = str_to_fraction(data)
            case HTMLTag.INGREDIENT_UNIT:
                self.ingredient['unit'] = data
            case HTMLTag.INGREDIENT_NAME:
                self.ingredient['name'] = data
                self.recipe['ingredients'].append(self.ingredient)
            case HTMLTag.STEP:
                self.recipe['steps'].append(data.strip())
        return super().handle_data(data)

    # Reset tag
    def handle_endtag(self, tag: str) -> None:
        self.current_tag = HTMLTag.UNKNOWN
        return super().handle_endtag(tag)

In [None]:
import re
import requests

# Retrieves the text of a recipe from a given URL
def get_recipe_from_url(url: str, timeout: float = 30.0) -> dict | None:
    """Download a recipe page and parse it into a recipe dictionary.

    Args:
        url: Address of the recipe page.  It may be given with or without a
            scheme and with or without a leading ``www.``.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The ``recipe`` dictionary built by ``RecipeHTMLParser``, or ``None``
        when ``RecipeSource.from_url`` reports the URL as ``UNKNOWN``.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """

    # Find recipe source; return None if unsupported
    source = RecipeSource.from_url(url)
    if source == RecipeSource.UNKNOWN:
        return None

    # Add appropriate HTTPS tag if not there.  A URL that already carries an
    # http(s) scheme is used as given: prefixing it again would produce an
    # address such as 'https://www.http://...'.
    if not re.match(r'https?://', url, re.IGNORECASE):
        if re.match(r'www\.', url):
            url = 'https://' + url
        else:
            url = 'https://www.' + url

    # Get the recipe from the page; the timeout keeps an unresponsive server
    # from blocking the call indefinitely
    with requests.get(url, timeout=timeout) as response:
        parser = RecipeHTMLParser(source)
        parser.feed(response.text)
        return parser.recipe

In [5]:
# Example run: fetch and parse the Allrecipes moussaka recipe page
get_recipe_from_url("https://www.allrecipes.com/recipe/19644/moussaka/")

{'ingredients': [{'quantity': Fraction(3, 1),
   'name': 'eggplants, peeled and cut lengthwise into 1/2 inch thick slices'},
  {'name': 'salt to taste'},
  {'quantity': Fraction(1, 4), 'unit': 'cup', 'name': 'olive oil'},
  {'quantity': Fraction(1, 1), 'unit': 'tablespoon', 'name': 'butter'},
  {'quantity': Fraction(1, 1), 'unit': 'pound', 'name': 'lean ground beef'},
  {'quantity': Fraction(2, 1), 'name': 'onions, chopped'},
  {'quantity': Fraction(1, 1), 'unit': 'clove', 'name': 'garlic, minced'},
  {'name': 'ground black pepper to taste'},
  {'quantity': Fraction(2, 1), 'unit': 'tablespoons', 'name': 'dried parsley'},
  {'quantity': Fraction(1, 2), 'unit': 'teaspoon', 'name': 'fines herbs'},
  {'quantity': Fraction(1, 4), 'unit': 'teaspoon', 'name': 'ground cinnamon'},
  {'quantity': Fraction(1, 2),
   'unit': 'teaspoon',
   'name': 'ground nutmeg, divided'},
  {'quantity': Fraction(1, 1),
   'unit': '(8 ounce) can',
   'name': 'tomato sauce'},
  {'quantity': Fraction(1, 2), 'unit':