Skip to content

Commit

Permalink
Added serving-size (#61)
Browse files Browse the repository at this point in the history
* Added servings method to abstract class.
Added get_servings to _utils.

* Tests: made test-data-html-file relative from each test-file (Makes it possible for IDE or single unittest run).
Added a servings test to all scrapers except All Recipes BR, The Vintage Mixer and What's Gaby Cooking (their test cases have no servings); those scrapers will return 0.

* Added method-comment to _utils.get_servings.

* Made the code structure consistent.

* Changed servings to yields.
The yields method now returns a string of the form "x item(s)" or "x serving(s)"; it defaults to servings when a number is found, and to an empty string when the content is missing.

* Added yields-method to README
  • Loading branch information
Jwe0619 authored and hhursev committed Jun 12, 2019
1 parent 23fb83c commit 01ee1f5
Show file tree
Hide file tree
Showing 66 changed files with 469 additions and 124 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ then:

scraper.title()
scraper.total_time()
scraper.yields()
scraper.ingredients()
scraper.instructions()
scraper.links()
Expand Down
7 changes: 7 additions & 0 deletions recipe_scrapers/_abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __getattribute__(self, name):
decorated_methods = [
'title',
'total_time',
'yields',
'instructions',
'ingredients',
'links'
Expand All @@ -32,6 +33,8 @@ def __getattribute__(self, name):
to_return = ''
if name == 'total_time':
to_return = 0
if name == 'yields':
to_return = ''
if name == 'ingredients':
to_return = []
if name == 'links':
Expand Down Expand Up @@ -73,6 +76,10 @@ def total_time(self):
""" total time it takes to preparate the recipe in minutes """
raise NotImplementedError("This should be implemented.")

def yields(self):
    """Return the number of servings or items in the recipe.

    Placeholder on the abstract scraper: it always raises
    NotImplementedError.
    """
    message = "This should be implemented."
    raise NotImplementedError(message)

def ingredients(self):
    """Placeholder on the abstract scraper: always raises NotImplementedError."""
    message = "This should be implemented."
    raise NotImplementedError(message)

Expand Down
44 changes: 44 additions & 0 deletions recipe_scrapers/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@
r'(\D*(?P<hours>\d+)\s*(hours|hrs|hr|h|Hours|H))?(\D*(?P<minutes>\d+)\s*(minutes|mins|min|m|Minutes|M))?'
)

# Captures the first run of digits in the text as the ``items`` group. Every
# part of the pattern is optional, so ``search`` always returns a match;
# ``items`` is None when the text contains no digits.
SERV_REGEX_NUMBER = re.compile(
    r'(\D*(?P<items>\d+)?\D*)'
)

# Keywords that mark a yield as counted in items rather than servings.
# ``tac?quitos`` accepts the correct spelling "taquitos" as well as the
# "tacquitos" spelling that was matched before. Whitespace inside the pattern
# is ignored because of re.X.
SERV_REGEX_ITEMS = re.compile(
    r'\bsandwiches\b |\btac?quitos\b | \bmakes\b', flags=re.I | re.X
)

# Matches a numeric range such as "4 to 6" or "4-6"; group 1 is the separator.
SERV_REGEX_TO = re.compile(
    r'\d+(\s+to\s+|-)\d+', flags=re.I | re.X
)


def get_minutes(element):
try:
Expand All @@ -23,6 +35,38 @@ def get_minutes(element):
return 0


def get_yields(element):
    """
    Return the recipe yield as a string: "x serving(s)", or "x item(s)" when the
    text contains one of the item keywords in SERV_REGEX_ITEMS.

    For a range such as "4 to 6" or "4-6" the upper bound is used. When the text
    contains no number at all the quantity is 0 ("0 serving(s)").

    :param element: Should be BeautifulSoup.TAG, in some cases not feasible and will then be text.
    :return: The number of servings or items, or '' when no text can be read
        from element (for example when it is None).
    """
    try:
        if isinstance(element, str):
            tstring = element
        else:
            tstring = element.get_text()
    except AttributeError as e:  # if dom_element not found
        print("get_serving_numbers error {}".format(e))
        return ''

    range_match = SERV_REGEX_TO.search(tstring)
    if range_match:
        # Keep only the text after the separator of the matched range, so the
        # upper bound is the first number left. Slicing at the match position
        # (instead of re-splitting on the separator text) stays correct when
        # the same separator also occurs earlier, e.g. "Make-ahead: serves 4-6".
        tstring = tstring[range_match.end(1):]

    # SERV_REGEX_NUMBER always matches; 'items' is None when there are no digits.
    matched = SERV_REGEX_NUMBER.search(tstring).groupdict().get('items') or 0

    if SERV_REGEX_ITEMS.search(tstring) is not None:
        # This assumes if object(s), like sandwiches, it is 1 person.
        # Issue: "Makes one 9-inch pie, (realsimple-testcase, gives "9 items")
        return "{} item(s)".format(matched)

    return "{} serving(s)".format(matched)


def normalize_string(string):
return re.sub(
r'\s+', ' ',
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/allrecipes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class AllRecipes(AbstractScraper):
Expand All @@ -17,6 +17,12 @@ def total_time(self):
{'class': 'ready-in-time'})
)

def yields(self):
    """Parse the yield from the content of the recipeYield <meta> tag."""
    servings_meta = self.soup.find(
        'meta',
        {'id': 'metaRecipeServings', 'itemprop': 'recipeYield'}
    )
    servings_text = servings_meta.get("content")
    return get_yields(servings_text)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
3 changes: 3 additions & 0 deletions recipe_scrapers/allrecipesbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def ingredients(self):
for ingredient in ingredients
]

def yields(self):
    """This scraper does not extract a yield; it always returns 0."""
    no_yield = 0
    return no_yield

def instructions(self):
instructions = self.soup.find(
'ol', {'itemprop': 'recipeInstructions'}
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/bbcfood.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class BBCFood(AbstractScraper):
Expand All @@ -24,6 +24,12 @@ def total_time(self):
)
])

def yields(self):
    """Parse the yield from the <p class="recipe-metadata__serving"> element."""
    serving_tag = self.soup.find('p', {'class': 'recipe-metadata__serving'})
    return get_yields(serving_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/bbcgoodfood.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class BBCGoodFood(AbstractScraper):
Expand All @@ -24,6 +24,12 @@ def total_time(self):
).find('span'))
])

def yields(self):
    """Parse the yield from the recipeYield <span> in the recipe details."""
    yield_attrs = {'class': 'recipe-details__text', 'itemprop': 'recipeYield'}
    yield_tag = self.soup.find('span', yield_attrs)
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
10 changes: 9 additions & 1 deletion recipe_scrapers/bonappetit.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import normalize_string
from ._utils import normalize_string, get_yields


class BonAppetit(AbstractScraper):
Expand All @@ -17,6 +17,14 @@ def title(self):
def total_time(self):
    """This scraper does not extract a total time; it always returns 0."""
    no_time = 0
    return no_time

def yields(self):
    """Parse the yield from the <span> nested in the header servings element."""
    servings_header = self.soup.find(
        'span',
        {'class': "recipe__header__servings recipe__header__servings--basically"}
    )
    servings_tag = servings_header.find('span')
    return get_yields(servings_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
6 changes: 5 additions & 1 deletion recipe_scrapers/closetcooking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class ClosetCooking(AbstractScraper):
Expand All @@ -17,6 +17,10 @@ def title(self):
def total_time(self):
    """Parse the minutes from the parent of the itemprop="totalTime" element."""
    time_tag = self.soup.find(itemprop='totalTime')
    return get_minutes(time_tag.parent)


def yields(self):
    """Parse the yield from the parent of the itemprop="recipeYield" element."""
    yield_tag = self.soup.find(itemprop='recipeYield')
    return get_yields(yield_tag.parent)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
14 changes: 13 additions & 1 deletion recipe_scrapers/cookstr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class Cookstr(AbstractScraper):
Expand All @@ -26,6 +26,18 @@ def total_time(self):
total_time += get_minutes(time.parent.parent)
return total_time

def yields(self):
    """
    Return the yield string of the first 'attrLabel' span labelled "Serves".

    :return: The string produced by get_yields for that section, or 0 when
        no "Serves" label is found (the fallback value is unchanged).
    """
    sections = self.soup.findAll(
        'span',
        {'class': 'attrLabel'}
    )
    for section in sections:
        serves = section.find(text='Serves')
        if serves:
            # get_yields returns a string, so it cannot be added to an
            # integer total; return the first match instead of summing.
            return get_yields(serves.parent.parent)
    return 0

def ingredients(self):
ingredients = self.soup.find(
'div',
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/epicurious.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


from ._abstract import AbstractScraper
from ._utils import normalize_string
from ._utils import normalize_string, get_yields


class Epicurious(AbstractScraper):
Expand All @@ -21,6 +21,12 @@ def title(self):
def total_time(self):
    """This scraper does not extract a total time; it always returns 0."""
    no_time = 0
    return no_time

def yields(self):
    """Parse the yield from the <dd itemprop="recipeYield"> element."""
    yield_tag = self.soup.find('dd', {'itemprop': 'recipeYield'})
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
9 changes: 8 additions & 1 deletion recipe_scrapers/finedininglovers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class FineDiningLovers(AbstractScraper):
Expand All @@ -20,6 +20,13 @@ def total_time(self):
{'itemprop': 'prepTime'})
)


def yields(self):
    """Parse the yield from the <span itemprop="recipeYield"> element."""
    yield_tag = self.soup.find('span', {'itemprop': 'recipeYield'})
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
9 changes: 8 additions & 1 deletion recipe_scrapers/foodnetwork.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class FoodNetwork(AbstractScraper):
Expand All @@ -17,6 +17,13 @@ def total_time(self):
{'class': 'm-RecipeInfo__a-Description--Total'})
)

def yields(self):
    """Parse the yield from the description <span> inside the yield <ul>."""
    yield_list = self.soup.find('ul', {'class': 'o-RecipeInfo__m-Yield'})
    description_tag = yield_list.find(
        'span',
        {'class': 'o-RecipeInfo__a-Description'}
    )
    return get_yields(description_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'p',
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/foodrepublic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class FoodRepublic(AbstractScraper):
Expand All @@ -24,6 +24,12 @@ def total_time(self):
)
])

def yields(self):
    """Parse the yield from the <span itemprop="recipeYield"> element."""
    yield_tag = self.soup.find('span', {'itemprop': 'recipeYield'})
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
10 changes: 9 additions & 1 deletion recipe_scrapers/geniuskitchen.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class GeniusKitchen(AbstractScraper):

Expand All @@ -16,6 +17,13 @@ def total_time(self):
{'class': 'time'})
)

def yields(self):
    """Parse the yield from the count <span> inside the servings <td>."""
    servings_cell = self.soup.find('td', {'class': 'servings'})
    count_tag = servings_cell.find('span', {'class': 'count'})
    return get_yields(count_tag)


def ingredients(self):
ingredients = []
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/giallozafferano.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields
import json


Expand All @@ -25,6 +25,12 @@ def total_time(self):
)
])

def yields(self):
    """Parse the yield from the <li class="yield"> element."""
    yield_tag = self.soup.find('li', {'class': 'yield'})
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'dd',
Expand Down
3 changes: 3 additions & 0 deletions recipe_scrapers/hellofresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ def total_time(self):
{'data-translation-id': "recipe-detail.preparation-time"}
).parent.parent)

def yields(self):
    """This scraper does not extract a yield; it always returns 0."""
    no_yield = 0
    return no_yield

def ingredients(self):
ingredients_container = self.soup.find(
'div',
Expand Down
3 changes: 3 additions & 0 deletions recipe_scrapers/hundredandonecookbooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def total_time(self):
{'class': 'preptime'})
)

def yields(self):
    """This scraper does not extract a yield; it always returns 0."""
    no_yield = 0
    return no_yield

def ingredients(self):
ingredients = self.soup.find(
'div',
Expand Down
8 changes: 7 additions & 1 deletion recipe_scrapers/inspiralized.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._abstract import AbstractScraper
from ._utils import get_minutes, normalize_string
from ._utils import get_minutes, normalize_string, get_yields


class Inspiralized(AbstractScraper):
Expand All @@ -17,6 +17,12 @@ def total_time(self):
{'itemprop': 'totalTime'})
)

def yields(self):
    """Parse the yield from the <span itemprop="recipeYield"> element."""
    yield_tag = self.soup.find('span', {'itemprop': 'recipeYield'})
    return get_yields(yield_tag)

def ingredients(self):
ingredients = self.soup.findAll(
'li',
Expand Down
Loading

0 comments on commit 01ee1f5

Please sign in to comment.