From 43bb90e4d643124a02c1a252a8ca4069a3ee8829 Mon Sep 17 00:00:00 2001 From: Michael Duffett Date: Sat, 18 May 2024 18:09:05 +0930 Subject: [PATCH 1/9] Add aldi.com.au --- recipe_scrapers/__init__.py | 2 + recipe_scrapers/aldi.py | 97 ++ tests/test_data/aldi.com.au/aldi.json | 40 + tests/test_data/aldi.com.au/aldi.testhtml | 1235 +++++++++++++++++++++ 4 files changed, 1374 insertions(+) create mode 100644 recipe_scrapers/aldi.py create mode 100644 tests/test_data/aldi.com.au/aldi.json create mode 100644 tests/test_data/aldi.com.au/aldi.testhtml diff --git a/recipe_scrapers/__init__.py b/recipe_scrapers/__init__.py index 6977ec055..c77af6a00 100644 --- a/recipe_scrapers/__init__.py +++ b/recipe_scrapers/__init__.py @@ -16,6 +16,7 @@ from .afghankitchenrecipes import AfghanKitchenRecipes from .akispetretzikis import AkisPetretzikis from .albertheijn import AlbertHeijn +from .aldi import Aldi from .alittlebityummy import ALittleBitYummy from .allrecipes import AllRecipes from .allthehealthythings import AllTheHealthyThings @@ -345,6 +346,7 @@ AfghanKitchenRecipes.host(): AfghanKitchenRecipes, AkisPetretzikis.host(): AkisPetretzikis, AlbertHeijn.host(): AlbertHeijn, + Aldi.host(): Aldi, AllRecipes.host(): AllRecipes, AllTheHealthyThings.host(): AllTheHealthyThings, AllTomat.host(): AllTomat, diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py new file mode 100644 index 000000000..83c5aa8f5 --- /dev/null +++ b/recipe_scrapers/aldi.py @@ -0,0 +1,97 @@ +# mypy: allow-untyped-defs +import re + +from ._abstract import AbstractScraper +from ._exceptions import ElementNotFoundInHtml +from ._utils import get_minutes, get_yields + + +class Aldi(AbstractScraper): + @classmethod + def host(cls): + return "aldi.com.au" + + def site_name(self): + return "Aldi" + + def author(self): + return self.soup.find("meta", {"name": "author"}).get("content") + + def title(self): + return self.soup.find("h1").text + + def category(self): + title = self.soup.select("a.tab-nav--link.dropdown--list--link.m-active")[ + 0 + ].text + recipe_position = title.find(" Recipe") + return title[:recipe_position] + + def prep_time(self): + return get_minutes(self._get_value(re.compile("prep", re.IGNORECASE))) + + def cook_time(self): + return get_minutes(self._get_value(re.compile("cook", re.IGNORECASE))) + + def total_time(self): + total_time = 0 + + try: + total_time += self.prep_time() + except ElementNotFoundInHtml: + pass + + try: + total_time += self.cook_time() + except ElementNotFoundInHtml: + pass + + return total_time + + def yields(self): + value = self._get_value(re.compile("(makes)|(serves)", re.IGNORECASE)) + return get_yields(str(value)) + + def image(self): + figure = self.soup.find( + "figure", {"class": "csc-textpic-image csc-textpic-last"} + ) + if not figure: + return None + image = figure.find("img") + if not image: + return None + return image.get("src") + + def ingredients(self): + h2 = self.soup.find("h2", string=re.compile("Ingredients")) + list_element = h2.find_next_sibling("ul") + ingredients = [] + for li in list_element.find_all("li"): + ingredients.append(li.text.strip()) + return ingredients + + def instructions(self): + list_element = self.soup.find("ol") + instructions = [] + for li in list_element.find_all("li"): + instructions.append(li.text.strip()) + return "\n".join(instructions) + + def _get_value(self, label): + label = self.soup.find("b", string=label) + if not label: + return None + + br_tags = label.find_next_siblings("br") + + parts = [] + for br in br_tags: + next_sibling = br.next_sibling + while next_sibling: + text = next_sibling.text + if text: + parts.append(text) + next_sibling = next_sibling.next_sibling + + return " ".join(parts) diff --git a/tests/test_data/aldi.com.au/aldi.json b/tests/test_data/aldi.com.au/aldi.json new file mode 100644 index 000000000..574082517 --- /dev/null +++ b/tests/test_data/aldi.com.au/aldi.json @@ -0,0 +1,40 @@ +{ + "author": "ALDI AU", + "canonical_url": "https://www.aldi.com.au/recipes/breakfast-recipes/overloaded-cheese-toastie-recipe/", + "host": "aldi.com.au", + "image": "https://www.aldi.com.au/fileadmin/_processed_/8/b/csm_1000340_WK23_PD_455x315__OVERLOADED-CHEESE-TOASTIE__1_RET_4506d46579.jpg", + "category": "Breakfast", + "ingredients": [ + "1 tbsp Remano pesto", + "1 slice Westacre tasty cheese", + "3 slices tomato", + "2 slices sourdough bread", + "Handful of The Fresh Salad Co baby spinach" + ], + "ingredient_groups": [ + { + "purpose": null, + "ingredients": [ + "1 tbsp Remano pesto", + "1 slice Westacre tasty cheese", + "3 slices tomato", + "2 slices sourdough bread", + "Handful of The Fresh Salad Co baby spinach" + ] + } + ], + "instructions": "Preheat a sandwich press.\nSpread pesto on one slice of bread and layer with cheese, tomato and spinach and close the sandwich with the remaining slice of bread.\nPlace the sandwich in the press and cook for 3-4 minutes or until the cheese melts and the sandwich is golden brown.\nAllow to cool slightly before serving.", + "instructions_list": [ + "Preheat a sandwich press.", + "Spread pesto on one slice of bread and layer with cheese, tomato and spinach and close the sandwich with the remaining slice of bread.", + "Place the sandwich in the press and cook for 3-4 minutes or until the cheese melts and the sandwich is golden brown.", + "Allow to cool slightly before serving." + ], + "language": "en-AU", + "site_name": "Aldi", + "title": "Overloaded Cheese Toastie Recipe", + "prep_time": 5, + "cook_time": 5, + "total_time": 10, + "yields": "1 serving" +} diff --git a/tests/test_data/aldi.com.au/aldi.testhtml b/tests/test_data/aldi.com.au/aldi.testhtml new file mode 100644 index 000000000..00ff99485 --- /dev/null +++ b/tests/test_data/aldi.com.au/aldi.testhtml @@ -0,0 +1,1235 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Overloaded Cheese Toastie Recipe + + + + + +
+
+ +
+ + + + + + + + + + +
+ +
+
+ +
+ + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ + + Overloaded Cheese Toastie Recipe + + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ + + How to make an Overloaded Cheese Toastie + + +

+ + + + + + + + + + + + + + + + + + + +

Method

  1. Preheat a sandwich press.
  2. Spread pesto on one slice of bread and layer with cheese, tomato and spinach and close the sandwich with the remaining slice of bread.
  3. Place the sandwich in the press and cook for 3-4 minutes or until the cheese melts and the sandwich is golden brown.
  4. Allow to cool slightly before serving.

Option: You can swap out pesto for avocado.

+

Tip: You can use up leftover pesto on grainy crackers, pizza bases, in pasta sauces or with chicken.

+ + + + + + + + + + + + + + + + +
+ + + + + +
+ +
+
+
+ + + + +
+ +
+
+ + + + + + + + + + + + \ No newline at end of file From 6bf4325fe1188839d72f7fb5d794510ed86b6743 Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:43:00 +0930 Subject: [PATCH 2/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 83c5aa8f5..2c6bb8f2a 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -21,11 +21,12 @@ def title(self): return self.soup.find("h1").text def category(self): - title = self.soup.select("a.tab-nav--link.dropdown--list--link.m-active")[ - 0 - ].text - recipe_position = title.find(" Recipe") - return title[:recipe_position] + element = self.soup.select_one("a.tab-nav--link.dropdown--list--link.m-active") + if element: + title = element.text + recipe_position = title.find(" Recipe") + if recipe_position != -1: + return title[:recipe_position] def prep_time(self): return get_minutes(self._get_value(re.compile("prep", re.IGNORECASE))) From c46fe00c13e32be03520de2acba748b09990e213 Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:44:04 +0930 Subject: [PATCH 3/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 2c6bb8f2a..8eff35133 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -35,19 +35,7 @@ def cook_time(self): return get_minutes(self._get_value(re.compile("cook", re.IGNORECASE))) def total_time(self): - total_time = 0 - - try: - total_time += self.prep_time() - except ElementNotFoundInHtml: - pass - - try: - total_time += self.cook_time() - except ElementNotFoundInHtml: - pass - - return total_time + return (self.prep_time() or 0) + (self.cook_time() or 0) def yields(self): value = self._get_value(re.compile("(makes)|(serves)", re.IGNORECASE)) From 648c1ec19b8f37f06d83a62f87690abb75a4dc04 Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:44:09 +0930 Subject: [PATCH 4/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 8eff35133..2f7855203 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -42,15 +42,12 @@ def yields(self): return get_yields(str(value)) def image(self): - figure = self.soup.find( - "figure", {"class": "csc-textpic-image csc-textpic-last"} - ) - if not figure: - return None - image = figure.find("img") - if not image: - return None - return image.get("src") + figure = self.soup.find("figure", {"class": "csc-textpic-image csc-textpic-last"}) + if figure: + image = figure.find("img") + if image: + return image.get("src") + return None def ingredients(self): h2 = self.soup.find("h2", string=re.compile("Ingredients")) From 2cb4f5c2fef96a842cae8171844274ad0533c657 Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:44:15 +0930 Subject: [PATCH 5/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 2f7855203..8a99afda7 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -59,10 +59,7 @@ def ingredients(self): def instructions(self): list_element = self.soup.find("ol") - instructions = [] - for li in list_element.find_all("li"): - instructions.append(li.text.strip()) - return "\n".join(instructions) + return "\n".join(li.text.strip() for li in list_element.find_all("li")) def _get_value(self, label): label = self.soup.find("b", string=label) From 72a859f43aa285b1f7f55112fa2420bee75e6cce Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:44:24 +0930 Subject: [PATCH 6/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 8a99afda7..6eb1f1733 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -2,7 +2,6 @@ import re from ._abstract import AbstractScraper -from ._exceptions import ElementNotFoundInHtml from ._utils import get_minutes, get_yields From 1bc7b4b10d118d40b7d26bf062ae0cea45319ad8 Mon Sep 17 00:00:00 2001 From: Michael Duffett <46545313+mlduff@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:44:33 +0930 Subject: [PATCH 7/9] Update recipe_scrapers/aldi.py Co-authored-by: Joey <7505194+jknndy@users.noreply.github.com> --- recipe_scrapers/aldi.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index 6eb1f1733..e96c51854 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -61,19 +61,8 @@ def instructions(self): return "\n".join(li.text.strip() for li in list_element.find_all("li")) def _get_value(self, label): - label = self.soup.find("b", string=label) - if not label: - return None - - br_tags = label.find_next_siblings("br") - - parts = [] - for br in br_tags: - next_sibling = br.next_sibling - while next_sibling: - text = next_sibling.text - if text: - parts.append(text) - next_sibling = next_sibling.next_sibling - - return " ".join(parts) + label_element = self.soup.find("b", string=label) + if label_element: + parts = [sibling.strip() for sibling in label_element.find_next_siblings(string=True) if sibling.strip()] + return " ".join(parts) + return None From ca20dc6ad08c6710d156f54d7cf64eccd001e74a Mon Sep 17 00:00:00 2001 From: Michael Duffett Date: Mon, 10 Jun 2024 12:31:36 +0930 Subject: [PATCH 8/9] Update README --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index a6150fd6b..dc4190e40 100644 --- a/README.rst +++ b/README.rst @@ -100,6 +100,7 @@ Scrapers available for: - `https://aflavorjournal.com/ `_ - `https://ah.nl/ `_ - `https://akispetretzikis.com/ `_ +- `https://aldi.com.au/ `_ - `https://alexandracooks.com/ `_ - `https://alittlebityummy.com/ `_ - `https://allrecipes.com/ `_ From 2f3455958926266c178489274d7638dd72b347a4 Mon Sep 17 00:00:00 2001 From: Joey Date: Mon, 10 Jun 2024 17:46:35 -0400 Subject: [PATCH 9/9] linting --- recipe_scrapers/aldi.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/recipe_scrapers/aldi.py b/recipe_scrapers/aldi.py index e96c51854..1bcd68fc6 100644 --- a/recipe_scrapers/aldi.py +++ b/recipe_scrapers/aldi.py @@ -41,7 +41,9 @@ def yields(self): return get_yields(str(value)) def image(self): - figure = self.soup.find("figure", {"class": "csc-textpic-image csc-textpic-last"}) + figure = self.soup.find( + "figure", {"class": "csc-textpic-image csc-textpic-last"} + ) if figure: image = figure.find("img") if image: @@ -63,6 +65,10 @@ def instructions(self): def _get_value(self, label): label_element = self.soup.find("b", string=label) if label_element: - parts = [sibling.strip() for sibling in label_element.find_next_siblings(string=True) if sibling.strip()] + parts = [ + sibling.strip() + for sibling in label_element.find_next_siblings(string=True) + if sibling.strip() + ] return " ".join(parts) return None