Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add aldi.com.au #1139

Merged
merged 11 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .afghankitchenrecipes import AfghanKitchenRecipes
from .akispetretzikis import AkisPetretzikis
from .albertheijn import AlbertHeijn
from .aldi import Aldi
from .alittlebityummy import ALittleBitYummy
from .allrecipes import AllRecipes
from .allthehealthythings import AllTheHealthyThings
Expand Down Expand Up @@ -345,6 +346,7 @@
AfghanKitchenRecipes.host(): AfghanKitchenRecipes,
AkisPetretzikis.host(): AkisPetretzikis,
AlbertHeijn.host(): AlbertHeijn,
Aldi.host(): Aldi,
AllRecipes.host(): AllRecipes,
AllTheHealthyThings.host(): AllTheHealthyThings,
AllTomat.host(): AllTomat,
Expand Down
97 changes: 97 additions & 0 deletions recipe_scrapers/aldi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# mypy: allow-untyped-defs
import re

from ._abstract import AbstractScraper
from ._exceptions import ElementNotFoundInHtml
mlduff marked this conversation as resolved.
Show resolved Hide resolved
from ._utils import get_minutes, get_yields


class Aldi(AbstractScraper):
    """Scraper for recipe pages on aldi.com.au.

    Every value is read from the parsed page held in ``self.soup``. Times and
    yields sit next to bold (``<b>``) labels and are located with
    :meth:`_get_value`.
    """

    # Case-insensitive patterns for the bold labels that introduce each value.
    _PREP_LABEL = re.compile(r"prep", re.IGNORECASE)
    _COOK_LABEL = re.compile(r"cook", re.IGNORECASE)
    _YIELD_LABEL = re.compile(r"makes|serves", re.IGNORECASE)

    @classmethod
    def host(cls):
        """Return the domain this scraper handles."""
        return "aldi.com.au"

    def site_name(self):
        """Return the display name of the site."""
        return "Aldi"

    def author(self):
        """Return the ``content`` of the page's ``<meta name="author">`` tag."""
        return self.soup.find("meta", {"name": "author"}).get("content")

    def title(self):
        """Return the text of the first ``<h1>`` on the page."""
        return self.soup.find("h1").text

    def category(self):
        """Return the category named by the active navigation tab.

        Everything from the first " Recipe" onwards is dropped from the tab
        text (presumably the tab reads e.g. "Breakfast Recipes" -- confirm
        against the live markup). If the tab text has no " Recipe" suffix it
        is returned unchanged.

        :raises IndexError: when no active tab link is selected.
        """
        active_tab = self.soup.select(
            "a.tab-nav--link.dropdown--list--link.m-active"
        )[0]
        # partition() leaves the text intact when the suffix is missing;
        # slicing with the -1 that str.find() returns would silently drop
        # the last character instead.
        name, _, _ = active_tab.text.partition(" Recipe")
        return name

    def prep_time(self):
        """Return the value beside the "prep" label, parsed by ``get_minutes``."""
        return get_minutes(self._get_value(self._PREP_LABEL))

    def cook_time(self):
        """Return the value beside the "cook" label, parsed by ``get_minutes``."""
        return get_minutes(self._get_value(self._COOK_LABEL))

    def total_time(self):
        """Return the sum of the prep and cook times.

        A part that raises ``ElementNotFoundInHtml`` contributes nothing, so
        the result is 0 when neither time can be read.
        """
        total = 0
        for read_part in (self.prep_time, self.cook_time):
            try:
                total += read_part()
            except ElementNotFoundInHtml:
                # Best effort: keep whatever part could be read.
                continue
        return total

    def yields(self):
        """Return the value beside the "makes"/"serves" label via ``get_yields``."""
        value = self._get_value(self._YIELD_LABEL)
        # NOTE(review): when the label is missing ``value`` is None and
        # str() turns it into the literal "None" -- confirm how get_yields
        # is expected to treat that input.
        return get_yields(str(value))

    def image(self):
        """Return the ``src`` of the recipe image, or ``None`` if not found."""
        figure = self.soup.find(
            "figure", {"class": "csc-textpic-image csc-textpic-last"}
        )
        if not figure:
            return None
        img = figure.find("img")
        if not img:
            return None
        return img.get("src")

    def ingredients(self):
        """Return the stripped text of each item in the ingredients list.

        The list is the first ``<ul>`` sibling following the ``<h2>`` whose
        text matches "Ingredients".
        """
        heading = self.soup.find("h2", string=re.compile("Ingredients"))
        ingredient_list = heading.find_next_sibling("ul")
        return [item.text.strip() for item in ingredient_list.find_all("li")]

    def instructions(self):
        """Return the steps of the first ``<ol>``, one per line."""
        steps = self.soup.find("ol").find_all("li")
        return "\n".join(step.text.strip() for step in steps)

    def _get_value(self, label):
        """Return the text that follows a bold label.

        :param label: string or compiled pattern matched against the text of
            ``<b>`` tags.
        :return: ``None`` when no ``<b>`` matches. Otherwise the non-empty
            texts of the nodes after the first ``<br>`` sibling that follows
            the label, joined by single spaces (``""`` when there is no such
            ``<br>``).
        """
        label_tag = self.soup.find("b", string=label)
        if not label_tag:
            return None

        first_break = label_tag.find_next_sibling("br")
        if not first_break:
            return ""

        # Walk the trailing siblings exactly once. Restarting the walk at
        # every later <br> would append the same trailing text repeatedly.
        parts = []
        for node in first_break.next_siblings:
            text = node.text
            if text:
                parts.append(text)

        return " ".join(parts)
mlduff marked this conversation as resolved.
Show resolved Hide resolved
40 changes: 40 additions & 0 deletions tests/test_data/aldi.com.au/aldi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"author": "ALDI AU",
"canonical_url": "https://www.aldi.com.au/recipes/breakfast-recipes/overloaded-cheese-toastie-recipe/",
"host": "aldi.com.au",
"image": "https://www.aldi.com.au/fileadmin/_processed_/8/b/csm_1000340_WK23_PD_455x315__OVERLOADED-CHEESE-TOASTIE__1_RET_4506d46579.jpg",
"category": "Breakfast",
"ingredients": [
"1 tbsp Remano pesto",
"1 slice Westacre tasty cheese",
"3 slices tomato",
"2 slices sourdough bread",
"Handful of The Fresh Salad Co baby spinach"
],
"ingredient_groups": [
{
"purpose": null,
"ingredients": [
"1 tbsp Remano pesto",
"1 slice Westacre tasty cheese",
"3 slices tomato",
"2 slices sourdough bread",
"Handful of The Fresh Salad Co baby spinach"
]
}
],
"instructions": "Preheat a sandwich press.\nSpread pesto on one slice of bread and layer with cheese, tomato and spinach and close the sandwich with the remaining slice of bread.\nPlace the sandwich in the press and cook for 3-4 minutes or until the cheese melts and the sandwich is golden brown.\nAllow to cool slightly before serving.",
"instructions_list": [
"Preheat a sandwich press.",
"Spread pesto on one slice of bread and layer with cheese, tomato and spinach and close the sandwich with the remaining slice of bread.",
"Place the sandwich in the press and cook for 3-4 minutes or until the cheese melts and the sandwich is golden brown.",
"Allow to cool slightly before serving."
],
"language": "en-AU",
"site_name": "Aldi",
"title": "Overloaded Cheese Toastie Recipe",
"prep_time": 5,
"cook_time": 5,
"total_time": 10,
"yields": "1 serving"
}
Loading
Loading