Skip to content

Commit

Permalink
Add description method to WikiCookbook scraper (#604)
Browse files Browse the repository at this point in the history
  • Loading branch information
TheEpic-dev committed Oct 1, 2022
1 parent 442a035 commit 03aa8b3
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
22 changes: 22 additions & 0 deletions recipe_scrapers/wikicookbook.py
Expand Up @@ -38,3 +38,25 @@ def instructions(self):
return "\n".join(
[normalize_string(instruction.get_text()) for instruction in instructions]
)

def description(self):
    """Return the introductory paragraphs of the page as one string.

    Walks the direct children of the element whose class is
    ``mw-parser-output`` and collects every non-empty ``<p>`` that does
    not contain a ``<span id="displaytitle">``. Collection stops at the
    first child whose ``id`` is ``"toc"`` or whose tag name is ``h2``.

    Returns:
        The collected paragraphs, each passed through ``normalize_string``
        and joined with a blank line. An empty string is returned when
        the container element is missing or no paragraph qualifies.
    """
    content = self.soup.find(class_="mw-parser-output")
    if content is None:
        # Without the container there is nothing to describe; iterating
        # None would otherwise fail with an unhelpful TypeError.
        return ""

    paragraphs = []
    for node in content.children:
        # Text nodes, comments and similar non-tag children carry no tag
        # name; skip them explicitly instead of relying on a swallowed
        # AttributeError.
        node_name = getattr(node, "name", None)
        if node_name is None:
            continue

        # Keep real paragraphs, skipping the one that wraps the
        # "displaytitle" span.
        # NOTE(review): presumably that span is page-title/link markup
        # rather than description text - confirm against a live page.
        if (
            node_name == "p"
            and node.text.strip()
            and not node.find("span", {"id": "displaytitle"})
        ):
            paragraphs.append(node)

        # End at the TOC or at the first section heading.
        if node.get("id") == "toc" or node_name == "h2":
            break

    return "\n\n".join(
        normalize_string(paragraph.get_text()) for paragraph in paragraphs
    )
6 changes: 6 additions & 0 deletions tests/test_wikibooks.py
Expand Up @@ -53,3 +53,9 @@ def test_instructions(self):
"Preheat oven to 350 °F (180 °C).\nBlend all ingredients, except the pie shell, together.\nPour into the unbaked pie shell.\nBake at 350 °F (180 °C) for 45 minutes.\nLet cool and serve.",
self.harvester_class.instructions(),
)

def test_description(self):
    """Check that description() yields the recipe's introductory text."""
    expected = "Pumpkin pie is a traditional American and Canadian holiday dessert. It consists of a pumpkin-based custard baked in a single pie shell. The pie is traditionally served with whipped cream."
    actual = self.harvester_class.description()
    self.assertEqual(expected, actual)

0 comments on commit 03aa8b3

Please sign in to comment.