From 03aa8b34648ca7a2c3a21987b52a2d31b67551fb Mon Sep 17 00:00:00 2001
From: TheEpic-dev <99757023+TheEpic-dev@users.noreply.github.com>
Date: Sat, 1 Oct 2022 04:58:24 +0300
Subject: [PATCH] Add description method to WikiCookbook scraper (#604)

---
 recipe_scrapers/wikicookbook.py | 22 ++++++++++++++++++++++
 tests/test_wikibooks.py         |  6 ++++++
 2 files changed, 28 insertions(+)

diff --git a/recipe_scrapers/wikicookbook.py b/recipe_scrapers/wikicookbook.py
index 94eb3e64f..01ef30bb4 100644
--- a/recipe_scrapers/wikicookbook.py
+++ b/recipe_scrapers/wikicookbook.py
@@ -38,3 +38,25 @@ def instructions(self):
         return "\n".join(
             [normalize_string(instruction.get_text()) for instruction in instructions]
         )
+
+    def description(self):
+        paragraphs = list()
+        for tag in self.soup.find(class_="mw-parser-output"):
+            try:
+                # get all paragraphs except for links
+                if (
+                    tag.text.strip()
+                    and tag.name == "p"
+                    and not tag.find("span", {"id": "displaytitle"})
+                ):
+                    paragraphs.append(tag)
+                # End at the TOC or second section
+                if tag.attrs.get("id") == "toc" or tag.name == "h2":
+                    break
+            except AttributeError:
+                # Ignore tags that are not <p> but raise errors
+                pass
+
+        return "\n\n".join(
+            [normalize_string(paragraph.get_text()) for paragraph in paragraphs]
+        )
diff --git a/tests/test_wikibooks.py b/tests/test_wikibooks.py
index eaa4ca075..d66163e9b 100644
--- a/tests/test_wikibooks.py
+++ b/tests/test_wikibooks.py
@@ -53,3 +53,9 @@ def test_instructions(self):
             "Preheat oven to 350 °F (180 °C).\nBlend all ingredients, except the pie shell, together.\nPour into the unbaked pie shell.\nBake at 350 °F (180 °C) for 45 minutes.\nLet cool and serve.",
             self.harvester_class.instructions(),
         )
+
+    def test_description(self):
+        return self.assertEqual(
+            "Pumpkin pie is a traditional American and Canadian holiday dessert. It consists of a pumpkin-based custard baked in a single pie shell. The pie is traditionally served with whipped cream.",
+            self.harvester_class.description(),
+        )