From 743d7cd18597e039d929e002e723144af43e7349 Mon Sep 17 00:00:00 2001 From: James Addison <55152140+jayaddison@users.noreply.github.com> Date: Mon, 24 Oct 2022 18:47:57 +0100 Subject: [PATCH] Revert "Fixup: remove overwrite of schema.org data with 'mainEntity' contents during schema.org processing (#666)" (#670) This reverts commit 557cfd9b889ca60c8c40a9609573b51fe72b138b. --- recipe_scrapers/_schemaorg.py | 8 +++++++- recipe_scrapers/sallysblog.py | 4 ---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/recipe_scrapers/_schemaorg.py b/recipe_scrapers/_schemaorg.py index 8952b2b05..977da547f 100644 --- a/recipe_scrapers/_schemaorg.py +++ b/recipe_scrapers/_schemaorg.py @@ -52,6 +52,8 @@ def __init__(self, page_data, raw=False): if in_context and item_type.lower() in low_schema: self.format = syntax self.data = item + if item_type.lower() == "webpage": + self.data = self.data.get("mainEntity") return elif in_context and "@graph" in item: for graph_item in item.get("@graph", ""): @@ -59,8 +61,12 @@ def __init__(self, page_data, raw=False): if not isinstance(graph_item_type, str): continue if graph_item_type.lower() in low_schema: + in_graph = SCHEMA_ORG_HOST in graph_item.get("@context", "") self.format = syntax - if graph_item_type.lower() == "recipe": + if graph_item_type.lower() == "webpage" and in_graph: + self.data = self.data.get("mainEntity") + return + elif graph_item_type.lower() == "recipe": self.data = graph_item return diff --git a/recipe_scrapers/sallysblog.py b/recipe_scrapers/sallysblog.py index a422932cf..9debd593a 100644 --- a/recipe_scrapers/sallysblog.py +++ b/recipe_scrapers/sallysblog.py @@ -41,7 +41,3 @@ def instructions(self): for instruction in instructions ] ) - - def image(self): - image = self.soup.find("meta", {"property", "og:image"}) - return image["content"] if image else None