Skip to content

Commit

Permalink
Use isodate library to attempt ISO 8601 duration parsing in get_minutes (#610)
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison committed Oct 12, 2022
1 parent 754a316 commit 39fa50b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
14 changes: 13 additions & 1 deletion recipe_scrapers/_utils.py
@@ -1,5 +1,8 @@
# mypy: disallow_untyped_defs=False

import html
import isodate
import math
import re

from ._exceptions import ElementNotFoundInHtml
Expand Down Expand Up @@ -30,7 +33,7 @@
SERVE_REGEX_TO = re.compile(r"\d+(\s+to\s+|-)\d+", flags=re.I | re.X)


def get_minutes(element, return_zero_on_not_found=False):
def get_minutes(element, return_zero_on_not_found=False): # noqa: C901: TODO
if element is None:
# to be removed
if return_zero_on_not_found:
Expand All @@ -47,6 +50,15 @@ def get_minutes(element, return_zero_on_not_found=False):
time_text = element
else:
time_text = element.get_text()

# attempt iso8601 duration parsing
if time_text.startswith("PT"):
try:
duration = isodate.parse_duration(time_text)
return math.ceil(duration.total_seconds() / 60)
except Exception:
pass

if time_text.startswith("P") and "T" in time_text:
time_text = time_text.split("T", 2)[1]
if "-" in time_text:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -27,6 +27,7 @@
install_requires=[
"beautifulsoup4>=4.10.0",
"extruct>=0.8.0",
"isodate>=0.6.1",
"requests>=2.19.1",
"types-beautifulsoup4>=4.11.6",
"types-requests>=2.28.10",
Expand Down
12 changes: 12 additions & 0 deletions tests/library/test_utils.py
Expand Up @@ -71,3 +71,15 @@ def test_get_minutes_handles_dashes(self):
def test_get_minutes_handles_to(self):
    # A "<low> to <high>" range is expected to resolve to the upper bound.
    self.assertEqual(20, get_minutes("15 to 20 minutes"))

# ISO 8601 duration strings mapped to the whole number of minutes that
# get_minutes is expected to return for each of them.
iso8601_fixtures = {
"PT1H": 60,
"PT20M": 20,
"PT2H10M": 130,
# 9 minutes 30 seconds is 9.5 minutes; the expected result is rounded up.
"PT0H9M30S": 10,
}

def test_get_minutes_handles_iso8601(self):
    # Each fixture runs inside its own subTest so that one failing duration
    # string is reported individually without hiding the remaining cases.
    for duration_text in self.iso8601_fixtures:
        expected = self.iso8601_fixtures[duration_text]
        with self.subTest(text=duration_text):
            self.assertEqual(expected, get_minutes(duration_text))

0 comments on commit 39fa50b

Please sign in to comment.