diff --git a/README.md b/README.md index 31853ccd..d5dbde78 100644 --- a/README.md +++ b/README.md @@ -235,6 +235,8 @@ set single pattern containing two nested match groups for both url and version: To disable sorting and get first matched version/url, set `sort-matches` to `false`. +The [`versions`](#version-constraining) property is supported. + #### URL templates The HTML checker also supports building the download URL using diff --git a/src/checkers/htmlchecker.py b/src/checkers/htmlchecker.py index 18923152..de466988 100644 --- a/src/checkers/htmlchecker.py +++ b/src/checkers/htmlchecker.py @@ -21,39 +21,23 @@ import logging import re import urllib.parse -from distutils.version import LooseVersion import io import codecs import typing as t import aiohttp from yarl import URL +import semver -from ..lib import NETWORK_ERRORS +from ..lib import NETWORK_ERRORS, OPERATORS_SCHEMA from ..lib.externaldata import ExternalBase, ExternalData from ..lib.errors import CheckerMetadataError, CheckerQueryError, CheckerFetchError from ..lib.checkers import Checker +from ..lib.utils import filter_versioned_items, FallbackVersion log = logging.getLogger(__name__) -def _get_latest( - html: str, - pattern: re.Pattern, - sort_key: t.Optional[t.Callable[[re.Match], t.Any]] = None, -) -> re.Match: - matches = list(pattern.finditer(html)) - if not matches: - raise CheckerQueryError(f"Pattern '{pattern.pattern}' didn't match anything") - if sort_key is None or len(matches) == 1: - result = matches[0] - else: - log.debug("%s matched multiple times, selected latest", pattern.pattern) - result = max(matches, key=sort_key) - log.debug("%s matched %s", pattern.pattern, result) - return result - - def _get_pattern( checker_data: t.Dict, pattern_name: str, expected_groups: int = 1 ) -> t.Optional[re.Pattern]: @@ -73,6 +57,19 @@ def _get_pattern( return pattern +def _semantic_version(version: str) -> semver.VersionInfo: + try: + return semver.VersionInfo.parse(version) + except ValueError as err: + raise CheckerQueryError("Can't parse version") from err + + +_VERSION_SCHEMES = { + "loose": FallbackVersion, + "semantic": _semantic_version, +} + + class HTMLChecker(Checker): CHECKER_DATA_TYPE = "html" CHECKER_DATA_SCHEMA = { @@ -83,6 +80,11 @@ class HTMLChecker(Checker): "version-pattern": {"type": "string", "format": "regex"}, "url-template": {"type": "string", "format": "regex"}, "sort-matches": {"type": "boolean"}, + "versions": OPERATORS_SCHEMA, + "version-scheme": { + "type": "string", + "enum": list(_VERSION_SCHEMES), + }, }, "allOf": [ {"required": ["url"]}, @@ -141,23 +143,46 @@ async def check(self, external_data: ExternalBase): version_pattern = _get_pattern(external_data.checker_data, "version-pattern", 1) url_template = external_data.checker_data.get("url-template") sort_matches = external_data.checker_data.get("sort-matches", True) + version_cls = _VERSION_SCHEMES[ + external_data.checker_data.get("version-scheme", "loose") + ] + constraints = [ + (o, version_cls(v)) + for o, v in external_data.checker_data.get("versions", {}).items() + ] assert combo_pattern or (version_pattern and url_template) html = await self._get_text(url) + def _get_latest(pattern: re.Pattern, ver_group: int) -> re.Match: + matches = filter_versioned_items( + items=pattern.finditer(html), + constraints=constraints, + to_version=lambda m: version_cls(m.group(ver_group)), + sort=sort_matches, + ) + if not matches: + raise CheckerQueryError( + f"Pattern '{pattern.pattern}' didn't match anything" + ) + + try: + # NOTE Returning last match when sort is requested and first match otherwise + # doesn't seem sensible, but we need to retain backward compatibility + result = matches[-1 if sort_matches else 0] + except IndexError as err: + raise CheckerQueryError( + f"Pattern '{pattern.pattern}' didn't match anything" + ) from err + + log.debug("%s matched %s", pattern.pattern, result) + return result + if combo_pattern: - latest_url, latest_version = _get_latest( - html, - combo_pattern, - (lambda m: LooseVersion(m.group(2))) if sort_matches else None, - ).group(1, 2) + latest_url, latest_version = _get_latest(combo_pattern, 2).group(1, 2) else: assert version_pattern and url_template - latest_version = _get_latest( - html, - version_pattern, - (lambda m: LooseVersion(m.group(1))) if sort_matches else None, - ).group(1) + latest_version = _get_latest(version_pattern, 1).group(1) latest_url = self._substitute_placeholders(url_template, latest_version) abs_url = urllib.parse.urljoin(base=url, url=latest_url) diff --git a/tests/org.x.xeyes.yml b/tests/org.x.xeyes.yml index 48d3897b..911d4c15 100644 --- a/tests/org.x.xeyes.yml +++ b/tests/org.x.xeyes.yml @@ -32,6 +32,34 @@ modules: pattern: (https://sourceforge.net/.+/qrupdate-([\d\.]+\d)\.tar\.gz)/download sort-matches: false + - name: libX11 + sources: + - type: archive + url: http://some-incorrect.url/libX11.tar.gz + sha256: "0000000000000000000000000000000000000000000000000000000000000000" + x-checker-data: + type: html + url: https://www.x.org/releases/individual/lib/ + version-pattern: libX11-([\d\.]+).tar.gz + url-template: libX11-$version.tar.gz + versions: + ==: 1.7.5 + + - name: semver + sources: + - type: file + url: http://example.com/semver.txt + sha256: "0000000000000000000000000000000000000000000000000000000000000000" + x-checker-data: + type: html + # printf '%s\n' v1.0.0 v1.0.0+patch1 v2.0.0-rc1 v2.0.0 | base64 + url: http://httpbingo.org/base64/djEuMC4wCnYxLjAuMCtwYXRjaDEKdjIuMC4wLXJjMQp2Mi4wLjAK + version-pattern: v(\d.*) + url-template: http://httpbingo.org/base64/encode/$version + versions: + <: 2.0.0-alpha + version-scheme: semantic + - name: libFS sources: - type: archive diff --git a/tests/test_htmlchecker.py b/tests/test_htmlchecker.py index 77cf71ee..6965d7ee 100755 --- a/tests/test_htmlchecker.py +++ b/tests/test_htmlchecker.py @@ -86,6 +86,8 @@ async def test_check(self): self._test_combo_pattern_nosort( self._find_by_filename(ext_data, "qrupdate-1.1.0.tar.gz") ) + self._test_version_filter(self._find_by_filename(ext_data, "libX11.tar.gz")) + self._test_semver_filter(self._find_by_filename(ext_data, "semver.txt")) self._test_no_match(self._find_by_filename(ext_data, "libFS-1.0.7.tar.bz2")) self._test_invalid_url(self._find_by_filename(ext_data, "libdoesntexist.tar")) @@ -144,6 +146,17 @@ def _test_combo_pattern_nosort(self, data): ), ) + def _test_version_filter(self, data): + self.assertIsNotNone(data) + self.assertIsNotNone(data.new_version) + self.assertEqual(data.new_version.version, "1.7.5") + + def _test_semver_filter(self, data): + self.assertIsNotNone(data) + self.assertIsNotNone(data.new_version) + self.assertIsNotNone(data.new_version.version) + self.assertEqual(data.new_version.version, "1.0.0+patch1") + def _test_no_match(self, data): self.assertIsNotNone(data) self.assertIsNone(data.new_version)