diff --git a/README.rst b/README.rst
index a493ebd..cd2139e 100644
--- a/README.rst
+++ b/README.rst
@@ -59,6 +59,41 @@ Fetch some package metadata and get a ``fetchcode.packagedcode_models.Package``
     >>> list(package.info('pkg:rubygems/files'))
     [Package(type='rubygems', namespace=None, name='files', version=None)]
 
+Fetch a purl and get a ``fetchcode.Response`` object back::
+
+    >>> from fetchcode import fetch
+    >>> f = fetch('pkg:swift/github.com/Alamofire/Alamofire@5.4.3')
+    >>> f.location
+    '/tmp/tmp_cm02xsg'
+    >>> f.content_type
+    'application/zip'
+    >>> f.url
+    'https://github.com/Alamofire/Alamofire/archive/5.4.3.zip'
+
+Ecosystems supported for fetching a purl from fetchcode:
+
+- alpm
+- apk
+- bitbucket
+- cargo
+- composer
+- conda
+- cpan
+- cran
+- deb
+- gem
+- generic
+- github
+- golang
+- hackage
+- hex
+- luarocks
+- maven
+- npm
+- nuget
+- pub
+- pypi
+- swift
 
 License
 --------
diff --git a/requirements.txt b/requirements.txt
index b7daca1..1485f8e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,8 +41,8 @@ MarkupSafe==2.0.1
 more-itertools==8.13.0
 normality==2.3.3
 packagedcode-msitools==0.101.210706
-packageurl-python==0.9.9
-packaging==21.3
+packageurl-python==0.17.4
+packaging==24.0
 parameter-expansion-patched==0.3.1
 patch==1.16
 pdfminer-six==20220506
diff --git a/src/fetchcode/__init__.py b/src/fetchcode/__init__.py
index 82523d6..d4403c5 100644
--- a/src/fetchcode/__init__.py
+++ b/src/fetchcode/__init__.py
@@ -21,6 +21,9 @@
 from urllib.parse import urlparse
 
 import requests
+from packageurl.contrib import purl2url
+
+from fetchcode.utils import _http_exists
 
 
 class Response:
@@ -89,24 +92,82 @@ def fetch_ftp(url, location):
     return resp
 
 
+def resolve_purl(purl):
+    """
+    Resolve a Package URL (PURL) to a download URL.
+
+    Try the purl2url library first and fall back to fetchcode's own
+    download_urls module. Return the first candidate URL for which
+    `_http_exists` is true, or None when no resolver yields such a URL.
+    """
+    from fetchcode.download_urls import download_url as get_download_url_from_fetchcode
+
+    for resolver in (purl2url.get_download_url, get_download_url_from_fetchcode):
+        url = resolver(purl)
+        if url and _http_exists(url):
+            return url
+    return None
+
+
+def get_resolved_url(url, scheme):
+    """
+    Return a (url, scheme) tuple resolved from `url` using the handler
+    registered for `scheme`.
+    Raise ValueError if no handler is registered for `scheme`.
+    """
+    resolution_by_scheme = {
+        "pkg": resolve_url_from_purl,
+    }
+    resolution_handler = resolution_by_scheme.get(scheme)
+    if not resolution_handler:
+        raise ValueError(f"Not a supported/known scheme: {scheme}")
+    return resolution_handler(url)
+
+
+def resolve_url_from_purl(url):
+    """
+    Resolve a Package URL (PURL) to a valid URL.
+    Return a (url, scheme) tuple for the resolved URL.
+    Raises ValueError if the PURL cannot be resolved.
+    """
+    url = resolve_purl(url)
+    if not url:
+        raise ValueError("Could not resolve PURL to a valid URL.")
+    scheme = get_url_scheme(url)
+    return url, scheme
+
+
+def get_url_scheme(url):
+    """
+    Return the scheme of the given URL.
+    """
+    return urlparse(url).scheme
+
+
 def fetch(url):
     """
     Return a `Response` object built from fetching the content at the `url` URL string and
     store content at a temporary file.
+
+    A Package URL (`pkg:` scheme) is first resolved to a download URL.
+    Raise an Exception if the scheme is not supported.
     """
+    scheme = get_url_scheme(url)
+
+    if scheme in ["pkg"]:
+        url, scheme = get_resolved_url(url, scheme)
+
+    fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}
+    fetcher = fetchers.get(scheme)
+    if not fetcher:
+        # Reject the scheme before creating the temporary file so that an
+        # unsupported URL does not leave an orphaned file on disk.
+        raise Exception(f"Not a supported/known scheme: {scheme}.")
 
     temp = tempfile.NamedTemporaryFile(delete=False)
     location = temp.name
 
-    url_parts = urlparse(url)
-    scheme = url_parts.scheme
-
-    fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}
-
-    if scheme in fetchers:
-        return fetchers.get(scheme)(url, location)
-
-    raise Exception("Not a supported/known scheme.")
+    return fetcher(url, location)
 
 
 def fetch_json_response(url):
diff --git a/src/fetchcode/composer.py b/src/fetchcode/composer.py
index 32b73f0..3188d00 100644
--- a/src/fetchcode/composer.py
+++ b/src/fetchcode/composer.py
@@ -26,7 +26,6 @@ class Composer:
 
     @classmethod
     def get_download_url(cls, purl):
-
         """
         Return the download URL for a Composer PURL.
         """
diff --git a/tests/test_fetch.py b/tests/test_fetch.py
index 1dcf746..c4adccb 100644
--- a/tests/test_fetch.py
+++ b/tests/test_fetch.py
@@ -19,6 +19,8 @@
 import pytest
 
 from fetchcode import fetch
+from fetchcode import resolve_purl
+from fetchcode import resolve_url_from_purl
 
 
 @mock.patch("fetchcode.requests.get")
@@ -63,3 +65,115 @@ def test_fetch_with_scheme_not_present():
         url = "abc://speedtest/1KB.zip"
         response = fetch(url=url)
         assert "Not a supported/known scheme." == e_info
+
+
+@mock.patch("fetchcode._http_exists")
+@mock.patch("fetchcode.fetch_http")
+@mock.patch("fetchcode.pypi.fetch_json_response")
+def test_fetch_purl_with_fetchcode(mock_fetch_json_response, mock_fetch_http, mock_http_exists):
+    mock_fetch_http.return_value = "mocked_purl_response"
+    mock_http_exists.return_value = True
+    mock_fetch_json_response.return_value = {
+        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
+    }
+
+    response = fetch("pkg:pypi/sample@1.0.0")
+
+    assert response == "mocked_purl_response"
+    mock_http_exists.assert_called_once()
+    mock_fetch_http.assert_called_once()
+
+
+@mock.patch("fetchcode._http_exists")
+@mock.patch("fetchcode.fetch_http")
+def test_fetch_purl_with_purl2url(mock_fetch_http, mock_http_exists):
+    mock_fetch_http.return_value = "mocked_purl_response"
+    mock_http_exists.return_value = True
+
+    response = fetch("pkg:alpm/sample@1.0.0")
+
+    assert response == "mocked_purl_response"
+    mock_http_exists.assert_called_once()
+    mock_fetch_http.assert_called_once()
+
+
+@mock.patch("fetchcode.pypi.fetch_json_response")
+def test_fetch_invalid_purl(mock_fetch_json_response):
+    mock_fetch_json_response.return_value = {}
+
+    with pytest.raises(Exception, match="No download URL found for invalid-package version 1.0.0"):
+        fetch("pkg:pypi/invalid-package@1.0.0")
+
+
+def test_fetch_unsupported_scheme():
+    with pytest.raises(Exception, match="Not a supported/known scheme"):
+        fetch("s3://bucket/object")
+
+
+def test_resolve_url_from_purl_invalid():
+    with pytest.raises(ValueError, match="Could not resolve PURL to a valid URL."):
+        fetch("pkg:invalid/invalid-package@1.0.0")
+
+
+@mock.patch("fetchcode._http_exists")
+def test_resolve_url_from_purl_using_purl2url(mock_http_exists):
+    mock_http_exists.return_value = True
+
+    url, _ = resolve_url_from_purl("pkg:swift/github.com/Alamofire/Alamofire@5.4.3")
+    assert url == "https://github.com/Alamofire/Alamofire/archive/5.4.3.zip"
+    mock_http_exists.assert_called_once_with(
+        "https://github.com/Alamofire/Alamofire/archive/5.4.3.zip"
+    )
+
+
+@mock.patch("fetchcode._http_exists")
+@mock.patch("fetchcode.pypi.fetch_json_response")
+def test_resolve_url_from_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
+    mock_http_exists.return_value = True
+    mock_fetch_json_response.return_value = {
+        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
+    }
+
+    url, _ = resolve_url_from_purl("pkg:pypi/example@1.0.0")
+    assert url == "https://example.com/sample-1.0.0.zip"
+    mock_http_exists.assert_called_once_with("https://example.com/sample-1.0.0.zip")
+
+
+def test_resolve_purl_invalid():
+    assert resolve_purl("pkg:invalid/invalid-package@1.0.0") is None
+
+
+def test_resolve_purl_using_purl2url():
+    url = resolve_purl("pkg:pub/http@0.13.3")
+    assert url == "https://pub.dev/api/archives/http-0.13.3.tar.gz"
+
+
+@mock.patch("fetchcode._http_exists")
+def test_resolve_purl_using_purl2url_url_does_not_exists(mock_http_exists):
+    mock_http_exists.return_value = False
+    url = resolve_purl("pkg:pub/http@0.13.3")
+    assert url is None
+
+
+@mock.patch("fetchcode._http_exists")
+@mock.patch("fetchcode.pypi.fetch_json_response")
+def test_resolve_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
+    mock_fetch_json_response.return_value = {
+        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
+    }
+    mock_http_exists.return_value = True
+    url = resolve_purl("pkg:pypi/example@1.0.0")
+    assert url == "https://example.com/sample-1.0.0.zip"
+
+
+@mock.patch("fetchcode._http_exists")
+@mock.patch("fetchcode.pypi.fetch_json_response")
+def test_resolve_purl_using_fetchcode_url_does_not_exists(
+    mock_fetch_json_response, mock_http_exists
+):
+    mock_fetch_json_response.return_value = {
+        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
+    }
+    mock_http_exists.return_value = False
+    url = resolve_purl("pkg:pypi/example@1.0.0")
+    assert url is None