diff --git a/docs/inputs.rst b/docs/inputs.rst
index cf16e9d3ed..2e78fd4a4b 100644
--- a/docs/inputs.rst
+++ b/docs/inputs.rst
@@ -81,6 +81,25 @@ Fetches packages from the `npm registry `_::
Resolves to: ``https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz``
+PyPI (Python)
+^^^^^^^^^^^^^
+
+Fetches packages from `PyPI <https://pypi.org/>`_::
+
+ pkg:pypi/django@5.0
+
+Resolves to: ``https://files.pythonhosted.org/packages/.../Django-5.0.tar.gz``
+
+.. note::
+ When multiple distributions are available for a release, the **sdist**
+ (source distribution) is preferred.
+
+If no version is provided, the **latest available release** will be fetched::
+
+ pkg:pypi/django
+
+Resolves to, for example: ``https://files.pythonhosted.org/packages/.../django-5.2.8.tar.gz``
+
Hackage (Haskell)
^^^^^^^^^^^^^^^^^
diff --git a/pyproject.toml b/pyproject.toml
index f0ae21f332..705eee1bbd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,6 +67,7 @@ dependencies = [
"fingerprints==1.2.3",
"normality==2.6.1",
# FetchCode
+ "fetchcode==0.8.0",
"fetchcode-container==1.2.3.210512; sys_platform == 'linux'",
# Inspectors
"elf-inspector==0.0.3",
diff --git a/scanpipe/pipes/fetch.py b/scanpipe/pipes/fetch.py
index e9c328ca1d..f1b249fec9 100644
--- a/scanpipe/pipes/fetch.py
+++ b/scanpipe/pipes/fetch.py
@@ -38,6 +38,7 @@
from commoncode import command
from commoncode.hash import multi_checksums
from commoncode.text import python_safe_name
+from fetchcode.pypi import Pypi as PyPIFetcher
from packageurl import PackageURL
from packageurl.contrib import purl2url
from plugincode.location_provider import get_location
@@ -324,12 +325,18 @@ def fetch_git_repo(url, to=None):
def fetch_package_url(url):
# Ensure the provided Package URL is valid, or raise a ValueError.
- PackageURL.from_string(url)
+ purl = PackageURL.from_string(url)
# Resolve a Download URL using purl2url.
if download_url := purl2url.get_download_url(url):
return fetch_http(download_url)
+ # PyPI is not supported by purl2url.
+ # It requires an API call to resolve download URLs.
+ if purl.type == "pypi":
+ if download_url := PyPIFetcher.get_download_url(url, preferred_type="sdist"):
+ return fetch_http(download_url)
+
raise ValueError(f"Could not resolve a download URL for {url}.")
diff --git a/scanpipe/tests/pipes/test_fetch.py b/scanpipe/tests/pipes/test_fetch.py
index a53b86d267..6157d2a026 100644
--- a/scanpipe/tests/pipes/test_fetch.py
+++ b/scanpipe/tests/pipes/test_fetch.py
@@ -43,6 +43,7 @@ def test_scanpipe_pipes_fetch_get_fetcher(self):
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url))
self.assertEqual(fetch.fetch_git_repo, fetch.get_fetcher(git_http_url + "/"))
self.assertEqual(fetch.fetch_package_url, fetch.get_fetcher("pkg:npm/d3@5.8.0"))
+ self.assertEqual(fetch.fetch_package_url, fetch.get_fetcher("pkg:pypi/django"))
with self.assertRaises(ValueError) as cm:
fetch.get_fetcher("")
@@ -108,6 +109,19 @@ def test_scanpipe_pipes_fetch_package_url(self, mock_get):
downloaded_file = fetch.fetch_package_url(package_url)
self.assertTrue(Path(downloaded_file.directory, "filename.zip").exists())
+ @mock.patch("fetchcode.pypi.fetch_json_response")
+ @mock.patch("requests.sessions.Session.get")
+ def test_scanpipe_pipes_fetch_pypi_package_url(self, mock_get, mock_fetch_json):
+ package_url = "pkg:pypi/django@5.2"
+ download_url = "https://files.pythonhosted.org/packages/Django-5.2.tar.gz"
+
+ mock_get.return_value = make_mock_response(url=download_url)
+ mock_fetch_json.return_value = {"urls": [{"url": download_url}]}
+
+ downloaded_file = fetch.fetch_package_url(package_url)
+ self.assertEqual(download_url, mock_get.call_args[0][0])
+ self.assertTrue(Path(downloaded_file.directory, "Django-5.2.tar.gz").exists())
+
@mock.patch("scanpipe.pipes.fetch.get_docker_image_platform")
@mock.patch("scanpipe.pipes.fetch._get_skopeo_location")
@mock.patch("scanpipe.pipes.fetch.run_command_safely")