diff --git a/minecode/collectors/pub.py b/minecode/collectors/pub.py
new file mode 100644
index 00000000..2190944a
--- /dev/null
+++ b/minecode/collectors/pub.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/purldb for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import requests
+from packageurl import PackageURL
+
+from minecode.miners.pub import build_packages
+from minecode import priority_router
+from packagedb.models import PackageContentType
+
+logger = logging.getLogger(__name__)
+handler = logging.StreamHandler()
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def get_pub_package_json(name, version=None):
+    """
+    Return the metadata JSON for a package from pub.dev API.
+    Return None when pub.dev answers with an HTTP error status.
+    Example: https://pub.dev/api/packages/flutter
+    """
+    if not version:
+        url = f"https://pub.dev/api/packages/{name}"
+    else:
+        url = f"https://pub.dev/api/packages/{name}/versions/{version}"
+
+    try:
+        # requests has no default timeout: bound the wait so that a stalled
+        # connection cannot block the caller forever.
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as err:
+        logger.error(f"HTTP error occurred: {err}")
+        return None
+
+
+def map_pub_package(package_url, pipelines, priority=0):
+    """
+    Add a pub `package_url` to the PackageDB.
+
+    Merge or create a Package for each PackageData built from the pub.dev
+    metadata and add every Package returned to the scan queue with
+    `pipelines` and `priority`. Stop at the first error.
+    Return an error message when no metadata is returned for the package,
+    otherwise the error value of the last `merge_or_create_package` call, or
+    None when no PackageData was built.
+    """
+    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
+
+    name = package_url.name
+    package_json = get_pub_package_json(name=name, version=package_url.version)
+
+    if not package_json:
+        error = f"Package does not exist on pub.dev: {package_url}"
+        logger.error(error)
+        return error
+
+    packages = build_packages(package_json, package_url)
+    error = None
+    for package in packages:
+        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
+        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
+        if error:
+            break
+
+        if db_package:
+            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)
+
+    return error
+
+
+@priority_router.route("pkg:pub/.*")
+def process_request(purl_str, **kwargs):
+    """
+    Process `priority_resource_uri` containing a pub Package URL (PURL).
+
+    The optional `addon_pipelines` keyword argument is appended to
+    DEFAULT_PIPELINES and the optional `priority` one defaults to 0.
+    Return the error value of `map_pub_package` when it is truthy.
+    """
+    from minecode.model_utils import DEFAULT_PIPELINES
+
+    addon_pipelines = kwargs.get("addon_pipelines", [])
+    pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
+    priority = kwargs.get("priority", 0)
+
+    package_url = PackageURL.from_string(purl_str)
+
+    error_msg = map_pub_package(package_url, pipelines, priority)
+
+    if error_msg:
+        return error_msg
diff --git a/minecode/miners/pub.py b/minecode/miners/pub.py
new file mode 100644
index 00000000..b39fc701
--- /dev/null
+++ b/minecode/miners/pub.py
@@ -0,0 +1,78 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from packageurl import PackageURL
+from packagedcode import models as scan_models
+
+
+def build_single_package(version_info, package_name):
+    """
+    Build a single PackageData object from pub.dev version metadata.
+    `version_info` is a dict, as returned under "versions" or from
+    https://pub.dev/api/packages/{name}/versions/{version}
+    """
+    version = version_info.get("version")
+    # `or {}` also covers a "pubspec" key that is present but empty or null.
+    pubspec = version_info.get("pubspec", {}) or {}
+
+    description = pubspec.get("description")
+    homepage_url = pubspec.get("homepage")
+    repository_url = pubspec.get("repository")
+    issue_tracker = pubspec.get("issue_tracker")
+    license_decl = pubspec.get("license")
+
+    extracted_license_statement = []
+    if license_decl and license_decl.lower() != "unknown":
+        extracted_license_statement.append(license_decl)
+
+    common_data = dict(
+        name=package_name,
+        version=version,
+        description=description,
+        homepage_url=homepage_url,
+        repository_homepage_url=repository_url,
+        bug_tracking_url=issue_tracker,
+        extracted_license_statement=extracted_license_statement,
+        parties=[],
+    )
+
+    archive_url = f"https://pub.dev/packages/{package_name}/versions/{version}.tar.gz"
+
+    download_data = dict(
+        datasource_id="pub_pkginfo",
+        type="pub",
+        download_url=archive_url,
+    )
+    download_data.update(common_data)
+
+    package = scan_models.PackageData.from_data(download_data)
+    # Replace the datasource id that was passed to from_data() above.
+    package.datasource_id = "pub_api_metadata"
+    package.set_purl(PackageURL(type="pub", name=package_name, version=version))
+
+    return package
+
+
+def build_packages(metadata_dict, purl):
+    """
+    Yield one or more PackageData objects from pub.dev metadata.
+    If purl.version is set, use the single-version API response.
+    Otherwise, use the all-versions API response.
+    `purl` is a PackageURL or a Package URL string.
+    """
+    if isinstance(purl, str):
+        purl = PackageURL.from_string(purl)
+
+    purl_version = purl.version
+    package_name = purl.name
+
+    if purl_version:
+        package = build_single_package(metadata_dict, package_name)
+        yield package
+    else:
+        versions = metadata_dict.get("versions", [])
+        for version_info in versions:
+            yield build_single_package(version_info, package_name)
diff --git a/minecode/model_utils.py b/minecode/model_utils.py
index 50f04800..bea192cd 100644
--- a/minecode/model_utils.py
+++ b/minecode/model_utils.py
@@ -391,8 +391,10 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena
 
         stringify_null_purl_fields(package_data)
 
-        created_package = Package.objects.create(**package_data)
-        created_package.append_to_history(f"New Package created from URI: {package_uri}")
+        # if we try to create a package more than once it should not fail
+        created_package, created = Package.objects.get_or_create(**package_data)
+        if created:
+            created_package.append_to_history(f"New Package created from URI: {package_uri}")
 
         # This is used in the case of Maven packages created from the priority queue
         for h in history:
diff --git a/minecode/tests/collectors/test_pub.py b/minecode/tests/collectors/test_pub.py
new file mode 100644
index 00000000..af01ab69
--- /dev/null
+++ b/minecode/tests/collectors/test_pub.py
@@ -0,0 +1,58 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# purldb is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0
+# See https://github.com/nexB/purldb for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import json
+import os
+
+from django.test import TestCase as DjangoTestCase
+from packageurl import PackageURL
+
+import packagedb
+from minecode.collectors import pub
+from minecode.utils_test import JsonBasedTesting
+
+
+class PubPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
+    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")
+
+    def setUp(self):
+        super().setUp()
+        self.expected_json_loc = self.get_test_loc("pub/flutter.json")
+        with open(self.expected_json_loc) as f:
+            self.expected_json_contents = json.load(f)
+
+    def test_get_pub_package_json(self):
+        """
+        Verify get_pub_package_json() returns expected keys for a pub package.
+        """
+        json_contents = pub.get_pub_package_json(name="flutter")
+        self.assertIn("name", json_contents)
+        self.assertEqual("flutter", json_contents["name"])
+        self.assertIn("versions", json_contents)
+
+    def test_map_pub_package(self):
+        """
+        Verify map_pub_package() creates a Package in the DB with correct PURL
+        and download URL.
+        """
+        package_count = packagedb.models.Package.objects.all().count()
+        self.assertEqual(0, package_count)
+
+        package_url = PackageURL.from_string("pkg:pub/flutter@0.0.1")
+        pub.map_pub_package(package_url, ("test_pipeline",))
+
+        package_count = packagedb.models.Package.objects.all().count()
+        self.assertEqual(1, package_count)
+
+        package = packagedb.models.Package.objects.all().first()
+        expected_purl_str = "pkg:pub/flutter@0.0.1"
+        expected_download_url = "https://pub.dev/packages/flutter/versions/0.0.1.tar.gz"
+
+        self.assertEqual(expected_purl_str, package.purl)
+        self.assertEqual(expected_download_url, package.download_url)
diff --git a/minecode/tests/testfiles/pub/flutter.json b/minecode/tests/testfiles/pub/flutter.json
new file mode 100644
index 00000000..41658448
--- /dev/null
+++ b/minecode/tests/testfiles/pub/flutter.json
@@ -0,0 +1,16 @@
+{
+"version": "0.0.1",
+"pubspec": {
+"environment": {
+"sdk": ">=1.12.0 <2.0.0"
+},
+"homepage": "http://flutter.io",
+"version": "0.0.1",
+"name": "flutter",
+"author": "Flutter Authors ",
+"description": "A framework for writing Flutter applications"
+},
+"archive_url": "https://pub.dev/api/archives/flutter-0.0.1.tar.gz",
+"archive_sha256": "aec09e0c68fe848fc37089e29a64cf8dbc1e232e1e98e05af9b68114c699447d",
+"published": "2015-09-19T17:58:43.990Z"
+}
\ No newline at end of file