Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions minecode/collectors/conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
from urllib.parse import urljoin

import requests
from packageurl import PackageURL
from minecode import priority_router
from minecode.miners.conda import build_packages
from minecode.utils import fetch_http, get_temp_file
from packagedb.models import PackageContentType
from packageurl.contrib.purl2url import build_conda_download_url

# Module-level logger for this collector. A StreamHandler (which writes to
# sys.stderr when no stream is given) is attached and the level is set to INFO
# as a side effect of importing this module.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def map_conda_package(package_url, pipelines, priority=0):
    """
    Add a Conda distribution `package_url` to the PackageDB.

    The download URL is derived from `package_url`. The ``repodata.json.bz2``
    index located next to the archive is fetched and saved to a temporary
    file, and the index entry for this archive is turned into package data by
    ``build_packages``. For packages in the ``conda-forge`` namespace, extra
    metadata is fetched with ``get_package_info``. Each built package is
    merged or created in the PackageDB and, when a database package comes
    back, queued for scanning with `pipelines` at `priority`.

    Return None when no download URL can be built for `package_url`.
    Otherwise return the error value reported by ``merge_or_create_package``,
    or None when no package could be built from the index.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    download_url = build_conda_download_url(str(package_url))
    if not download_url:
        return None

    # The archive file name is the key used to find the package in the index.
    package_identifier = download_url.split("/")[-1]
    # The index is resolved relative to the directory holding the archive.
    package_indexes_url = urljoin(download_url, "./repodata.json.bz2")

    content = fetch_http(package_indexes_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    with open(location, "wb") as tmp:
        tmp.write(content)

    package_info = None
    if package_url.namespace == "conda-forge":
        package_info = get_package_info(package_url.name)

    packages = build_packages(location, download_url, package_info, package_identifier, package_url)

    error = None
    for package in packages:
        # NOTE(review): Conda archives are usually prebuilt binaries; confirm
        # that SOURCE_ARCHIVE is the intended content type here.
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        # Queue from inside the loop: `db_package` is only bound once a package
        # was actually built, so checking it after an empty loop would fail.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error


def get_package_info(name, timeout=30):
    """
    Return the parsed JSON metadata for the conda-forge package `name` from
    the api.anaconda.org package endpoint, or None when it cannot be fetched.

    `timeout` is the number of seconds passed to ``requests.get`` so that an
    unresponsive server cannot block the caller forever.

    This lookup is best-effort: HTTP error statuses, connection failures,
    timeouts and responses that are not valid JSON are logged and result in
    None rather than an exception.
    """
    url = f"https://api.anaconda.org/package/conda-forge/{name}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON on requests versions
        # where the JSON decode error is not a RequestException.
        logger.error("Failed to fetch package info from %s: %s", url, err)
    return None


@priority_router.route("pkg:conda/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Conda Package URL string `purl_str`.

    Recognized keyword arguments:
    - ``addon_pipelines``: extra pipelines appended to DEFAULT_PIPELINES.
    - ``priority``: scan queue priority, 0 when not given.

    Return the error message from ``map_conda_package`` when there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    pipelines = DEFAULT_PIPELINES + tuple(kwargs.get("addon_pipelines", []))
    priority = kwargs.get("priority", 0)

    error_msg = map_conda_package(PackageURL.from_string(purl_str), pipelines, priority)
    return error_msg if error_msg else None
81 changes: 81 additions & 0 deletions minecode/miners/conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import bz2
import json
import packagedcode.models as scan_models
from packageurl import PackageURL


def build_packages(location, download_url, package_info, package_identifier, package_url):
    """
    Yield a PackageData built from a Conda ``repodata.json.bz2`` index.

    - `location` is the path to the bz2-compressed JSON index file.
    - `download_url` is stored as the package download URL.
    - `package_info` is an optional mapping of extra metadata (description,
      summary, html_url, dev_url, license); it is only used when
      `package_url` is in the ``conda-forge`` namespace.
    - `package_identifier` is the archive file name used as the key in the
      index. Names ending in ``.conda`` are looked up in the
      ``packages.conda`` section, all others in ``packages``.
    - `package_url` is the PackageURL that supplies the name, namespace and
      version, and is set as the purl of the yielded package.

    Yield nothing when the index has no entry for `package_identifier`,
    including when the relevant section is missing from the index.
    """
    with bz2.open(location, "rt") as f:
        repodata = json.load(f)

    index_key = "packages.conda" if package_identifier.endswith(".conda") else "packages"
    # Tolerate an index without this section instead of raising KeyError.
    metadata_dict = (repodata.get(index_key) or {}).get(package_identifier)
    if not metadata_dict:
        return

    download_data = dict(
        datasource_id="conda_api_metadata",
        type="conda",
        download_url=download_url,
    )

    extracted_license_statement = []
    declared_license = metadata_dict.get("license")
    if declared_license:
        extracted_license_statement.append(declared_license)

    dependencies = []
    for requirement in metadata_dict.get("depends") or []:
        # Only the first whitespace-separated token (the package name) is kept;
        # any remaining tokens of the requirement are dropped.
        parts = requirement.split()
        if not parts:
            continue

        # Dependencies of a Conda package are Conda packages themselves.
        dep_purl = PackageURL(type="conda", name=parts[0])
        dependencies.append(scan_models.DependentPackage(purl=dep_purl.to_string()))

    common_data = dict(
        name=package_url.name,
        namespace=package_url.namespace,
        version=package_url.version,
        sha256=metadata_dict.get("sha256"),
        md5=metadata_dict.get("md5"),
        size=metadata_dict.get("size"),
        extracted_license_statement=extracted_license_statement,
        dependencies=dependencies,
    )

    if package_url.namespace == "conda-forge" and package_info:
        description = package_info.get("description") or package_info.get("summary")
        html_url = package_info.get("html_url")
        dev_url = package_info.get("dev_url")

        # The license from the package info is appended after the one found in
        # the index, so both statements are kept.
        license_conda_forge = package_info.get("license")
        if license_conda_forge:
            common_data["extracted_license_statement"].append(license_conda_forge)

        conda_forge_data = dict(
            description=description,
            homepage_url=html_url,
            repository_homepage_url=dev_url,
        )

        download_data.update(conda_forge_data)

    download_data.update(common_data)
    package = scan_models.PackageData.from_data(download_data)
    package.set_purl(package_url)
    yield package
41 changes: 41 additions & 0 deletions minecode/tests/collectors/test_conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
from django.test import TestCase
from packageurl import PackageURL
import packagedb
from minecode.collectors import conda
from minecode.utils_test import JsonBasedTesting


class CondaPriorityQueueTests(JsonBasedTesting, TestCase):
    """
    Check that a Conda Package URL is mapped to a package in the PackageDB.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        self.package_url = PackageURL.from_string(
            "pkg:conda/numpy@1.11.3?subdir=linux-64&build=py27h1b885b7_8&type=tar.bz2"
        )
        self.download_url = (
            "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.tar.bz2"
        )

    def test_map_conda_package(self):
        # NOTE(review): map_conda_package fetches the repodata index and no
        # mock is set up here, so this test appears to need network access.
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 0)

        # The trailing comma makes this a one-item tuple of pipeline names;
        # without it the parentheses would just wrap a plain string.
        conda.map_conda_package(self.package_url, ("test_pipelines",))
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 1)
        package = packagedb.models.Package.objects.all().first()
        expected_conda_download_url = self.download_url

        self.assertEqual(package.purl, str(self.package_url))
        self.assertEqual(package.download_url, expected_conda_download_url)
58 changes: 58 additions & 0 deletions minecode/tests/miners/test_conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import json
import os
from packageurl import PackageURL
from minecode.miners import conda
from minecode.tests import FIXTURES_REGEN
from minecode.utils_test import JsonBasedTesting
from django.test import TestCase as DjangoTestCase


class CondaMapperTest(JsonBasedTesting, DjangoTestCase):
    """
    Compare the output of ``conda.build_packages`` with expected JSON files,
    using the ``conda/repodata.json.bz2`` test index.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def _check_built_packages(self, purl, identifier, info, url, expected_name):
        # Build packages from the shared test index and compare their dict
        # form with the expected file (regenerated when FIXTURES_REGEN is set).
        index_location = self.get_test_loc("conda/repodata.json.bz2")
        built = conda.build_packages(index_location, url, info, identifier, purl)
        as_dicts = [package.to_dict() for package in built]
        expected_loc = self.get_test_loc(expected_name)
        self.check_expected_results(as_dicts, expected_loc, regen=FIXTURES_REGEN)

    def test_build_packages_metafile_conda1(self):
        # A ``.conda`` archive with no extra package info.
        purl = PackageURL.from_string(
            "pkg:conda/numpy@1.11.3?subdir=linux-64&build=py27h1b885b7_8&type=conda"
        )
        self._check_built_packages(
            purl,
            "numpy-1.11.3-py27h1b885b7_8.conda",
            None,
            "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.conda",
            "conda/mapper_numpy_expected.json",
        )

    def test_build_packages_metafile_conda2(self):
        # A ``.tar.bz2`` archive in the conda-forge namespace, with package
        # info loaded from a test file.
        purl = PackageURL.from_string(
            "pkg:conda/conda-forge/sqlalchemy@1.1.13?subdir=linux-64&build=py27hb0a01da_0&type=tar.bz2"
        )

        with open(self.get_test_loc("conda/package_info_sqlalchemy.json")) as info_file:
            info = json.load(info_file)

        self._check_built_packages(
            purl,
            "sqlalchemy-1.1.13-py27hb0a01da_0.tar.bz2",
            info,
            "https://repo.anaconda.com/pkgs/main/linux-64/sqlalchemy-1.1.13-py27hb0a01da_0.tar.bz2",
            "conda/mapper_sqlalchemy_expected.json",
        )
118 changes: 118 additions & 0 deletions minecode/tests/testfiles/conda/mapper_numpy_expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
[
{
"api_data_url": null,
"bug_tracking_url": null,
"code_view_url": null,
"copyright": null,
"datasource_id": "conda_api_metadata",
"declared_license_expression": "bsd-new",
"declared_license_expression_spdx": "BSD-3-Clause",
"dependencies": [
{
"extra_data": {},
"extracted_requirement": null,
"is_direct": true,
"is_optional": false,
"is_pinned": false,
"is_runtime": true,
"purl": "pkg:conan/libgcc-ng",
"resolved_package": {},
"scope": null
},
{
"extra_data": {},
"extracted_requirement": null,
"is_direct": true,
"is_optional": false,
"is_pinned": false,
"is_runtime": true,
"purl": "pkg:conan/libgfortran-ng",
"resolved_package": {},
"scope": null
},
{
"extra_data": {},
"extracted_requirement": null,
"is_direct": true,
"is_optional": false,
"is_pinned": false,
"is_runtime": true,
"purl": "pkg:conan/numpy-base",
"resolved_package": {},
"scope": null
},
{
"extra_data": {},
"extracted_requirement": null,
"is_direct": true,
"is_optional": false,
"is_pinned": false,
"is_runtime": true,
"purl": "pkg:conan/python",
"resolved_package": {},
"scope": null
}
],
"description": null,
"download_url": "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.conda",
"extra_data": {},
"extracted_license_statement": "- BSD 3-Clause\n",
"file_references": [],
"holder": null,
"homepage_url": null,
"is_private": false,
"is_virtual": false,
"keywords": [],
"license_detections": [
{
"identifier": "bsd_new-50fa5753-f24d-ec04-33a1-36bb8ac0492c",
"license_expression": "bsd-new",
"license_expression_spdx": "BSD-3-Clause",
"matches": [
{
"end_line": 1,
"from_file": null,
"license_expression": "bsd-new",
"license_expression_spdx": "BSD-3-Clause",
"match_coverage": 100.0,
"matched_length": 3,
"matched_text": "BSD 3-Clause",
"matcher": "1-hash",
"rule_identifier": "bsd-new_10.RULE",
"rule_relevance": 100,
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_10.RULE",
"score": 100.0,
"start_line": 1
}
]
}
],
"md5": "57d14eb0098432d8a03d87bb09ab3fa4",
"name": "numpy",
"namespace": null,
"notice_text": null,
"other_license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"parties": [],
"primary_language": null,
"purl": "pkg:conda/numpy@1.11.3?build=py27h1b885b7_8&subdir=linux-64&type=conda",
"qualifiers": {
"build": "py27h1b885b7_8",
"subdir": "linux-64",
"type": "conda"
},
"release_date": null,
"repository_download_url": null,
"repository_homepage_url": null,
"sha1": null,
"sha256": "fabbdc2d870a26bf24707e301da84377d0aae09f9a97add4cca2a53e075c57ed",
"sha512": null,
"size": 10127,
"source_packages": [],
"subpath": null,
"type": "conda",
"vcs_url": null,
"version": "1.11.3"
}
]
Loading