diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5b77ab4f..2e1e556c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +Next Release +---------------- + +- Add `/api/from_purl/purl2git` endpoint to get a git repo for a purl. +- Add `/api/to_purl/go` endpoint to get a purl from a golang import string or a package string in go.mod. + + v4.0.0 ------------ diff --git a/packagedb/find_source_repo.py b/packagedb/find_source_repo.py index 83f9eba0..62ebabe5 100644 --- a/packagedb/find_source_repo.py +++ b/packagedb/find_source_repo.py @@ -7,7 +7,6 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import enum import logging import subprocess from typing import Generator @@ -16,6 +15,7 @@ import requests from packageurl import PackageURL +from packageurl.contrib.django.utils import purl_to_lookups from packageurl.contrib.purl2url import get_download_url from packageurl.contrib.purl2url import purl2url from scancode.api import get_urls as get_urls_from_location @@ -29,92 +29,62 @@ logger = logging.getLogger(__name__) -class URLDataReturnType(enum.Enum): - """ - Return type for get_urls_from_text - """ - - url = "url" # This the final URL after redirects - text = "text" # This is the text of the response - - -non_reachable_urls = [ -] -CACHE = { - # url: data -} - - def get_urls_from_text(text): """ Return the URLs found in a text """ + if not text: + return lines = text.splitlines() # location can be a list of lines for url in get_urls_from_location(location=lines)["urls"]: yield url["url"] -def get_data_from_response(response, data_type=URLDataReturnType.text): - """ - Return the data from a response - """ - if not response: - return - data_by_type = { - URLDataReturnType.url: response.url, - URLDataReturnType.text: response.text, - } - if data_type in data_by_type: - return data_by_type[data_type] - else: - raise ValueError(f"Invalid data_type: {data_type}") +# We keep track of unreachable URLs during a session +UNREACHABLE_URLS = set() +# We keep cache of the requests.Response of each URL during a session +RESPONSE_BY_URL_CACHE = {} -def get_data_from_url( + +def fetch_response( url, - data_type=URLDataReturnType.text, timeout=10, ): """ - Take a ``url`` as input and return the data from the URL - depending on the ``data_type`` return URL or text if ``data_type`` is - ``URLDataReturnType.url`` or ``URLDataReturnType.text`` respectively. 
+ Return the request response for url or None, use a session cache + and ignore unreachable URLs """ try: if not url: return - if url.startswith("https://github.com/assets"): + # This URL takes a lot of time to download + # and it does not contain any data of use + if not is_good_repo_url(url): + return + + if not is_url_with_usable_content(url): return - not_supported_extensions = [ - ".pdf", - ".zip", - ".woff2", - ".jar", - ".js", - ".png", - ".css", - ".svg", - ".jpg", - ".tgz", - ] - for extension in not_supported_extensions: - if url.endswith(extension): - return - if url in non_reachable_urls: + + if url in UNREACHABLE_URLS: return - if url in CACHE: - response = CACHE[url] - return get_data_from_response(response=response, data_type=data_type) + + response = RESPONSE_BY_URL_CACHE.get(url) + if response: + return response + response = requests.get(url=url, timeout=timeout) if response.status_code != 200: - non_reachable_urls.append(url) + UNREACHABLE_URLS.add(url) return - CACHE[url] = response - return get_data_from_response(response=response, data_type=data_type) + + RESPONSE_BY_URL_CACHE[url] = response + return response + except Exception as e: logger.error(f"Error getting {url}: {e}") - non_reachable_urls.append(url) + UNREACHABLE_URLS.add(url) return @@ -146,44 +116,25 @@ def convert_apache_svn_to_github_url(url): return f"https://github.com/apache/{name}/tree/{tag}" -def add_source_repo_to_package_set( - source_repo_type, - source_repo_name, - source_repo_namespace, - source_repo_version, - download_url, - purl, - source_purl, +def add_source_package_to_package_set( + source_package, package, ): """ - Take source repo package information, create source package - and add it to a Package set - """ - # Create new Package from the source_purl fields - source_repo_package, created = Package.objects.get_or_create( - type=source_repo_type, - namespace=source_repo_namespace, - name=source_repo_name, - version=source_repo_version, - download_url=download_url, - package_content=PackageContentType.SOURCE_REPO, - ) + Add ``source_package`` to the ``package`` package set. 
Create + the package set if it doesn't exist + """ package_sets = package.package_sets.all() - if not package_sets: + if not package_sets: # Create a Package set if we don't have one package_set = PackageSet.objects.create() package_set.add_to_package_set(package) - package_set.add_to_package_set(source_repo_package) - else: - for package_set in package_sets.all(): - package_set.add_to_package_set(source_repo_package) - if created: - add_package_to_scan_queue(source_repo_package) - logger.info(f"\tCreated source repo package {source_purl} for {purl}") - else: + package_sets = [package_set] + + for package_set in package_sets: + package_set.add_to_package_set(source_package) logger.info( - f"\tAssigned source repo package {source_purl} to Package set {package_set.uuid}" + f"Assigned source repo package {source_package.purl} to Package set {package_set.uuid}" ) @@ -193,23 +144,43 @@ def get_source_repo_and_add_to_package_set(): if found """ for package in Package.objects.all().paginated(): - source_purl_with_tag = get_source_repo(package=package) - download_url = None + source_purl = get_source_repo(package=package) + + if not source_purl: + continue + try: - download_url = get_download_url(str(source_purl_with_tag)) + download_url = get_download_url(str(source_purl)) + if not download_url: + continue except: - logger.error(f"Error getting download_url for {source_purl_with_tag}") + logger.error(f"Error getting download_url for {source_purl}") continue - if not download_url: + + source_package = Package.objects.for_package_url( + purl_str=str(source_purl) + ).get_or_none() + if not source_package: + source_package, _created = Package.objects.get_or_create( + type=source_purl.type, + namespace=source_purl.namespace, + name=source_purl.name, + version=source_purl.version, + download_url=download_url, + package_content=PackageContentType.SOURCE_REPO, + ) + add_package_to_scan_queue(source_package) + logger.info(f"Created source repo package {source_purl} for {package.purl}") + + package_set_ids = set(package.package_sets.all().values("uuid")) + source_package_set_ids = set(source_package.package_sets.all().values("uuid")) + + # If the package exists and already in the set then there is nothing left to do + if package_set_ids.intersection(source_package_set_ids): continue - add_source_repo_to_package_set( - source_repo_type=source_purl_with_tag.type, - source_repo_name=source_purl_with_tag.name, - source_repo_namespace=source_purl_with_tag.namespace, - source_repo_version=source_purl_with_tag.version, - download_url=download_url, - purl=package.purl, - source_purl=source_purl_with_tag, + + add_source_package_to_package_set( + source_package=source_package, package=package, ) @@ -217,7 +188,8 @@ def get_source_repo_and_add_to_package_set(): def get_source_repo(package: Package) -> PackageURL: """ Return the PackageURL of the source repository of a Package - or None if not found + or None if not found. Package is either a PackageCode Package object or + Package instance object. """ repo_urls = list(get_repo_urls(package)) if not repo_urls: @@ -260,17 +232,17 @@ def get_repo_urls(package: Package) -> Generator[str, None, None]: def get_source_urls_from_package_data_and_resources(package: Package) -> List[str]: """ - Return the URL of the source repository of a package - or None if not found + Return a list of URLs of source repositories for a package, + possibly empty. 
""" if not package: return [] - source_urls = list(get_urls_from_package_data(package)) - if source_urls: - return source_urls - source_urls = list(get_urls_from_package_resources(package)) - if source_urls: - return source_urls + metadata_urls = list(get_urls_from_package_data(package)) + if metadata_urls: + return metadata_urls + resource_urls = list(get_urls_from_package_resources(package)) + if resource_urls: + return resource_urls return [] @@ -341,11 +313,7 @@ def get_urls_from_package_data(package) -> Generator[str, None, None]: # TODO: Use the source package url # TODO: If the package is already a repo package then don't do anything # TODO: Search for URLs in description, qualifiers, download_url, notice_text, extracted_license_statement. - description = package.description or "" - urls_from_description_and_homepage_urls = ( - get_urls_from_description_and_homepage_urls(package, description) - ) - urls = [ + found_urls = [ package.code_view_url, package.homepage_url, package.bug_tracking_url, @@ -353,23 +321,20 @@ def get_urls_from_package_data(package) -> Generator[str, None, None]: package.vcs_url, package.repository_download_url, ] - urls.extend(urls_from_description_and_homepage_urls) - yield from get_git_repo_urls(urls=urls) + homepage_response = fetch_response(url=package.homepage_url) + homepage_text = homepage_response and homepage_response.text + found_urls.extend(get_urls_from_text(text=homepage_text)) -def get_urls_from_description_and_homepage_urls(package, description): - homepage_text = get_data_from_url( - url=package.homepage_url, data_type=URLDataReturnType.text + repository_homepage_response = fetch_response(url=package.repository_homepage_url) + repository_homepage_text = ( + repository_homepage_response and repository_homepage_response.text ) - repository_homepage_text = get_data_from_url( - url=package.repository_homepage_url, data_type=URLDataReturnType.text - ) - if homepage_text: - description += homepage_text - if repository_homepage_text: - description += repository_homepage_text - urls_from_description = list(get_urls_from_text(text=description)) - return urls_from_description + found_urls.extend(get_urls_from_text(text=repository_homepage_text)) + + found_urls.extend(get_urls_from_text(text=package.description)) + + yield from get_git_repo_urls(urls=found_urls) def get_git_repo_urls(urls): @@ -393,9 +358,8 @@ def get_git_repo_urls(urls): if url and url.startswith("git+"): _, _, url = url.partition("git+") try: - url = get_data_from_url( - url=url, data_type=URLDataReturnType.url - ) + resp = fetch_response(url=url) + url = resp and resp.url if not url: continue except Exception as e: @@ -411,10 +375,11 @@ def get_tags_and_commits(source_purl): """ try: repo_url = purl2url(str(source_purl)) - if not get_data_from_url(url=repo_url, data_type=URLDataReturnType.url): + resp = fetch_response(url=repo_url) + url = resp and resp.url + if not url: return - # This is a jQuery Plugins Site Reserved Word and we don't want to scan it - if repo_url.startswith("https://github.com/assets"): + if not is_good_repo_url(repo_url): return output = subprocess.getoutput(f"git ls-remote {repo_url}") yield from get_tags_and_commits_from_git_output(output) @@ -422,6 +387,40 @@ def get_tags_and_commits(source_purl): logger.error(f"Error getting tags and commits for {source_purl}: {e}") +def is_good_repo_url(url): + """ + Return True if it's a good repo URL or + False if it's some kind of problematic URL that we want to skip + """ + # This is a jQuery Plugins Site Reserved Word 
and we don't want to scan it + if url.startswith("https://github.com/assets"): + return False + return True + + +def is_url_with_usable_content(url): + """ + Return True if this URL contains usable + text data, otherwise False. Usable here means it is text + and we are likely to find interesting URLs in that. + """ + not_supported_extensions = ( + ".pdf", + ".zip", + ".woff2", + ".jar", + ".js", + ".png", + ".css", + ".svg", + ".jpg", + ".tgz", + ) + if url.endswith(not_supported_extensions): + return False + return True + + def get_tags_and_commits_from_git_output(git_ls_remote): """ Yield tuples of (tag, commit), given a git ls-remote output @@ -473,3 +472,27 @@ def find_package_version_tag_and_commit(version, source_purls): version=tag, qualifiers={"commit": commit}, ) + + +def get_package_object_from_purl(package_url): + """ + Get a ``Package`` object for a ``package_url`` string. + """ + lookups = purl_to_lookups(package_url) + packages = Package.objects.filter(**lookups) + packages_count = packages.count() + + if packages_count == 1: + package = packages.first() + return package + + if not packages_count: + return + + if packages_count > 1: + # Get the binary package + # We use .get(qualifiers="") because the binary maven JAR has no qualifiers + package = packages.get_or_none(qualifiers="") + if not package: + print(f"\t{package_url} does not exist in this database. Continuing.") + return diff --git a/packagedb/from_purl.py b/packagedb/from_purl.py new file mode 100644 index 00000000..1e0c7b5b --- /dev/null +++ b/packagedb/from_purl.py @@ -0,0 +1,65 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from drf_spectacular.utils import OpenApiParameter +from drf_spectacular.utils import extend_schema +from rest_framework import routers +from rest_framework import status +from rest_framework import viewsets +from rest_framework.response import Response + +from packagedb.find_source_repo import get_package_object_from_purl +from packagedb.find_source_repo import get_source_repo +from packagedb.serializers import PurltoGitRepoResponseSerializer +from packagedb.serializers import PurltoGitRepoSerializer + + +@extend_schema( + parameters=[ + OpenApiParameter("package_url", str, "query", description="package url"), + ], + responses={200: PurltoGitRepoResponseSerializer()}, +) +class FromPurlToGitRepoViewSet(viewsets.ViewSet): + """ + Return a ``git_repo`` from a standard PackageURL. 
+ """ + + serializer_class = PurltoGitRepoSerializer + + def get_view_name(self): + return "Purl2Git" + + def list(self, request): + serializer = self.serializer_class(data=request.query_params) + response = {} + + if not serializer.is_valid(): + return Response( + {"errors": serializer.errors}, status=status.HTTP_400_BAD_REQUEST + ) + + validated_data = serializer.validated_data + package_url = validated_data.get("package_url") + package = get_package_object_from_purl(package_url=package_url) + if not package: + return Response( + {"errors": f"{package_url} does not exist in this database"}, + status=status.HTTP_404_NOT_FOUND, + ) + source_repo = get_source_repo(package=package) + response["git_repo"] = str(source_repo) + serializer = PurltoGitRepoResponseSerializer( + response, context={"request": request} + ) + return Response(serializer.data) + + +api_from_purl_router = routers.DefaultRouter() +api_from_purl_router.register("purl2git", FromPurlToGitRepoViewSet, "purl2git") diff --git a/packagedb/management/commands/create_source_repo_packages.py b/packagedb/management/commands/create_source_repo_packages.py index 612b7eb8..3cde76e1 100644 --- a/packagedb/management/commands/create_source_repo_packages.py +++ b/packagedb/management/commands/create_source_repo_packages.py @@ -15,8 +15,11 @@ from packageurl.contrib.django.utils import purl_to_lookups from minecode.management.commands import VerboseCommand -from packagedb.find_source_repo import add_source_repo_to_package_set +from minecode.model_utils import add_package_to_scan_queue +from packagedb.find_source_repo import add_source_package_to_package_set +from packagedb.find_source_repo import get_package_object_from_purl from packagedb.models import Package +from packagedb.models import PackageContentType TRACE = False @@ -30,74 +33,78 @@ def get_rows(workbook, sheet_name): inventory_sheet = workbook[sheet_name] except KeyError: return dict() - inventory_column_indices = {cell.value.lower(): i for i, cell in enumerate(inventory_sheet[1]) if cell.value} + inventory_column_indices = { + cell.value.lower(): i for i, cell in enumerate(inventory_sheet[1]) if cell.value + } rows = [] for row in inventory_sheet.iter_rows(min_row=2): - purl = row[inventory_column_indices['purl']].value - source_download_url = row[inventory_column_indices['source_download_url']].value - source_type = row[inventory_column_indices['source_type']].value - source_namespace = row[inventory_column_indices['source_namespace']].value - source_name = row[inventory_column_indices['source_name']].value - source_version = row[inventory_column_indices['source_version']].value - source_purl = row[inventory_column_indices['source_purl']].value + purl = row[inventory_column_indices["purl"]].value + source_download_url = row[inventory_column_indices["source_download_url"]].value + source_type = row[inventory_column_indices["source_type"]].value + source_namespace = row[inventory_column_indices["source_namespace"]].value + source_name = row[inventory_column_indices["source_name"]].value + source_version = row[inventory_column_indices["source_version"]].value + source_purl = row[inventory_column_indices["source_purl"]].value reportable = { - 'purl': purl, - 'source_download_url': source_download_url, - 'source_type': source_type, - 'source_namespace': source_namespace, - 'source_name': source_name, - 'source_version': source_version, - 'source_purl': source_purl, + "purl": purl, + "source_download_url": source_download_url, + "source_type": source_type, + "source_namespace": 
source_namespace, + "source_name": source_name, + "source_version": source_version, + "source_purl": source_purl, } rows.append(reportable) return rows class Command(VerboseCommand): - help = 'Create source archive packages for related' + help = "Create source archive packages for related" def add_arguments(self, parser): - parser.add_argument('--input', type=str) + parser.add_argument("--input", type=str) def handle(self, *args, **options): - input = options.get('input') + input = options.get("input") if not input: return # Collect resource info wb = openpyxl.load_workbook(input, read_only=True) - rows = get_rows(wb, 'PACKAGES WITH SOURCES') + rows = get_rows(wb, "PACKAGES WITH SOURCES") for row in rows: # Look up the package the row is for by using the purl to query the db. - purl = row['purl'] - source_purl = row['source_purl'] - print(f'Processing packages for: {purl}') - - lookups = purl_to_lookups(purl) - packages = Package.objects.filter(**lookups) - packages_count = packages.count() - - if packages_count > 1: - # Get the binary package - # We use .get(qualifiers="") because the binary maven JAR has no qualifiers - package = packages.get_or_none(qualifiers='') - if not package: - print(f'\t{purl} does not exist in this database. Continuing.') - continue - elif packages_count == 1: - package = packages.first() - else: - print(f'\t{purl} does not exist in this database. Continuing.') + purl = row["purl"] + source_purl = row["source_purl"] + print(f"Processing packages for: {purl}") + package = get_package_object_from_purl(package_url=purl) + if not package: + print(f"\t{purl} does not exist in this database. Continuing.") continue - # binary packages can only be part of one package set - add_source_repo_to_package_set(source_repo_type = row['source_type'], - source_repo_name = row['source_name'], - source_repo_namespace = row['source_namespace'], - source_repo_version = row['source_version'], - download_url = row['source_download_url'], - purl=purl, - source_purl=source_purl, + source_package, _created = Package.objects.get_or_create( + type=row["source_type"], + namespace=row["source_namespace"], + name=row["source_name"], + version=row["source_version"], + download_url=row["source_download_url"], + package_content=PackageContentType.SOURCE_REPO, + ) + + if _created: + add_package_to_scan_queue(source_package) + + package_set_ids = set(package.package_sets.all().values("uuid")) + source_package_set_ids = set( + source_package.package_sets.all().values("uuid") + ) + + # If the package exists and already in the set then there is nothing left to do + if package_set_ids.intersection(source_package_set_ids): + continue + + add_source_package_to_package_set( + source_package=source_package, package=package, ) diff --git a/packagedb/models.py b/packagedb/models.py index 8e9e453d..93c1690e 100644 --- a/packagedb/models.py +++ b/packagedb/models.py @@ -66,7 +66,7 @@ def get_or_none(self, *args, **kwargs): """ try: return self.get(*args, **kwargs) - except self.DoesNotExist: + except self.model.DoesNotExist: return def paginated(self, per_page=5000): diff --git a/packagedb/serializers.py b/packagedb/serializers.py index fb149aea..85eea257 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -423,3 +423,11 @@ class GoLangPurlSerializer(Serializer): class GoLangPurlResponseSerializer(Serializer): package_url = CharField() + + +class PurltoGitRepoSerializer(Serializer): + package_url = CharField(required=True) + + +class PurltoGitRepoResponseSerializer(Serializer): + git_repo = 
CharField(required=True) diff --git a/packagedb/tests/test_find_source_repo.py b/packagedb/tests/test_find_source_repo.py index 035d3921..2268f4eb 100644 --- a/packagedb/tests/test_find_source_repo.py +++ b/packagedb/tests/test_find_source_repo.py @@ -11,13 +11,12 @@ import os from unittest import mock from unittest.mock import patch -from uuid import uuid4 -import pytest from django.test import TestCase from packageurl import PackageURL from packagedb.find_source_repo import convert_repo_urls_to_purls +from packagedb.find_source_repo import fetch_response from packagedb.find_source_repo import get_repo_urls from packagedb.find_source_repo import get_source_repo from packagedb.find_source_repo import get_source_urls_from_package_data_and_resources @@ -31,7 +30,9 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "testfiles", "find_source_repo", "tags_commits.txt") -TAGS_COMMITS_FILE = os.path.join(BASE_DIR, "testfiles", "find_source_repo", "tags_commits_list.txt") +TAGS_COMMITS_FILE = os.path.join( + BASE_DIR, "testfiles", "find_source_repo", "tags_commits_list.txt" +) class TestFindSourceRepo(TestCase): @@ -122,10 +123,19 @@ def setUp(self): download_url="https://repo1.maven.org/maven2/com/foo/bar/11/bar.11.jar", ) - @mock.patch("packagedb.find_source_repo.get_urls_from_description_and_homepage_urls") - def test_get_source_purl_from_package_data(self, mock): - mock.return_value = ["https://bitbucket/ab/cd"] - source_urls = list(get_urls_from_package_data(self.package_with_resources_and_package_data)) + @mock.patch("packagedb.find_source_repo.fetch_response") + @mock.patch("packagedb.find_source_repo.get_urls_from_text") + def test_get_source_purl_from_package_data(self, mock_text, mock_response): + mock_response.side_effect = [ + None, + None, + None, + None, + ] + mock_text.side_effect = [["https://bitbucket/ab/cd"], [], [], []] + source_urls = list( + get_urls_from_package_data(self.package_with_resources_and_package_data) + ) assert source_urls == [ "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/src/master/", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", @@ -135,15 +145,19 @@ def test_get_source_purl_from_package_data(self, mock): def test_get_source_purl_from_package_resources(self): source_urls = list( - get_urls_from_package_resources(self.package_with_resources_and_package_data) + get_urls_from_package_resources( + self.package_with_resources_and_package_data + ) ) assert source_urls == [ "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_description_and_homepage_urls") - @mock.patch("packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package") + @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch( + "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + ) def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): mock1.return_value = None mock2.return_value = [] @@ -155,9 +169,8 @@ def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): self.package_with_resources_and_package_data ) assert source_urls == [ - "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/src/master/", 
"https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "git+https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions.git", + "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] source_urls = get_source_urls_from_package_data_and_resources( self.package_with_resources_and_without_package_data @@ -166,35 +179,40 @@ def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_description_and_homepage_urls") - @mock.patch("packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package") + @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch( + "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + ) def test_get_repo_urls(self, mock1, mock2): mock1.return_value = None mock2.return_value = [] - source_urls = list(get_repo_urls(package=self.package_without_resources_and_package_data)) + source_urls = list( + get_repo_urls(package=self.package_without_resources_and_package_data) + ) assert source_urls == [ "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/src/master/", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "git+https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions.git", + "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] source_urls = list(get_repo_urls(package=self.package_without_versions)) assert source_urls == [] - source_urls = list(get_repo_urls(package=self.package_with_resources_and_package_data)) + source_urls = list( + get_repo_urls(package=self.package_with_resources_and_package_data) + ) assert source_urls == [ - "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/src/master/", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "git+https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions.git", + "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/src/master/", "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", - "git+https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions.git", + "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] def test_convert_repo_urls_to_purls(self): source_urls = list( convert_repo_urls_to_purls( - ["https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions"] + [ + "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions" + ] ) ) source_urls = [str(source_url) for source_url in source_urls] @@ -214,7 +232,9 @@ def test_convert_repo_urls_to_purls(self): ] assert list( - convert_repo_urls_to_purls(["git://github.com:maxmind/MaxMind-DB-Reader-java"]) + convert_repo_urls_to_purls( + 
["git://github.com:maxmind/MaxMind-DB-Reader-java"] + ) ) == [ PackageURL( type="github", @@ -244,7 +264,9 @@ def test_convert_repo_urls_to_purls(self): ] assert list( - convert_repo_urls_to_purls(["git+https://github.com/ckeditor/ckeditor4-react.git"]) + convert_repo_urls_to_purls( + ["git+https://github.com/ckeditor/ckeditor4-react.git"] + ) ) == [ PackageURL( type="github", @@ -257,7 +279,7 @@ def test_convert_repo_urls_to_purls(self): ] def test_get_tags_commits(self): - with patch("packagedb.find_source_repo.get_data_from_url"): + with patch("packagedb.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() with open(TAGS_COMMITS_FILE) as f: @@ -280,7 +302,7 @@ def test_get_tags_commits(self): ) == ("9.35", "fdc8117af75b192e3f8afcc0119c904b02686af8") def test_get_source_repo(self): - with patch("packagedb.find_source_repo.get_data_from_url"): + with patch("packagedb.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() assert get_source_repo( @@ -293,3 +315,19 @@ def test_get_source_repo(self): qualifiers={"commit": "e86fb3431972d302fcb615aca0baed4d8ab89791"}, subpath=None, ) + + @mock.patch("packagedb.find_source_repo.requests.get") + def test_fetch_response(self, mock_get): + mock_get.return_value.status_code = 200 + mock_get.return_value.text = "abc" + assert fetch_response("https://github.com/assets") == None + assert fetch_response("https://github.com/abc.js") == None + + def test_from_purl_to_git(self): + response = self.client.get( + "/api/from_purl/purl2git", + data={"package_url": str(self.package_without_resources_and_package_data)}, + follow=True, + ) + expected = "pkg:bitbucket/connect2id/oauth-2.0-sdk-with-openid-connect-extensions@9.36?commit=e86fb3431972d302fcb615aca0baed4d8ab89791" + self.assertEqual(expected, response.data["git_repo"]) diff --git a/packagedb/tests/test_models.py b/packagedb/tests/test_models.py index ed3f6e45..8cc10ff2 100644 --- a/packagedb/tests/test_models.py +++ b/packagedb/tests/test_models.py @@ -508,4 +508,13 @@ def test_package_watch_reschedule_on_modification(self, mock_create_new_job): self.assertEqual(None, self.package_watch1.schedule_work_id) - \ No newline at end of file + + def test_get_or_none(self): + Package.objects.create(download_url='http://a.ab', name='name', version='1.0', type="foo") + package = Package.objects.filter( + download_url="http://a.ab" + ).get_or_none() + assert package + assert Package.objects.filter( + download_url="http://a.ab-foobar" + ).get_or_none() == None diff --git a/packagedb/to_purl.py b/packagedb/to_purl.py index c5acf67a..736aa4a6 100644 --- a/packagedb/to_purl.py +++ b/packagedb/to_purl.py @@ -35,7 +35,6 @@ class GolangPurlViewSet(viewsets.ViewSet): >>> get_golang_purl("github.com/gorilla/mux v1.8.1").to_string() 'pkg:golang/github.com/gorilla/mux@v1.8.1' >>> # This is an example of go.mod string `package version` - >>> >>> get_golang_purl("github.com/gorilla/mux").to_string() 'pkg:golang/github.com/gorilla/mux' >>> #This is an example a go import string `package` diff --git a/purldb_project/urls.py b/purldb_project/urls.py index 1811badb..80105988 100644 --- a/purldb_project/urls.py +++ b/purldb_project/urls.py @@ -26,6 +26,7 @@ from minecode.api import PriorityResourceURIViewSet from packagedb.api import PurlValidateViewSet from packagedb.to_purl import api_to_purl_router +from packagedb.from_purl import api_from_purl_router from packagedb.api 
import CollectViewSet from drf_spectacular.views import SpectacularAPIView from drf_spectacular.views import SpectacularSwaggerView @@ -53,6 +54,7 @@ ), path('api/', include((api_router.urls, 'api'))), path('api/to_purl/', include((api_to_purl_router.urls, 'api_to'))), + path('api/from_purl/', include((api_from_purl_router.urls, 'api_from'))), path("", RedirectView.as_view(url="api/")), path('api/schema/', SpectacularAPIView.as_view(), name='schema'), path('api/docs/', SpectacularSwaggerView.as_view(url_name='schema'), name='swagger-ui'),
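
As a usage note for the new route registered in purldb_project/urls.py above, here is a minimal sketch of querying the `/api/from_purl/purl2git` endpoint. The `package_url` query parameter and the `git_repo` response field come from `PurltoGitRepoSerializer` and `PurltoGitRepoResponseSerializer` in this diff; the base URL and the example purl are assumptions:

    import requests

    # Hypothetical local purldb instance; adjust the host for your deployment.
    PURLDB_URL = "http://127.0.0.1:8000"

    # Example purl only: the package must already exist in the database,
    # otherwise the endpoint answers with HTTP 404 and an "errors" message.
    params = {"package_url": "pkg:maven/com.foo/bar@11"}

    response = requests.get(f"{PURLDB_URL}/api/from_purl/purl2git/", params=params)
    if response.status_code == 200:
        # The payload carries a single "git_repo" field holding the source
        # repository purl, e.g. "pkg:github/owner/name@tag?commit=sha".
        print(response.json()["git_repo"])
    else:
        print(response.status_code, response.json())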
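
Server side, the view resolves the incoming purl with `get_package_object_from_purl` (shared with the reworked `create_source_repo_packages` command) and then asks `get_source_repo` for the source repository purl. A sketch of that flow, meant for a configured purldb Django shell; the purl is only an example and must already be stored:

    from packagedb.find_source_repo import get_package_object_from_purl
    from packagedb.find_source_repo import get_source_repo

    # Example purl; when several rows match, the helper is meant to prefer
    # the binary package, i.e. the row with empty qualifiers.
    package = get_package_object_from_purl(package_url="pkg:maven/com.foo/bar@11")
    if package:
        # get_source_repo() mines the package metadata and resources for
        # repository URLs and returns a source PackageURL, with a "commit"
        # qualifier when a git tag matches the package version, or None.
        source_repo_purl = get_source_repo(package=package)
        print(source_repo_purl)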
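
The `fetch_response` helper that replaces `get_data_from_url` keeps two module-level session caches: successful responses go into `RESPONSE_BY_URL_CACHE` keyed by URL, and URLs that error out or answer with a non-200 status are remembered in `UNREACHABLE_URLS` so they are not retried. A rough sketch of how a caller now reads both the final URL and the body from one response object; the URL is illustrative, and this too assumes a configured purldb environment:

    from packagedb.find_source_repo import fetch_response

    # Illustrative URL; any reachable, non-binary URL behaves the same way.
    url = "https://example.org/project"

    response = fetch_response(url=url)
    if response:
        final_url = response.url   # final URL after redirects (old URLDataReturnType.url)
        page_text = response.text  # response body (old URLDataReturnType.text)

    # A repeated call is answered from the session caches
    # (RESPONSE_BY_URL_CACHE or UNREACHABLE_URLS): no new HTTP request is made.
    assert fetch_response(url=url) is response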
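
Both `get_source_repo_and_add_to_package_set` and the management command guard `add_source_package_to_package_set` with the same membership test: collect the UUIDs of the package sets each package already belongs to and skip the add when the two intersect. Sketched here with `values_list(..., flat=True)` so the set members are plain, hashable UUID values; `package` and `source_package` stand for existing `Package` instances:

    from packagedb.find_source_repo import add_source_package_to_package_set

    package_set_ids = set(package.package_sets.all().values_list("uuid", flat=True))
    source_set_ids = set(source_package.package_sets.all().values_list("uuid", flat=True))

    # The packages already share a package set: nothing left to do.
    if not package_set_ids.intersection(source_set_ids):
        # Otherwise add the source package to the existing set(s); a new
        # PackageSet is created when the package is not in any set yet.
        add_source_package_to_package_set(source_package=source_package, package=package)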