Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions minecode/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
DEFAULT_PIPELINES = (
'scan_single_package',
'fingerprint_codebase',
)

# This is the list of supported addon pipelines that can be run when we scan a
# Package for indexing.
SUPPORTED_ADDON_PIPELINES = (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give me an example of when these pipelines would be run when indexing a package? I see how the default pipelines would be run, but where would we add the symbol-collecting pipelines when we index a package?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JonoYang
Suppose we receive this request: /api/collect/?purl=pkg:npm/foo@1.2.3&addon_pipelines=collect_symbols.

First, the CollectPackageSerializer will validate whether collect_symbols is a valid pipeline using SUPPORTED_ADDON_PIPELINES. Then, it will add this package to the scan queue using add_package_to_scan_queue, with the pipeline argument being DEFAULT_PIPELINES + ('collect_symbols',).

Copy link
Member

@JonoYang JonoYang Apr 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@keshav-space I see, do we currently use this anywhere? Otherwise, I think the code looks good unless there's something you want to change.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JonoYang No more changes from my side; it's ready to merge.

'collect_symbols',
'collect_source_strings',
'inspect_elf_binaries',
Expand Down
2 changes: 1 addition & 1 deletion minecode/tests/test_conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_map_conan_package(self, mock_get_conan_recipe):
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(package_count, 0)

conan.map_conan_package(self.package_url1)
conan.map_conan_package(self.package_url1, ('test_pipelines'))
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(package_count, 1)
package = packagedb.models.Package.objects.all().first()
Expand Down
4 changes: 2 additions & 2 deletions minecode/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_map_generic_package(self):

purl = 'pkg:generic/test@1.0.0?download_url=http://example.com/test.tar.gz'
package_url = PackageURL.from_string(purl)
error_msg = generic.map_generic_package(package_url)
error_msg = generic.map_generic_package(package_url, ('test_pipeline'))

self.assertEqual('', error_msg)
package_count = Package.objects.all().count()
Expand All @@ -65,7 +65,7 @@ def test_map_fetchcode_supported_package(self):
self.assertEqual(0, package_count)

purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1")
error_msg = generic.map_fetchcode_supported_package(purl)
error_msg = generic.map_fetchcode_supported_package(purl, ('test_pipeline'))

self.assertEqual('', error_msg)
package_count = Package.objects.all().count()
Expand Down
4 changes: 2 additions & 2 deletions minecode/tests/test_maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def test_map_maven_package(self):
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(0, package_count)
package_url = PackageURL.from_string(self.scan_package.purl)
maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY)
maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY, ('test_pipeline'))
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(1, package_count)
package = packagedb.models.Package.objects.all().first()
Expand All @@ -732,7 +732,7 @@ def test_map_maven_package_custom_repo_url(self):
self.assertEqual(0, package_count)
custom_repo_purl = "pkg:maven/org.eclipse.core/runtime@20070801?repository_url=https://packages.atlassian.com/mvn/maven-atlassian-external/"
package_url = PackageURL.from_string(custom_repo_purl)
maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY)
maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY, ('test_pipeline'))
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(1, package_count)
package = packagedb.models.Package.objects.all().first()
Expand Down
2 changes: 1 addition & 1 deletion minecode/tests/test_npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def test_map_npm_package(self):
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(0, package_count)
package_url = PackageURL.from_string(self.scan_package.purl)
npm.map_npm_package(package_url)
npm.map_npm_package(package_url, ('test_pipeline'))
package_count = packagedb.models.Package.objects.all().count()
self.assertEqual(1, package_count)
package = packagedb.models.Package.objects.all().first()
Expand Down
13 changes: 9 additions & 4 deletions minecode/visitors/conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def get_download_info(conandata, version):
return download_url, sha256


def map_conan_package(package_url):
def map_conan_package(package_url, pipelines):
"""
Add a conan `package_url` to the PackageDB.

Expand Down Expand Up @@ -134,13 +134,13 @@ def map_conan_package(package_url):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package)
add_package_to_scan_queue(db_package, pipelines)

return error


@priority_router.route("pkg:conan/.*")
def process_request(purl_str):
def process_request(purl_str, **kwargs):
"""
Process `priority_resource_uri` containing a conan Package URL (PURL) as a
URI.
Expand All @@ -149,11 +149,16 @@ def process_request(purl_str):
https://github.com/conan-io/conan-center-index and using it to create a new
PackageDB entry. The package is then added to the scan queue afterwards.
"""
from minecode.model_utils import DEFAULT_PIPELINES

package_url = PackageURL.from_string(purl_str)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

if not package_url.version:
return

error_msg = map_conan_package(package_url)
error_msg = map_conan_package(package_url, pipelines)

if error_msg:
return error_msg
16 changes: 12 additions & 4 deletions minecode/visitors/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,12 @@ def process_request(purl_str, **kwargs):

Return an error string for errors that occur, or empty string if there is no error.
"""
from minecode.model_utils import DEFAULT_PIPELINES

source_purl = kwargs.get("source_purl", None)
addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

try:
package_url = PackageURL.from_string(purl_str)
source_package_url = None
Expand All @@ -348,12 +353,13 @@ def process_request(purl_str, **kwargs):
error = map_debian_metadata_binary_and_source(
package_url=package_url,
source_package_url=source_package_url,
pipelines=pipelines,
)

return error


def map_debian_package(debian_package, package_content):
def map_debian_package(debian_package, package_content, pipelines):
"""
Add a debian `package_url` to the PackageDB.

Expand Down Expand Up @@ -421,7 +427,7 @@ def map_debian_package(debian_package, package_content):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package)
add_package_to_scan_queue(db_package, pipelines)

return db_package, error

Expand Down Expand Up @@ -501,13 +507,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True):
setattr(package_to, field, value)


def map_debian_metadata_binary_and_source(package_url, source_package_url):
def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines):
"""
Get metadata for the binary and source release of the Debian package
`package_url` and save it to the PackageDB.

Return an error string for errors that occur, or empty string if there is no error.
"""
"""
error = ''

if "repository_url" in package_url.qualifiers:
Expand Down Expand Up @@ -537,6 +543,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url):
binary_package, emsg = map_debian_package(
debian_package,
PackageContentType.BINARY,
pipelines,
)
if emsg:
error += emsg
Expand All @@ -545,6 +552,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url):
source_package, emsg = map_debian_package(
debian_package,
PackageContentType.SOURCE_ARCHIVE,
pipelines,
)
if emsg:
error += emsg
Expand Down
26 changes: 18 additions & 8 deletions minecode/visitors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
logger.setLevel(logging.INFO)


def map_generic_package(package_url):
def map_generic_package(package_url, pipelines):
"""
Add a generic `package_url` to the PackageDB.

Expand All @@ -51,17 +51,22 @@ def map_generic_package(package_url):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package)
add_package_to_scan_queue(db_package, pipelines)

return error


@priority_router.route("pkg:generic/.*?download_url=.*")
def process_request(purl_str):
def process_request(purl_str, **kwargs):
"""
Process `priority_resource_uri` containing a generic Package URL (PURL) with
download_url as a qualifier
"""
from minecode.model_utils import DEFAULT_PIPELINES

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
Expand All @@ -73,7 +78,7 @@ def process_request(purl_str):
error = f'package_url {purl_str} does not contain a download_url qualifier'
return error

error_msg = map_generic_package(package_url)
error_msg = map_generic_package(package_url, pipelines)

if error_msg:
return error_msg
Expand All @@ -91,7 +96,7 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_fetchcode_supported_package(package_url):
def map_fetchcode_supported_package(package_url, pipelines):
"""
Add a `package_url` supported by fetchcode to the PackageDB.

Expand All @@ -116,7 +121,7 @@ def map_fetchcode_supported_package(package_url):

# Submit package for scanning
if db_package:
add_package_to_scan_queue(db_package)
add_package_to_scan_queue(db_package, pipelines)

return error

Expand Down Expand Up @@ -156,7 +161,7 @@ def map_fetchcode_supported_package(package_url):
# Indexing some generic PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS)
def process_request_fetchcode_generic(purl_str):
def process_request_fetchcode_generic(purl_str, **kwargs):
"""
Process `priority_resource_uri` containing a generic Package URL (PURL)
supported by fetchcode.
Expand All @@ -165,13 +170,18 @@ def process_request_fetchcode_generic(purl_str):
https://github.com/nexB/fetchcode and using it to create a new
PackageDB entry. The package is then added to the scan queue afterwards.
"""
from minecode.model_utils import DEFAULT_PIPELINES

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url)
error_msg = map_fetchcode_supported_package(package_url, pipelines)

if error_msg:
return error_msg
8 changes: 6 additions & 2 deletions minecode/visitors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,21 +186,25 @@ def json_serial_date_obj(obj):
# Indexing GitHub PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route('pkg:github/.*')
def process_request_dir_listed(purl_str):
def process_request_dir_listed(purl_str, **kwargs):
"""
Process `priority_resource_uri` containing a GitHub Package URL (PURL).

This involves obtaining Package information for the PURL using
https://github.com/nexB/fetchcode and using it to create a new
PackageDB entry. The package is then added to the scan queue afterwards.
"""
from minecode.model_utils import DEFAULT_PIPELINES

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)
try:
package_url = PackageURL.from_string(purl_str)
except ValueError as e:
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_fetchcode_supported_package(package_url)
error_msg = map_fetchcode_supported_package(package_url, pipelines)

if error_msg:
return error_msg
9 changes: 7 additions & 2 deletions minecode/visitors/gnu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@


@priority_router.route("pkg:gnu/.*")
def process_request(purl_str):
def process_request(purl_str, **kwargs):
"""
Process `priority_resource_uri` containing a GNU Package URL (PURL) as a
URI.
Expand All @@ -31,11 +31,16 @@ def process_request(purl_str):
https://github.com/nexB/fetchcode and using it to create a new
PackageDB entry. The package is then added to the scan queue afterwards.
"""
from minecode.model_utils import DEFAULT_PIPELINES

addon_pipelines = kwargs.get('addon_pipelines', [])
pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines)

package_url = PackageURL.from_string(purl_str)
if not package_url.version:
return

error_msg = map_fetchcode_supported_package(package_url)
error_msg = map_fetchcode_supported_package(package_url, pipelines)

if error_msg:
return error_msg
Loading