Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
13d0212
Move maven priority queue related code to minecode.collectors #283
JonoYang Feb 6, 2024
ada55ea
Move npm code #283
JonoYang Feb 6, 2024
cb4abad
Create viewset for ScannableURI #49
JonoYang Feb 7, 2024
fb9e696
Return empty strings if nothing else on scan queue #49
JonoYang Feb 10, 2024
93ad167
Update ScannableURI API and model #285
JonoYang Feb 14, 2024
4739fb7
Update scan_project_url in update_status #49 #285
JonoYang Feb 15, 2024
20a33bc
Modify ScannableURI fields #49 #285
JonoYang Feb 16, 2024
e07cef0
Ensure a response is returned in update_status #49 #285
JonoYang Feb 16, 2024
15f70c5
Add test for scan indexing through API #49 #285
JonoYang Feb 17, 2024
79b4aa7
Rename instances of rescan to reindex #49 #285
JonoYang Feb 21, 2024
c4a7d52
Create minecode/indexing.py #49 #285
JonoYang Feb 22, 2024
e5b78b2
Create command to manage in progress scans #49 #285
JonoYang Feb 29, 2024
42ae3e6
Pass pipelines to be run to scancode.io
JonoYang Mar 2, 2024
5856cde
Create scan_queue_workers group #49 #285
JonoYang Mar 5, 2024
86b0885
Update ScannableURI migrations #49 #285
JonoYang Mar 7, 2024
4169da5
Do not track scan progress from purldb #49 #285
JonoYang Mar 7, 2024
38c5805
Share code between user creation commands #49 #285
JonoYang Mar 8, 2024
d46c4b4
Set default pipelines to be run
JonoYang Mar 15, 2024
ef57b49
Get scan summary data and use it in index_package #49
JonoYang Mar 15, 2024
ccdee32
Fix Package indexing issues #49
JonoYang Mar 16, 2024
4921a9a
Update package checksums using project extra data #49
JonoYang Mar 16, 2024
1192879
Update update_status error responses #49
JonoYang Mar 16, 2024
b0b8ddd
Do not allow updates of finished ScannableURIs #49 #285
JonoYang Mar 18, 2024
ff83b09
Move validate_uuid to utils.py #49 #285
JonoYang Mar 18, 2024
4ecd97a
Move code from minecode/collectors to original location #49 #285
JonoYang Mar 18, 2024
06520b3
Merge branch 'main' into 49-scan-queue-update
JonoYang Mar 18, 2024
e3f46fd
Add tests to ensure proper API permissions for scan_queue #49
JonoYang Mar 19, 2024
093d94b
Remove previous scan queue Docker services #49 #285
JonoYang Mar 19, 2024
6fbd43b
Fix quotes #49 #285
JonoYang Mar 19, 2024
8dcc1a3
Update default pipelines to be run for packages #49 #285
JonoYang Mar 19, 2024
abb6439
Use reindex_uri attribute when calling index_package #49 #285
JonoYang Mar 19, 2024
767e8a9
Remove duplicate import #49 #285
JonoYang Mar 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,6 @@ run_visit: seed
run_map:
${MANAGE} run_map

request_scans:
${MANAGE} request_scans

process_scans:
${MANAGE} process_scans

test:
@echo "-> Run the test suite"
${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs --ignore matchcode_pipeline --ignore matchcode_project --ignore purldb-toolkit --ignore packagedb/tests/test_throttling.py
Expand Down
26 changes: 0 additions & 26 deletions docker-compose_purldb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,6 @@ services:
- db
- web # Ensure that potential db migrations run first

request_scan:
build: .
command: wait-for-it web:8000 -- python manage_purldb.py request_scans
env_file:
- docker_purldb.env
volumes:
- /etc/purldb/:/etc/purldb/
profiles:
- scan_queue
depends_on:
- db
- web

process_scan:
build: .
command: wait-for-it web:8000 -- python manage_purldb.py process_scans
env_file:
- docker_purldb.env
volumes:
- /etc/purldb/:/etc/purldb/
profiles:
- scan_queue
depends_on:
- db
- web

priority_queue:
build: .
command: wait-for-it web:8000 -- python manage_purldb.py priority_queue
Expand Down
26 changes: 0 additions & 26 deletions docker-compose_purldb_public.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,32 +74,6 @@ services:
- db
- web # Ensure that potential db migrations run first

request_scan:
build: .
command: wait-for-it web:8000 -- python manage_purldb_public.py request_scans
env_file:
- docker_purldb.env
volumes:
- /etc/purldb/:/etc/purldb/
profiles:
- scan_queue
depends_on:
- db
- web

process_scan:
build: .
command: wait-for-it web:8000 -- python manage_purldb_public.py process_scans
env_file:
- docker_purldb.env
volumes:
- /etc/purldb/:/etc/purldb/
profiles:
- scan_queue
depends_on:
- db
- web

priority_queue:
build: .
command: wait-for-it web:8000 -- python manage_purldb_public.py priority_queue
Expand Down
161 changes: 155 additions & 6 deletions minecode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,24 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

from rest_framework import serializers
from rest_framework import status
from rest_framework import viewsets
import json

from django.db import transaction
from django.utils import timezone
from packageurl import PackageURL
from rest_framework import serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.permissions import IsAdminUser
from rest_framework.response import Response
from packageurl import PackageURL

# UnusedImport here!
# But importing the mappers and visitors module triggers routes registration
from minecode import visitors # NOQA
from minecode import priority_router
from minecode.models import ResourceURI
from minecode.models import PriorityResourceURI
from minecode.management.indexing import index_package
from minecode.models import PriorityResourceURI, ResourceURI, ScannableURI
from minecode.permissions import IsScanQueueWorkerAPIUser
from minecode.utils import validate_uuid


class ResourceURISerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -83,3 +88,147 @@ def index_package(self, request, *args, **kwargs):
}
# TODO: revisiting a package should be handled on another level, dependent on data we store
return Response(message)


class ScannableURISerializer(serializers.ModelSerializer):
    """
    Model serializer for ScannableURI that exposes every model field.
    """

    class Meta:
        # Serialize all fields declared on the ScannableURI model.
        fields = '__all__'
        model = ScannableURI


class ScannableURIViewSet(viewsets.ModelViewSet):
    """
    API endpoints for ScannableURI.

    Besides the standard ModelViewSet routes, this exposes two extra actions:
    `get_next_download_url` to claim the next ScannableURI on the scan queue,
    and `update_status` to report the outcome of a scan.
    Access is limited to scan queue worker API users and admin users.
    """
    queryset = ScannableURI.objects.all()
    serializer_class = ScannableURISerializer
    permission_classes = [IsScanQueueWorkerAPIUser | IsAdminUser]

    @staticmethod
    def _error_response(message, http_status=status.HTTP_400_BAD_REQUEST):
        """
        Return a Response carrying `message` under the 'error' key with the
        `http_status` status code (400 by default).
        """
        return Response({'error': message}, status=http_status)

    @action(detail=False, methods=['get'])
    def get_next_download_url(self, request, *args, **kwargs):
        """
        Return download url for next Package on scan queue

        The response is a mapping with the keys `scannable_uri_uuid`,
        `download_url` and `pipelines`. When there is nothing on the scan
        queue, the two string values are empty and `pipelines` is an empty
        list.
        """
        # Fetch and mark the ScannableURI as submitted in a single transaction.
        with transaction.atomic():
            scannable_uri = ScannableURI.objects.get_next_scannable()
            if scannable_uri:
                response = {
                    'scannable_uri_uuid': scannable_uri.uuid,
                    'download_url': scannable_uri.uri,
                    'pipelines': scannable_uri.pipelines,
                }
                scannable_uri.scan_status = ScannableURI.SCAN_SUBMITTED
                scannable_uri.scan_date = timezone.now()
                scannable_uri.save()
            else:
                response = {
                    'scannable_uri_uuid': '',
                    'download_url': '',
                    'pipelines': [],
                }
        return Response(response)

    @action(detail=False, methods=['post'])
    def update_status(self, request, *args, **kwargs):
        """
        Update the status of a ScannableURI with UUID of `scannable_uri_uuid`
        with `scan_status`

        If `scan_status` is 'failed', then a `scan_log` string is expected and
        should contain the error messages for that scan.

        If `scan_status` is 'scanned', then a `scan_results_file`,
        `scan_summary_file`, and `project_extra_data` mapping are expected.
        `scan_results_file`, `scan_summary_file`, and `project_extra_data` are
        then used to update Package data and its Resources.

        Return a 400 response with an `error` message when a required value is
        missing or invalid, when the ScannableURI has already finished, when it
        already has the requested status, when `scan_status` is a known status
        other than 'failed' or 'scanned', or when indexing fails. Return a 404
        response when no ScannableURI has the given UUID.
        """
        scannable_uri_uuid = request.data.get('scannable_uri_uuid')
        scan_status = request.data.get('scan_status')
        if not scannable_uri_uuid:
            return self._error_response('missing scannable_uri_uuid')

        if not scan_status:
            return self._error_response('missing scan_status')

        if not validate_uuid(scannable_uri_uuid):
            return self._error_response(
                f'invalid scannable_uri_uuid: {scannable_uri_uuid}'
            )

        # An unknown UUID is a client error, not a server error.
        try:
            scannable_uri = ScannableURI.objects.get(uuid=scannable_uri_uuid)
        except ScannableURI.DoesNotExist:
            return self._error_response(
                f'no scannable_uri found for scannable_uri_uuid: {scannable_uri_uuid}',
                http_status=status.HTTP_404_NOT_FOUND,
            )

        scannable_uri_status = ScannableURI.SCAN_STATUSES_BY_CODE.get(scannable_uri.scan_status)
        scan_status_code = ScannableURI.SCAN_STATUS_CODES_BY_SCAN_STATUS.get(scan_status)

        # NOTE(review): a status whose code is falsy (e.g. 0) is also rejected
        # here -- confirm against the ScannableURI status codes.
        if not scan_status_code:
            return self._error_response(f'invalid scan_status: {scan_status}')

        if scannable_uri.scan_status in [
            ScannableURI.SCAN_INDEXED,
            ScannableURI.SCAN_FAILED,
            ScannableURI.SCAN_TIMEOUT,
            ScannableURI.SCAN_INDEX_FAILED,
        ]:
            return self._error_response(
                f'cannot update status for scannable_uri {scannable_uri_uuid}: '
                f'scannable_uri has finished with status "{scannable_uri_status}"'
            )

        if scan_status == scannable_uri_status:
            return self._error_response(
                f'cannot update status for scannable_uri {scannable_uri_uuid}: '
                f'scannable_uri status is already "{scannable_uri_status}"'
            )

        if scan_status == 'failed':
            scannable_uri.scan_error = request.data.get('scan_log')
            scannable_uri.scan_status = ScannableURI.SCAN_FAILED
            scannable_uri.wip_date = None
            scannable_uri.save()
            msg = {
                'status': f'updated scannable_uri {scannable_uri_uuid} scan_status to {scan_status}'
            }
            return Response(msg)

        if scan_status == 'scanned':
            scan_results_file = request.data.get('scan_results_file')
            scan_summary_file = request.data.get('scan_summary_file')
            project_extra_data = request.data.get('project_extra_data')

            expected = {
                'scan_results_file': scan_results_file,
                'scan_summary_file': scan_summary_file,
                'project_extra_data': project_extra_data,
            }
            missing = [name for name, value in expected.items() if value is None]
            if missing:
                return self._error_response(f'missing {", ".join(missing)}')

            # Values of the wrong type or with malformed JSON are reported to
            # the caller instead of crashing the request.
            try:
                scan_data = json.load(scan_results_file)
                summary_data = json.load(scan_summary_file)
                project_extra_data = json.loads(project_extra_data)
            except (AttributeError, TypeError, ValueError) as e:
                return self._error_response(
                    f'invalid scan data for scannable_uri {scannable_uri_uuid}: {e}'
                )

            scannable_uri.scan_status = ScannableURI.SCAN_COMPLETED

            indexing_errors = index_package(
                scannable_uri,
                scannable_uri.package,
                scan_data,
                summary_data,
                project_extra_data,
                reindex=scannable_uri.reindex_uri,
            )
            scannable_uri.wip_date = None
            if indexing_errors:
                scannable_uri.scan_status = ScannableURI.SCAN_INDEX_FAILED
                scannable_uri.index_error = indexing_errors
                # Persist the failure before returning so that it is not lost.
                scannable_uri.save()
                return self._error_response(
                    f'scan index failed for scannable_uri {scannable_uri_uuid}'
                )

            scannable_uri.scan_status = ScannableURI.SCAN_INDEXED
            scannable_uri.save()
            msg = {
                'status': f'scan indexed for scannable_uri {scannable_uri_uuid}'
            }
            return Response(msg)

        # Known status that this endpoint has no handling for: report it
        # instead of failing on an unbound response.
        return self._error_response(
            f'scan_status "{scan_status}" cannot be set through update_status'
        )
30 changes: 30 additions & 0 deletions minecode/management/commands/create-scan-queue-worker-user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from django.contrib.auth.models import Group
from minecode.management.user_creation import CreateUserCommand


class Command(CreateUserCommand):
    help = 'Create a user and generate an API key for a scan queue worker'

    def handle(self, *args, **options):
        """
        Create a user from the command `options`, then add that user to the
        `scan_queue_workers` group, creating the group if it does not exist.
        """
        username = options['username']
        new_user = self.create_user(
            username=username,
            interactive=options['interactive'],
            verbosity=options['verbosity'],
        )

        # Membership in this group is what marks the user as a scan queue worker.
        workers_group, _created = Group.objects.get_or_create(name='scan_queue_workers')
        workers_group.user_set.add(new_user)

        self.stdout.write(
            f'User {username} added to `scan_queue_workers` group',
            self.style.SUCCESS,
        )
24 changes: 24 additions & 0 deletions minecode/management/commands/create-user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from minecode.management.user_creation import CreateUserCommand


class Command(CreateUserCommand):
    # This command creates a plain user: unlike the scan queue worker command,
    # it does not add the user to any group, so the help text must not claim
    # that the user is a scan queue worker.
    help = 'Create a user and generate an API key'

    def handle(self, *args, **options):
        """
        Create a user from the `username`, `interactive` and `verbosity`
        command options.
        """
        username = options['username']
        interactive = options['interactive']
        verbosity = options['verbosity']
        self.create_user(
            username=username,
            interactive=interactive,
            verbosity=verbosity
        )
1 change: 0 additions & 1 deletion minecode/management/commands/import_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from minecode.visitors.maven import get_artifact_sha1
from minecode.model_utils import merge_or_create_package
from packagedcode.models import PackageData
from packagedb.models import Package
from minecode.visitors.maven import determine_namespace_name_version_from_url


Expand Down
Loading