From 3d1726af29e0569adb78dd1afb95f2c1778fedd5 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 21 Mar 2024 13:01:38 +0530 Subject: [PATCH 1/5] Add collect_symbols to the default pipelines - Use the `collect_symbols` add-on pipeline to collect the resource symbols. Signed-off-by: Keshav Priyadarshi --- minecode/model_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index f0c78b77..7cf39d8f 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -29,6 +29,7 @@ DEFAULT_PIPELINES = ( 'scan_single_package', 'fingerprint_codebase', + 'collect_symbols', ) From b1368bf3cac43c0105e5cda1d74e593f80acb776 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 21 Mar 2024 13:10:49 +0530 Subject: [PATCH 2/5] Store extra_data during resource creation Signed-off-by: Keshav Priyadarshi --- minecode/model_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index 7cf39d8f..a108227b 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -1,3 +1,4 @@ +import copy import logging import sys @@ -433,6 +434,10 @@ def merge_or_create_resource(package, resource_data): try: resource = Resource.objects.get(package=package, path=path) except Resource.DoesNotExist: + extra_data = copy.deepcopy(resource_data.get('extra_data', {})) + extra_data.pop("directory_content", None) + extra_data.pop("directory_structure", None) + resource = Resource( package=package, path=path, @@ -451,6 +456,7 @@ def merge_or_create_resource(package, resource_data): is_archive=resource_data.get('is_archive'), is_media=resource_data.get('is_media'), is_key_file=resource_data.get('is_key_file'), + extra_data=extra_data, ) created = True _ = resource.set_scan_results(resource_data, save=True) From 20f2474f26cd099f12b756eac5a749731fe1b6c2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 22 Mar 2024 19:07:30 +0530 Subject: [PATCH 3/5] Update extra_data for existing resource Signed-off-by: Keshav Priyadarshi --- minecode/model_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index a108227b..9c65a8c2 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -431,13 +431,14 @@ def merge_or_create_resource(package, resource_data): created = False resource = None path = resource_data.get('path') + + extra_data = copy.deepcopy(resource_data.get('extra_data', {})) + extra_data.pop("directory_content", None) + extra_data.pop("directory_structure", None) + try: resource = Resource.objects.get(package=package, path=path) except Resource.DoesNotExist: - extra_data = copy.deepcopy(resource_data.get('extra_data', {})) - extra_data.pop("directory_content", None) - extra_data.pop("directory_structure", None) - resource = Resource( package=package, path=path, @@ -460,4 +461,5 @@ def merge_or_create_resource(package, resource_data): ) created = True _ = resource.set_scan_results(resource_data, save=True) + resource.update_extra_data(extra_data) return resource, created, merged From 4ba2b1d22bd4a8483cf364274ebc5c71830ba385 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 22 Mar 2024 19:09:48 +0530 Subject: [PATCH 4/5] Add test for extra_data update Signed-off-by: Keshav Priyadarshi --- minecode/tests/test_model_utils.py | 59 +++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/minecode/tests/test_model_utils.py b/minecode/tests/test_model_utils.py index 908a8bc0..efd9064b 100644 --- a/minecode/tests/test_model_utils.py +++ b/minecode/tests/test_model_utils.py @@ -10,11 +10,12 @@ from datetime import timedelta import os +from django.test import TransactionTestCase from django.utils import timezone -from minecode.model_utils import merge_or_create_package +from minecode.model_utils import merge_or_create_package, merge_or_create_resource from minecode.utils_test import JsonBasedTesting, MiningTestCase -from packagedb.models import Package +from packagedb.models import Package, Resource from packagedcode.maven import _parse @@ -95,3 +96,57 @@ def test_merge_or_create_package_merge_package(self): updated_fields = data['updated_fields'] expected_updated_fields_loc = self.get_test_loc('model_utils/expected_updated_fields.json') self.check_expected_results(updated_fields, expected_updated_fields_loc, regen=False) + + +class MergeORCreateResourceTest(TransactionTestCase): + def setUp(self): + self.package = Package.objects.create(download_url='test-pkg.com') + self.resource_path = 'root/test.c' + self.old_extra_data = { + "source_symbols":[ + "Old-symb1", + "Old-symb2", + ] + } + + self.new_extra_data = { + "source_symbols":[ + "New-symb1", + "New-symb2", + ] + } + + self.resource = Resource.objects.create(package=self.package, path=self.resource_path, extra_data=self.old_extra_data) + + def test_merge_or_create_resource_update(self): + self.assertEqual(self.old_extra_data, self.resource.extra_data) + + merge_or_create_resource( + self.package, + {"extra_data":self.new_extra_data, "path":self.resource_path}, + ) + self.resource.refresh_from_db() + + self.assertEqual(self.new_extra_data, self.resource.extra_data) + + def test_merge_or_create_resource_create(self): + merge_or_create_resource( + self.package, + { + 'type': 'file', + 'name':"test_new", + 'extension':".c", + 'is_binary':False, + 'is_text':False, + 'is_archive':False, + 'is_media':False, + 'is_key_file':False, + "extra_data":self.new_extra_data, + "path":"root/test_new.c"}, + ) + + resource = Resource.objects.get(path="root/test_new.c") + self.assertEqual(self.new_extra_data, resource.extra_data) + + + From 40e035e167e5d01b1c13489f669d05de85f22843 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Sat, 23 Mar 2024 00:27:10 +0530 Subject: [PATCH 5/5] Rename merge_or_create_resource to update_or_create_resource Signed-off-by: Keshav Priyadarshi --- minecode/management/indexing.py | 4 +- minecode/model_utils.py | 7 +-- minecode/tests/test_model_utils.py | 75 ++++++++++++++++-------------- 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/minecode/management/indexing.py b/minecode/management/indexing.py index 593e53d7..2e435ba9 100644 --- a/minecode/management/indexing.py +++ b/minecode/management/indexing.py @@ -7,7 +7,7 @@ from minecode.management.commands import get_error_message import logging import sys -from minecode.model_utils import merge_or_create_resource +from minecode.model_utils import update_or_create_resource from packagedcode.utils import combine_expressions import traceback from minecode.models import ScannableURI @@ -37,7 +37,7 @@ def index_package_files(package, scan_data, reindex=False): try: logger.info(f'Indexing Resources and fingerprints related to {package.package_url} from scan data') for resource in scan_data.get('files', []): - r, _, _ = merge_or_create_resource(package, resource) + r, _, _ = update_or_create_resource(package, resource) path = r.path sha1 = r.sha1 if sha1: diff --git a/minecode/model_utils.py b/minecode/model_utils.py index 9c65a8c2..cf557488 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -418,7 +418,7 @@ def merge_or_create_package(scanned_package, visit_level): return package, created, merged, map_error -def merge_or_create_resource(package, resource_data): +def update_or_create_resource(package, resource_data): """ Using Resource data from `resource_data`, create or update the corresponding purldb Resource from `package`. @@ -427,7 +427,7 @@ def merge_or_create_resource(package, resource_data): `resource`, as well as booleans representing whether the Resource was created or if the Resources scan field data was updated. """ - merged = False + updated = False created = False resource = None path = resource_data.get('path') @@ -438,6 +438,7 @@ def merge_or_create_resource(package, resource_data): try: resource = Resource.objects.get(package=package, path=path) + updated = True except Resource.DoesNotExist: resource = Resource( package=package, @@ -462,4 +463,4 @@ def merge_or_create_resource(package, resource_data): created = True _ = resource.set_scan_results(resource_data, save=True) resource.update_extra_data(extra_data) - return resource, created, merged + return resource, created, updated diff --git a/minecode/tests/test_model_utils.py b/minecode/tests/test_model_utils.py index efd9064b..2ad10150 100644 --- a/minecode/tests/test_model_utils.py +++ b/minecode/tests/test_model_utils.py @@ -7,17 +7,18 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from datetime import timedelta import os from django.test import TransactionTestCase -from django.utils import timezone - -from minecode.model_utils import merge_or_create_package, merge_or_create_resource -from minecode.utils_test import JsonBasedTesting, MiningTestCase -from packagedb.models import Package, Resource from packagedcode.maven import _parse +from minecode.model_utils import merge_or_create_package +from minecode.model_utils import update_or_create_resource +from minecode.utils_test import JsonBasedTesting +from minecode.utils_test import MiningTestCase +from packagedb.models import Package +from packagedb.models import Resource + class ModelUtilsTestCase(MiningTestCase, JsonBasedTesting): BASE_DIR = os.path.join(os.path.dirname(__file__), 'testfiles') @@ -98,55 +99,57 @@ def test_merge_or_create_package_merge_package(self): self.check_expected_results(updated_fields, expected_updated_fields_loc, regen=False) -class MergeORCreateResourceTest(TransactionTestCase): +class UpdateORCreateResourceTest(TransactionTestCase): def setUp(self): - self.package = Package.objects.create(download_url='test-pkg.com') - self.resource_path = 'root/test.c' + self.package = Package.objects.create(download_url="test-pkg.com") + self.resource_path = "root/test.c" self.old_extra_data = { - "source_symbols":[ + "source_symbols": [ "Old-symb1", "Old-symb2", ] } - + self.new_extra_data = { - "source_symbols":[ + "source_symbols": [ "New-symb1", "New-symb2", ] } - self.resource = Resource.objects.create(package=self.package, path=self.resource_path, extra_data=self.old_extra_data) + self.resource = Resource.objects.create( + package=self.package, + path=self.resource_path, + extra_data=self.old_extra_data, + ) - def test_merge_or_create_resource_update(self): + def test_update_or_create_resource_update(self): self.assertEqual(self.old_extra_data, self.resource.extra_data) - merge_or_create_resource( - self.package, - {"extra_data":self.new_extra_data, "path":self.resource_path}, - ) + update_or_create_resource( + self.package, + {"extra_data": self.new_extra_data, "path": self.resource_path}, + ) self.resource.refresh_from_db() self.assertEqual(self.new_extra_data, self.resource.extra_data) - - def test_merge_or_create_resource_create(self): - merge_or_create_resource( - self.package, + + def test_update_or_create_resource_create(self): + update_or_create_resource( + self.package, { - 'type': 'file', - 'name':"test_new", - 'extension':".c", - 'is_binary':False, - 'is_text':False, - 'is_archive':False, - 'is_media':False, - 'is_key_file':False, - "extra_data":self.new_extra_data, - "path":"root/test_new.c"}, - ) + "type": "file", + "name": "test_new", + "extension": ".c", + "is_binary": False, + "is_text": False, + "is_archive": False, + "is_media": False, + "is_key_file": False, + "extra_data": self.new_extra_data, + "path": "root/test_new.c", + }, + ) resource = Resource.objects.get(path="root/test_new.c") self.assertEqual(self.new_extra_data, resource.extra_data) - - -