diff --git a/docs/source/img/Screenshot from 2024-11-26 17-05-51.png b/docs/source/img/Screenshot from 2024-11-26 17-05-51.png deleted file mode 100644 index 2bee704..0000000 Binary files a/docs/source/img/Screenshot from 2024-11-26 17-05-51.png and /dev/null differ diff --git a/docs/source/img/Screenshot from 2024-11-26 17-07-43.png b/docs/source/img/Screenshot from 2024-11-26 17-07-43.png deleted file mode 100644 index bc7f14a..0000000 Binary files a/docs/source/img/Screenshot from 2024-11-26 17-07-43.png and /dev/null differ diff --git a/docs/source/img/Screenshot from 2024-11-26 17-16-42.png b/docs/source/img/Screenshot from 2024-11-26 17-16-42.png deleted file mode 100644 index 14ee564..0000000 Binary files a/docs/source/img/Screenshot from 2024-11-26 17-16-42.png and /dev/null differ diff --git a/docs/source/img/tutorial_getting_started_repo_vulnerablecode_discover.png b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_discover.png new file mode 100644 index 0000000..2e59be3 Binary files /dev/null and b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_discover.png differ diff --git a/docs/source/img/tutorial_getting_started_repo_vulnerablecode_link_add.png b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_link_add.png new file mode 100644 index 0000000..b33ed35 Binary files /dev/null and b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_link_add.png differ diff --git a/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success.png b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success.png new file mode 100644 index 0000000..2c089ed Binary files /dev/null and b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success.png differ diff --git a/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success_link.png b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success_link.png new file mode 100644 index 0000000..2d0c0b6 Binary files /dev/null and 
b/docs/source/img/tutorial_getting_started_repo_vulnerablecode_success_link.png differ diff --git a/docs/source/img/tutorial_getting_started_user_creation.png b/docs/source/img/tutorial_getting_started_user_creation.png new file mode 100644 index 0000000..920576b Binary files /dev/null and b/docs/source/img/tutorial_getting_started_user_creation.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index c2fd803..bdcc21a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -25,7 +25,10 @@ you will find information on: :caption: Tutorial tutorial_getting_started + tutorial_sync_scan + tutorial_sync_vuln tutorial_remote_subscribe_package_update + tutorial_federate .. toctree:: :maxdepth: 2 @@ -36,7 +39,6 @@ you will find information on: command-line-interface rest-api application-settings - run-application Indices and tables ================== diff --git a/docs/source/run-application.rst b/docs/source/run-application.rst deleted file mode 100644 index 19d59fa..0000000 --- a/docs/source/run-application.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. _run-application: - -How to Run the Application -=========================== - -This section provides an example of how to run the Federated Code application. - - -Step 1: Create a Superuser ---------------------------- -Run the following command to create a superuser: - -.. code-block:: bash - - python manage.py createsuperuser - - -Step 2: Create a Service Account ---------------------------------- -Only superusers can create a Service account (e.g., VCIO or SCIO). - -1. Access the Django admin panel. -http://127.0.0.1:8000/admin/ - -2. Create a new user through the admin interface. -http://127.0.0.1:8000/admin/auth/user/add/ - -3. Assign the newly created user as a Service account. 
-http://127.0.0.1:8000/admin/fedcode/service/ - - -Step 3: Service Login and Git Repository Creation --------------------------------------------------- -Now you have a Service login ex `VCIO` use this credential to create a Git Repository -https://github.com/aboutcode-org/vulnerablecode-data - -Visit http://127.0.0.1:8000/create-repo - -Step 4: --------- -To initiate a sync of the master branch, you can either click on the sync request -button in the app or send an HTTP request directly to the endpoint using the auth service. -The endpoint is: `repository//sync-repo/` - -Visit http://127.0.0.1:8000/repo-list. - - -Alternatively, you can integrate this action with GitHub Actions or any other -CI tool to trigger the sync automatically whenever new data is pushed to the -master branch. - -Here’s an example of how to send the request manually using curl: -ex: - -.. code-block:: bash - - curl -v -X POST \ - -H "Authorization: Token your-auth-token" \ - http://127.0.0.1:8000/repository//sync-repo/ - - -**Note:** You can retrieve the service token after authenticating via the endpoint -`api/v0/users/@`. - -Step 5: --------- -The admin should regularly run the following commands, either manually or in a loop: - -.. code-block:: bash - - python manage.py sync && python manage.py federate - -Users can now log in or sign up, create and review metadata, and vote for packages. - -Happy Federation, everyone! diff --git a/docs/source/tutorial_federate.rst b/docs/source/tutorial_federate.rst new file mode 100644 index 0000000..48fe61f --- /dev/null +++ b/docs/source/tutorial_federate.rst @@ -0,0 +1,42 @@ +Federating Package Activity and Repository Sync +=============================================== + +Federating Package Activity +---------------------------- + +Run the following command to send activity updates to existing subscribers of the package + +.. 
code-block:: bash + + python manage.py federate + +Notifying FederatedCode of Repository Changes +---------------------------------------------- + +To create a sync request to a FederatedCode instance + +you can send an HTTP POST request directly to this endpoint: +`repository//sync-repo/` + +Example: +http://127.0.0.1:8000/repository/3g8d-4e5d-abff-90865d1e13b1/sync-repo/ + +**Note:** You can find the repository ID by visiting http://127.0.0.1:8000/repo-list. + +Here’s an example of how to send the request manually using `curl`: + +.. code-block:: bash + + curl -v -X POST \ + -H "Authorization: Token your-auth-token" \ + http://127.0.0.1:8000/repository//sync-repo/ + +**Note:** You can retrieve the service token after authenticating via this endpoint: +`api/v0/auth/token/` + +.. note:: + You can also integrate this action with GitHub Actions + or any other CI tool to automatically trigger + the sync request whenever new data is pushed to the main branch. + + diff --git a/docs/source/tutorial_getting_started.rst b/docs/source/tutorial_getting_started.rst index 263aaa4..af044c5 100644 --- a/docs/source/tutorial_getting_started.rst +++ b/docs/source/tutorial_getting_started.rst @@ -4,14 +4,14 @@ Getting Started =============== In this tutorial, we will guide you through the step-by-step process of getting started with syncing -and federating package metadata. +and federating package metadata and vulnerability metadata. .. note:: This tutorial assumes that you have a working installation of FederatedCode. If you don't, please refer to the :ref:`installation` page. Create Admin User ------------------ +------------------- .. code-block:: bash @@ -25,53 +25,17 @@ Navigate to http://127.0.0.1:8000/admin/ and log in using the credentials create .. image:: img/tutorial_getting_started_admin.jpg Create Service User -------------------- +-------------------- +1. Go to http://127.0.0.1:8000/admin/fedcode/service/add/ and create a service. 
-Go to http://127.0.0.1:8000/admin/fedcode/service/add/ and create a service. Select the newly created superuser as the user, and leave the "remote-actor" field empty. +2. Select the newly created superuser as the user, and leave the "remote-actor" field empty. .. image:: img/tutorial_getting_started_service_creation.jpg -Clone the FederatedCode Data Repository ---------------------------------------- - -Visit https://github.com/aboutcode-org/aboutcode-packages-npm-385 and clone the repository. - -Add Data Repository in FederatedCode -------------------------------------- - -Go to http://127.0.0.1:8000/create-repo and add the repository URL: https://github.com//aboutcode-packages-npm-385 - -.. image:: img/tutorial_getting_started_repo_add.jpg - -Sync Package Scan ------------------ - -Run the following command to sync the scan metadata from the FederatedCode Git repository - -.. code-block:: bash - - python manage.py sync sync_scancode_scans - -Federate Package Activity -------------------------- - -Run the following command to send activity updates to existing subscribers of the package - -.. code-block:: bash - - python manage.py federate - -Click on `Packages` link --------------------------- - -.. image:: img/tutorial_getting_started_step_packages.jpg - -Click on any PURL link ----------------------- +.. note:: + Or sign up and ask the FederatedCode admin to promote your account + to a service account. -.. image:: img/tutorial_getting_started_step_package_list.jpg + .. image:: img/tutorial_getting_started_user_creation.png -Package Activity ----------------- -.. image:: img/tutorial_getting_started_step_package_activity.jpg diff --git a/docs/source/tutorial_sync_scan.rst b/docs/source/tutorial_sync_scan.rst new file mode 100644 index 0000000..58f0611 --- /dev/null +++ b/docs/source/tutorial_sync_scan.rst @@ -0,0 +1,42 @@ +.. 
_tutorial_federate_scan: + +Syncing ScanCode Metadata with FederatedCode +============================================== + +Fork the FederatedCode Data Package Scan Repository +----------------------------------------------------- + +Visit https://github.com/aboutcode-org/aboutcode-packages-npm-385 and fork the repository. + +Add Data Repository in FederatedCode +------------------------------------- + +Go to http://127.0.0.1:8000/create-repo and add the repository URL: https://github.com//aboutcode-packages-npm-385, and click Submit. + +.. image:: img/tutorial_getting_started_repo_add.jpg + +Sync Package Scan +----------------- + +Run the following command to sync the scan metadata from the FederatedCode Git repository + +.. code-block:: bash + + python manage.py sync sync_scancode_scans + + +Click on `Packages` link +-------------------------- + +.. image:: img/tutorial_getting_started_step_packages.jpg + +Click on any PURL link +---------------------- + +.. image:: img/tutorial_getting_started_step_package_list.jpg + +Package Activity +---------------- + +.. image:: img/tutorial_getting_started_step_package_activity.jpg + diff --git a/docs/source/tutorial_sync_vuln.rst b/docs/source/tutorial_sync_vuln.rst new file mode 100644 index 0000000..1abf90c --- /dev/null +++ b/docs/source/tutorial_sync_vuln.rst @@ -0,0 +1,48 @@ +.. _tutorial_federate_vuln: + + +Syncing VulnerableCode Metadata with FederatedCode +============================================================= + +Fork the FederatedCode Data Vulnerablecode Repository +------------------------------------------------------ + +Visit https://github.com/aboutcode-data/vulnerablecode-data and fork the repository. + + +Add Data Repository in FederatedCode +------------------------------------- + +Go to http://127.0.0.1:8000/create-repo and add the repository URL: https://github.com//vulnerablecode-data, and **Click "Submit" button**. + +1. .. 
@classmethod
def bulk_federate(cls, activities):
    """
    Federate several activities, batched by their set of recipients.

    ``activities`` is an iterable of mappings, each providing the keys
    "targets", "body" and "key_id". Activities whose targets are equal once
    sorted land in the same batch. Batches are sent in the order their
    recipient set was first seen, and activities inside a batch keep their
    input order. Every activity results in one ``cls.federate`` call that
    receives a fresh list of the sorted targets.
    """
    batches = {}
    for item in activities:
        # Sorting makes the key independent of the order targets were listed in.
        recipients = tuple(sorted(item["targets"]))
        batches.setdefault(recipients, []).append(item)

    deliveries = [
        (recipients, item) for recipients, batch in batches.items() for item in batch
    ]
    for recipients, item in deliveries:
        cls.federate(
            targets=list(recipients),
            body=item["body"],
            key_id=item["key_id"],
        )
class ActiveManager(models.Manager):
    """Manager whose base queryset only contains rows with ``is_deleted=False``."""

    def get_queryset(self):
        """Return the parent manager's queryset narrowed to non-deleted rows."""
        every_row = super().get_queryset()
        return every_row.filter(is_deleted=False)
-6,22 +6,29 @@ # See https://github.com/nexB/federatedcode for support or download. # See https://aboutcode.org for more information about AboutCode.org OSS projects. # - +import json import logging -import os.path from itertools import zip_longest +from pathlib import Path import saneyaml +from django.db.models import Case +from django.db.models import Q +from django.db.models import TextField +from django.db.models import Value +from django.db.models import When +from aboutcode.hashid import get_core_purl from aboutcode.pipeline import LoopProgress from fedcode.activitypub import Activity +from fedcode.activitypub import CreateActivity +from fedcode.activitypub import DeleteActivity from fedcode.activitypub import UpdateActivity from fedcode.models import Note from fedcode.models import Package from fedcode.models import Repository from fedcode.models import Vulnerability from fedcode.pipelines import FederatedCodePipeline -from fedcode.pipes import utils class SyncVulnerableCode(FederatedCodePipeline): @@ -40,6 +47,10 @@ def get_git_repos(self): self.git_repos = Repository.objects.all() def sync_vulnerablecode_repositories(self): + """ + Sync repositories + For vulnerablecode-data we have 3 files types vulnerabilities.yml, purls.yml, VCID-1ues-ahar-buaa.yml + """ repositories_count = self.git_repos.count() self.log(f"Syncing vulnerability from {repositories_count:,d} repositories") @@ -56,6 +67,7 @@ def sync_vulnerabilities(repository, logger): repo = repository.git_repo_obj latest_commit_hash = repo.head.commit.hexsha latest_commit = repo.commit(latest_commit_hash) + if repository.last_imported_commit: last_imported_commit = repo.commit(repository.last_imported_commit) diffs = last_imported_commit.diff(latest_commit) @@ -72,8 +84,18 @@ def sync_vulnerabilities(repository, logger): logger(f"Syncing {diff_count:,d} vulnerability scan from {repository.url}") progress = LoopProgress(total_iterations=diff_count, logger=logger) + vul_files_processed = 0 + 
purl_files_processed = 0 + pkg_changes = { + "create": [], + "update": [], + "delete": set(), + } + + vul_changes = {"create": [], "update": [], "delete": set()} + for diff in progress.iter(diffs): - if not diff.a_path.endswith(".yaml"): + if not diff.a_path.endswith(".yml"): continue if diff.a_path.startswith("."): @@ -82,100 +104,414 @@ def sync_vulnerabilities(repository, logger): yaml_data_a_blob = saneyaml.load(diff.a_blob.data_stream.read()) if diff.a_blob else None yaml_data_b_blob = saneyaml.load(diff.b_blob.data_stream.read()) if diff.b_blob else None - if os.path.split(diff.a_path)[1].startswith("VCID") or os.path.split(diff.b_path)[ - 1 - ].startswith("VCID"): + a_name = Path(diff.a_path).name + b_name = Path(diff.b_path).name + + if a_name == "vulnerabilities.yml" or b_name == "vulnerabilities.yml": + note_handler( + diff.change_type, repository.admin, yaml_data_a_blob, yaml_data_b_blob, logger + ) + + elif a_name == "purls.yml" or b_name == "purls.yml": + purl_files_processed += 1 + pkg_handler( + diff.change_type, + repository.admin, + yaml_data_a_blob, + yaml_data_b_blob, + logger, + pkg_changes, + ) + + if purl_files_processed % 10000 == 0: + logger( + f"Processed {purl_files_processed} purls.yml files, flushing bulk changes..." 
def vul_handler(change_type, repo_obj, yaml_data_a_blob, yaml_data_b_blob, logger, vul_changes):
    """
    Collect changes in VCID-XXXX-XXXX-XXXX.yml files for bulk processing.

    Nothing is written to the database here; the change is only recorded in
    ``vul_changes``, a dict with a "create" list (unsaved ``Vulnerability``
    instances), an "update" list of ``(vulnerability_id, repo_obj)`` tuples and
    a "delete" set of ``(vulnerability_id, repo_obj)`` tuples.

    ``change_type`` is the git diff change type: "A" (added), "M" (modified),
    "R" (renamed) or "D" (deleted). ``yaml_data_a_blob`` / ``yaml_data_b_blob``
    are the parsed old / new file contents, or None when that side is absent.
    Problems are reported through ``logger`` at ERROR level instead of raising.
    """
    vulnerability_a_id = yaml_data_a_blob.get("vulnerability_id") if yaml_data_a_blob else None
    vulnerability_b_id = yaml_data_b_blob.get("vulnerability_id") if yaml_data_b_blob else None

    if change_type == "A":  # Added
        if vulnerability_b_id:
            vul_changes["create"].append(Vulnerability(id=vulnerability_b_id, repo=repo_obj))
        else:
            logger(
                "Invalid Vulnerability File: missing vulnerability_id in added entry",
                level=logging.ERROR,
            )

    elif change_type in ("M", "R"):  # Modified or Renamed
        id_changed = vulnerability_a_id != vulnerability_b_id

        if vulnerability_a_id and id_changed:
            vul_changes["delete"].add((vulnerability_a_id, repo_obj))

        if not vulnerability_b_id:
            logger(
                "Invalid Vulnerability File: missing vulnerability_id in new entry",
                level=logging.ERROR,
            )
        elif id_changed:
            # The new ID has no row yet, so it must be queued for creation;
            # queuing it as an update would only touch rows that already exist.
            vul_changes["create"].append(Vulnerability(id=vulnerability_b_id, repo=repo_obj))
        else:
            vul_changes["update"].append((vulnerability_b_id, repo_obj))

    elif change_type == "D":  # Deleted
        if vulnerability_a_id:
            vul_changes["delete"].add((vulnerability_a_id, repo_obj))
        else:
            logger(
                "Invalid Vulnerability File: missing vulnerability_id in deleted entry",
                level=logging.ERROR,
            )

    else:
        # Same wording as the other handlers in this module for unexpected types.
        logger(f"Unknown change_type: {change_type}", level=logging.ERROR)
+ """ + if change_type == "A": - package = yaml_data_b_blob.get("package") + for purl in yaml_data_b_blob or []: + core_purl = get_core_purl(purl) + pkg_changes["create"].append(Package(purl=core_purl, service=default_service)) - pkg, _ = Package.objects.get_or_create(purl=package, service=default_service) + elif change_type == "M": + for package_a, package_b in zip_longest(yaml_data_a_blob or [], yaml_data_b_blob or []): + if not package_a or not package_b: + continue + core_purl_a = get_core_purl(package_a) + core_purl_b = get_core_purl(package_b) + pkg_changes["update"].append((core_purl_a, core_purl_b, default_service)) - for version in yaml_data_b_blob.get("versions", []): - utils.create_note(pkg, version) + elif change_type == "D": + for purl in yaml_data_a_blob or []: + if not purl: + logger("Invalid PURL in deleted entry", level=logging.ERROR) + continue + core_purl = get_core_purl(purl) + pkg_changes["delete"].add((core_purl, default_service)) - elif change_type == "M": - old_package = yaml_data_a_blob.get("package") - new_package = yaml_data_b_blob.get("package") - - pkg = Package.objects.get(purl=old_package, service=default_service) - pkg.purl = new_package - pkg.save() - - for version_a, version_b in zip_longest( - yaml_data_a_blob.get("versions", []), yaml_data_b_blob.get("versions", []) - ): - if version_b and not version_a: - utils.create_note(pkg, version_b) - - if version_a and not version_b: - utils.delete_note(pkg, version_a) - - if version_a and version_b: - note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(version_a)) - if note.content == saneyaml.dump(version_b): - continue - - note.content = saneyaml.dump(version_b) - note.save() - - update_activity = UpdateActivity(actor=pkg.to_ap, object=note.to_ap) - Activity.federate( - targets=pkg.followers_inboxes, - body=update_activity.to_ap(), - key_id=pkg.key_id, + else: + logger(f"Unknown change_type: {change_type}", level=logging.ERROR) + + +def flush_pkg_changes(pkg_changes, logger): 
def flush_vul_changes(vul_changes, logger):
    """
    Write the vulnerability changes collected in ``vul_changes`` to the database.

    ``vul_changes`` holds a "create" list of unsaved ``Vulnerability`` objects,
    an "update" list of ``(vulnerability_id, repo)`` tuples and a "delete"
    collection of ``(vulnerability_id, repo)`` tuples. Creations are applied
    first, then updates, then deletions. ``logger`` is accepted for signature
    parity with ``flush_pkg_changes`` and is not used.
    """
    if vul_changes["create"]:
        Vulnerability.objects.bulk_create(vul_changes["create"], ignore_conflicts=True)

    # An "update" entry carries no new field values: the row is looked up by the
    # very (id, repo) pair it would be updated with. Rows that already exist
    # therefore need no write; rows that are missing must be inserted, because
    # bulk_update() only issues UPDATE statements and never creates rows.
    missing = [
        Vulnerability(id=vul_id, repo=repo_obj)
        for vul_id, repo_obj in vul_changes["update"]
        if not Vulnerability.objects.filter(id=vul_id, repo=repo_obj).exists()
    ]
    if missing:
        Vulnerability.objects.bulk_create(missing, ignore_conflicts=True)

    # Delete per repository so only the queued (id, repo) pairs are removed;
    # filtering on id__in and repo__in together would match every combination
    # of the queued ids and repos.
    ids_by_repo = {}
    for vul_id, repo_obj in vul_changes["delete"]:
        ids_by_repo.setdefault(repo_obj, []).append(vul_id)

    for repo_obj, vul_ids in ids_by_repo.items():
        Vulnerability.objects.filter(id__in=vul_ids, repo=repo_obj).delete()
+ """ + notes_to_create = [] + notes_to_update = [] + notes_to_delete = [] + + purls_to_fetch = set() + for pkg_status_a, pkg_status_b in zip_longest(yaml_data_a_blob or [], yaml_data_b_blob or []): + if pkg_status_a: + purl_a = pkg_status_a.get("purl") + if purl_a: + purls_to_fetch.add(get_core_purl(purl_a)) + else: + logger("Invalid Vulnerability File: missing purl in old entry", level=logging.ERROR) + + if pkg_status_b: + purl_b = pkg_status_b.get("purl") + if purl_b: + purls_to_fetch.add(get_core_purl(purl_b)) + else: + logger("Invalid Vulnerability File: missing purl in new entry", level=logging.ERROR) + + packages_map = {} + if purls_to_fetch: + existing_packages = Package.objects.filter(purl__in=purls_to_fetch, service=default_service) + packages_map = {pkg.purl: pkg for pkg in existing_packages} + + missing_purls = purls_to_fetch - set(packages_map.keys()) + if missing_purls: + new_packages = [Package(purl=purl, service=default_service) for purl in missing_purls] + Package.objects.bulk_create(new_packages, ignore_conflicts=True) + refreshed = Package.objects.filter(purl__in=missing_purls, service=default_service) + packages_map.update({pkg.purl: pkg for pkg in refreshed}) + + for pkg_status_a, pkg_status_b in zip_longest(yaml_data_a_blob or [], yaml_data_b_blob or []): + pkg_a = pkg_b = None + + if pkg_status_a and pkg_status_a.get("purl"): + core_purl_a = get_core_purl(pkg_status_a["purl"]) + pkg_a = packages_map.get(str(core_purl_a)) + + if pkg_status_b and pkg_status_b.get("purl"): + core_purl_b = get_core_purl(pkg_status_b["purl"]) + pkg_b = packages_map.get(str(core_purl_b)) + + if change_type == "A": + if pkg_status_b and pkg_b: + notes_to_create.append((pkg_b, pkg_status_b)) + + elif change_type == "M": + if pkg_status_a and not pkg_status_b and pkg_a: + notes_to_delete.append((pkg_a, pkg_status_a)) + + elif pkg_status_b and not pkg_status_a and pkg_b: + notes_to_create.append((pkg_b, pkg_status_b)) + + elif pkg_status_a and pkg_status_b and pkg_b: + 
def bulk_create_notes(notes_to_create):
    """
    Bulk create notes, attach them to their packages and federate Create activities.

    ``notes_to_create`` is an iterable of ``(package, note_dict)`` tuples. Each
    ``note_dict`` is serialized with ``saneyaml.dump`` and stored as the note
    content under the package's ``acct``. A note that already exists for that
    ``acct`` and content is reused and attached to the package instead of being
    created again. Repeated ``(package, content)`` entries are processed once.
    A Create activity is federated for every processed pair whose package has
    follower inboxes.
    """
    if not notes_to_create:
        return

    # Serialize once; the dict doubles as an insertion-ordered set of unique pairs.
    unique_pairs = {}
    contents_by_acct = {}
    for pkg, note_dict in notes_to_create:
        content = saneyaml.dump(note_dict)
        unique_pairs[(pkg, content)] = None
        contents_by_acct.setdefault(pkg.acct, set()).add(content)

    # Existing notes are keyed by (acct, content): identical content stored
    # under another account must not count as existing for this package.
    existing = {}
    for acct, contents in contents_by_acct.items():
        for note in Note.objects.filter(acct=acct, content__in=contents):
            existing.setdefault((acct, note.content), note)

    new_pairs = [
        (pkg, Note(acct=pkg.acct, content=content))
        for pkg, content in unique_pairs
        if (pkg.acct, content) not in existing
    ]

    # (package, note) pairs to announce: created notes first, then reused ones.
    pairs_to_federate = []

    if new_pairs:
        # NOTE(review): the link rows below need note.id, so this relies on
        # bulk_create() populating primary keys, which depends on the database
        # backend -- confirm for the deployed database.
        created_notes = Note.objects.bulk_create([note for _, note in new_pairs])
        created_pairs = [(pkg, note) for (pkg, _), note in zip(new_pairs, created_notes)]

        through_model = Package.notes.through
        through_model.objects.bulk_create(
            [through_model(package_id=pkg.id, note_id=note.id) for pkg, note in created_pairs],
            ignore_conflicts=True,
        )
        pairs_to_federate.extend(created_pairs)

    for pkg, content in unique_pairs:
        note = existing.get((pkg.acct, content))
        if note is None:
            continue
        pkg.notes.add(note)
        # Still need to federate for existing notes
        pairs_to_federate.append((pkg, note))

    activities_to_federate = []
    for pkg, note in pairs_to_federate:
        inboxes = pkg.followers_inboxes
        if not inboxes:
            continue
        create_activity = CreateActivity(actor=pkg.to_ap, object=note.to_ap)
        activities_to_federate.append(
            {
                "targets": inboxes,
                "body": json.dumps(create_activity.to_ap()),
                "key_id": pkg.key_id,
            }
        )

    if activities_to_federate:
        Activity.bulk_federate(activities_to_federate)
def bulk_delete_notes(notes_to_delete):
    """
    Bulk soft-delete notes and federate Delete activities.

    ``notes_to_delete`` is an iterable of ``(package, note_dict)`` tuples. Each
    ``note_dict`` is serialized with ``saneyaml.dump`` and matched against
    notes stored under the package's ``acct``. Matching notes are flagged with
    ``is_deleted=True`` and a Delete activity is federated for every matched
    note whose package has follower inboxes. Entries without a matching note
    are ignored.
    """
    if not notes_to_delete:
        return

    query_conditions = Q()
    delete_mapping = {}

    for pkg, note_dict in notes_to_delete:
        content = saneyaml.dump(note_dict)
        query_conditions |= Q(acct=pkg.acct, content=content)
        delete_mapping[(pkg.acct, content)] = pkg

    existing_notes = list(Note.objects.filter(query_conditions).select_related())
    if not existing_notes:
        return

    activities_to_federate = []
    notes_to_soft_delete = []

    for note in existing_notes:
        pkg = delete_mapping.get((note.acct, note.content))
        if pkg is None:
            continue

        notes_to_soft_delete.append(note.id)
        # Build the ActivityPub payload before the row is flagged as deleted.
        note_ap = note.to_ap

        inboxes = pkg.followers_inboxes
        if inboxes:
            deleted_activity = DeleteActivity(actor=pkg.to_ap, object=note_ap)
            activities_to_federate.append(
                {
                    "targets": inboxes,
                    "body": json.dumps(deleted_activity.to_ap()),
                    "key_id": pkg.key_id,
                }
            )

    if notes_to_soft_delete:
        # QuerySet.delete() runs in SQL and does not call the model's overridden
        # delete(), so it would remove the rows for good. Set the flag directly
        # to get the soft delete this function promises.
        # NOTE(review): update() does not call save(), so save-time side effects
        # (e.g. an auto-updated timestamp, if Note has one) are skipped -- confirm.
        Note.objects.filter(id__in=notes_to_soft_delete).update(is_deleted=True)

    if activities_to_federate:
        Activity.bulk_federate(activities_to_federate)
100644 --- a/fedcode/pipes/utils.py +++ b/fedcode/pipes/utils.py @@ -8,6 +8,7 @@ # import json +import os import saneyaml from packageurl import PackageURL @@ -15,12 +16,12 @@ from fedcode.activitypub import Activity from fedcode.activitypub import CreateActivity from fedcode.activitypub import DeleteActivity +from fedcode.activitypub import UpdateActivity from fedcode.models import Note def create_note(pkg, note_dict): - # TODO: also take argument for source of the note ideally github blob for - # for file. + # TODO: also take argument for source of the note ideally github blob for file. note, _ = Note.objects.get_or_create(acct=pkg.acct, content=saneyaml.dump(note_dict)) pkg.notes.add(note) create_activity = CreateActivity(actor=pkg.to_ap, object=note.to_ap) @@ -31,11 +32,27 @@ def create_note(pkg, note_dict): ) +def update_note(pkg, old_note_dict, new_note_dict): + if old_note_dict == new_note_dict: + return + + note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(old_note_dict)) + + note.content = saneyaml.dump(new_note_dict) + note.save() + + update_activity = UpdateActivity(actor=pkg.to_ap, object=note.to_ap) + Activity.federate( + targets=pkg.followers_inboxes, + body=update_activity.to_ap(), + key_id=pkg.key_id, + ) + + def delete_note(pkg, note_dict): note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(note_dict)) note_ap = note.to_ap - note.delete() - pkg.notes.remove(note) + note.delete() # soft delete deleted_activity = DeleteActivity(actor=pkg.to_ap, object=note_ap) Activity.federate( @@ -69,7 +86,7 @@ def get_scan_note(path): purl = package_metadata_path_to_purl(path=path) # TODO: Use tool-alias.yml to get tool for corresponding tool - # for scan https://github.com/aboutcode-org/federatedcode/issues/24 + # for scan https://github.com/aboutcode-org/federatedcode/issues/24 return { "purl": str(purl), "scans": [ @@ -79,3 +96,22 @@ def get_scan_note(path): }, ], } + + +def get_vulnerability_path(repo_path: str, vulnerability_id: str) -> 
str: + """ + Get the vulnerability file path using repo_path and vulnerability_id. + Raise FileNotFoundError if the file does not exist. + """ + vul_filepath = os.path.join( + repo_path, + "aboutcode-vulnerabilities", + vulnerability_id[5:7], + vulnerability_id, + f"{vulnerability_id}.yml", + ) + + if not os.path.exists(vul_filepath): + raise FileNotFoundError(f"Vulnerability file not found: {vul_filepath}") + + return vul_filepath diff --git a/fedcode/templates/user_sign_up.html b/fedcode/templates/user_sign_up.html index 3503b65..1edd6bd 100644 --- a/fedcode/templates/user_sign_up.html +++ b/fedcode/templates/user_sign_up.html @@ -29,45 +29,39 @@ {% endif %} -

User Signup

+

Create your FederatedCode Account

{% csrf_token %}
- Username + - - Only letters, numbers, and @ . + - _ allowed. -
+
- Password + - -
    -
  • Password can't be too similar to your other personal information.
  • -
  • Password must contain at least 8 characters.
  • -
  • Password can't be a commonly used password.
  • -
  • Password can't be entirely numeric.
  • -
-
- Confirm Password +
+
{{ form.captcha }}
diff --git a/fedcode/utils.py b/fedcode/utils.py index d98a7b3..2afa4a6 100644 --- a/fedcode/utils.py +++ b/fedcode/utils.py @@ -42,8 +42,8 @@ def parse_webfinger(subject): return tuple(subject.split("@")) -def generate_webfinger(username, domain=FEDERATEDCODE_DOMAIN): - return username + "@" + domain +def generate_webfinger(username: str, domain=FEDERATEDCODE_DOMAIN) -> str: + return str(username) + "@" + domain def clone_git_repo(repo_path, repo_url): diff --git a/fedcode/views.py b/fedcode/views.py index de129b6..a6f40a1 100644 --- a/fedcode/views.py +++ b/fedcode/views.py @@ -68,6 +68,7 @@ from fedcode.models import Review from fedcode.models import SyncRequest from fedcode.models import Vulnerability +from fedcode.pipes.utils import get_vulnerability_path from fedcode.signatures import FEDERATEDCODE_PUBLIC_KEY from fedcode.signatures import HttpSignature from fedcode.utils import ap_collection @@ -810,16 +811,17 @@ def redirect_repository(request, repository_id): def redirect_vulnerability(request, vulnerability_id): try: vul = Vulnerability.objects.get(id=vulnerability_id) - vul_filepath = os.path.join( - vul.repo.path, - f"./aboutcode-vulnerabilities-{vulnerability_id[5:7]}/{vulnerability_id[10:12]}" - f"/{vulnerability_id}/{vulnerability_id}.yml", - ) - with open(vul_filepath) as f: - return HttpResponse(json.dumps(f.read())) + + repo_path = vul.repo.path + vul_filepath = get_vulnerability_path(repo_path, vulnerability_id) + + with open(vul_filepath, encoding="utf-8") as f: + return HttpResponse(json.dumps(f.read()), content_type="application/json") except Vulnerability.DoesNotExist: - return Http404("Vulnerability does not exist") + raise Http404("Vulnerability does not exist") + except FileNotFoundError: + raise Http404("Vulnerability file not found") class UserFollowing(View): diff --git a/federatedcode/settings.py b/federatedcode/settings.py index c1cfc25..a4c5bbf 100644 --- a/federatedcode/settings.py +++ b/federatedcode/settings.py @@ -124,8 +124,7 @@ 
} } -DEFAULT_AUTO_FIELD = "django.db.models.AutoField" - +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # Templates TEMPLATES = [ diff --git a/tests/pipelines/test_sync_vulnerablecode.py b/tests/pipelines/test_sync_vulnerablecode.py index 5ccc016..59abb87 100644 --- a/tests/pipelines/test_sync_vulnerablecode.py +++ b/tests/pipelines/test_sync_vulnerablecode.py @@ -6,16 +6,26 @@ # See https://github.com/nexB/federatedcode for support or download. # See https://aboutcode.org for more information about AboutCode.org OSS projects. # +from unittest.mock import PropertyMock +from unittest.mock import call +from unittest.mock import patch + import pytest from django.contrib.auth.models import User from fedcode_test_utils import mute_post_save_signal # NOQA +from aboutcode.hashid import get_core_purl from fedcode.models import Note from fedcode.models import Package from fedcode.models import Repository from fedcode.models import Service from fedcode.models import Vulnerability from fedcode.pipelines.sync_vulnerablecode import SyncVulnerableCode +from fedcode.pipelines.sync_vulnerablecode import note_handler +from fedcode.pipelines.sync_vulnerablecode import pkg_handler +from fedcode.pipelines.sync_vulnerablecode import vul_handler + +TEST_REPO_1_PATH = "/home/ziad-hany/PycharmProjects/vul-sample" @pytest.fixture @@ -30,80 +40,246 @@ def service(db): ) +@pytest.fixture +def mock_latest_commit_hexsha(monkeypatch): + """ + Fixture to override only repo.head.commit.hexsha while keeping + the rest of the git repo behavior intact. 
+ """ + + def _mock(repo_instance, hexsha: str): + """ + repo_instance: a Repository model instance + hexsha: the fake hexsha to return for latest commit + """ + real_repo = repo_instance.git_repo_obj # use real GitPython repo + + # Patch only the hexsha property + type(real_repo.head.commit).hexsha = PropertyMock(return_value=hexsha) + + return real_repo + + return _mock + + @pytest.fixture def repo(db, service, mute_post_save_signal): """Simple Git Repository""" return Repository.objects.create( - url="https://github.com/nexB/fake-repo", - path="./review/tests/test_data/test_git_repo_v1", + url="https://github.com/ziadhany/vul-sample", + path=TEST_REPO_1_PATH, admin=service, ) -@pytest.mark.skip(reason="Need a real git repo to test the importer") -@pytest.mark.django_db -def test_simple_importer(service, repo, mute_post_save_signal): - # just add all packages and vulnerabilities - repo.path = "/home/ziad/vul-sample/repo1" - importer = SyncVulnerableCode() - importer.execute() +@pytest.fixture +def vul_changes(): + return {"create": [], "update": [], "delete": set()} - assert Note.objects.count() > 1 - assert Vulnerability.objects.count() > 1 - assert Package.objects.count() > 1 - assert repo.last_imported_commit - note_n = Note.objects.count() - vul_n = Vulnerability.objects.count() - purl_n = Package.objects.count() - last_imported_commit = repo.last_imported_commit +@pytest.fixture +def pkg_changes(): + return {"create": [], "update": [], "delete": set()} - # Run importer again without add any new data - importer = SyncVulnerableCode() - importer.execute() - assert note_n == Note.objects.count() - assert vul_n == Vulnerability.objects.count() - assert purl_n == Package.objects.count() - assert last_imported_commit == repo.last_imported_commit +@pytest.fixture +def example_notes(): + return [ + { + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=aarch64&distroversion=edge&reponame=community", + "affected_by_vulnerabilities": [], + "fixing_vulnerabilities": 
["VCID-r7zs-rzfz-aaap"], + }, + { + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=armhf&distroversion=edge&reponame=community", + "affected_by_vulnerabilities": ["VCID-r7zs-rzfz-aaap"], + "fixing_vulnerabilities": [], + }, + ] - # Edit last_imported_commit - repo.last_imported_commit = "c8de84af0a7c11bf151e96142ce711824648ec41" - repo.save() - importer = SyncVulnerableCode() - importer.execute() +def test_added_vulnerability(repo, vul_changes): + vul_handler("A", repo, None, {"vulnerability_id": "VCID-1234"}, None, vul_changes) + assert len(vul_changes["create"]) == 1 + vuln = vul_changes["create"][0] + assert isinstance(vuln, Vulnerability) + assert vuln.id == "VCID-1234" + assert vuln.repo == repo -@pytest.mark.skip(reason="Need a real git repo to test the importer") -@pytest.mark.django_db -def test_complex_importer(service, repo, mute_post_save_signal): - # repo with 1 commit - repo.path = "/home/ziad/vul-sample/repo1" - importer = SyncVulnerableCode() - importer.execute() +def test_modified_vulnerability_changed_id(repo, vul_changes): + vul_handler( + "M", + repo, + {"vulnerability_id": "VCID-1111"}, + {"vulnerability_id": "VCID-2222"}, + None, + vul_changes, + ) - assert Note.objects.count() > 1 - assert Vulnerability.objects.count() > 1 - assert Package.objects.count() > 1 - assert repo.last_imported_commit + assert ("VCID-1111", repo) in vul_changes["delete"] + assert ("VCID-2222", repo) in vul_changes["update"] - note_n = Note.objects.count() - vul_n = Vulnerability.objects.count() - purl_n = Package.objects.count() - last_imported_commit = repo.last_imported_commit - # Run importer again without add any new data - # the same repo with 2 commit ( after pull ) - repo.path = "/home/ziad/vul-sample/repo2" - importer = SyncVulnerableCode() - importer.execute() +def test_modified_vulnerability_same_id(repo, vul_changes): + vul_handler( + "M", + repo, + {"vulnerability_id": "VCID-1111"}, + {"vulnerability_id": "VCID-1111"}, + None, + vul_changes, + ) + + assert not 
vul_changes["delete"] + assert ("VCID-1111", repo) in vul_changes["update"] + + +def test_deleted_vulnerability(repo, vul_changes): + vul_handler("D", repo, {"vulnerability_id": "VCID-3333"}, None, None, vul_changes) + assert ("VCID-3333", repo) in vul_changes["delete"] + - assert note_n > Note.objects.count() - assert vul_n > Vulnerability.objects.count() - assert purl_n > Package.objects.count() +def test_added_packages(service, pkg_changes): + yaml_data_b_blob = ["pkg:pypi/django@3.2.5", "pkg:pypi/requests@2.28.1"] - # Edit last_imported_commit - repo.last_imported_commit = "9c3ccee39baef6017d9152367402de9909eadd72" - repo.save() + pkg_handler("A", service, None, yaml_data_b_blob, None, pkg_changes) + + assert len(pkg_changes["create"]) == 2 + created = [pkg.purl for pkg in pkg_changes["create"]] + assert get_core_purl("pkg:pypi/django@3.2.5") in created + assert get_core_purl("pkg:pypi/requests@2.28.1") in created + + +def test_modified_packages(service, pkg_changes): + yaml_data_a_blob = ["pkg:pypi/django@3.2.5", "pkg:pypi/requests@2.27.0"] + yaml_data_b_blob = ["pkg:pypi/django@4.0.0", "pkg:pypi/requests@2.28.1"] + + pkg_handler("M", service, yaml_data_a_blob, yaml_data_b_blob, None, pkg_changes) + + assert len(pkg_changes["update"]) == 2 + updates = [(a, b) for (a, b, _) in pkg_changes["update"]] + assert ( + get_core_purl("pkg:pypi/django@3.2.5"), + get_core_purl("pkg:pypi/django@4.0.0"), + ) in updates + assert ( + get_core_purl("pkg:pypi/requests@2.27.0"), + get_core_purl("pkg:pypi/requests@2.28.1"), + ) in updates + + +def test_deleted_packages(service, pkg_changes): + yaml_data_a_blob = ["pkg:pypi/flask@2.0.0", "pkg:pypi/urllib3@1.26.0"] + + pkg_handler("D", service, yaml_data_a_blob, None, None, pkg_changes) + + assert len(pkg_changes["delete"]) == 2 + deletes = [(p, svc) for (p, svc) in pkg_changes["delete"]] + assert (get_core_purl("pkg:pypi/flask@2.0.0"), service) in deletes + assert (get_core_purl("pkg:pypi/urllib3@1.26.0"), service) in deletes + + 
+def test_note_handler_add(service, example_notes): + with patch("fedcode.pipelines.sync_vulnerablecode.bulk_create_notes") as mock_create: + note_handler("A", service, None, example_notes, None) + assert mock_create.called + args, _ = mock_create.call_args + notes_to_create = args[0] + assert len(notes_to_create) == len(example_notes) + pkg = Package.objects.get(purl="pkg:alpine/ansible") + expected = [ + ( + pkg, + { + "affected_by_vulnerabilities": [], + "fixing_vulnerabilities": ["VCID-r7zs-rzfz-aaap"], + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=aarch64&distroversion=edge&reponame=community", + }, + ), + ( + pkg, + { + "affected_by_vulnerabilities": ["VCID-r7zs-rzfz-aaap"], + "fixing_vulnerabilities": [], + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=armhf&distroversion=edge&reponame=community", + }, + ), + ] + assert notes_to_create == expected + + +def test_note_handler_modify(service, example_notes): + old_notes = example_notes[:1] # first note only + new_notes = example_notes[1:] # second note only + + with patch("fedcode.pipelines.sync_vulnerablecode.bulk_update_notes") as mock_update, patch( + "fedcode.pipelines.sync_vulnerablecode.bulk_create_notes" + ) as mock_create, patch( + "fedcode.pipelines.sync_vulnerablecode.bulk_delete_notes" + ) as mock_delete: + + note_handler("M", service, old_notes, new_notes, None) + pkg = Package.objects.get(purl="pkg:alpine/ansible") + assert mock_update.call_args_list == [ + call( + [ + ( + pkg, + { + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=aarch64&distroversion=edge&reponame=community", + "affected_by_vulnerabilities": [], + "fixing_vulnerabilities": ["VCID-r7zs-rzfz-aaap"], + }, + { + "purl": "pkg:alpine/ansible@2.10.1-r0?arch=armhf&distroversion=edge&reponame=community", + "affected_by_vulnerabilities": ["VCID-r7zs-rzfz-aaap"], + "fixing_vulnerabilities": [], + }, + ) + ] + ) + ] + + assert mock_delete.called == False + assert mock_create.called == False + assert mock_update.called + + +def 
test_note_handler_delete(service, example_notes): + with patch("fedcode.pipelines.sync_vulnerablecode.bulk_delete_notes") as mock_delete: + note_handler("D", service, example_notes, None, None) + assert mock_delete.called + args, _ = mock_delete.call_args + notes_to_delete = args[0] + assert len(notes_to_delete) == len(example_notes) + + +@pytest.mark.skip(reason="A real Git repository is needed to test the pipelines.") +@pytest.mark.django_db +def test_simple_importer(service, repo, mock_latest_commit_hexsha, mute_post_save_signal): + repo.path = TEST_REPO_1_PATH importer = SyncVulnerableCode() - importer.execute() + commits = [ + # (commit, last_imported_commit, note_count, vuln_count, pkg_count) + ( + "f7cd453ff1ef29a539723c44f82bcc582dac13b1", + None, + 28, + 6, + 3, + ), # vuln_count is 7, but one of them is duplicated. + ("d2115ebdc64341f5b9169e42c9edde9002898b3b", "f7cd453ff1ef29a539723c44f82bcc582dac13b1", 45, 6, 3), + ("d2115ebdc64341f5b9169e42c9edde9002898b3b", None, 0, 0, 0), + ("275987c1d758155e782b7fe0539d7089d4e618ea", None, 0, 0, 0), + ] + + for commit, last_imported_commit, note_count, vuln_count, pkg_count in commits: + repo.last_imported_commit = last_imported_commit + repo.save() + + mock_latest_commit_hexsha(repo, hexsha=commit) + importer.execute() + + assert Note.objects.count() == note_count + assert Vulnerability.objects.count() == vuln_count + assert Package.objects.count() == pkg_count diff --git a/tests/test_utils.py b/tests/test_utils.py index 5b37c59..e9024ab 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,7 @@ from fedcode.activitypub import AP_CONTEXT from fedcode.activitypub import Activity from fedcode.activitypub import create_activity_obj +from fedcode.pipes.utils import get_vulnerability_path from fedcode.utils import check_purl_actor from fedcode.utils import full_resolve from fedcode.utils import full_reverse @@ -83,3 +84,25 @@ def test_full_resolve(): def test_check_purl_actor(): assert 
check_purl_actor("pkg:maven/org.apache.logging") + + +def test_get_vulnerability_path(tmp_path): + repo_path = tmp_path + vulnerability_id = "VCID-1n1d-h5qn-nyau" + + vuln_dir = repo_path / "aboutcode-vulnerabilities" / vulnerability_id[5:7] / vulnerability_id + vuln_dir.mkdir(parents=True, exist_ok=True) + + vuln_file = vuln_dir / f"{vulnerability_id}.yml" + vuln_file.write_text("id: VCID-1n1d-h5qn-nyau\n") + + result = get_vulnerability_path(str(repo_path), vulnerability_id) + assert result == str(vuln_file) + + +def test_get_vulnerability_path_not_found(tmp_path): + repo_path = tmp_path + vulnerability_id = "VCID-1n1d-h5qn-nyau" + + with pytest.raises(FileNotFoundError): + get_vulnerability_path(str(repo_path), vulnerability_id)