diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 090af2a802..e3d4220470 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,10 @@ ### unreleased +- Add ability to register custom pipelines through a new SCANCODEIO_PIPELINES_DIRS + setting. + https://github.com/nexB/scancode.io/issues/237 + - Add a pipeline `scan_package.ScanPackage` to scan a single package archive with ScanCode-toolkit. https://github.com/nexB/scancode.io/issues/25 diff --git a/docs/scancodeio-settings.rst b/docs/scancodeio-settings.rst index 359f804a9c..9750e08d66 100644 --- a/docs/scancodeio-settings.rst +++ b/docs/scancodeio-settings.rst @@ -74,6 +74,15 @@ of parallel processes to 4:: SCANCODE_DEFAULT_OPTIONS=--processes 4,--timeout 120 +SCANCODEIO_PIPELINES_DIRS +------------------------- + +This setting defines the additional locations ScanCode.io will search for pipelines. +This should be set to a list of comma-separated strings that contain full paths to your additional +pipelines directories:: + + SCANCODEIO_PIPELINES_DIRS=/var/scancodeio/pipelines/,/home/user/pipelines/ + SCANCODEIO_POLICIES_FILE ------------------------ diff --git a/scancodeio/settings/base.py b/scancodeio/settings/base.py index 09ae8522da..7565ef683d 100644 --- a/scancodeio/settings/base.py +++ b/scancodeio/settings/base.py @@ -53,6 +53,11 @@ SCANCODEIO_POLICIES_FILE = env.str("SCANCODEIO_POLICIES_FILE", default="policies.yml") +# This setting defines the additional locations ScanCode.io will search for pipelines. +# This should be set to a list of strings that contain full paths to your additional +# pipelines directories. +SCANCODEIO_PIPELINES_DIRS = env.list("SCANCODEIO_PIPELINES_DIRS", default=[]) + # Application definition INSTALLED_APPS = ( diff --git a/scanpipe/apps.py b/scanpipe/apps.py index a711c4ac05..df06e107a1 100644 --- a/scanpipe/apps.py +++ b/scanpipe/apps.py @@ -20,6 +20,8 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. +import inspect +from importlib.machinery import SourceFileLoader from pathlib import Path from django.apps import AppConfig @@ -55,7 +57,8 @@ def ready(self): def load_pipelines(self): """ - Load Pipelines from the "scancodeio_pipelines" entry point group. + Load Pipelines from the "scancodeio_pipelines" entry point group and from the + pipelines Python files found at `SCANCODEIO_PIPELINES_DIRS` locations. """ entry_points = importlib_metadata.entry_points() @@ -63,9 +66,23 @@ def load_pipelines(self): pipeline_entry_points = set(entry_points.get("scancodeio_pipelines")) for entry_point in sorted(pipeline_entry_points): - pipeline_class = entry_point.load() - pipeline_name = entry_point.name - self.register_pipeline(pipeline_name, pipeline_class) + self.register_pipeline(name=entry_point.name, cls=entry_point.load()) + + # Register user provided pipelines + pipelines_dirs = getattr(settings, "SCANCODEIO_PIPELINES_DIRS", []) + + for pipelines_dir in pipelines_dirs: + pipelines_path = Path(pipelines_dir) + + if not pipelines_path.is_dir(): + raise ImproperlyConfigured( + f'The provided pipelines directory "{pipelines_dir}" in ' + f"the SCANCODEIO_PIPELINES_DIRS setting is not available." + ) + + python_files = pipelines_path.rglob("*.py") + for path in python_files: + self.register_pipeline_from_file(path) def register_pipeline(self, name, cls): """ @@ -83,6 +100,24 @@ def register_pipeline(self, name, cls): self._pipelines[name] = cls + def register_pipeline_from_file(self, path): + """ + Search for a Pipeline subclass in the provided file `path` and register it + when found. + """ + module_name = path.stem + module = SourceFileLoader(module_name, str(path)).load_module() + pipeline_classes = inspect.getmembers(module, is_pipeline) + + if len(pipeline_classes) > 1: + raise ImproperlyConfigured( + f"Only one Pipeline class allowed per pipeline file: {path}." + ) + + elif pipeline_classes: + pipeline_class = pipeline_classes[0][1] + self.register_pipeline(name=module_name, cls=pipeline_class) + @property def pipelines(self): return dict(self._pipelines) diff --git a/scanpipe/tests/pipelines/register_from_file.py b/scanpipe/tests/pipelines/register_from_file.py new file mode 100644 index 0000000000..15f8bf2c69 --- /dev/null +++ b/scanpipe/tests/pipelines/register_from_file.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +from scanpipe.pipelines import Pipeline + + +class RegisterFromFile(Pipeline): + """ + A pipeline to be registered from its file path. + """ + + @classmethod + def steps(cls): + return (cls.step1,) + + def step1(self): + pass diff --git a/scanpipe/tests/test_apps.py b/scanpipe/tests/test_apps.py index e2d6554704..db0ed405ec 100644 --- a/scanpipe/tests/test_apps.py +++ b/scanpipe/tests/test_apps.py @@ -23,18 +23,21 @@ from pathlib import Path from django.apps import apps +from django.core.exceptions import ImproperlyConfigured from django.test import TestCase from django.test import override_settings from scanpipe.apps import ScanPipeConfig from scanpipe.tests import license_policies from scanpipe.tests import license_policies_index +from scanpipe.tests.pipelines.register_from_file import RegisterFromFile scanpipe_app = apps.get_app_config("scanpipe") class ScanPipeAppsTest(TestCase): data_location = Path(__file__).parent / "data" + pipelines_location = Path(__file__).parent / "pipelines" def test_scanpipe_apps_get_policies_index(self): self.assertEqual({}, ScanPipeConfig.get_policies_index([], "license_key")) @@ -70,3 +73,16 @@ def test_scanpipe_apps_policies_enabled(self): self.assertFalse(scanpipe_app.policies_enabled) scanpipe_app.license_policies_index = {"key": "value"} self.assertTrue(scanpipe_app.policies_enabled) + + def test_scanpipe_apps_register_pipeline_from_file(self): + path = self.pipelines_location / "do_nothing.py" + with self.assertRaises(ImproperlyConfigured): + scanpipe_app.register_pipeline_from_file(path) + + path = self.pipelines_location / "register_from_file.py" + scanpipe_app.register_pipeline_from_file(path) + + self.assertEqual( + RegisterFromFile.__name__, + scanpipe_app.pipelines.get("register_from_file").__name__, + ) diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index f566780d28..95e68ba68d 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -33,7 +33,6 @@ from scanpipe.pipelines import Pipeline from scanpipe.pipelines import is_pipeline from scanpipe.pipelines import root_filesystems -from scanpipe.pipelines import scan_package from scanpipe.tests.pipelines.do_nothing import DoNothing from scanpipe.tests.pipelines.steps_as_attribute import StepsAsAttribute