diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index 1f2f75dab1..ebacc387d3 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -56,6 +56,8 @@ ScanPipe's own commands are listed under the ``[scanpipe]`` section:: [scanpipe] add-input add-pipeline + add-webhook + analyze-kubernetes archive-project batch-create check-compliance @@ -391,6 +393,119 @@ Example usage: $ scanpipe add-webhook my_project https://example.com/webhook --inactive +.. _cli_analyze_kubernetes: + +`$ scanpipe analyze-kubernetes <name>` +-------------------------------------- + +Analyzes all Docker images from a Kubernetes cluster by extracting image references +using ``kubectl`` and creating projects to scan them. + +This command connects to your Kubernetes cluster, retrieves all container images +(including init containers) from running pods, and creates projects to analyze each +image for packages, dependencies, and optionally vulnerabilities. + +Required arguments: + +- ``name`` Project name or prefix for the created projects. + +Optional arguments: + +- ``--multi`` Create multiple projects (one per image) instead of a single project + containing all images. When used, each project is named ``<name>: <image-reference>``. + +- ``--find-vulnerabilities`` Run the ``find_vulnerabilities`` pipeline during the + analysis to detect known security vulnerabilities in discovered packages. + +- ``--execute`` Execute the pipelines right after project creation. + +- ``--async`` Add the pipeline run to the tasks queue for execution by a worker instead + of running in the current thread. + Applies only when ``--execute`` is provided. + +- ``--namespace NAMESPACE`` Limit the image extraction to a specific Kubernetes + namespace. If not provided, images from all namespaces are collected. + +- ``--context CONTEXT`` Use a specific Kubernetes context. If not provided, the + current context is used. + +- ``--notes NOTES`` Optional notes about the project(s).
+ +- ``--label LABELS`` Optional labels for the project(s). Multiple labels can be + provided by using this argument multiple times. + +- ``--dry-run`` Do not create any projects; just print the images and projects that + would be created. + +- ``--no-global-webhook`` Skip the creation of the global webhook. This option is + only useful if a global webhook is defined in the settings. + +.. note:: + This command requires ``kubectl`` to be installed and configured with access to + your Kubernetes cluster. + +Example: Analyze All Cluster Images +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To analyze all images from all namespaces in your current Kubernetes cluster:: + + $ scanpipe analyze-kubernetes cluster-audit --multi --execute + +This creates separate projects for each unique image found in the cluster. + +Example: Analyze Production Namespace with Vulnerability Scanning +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To scan all images in the ``production`` namespace and check for vulnerabilities:: + + $ scanpipe analyze-kubernetes prod-security-scan \ + --namespace production \ + --find-vulnerabilities \ + --multi \ + --label "production" \ + --label "security-audit" \ + --execute + +Example: Dry Run Before Creating Projects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To preview which images would be analyzed without creating any projects:: + + $ scanpipe analyze-kubernetes cluster-preview \ + --namespace default \ + --dry-run + +This displays all images that would be scanned, allowing you to verify the scope +before running the actual analysis. 
+ +Example: Analyze Specific Cluster Context +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To analyze images from a specific Kubernetes cluster when you have multiple contexts +configured:: + + $ scanpipe analyze-kubernetes staging-audit \ + --context staging-cluster \ + --namespace default \ + --multi \ + --execute --async + +Example: Single Project for All Images +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To create one project containing all images from the cluster:: + + $ scanpipe analyze-kubernetes full-cluster-scan \ + --find-vulnerabilities \ + --execute + +This creates a single project named ``full-cluster-scan`` that analyzes all discovered +images together. + +.. tip:: + Use ``--multi`` when analyzing large clusters to create separate projects per image, + making it easier to track and review results for individual container images. + `$ scanpipe execute --project PROJECT` -------------------------------------- diff --git a/scanpipe/management/commands/analyze-kubernetes.py b/scanpipe/management/commands/analyze-kubernetes.py new file mode 100644 index 0000000000..a0e6325dce --- /dev/null +++ b/scanpipe/management/commands/analyze-kubernetes.py @@ -0,0 +1,146 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/aboutcode-org/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
class Command(CreateProjectCommandMixin, BaseCommand):
    """
    Create projects to analyze the container images of a Kubernetes cluster.

    The image references are collected through ``get_images_from_kubectl``.
    By default a single project receives all the images as ``docker://``
    inputs. With ``--multi``, one project is created per image.
    """

    help = "Analyze all images of a Kubernetes cluster."

    def add_arguments(self, parser):
        """Add the command-specific arguments after the parent classes' ones."""
        super().add_arguments(parser)
        parser.add_argument(
            "name",
            help=(
                "Project name, used as the prefix of each project name "
                "when --multi is provided."
            ),
        )
        parser.add_argument(
            "--multi",
            action="store_true",
            help="Create multiple projects instead of a single one.",
        )
        parser.add_argument(
            "--find-vulnerabilities",
            action="store_true",
            help="Run the find_vulnerabilities pipeline during the analysis.",
        )
        parser.add_argument(
            "--execute",
            action="store_true",
            help="Execute the pipelines right after the project creation.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help=(
                "Do not create any projects. "
                "Print the images and projects that would be created."
            ),
        )
        # Additional kubectl options
        parser.add_argument(
            "--namespace",
            type=str,
            help="Kubernetes namespace to query. Defaults to all namespaces.",
        )
        parser.add_argument(
            "--context",
            type=str,
            help="Kubernetes context to use. Defaults to the current context.",
        )

    def handle(self, *args, **options):
        """
        Create the project(s) for the cluster images and optionally execute them.

        Raise a CommandError when no images are found in the cluster.
        """
        self.verbosity = options["verbosity"]
        project_name = options["name"]
        execute = options["execute"]
        # "async", "labels" and "notes" are not declared in this class.
        # NOTE(review): presumably provided by CreateProjectCommandMixin.
        run_async = options["async"]
        notes = options["notes"]
        # Work on a copy: the "k8s-" label added in --multi mode must not be
        # written back into the list owned by ``options``.
        labels = list(options["labels"] or [])

        pipelines = ["analyze_docker_image"]
        if options["find_vulnerabilities"]:
            pipelines.append("find_vulnerabilities")

        images = self.get_images(**options)
        if not images:
            raise CommandError("No images found.")

        if options["multi"]:
            # A shared label ties together the projects created by this run.
            labels.append(f"k8s-{slugify(project_name)}")
            project_specs = [
                (f"{project_name}: {reference}", [f"docker://{reference}"])
                for reference in images
            ]
        else:
            all_input_urls = [f"docker://{reference}" for reference in images]
            project_specs = [(project_name, all_input_urls)]

        created_projects = [
            self.create_project(
                pipelines=pipelines,
                notes=notes,
                labels=labels,
                name=name,
                input_urls=input_urls,
            )
            for name, input_urls in project_specs
        ]

        if execute:
            for project in created_projects:
                execute_project(project=project, run_async=run_async, command=self)

    def get_images(self, **options):
        """
        Return the image references found in the cluster.

        The ``namespace``, ``context`` and ``dry_run`` entries of ``options``
        are used. Any exception raised while collecting the images is
        re-raised as a CommandError, keeping the original exception as cause.

        In dry run mode, the images are printed and the process exits with
        the status code 0 through ``sys.exit``, so this method never returns.
        """
        namespace = options.get("namespace")
        context = options.get("context")
        dry_run = options.get("dry_run")

        if self.verbosity >= 1:
            self.stdout.write(
                "Extracting images from Kubernetes cluster using kubectl..."
            )

        try:
            images = get_images_from_kubectl(namespace=namespace, context=context)
        except Exception as error:
            raise CommandError(error) from error

        # The images are always listed in dry run mode, whatever the verbosity.
        if self.verbosity >= 1 or dry_run:
            self.stdout.write(
                self.style.SUCCESS(f"Found {len(images)} images in the cluster:"),
            )
            self.stdout.write("\n".join(images))

        if dry_run:
            self.stdout.write("Dry run mode, no projects were created.")
            sys.exit(0)

        return images
+ + WARNING: If you're incorporating user input into the command, make + sure to sanitize and validate the input to prevent any malicious commands from + being executed. + + Raise a SubprocessError if the exit code was non-zero. + """ + completed_process = subprocess.run( # noqa: S603 + command_args, + capture_output=True, + text=True, + ) + + if completed_process.returncode: + error_msg = ( + f'Error while executing cmd="{completed_process.args}": ' + f'"{completed_process.stderr.strip()}"' + ) + raise subprocess.SubprocessError(error_msg) + + return completed_process.stdout diff --git a/scanpipe/pipes/fetch.py b/scanpipe/pipes/fetch.py index 2f74d0cf10..e9c328ca1d 100644 --- a/scanpipe/pipes/fetch.py +++ b/scanpipe/pipes/fetch.py @@ -24,7 +24,6 @@ import logging import os import re -import subprocess import tempfile from collections import namedtuple from pathlib import Path @@ -44,6 +43,8 @@ from plugincode.location_provider import get_location from requests import auth as request_auth +from scanpipe.pipes import run_command_safely + logger = logging.getLogger("scanpipe.pipes") Download = namedtuple("Download", "uri directory filename path size sha1 md5") @@ -60,43 +61,6 @@ HTTP_REQUEST_TIMEOUT = 30 -def run_command_safely(command_args): - """ - Execute the external commands following security best practices. - - This function is using the subprocess.run function which simplifies running external - commands. It provides a safer and more straightforward API compared to older methods - like subprocess.Popen. - - WARNING: Please note that the `--option=value` syntax is required for args entries, - and not the `--option value` format. - - - This does not use the Shell (shell=False) to prevent injection vulnerabilities. - - The command should be provided as a list of ``command_args`` arguments. - - Only full paths to executable commands should be provided to avoid any ambiguity. 
- - WARNING: If you're incorporating user input into the command, make - sure to sanitize and validate the input to prevent any malicious commands from - being executed. - - Raise a SubprocessError if the exit code was non-zero. - """ - completed_process = subprocess.run( # noqa: S603 - command_args, - capture_output=True, - text=True, - ) - - if completed_process.returncode: - error_msg = ( - f'Error while executing cmd="{completed_process.args}": ' - f'"{completed_process.stderr.strip()}"' - ) - raise subprocess.SubprocessError(error_msg) - - return completed_process.stdout - - def get_request_session(uri): """Return a Requests session setup with authentication and headers.""" session = requests.Session() diff --git a/scanpipe/pipes/kubernetes.py b/scanpipe/pipes/kubernetes.py new file mode 100644 index 0000000000..c1df0eae33 --- /dev/null +++ b/scanpipe/pipes/kubernetes.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/aboutcode-org/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. 
def get_images_from_kubectl(namespace=None, context=None):
    """
    Return the unique container image references of a Kubernetes cluster.

    The images are collected by running ``kubectl get pods`` with a JSONPath
    output template selecting the ``image`` of the ``initContainers`` and
    ``containers`` entries of each returned pod.

    Args:
        namespace: Namespace to query. All namespaces are queried when empty.
        context: kubectl context to use. No ``--context`` option is passed
            when empty, leaving the choice of the context to kubectl.

    Returns:
        list: Image references, without duplicates, in order of first appearance.

    Raises:
        RuntimeError: When the kubectl command fails.
        FileNotFoundError: When the kubectl executable cannot be found.

    """
    # Every option uses the ``--option=value`` form, as requested by the
    # ``run_command_safely`` docstring. This form also ensures that a value
    # starting with a dash cannot be parsed as a separate kubectl option.
    namespace_arg = f"--namespace={namespace}" if namespace else "--all-namespaces"
    cmd = ["kubectl", "get", "pods", namespace_arg]

    if context:
        cmd.append(f"--context={context}")

    # Get all images including init containers
    jsonpath = "{.items[*].spec['initContainers','containers'][*].image}"
    cmd.append(f"--output=jsonpath={jsonpath}")

    try:
        output = run_command_safely(cmd)
    except subprocess.SubprocessError as error:
        raise RuntimeError(f"Failed to execute kubectl command: {error}") from error
    except FileNotFoundError as error:
        raise FileNotFoundError(
            "kubectl not found. Please ensure kubectl is installed and in your PATH."
        ) from error

    # The images are whitespace-separated. ``str.split`` without argument never
    # yields empty strings, and ``dict.fromkeys`` removes the duplicates while
    # preserving the order.
    return list(dict.fromkeys(output.split()))
class ScanPipeKubernetesPipesTest(TestCase):
    """Tests for the ``scanpipe.pipes.kubernetes`` module."""

    data = Path(__file__).parent.parent / "data"

    @mock.patch("scanpipe.pipes.kubernetes.run_command_safely")
    def test_scanpipe_pipes_kubernetes_get_images_from_kubectl(
        self, mock_run_command_safely
    ):
        """Cover the error wrapping and the parsing of the kubectl output."""
        # A missing kubectl executable is reported with an actionable message.
        mock_run_command_safely.side_effect = FileNotFoundError
        with self.assertRaises(FileNotFoundError) as cm:
            kubernetes.get_images_from_kubectl()
        expected = (
            "kubectl not found. Please ensure kubectl is installed and in your PATH."
        )
        self.assertEqual(expected, str(cm.exception))

        # A failing command is wrapped in a RuntimeError. The SubprocessError
        # class used as side effect carries no message, hence the empty suffix.
        mock_run_command_safely.side_effect = SubprocessError
        with self.assertRaises(RuntimeError) as cm:
            kubernetes.get_images_from_kubectl()
        expected = "Failed to execute kubectl command: "
        self.assertEqual(expected, str(cm.exception))

        # Duplicates are removed and the order of first appearance is kept.
        mock_run_command_safely.side_effect = None
        mock_run_command_safely.return_value = "nginx:latest redis:alpine redis:alpine"
        expected = ["nginx:latest", "redis:alpine"]
        self.assertEqual(expected, kubernetes.get_images_from_kubectl())

        # Any whitespace, including newlines, separates the image references.
        mock_run_command_safely.return_value = "nginx:latest\nredis:alpine\n"
        self.assertEqual(expected, kubernetes.get_images_from_kubectl())

        # An output without any image reference results in an empty list.
        mock_run_command_safely.return_value = ""
        self.assertEqual([], kubernetes.get_images_from_kubectl())
"kube-from-cluster-single" + out = StringIO() + call_command( + "analyze-kubernetes", + project_name, + "--label", + "label1", + "--notes", + "Notes", + "--find-vulnerabilities", + stdout=out, + ) + self.assertIn("Extracting images from Kubernetes cluster", out.getvalue()) + self.assertEqual(1, Project.objects.count()) + project = Project.objects.get(name=project_name) + self.assertEqual(["label1"], list(project.labels.names())) + self.assertEqual("Notes", project.notes) + self.assertEqual( + ["analyze_docker_image", "find_vulnerabilities"], project.pipelines + ) + expected = ["docker://nginx:latest", "docker://redis:alpine"] + download_urls = project.inputsources.values_list("download_url", flat=True) + self.assertEqual(expected, sorted(download_urls)) + project.delete() + + project_name = "kube-from-cluster-multi" + out = StringIO() + call_command( + "analyze-kubernetes", + project_name, + "--multi", + stdout=out, + ) + self.assertIn("Extracting images from Kubernetes cluster", out.getvalue()) + self.assertEqual(2, Project.objects.count()) + expected = [ + "kube-from-cluster-multi: nginx:latest", + "kube-from-cluster-multi: redis:alpine", + ] + names = Project.objects.values_list("name", flat=True) + self.assertEqual(expected, sorted(names)) + for project in Project.objects.all(): + self.assertEqual(1, project.inputsources.count()) + class ScanPipeManagementCommandMixinTest(TestCase): class CreateProjectCommand( @@ -1525,7 +1575,7 @@ def test_scanpipe_management_command_mixin_create_project_pipelines(self): "analyze_root_filesystem_or_vm_image", "scan_single_package", ] - self.assertEqual(expected, [run.pipeline_name for run in project.runs.all()]) + self.assertEqual(expected, project.pipelines) run = project.runs.get(pipeline_name="analyze_root_filesystem_or_vm_image") self.assertEqual(["group1", "group2"], run.selected_groups)