From aabdcd75ba083da2742079b17add50cdf46aad32 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Thu, 21 Sep 2023 11:30:23 +0200 Subject: [PATCH 01/14] CM-26214, CM-26215, CM-26216 - Add SBOM reports --- cycode/cli/code_scanner.py | 421 ++---------------- .../{helpers => commands/report}/__init__.py | 0 cycode/cli/commands/report/report_command.py | 21 + .../report/sbom}/__init__.py | 0 cycode/cli/commands/report/sbom/common.py | 25 ++ .../cli/commands/report/sbom/sbom_command.py | 81 ++++ .../commands/report/sbom/sbom_path_command.py | 29 ++ .../sbom/sbom_repository_url_command.py | 15 + cycode/cli/config.yaml | 3 + .../files_collector}/__init__.py | 0 cycode/cli/files_collector/excluder.py | 133 ++++++ cycode/cli/files_collector/iac/__init__.py | 0 .../iac}/tf_content_generator.py | 23 +- cycode/cli/files_collector/models/__init__.py | 0 .../models/in_memory_zip.py} | 11 +- cycode/cli/files_collector/path_documents.py | 106 +++++ .../files_collector/repository_documents.py | 135 ++++++ cycode/cli/files_collector/sca/__init__.py | 0 .../cli/files_collector/sca/maven/__init__.py | 0 .../maven/base_restore_maven_dependencies.py | 0 .../sca}/maven/restore_gradle_dependencies.py | 2 +- .../sca}/maven/restore_maven_dependencies.py | 2 +- .../sca}/sca_code_scanner.py | 14 +- cycode/cli/files_collector/zip_documents.py | 42 ++ cycode/cli/main.py | 28 +- cycode/cli/utils/get_api_client.py | 40 ++ cycode/cli/utils/path_utils.py | 40 +- cycode/cyclient/client_creator.py | 23 + cycode/cyclient/models.py | 79 ++++ cycode/cyclient/report_client.py | 84 ++++ cycode/cyclient/scan_client.py | 13 +- .../scan_config/scan_config_creator.py | 29 -- .../{scan_config => }/scan_config_base.py | 0 .../cli/helpers/test_tf_content_generator.py | 2 +- tests/cli/test_code_scanner.py | 4 +- tests/conftest.py | 2 +- .../scan_config/test_default_scan_config.py | 2 +- .../scan_config/test_dev_scan_config.py | 2 +- tests/cyclient/test_scan_client.py | 6 +- tests/test_code_scanner.py | 4 +- 
tests/test_zip_file.py | 6 +- 41 files changed, 926 insertions(+), 501 deletions(-) rename cycode/cli/{helpers => commands/report}/__init__.py (100%) create mode 100644 cycode/cli/commands/report/report_command.py rename cycode/cli/{helpers/maven => commands/report/sbom}/__init__.py (100%) create mode 100644 cycode/cli/commands/report/sbom/common.py create mode 100644 cycode/cli/commands/report/sbom/sbom_command.py create mode 100644 cycode/cli/commands/report/sbom/sbom_path_command.py create mode 100644 cycode/cli/commands/report/sbom/sbom_repository_url_command.py rename cycode/{cyclient/scan_config => cli/files_collector}/__init__.py (100%) create mode 100644 cycode/cli/files_collector/excluder.py create mode 100644 cycode/cli/files_collector/iac/__init__.py rename cycode/cli/{helpers => files_collector/iac}/tf_content_generator.py (73%) create mode 100644 cycode/cli/files_collector/models/__init__.py rename cycode/cli/{zip_file.py => files_collector/models/in_memory_zip.py} (74%) create mode 100644 cycode/cli/files_collector/path_documents.py create mode 100644 cycode/cli/files_collector/repository_documents.py create mode 100644 cycode/cli/files_collector/sca/__init__.py create mode 100644 cycode/cli/files_collector/sca/maven/__init__.py rename cycode/cli/{helpers => files_collector/sca}/maven/base_restore_maven_dependencies.py (100%) rename cycode/cli/{helpers => files_collector/sca}/maven/restore_gradle_dependencies.py (88%) rename cycode/cli/{helpers => files_collector/sca}/maven/restore_maven_dependencies.py (97%) rename cycode/cli/{helpers => files_collector/sca}/sca_code_scanner.py (88%) create mode 100644 cycode/cli/files_collector/zip_documents.py create mode 100644 cycode/cli/utils/get_api_client.py create mode 100644 cycode/cyclient/client_creator.py create mode 100644 cycode/cyclient/report_client.py delete mode 100644 cycode/cyclient/scan_config/scan_config_creator.py rename cycode/cyclient/{scan_config => }/scan_config_base.py (100%) diff --git 
a/cycode/cli/code_scanner.py b/cycode/cli/code_scanner.py index 128e8a6f..a1b466e0 100644 --- a/cycode/cli/code_scanner.py +++ b/cycode/cli/code_scanner.py @@ -6,7 +6,7 @@ import traceback from platform import platform from sys import getsizeof -from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple from uuid import UUID, uuid4 import click @@ -15,39 +15,37 @@ from cycode.cli import consts from cycode.cli.ci_integrations import get_commit_range from cycode.cli.config import configuration_manager -from cycode.cli.consts import SCA_SKIP_RESTORE_DEPENDENCIES_FLAG from cycode.cli.exceptions import custom_exceptions -from cycode.cli.helpers import sca_code_scanner, tf_content_generator +from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cli.files_collector.path_documents import get_relevant_document +from cycode.cli.files_collector.repository_documents import ( + calculate_pre_receive_commit_range, + get_commit_range_modified_documents, + get_diff_file_content, + get_diff_file_path, + get_git_repository_tree_file_entries, + get_pre_commit_modified_documents, + parse_commit_range, +) +from cycode.cli.files_collector.sca import sca_code_scanner +from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions +from cycode.cli.files_collector.zip_documents import zip_documents from cycode.cli.models import CliError, CliErrors, Document, DocumentDetections, LocalScanResult, Severity from cycode.cli.printers import ConsolePrinter -from cycode.cli.user_settings.config_file_manager import ConfigFileManager from cycode.cli.utils import scan_utils from cycode.cli.utils.path_utils import ( - change_filename_extension, - get_file_content, - get_file_size, get_path_by_os, - get_relevant_files_in_path, - is_binary_file, - 
is_sub_path, - load_json, ) from cycode.cli.utils.progress_bar import ProgressBarSection from cycode.cli.utils.scan_batch import run_parallel_batched_scan from cycode.cli.utils.scan_utils import set_issue_detected -from cycode.cli.utils.string_utils import get_content_size, is_binary_content from cycode.cli.utils.task_timer import TimeoutAfter -from cycode.cli.zip_file import InMemoryZip from cycode.cyclient import logger from cycode.cyclient.config import set_logging_level from cycode.cyclient.models import Detection, DetectionSchema, DetectionsPerFile, ZippedFileScanResult if TYPE_CHECKING: - from git import Blob, Diff - from git.objects.base import IndexObjUnion - from git.objects.tree import TraversedTreeTup - - from cycode.cli.utils.progress_bar import BaseProgressBar from cycode.cyclient.models import ScanDetailsResponse from cycode.cyclient.scan_client import ScanClient @@ -88,7 +86,7 @@ def scan_repository(context: click.Context, path: str, branch: str) -> None: file_path = file.path if monitor else get_path_by_os(os.path.join(path, file.path)) documents_to_scan.append(Document(file_path, file.data_stream.read().decode('UTF-8', errors='replace'))) - documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan) + documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) perform_pre_scan_documents_actions(context, scan_type, documents_to_scan, is_git_diff=False) @@ -172,7 +170,7 @@ def scan_commit_range( {'path': path, 'commit_range': commit_range, 'commit_id': commit_id}, ) - documents_to_scan.extend(exclude_irrelevant_documents_to_scan(context, commit_documents_to_scan)) + documents_to_scan.extend(exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan)) scanned_commits_count += 1 logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan}) @@ -199,30 +197,7 @@ def scan_path(context: click.Context, path: str) -> None: progress_bar.start() logger.debug('Starting path 
scan process, %s', {'path': path}) - - all_files_to_scan = get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) - - # we are double the progress bar section length because we are going to process the files twice - # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) - # second time to get the files content - progress_bar_section_len = len(all_files_to_scan) * 2 - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) - - relevant_files_to_scan = exclude_irrelevant_files(context, all_files_to_scan) - - # after finishing the first processing (excluding), - # we must update the progress bar stage with respect of excluded files. - # now it's possible that we will not process x2 of the files count - # because some of them were excluded, we should subtract the excluded files count - # from the progress bar section length - excluded_files_count = len(all_files_to_scan) - len(relevant_files_to_scan) - progress_bar_section_len = progress_bar_section_len - excluded_files_count - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) - - logger.debug( - 'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)} - ) - scan_disk_files(context, path, relevant_files_to_scan) + scan_disk_files(context, path) @click.command(short_help='Use this command to scan any content that was not committed yet.') @@ -247,7 +222,7 @@ def pre_commit_scan(context: click.Context, ignored_args: List[str]) -> None: progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file))) - documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan) + documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) scan_documents(context, 
documents_to_scan, is_git_diff=True) @@ -293,10 +268,11 @@ def pre_receive_scan(context: click.Context, ignored_args: List[str]) -> None: def scan_sca_pre_commit(context: click.Context) -> None: + scan_type = context.obj['scan_type'] scan_parameters = get_default_scan_parameters(context) git_head_documents, pre_committed_documents = get_pre_commit_modified_documents(context.obj['progress_bar']) - git_head_documents = exclude_irrelevant_documents_to_scan(context, git_head_documents) - pre_committed_documents = exclude_irrelevant_documents_to_scan(context, pre_committed_documents) + git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents) + pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents) sca_code_scanner.perform_pre_hook_range_scan_actions(git_head_documents, pre_committed_documents) scan_commit_range_documents( context, @@ -308,6 +284,7 @@ def scan_sca_pre_commit(context: click.Context) -> None: def scan_sca_commit_range(context: click.Context, path: str, commit_range: str) -> None: + scan_type = context.obj['scan_type'] progress_bar = context.obj['progress_bar'] scan_parameters = get_scan_parameters(context, path) @@ -315,8 +292,8 @@ def scan_sca_commit_range(context: click.Context, path: str, commit_range: str) from_commit_documents, to_commit_documents = get_commit_range_modified_documents( progress_bar, path, from_commit_rev, to_commit_rev ) - from_commit_documents = exclude_irrelevant_documents_to_scan(context, from_commit_documents) - to_commit_documents = exclude_irrelevant_documents_to_scan(context, to_commit_documents) + from_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents) + to_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents) sca_code_scanner.perform_pre_commit_range_scan_actions( path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev ) @@ -324,27 +301,15 @@ def 
scan_sca_commit_range(context: click.Context, path: str, commit_range: str) scan_commit_range_documents(context, from_commit_documents, to_commit_documents, scan_parameters=scan_parameters) -def scan_disk_files(context: click.Context, path: str, files_to_scan: List[str]) -> None: +def scan_disk_files(context: click.Context, path: str) -> None: scan_parameters = get_scan_parameters(context, path) scan_type = context.obj['scan_type'] progress_bar = context.obj['progress_bar'] - is_git_diff = False - try: - documents: List[Document] = [] - for file in files_to_scan: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - content = get_file_content(file) - if not content: - continue - - documents.append(_generate_document(file, scan_type, content, is_git_diff)) - - perform_pre_scan_documents_actions(context, scan_type, documents, is_git_diff) - scan_documents(context, documents, is_git_diff=is_git_diff, scan_parameters=scan_parameters) - + documents = get_relevant_document(progress_bar, scan_type, path) + perform_pre_scan_documents_actions(context, scan_type, documents) + scan_documents(context, documents, scan_parameters=scan_parameters) except Exception as e: _handle_exception(context, e) @@ -370,7 +335,7 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local try: logger.debug('Preparing local files, %s', {'batch_size': len(batch)}) - zipped_documents = zip_documents_to_scan(scan_type, InMemoryZip(), batch) + zipped_documents = zip_documents(scan_type, batch) zip_file_size = getsizeof(zipped_documents.in_memory_zip) scan_result = perform_scan( @@ -476,14 +441,10 @@ def scan_commit_range_documents( scan_result = init_default_scan_result(scan_id) if should_scan_documents(from_documents_to_scan, to_documents_to_scan): logger.debug('Preparing from-commit zip') - from_commit_zipped_documents = zip_documents_to_scan( - scan_type, from_commit_zipped_documents, from_documents_to_scan - ) + from_commit_zipped_documents = 
zip_documents(scan_type, from_documents_to_scan) logger.debug('Preparing to-commit zip') - to_commit_zipped_documents = zip_documents_to_scan( - scan_type, to_commit_zipped_documents, to_documents_to_scan - ) + to_commit_zipped_documents = zip_documents(scan_type, to_documents_to_scan) scan_result = perform_commit_range_scan_async( cycode_client, @@ -577,45 +538,9 @@ def create_local_scan_result( ) -def perform_pre_scan_documents_actions( - context: click.Context, scan_type: str, documents_to_scan: List[Document], is_git_diff: bool = False -) -> None: - if scan_type == consts.SCA_SCAN_TYPE and not context.obj.get(SCA_SKIP_RESTORE_DEPENDENCIES_FLAG): - logger.debug('Perform pre scan document add_dependencies_tree_document action') - sca_code_scanner.add_dependencies_tree_document(context, documents_to_scan, is_git_diff) - - -def zip_documents_to_scan(scan_type: str, zip_file: InMemoryZip, documents: List[Document]) -> InMemoryZip: - start_zip_creation_time = time.time() - - for index, document in enumerate(documents): - zip_file_size = getsizeof(zip_file.in_memory_zip) - validate_zip_file_size(scan_type, zip_file_size) - - logger.debug( - 'adding file to zip, %s', {'index': index, 'filename': document.path, 'unique_id': document.unique_id} - ) - zip_file.append(document.path, document.unique_id, document.content) - zip_file.close() - - end_zip_creation_time = time.time() - zip_creation_time = int(end_zip_creation_time - start_zip_creation_time) - logger.debug('finished to create zip file, %s', {'zip_creation_time': zip_creation_time}) - return zip_file - - -def validate_zip_file_size(scan_type: str, zip_file_size: int) -> None: - if scan_type == consts.SCA_SCAN_TYPE: - if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES) - else: - if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES) - - def 
perform_scan( cycode_client: 'ScanClient', - zipped_documents: InMemoryZip, + zipped_documents: 'InMemoryZip', scan_type: str, scan_id: str, is_git_diff: bool, @@ -632,7 +557,7 @@ def perform_scan( def perform_scan_async( - cycode_client: 'ScanClient', zipped_documents: InMemoryZip, scan_type: str, scan_parameters: dict + cycode_client: 'ScanClient', zipped_documents: 'InMemoryZip', scan_type: str, scan_parameters: dict ) -> ZippedFileScanResult: scan_async_result = cycode_client.zipped_file_scan_async(zipped_documents, scan_type, scan_parameters) logger.debug('scan request has been triggered successfully, scan id: %s', scan_async_result.scan_id) @@ -642,8 +567,8 @@ def perform_scan_async( def perform_commit_range_scan_async( cycode_client: 'ScanClient', - from_commit_zipped_documents: InMemoryZip, - to_commit_zipped_documents: InMemoryZip, + from_commit_zipped_documents: 'InMemoryZip', + to_commit_zipped_documents: 'InMemoryZip', scan_type: str, scan_parameters: dict, timeout: Optional[int] = None, @@ -759,56 +684,6 @@ def parse_pre_receive_input() -> str: return pre_receive_input.splitlines()[0] -def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]: - end_commit = get_end_commit_from_branch_update_details(branch_update_details) - - # branch is deleted, no need to perform scan - if end_commit == consts.EMPTY_COMMIT_SHA: - return None - - start_commit = get_oldest_unupdated_commit_for_branch(end_commit) - - # no new commit to update found - if not start_commit: - return None - - return f'{start_commit}~1...{end_commit}' - - -def get_end_commit_from_branch_update_details(update_details: str) -> str: - # update details pattern: - _, end_commit, _ = update_details.split() - return end_commit - - -def get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]: - # get a list of commits by chronological order that are not in the remote repository yet - # more info about rev-list command: https://git-scm.com/docs/git-rev-list - 
not_updated_commits = Repo(os.getcwd()).git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all') - commits = not_updated_commits.splitlines() - if not commits: - return None - return commits[0] - - -def get_diff_file_path(file: 'Diff') -> Optional[str]: - return file.b_path if file.b_path else file.a_path - - -def get_diff_file_content(file: 'Diff') -> str: - return file.diff.decode('UTF-8', errors='replace') - - -def should_process_git_object(obj: 'Blob', _: int) -> bool: - return obj.type == 'blob' and obj.size > 0 - - -def get_git_repository_tree_file_entries( - path: str, branch: str -) -> Union[Iterator['IndexObjUnion'], Iterator['TraversedTreeTup']]: - return Repo(path).tree(branch).traverse(predicate=should_process_git_object) - - def get_default_scan_parameters(context: click.Context) -> dict: return { 'monitor': context.obj.get('monitor'), @@ -839,34 +714,6 @@ def try_get_git_remote_url(path: str) -> Optional[dict]: return None -def exclude_irrelevant_documents_to_scan(context: click.Context, documents_to_scan: List[Document]) -> List[Document]: - logger.debug('Excluding irrelevant documents to scan') - - scan_type = context.obj['scan_type'] - - relevant_documents = [] - for document in documents_to_scan: - if _is_relevant_document_to_scan(scan_type, document.path, document.content): - relevant_documents.append(document) - - return relevant_documents - - -def exclude_irrelevant_files(context: click.Context, filenames: List[str]) -> List[str]: - scan_type = context.obj['scan_type'] - progress_bar = context.obj['progress_bar'] - - relevant_files = [] - for filename in filenames: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - if _is_relevant_file_to_scan(scan_type, filename): - relevant_files.append(filename) - - is_sub_path.cache_clear() # free up memory - - return relevant_files - - def exclude_irrelevant_detections( detections: List[Detection], scan_type: str, command_scan_type: str, severity_threshold: str ) -> List[Detection]: 
@@ -916,60 +763,6 @@ def _exclude_detections_by_exclusions_configuration(detections: List[Detection], return [detection for detection in detections if not _should_exclude_detection(detection, exclusions)] -def get_pre_commit_modified_documents(progress_bar: 'BaseProgressBar') -> Tuple[List[Document], List[Document]]: - git_head_documents = [] - pre_committed_documents = [] - - repo = Repo(os.getcwd()) - diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) - for file in diff_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - diff_file_path = get_diff_file_path(file) - file_path = get_path_by_os(diff_file_path) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, consts.GIT_HEAD_COMMIT_REV, diff_file_path) - if file_content is not None: - git_head_documents.append(Document(file_path, file_content)) - - if os.path.exists(file_path): - file_content = get_file_content(file_path) - pre_committed_documents.append(Document(file_path, file_content)) - - return git_head_documents, pre_committed_documents - - -def get_commit_range_modified_documents( - progress_bar: 'BaseProgressBar', path: str, from_commit_rev: str, to_commit_rev: str -) -> Tuple[List[Document], List[Document]]: - from_commit_documents = [] - to_commit_documents = [] - - repo = Repo(path) - diff = repo.commit(from_commit_rev).diff(to_commit_rev) - - modified_files_diff = [ - change for change in diff if change.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE - ] - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(modified_files_diff)) - for blob in modified_files_diff: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - diff_file_path = get_diff_file_path(blob) - file_path = get_path_by_os(diff_file_path) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, 
diff_file_path) - if file_content is not None: - from_commit_documents.append(Document(file_path, file_content)) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path) - if file_content is not None: - to_commit_documents.append(Document(file_path, file_content)) - - return from_commit_documents, to_commit_documents - - def _should_exclude_detection(detection: Detection, exclusions: Dict) -> bool: exclusions_by_value = exclusions.get(consts.EXCLUSIONS_BY_VALUE_SECTION_NAME, []) if _is_detection_sha_configured_in_exclusions(detection, exclusions_by_value): @@ -1014,13 +807,6 @@ def _is_detection_sha_configured_in_exclusions(detection: Detection, exclusions: return detection_sha in exclusions -def _is_path_configured_in_exclusions(scan_type: str, file_path: str) -> bool: - exclusions_by_path = configuration_manager.get_exclusions_by_scan_type(scan_type).get( - consts.EXCLUSIONS_BY_PATH_SECTION_NAME, [] - ) - return any(is_sub_path(exclusion_path, file_path) for exclusion_path in exclusions_by_path) - - def _get_package_name(detection: Detection) -> str: package_name = detection.detection_details.get('vulnerable_component', '') package_version = detection.detection_details.get('vulnerable_component_version', '') @@ -1032,119 +818,6 @@ def _get_package_name(detection: Detection) -> str: return f'{package_name}@{package_version}' -def _is_file_relevant_for_sca_scan(filename: str) -> bool: - if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS): - logger.debug("file is irrelevant because it is from node_modules's inner path, %s", {'filename': filename}) - return False - - return True - - -def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool: - if _is_subpath_of_cycode_configuration_folder(filename): - logger.debug('file is irrelevant because it is in cycode configuration directory, %s', {'filename': filename}) - return False - - if _is_path_configured_in_exclusions(scan_type, 
filename): - logger.debug('file is irrelevant because the file path is in the ignore paths list, %s', {'filename': filename}) - return False - - if not _is_file_extension_supported(scan_type, filename): - logger.debug('file is irrelevant because the file extension is not supported, %s', {'filename': filename}) - return False - - if is_binary_file(filename): - logger.debug('file is irrelevant because it is binary file, %s', {'filename': filename}) - return False - - if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename): - logger.debug('file is irrelevant because its exceeded max size limit, %s', {'filename': filename}) - return False - - if scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename): - return False - - return True - - -def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool: - if _is_subpath_of_cycode_configuration_folder(filename): - logger.debug( - 'document is irrelevant because it is in cycode configuration directory, %s', {'filename': filename} - ) - return False - - if _is_path_configured_in_exclusions(scan_type, filename): - logger.debug( - 'document is irrelevant because the document path is in the ignore paths list, %s', {'filename': filename} - ) - return False - - if not _is_file_extension_supported(scan_type, filename): - logger.debug('document is irrelevant because the file extension is not supported, %s', {'filename': filename}) - return False - - if is_binary_content(content): - logger.debug('document is irrelevant because it is binary, %s', {'filename': filename}) - return False - - if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content): - logger.debug('document is irrelevant because its exceeded max size limit, %s', {'filename': filename}) - return False - return True - - -def _is_file_extension_supported(scan_type: str, filename: str) -> bool: - filename = filename.lower() - - if scan_type == 
consts.INFRA_CONFIGURATION_SCAN_TYPE: - return filename.endswith(consts.INFRA_CONFIGURATION_SCAN_SUPPORTED_FILES) - - if scan_type == consts.SCA_SCAN_TYPE: - return filename.endswith(consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES) - - return not filename.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE) - - -def _generate_document(file: str, scan_type: str, content: str, is_git_diff: bool) -> Document: - if _is_iac(scan_type) and _is_tfplan_file(file, content): - return _handle_tfplan_file(file, content, is_git_diff) - return Document(file, content, is_git_diff) - - -def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document: - document_name = _generate_tfplan_document_name(file) - tf_content = tf_content_generator.generate_tf_content_from_tfplan(file, content) - return Document(document_name, tf_content, is_git_diff) - - -def _generate_tfplan_document_name(path: str) -> str: - document_name = change_filename_extension(path, 'tf') - timestamp = int(time.time()) - return f'{timestamp}-{document_name}' - - -def _is_iac(scan_type: str) -> bool: - return scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE - - -def _is_tfplan_file(file: str, content: str) -> bool: - if not file.endswith('.json'): - return False - tf_plan = load_json(content) - if not isinstance(tf_plan, dict): - return False - return 'resource_changes' in tf_plan - - -def _does_file_exceed_max_size_limit(filename: str) -> bool: - return get_file_size(filename) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES - - -def _does_document_exceed_max_size_limit(content: str) -> bool: - return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES - - def _get_document_by_file_name( documents: List[Document], file_name: str, unique_id: Optional[str] = None ) -> Optional[Document]: @@ -1155,14 +828,6 @@ def _get_document_by_file_name( return None -def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool: - return ( - 
is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename) - or is_sub_path(configuration_manager.local_config_file_manager.get_config_directory_path(), filename) - or filename.endswith(ConfigFileManager.get_config_file_route()) - ) - - def _handle_exception(context: click.Context, e: Exception, *, return_exception: bool = False) -> Optional[CliError]: context.obj['did_fail'] = True @@ -1372,18 +1037,6 @@ def _does_reach_to_max_commits_to_scan_limit(commit_ids: List[str], max_commits_ return len(commit_ids) >= max_commits_count -def parse_commit_range(commit_range: str, path: str) -> Tuple[str, str]: - from_commit_rev = None - to_commit_rev = None - - for commit in Repo(path).iter_commits(rev=commit_range): - if not to_commit_rev: - to_commit_rev = commit.hexsha - from_commit_rev = commit.hexsha - - return from_commit_rev, to_commit_rev - - def _normalize_file_path(path: str) -> str: if path.startswith('/'): return path[1:] diff --git a/cycode/cli/helpers/__init__.py b/cycode/cli/commands/report/__init__.py similarity index 100% rename from cycode/cli/helpers/__init__.py rename to cycode/cli/commands/report/__init__.py diff --git a/cycode/cli/commands/report/report_command.py b/cycode/cli/commands/report/report_command.py new file mode 100644 index 00000000..1c5f09d7 --- /dev/null +++ b/cycode/cli/commands/report/report_command.py @@ -0,0 +1,21 @@ +import click + +from cycode.cli.commands.report.sbom.sbom_command import sbom_command +from cycode.cli.utils.get_api_client import get_report_cycode_client + + +@click.group( + commands={ + 'sbom': sbom_command, + }, + short_help='Generate report. 
You`ll need to specify which report type to perform.', +) +@click.pass_context +def report_command( + context: click.Context, +) -> int: + """Generate report.""" + + context.obj['client'] = get_report_cycode_client(hide_response_log=False) # TODO disable log + + return 1 diff --git a/cycode/cli/helpers/maven/__init__.py b/cycode/cli/commands/report/sbom/__init__.py similarity index 100% rename from cycode/cli/helpers/maven/__init__.py rename to cycode/cli/commands/report/sbom/__init__.py diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py new file mode 100644 index 00000000..bf9cf42a --- /dev/null +++ b/cycode/cli/commands/report/sbom/common.py @@ -0,0 +1,25 @@ +import time +from typing import TYPE_CHECKING + +import click + +if TYPE_CHECKING: + from cycode.cyclient.report_client import ReportClient + + +def create_sbom_report(client: 'ReportClient', report_id: int, output_file: str) -> None: + # TODO(MarshalX): API will be changed soon. Just MVP for now. 
+ report_satus = None + status = 'Running' + while status == 'Running': + report_satus = client.get_execution_status(report_id)[0] + status = report_satus.report_executions[0].status + time.sleep(3) + + if not report_satus: + raise click.ClickException('Failed to get report status.') + + report_path = report_satus.report_executions[0].storage_details.path + report_content = client.get_file_content(report_path) + with open(output_file, 'w', encoding='UTF-8') as f: + f.write(report_content) diff --git a/cycode/cli/commands/report/sbom/sbom_command.py b/cycode/cli/commands/report/sbom/sbom_command.py new file mode 100644 index 00000000..de7ee37d --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_command.py @@ -0,0 +1,81 @@ +import click + +from cycode.cli.commands.report.sbom.sbom_path_command import sbom_path_command +from cycode.cli.commands.report.sbom.sbom_repository_url_command import sbom_repository_url_command +from cycode.cli.config import config +from cycode.cyclient.report_client import ReportParameters + + +@click.group( + commands={ + 'path': sbom_path_command, + 'repository_url': sbom_repository_url_command, + }, + short_help='Generate SBOM report. 
You`ll need to specify which report type to perform: path/repository_url.', +) +@click.option( + '--format', + '-f', + help='SBOM format.', + type=click.Choice(config['scans']['supported_sbom_formats']), + required=True, +) +@click.option( + '--output-format', + '-o', + default='json', + help='Specify the output file format (the default is json).', + type=click.Choice(['csv', 'json']), + required=False, +) +@click.option( + '--output-file', + help='Output file.', + default=None, + type=click.Path(resolve_path=True), + required=False, +) +@click.option( + '--include-vulnerabilities', + is_flag=True, + default=False, + help='Include vulnerabilities.', + type=bool, + required=False, +) +@click.option( + '--include-dev-dependencies', + is_flag=True, + default=False, + help='Include dev dependencies.', + type=bool, + required=False, +) +@click.pass_context +def sbom_command( + context: click.Context, + format: str, + output_format: str, + output_file: str, + include_vulnerabilities: bool, + include_dev_dependencies: bool, +) -> int: + """Generate SBOM report.""" + sbom_format_parts = format.split('-') + if len(sbom_format_parts) != 2: + raise click.ClickException('Invalid SBOM format.') + + sbom_format, sbom_format_version = sbom_format_parts + + report_parameters = ReportParameters( + entity_type='SbomCli', + sbom_report_type=sbom_format, + sbom_version=sbom_format_version, + output_format=output_format, + include_vulnerabilities=include_vulnerabilities, + include_dev_dependencies=include_dev_dependencies, + ) + context.obj['report_parameters'] = report_parameters + context.obj['output_file'] = output_file + + return 1 diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py new file mode 100644 index 00000000..9e054b9c --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -0,0 +1,29 @@ +import click + +from cycode.cli import consts +from cycode.cli.commands.report.sbom.common import 
create_sbom_report +from cycode.cli.files_collector.path_documents import get_relevant_document +from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions +from cycode.cli.files_collector.zip_documents import zip_documents + + +@click.command(short_help='Generate SBOM report for provided path in the command.') +@click.argument('path', nargs=1, type=click.Path(exists=True, resolve_path=True), required=True) +@click.pass_context +def sbom_path_command(context: click.Context, path: str) -> None: + client = context.obj['client'] + report_parameters = context.obj['report_parameters'] + output_file = context.obj['output_file'] + + # TODO(MarshalX): add support of progress bar somehow? + progress_bar = context.obj['progress_bar'] + progress_bar.start() + + documents = get_relevant_document(progress_bar, consts.SCA_SCAN_TYPE, path) + # TODO(MarshalX): refactoring more. Combine into one function. + perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) + + zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) + sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) + + create_sbom_report(client, sbom_report.id, output_file) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py new file mode 100644 index 00000000..5f9cb1ff --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -0,0 +1,15 @@ +import click + +from cycode.cli.commands.report.sbom.common import create_sbom_report + + +@click.command(short_help='Generate SBOM report for provided repository URI in the command.') +@click.argument('uri', nargs=1, type=str, required=True) +@click.pass_context +def sbom_repository_url_command(context: click.Context, uri: str) -> None: + client = context.obj['client'] + report_parameters = context.obj['report_parameters'] + output_file = context.obj['output_file'] + # 
TODO(MarshalX): add support of progress bar somehow? + sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) + create_sbom_report(client, sbom_report.id, output_file) diff --git a/cycode/cli/config.yaml b/cycode/cli/config.yaml index 0ffe7abc..dd18ffef 100644 --- a/cycode/cli/config.yaml +++ b/cycode/cli/config.yaml @@ -8,6 +8,9 @@ scans: supported_sca_scans: - package-vulnerabilities - license-compliance + supported_sbom_formats: + - spdx-2.2 + - cyclonedx-1.4 result_printer: default: lines_to_display: 3 diff --git a/cycode/cyclient/scan_config/__init__.py b/cycode/cli/files_collector/__init__.py similarity index 100% rename from cycode/cyclient/scan_config/__init__.py rename to cycode/cli/files_collector/__init__.py diff --git a/cycode/cli/files_collector/excluder.py b/cycode/cli/files_collector/excluder.py new file mode 100644 index 00000000..213b8db1 --- /dev/null +++ b/cycode/cli/files_collector/excluder.py @@ -0,0 +1,133 @@ +from typing import TYPE_CHECKING, List + +from cycode.cli import consts +from cycode.cli.config import configuration_manager +from cycode.cli.user_settings.config_file_manager import ConfigFileManager +from cycode.cli.utils.path_utils import get_file_size, is_binary_file, is_sub_path +from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cli.utils.string_utils import get_content_size, is_binary_content +from cycode.cyclient import logger + +if TYPE_CHECKING: + from cycode.cli.models import Document + from cycode.cli.utils.progress_bar import BaseProgressBar + + +def exclude_irrelevant_files(progress_bar: 'BaseProgressBar', scan_type: str, filenames: List[str]) -> List[str]: + relevant_files = [] + for filename in filenames: + progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + if _is_relevant_file_to_scan(scan_type, filename): + relevant_files.append(filename) + + is_sub_path.cache_clear() # free up memory + + return relevant_files + + +def 
exclude_irrelevant_documents_to_scan(scan_type: str, documents_to_scan: List['Document']) -> List['Document']: + logger.debug('Excluding irrelevant documents to scan') + + relevant_documents = [] + for document in documents_to_scan: + if _is_relevant_document_to_scan(scan_type, document.path, document.content): + relevant_documents.append(document) + + return relevant_documents + + +def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool: + return ( + is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename) + or is_sub_path(configuration_manager.local_config_file_manager.get_config_directory_path(), filename) + or filename.endswith(ConfigFileManager.get_config_file_route()) + ) + + +def _is_path_configured_in_exclusions(scan_type: str, file_path: str) -> bool: + exclusions_by_path = configuration_manager.get_exclusions_by_scan_type(scan_type).get( + consts.EXCLUSIONS_BY_PATH_SECTION_NAME, [] + ) + return any(is_sub_path(exclusion_path, file_path) for exclusion_path in exclusions_by_path) + + +def _does_file_exceed_max_size_limit(filename: str) -> bool: + return get_file_size(filename) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES + + +def _does_document_exceed_max_size_limit(content: str) -> bool: + return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES + + +def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool: + if _is_subpath_of_cycode_configuration_folder(filename): + logger.debug('file is irrelevant because it is in cycode configuration directory, %s', {'filename': filename}) + return False + + if _is_path_configured_in_exclusions(scan_type, filename): + logger.debug('file is irrelevant because the file path is in the ignore paths list, %s', {'filename': filename}) + return False + + if not _is_file_extension_supported(scan_type, filename): + logger.debug('file is irrelevant because the file extension is not supported, %s', {'filename': filename}) + return False + + if 
is_binary_file(filename): + logger.debug('file is irrelevant because it is binary file, %s', {'filename': filename}) + return False + + if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename): + logger.debug('file is irrelevant because its exceeded max size limit, %s', {'filename': filename}) + return False + + if scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename): + return False + + return True + + +def _is_file_relevant_for_sca_scan(filename: str) -> bool: + if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS): + logger.debug("file is irrelevant because it is from node_modules's inner path, %s", {'filename': filename}) + return False + + return True + + +def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool: + if _is_subpath_of_cycode_configuration_folder(filename): + logger.debug( + 'document is irrelevant because it is in cycode configuration directory, %s', {'filename': filename} + ) + return False + + if _is_path_configured_in_exclusions(scan_type, filename): + logger.debug( + 'document is irrelevant because the document path is in the ignore paths list, %s', {'filename': filename} + ) + return False + + if not _is_file_extension_supported(scan_type, filename): + logger.debug('document is irrelevant because the file extension is not supported, %s', {'filename': filename}) + return False + + if is_binary_content(content): + logger.debug('document is irrelevant because it is binary, %s', {'filename': filename}) + return False + + if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content): + logger.debug('document is irrelevant because its exceeded max size limit, %s', {'filename': filename}) + return False + return True + + +def _is_file_extension_supported(scan_type: str, filename: str) -> bool: + filename = filename.lower() + + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: + return 
filename.endswith(consts.INFRA_CONFIGURATION_SCAN_SUPPORTED_FILES) + + if scan_type == consts.SCA_SCAN_TYPE: + return filename.endswith(consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES) + + return not filename.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE) diff --git a/cycode/cli/files_collector/iac/__init__.py b/cycode/cli/files_collector/iac/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/helpers/tf_content_generator.py b/cycode/cli/files_collector/iac/tf_content_generator.py similarity index 73% rename from cycode/cli/helpers/tf_content_generator.py rename to cycode/cli/files_collector/iac/tf_content_generator.py index 7594a96f..4df6d827 100644 --- a/cycode/cli/helpers/tf_content_generator.py +++ b/cycode/cli/files_collector/iac/tf_content_generator.py @@ -1,13 +1,34 @@ import json +import time from typing import List +from cycode.cli import consts from cycode.cli.exceptions.custom_exceptions import TfplanKeyError from cycode.cli.models import ResourceChange -from cycode.cli.utils.path_utils import load_json +from cycode.cli.utils.path_utils import change_filename_extension, load_json ACTIONS_TO_OMIT_RESOURCE = ['delete'] +def generate_tfplan_document_name(path: str) -> str: + document_name = change_filename_extension(path, 'tf') + timestamp = int(time.time()) + return f'{timestamp}-{document_name}' + + +def is_iac(scan_type: str) -> bool: + return scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE + + +def is_tfplan_file(file: str, content: str) -> bool: + if not file.endswith('.json'): + return False + tf_plan = load_json(content) + if not isinstance(tf_plan, dict): + return False + return 'resource_changes' in tf_plan + + def generate_tf_content_from_tfplan(filename: str, tfplan: str) -> str: planned_resources = _extract_resources(tfplan, filename) return _generate_tf_content(planned_resources) diff --git a/cycode/cli/files_collector/models/__init__.py b/cycode/cli/files_collector/models/__init__.py new file mode 100644 
index 00000000..e69de29b diff --git a/cycode/cli/zip_file.py b/cycode/cli/files_collector/models/in_memory_zip.py similarity index 74% rename from cycode/cli/zip_file.py rename to cycode/cli/files_collector/models/in_memory_zip.py index 7d659c8e..df15cdd4 100644 --- a/cycode/cli/zip_file.py +++ b/cycode/cli/files_collector/models/in_memory_zip.py @@ -1,8 +1,9 @@ -import os.path from io import BytesIO from typing import Optional from zipfile import ZIP_DEFLATED, ZipFile +from cycode.cli.utils.path_utils import concat_unique_id + class InMemoryZip(object): def __init__(self) -> None: @@ -24,11 +25,3 @@ def close(self) -> None: def read(self) -> bytes: self.in_memory_zip.seek(0) return self.in_memory_zip.read() - - -def concat_unique_id(filename: str, unique_id: str) -> str: - if filename.startswith(os.sep): - # remove leading slash to join the path correctly - filename = filename[len(os.sep) :] - - return os.path.join(unique_id, filename) diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py new file mode 100644 index 00000000..0934170d --- /dev/null +++ b/cycode/cli/files_collector/path_documents.py @@ -0,0 +1,106 @@ +import os +from typing import TYPE_CHECKING, Iterable, List + +import pathspec + +from cycode.cli.files_collector.excluder import exclude_irrelevant_files +from cycode.cli.files_collector.iac.tf_content_generator import ( + generate_tf_content_from_tfplan, + generate_tfplan_document_name, + is_iac, + is_tfplan_file, +) +from cycode.cli.models import Document +from cycode.cli.utils.path_utils import get_absolute_path, get_file_content +from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cyclient import logger + +if TYPE_CHECKING: + from cycode.cli.utils.progress_bar import BaseProgressBar + + +def _get_all_existing_files_in_directory(path: str) -> List[str]: + files: List[str] = [] + + for root, _, filenames in os.walk(path): + for filename in filenames: + 
files.append(os.path.join(root, filename)) + + return files + + +def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]: + absolute_path = get_absolute_path(path) + + if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): + raise FileNotFoundError(f'the specified path was not found, path: {absolute_path}') + + if os.path.isfile(absolute_path): + return [absolute_path] + + all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) + + path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) + excluded_file_paths = set(path_spec.match_files(all_file_paths)) + + relevant_file_paths = all_file_paths - excluded_file_paths + + return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] + + +def _get_relevant_files(progress_bar: 'BaseProgressBar', scan_type: str, path: str) -> List[str]: + all_files_to_scan = _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) + + # we double the progress bar section length because we are going to process the files twice + # first time to get the file list with respect to excluded patterns (excluding takes seconds to execute) + # second time to get the files content + progress_bar_section_len = len(all_files_to_scan) * 2 + progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) + + relevant_files_to_scan = exclude_irrelevant_files(progress_bar, scan_type, all_files_to_scan) + + # after finishing the first processing (excluding), + # we must update the progress bar stage with respect to excluded files.
+ # now it's possible that we will not process x2 of the files count + # because some of them were excluded, we should subtract the excluded files count + # from the progress bar section length + excluded_files_count = len(all_files_to_scan) - len(relevant_files_to_scan) + progress_bar_section_len = progress_bar_section_len - excluded_files_count + progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) + + logger.debug( + 'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)} + ) + + return relevant_files_to_scan + + +def _generate_document(file: str, scan_type: str, content: str, is_git_diff: bool) -> Document: + if is_iac(scan_type) and is_tfplan_file(file, content): + return _handle_tfplan_file(file, content, is_git_diff) + + return Document(file, content, is_git_diff) + + +def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document: + document_name = generate_tfplan_document_name(file) + tf_content = generate_tf_content_from_tfplan(file, content) + return Document(document_name, tf_content, is_git_diff) + + +def get_relevant_document( + progress_bar: 'BaseProgressBar', scan_type: str, path: str, *, is_git_diff: bool = False +) -> List[Document]: + relevant_files = _get_relevant_files(progress_bar, scan_type, path) + + documents: List[Document] = [] + for file in relevant_files: + progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + + content = get_file_content(file) + if not content: + continue + + documents.append(_generate_document(file, scan_type, content, is_git_diff)) + + return documents diff --git a/cycode/cli/files_collector/repository_documents.py b/cycode/cli/files_collector/repository_documents.py new file mode 100644 index 00000000..9cde54fd --- /dev/null +++ b/cycode/cli/files_collector/repository_documents.py @@ -0,0 +1,135 @@ +import os +from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union + +from 
cycode.cli import consts +from cycode.cli.files_collector.sca import sca_code_scanner +from cycode.cli.models import Document +from cycode.cli.utils.path_utils import get_file_content, get_path_by_os +from cycode.cli.utils.progress_bar import ProgressBarSection + +if TYPE_CHECKING: + from git import Blob, Diff + from git.objects.base import IndexObjUnion + from git.objects.tree import TraversedTreeTup + + from cycode.cli.utils.progress_bar import BaseProgressBar + +from git import Repo + + +def should_process_git_object(obj: 'Blob', _: int) -> bool: + return obj.type == 'blob' and obj.size > 0 + + +def get_git_repository_tree_file_entries( + path: str, branch: str +) -> Union[Iterator['IndexObjUnion'], Iterator['TraversedTreeTup']]: + return Repo(path).tree(branch).traverse(predicate=should_process_git_object) + + +def parse_commit_range(commit_range: str, path: str) -> Tuple[str, str]: + from_commit_rev = None + to_commit_rev = None + + for commit in Repo(path).iter_commits(rev=commit_range): + if not to_commit_rev: + to_commit_rev = commit.hexsha + from_commit_rev = commit.hexsha + + return from_commit_rev, to_commit_rev + + +def get_diff_file_path(file: 'Diff') -> Optional[str]: + return file.b_path if file.b_path else file.a_path + + +def get_diff_file_content(file: 'Diff') -> str: + return file.diff.decode('UTF-8', errors='replace') + + +def get_pre_commit_modified_documents(progress_bar: 'BaseProgressBar') -> Tuple[List[Document], List[Document]]: + git_head_documents = [] + pre_committed_documents = [] + + repo = Repo(os.getcwd()) + diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) + progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) + for file in diff_files: + progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + + diff_file_path = get_diff_file_path(file) + file_path = get_path_by_os(diff_file_path) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, 
consts.GIT_HEAD_COMMIT_REV, diff_file_path) + if file_content is not None: + git_head_documents.append(Document(file_path, file_content)) + + if os.path.exists(file_path): + file_content = get_file_content(file_path) + pre_committed_documents.append(Document(file_path, file_content)) + + return git_head_documents, pre_committed_documents + + +def get_commit_range_modified_documents( + progress_bar: 'BaseProgressBar', path: str, from_commit_rev: str, to_commit_rev: str +) -> Tuple[List[Document], List[Document]]: + from_commit_documents = [] + to_commit_documents = [] + + repo = Repo(path) + diff = repo.commit(from_commit_rev).diff(to_commit_rev) + + modified_files_diff = [ + change for change in diff if change.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE + ] + progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(modified_files_diff)) + for blob in modified_files_diff: + progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + + diff_file_path = get_diff_file_path(blob) + file_path = get_path_by_os(diff_file_path) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, diff_file_path) + if file_content is not None: + from_commit_documents.append(Document(file_path, file_content)) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path) + if file_content is not None: + to_commit_documents.append(Document(file_path, file_content)) + + return from_commit_documents, to_commit_documents + + +def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]: + end_commit = _get_end_commit_from_branch_update_details(branch_update_details) + + # branch is deleted, no need to perform scan + if end_commit == consts.EMPTY_COMMIT_SHA: + return None + + start_commit = _get_oldest_unupdated_commit_for_branch(end_commit) + + # no new commit to update found + if not start_commit: + return None + + return f'{start_commit}~1...{end_commit}' + + +def 
_get_end_commit_from_branch_update_details(update_details: str) -> str: + # update details pattern: "{start_commit} {end_commit} {ref}" (whitespace-separated) + _, end_commit, _ = update_details.split() + return end_commit + + +def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]: + # get a list of commits by chronological order that are not in the remote repository yet + # more info about rev-list command: https://git-scm.com/docs/git-rev-list + not_updated_commits = Repo(os.getcwd()).git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all') + + commits = not_updated_commits.splitlines() + if not commits: + return None + + return commits[0] diff --git a/cycode/cli/files_collector/sca/__init__.py b/cycode/cli/files_collector/sca/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/files_collector/sca/maven/__init__.py b/cycode/cli/files_collector/sca/maven/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/helpers/maven/base_restore_maven_dependencies.py b/cycode/cli/files_collector/sca/maven/base_restore_maven_dependencies.py similarity index 100% rename from cycode/cli/helpers/maven/base_restore_maven_dependencies.py rename to cycode/cli/files_collector/sca/maven/base_restore_maven_dependencies.py diff --git a/cycode/cli/helpers/maven/restore_gradle_dependencies.py b/cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py similarity index 88% rename from cycode/cli/helpers/maven/restore_gradle_dependencies.py rename to cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py index f8cd2fec..ef975ba5 100644 --- a/cycode/cli/helpers/maven/restore_gradle_dependencies.py +++ b/cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py @@ -2,7 +2,7 @@ import click -from cycode.cli.helpers.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies +from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies from cycode.cli.models import
Document BUILD_GRADLE_FILE_NAME = 'build.gradle' diff --git a/cycode/cli/helpers/maven/restore_maven_dependencies.py b/cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py similarity index 97% rename from cycode/cli/helpers/maven/restore_maven_dependencies.py rename to cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py index d8e6675f..0e21df12 100644 --- a/cycode/cli/helpers/maven/restore_maven_dependencies.py +++ b/cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py @@ -3,7 +3,7 @@ import click -from cycode.cli.helpers.maven.base_restore_maven_dependencies import ( +from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import ( BaseRestoreMavenDependencies, build_dep_tree_path, execute_command, diff --git a/cycode/cli/helpers/sca_code_scanner.py b/cycode/cli/files_collector/sca/sca_code_scanner.py similarity index 88% rename from cycode/cli/helpers/sca_code_scanner.py rename to cycode/cli/files_collector/sca/sca_code_scanner.py index 227b553e..a6aa6b78 100644 --- a/cycode/cli/helpers/sca_code_scanner.py +++ b/cycode/cli/files_collector/sca/sca_code_scanner.py @@ -5,14 +5,14 @@ from git import GitCommandError, Repo from cycode.cli import consts -from cycode.cli.helpers.maven.restore_gradle_dependencies import RestoreGradleDependencies -from cycode.cli.helpers.maven.restore_maven_dependencies import RestoreMavenDependencies +from cycode.cli.files_collector.sca.maven.restore_gradle_dependencies import RestoreGradleDependencies +from cycode.cli.files_collector.sca.maven.restore_maven_dependencies import RestoreMavenDependencies from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_file_content, get_file_dir, join_paths from cycode.cyclient import logger if TYPE_CHECKING: - from cycode.cli.helpers.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies + from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import 
BaseRestoreMavenDependencies BUILD_GRADLE_FILE_NAME = 'build.gradle' BUILD_GRADLE_KTS_FILE_NAME = 'build.gradle.kts' @@ -141,3 +141,11 @@ def get_file_content_from_commit(repo: Repo, commit: str, file_path: str) -> Opt return repo.git.show(f'{commit}:{file_path}') except GitCommandError: return None + + +def perform_pre_scan_documents_actions( + context: click.Context, scan_type: str, documents_to_scan: List[Document], is_git_diff: bool = False +) -> None: + if scan_type == consts.SCA_SCAN_TYPE and not context.obj.get(consts.SCA_SKIP_RESTORE_DEPENDENCIES_FLAG): + logger.debug('Perform pre scan document add_dependencies_tree_document action') + add_dependencies_tree_document(context, documents_to_scan, is_git_diff) diff --git a/cycode/cli/files_collector/zip_documents.py b/cycode/cli/files_collector/zip_documents.py new file mode 100644 index 00000000..65bcdef1 --- /dev/null +++ b/cycode/cli/files_collector/zip_documents.py @@ -0,0 +1,42 @@ +import time +from sys import getsizeof +from typing import List, Optional + +from cycode.cli import consts +from cycode.cli.exceptions import custom_exceptions +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cli.models import Document +from cycode.cyclient import logger + + +def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None: + if scan_type == consts.SCA_SCAN_TYPE: + if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES: + raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES) + else: + if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES: + raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES) + + +def zip_documents(scan_type: str, documents: List[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip: + if zip_file is None: + zip_file = InMemoryZip() + + start_zip_creation_time = time.time() + + for index, document in enumerate(documents): + zip_file_size = getsizeof(zip_file.in_memory_zip) + 
_validate_zip_file_size(scan_type, zip_file_size) + + logger.debug( + 'adding file to zip, %s', {'index': index, 'filename': document.path, 'unique_id': document.unique_id} + ) + zip_file.append(document.path, document.unique_id, document.content) + + zip_file.close() + + end_zip_creation_time = time.time() + zip_creation_time = int(end_zip_creation_time - start_zip_creation_time) + logger.debug('finished to create zip file, %s', {'zip_creation_time': zip_creation_time}) + + return zip_file diff --git a/cycode/cli/main.py b/cycode/cli/main.py index 94f3ff29..17cc65d8 100644 --- a/cycode/cli/main.py +++ b/cycode/cli/main.py @@ -1,13 +1,14 @@ import json import logging import sys -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import List, Optional import click from cycode import __version__ from cycode.cli import code_scanner from cycode.cli.auth.auth_command import authenticate +from cycode.cli.commands.report.report_command import report_command from cycode.cli.config import config from cycode.cli.consts import ( CLI_CONTEXT_SETTINGS, @@ -18,17 +19,13 @@ ) from cycode.cli.models import Severity from cycode.cli.user_settings.configuration_manager import ConfigurationManager -from cycode.cli.user_settings.credentials_manager import CredentialsManager from cycode.cli.user_settings.user_settings_commands import add_exclusions, set_credentials from cycode.cli.utils import scan_utils +from cycode.cli.utils.get_api_client import get_scan_cycode_client from cycode.cli.utils.progress_bar import get_progress_bar from cycode.cyclient.config import set_logging_level from cycode.cyclient.cycode_client_base import CycodeClientBase from cycode.cyclient.models import UserAgentOptionScheme -from cycode.cyclient.scan_config.scan_config_creator import create_scan_client - -if TYPE_CHECKING: - from cycode.cyclient.scan_client import ScanClient @click.group( @@ -137,7 +134,7 @@ def code_scan( else: context.obj['soft_fail'] = config['soft_fail'] - 
context.obj['client'] = get_cycode_client(client_id, secret, not context.obj['show_secret']) + context.obj['client'] = get_scan_cycode_client(client_id, secret, not context.obj['show_secret']) context.obj['scan_type'] = scan_type context.obj['severity_threshold'] = severity_threshold context.obj['monitor'] = monitor @@ -185,6 +182,7 @@ def version(context: click.Context) -> None: @click.group( commands={ 'scan': code_scan, + 'report': report_command, 'configure': set_credentials, 'ignore': add_exclusions, 'auth': authenticate, @@ -241,22 +239,6 @@ def main_cli( CycodeClientBase.enrich_user_agent(user_agent_option.user_agent_suffix) -def get_cycode_client(client_id: str, client_secret: str, hide_response_log: bool) -> 'ScanClient': - if not client_id or not client_secret: - client_id, client_secret = _get_configured_credentials() - if not client_id: - raise click.ClickException('Cycode client id needed.') - if not client_secret: - raise click.ClickException('Cycode client secret is needed.') - - return create_scan_client(client_id, client_secret, hide_response_log) - - -def _get_configured_credentials() -> Tuple[str, str]: - credentials_manager = CredentialsManager() - return credentials_manager.get_credentials() - - def _should_fail_scan(context: click.Context) -> bool: return scan_utils.is_scan_failed(context) diff --git a/cycode/cli/utils/get_api_client.py b/cycode/cli/utils/get_api_client.py new file mode 100644 index 00000000..7bbfa2d9 --- /dev/null +++ b/cycode/cli/utils/get_api_client.py @@ -0,0 +1,40 @@ +from typing import TYPE_CHECKING, Optional, Tuple, Union + +import click + +from cycode.cli.user_settings.credentials_manager import CredentialsManager +from cycode.cyclient.client_creator import create_report_client, create_scan_client + +if TYPE_CHECKING: + from cycode.cyclient.report_client import ReportClient + from cycode.cyclient.scan_client import ScanClient + + +def _get_cycode_client( + create_client_func: callable, client_id: Optional[str], 
client_secret: Optional[str], hide_response_log: bool +) -> Union['ScanClient', 'ReportClient']: + if not client_id or not client_secret: + client_id, client_secret = _get_configured_credentials() + if not client_id: + raise click.ClickException('Cycode client id needed.') + if not client_secret: + raise click.ClickException('Cycode client secret is needed.') + + return create_client_func(client_id, client_secret, hide_response_log) + + +def get_scan_cycode_client( + client_id: Optional[str] = None, client_secret: Optional[str] = None, hide_response_log: bool = True +) -> 'ScanClient': + return _get_cycode_client(create_scan_client, client_id, client_secret, hide_response_log) + + +def get_report_cycode_client( + client_id: Optional[str] = None, client_secret: Optional[str] = None, hide_response_log: bool = True +) -> 'ReportClient': + return _get_cycode_client(create_report_client, client_id, client_secret, hide_response_log) + + +def _get_configured_credentials() -> Tuple[str, str]: + credentials_manager = CredentialsManager() + return credentials_manager.get_credentials() diff --git a/cycode/cli/utils/path_utils.py b/cycode/cli/utils/path_utils.py index ad5ce94e..e0cedc88 100644 --- a/cycode/cli/utils/path_utils.py +++ b/cycode/cli/utils/path_utils.py @@ -1,31 +1,11 @@ import json import os from functools import lru_cache -from typing import AnyStr, Iterable, List, Optional +from typing import AnyStr, List, Optional -import pathspec from binaryornot.check import is_binary -def get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]: - absolute_path = get_absolute_path(path) - - if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): - raise FileNotFoundError(f'the specified path was not found, path: {absolute_path}') - - if os.path.isfile(absolute_path): - return [absolute_path] - - all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) - - path_spec = 
pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - excluded_file_paths = set(path_spec.match_files(all_file_paths)) - - relevant_file_paths = all_file_paths - excluded_file_paths - - return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] - - @lru_cache(maxsize=None) def is_sub_path(path: str, sub_path: str) -> bool: try: @@ -54,16 +34,6 @@ def get_path_by_os(filename: str) -> str: return filename.replace('/', os.sep) -def _get_all_existing_files_in_directory(path: str) -> List[str]: - files: List[str] = [] - - for root, _, filenames in os.walk(path): - for filename in filenames: - files.append(os.path.join(root, filename)) - - return files - - def is_path_exists(path: str) -> bool: return os.path.exists(path) @@ -98,3 +68,11 @@ def load_json(txt: str) -> Optional[dict]: def change_filename_extension(filename: str, extension: str) -> str: base_name, _ = os.path.splitext(filename) return f'{base_name}.{extension}' + + +def concat_unique_id(filename: str, unique_id: str) -> str: + if filename.startswith(os.sep): + # remove leading slash to join the path correctly + filename = filename[len(os.sep) :] + + return os.path.join(unique_id, filename) diff --git a/cycode/cyclient/client_creator.py b/cycode/cyclient/client_creator.py new file mode 100644 index 00000000..da62bd5a --- /dev/null +++ b/cycode/cyclient/client_creator.py @@ -0,0 +1,23 @@ +from cycode.cyclient.config import dev_mode +from cycode.cyclient.config_dev import DEV_CYCODE_API_URL +from cycode.cyclient.cycode_dev_based_client import CycodeDevBasedClient +from cycode.cyclient.cycode_token_based_client import CycodeTokenBasedClient +from cycode.cyclient.report_client import ReportClient +from cycode.cyclient.scan_client import ScanClient +from cycode.cyclient.scan_config_base import DefaultScanConfig, DevScanConfig + + +def create_scan_client(client_id: str, client_secret: str, hide_response_log: bool) -> ScanClient: + if dev_mode: + client = 
CycodeDevBasedClient(DEV_CYCODE_API_URL) + scan_config = DevScanConfig() + else: + client = CycodeTokenBasedClient(client_id, client_secret) + scan_config = DefaultScanConfig() + + return ScanClient(client, scan_config, hide_response_log) + + +def create_report_client(client_id: str, client_secret: str, hide_response_log: bool) -> ReportClient: + client = CycodeDevBasedClient(DEV_CYCODE_API_URL) if dev_mode else CycodeTokenBasedClient(client_id, client_secret) + return ReportClient(client, hide_response_log) diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py index f5983083..5b73be19 100644 --- a/cycode/cyclient/models.py +++ b/cycode/cyclient/models.py @@ -344,3 +344,82 @@ def user_agent_suffix(self) -> str: f'EnvName: {self.env_name}; EnvVersion: {self.env_version}' f')' ) + + +class RequestedSbomReportResultSchema(Schema): + class Meta: + unknown = EXCLUDE + + id = fields.Integer() + name = fields.String() + # add more fields on demand + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> 'SbomReport': + return SbomReport(**data) + + +@dataclass +class SbomReport: + id: int + name: str + + +class ReportExecutionSchema(Schema): + class Meta: + unknown = EXCLUDE + + status = fields.String() + error_message = fields.String(allow_none=True) + status_message = fields.String(allow_none=True) + storage_details = fields.Nested('SbomReportStorageDetailsSchema', allow_none=True) + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> 'ReportExecution': + return ReportExecution(**data) + + +@dataclass +class ReportExecution: + status: str + error_message: Optional[str] = None + status_message: Optional[str] = None + storage_details: Optional['SbomReportStorageDetails'] = None + + +class SbomReportExecutionStatusResultSchema(Schema): + class Meta: + unknown = EXCLUDE + + id = fields.Integer() + report_executions = fields.List(fields.Nested(ReportExecutionSchema)) + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> 
'SbomReportStatus': + return SbomReportStatus(**data) + + +class SbomReportStorageDetailsSchema(Schema): + class Meta: + unknown = EXCLUDE + + path = fields.String() + folder = fields.String() + size = fields.Integer() + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> 'SbomReportStorageDetails': + return SbomReportStorageDetails(**data) + + +@dataclass +class SbomReportStorageDetails: + path: str + folder: str + size: int + + +@dataclass +class SbomReportStatus: + id: int + report_executions: List[ReportExecution] diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py new file mode 100644 index 00000000..de8c64ce --- /dev/null +++ b/cycode/cyclient/report_client.py @@ -0,0 +1,84 @@ +import dataclasses +import json +from typing import List, Optional + +from requests import Response + +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cyclient import models +from cycode.cyclient.cycode_client_base import CycodeClientBase + + +@dataclasses.dataclass +class ReportParameters: + entity_type: str + sbom_report_type: str + sbom_version: str + output_format: str + include_vulnerabilities: bool + include_dev_dependencies: bool + + def to_dict(self) -> dict: + return dataclasses.asdict(self) + + def to_json(self) -> str: + return json.dumps(self.to_dict()) + + +class ReportClient: + SERVICE_NAME: str = 'report' + CREATE_SBOM_REPORT_REQUEST_PATH: str = 'api/v2/report/{report_type}/sbom' + GET_EXECUTIONS_STATUS_PATH: str = 'api/v2/report/{report_id}/executions' + + DOWNLOAD_REPORT_PATH: str = 'files/api/v1/file/sbom/{file_name}' # not in the report service + + def __init__(self, client: CycodeClientBase, hide_response_log: bool = True) -> None: + self.client = client + self._hide_response_log = hide_response_log + + def request_sbom_report( + self, params: ReportParameters, zip_file: InMemoryZip = None, repository_url: Optional[str] = None + ) -> models.SbomReport: + report_type = 'zipped-file' if 
zip_file else 'repository-url' + url_path = f'{self.SERVICE_NAME}/{self.CREATE_SBOM_REPORT_REQUEST_PATH}'.format(report_type=report_type) + + request_data = {'report_parameters': params.to_dict()} + if repository_url: + request_data['repository_url'] = repository_url + + # FIXME BACKEND ISSUE + del request_data['report_parameters']['entity_type'] + + # FIXME Delete after BE fix + request_data['report_parameters'] = json.dumps(request_data['report_parameters']) + + request_args = { + 'url_path': url_path, + 'data': request_data, + 'hide_response_content_log': self._hide_response_log, + } + + if zip_file: + request_args['files'] = {'file': ('sca_files.zip', zip_file.read())} + + response = self.client.post(**request_args) + return self.parse_requested_sbom_report_response(response) + + def get_execution_status(self, report_id: int) -> List[models.SbomReportStatus]: + url_path = f'{self.SERVICE_NAME}/{self.GET_EXECUTIONS_STATUS_PATH}'.format(report_id=report_id) + response = self.client.get(url_path=url_path) + return self.parse_execution_status_response(response) + + def get_file_content(self, file_name: str) -> str: + response = self.client.get( + url_path=self.DOWNLOAD_REPORT_PATH.format(file_name=file_name), params={'include_hidden': True} + ) + return response.text + + @staticmethod + def parse_requested_sbom_report_response(response: Response) -> models.SbomReport: + return models.RequestedSbomReportResultSchema().load(response.json()) + + @staticmethod + def parse_execution_status_response(response: Response) -> List[models.SbomReportStatus]: + return models.SbomReportExecutionStatusResultSchema().load(response.json(), many=True) diff --git a/cycode/cyclient/scan_client.py b/cycode/cyclient/scan_client.py index f09a96ef..5830e9dc 100644 --- a/cycode/cyclient/scan_client.py +++ b/cycode/cyclient/scan_client.py @@ -1,18 +1,19 @@ import json -from typing import List, Optional +from typing import TYPE_CHECKING, List, Optional from requests import Response -from 
cycode.cli.zip_file import InMemoryZip +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cyclient import models +from cycode.cyclient.cycode_client_base import CycodeClientBase -from . import models -from .cycode_client_base import CycodeClientBase -from .scan_config.scan_config_base import ScanConfigBase +if TYPE_CHECKING: + from .scan_config_base import ScanConfigBase class ScanClient: def __init__( - self, scan_cycode_client: CycodeClientBase, scan_config: ScanConfigBase, hide_response_log: bool = True + self, scan_cycode_client: CycodeClientBase, scan_config: 'ScanConfigBase', hide_response_log: bool = True ) -> None: self.scan_cycode_client = scan_cycode_client self.scan_config = scan_config diff --git a/cycode/cyclient/scan_config/scan_config_creator.py b/cycode/cyclient/scan_config/scan_config_creator.py deleted file mode 100644 index f17be424..00000000 --- a/cycode/cyclient/scan_config/scan_config_creator.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Tuple - -from cycode.cyclient.config import dev_mode -from cycode.cyclient.config_dev import DEV_CYCODE_API_URL -from cycode.cyclient.cycode_dev_based_client import CycodeDevBasedClient -from cycode.cyclient.cycode_token_based_client import CycodeTokenBasedClient -from cycode.cyclient.scan_client import ScanClient -from cycode.cyclient.scan_config.scan_config_base import DefaultScanConfig, DevScanConfig - - -def create_scan_client(client_id: str, client_secret: str, hide_response_log: bool) -> ScanClient: - if dev_mode: - scan_cycode_client, scan_config = create_scan_for_dev_env() - else: - scan_cycode_client, scan_config = create_scan(client_id, client_secret) - - return ScanClient(scan_cycode_client, scan_config, hide_response_log) - - -def create_scan(client_id: str, client_secret: str) -> Tuple[CycodeTokenBasedClient, DefaultScanConfig]: - scan_cycode_client = CycodeTokenBasedClient(client_id, client_secret) - scan_config = DefaultScanConfig() - return 
scan_cycode_client, scan_config - - -def create_scan_for_dev_env() -> Tuple[CycodeDevBasedClient, DevScanConfig]: - scan_cycode_client = CycodeDevBasedClient(DEV_CYCODE_API_URL) - scan_config = DevScanConfig() - return scan_cycode_client, scan_config diff --git a/cycode/cyclient/scan_config/scan_config_base.py b/cycode/cyclient/scan_config_base.py similarity index 100% rename from cycode/cyclient/scan_config/scan_config_base.py rename to cycode/cyclient/scan_config_base.py diff --git a/tests/cli/helpers/test_tf_content_generator.py b/tests/cli/helpers/test_tf_content_generator.py index ae19b2f6..7953ed81 100644 --- a/tests/cli/helpers/test_tf_content_generator.py +++ b/tests/cli/helpers/test_tf_content_generator.py @@ -1,6 +1,6 @@ import os -from cycode.cli.helpers import tf_content_generator +from cycode.cli.files_collector.iac import tf_content_generator from cycode.cli.utils.path_utils import get_file_content, get_immediate_subdirectories from tests.conftest import TEST_FILES_PATH diff --git a/tests/cli/test_code_scanner.py b/tests/cli/test_code_scanner.py index b715e9c6..f4fe4f69 100644 --- a/tests/cli/test_code_scanner.py +++ b/tests/cli/test_code_scanner.py @@ -8,8 +8,10 @@ from requests import Response from cycode.cli import consts -from cycode.cli.code_scanner import _generate_document, _handle_exception, _is_file_relevant_for_sca_scan +from cycode.cli.code_scanner import _handle_exception from cycode.cli.exceptions import custom_exceptions +from cycode.cli.files_collector.excluder import _is_file_relevant_for_sca_scan +from cycode.cli.files_collector.path_documents import _generate_document from cycode.cli.models import Document if TYPE_CHECKING: diff --git a/tests/conftest.py b/tests/conftest.py index a763f6bb..fdb02ec2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,9 +3,9 @@ import pytest import responses +from cycode.cyclient.client_creator import create_scan_client from cycode.cyclient.cycode_token_based_client import 
CycodeTokenBasedClient from cycode.cyclient.scan_client import ScanClient -from cycode.cyclient.scan_config.scan_config_creator import create_scan_client _EXPECTED_API_TOKEN = 'someJWT' diff --git a/tests/cyclient/scan_config/test_default_scan_config.py b/tests/cyclient/scan_config/test_default_scan_config.py index 0945402b..e0a84ad2 100644 --- a/tests/cyclient/scan_config/test_default_scan_config.py +++ b/tests/cyclient/scan_config/test_default_scan_config.py @@ -1,4 +1,4 @@ -from cycode.cyclient.scan_config.scan_config_creator import DefaultScanConfig +from cycode.cyclient.scan_config_base import DefaultScanConfig def test_get_service_name() -> None: diff --git a/tests/cyclient/scan_config/test_dev_scan_config.py b/tests/cyclient/scan_config/test_dev_scan_config.py index 0673d601..3ea3127e 100644 --- a/tests/cyclient/scan_config/test_dev_scan_config.py +++ b/tests/cyclient/scan_config/test_dev_scan_config.py @@ -1,4 +1,4 @@ -from cycode.cyclient.scan_config.scan_config_creator import DevScanConfig +from cycode.cyclient.scan_config_base import DevScanConfig def test_get_service_name() -> None: diff --git a/tests/cyclient/test_scan_client.py b/tests/cyclient/test_scan_client.py index db867a9f..2ca374b2 100644 --- a/tests/cyclient/test_scan_client.py +++ b/tests/cyclient/test_scan_client.py @@ -8,11 +8,11 @@ from requests import Timeout from requests.exceptions import ProxyError -from cycode.cli.code_scanner import zip_documents_to_scan +from cycode.cli.code_scanner import zip_documents from cycode.cli.config import config from cycode.cli.exceptions.custom_exceptions import CycodeError, HttpUnauthorizedError +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip from cycode.cli.models import Document -from cycode.cli.zip_file import InMemoryZip from cycode.cyclient.scan_client import ScanClient from tests.conftest import TEST_FILES_PATH @@ -42,7 +42,7 @@ def get_test_zip_file(scan_type: str) -> InMemoryZip: with open(path, 'r', encoding='UTF-8') as 
f: test_documents.append(Document(path, f.read(), is_git_diff_format=False)) - return zip_documents_to_scan(scan_type, InMemoryZip(), test_documents) + return zip_documents(scan_type, test_documents) def get_zipped_file_scan_response(url: str, scan_id: Optional[UUID] = None) -> responses.Response: diff --git a/tests/test_code_scanner.py b/tests/test_code_scanner.py index b1f7e163..6eb494e9 100644 --- a/tests/test_code_scanner.py +++ b/tests/test_code_scanner.py @@ -1,9 +1,9 @@ import os -from cycode.cli import code_scanner +from cycode.cli.files_collector.excluder import _is_relevant_file_to_scan from tests.conftest import TEST_FILES_PATH def test_is_relevant_file_to_scan_sca() -> None: path = os.path.join(TEST_FILES_PATH, 'package.json') - assert code_scanner._is_relevant_file_to_scan('sca', path) is True + assert _is_relevant_file_to_scan('sca', path) is True diff --git a/tests/test_zip_file.py b/tests/test_zip_file.py index f73514c8..15c53c17 100644 --- a/tests/test_zip_file.py +++ b/tests/test_zip_file.py @@ -1,6 +1,6 @@ import os -from cycode.cli import zip_file +from cycode.cli.utils.path_utils import concat_unique_id def test_concat_unique_id_to_file_with_leading_slash() -> None: @@ -10,7 +10,7 @@ def test_concat_unique_id_to_file_with_leading_slash() -> None: expected_path = os.path.join(unique_id, filename) filename = os.sep + filename - assert zip_file.concat_unique_id(filename, unique_id) == expected_path + assert concat_unique_id(filename, unique_id) == expected_path def test_concat_unique_id_to_file_without_leading_slash() -> None: @@ -19,4 +19,4 @@ def test_concat_unique_id_to_file_without_leading_slash() -> None: expected_path = os.path.join(unique_id, *filename.split('/')) - assert zip_file.concat_unique_id(filename, unique_id) == expected_path + assert concat_unique_id(filename, unique_id) == expected_path From 3beda0bb0f606d9a15f9311cc389860f7389c22f Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 22 Sep 2023 10:52:26 +0200 Subject: 
[PATCH 02/14] send entity_type only for sbom from zipped file --- cycode/cyclient/report_client.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py index de8c64ce..ea7c016a 100644 --- a/cycode/cyclient/report_client.py +++ b/cycode/cyclient/report_client.py @@ -18,11 +18,14 @@ class ReportParameters: include_vulnerabilities: bool include_dev_dependencies: bool - def to_dict(self) -> dict: - return dataclasses.asdict(self) + def to_dict(self, *, without_entity_type: bool) -> dict: + model_dict = dataclasses.asdict(self) + if without_entity_type: + del model_dict['entity_type'] + return model_dict - def to_json(self) -> str: - return json.dumps(self.to_dict()) + def to_json(self, *, without_entity_type: bool) -> str: + return json.dumps(self.to_dict(without_entity_type=without_entity_type)) class ReportClient: @@ -42,16 +45,11 @@ def request_sbom_report( report_type = 'zipped-file' if zip_file else 'repository-url' url_path = f'{self.SERVICE_NAME}/{self.CREATE_SBOM_REPORT_REQUEST_PATH}'.format(report_type=report_type) - request_data = {'report_parameters': params.to_dict()} + # entity type required only for zipped-file + request_data = {'report_parameters': params.to_json(without_entity_type=zip_file is None)} if repository_url: request_data['repository_url'] = repository_url - # FIXME BACKEND ISSUE - del request_data['report_parameters']['entity_type'] - - # FIXME Delete after BE fix - request_data['report_parameters'] = json.dumps(request_data['report_parameters']) - request_args = { 'url_path': url_path, 'data': request_data, From 4c24769255b34e26158ad1032d18d2f1cd863c3c Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 25 Sep 2023 20:51:07 +0200 Subject: [PATCH 03/14] add dynamic label for progress bar; add the ability to change the set of progress bar sections; add progress bar for SBOM reports --- cycode/cli/code_scanner.py | 36 +++--- 
cycode/cli/commands/report/report_command.py | 2 + cycode/cli/commands/report/sbom/common.py | 18 ++- .../commands/report/sbom/sbom_path_command.py | 8 +- .../sbom/sbom_repository_url_command.py | 8 +- cycode/cli/files_collector/excluder.py | 9 +- cycode/cli/files_collector/path_documents.py | 24 ++-- .../files_collector/repository_documents.py | 21 ++-- cycode/cli/main.py | 4 +- cycode/cli/utils/progress_bar.py | 113 +++++++++++++----- cycode/cli/utils/scan_batch.py | 6 +- 11 files changed, 172 insertions(+), 77 deletions(-) diff --git a/cycode/cli/code_scanner.py b/cycode/cli/code_scanner.py index a1b466e0..f47f8e01 100644 --- a/cycode/cli/code_scanner.py +++ b/cycode/cli/code_scanner.py @@ -37,7 +37,7 @@ from cycode.cli.utils.path_utils import ( get_path_by_os, ) -from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cli.utils.progress_bar import ScanProgressBarSection from cycode.cli.utils.scan_batch import run_parallel_batched_scan from cycode.cli.utils.scan_utils import set_issue_detected from cycode.cli.utils.task_timer import TimeoutAfter @@ -76,12 +76,12 @@ def scan_repository(context: click.Context, path: str, branch: str) -> None: progress_bar.start() file_entries = list(get_git_repository_tree_file_entries(path, branch)) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(file_entries)) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(file_entries)) documents_to_scan = [] for file in file_entries: # FIXME(MarshalX): probably file could be tree or submodule too. 
we expect blob only - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) file_path = file.path if monitor else get_path_by_os(os.path.join(path, file.path)) documents_to_scan.append(Document(file_path, file.data_stream.read().decode('UTF-8', errors='replace'))) @@ -138,16 +138,16 @@ def scan_commit_range( total_commits_count = int(repo.git.rev_list('--count', commit_range)) logger.debug(f'Calculating diffs for {total_commits_count} commits in the commit range {commit_range}') - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count) scanned_commits_count = 0 for commit in repo.iter_commits(rev=commit_range): if _does_reach_to_max_commits_to_scan_limit(commit_ids_to_scan, max_commits_count): logger.debug(f'Reached to max commits to scan count. Going to scan only {max_commits_count} last commits') - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count) break - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) commit_id = commit.hexsha commit_ids_to_scan.append(commit_id) @@ -215,11 +215,11 @@ def pre_commit_scan(context: click.Context, ignored_args: List[str]) -> None: diff_files = Repo(os.getcwd()).index.diff('HEAD', create_patch=True, R=True) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) documents_to_scan = [] for file in diff_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) 
documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file))) documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) @@ -270,7 +270,9 @@ def pre_receive_scan(context: click.Context, ignored_args: List[str]) -> None: def scan_sca_pre_commit(context: click.Context) -> None: scan_type = context.obj['scan_type'] scan_parameters = get_default_scan_parameters(context) - git_head_documents, pre_committed_documents = get_pre_commit_modified_documents(context.obj['progress_bar']) + git_head_documents, pre_committed_documents = get_pre_commit_modified_documents( + context.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES + ) git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents) pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents) sca_code_scanner.perform_pre_hook_range_scan_actions(git_head_documents, pre_committed_documents) @@ -290,7 +292,7 @@ def scan_sca_commit_range(context: click.Context, path: str, commit_range: str) scan_parameters = get_scan_parameters(context, path) from_commit_rev, to_commit_rev = parse_commit_range(commit_range, path) from_commit_documents, to_commit_documents = get_commit_range_modified_documents( - progress_bar, path, from_commit_rev, to_commit_rev + progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev ) from_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents) to_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents) @@ -307,7 +309,7 @@ def scan_disk_files(context: click.Context, path: str) -> None: progress_bar = context.obj['progress_bar'] try: - documents = get_relevant_document(progress_bar, scan_type, path) + documents = get_relevant_document(progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, scan_type, path) perform_pre_scan_documents_actions(context, 
scan_type, documents) scan_documents(context, documents, scan_parameters=scan_parameters) except Exception as e: @@ -407,8 +409,8 @@ def scan_documents( scan_batch_thread_func, documents_to_scan, progress_bar=progress_bar ) - progress_bar.set_section_length(ProgressBarSection.GENERATE_REPORT, 1) - progress_bar.update(ProgressBarSection.GENERATE_REPORT) + progress_bar.set_section_length(ScanProgressBarSection.GENERATE_REPORT, 1) + progress_bar.update(ScanProgressBarSection.GENERATE_REPORT) progress_bar.stop() set_issue_detected_by_scan_results(context, local_scan_results) @@ -436,7 +438,7 @@ def scan_commit_range_documents( to_commit_zipped_documents = InMemoryZip() try: - progress_bar.set_section_length(ProgressBarSection.SCAN, 1) + progress_bar.set_section_length(ScanProgressBarSection.SCAN, 1) scan_result = init_default_scan_result(scan_id) if should_scan_documents(from_documents_to_scan, to_documents_to_scan): @@ -455,15 +457,15 @@ def scan_commit_range_documents( timeout, ) - progress_bar.update(ProgressBarSection.SCAN) - progress_bar.set_section_length(ProgressBarSection.GENERATE_REPORT, 1) + progress_bar.update(ScanProgressBarSection.SCAN) + progress_bar.set_section_length(ScanProgressBarSection.GENERATE_REPORT, 1) local_scan_result = create_local_scan_result( scan_result, to_documents_to_scan, scan_command_type, scan_type, severity_threshold ) set_issue_detected_by_scan_results(context, [local_scan_result]) - progress_bar.update(ProgressBarSection.GENERATE_REPORT) + progress_bar.update(ScanProgressBarSection.GENERATE_REPORT) progress_bar.stop() # errors will be handled with try-except block; printing will not occur on errors diff --git a/cycode/cli/commands/report/report_command.py b/cycode/cli/commands/report/report_command.py index 1c5f09d7..4722a6b2 100644 --- a/cycode/cli/commands/report/report_command.py +++ b/cycode/cli/commands/report/report_command.py @@ -2,6 +2,7 @@ from cycode.cli.commands.report.sbom.sbom_command import sbom_command from 
cycode.cli.utils.get_api_client import get_report_cycode_client +from cycode.cli.utils.progress_bar import SBOM_REPORT_PROGRESS_BAR_SECTIONS, get_progress_bar @click.group( @@ -17,5 +18,6 @@ def report_command( """Generate report.""" context.obj['client'] = get_report_cycode_client(hide_response_log=False) # TODO disable log + context.obj['progress_bar'] = get_progress_bar(hidden=False, sections=SBOM_REPORT_PROGRESS_BAR_SECTIONS) return 1 diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index bf9cf42a..bcda7df9 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ b/cycode/cli/commands/report/sbom/common.py @@ -3,23 +3,37 @@ import click +from cycode.cli.utils.progress_bar import SbomReportProgressBarSection + if TYPE_CHECKING: + from cycode.cli.utils.progress_bar import BaseProgressBar from cycode.cyclient.report_client import ReportClient -def create_sbom_report(client: 'ReportClient', report_id: int, output_file: str) -> None: +def create_sbom_report( + progress_bar: 'BaseProgressBar', client: 'ReportClient', report_id: int, output_file: str +) -> None: # TODO(MarshalX): API will be changed soon. Just MVP for now. 
report_satus = None status = 'Running' while status == 'Running': report_satus = client.get_execution_status(report_id)[0] - status = report_satus.report_executions[0].status + execution = report_satus.report_executions[0] + + status = execution.status + + progress_bar.update_label(execution.error_message or execution.status_message) time.sleep(3) if not report_satus: raise click.ClickException('Failed to get report status.') + progress_bar.set_section_length(SbomReportProgressBarSection.GENERATION) + report_path = report_satus.report_executions[0].storage_details.path report_content = client.get_file_content(report_path) with open(output_file, 'w', encoding='UTF-8') as f: f.write(report_content) + + progress_bar.set_section_length(SbomReportProgressBarSection.RECEIVE_REPORT) + progress_bar.stop() diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index 9e054b9c..caea4d09 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -5,6 +5,7 @@ from cycode.cli.files_collector.path_documents import get_relevant_document from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions from cycode.cli.files_collector.zip_documents import zip_documents +from cycode.cli.utils.progress_bar import SbomReportProgressBarSection @click.command(short_help='Generate SBOM report for provided path in the command.') @@ -15,15 +16,16 @@ def sbom_path_command(context: click.Context, path: str) -> None: report_parameters = context.obj['report_parameters'] output_file = context.obj['output_file'] - # TODO(MarshalX): add support of progress bar somehow? 
progress_bar = context.obj['progress_bar'] progress_bar.start() - documents = get_relevant_document(progress_bar, consts.SCA_SCAN_TYPE, path) + documents = get_relevant_document( + progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path + ) # TODO(MarshalX): refactoring more. Combine into one function. perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) - create_sbom_report(client, sbom_report.id, output_file) + create_sbom_report(progress_bar, client, sbom_report.id, output_file) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index 5f9cb1ff..75359cc2 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -1,15 +1,21 @@ import click from cycode.cli.commands.report.sbom.common import create_sbom_report +from cycode.cli.utils.progress_bar import SbomReportProgressBarSection @click.command(short_help='Generate SBOM report for provided repository URI in the command.') @click.argument('uri', nargs=1, type=str, required=True) @click.pass_context def sbom_repository_url_command(context: click.Context, uri: str) -> None: + progress_bar = context.obj['progress_bar'] + progress_bar.start() + + progress_bar.set_section_length(SbomReportProgressBarSection.PREPARE_LOCAL_FILES) + client = context.obj['client'] report_parameters = context.obj['report_parameters'] output_file = context.obj['output_file'] # TODO(MarshalX): add support of progress bar somehow? 
sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) - create_sbom_report(client, sbom_report.id, output_file) + create_sbom_report(progress_bar, client, sbom_report.id, output_file) diff --git a/cycode/cli/files_collector/excluder.py b/cycode/cli/files_collector/excluder.py index 213b8db1..cbbb358f 100644 --- a/cycode/cli/files_collector/excluder.py +++ b/cycode/cli/files_collector/excluder.py @@ -4,19 +4,20 @@ from cycode.cli.config import configuration_manager from cycode.cli.user_settings.config_file_manager import ConfigFileManager from cycode.cli.utils.path_utils import get_file_size, is_binary_file, is_sub_path -from cycode.cli.utils.progress_bar import ProgressBarSection from cycode.cli.utils.string_utils import get_content_size, is_binary_content from cycode.cyclient import logger if TYPE_CHECKING: from cycode.cli.models import Document - from cycode.cli.utils.progress_bar import BaseProgressBar + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection -def exclude_irrelevant_files(progress_bar: 'BaseProgressBar', scan_type: str, filenames: List[str]) -> List[str]: +def exclude_irrelevant_files( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, filenames: List[str] +) -> List[str]: relevant_files = [] for filename in filenames: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(progress_bar_section) if _is_relevant_file_to_scan(scan_type, filename): relevant_files.append(filename) diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 0934170d..a0df5ac0 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -12,11 +12,10 @@ ) from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_absolute_path, get_file_content -from cycode.cli.utils.progress_bar import ProgressBarSection from cycode.cyclient 
import logger if TYPE_CHECKING: - from cycode.cli.utils.progress_bar import BaseProgressBar + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection def _get_all_existing_files_in_directory(path: str) -> List[str]: @@ -48,16 +47,18 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] -def _get_relevant_files(progress_bar: 'BaseProgressBar', scan_type: str, path: str) -> List[str]: +def _get_relevant_files( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, path: str +) -> List[str]: all_files_to_scan = _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) # we are double the progress bar section length because we are going to process the files twice # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) # second time to get the files content progress_bar_section_len = len(all_files_to_scan) * 2 - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) + progress_bar.set_section_length(progress_bar_section, progress_bar_section_len) - relevant_files_to_scan = exclude_irrelevant_files(progress_bar, scan_type, all_files_to_scan) + relevant_files_to_scan = exclude_irrelevant_files(progress_bar, progress_bar_section, scan_type, all_files_to_scan) # after finishing the first processing (excluding), # we must update the progress bar stage with respect of excluded files. 
@@ -66,7 +67,7 @@ def _get_relevant_files(progress_bar: 'BaseProgressBar', scan_type: str, path: s # from the progress bar section length excluded_files_count = len(all_files_to_scan) - len(relevant_files_to_scan) progress_bar_section_len = progress_bar_section_len - excluded_files_count - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) + progress_bar.set_section_length(progress_bar_section, progress_bar_section_len) logger.debug( 'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)} @@ -89,13 +90,18 @@ def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document: def get_relevant_document( - progress_bar: 'BaseProgressBar', scan_type: str, path: str, *, is_git_diff: bool = False + progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + scan_type: str, + path: str, + *, + is_git_diff: bool = False, ) -> List[Document]: - relevant_files = _get_relevant_files(progress_bar, scan_type, path) + relevant_files = _get_relevant_files(progress_bar, progress_bar_section, scan_type, path) documents: List[Document] = [] for file in relevant_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(progress_bar_section) content = get_file_content(file) if not content: diff --git a/cycode/cli/files_collector/repository_documents.py b/cycode/cli/files_collector/repository_documents.py index 9cde54fd..acd9c225 100644 --- a/cycode/cli/files_collector/repository_documents.py +++ b/cycode/cli/files_collector/repository_documents.py @@ -5,14 +5,13 @@ from cycode.cli.files_collector.sca import sca_code_scanner from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_file_content, get_path_by_os -from cycode.cli.utils.progress_bar import ProgressBarSection if TYPE_CHECKING: from git import Blob, Diff from git.objects.base import IndexObjUnion from git.objects.tree import 
TraversedTreeTup - from cycode.cli.utils.progress_bar import BaseProgressBar + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection from git import Repo @@ -47,15 +46,17 @@ def get_diff_file_content(file: 'Diff') -> str: return file.diff.decode('UTF-8', errors='replace') -def get_pre_commit_modified_documents(progress_bar: 'BaseProgressBar') -> Tuple[List[Document], List[Document]]: +def get_pre_commit_modified_documents( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection' +) -> Tuple[List[Document], List[Document]]: git_head_documents = [] pre_committed_documents = [] repo = Repo(os.getcwd()) diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) + progress_bar.set_section_length(progress_bar_section, len(diff_files)) for file in diff_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(progress_bar_section) diff_file_path = get_diff_file_path(file) file_path = get_path_by_os(diff_file_path) @@ -72,7 +73,11 @@ def get_pre_commit_modified_documents(progress_bar: 'BaseProgressBar') -> Tuple[ def get_commit_range_modified_documents( - progress_bar: 'BaseProgressBar', path: str, from_commit_rev: str, to_commit_rev: str + progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + path: str, + from_commit_rev: str, + to_commit_rev: str, ) -> Tuple[List[Document], List[Document]]: from_commit_documents = [] to_commit_documents = [] @@ -83,9 +88,9 @@ def get_commit_range_modified_documents( modified_files_diff = [ change for change in diff if change.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE ] - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(modified_files_diff)) + progress_bar.set_section_length(progress_bar_section, len(modified_files_diff)) for blob in modified_files_diff: - 
progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(progress_bar_section) diff_file_path = get_diff_file_path(blob) file_path = get_path_by_os(diff_file_path) diff --git a/cycode/cli/main.py b/cycode/cli/main.py index 17cc65d8..efa2b200 100644 --- a/cycode/cli/main.py +++ b/cycode/cli/main.py @@ -22,7 +22,7 @@ from cycode.cli.user_settings.user_settings_commands import add_exclusions, set_credentials from cycode.cli.utils import scan_utils from cycode.cli.utils.get_api_client import get_scan_cycode_client -from cycode.cli.utils.progress_bar import get_progress_bar +from cycode.cli.utils.progress_bar import SCAN_PROGRESS_BAR_SECTIONS, get_progress_bar from cycode.cyclient.config import set_logging_level from cycode.cyclient.cycode_client_base import CycodeClientBase from cycode.cyclient.models import UserAgentOptionScheme @@ -232,7 +232,7 @@ def main_cli( if output == 'json': no_progress_meter = True - context.obj['progress_bar'] = get_progress_bar(hidden=no_progress_meter) + context.obj['progress_bar'] = get_progress_bar(hidden=no_progress_meter, sections=SCAN_PROGRESS_BAR_SECTIONS) if user_agent: user_agent_option = UserAgentOptionScheme().loads(user_agent) diff --git a/cycode/cli/utils/progress_bar.py b/cycode/cli/utils/progress_bar.py index 083d0715..b0e94d92 100644 --- a/cycode/cli/utils/progress_bar.py +++ b/cycode/cli/utils/progress_bar.py @@ -16,15 +16,11 @@ class ProgressBarSection(AutoCountEnum): - PREPARE_LOCAL_FILES = auto() - SCAN = auto() - GENERATE_REPORT = auto() - def has_next(self) -> bool: - return self.value < len(ProgressBarSection) - 1 + return self.value < len(type(self)) - 1 def next(self) -> 'ProgressBarSection': - return ProgressBarSection(self.value + 1) + return type(self)(self.value + 1) class ProgressBarSectionInfo(NamedTuple): @@ -32,25 +28,62 @@ class ProgressBarSectionInfo(NamedTuple): label: str start_percent: int stop_percent: int + initial: bool = False _PROGRESS_BAR_LENGTH = 100 
-_PROGRESS_BAR_SECTIONS = { - ProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( - ProgressBarSection.PREPARE_LOCAL_FILES, 'Prepare local files', start_percent=0, stop_percent=5 +ProgressBarSections = Dict[ProgressBarSection, ProgressBarSectionInfo] + + +class ScanProgressBarSection(ProgressBarSection): + PREPARE_LOCAL_FILES = auto() + SCAN = auto() + GENERATE_REPORT = auto() + + +SCAN_PROGRESS_BAR_SECTIONS: ProgressBarSections = { + ScanProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( + ScanProgressBarSection.PREPARE_LOCAL_FILES, 'Prepare local files', start_percent=0, stop_percent=5, initial=True + ), + ScanProgressBarSection.SCAN: ProgressBarSectionInfo( + ScanProgressBarSection.SCAN, 'Scan in progress', start_percent=5, stop_percent=95 + ), + ScanProgressBarSection.GENERATE_REPORT: ProgressBarSectionInfo( + ScanProgressBarSection.GENERATE_REPORT, 'Generate report', start_percent=95, stop_percent=100 + ), +} + + +class SbomReportProgressBarSection(ProgressBarSection): + PREPARE_LOCAL_FILES = auto() + GENERATION = auto() + RECEIVE_REPORT = auto() + + +SBOM_REPORT_PROGRESS_BAR_SECTIONS: ProgressBarSections = { + SbomReportProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( + SbomReportProgressBarSection.PREPARE_LOCAL_FILES, + 'Prepare local files', + start_percent=0, + stop_percent=30, + initial=True, ), - ProgressBarSection.SCAN: ProgressBarSectionInfo( - ProgressBarSection.SCAN, 'Scan in progress', start_percent=5, stop_percent=95 + SbomReportProgressBarSection.GENERATION: ProgressBarSectionInfo( + SbomReportProgressBarSection.GENERATION, 'Report generation in progress', start_percent=30, stop_percent=90 ), - ProgressBarSection.GENERATE_REPORT: ProgressBarSectionInfo( - ProgressBarSection.GENERATE_REPORT, 'Generate report', start_percent=95, stop_percent=100 + SbomReportProgressBarSection.RECEIVE_REPORT: ProgressBarSectionInfo( + SbomReportProgressBarSection.RECEIVE_REPORT, 'Receive report', start_percent=90, 
stop_percent=100 ), } -def _get_section_length(section: 'ProgressBarSection') -> int: - return _PROGRESS_BAR_SECTIONS[section].stop_percent - _PROGRESS_BAR_SECTIONS[section].start_percent +def _get_initial_section(progress_bar_sections: ProgressBarSections) -> ProgressBarSectionInfo: + for section in progress_bar_sections.values(): + if section.initial: + return section + + raise ValueError('No initial section found') class BaseProgressBar(ABC): @@ -75,13 +108,17 @@ def stop(self) -> None: ... @abstractmethod - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: ... @abstractmethod def update(self, section: 'ProgressBarSection') -> None: ... + @abstractmethod + def update_label(self, label: Optional[str] = None) -> None: + ... + class DummyProgressBar(BaseProgressBar): def __init__(self, *args, **kwargs) -> None: @@ -99,16 +136,22 @@ def start(self) -> None: def stop(self) -> None: pass - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: pass def update(self, section: 'ProgressBarSection') -> None: pass + def update_label(self, label: Optional[str] = None) -> None: + pass + class CompositeProgressBar(BaseProgressBar): - def __init__(self) -> None: + def __init__(self, progress_bar_sections: ProgressBarSections) -> None: super().__init__() + + self._progress_bar_sections = progress_bar_sections + self._progress_bar_context_manager = click.progressbar( length=_PROGRESS_BAR_LENGTH, item_show_func=self._progress_bar_item_show_func, @@ -121,7 +164,7 @@ def __init__(self) -> None: self._section_values: Dict[ProgressBarSection, int] = {} self._current_section_value = 0 - self._current_section: ProgressBarSectionInfo = _PROGRESS_BAR_SECTIONS[ProgressBarSection.PREPARE_LOCAL_FILES] + self._current_section: ProgressBarSectionInfo = 
_get_initial_section(self._progress_bar_sections) def __enter__(self) -> 'CompositeProgressBar': self._progress_bar = self._progress_bar_context_manager.__enter__() @@ -140,7 +183,7 @@ def stop(self) -> None: if self._run: self.__exit__(None, None, None) - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: logger.debug(f'set_section_length: {section} {length}') self._section_lengths[section] = length @@ -149,8 +192,12 @@ def set_section_length(self, section: 'ProgressBarSection', length: int) -> None else: self._maybe_update_current_section() + def _get_section_length(self, section: 'ProgressBarSection') -> int: + section_info = self._progress_bar_sections[section] + return section_info.stop_percent - section_info.start_percent + def _skip_section(self, section: 'ProgressBarSection') -> None: - self._progress_bar.update(_get_section_length(section)) + self._progress_bar.update(self._get_section_length(section)) self._maybe_update_current_section() def _increment_section_value(self, section: 'ProgressBarSection', value: int) -> None: @@ -164,7 +211,7 @@ def _rerender_progress_bar(self) -> None: """Used to update label right after changing the progress bar section.""" self._progress_bar.update(0) - def _increment_progress(self, section: ProgressBarSection) -> None: + def _increment_progress(self, section: 'ProgressBarSection') -> None: increment_value = self._get_increment_progress_value(section) self._current_section_value += increment_value @@ -177,7 +224,7 @@ def _maybe_update_current_section(self) -> None: max_val = self._section_lengths.get(self._current_section.section, 0) cur_val = self._section_values.get(self._current_section.section, 0) if cur_val >= max_val: - next_section = _PROGRESS_BAR_SECTIONS[self._current_section.section.next()] + next_section = self._progress_bar_sections[self._current_section.section.next()] 
logger.debug(f'_update_current_section: {self._current_section.section} -> {next_section.section}') self._current_section = next_section @@ -188,7 +235,7 @@ def _get_increment_progress_value(self, section: 'ProgressBarSection') -> int: max_val = self._section_lengths[section] cur_val = self._section_values[section] - expected_value = round(_get_section_length(section) * (cur_val / max_val)) + expected_value = round(self._get_section_length(section) * (cur_val / max_val)) return expected_value - self._current_section_value @@ -210,12 +257,19 @@ def update(self, section: 'ProgressBarSection', value: int = 1) -> None: self._increment_progress(section) self._maybe_update_current_section() + def update_label(self, label: Optional[str] = None) -> None: + if not self._progress_bar: + raise ValueError('Progress bar is not initialized. Call start() first or use "with" statement.') + + self._progress_bar.label = label or '' + self._progress_bar.render_progress() -def get_progress_bar(*, hidden: bool) -> BaseProgressBar: + +def get_progress_bar(*, hidden: bool, sections: ProgressBarSections) -> BaseProgressBar: if hidden: return DummyProgressBar() - return CompositeProgressBar() + return CompositeProgressBar(sections) if __name__ == '__main__': @@ -223,15 +277,18 @@ def get_progress_bar(*, hidden: bool) -> BaseProgressBar: import random import time - bar = get_progress_bar(hidden=False) + bar = get_progress_bar(hidden=False, sections=SCAN_PROGRESS_BAR_SECTIONS) bar.start() - for bar_section in ProgressBarSection: + for bar_section in ScanProgressBarSection: section_capacity = random.randint(500, 1000) # noqa: S311 bar.set_section_length(bar_section, section_capacity) for _i in range(section_capacity): time.sleep(0.01) + bar.update_label(f'{bar_section} {_i}/{section_capacity}') bar.update(bar_section) + bar.update_label() + bar.stop() diff --git a/cycode/cli/utils/scan_batch.py b/cycode/cli/utils/scan_batch.py index 4c839440..ede229e2 100644 --- 
a/cycode/cli/utils/scan_batch.py +++ b/cycode/cli/utils/scan_batch.py @@ -9,7 +9,7 @@ SCAN_BATCH_SCANS_PER_CPU, ) from cycode.cli.models import Document -from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cli.utils.progress_bar import ScanProgressBarSection if TYPE_CHECKING: from cycode.cli.models import CliError, LocalScanResult @@ -56,7 +56,7 @@ def run_parallel_batched_scan( max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT, ) -> Tuple[Dict[str, 'CliError'], List['LocalScanResult']]: batches = split_documents_into_batches(documents, max_size_mb, max_files_count) - progress_bar.set_section_length(ProgressBarSection.SCAN, len(batches)) # * 3 + progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3 # TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps: # 1. scan creation # 2. scan completion @@ -73,6 +73,6 @@ def run_parallel_batched_scan( if err: cli_errors[scan_id] = err - progress_bar.update(ProgressBarSection.SCAN) + progress_bar.update(ScanProgressBarSection.SCAN) return cli_errors, local_scan_results From 68a07a9ad638a296255dbe490b47ad0676982a9a Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 25 Sep 2023 21:46:40 +0200 Subject: [PATCH 04/14] add default output file; add resolving of the output file extension; add prompt for overriding existing file; fix notes from the SCA team --- cycode/cli/commands/report/sbom/common.py | 15 ++++++--- .../cli/commands/report/sbom/sbom_command.py | 26 ++++++--------- .../commands/report/sbom/sbom_report_file.py | 32 +++++++++++++++++++ .../sbom/sbom_repository_url_command.py | 3 +- 4 files changed, 55 insertions(+), 21 deletions(-) create mode 100644 cycode/cli/commands/report/sbom/sbom_report_file.py diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index bcda7df9..b4b18015 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ 
b/cycode/cli/commands/report/sbom/common.py @@ -1,8 +1,10 @@ +import pathlib import time -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional import click +from cycode.cli.commands.report.sbom.sbom_report_file import SbomReportFile from cycode.cli.utils.progress_bar import SbomReportProgressBarSection if TYPE_CHECKING: @@ -11,7 +13,11 @@ def create_sbom_report( - progress_bar: 'BaseProgressBar', client: 'ReportClient', report_id: int, output_file: str + progress_bar: 'BaseProgressBar', + client: 'ReportClient', + report_id: int, + output_file: Optional[pathlib.Path], + output_format: str, ) -> None: # TODO(MarshalX): API will be changed soon. Just MVP for now. report_satus = None @@ -32,8 +38,9 @@ def create_sbom_report( report_path = report_satus.report_executions[0].storage_details.path report_content = client.get_file_content(report_path) - with open(output_file, 'w', encoding='UTF-8') as f: - f.write(report_content) progress_bar.set_section_length(SbomReportProgressBarSection.RECEIVE_REPORT) progress_bar.stop() + + sbom_report = SbomReportFile(report_path, output_format, output_file) + sbom_report.write(report_content) diff --git a/cycode/cli/commands/report/sbom/sbom_command.py b/cycode/cli/commands/report/sbom/sbom_command.py index de7ee37d..0fb2323a 100644 --- a/cycode/cli/commands/report/sbom/sbom_command.py +++ b/cycode/cli/commands/report/sbom/sbom_command.py @@ -1,3 +1,6 @@ +import pathlib +from typing import Optional + import click from cycode.cli.commands.report.sbom.sbom_path_command import sbom_path_command @@ -11,7 +14,7 @@ 'path': sbom_path_command, 'repository_url': sbom_repository_url_command, }, - short_help='Generate SBOM report. 
You`ll need to specify which report type to perform: path/repository_url.', + short_help='Generate SBOM report for remote repository by url or local directory by path.', ) @click.option( '--format', @@ -25,14 +28,14 @@ '-o', default='json', help='Specify the output file format (the default is json).', - type=click.Choice(['csv', 'json']), + type=click.Choice(['json']), required=False, ) @click.option( '--output-file', - help='Output file.', + help='Output file (the default is autogenerated filename saved to the current directory).', default=None, - type=click.Path(resolve_path=True), + type=click.Path(resolve_path=True, writable=True, path_type=pathlib.Path), required=False, ) @click.option( @@ -43,22 +46,13 @@ type=bool, required=False, ) -@click.option( - '--include-dev-dependencies', - is_flag=True, - default=False, - help='Include dev dependencies.', - type=bool, - required=False, -) @click.pass_context def sbom_command( context: click.Context, format: str, - output_format: str, - output_file: str, + output_format: Optional[str], + output_file: Optional[pathlib.Path], include_vulnerabilities: bool, - include_dev_dependencies: bool, ) -> int: """Generate SBOM report.""" sbom_format_parts = format.split('-') @@ -73,7 +67,7 @@ def sbom_command( sbom_version=sbom_format_version, output_format=output_format, include_vulnerabilities=include_vulnerabilities, - include_dev_dependencies=include_dev_dependencies, + include_dev_dependencies=False, # is not supported by BE yet ) context.obj['report_parameters'] = report_parameters context.obj['output_file'] = output_file diff --git a/cycode/cli/commands/report/sbom/sbom_report_file.py b/cycode/cli/commands/report/sbom/sbom_report_file.py new file mode 100644 index 00000000..3e12e712 --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_report_file.py @@ -0,0 +1,32 @@ +import pathlib +from typing import Optional + +import click + + +class SbomReportFile: + def __init__(self, storage_path: str, output_format: str, 
output_file: Optional[pathlib.Path]) -> None: + if output_file is None: + output_file = pathlib.Path(storage_path) + + output_ext = f'.{output_format}' + if output_file.suffix != output_ext: + output_file = output_file.with_suffix(output_ext) + + self._file_path = output_file + + def is_exists(self) -> bool: + return self._file_path.exists() + + def _prompt_overwrite(self) -> bool: + return click.confirm(f'File {self._file_path} already exists. Overwrite?') + + def _write(self, content: str) -> None: + with open(self._file_path, 'w', encoding='UTF-8') as f: + f.write(content) + + def write(self, content: str) -> None: + if self.is_exists() and not self._prompt_overwrite(): + return + + self._write(content) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index 75359cc2..e6cef474 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -16,6 +16,7 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: client = context.obj['client'] report_parameters = context.obj['report_parameters'] output_file = context.obj['output_file'] + output_format = report_parameters.output_format # TODO(MarshalX): add support of progress bar somehow? 
sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) - create_sbom_report(progress_bar, client, sbom_report.id, output_file) + create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) From 47e6def81d9a3aa0dc6b77957a99dd69c1986156 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 25 Sep 2023 22:02:30 +0200 Subject: [PATCH 05/14] fix sbom path cmd --- cycode/cli/commands/report/sbom/sbom_path_command.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index caea4d09..fe8c6ff9 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -14,6 +14,7 @@ def sbom_path_command(context: click.Context, path: str) -> None: client = context.obj['client'] report_parameters = context.obj['report_parameters'] + output_format = report_parameters.output_format output_file = context.obj['output_file'] progress_bar = context.obj['progress_bar'] @@ -28,4 +29,4 @@ def sbom_path_command(context: click.Context, path: str) -> None: zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) - create_sbom_report(progress_bar, client, sbom_report.id, output_file) + create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) From 3f430d5b4a0b6b66f11a64785126eebfe742552e Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 25 Sep 2023 22:15:26 +0200 Subject: [PATCH 06/14] add user-friendly error handling --- .../cli/commands/report/sbom/handle_errors.py | 42 +++++++++++++++++++ .../commands/report/sbom/sbom_path_command.py | 21 ++++++---- .../sbom/sbom_repository_url_command.py | 12 ++++-- 3 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 cycode/cli/commands/report/sbom/handle_errors.py diff --git 
a/cycode/cli/commands/report/sbom/handle_errors.py b/cycode/cli/commands/report/sbom/handle_errors.py new file mode 100644 index 00000000..6904625c --- /dev/null +++ b/cycode/cli/commands/report/sbom/handle_errors.py @@ -0,0 +1,42 @@ +import traceback +from typing import Optional + +import click + +from cycode.cli.exceptions import custom_exceptions +from cycode.cli.models import CliError, CliErrors +from cycode.cli.printers import ConsolePrinter + + +def handle_report_exception(context: click.Context, err: Exception) -> Optional[CliError]: + if context.obj['verbose']: + click.secho(f'Error: {traceback.format_exc()}', fg='red') + + errors: CliErrors = { + custom_exceptions.NetworkError: CliError( + code='cycode_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), + custom_exceptions.ScanAsyncError: CliError( + code='report_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), + custom_exceptions.HttpUnauthorizedError: CliError( + code='auth_error', + message='Unable to authenticate to Cycode, your token is either invalid or has expired. 
' + 'Please re-generate your token and reconfigure it by running the `cycode configure` command', + ), + } + + if type(err) in errors: + error = errors[type(err)] + + ConsolePrinter(context).print_error(error) + return None + + if isinstance(err, click.ClickException): + raise err + + raise click.ClickException(str(err)) diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index fe8c6ff9..4360c5c8 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -2,6 +2,7 @@ from cycode.cli import consts from cycode.cli.commands.report.sbom.common import create_sbom_report +from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception from cycode.cli.files_collector.path_documents import get_relevant_document from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions from cycode.cli.files_collector.zip_documents import zip_documents @@ -20,13 +21,17 @@ def sbom_path_command(context: click.Context, path: str) -> None: progress_bar = context.obj['progress_bar'] progress_bar.start() - documents = get_relevant_document( - progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path - ) - # TODO(MarshalX): refactoring more. Combine into one function. - perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) + try: + documents = get_relevant_document( + progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path + ) + # TODO(MarshalX): refactoring more. Combine into one function. 
+ perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) - zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) - sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) + zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) + sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) - create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + except Exception as e: + progress_bar.stop() + handle_report_exception(context, e) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index e6cef474..1bc46787 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -1,6 +1,7 @@ import click from cycode.cli.commands.report.sbom.common import create_sbom_report +from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception from cycode.cli.utils.progress_bar import SbomReportProgressBarSection @@ -10,13 +11,16 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: progress_bar = context.obj['progress_bar'] progress_bar.start() - progress_bar.set_section_length(SbomReportProgressBarSection.PREPARE_LOCAL_FILES) client = context.obj['client'] report_parameters = context.obj['report_parameters'] output_file = context.obj['output_file'] output_format = report_parameters.output_format - # TODO(MarshalX): add support of progress bar somehow? 
- sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) - create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + + try: + sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) + create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + except Exception as e: + progress_bar.stop() + handle_report_exception(context, e) From b832d90148b8a8ba5bfa19e9a2d588981477447a Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 2 Oct 2023 17:28:00 +0200 Subject: [PATCH 07/14] update endpoints --- cycode/cli/commands/report/sbom/common.py | 23 ++---- .../commands/report/sbom/sbom_path_command.py | 7 +- .../sbom/sbom_repository_url_command.py | 4 +- cycode/cyclient/models.py | 71 +++++++------------ cycode/cyclient/report_client.py | 34 ++++++--- 5 files changed, 63 insertions(+), 76 deletions(-) diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index b4b18015..500963c6 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ b/cycode/cli/commands/report/sbom/common.py @@ -2,8 +2,6 @@ import time from typing import TYPE_CHECKING, Optional -import click - from cycode.cli.commands.report.sbom.sbom_report_file import SbomReportFile from cycode.cli.utils.progress_bar import SbomReportProgressBarSection @@ -15,28 +13,21 @@ def create_sbom_report( progress_bar: 'BaseProgressBar', client: 'ReportClient', - report_id: int, + report_execution_id: int, output_file: Optional[pathlib.Path], output_format: str, ) -> None: - # TODO(MarshalX): API will be changed soon. Just MVP for now. 
- report_satus = None - status = 'Running' - while status == 'Running': - report_satus = client.get_execution_status(report_id)[0] - execution = report_satus.report_executions[0] - - status = execution.status - - progress_bar.update_label(execution.error_message or execution.status_message) + report_execution = client.get_report_execution(report_execution_id) + while report_execution.status == 'Running': time.sleep(3) - if not report_satus: - raise click.ClickException('Failed to get report status.') + report_execution = client.get_report_execution(report_execution_id) + report_label = report_execution.error_message or report_execution.status_message + progress_bar.update_label(report_label) progress_bar.set_section_length(SbomReportProgressBarSection.GENERATION) - report_path = report_satus.report_executions[0].storage_details.path + report_path = report_execution.storage_details.path report_content = client.get_file_content(report_path) progress_bar.set_section_length(SbomReportProgressBarSection.RECEIVE_REPORT) diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index 4360c5c8..4732e616 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -25,13 +25,14 @@ def sbom_path_command(context: click.Context, path: str) -> None: documents = get_relevant_document( progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path ) - # TODO(MarshalX): refactoring more. Combine into one function. + # TODO(MarshalX): combine perform_pre_scan_documents_actions with get_relevant_document. 
+ # unhardcode usage of context in perform_pre_scan_documents_actions perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) - sbom_report = client.request_sbom_report(report_parameters, zip_file=zipped_documents) + report_execution = client.request_sbom_report_execution(report_parameters, zip_file=zipped_documents) - create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) except Exception as e: progress_bar.stop() handle_report_exception(context, e) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index 1bc46787..46789a1e 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -19,8 +19,8 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: output_format = report_parameters.output_format try: - sbom_report = client.request_sbom_report(report_parameters, repository_url=uri) - create_sbom_report(progress_bar, client, sbom_report.id, output_file, output_format) + report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri) + create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) except Exception as e: progress_bar.stop() handle_report_exception(context, e) diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py index 5b73be19..0401e3fb 100644 --- a/cycode/cyclient/models.py +++ b/cycode/cyclient/models.py @@ -346,80 +346,61 @@ def user_agent_suffix(self) -> str: ) -class RequestedSbomReportResultSchema(Schema): +@dataclass +class SbomReportStorageDetails: + path: str + folder: str + size: int + + +class SbomReportStorageDetailsSchema(Schema): class Meta: unknown = EXCLUDE - id = 
fields.Integer() - name = fields.String() - # add more fields on demand + path = fields.String() + folder = fields.String() + size = fields.Integer() @post_load - def build_dto(self, data: Dict[str, Any], **_) -> 'SbomReport': - return SbomReport(**data) + def build_dto(self, data: Dict[str, Any], **_) -> SbomReportStorageDetails: + return SbomReportStorageDetails(**data) @dataclass -class SbomReport: +class ReportExecution: id: int - name: str + status: str + error_message: Optional[str] = None + status_message: Optional[str] = None + storage_details: Optional[SbomReportStorageDetails] = None class ReportExecutionSchema(Schema): class Meta: unknown = EXCLUDE + id = fields.Integer() status = fields.String() error_message = fields.String(allow_none=True) status_message = fields.String(allow_none=True) - storage_details = fields.Nested('SbomReportStorageDetailsSchema', allow_none=True) + storage_details = fields.Nested(SbomReportStorageDetailsSchema, allow_none=True) @post_load - def build_dto(self, data: Dict[str, Any], **_) -> 'ReportExecution': + def build_dto(self, data: Dict[str, Any], **_) -> ReportExecution: return ReportExecution(**data) @dataclass -class ReportExecution: - status: str - error_message: Optional[str] = None - status_message: Optional[str] = None - storage_details: Optional['SbomReportStorageDetails'] = None +class SbomReport: + report_executions: List[ReportExecution] -class SbomReportExecutionStatusResultSchema(Schema): +class RequestedSbomReportResultSchema(Schema): class Meta: unknown = EXCLUDE - id = fields.Integer() report_executions = fields.List(fields.Nested(ReportExecutionSchema)) @post_load - def build_dto(self, data: Dict[str, Any], **_) -> 'SbomReportStatus': - return SbomReportStatus(**data) - - -class SbomReportStorageDetailsSchema(Schema): - class Meta: - unknown = EXCLUDE - - path = fields.String() - folder = fields.String() - size = fields.Integer() - - @post_load - def build_dto(self, data: Dict[str, Any], **_) -> 
'SbomReportStorageDetails': - return SbomReportStorageDetails(**data) - - -@dataclass -class SbomReportStorageDetails: - path: str - folder: str - size: int - - -@dataclass -class SbomReportStatus: - id: int - report_executions: List[ReportExecution] + def build_dto(self, data: Dict[str, Any], **_) -> SbomReport: + return SbomReport(**data) diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py index ea7c016a..70cf2884 100644 --- a/cycode/cyclient/report_client.py +++ b/cycode/cyclient/report_client.py @@ -4,6 +4,7 @@ from requests import Response +from cycode.cli.exceptions.custom_exceptions import CycodeError from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip from cycode.cyclient import models from cycode.cyclient.cycode_client_base import CycodeClientBase @@ -31,7 +32,7 @@ def to_json(self, *, without_entity_type: bool) -> str: class ReportClient: SERVICE_NAME: str = 'report' CREATE_SBOM_REPORT_REQUEST_PATH: str = 'api/v2/report/{report_type}/sbom' - GET_EXECUTIONS_STATUS_PATH: str = 'api/v2/report/{report_id}/executions' + GET_EXECUTIONS_STATUS_PATH: str = 'api/v2/report/executions' DOWNLOAD_REPORT_PATH: str = 'files/api/v1/file/sbom/{file_name}' # not in the report service @@ -39,9 +40,9 @@ def __init__(self, client: CycodeClientBase, hide_response_log: bool = True) -> self.client = client self._hide_response_log = hide_response_log - def request_sbom_report( + def request_sbom_report_execution( self, params: ReportParameters, zip_file: InMemoryZip = None, repository_url: Optional[str] = None - ) -> models.SbomReport: + ) -> models.ReportExecution: report_type = 'zipped-file' if zip_file else 'repository-url' url_path = f'{self.SERVICE_NAME}/{self.CREATE_SBOM_REPORT_REQUEST_PATH}'.format(report_type=report_type) @@ -60,12 +61,25 @@ def request_sbom_report( request_args['files'] = {'file': ('sca_files.zip', zip_file.read())} response = self.client.post(**request_args) - return 
self.parse_requested_sbom_report_response(response) + sbom_report = self.parse_requested_sbom_report_response(response) + if not sbom_report.report_executions: + raise CycodeError('Failed to get SBOM report. No executions found.') - def get_execution_status(self, report_id: int) -> List[models.SbomReportStatus]: - url_path = f'{self.SERVICE_NAME}/{self.GET_EXECUTIONS_STATUS_PATH}'.format(report_id=report_id) - response = self.client.get(url_path=url_path) - return self.parse_execution_status_response(response) + return sbom_report.report_executions[0] + + def get_report_execution(self, report_execution_id: int) -> models.ReportExecutionSchema: + url_path = f'{self.SERVICE_NAME}/{self.GET_EXECUTIONS_STATUS_PATH}' + params = { + 'executions_ids': report_execution_id, + 'include_orphan_executions': True, + } + response = self.client.get(url_path=url_path, params=params) + + report_executions = self.parse_execution_status_response(response) + if not report_executions: + raise CycodeError('Failed to get report execution.') + + return report_executions[0] def get_file_content(self, file_name: str) -> str: response = self.client.get( @@ -78,5 +92,5 @@ def parse_requested_sbom_report_response(response: Response) -> models.SbomRepor return models.RequestedSbomReportResultSchema().load(response.json()) @staticmethod - def parse_execution_status_response(response: Response) -> List[models.SbomReportStatus]: - return models.SbomReportExecutionStatusResultSchema().load(response.json(), many=True) + def parse_execution_status_response(response: Response) -> List[models.ReportExecutionSchema]: + return models.ReportExecutionSchema().load(response.json(), many=True) From 2788c5cf4e8ac2fc23e322540654379e8877659a Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 2 Oct 2023 18:24:06 +0200 Subject: [PATCH 08/14] add timeout for sbom report pulling --- cycode/cli/commands/report/sbom/common.py | 39 +++++++++++++++---- .../cli/commands/report/sbom/handle_errors.py | 5 +++ 
cycode/cli/consts.py | 9 +++++ cycode/cli/exceptions/custom_exceptions.py | 4 ++ .../user_settings/configuration_manager.py | 7 ++++ 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index 500963c6..146b5328 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ b/cycode/cli/commands/report/sbom/common.py @@ -2,29 +2,54 @@ import time from typing import TYPE_CHECKING, Optional +from cycode.cli import consts from cycode.cli.commands.report.sbom.sbom_report_file import SbomReportFile +from cycode.cli.config import configuration_manager +from cycode.cli.exceptions.custom_exceptions import ReportAsyncError from cycode.cli.utils.progress_bar import SbomReportProgressBarSection +from cycode.cyclient.models import ReportExecutionSchema if TYPE_CHECKING: from cycode.cli.utils.progress_bar import BaseProgressBar from cycode.cyclient.report_client import ReportClient -def create_sbom_report( +def _poll_report_execution_until_completed( progress_bar: 'BaseProgressBar', client: 'ReportClient', report_execution_id: int, - output_file: Optional[pathlib.Path], - output_format: str, -) -> None: - report_execution = client.get_report_execution(report_execution_id) - while report_execution.status == 'Running': - time.sleep(3) + polling_timeout: Optional[int] = None, +) -> ReportExecutionSchema: + if polling_timeout is None: + polling_timeout = configuration_manager.get_report_polling_timeout_in_seconds() + end_polling_time = time.time() + polling_timeout + while time.time() < end_polling_time: report_execution = client.get_report_execution(report_execution_id) report_label = report_execution.error_message or report_execution.status_message + progress_bar.update_label(report_label) + if report_execution.status == consts.REPORT_STATUS_COMPLETED: + return report_execution + + if report_execution.status == consts.REPORT_STATUS_ERROR: + raise ReportAsyncError(f'Error occurred while 
trying to generate report: {report_label}') + + time.sleep(consts.REPORT_POLLING_WAIT_INTERVAL_IN_SECONDS) + + raise ReportAsyncError(f'Timeout exceeded while waiting for report to complete. Timeout: {polling_timeout} sec.') + + +def create_sbom_report( + progress_bar: 'BaseProgressBar', + client: 'ReportClient', + report_execution_id: int, + output_file: Optional[pathlib.Path], + output_format: str, +) -> None: + report_execution = _poll_report_execution_until_completed(progress_bar, client, report_execution_id) + progress_bar.set_section_length(SbomReportProgressBarSection.GENERATION) report_path = report_execution.storage_details.path diff --git a/cycode/cli/commands/report/sbom/handle_errors.py b/cycode/cli/commands/report/sbom/handle_errors.py index 6904625c..b9ca9084 100644 --- a/cycode/cli/commands/report/sbom/handle_errors.py +++ b/cycode/cli/commands/report/sbom/handle_errors.py @@ -23,6 +23,11 @@ def handle_report_exception(context: click.Context, err: Exception) -> Optional[ message='Cycode was unable to complete this report. ' 'Please try again by executing the `cycode report` command', ), + custom_exceptions.ReportAsyncError: CliError( + code='report_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), custom_exceptions.HttpUnauthorizedError: CliError( code='auth_error', message='Unable to authenticate to Cycode, your token is either invalid or has expired. 
' diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index 23b7471a..9479765e 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -138,6 +138,11 @@ SCAN_BATCH_MAX_PARALLEL_SCANS = 5 SCAN_BATCH_SCANS_PER_CPU = 1 +# report with polling +REPORT_POLLING_WAIT_INTERVAL_IN_SECONDS = 5 +DEFAULT_REPORT_POLLING_TIMEOUT_IN_SECONDS = 600 +REPORT_POLLING_TIMEOUT_IN_SECONDS_ENV_VAR_NAME = 'REPORT_POLLING_TIMEOUT_IN_SECONDS' + # scan with polling SCAN_POLLING_WAIT_INTERVAL_IN_SECONDS = 5 DEFAULT_SCAN_POLLING_TIMEOUT_IN_SECONDS = 3600 @@ -162,6 +167,10 @@ EXCLUDE_DETECTIONS_IN_DELETED_LINES_ENV_VAR_NAME = 'EXCLUDE_DETECTIONS_IN_DELETED_LINES' DEFAULT_EXCLUDE_DETECTIONS_IN_DELETED_LINES = True +# report statuses +REPORT_STATUS_COMPLETED = 'Completed' +REPORT_STATUS_ERROR = 'Failed' + # scan statuses SCAN_STATUS_COMPLETED = 'Completed' SCAN_STATUS_ERROR = 'Error' diff --git a/cycode/cli/exceptions/custom_exceptions.py b/cycode/cli/exceptions/custom_exceptions.py index ea98a0aa..1b218353 100644 --- a/cycode/cli/exceptions/custom_exceptions.py +++ b/cycode/cli/exceptions/custom_exceptions.py @@ -28,6 +28,10 @@ def __str__(self) -> str: return f'error occurred during the scan. 
error message: {self.error_message}' +class ReportAsyncError(CycodeError): + pass + + class HttpUnauthorizedError(CycodeError): def __init__(self, error_message: str, response: Response) -> None: self.status_code = 401 diff --git a/cycode/cli/user_settings/configuration_manager.py b/cycode/cli/user_settings/configuration_manager.py index 98e62e07..65da08fc 100644 --- a/cycode/cli/user_settings/configuration_manager.py +++ b/cycode/cli/user_settings/configuration_manager.py @@ -103,6 +103,13 @@ def get_scan_polling_timeout_in_seconds(self) -> int: ) ) + def get_report_polling_timeout_in_seconds(self) -> int: + return int( + self._get_value_from_environment_variables( + consts.REPORT_POLLING_TIMEOUT_IN_SECONDS_ENV_VAR_NAME, consts.DEFAULT_REPORT_POLLING_TIMEOUT_IN_SECONDS + ) + ) + def get_sca_pre_commit_timeout_in_seconds(self) -> int: return int( self._get_value_from_environment_variables( From 6c7b60c01a2a43096ca6e320e7fcb1e9f6c65ecb Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 2 Oct 2023 19:32:19 +0200 Subject: [PATCH 09/14] add report feedback --- cycode/cli/code_scanner.py | 7 ++-- cycode/cli/commands/report/sbom/common.py | 33 +++++++++++++++++++ .../commands/report/sbom/sbom_path_command.py | 30 ++++++++++++++++- .../sbom/sbom_repository_url_command.py | 29 +++++++++++++++- .../files_collector/models/in_memory_zip.py | 5 +++ cycode/cli/files_collector/zip_documents.py | 4 +-- cycode/cyclient/report_client.py | 5 +++ 7 files changed, 103 insertions(+), 10 deletions(-) diff --git a/cycode/cli/code_scanner.py b/cycode/cli/code_scanner.py index f47f8e01..1801d63c 100644 --- a/cycode/cli/code_scanner.py +++ b/cycode/cli/code_scanner.py @@ -5,7 +5,6 @@ import time import traceback from platform import platform -from sys import getsizeof from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple from uuid import UUID, uuid4 @@ -338,7 +337,7 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local try: 
logger.debug('Preparing local files, %s', {'batch_size': len(batch)}) zipped_documents = zip_documents(scan_type, batch) - zip_file_size = getsizeof(zipped_documents.in_memory_zip) + zip_file_size = zipped_documents.size scan_result = perform_scan( cycode_client, zipped_documents, scan_type, scan_id, is_git_diff, is_commit_range, scan_parameters @@ -476,9 +475,7 @@ def scan_commit_range_documents( _handle_exception(context, e) error_message = str(e) - zip_file_size = getsizeof(from_commit_zipped_documents.in_memory_zip) + getsizeof( - to_commit_zipped_documents.in_memory_zip - ) + zip_file_size = from_commit_zipped_documents.size + to_commit_zipped_documents.size detections_count = relevant_detections_count = 0 if local_scan_result: diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index 146b5328..94bdaa24 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ b/cycode/cli/commands/report/sbom/common.py @@ -1,5 +1,6 @@ import pathlib import time +from platform import platform from typing import TYPE_CHECKING, Optional from cycode.cli import consts @@ -7,6 +8,7 @@ from cycode.cli.config import configuration_manager from cycode.cli.exceptions.custom_exceptions import ReportAsyncError from cycode.cli.utils.progress_bar import SbomReportProgressBarSection +from cycode.cyclient import logger from cycode.cyclient.models import ReportExecutionSchema if TYPE_CHECKING: @@ -41,6 +43,37 @@ def _poll_report_execution_until_completed( raise ReportAsyncError(f'Timeout exceeded while waiting for report to complete. 
Timeout: {polling_timeout} sec.') +def send_report_feedback( + client: 'ReportClient', + start_scan_time: float, + success: bool, + output_format: str, + report_type: str, + report_command_type: str, + report_parameters: dict, + report_execution_id: int, + error_message: Optional[str] = None, + report_size: Optional[int] = None, +) -> None: + try: + end_scan_time = time.time() + scan_status = { + 'status': consts.REPORT_STATUS_COMPLETED if success else consts.REPORT_STATUS_ERROR, + 'output_format': output_format, + 'report_type': report_type, + 'report_command_type': report_command_type, + 'report_parameters': report_parameters, + 'operation_system': platform(), + 'error_message': error_message, + 'execution_time': int(end_scan_time - start_scan_time), + 'report_size': report_size, + } + + client.report_status(report_execution_id, scan_status) + except Exception as e: + logger.debug(f'Failed to send report feedback: {e}') + + def create_sbom_report( progress_bar: 'BaseProgressBar', client: 'ReportClient', diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index 4732e616..2007280e 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -1,7 +1,9 @@ +import time + import click from cycode.cli import consts -from cycode.cli.commands.report.sbom.common import create_sbom_report +from cycode.cli.commands.report.sbom.common import create_sbom_report, send_report_feedback from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception from cycode.cli.files_collector.path_documents import get_relevant_document from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions @@ -21,6 +23,8 @@ def sbom_path_command(context: click.Context, path: str) -> None: progress_bar = context.obj['progress_bar'] progress_bar.start() + start_scan_time = time.time() + try: documents = 
get_relevant_document( progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path @@ -33,6 +37,30 @@ def sbom_path_command(context: click.Context, path: str) -> None: report_execution = client.request_sbom_report_execution(report_parameters, zip_file=zipped_documents) create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + success=True, + output_format=output_format, + report_type='idk', # FIXME + report_command_type='path', + report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution.id, + report_size=zipped_documents.size, + ) except Exception as e: progress_bar.stop() + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + success=False, + output_format=output_format, + report_type='idk', # FIXME + report_command_type='path', + report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=0, # FIXME + ) + handle_report_exception(context, e) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index 46789a1e..9ff75868 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -1,6 +1,8 @@ +import time + import click -from cycode.cli.commands.report.sbom.common import create_sbom_report +from cycode.cli.commands.report.sbom.common import create_sbom_report, send_report_feedback from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception from cycode.cli.utils.progress_bar import SbomReportProgressBarSection @@ -18,9 +20,34 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: output_file = context.obj['output_file'] output_format = report_parameters.output_format + start_scan_time = time.time() + 
try: report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri) create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + success=True, + output_format=output_format, + report_type='idk', # FIXME + report_command_type='repository_url', + report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution.id, + ) except Exception as e: progress_bar.stop() + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + success=False, + output_format=output_format, + report_type='idk', # FIXME + report_command_type='repository_url', + report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=0, # FIXME + ) + handle_report_exception(context, e) diff --git a/cycode/cli/files_collector/models/in_memory_zip.py b/cycode/cli/files_collector/models/in_memory_zip.py index df15cdd4..410d00ca 100644 --- a/cycode/cli/files_collector/models/in_memory_zip.py +++ b/cycode/cli/files_collector/models/in_memory_zip.py @@ -1,4 +1,5 @@ from io import BytesIO +from sys import getsizeof from typing import Optional from zipfile import ZIP_DEFLATED, ZipFile @@ -25,3 +26,7 @@ def close(self) -> None: def read(self) -> bytes: self.in_memory_zip.seek(0) return self.in_memory_zip.read() + + @property + def size(self) -> int: + return getsizeof(self.in_memory_zip) diff --git a/cycode/cli/files_collector/zip_documents.py b/cycode/cli/files_collector/zip_documents.py index 65bcdef1..b2b252f4 100644 --- a/cycode/cli/files_collector/zip_documents.py +++ b/cycode/cli/files_collector/zip_documents.py @@ -1,5 +1,4 @@ import time -from sys import getsizeof from typing import List, Optional from cycode.cli import consts @@ -25,8 +24,7 @@ def zip_documents(scan_type: str, documents: List[Document], zip_file: Optional[ start_zip_creation_time = time.time() for index, 
document in enumerate(documents): - zip_file_size = getsizeof(zip_file.in_memory_zip) - _validate_zip_file_size(scan_type, zip_file_size) + _validate_zip_file_size(scan_type, zip_file.size) logger.debug( 'adding file to zip, %s', {'index': index, 'filename': document.path, 'unique_id': document.unique_id} diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py index 70cf2884..581b8eed 100644 --- a/cycode/cyclient/report_client.py +++ b/cycode/cyclient/report_client.py @@ -33,6 +33,7 @@ class ReportClient: SERVICE_NAME: str = 'report' CREATE_SBOM_REPORT_REQUEST_PATH: str = 'api/v2/report/{report_type}/sbom' GET_EXECUTIONS_STATUS_PATH: str = 'api/v2/report/executions' + REPORT_STATUS_PATH: str = 'api/v2/report/{report_execution_id}/status' DOWNLOAD_REPORT_PATH: str = 'files/api/v1/file/sbom/{file_name}' # not in the report service @@ -87,6 +88,10 @@ def get_file_content(self, file_name: str) -> str: ) return response.text + def report_status(self, report_execution_id: int, status: dict) -> None: + url_path = self.REPORT_STATUS_PATH.format(report_execution_id=report_execution_id) + self.client.post(url_path=url_path, body=status) + @staticmethod def parse_requested_sbom_report_response(response: Response) -> models.SbomReport: return models.RequestedSbomReportResultSchema().load(response.json()) From 5ab670ef0003f7f95f4eabf951a6d5121496ea8c Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 2 Oct 2023 20:12:38 +0200 Subject: [PATCH 10/14] gen unique filename if exists and confirmed by user --- .../commands/report/sbom/sbom_report_file.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cycode/cli/commands/report/sbom/sbom_report_file.py b/cycode/cli/commands/report/sbom/sbom_report_file.py index 3e12e712..a7220a80 100644 --- a/cycode/cli/commands/report/sbom/sbom_report_file.py +++ b/cycode/cli/commands/report/sbom/sbom_report_file.py @@ -1,3 +1,4 @@ +import os import pathlib from typing import 
Optional @@ -19,14 +20,25 @@ def is_exists(self) -> bool: return self._file_path.exists() def _prompt_overwrite(self) -> bool: - return click.confirm(f'File {self._file_path} already exists. Overwrite?') + return click.confirm(f'File {self._file_path} already exists. Save with a different filename?', default=True) def _write(self, content: str) -> None: with open(self._file_path, 'w', encoding='UTF-8') as f: f.write(content) + def _notify_about_saved_file(self) -> None: + click.echo(f'Report saved to {self._file_path}') + + def _find_and_set_unique_filename(self) -> None: + attempt_no = 1 + while self.is_exists(): + base, ext = os.path.splitext(self._file_path) + self._file_path = pathlib.Path(f'{base}-{attempt_no}{ext}') + attempt_no += 1 + def write(self, content: str) -> None: - if self.is_exists() and not self._prompt_overwrite(): - return + if self.is_exists() and self._prompt_overwrite(): + self._find_and_set_unique_filename() self._write(content) + self._notify_about_saved_file() From 25193cdf4d043b1b798ea43a59a68ad5a1a07053 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Mon, 2 Oct 2023 20:45:44 +0200 Subject: [PATCH 11/14] fix unique filename gen --- cycode/cli/commands/report/sbom/sbom_report_file.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cycode/cli/commands/report/sbom/sbom_report_file.py b/cycode/cli/commands/report/sbom/sbom_report_file.py index a7220a80..4d58f89f 100644 --- a/cycode/cli/commands/report/sbom/sbom_report_file.py +++ b/cycode/cli/commands/report/sbom/sbom_report_file.py @@ -1,5 +1,6 @@ import os import pathlib +import re from typing import Optional import click @@ -30,11 +31,15 @@ def _notify_about_saved_file(self) -> None: click.echo(f'Report saved to {self._file_path}') def _find_and_set_unique_filename(self) -> None: - attempt_no = 1 + attempt_no = 0 while self.is_exists(): + attempt_no += 1 + base, ext = os.path.splitext(self._file_path) + # Remove previous suffix + base = re.sub(r'-\d+$', '', 
base) + self._file_path = pathlib.Path(f'{base}-{attempt_no}{ext}') - attempt_no += 1 def write(self, content: str) -> None: if self.is_exists() and self._prompt_overwrite(): From 20783e3f54070dee17c8d40243b8b93652f6828c Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 3 Oct 2023 12:38:37 +0200 Subject: [PATCH 12/14] update feedback sending --- cycode/cli/commands/report/sbom/common.py | 15 ++++++------ .../commands/report/sbom/sbom_path_command.py | 23 +++++++++--------- .../sbom/sbom_repository_url_command.py | 24 ++++++++++--------- cycode/cyclient/report_client.py | 2 +- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py index 94bdaa24..334e7275 100644 --- a/cycode/cli/commands/report/sbom/common.py +++ b/cycode/cli/commands/report/sbom/common.py @@ -46,27 +46,26 @@ def _poll_report_execution_until_completed( def send_report_feedback( client: 'ReportClient', start_scan_time: float, - success: bool, - output_format: str, report_type: str, report_command_type: str, - report_parameters: dict, + request_report_parameters: dict, report_execution_id: int, error_message: Optional[str] = None, - report_size: Optional[int] = None, + request_zip_file_size: Optional[int] = None, + **kwargs, ) -> None: try: + request_report_parameters.update(kwargs) + end_scan_time = time.time() scan_status = { - 'status': consts.REPORT_STATUS_COMPLETED if success else consts.REPORT_STATUS_ERROR, - 'output_format': output_format, 'report_type': report_type, 'report_command_type': report_command_type, - 'report_parameters': report_parameters, + 'request_report_parameters': request_report_parameters, 'operation_system': platform(), 'error_message': error_message, 'execution_time': int(end_scan_time - start_scan_time), - 'report_size': report_size, + 'request_zip_file_size': request_zip_file_size, } client.report_status(report_execution_id, scan_status) diff --git 
a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py index 2007280e..36b9c4d9 100644 --- a/cycode/cli/commands/report/sbom/sbom_path_command.py +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -24,6 +24,7 @@ def sbom_path_command(context: click.Context, path: str) -> None: progress_bar.start() start_scan_time = time.time() + report_execution_id = -1 try: documents = get_relevant_document( @@ -35,19 +36,18 @@ def sbom_path_command(context: click.Context, path: str) -> None: zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) report_execution = client.request_sbom_report_execution(report_parameters, zip_file=zipped_documents) + report_execution_id = report_execution.id - create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) + create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format) send_report_feedback( client=client, start_scan_time=start_scan_time, - success=True, - output_format=output_format, - report_type='idk', # FIXME + report_type='SBOM', report_command_type='path', - report_parameters=report_parameters.to_dict(without_entity_type=False), - report_execution_id=report_execution.id, - report_size=zipped_documents.size, + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + request_zip_file_size=zipped_documents.size, ) except Exception as e: progress_bar.stop() @@ -55,12 +55,11 @@ def sbom_path_command(context: click.Context, path: str) -> None: send_report_feedback( client=client, start_scan_time=start_scan_time, - success=False, - output_format=output_format, - report_type='idk', # FIXME + report_type='SBOM', report_command_type='path', - report_parameters=report_parameters.to_dict(without_entity_type=False), - report_execution_id=0, # FIXME + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + 
report_execution_id=report_execution_id, + error_message=str(e), ) handle_report_exception(context, e) diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py index 9ff75868..a3cb2570 100644 --- a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -21,20 +21,22 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: output_format = report_parameters.output_format start_scan_time = time.time() + report_execution_id = -1 try: report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri) - create_sbom_report(progress_bar, client, report_execution.id, output_file, output_format) + report_execution_id = report_execution.id + + create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format) send_report_feedback( client=client, start_scan_time=start_scan_time, - success=True, - output_format=output_format, - report_type='idk', # FIXME + report_type='SBOM', report_command_type='repository_url', - report_parameters=report_parameters.to_dict(without_entity_type=False), - report_execution_id=report_execution.id, + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + repository_uri=uri, ) except Exception as e: progress_bar.stop() @@ -42,12 +44,12 @@ def sbom_repository_url_command(context: click.Context, uri: str) -> None: send_report_feedback( client=client, start_scan_time=start_scan_time, - success=False, - output_format=output_format, - report_type='idk', # FIXME + report_type='SBOM', report_command_type='repository_url', - report_parameters=report_parameters.to_dict(without_entity_type=False), - report_execution_id=0, # FIXME + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + 
error_message=str(e), + repository_uri=uri, ) handle_report_exception(context, e) diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py index 581b8eed..ade7d850 100644 --- a/cycode/cyclient/report_client.py +++ b/cycode/cyclient/report_client.py @@ -89,7 +89,7 @@ def get_file_content(self, file_name: str) -> str: return response.text def report_status(self, report_execution_id: int, status: dict) -> None: - url_path = self.REPORT_STATUS_PATH.format(report_execution_id=report_execution_id) + url_path = f'{self.SERVICE_NAME}/{self.REPORT_STATUS_PATH}'.format(report_execution_id=report_execution_id) self.client.post(url_path=url_path, body=status) @staticmethod From 1566302b4f84e588c57a293028f13df6f2d4b3b8 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 3 Oct 2023 12:42:52 +0200 Subject: [PATCH 13/14] return include_dev_dependencies flag --- cycode/cli/commands/report/sbom/sbom_command.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cycode/cli/commands/report/sbom/sbom_command.py b/cycode/cli/commands/report/sbom/sbom_command.py index 0fb2323a..ecfd2782 100644 --- a/cycode/cli/commands/report/sbom/sbom_command.py +++ b/cycode/cli/commands/report/sbom/sbom_command.py @@ -46,6 +46,14 @@ type=bool, required=False, ) +@click.option( + '--include-dev-dependencies', + is_flag=True, + default=False, + help='Include dev dependencies.', + type=bool, + required=False, +) @click.pass_context def sbom_command( context: click.Context, @@ -53,6 +61,7 @@ def sbom_command( output_format: Optional[str], output_file: Optional[pathlib.Path], include_vulnerabilities: bool, + include_dev_dependencies: bool, ) -> int: """Generate SBOM report.""" sbom_format_parts = format.split('-') @@ -67,7 +76,7 @@ def sbom_command( sbom_version=sbom_format_version, output_format=output_format, include_vulnerabilities=include_vulnerabilities, - include_dev_dependencies=False, # is not supported by BE yet + 
include_dev_dependencies=include_dev_dependencies, ) context.obj['report_parameters'] = report_parameters context.obj['output_file'] = output_file From 6bbe2872fa2e625a4312e475a25ddc8d4ad8f0c8 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 3 Oct 2023 12:44:05 +0200 Subject: [PATCH 14/14] sync supported sbom formats --- cycode/cli/config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/cycode/cli/config.yaml b/cycode/cli/config.yaml index dd18ffef..875f37c1 100644 --- a/cycode/cli/config.yaml +++ b/cycode/cli/config.yaml @@ -10,6 +10,7 @@ scans: - license-compliance supported_sbom_formats: - spdx-2.2 + - spdx-2.3 - cyclonedx-1.4 result_printer: default: