diff --git a/cycode/cli/code_scanner.py b/cycode/cli/code_scanner.py index 128e8a6f..1801d63c 100644 --- a/cycode/cli/code_scanner.py +++ b/cycode/cli/code_scanner.py @@ -5,8 +5,7 @@ import time import traceback from platform import platform -from sys import getsizeof -from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple from uuid import UUID, uuid4 import click @@ -15,39 +14,37 @@ from cycode.cli import consts from cycode.cli.ci_integrations import get_commit_range from cycode.cli.config import configuration_manager -from cycode.cli.consts import SCA_SKIP_RESTORE_DEPENDENCIES_FLAG from cycode.cli.exceptions import custom_exceptions -from cycode.cli.helpers import sca_code_scanner, tf_content_generator +from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cli.files_collector.path_documents import get_relevant_document +from cycode.cli.files_collector.repository_documents import ( + calculate_pre_receive_commit_range, + get_commit_range_modified_documents, + get_diff_file_content, + get_diff_file_path, + get_git_repository_tree_file_entries, + get_pre_commit_modified_documents, + parse_commit_range, +) +from cycode.cli.files_collector.sca import sca_code_scanner +from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions +from cycode.cli.files_collector.zip_documents import zip_documents from cycode.cli.models import CliError, CliErrors, Document, DocumentDetections, LocalScanResult, Severity from cycode.cli.printers import ConsolePrinter -from cycode.cli.user_settings.config_file_manager import ConfigFileManager from cycode.cli.utils import scan_utils from cycode.cli.utils.path_utils import ( - change_filename_extension, - get_file_content, - get_file_size, get_path_by_os, - get_relevant_files_in_path, - is_binary_file, - is_sub_path, - load_json, ) -from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cli.utils.progress_bar import ScanProgressBarSection from cycode.cli.utils.scan_batch import run_parallel_batched_scan from cycode.cli.utils.scan_utils import set_issue_detected -from cycode.cli.utils.string_utils import get_content_size, is_binary_content from cycode.cli.utils.task_timer import TimeoutAfter -from cycode.cli.zip_file import InMemoryZip from cycode.cyclient import logger from cycode.cyclient.config import set_logging_level from cycode.cyclient.models import Detection, DetectionSchema, DetectionsPerFile, ZippedFileScanResult if TYPE_CHECKING: - from git import Blob, Diff - from git.objects.base import IndexObjUnion - from git.objects.tree import TraversedTreeTup - - from cycode.cli.utils.progress_bar import BaseProgressBar from cycode.cyclient.models import ScanDetailsResponse from cycode.cyclient.scan_client import ScanClient @@ -78,17 +75,17 @@ def scan_repository(context: click.Context, path: str, branch: str) -> None: progress_bar.start() file_entries = list(get_git_repository_tree_file_entries(path, branch)) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(file_entries)) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(file_entries)) documents_to_scan = [] for file in file_entries: # FIXME(MarshalX): probably file could be tree or submodule too. 
we expect blob only - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) file_path = file.path if monitor else get_path_by_os(os.path.join(path, file.path)) documents_to_scan.append(Document(file_path, file.data_stream.read().decode('UTF-8', errors='replace'))) - documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan) + documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) perform_pre_scan_documents_actions(context, scan_type, documents_to_scan, is_git_diff=False) @@ -140,16 +137,16 @@ def scan_commit_range( total_commits_count = int(repo.git.rev_list('--count', commit_range)) logger.debug(f'Calculating diffs for {total_commits_count} commits in the commit range {commit_range}') - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count) scanned_commits_count = 0 for commit in repo.iter_commits(rev=commit_range): if _does_reach_to_max_commits_to_scan_limit(commit_ids_to_scan, max_commits_count): logger.debug(f'Reached to max commits to scan count. Going to scan only {max_commits_count} last commits') - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count) break - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) commit_id = commit.hexsha commit_ids_to_scan.append(commit_id) @@ -172,7 +169,7 @@ def scan_commit_range( {'path': path, 'commit_range': commit_range, 'commit_id': commit_id}, ) - documents_to_scan.extend(exclude_irrelevant_documents_to_scan(context, commit_documents_to_scan)) + documents_to_scan.extend(exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan)) scanned_commits_count += 1 logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan}) @@ -199,30 +196,7 @@ def scan_path(context: click.Context, path: str) -> None: progress_bar.start() logger.debug('Starting path scan process, %s', {'path': path}) - - all_files_to_scan = get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) - - # we are double the progress bar section length because we are going to process the files twice - # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) - # second time to get the files content - progress_bar_section_len = len(all_files_to_scan) * 2 - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) - - relevant_files_to_scan = exclude_irrelevant_files(context, all_files_to_scan) - - # after finishing the first processing (excluding), - # we must update the progress bar stage with respect of excluded files. 
- # now it's possible that we will not process x2 of the files count - # because some of them were excluded, we should subtract the excluded files count - # from the progress bar section length - excluded_files_count = len(all_files_to_scan) - len(relevant_files_to_scan) - progress_bar_section_len = progress_bar_section_len - excluded_files_count - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, progress_bar_section_len) - - logger.debug( - 'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)} - ) - scan_disk_files(context, path, relevant_files_to_scan) + scan_disk_files(context, path) @click.command(short_help='Use this command to scan any content that was not committed yet.') @@ -240,14 +214,14 @@ def pre_commit_scan(context: click.Context, ignored_args: List[str]) -> None: diff_files = Repo(os.getcwd()).index.diff('HEAD', create_patch=True, R=True) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) + progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) documents_to_scan = [] for file in diff_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) + progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES) documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file))) - documents_to_scan = exclude_irrelevant_documents_to_scan(context, documents_to_scan) + documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) scan_documents(context, documents_to_scan, is_git_diff=True) @@ -293,10 +267,13 @@ def pre_receive_scan(context: click.Context, ignored_args: List[str]) -> None: def scan_sca_pre_commit(context: click.Context) -> None: + scan_type = context.obj['scan_type'] scan_parameters = get_default_scan_parameters(context) - git_head_documents, pre_committed_documents = get_pre_commit_modified_documents(context.obj['progress_bar']) - git_head_documents = exclude_irrelevant_documents_to_scan(context, git_head_documents) - pre_committed_documents = exclude_irrelevant_documents_to_scan(context, pre_committed_documents) + git_head_documents, pre_committed_documents = get_pre_commit_modified_documents( + context.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES + ) + git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents) + pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents) sca_code_scanner.perform_pre_hook_range_scan_actions(git_head_documents, pre_committed_documents) scan_commit_range_documents( context, @@ -308,15 +285,16 @@ def scan_sca_pre_commit(context: click.Context) -> None: def scan_sca_commit_range(context: click.Context, path: str, commit_range: str) -> None: + scan_type = context.obj['scan_type'] progress_bar = context.obj['progress_bar'] scan_parameters = get_scan_parameters(context, path) from_commit_rev, to_commit_rev = parse_commit_range(commit_range, path) from_commit_documents, to_commit_documents = get_commit_range_modified_documents( - progress_bar, path, from_commit_rev, to_commit_rev + progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev ) - from_commit_documents = exclude_irrelevant_documents_to_scan(context, from_commit_documents) - to_commit_documents = exclude_irrelevant_documents_to_scan(context, to_commit_documents) + from_commit_documents = 
exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents) + to_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents) sca_code_scanner.perform_pre_commit_range_scan_actions( path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev ) @@ -324,27 +302,15 @@ def scan_sca_commit_range(context: click.Context, path: str, commit_range: str) scan_commit_range_documents(context, from_commit_documents, to_commit_documents, scan_parameters=scan_parameters) -def scan_disk_files(context: click.Context, path: str, files_to_scan: List[str]) -> None: +def scan_disk_files(context: click.Context, path: str) -> None: scan_parameters = get_scan_parameters(context, path) scan_type = context.obj['scan_type'] progress_bar = context.obj['progress_bar'] - is_git_diff = False - try: - documents: List[Document] = [] - for file in files_to_scan: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - content = get_file_content(file) - if not content: - continue - - documents.append(_generate_document(file, scan_type, content, is_git_diff)) - - perform_pre_scan_documents_actions(context, scan_type, documents, is_git_diff) - scan_documents(context, documents, is_git_diff=is_git_diff, scan_parameters=scan_parameters) - + documents = get_relevant_document(progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, scan_type, path) + perform_pre_scan_documents_actions(context, scan_type, documents) + scan_documents(context, documents, scan_parameters=scan_parameters) except Exception as e: _handle_exception(context, e) @@ -370,8 +336,8 @@ def _scan_batch_thread_func(batch: List[Document]) -> Tuple[str, CliError, Local try: logger.debug('Preparing local files, %s', {'batch_size': len(batch)}) - zipped_documents = zip_documents_to_scan(scan_type, InMemoryZip(), batch) - zip_file_size = getsizeof(zipped_documents.in_memory_zip) + zipped_documents = zip_documents(scan_type, batch) + zip_file_size = zipped_documents.size scan_result = perform_scan( cycode_client, zipped_documents, scan_type, scan_id, is_git_diff, is_commit_range, scan_parameters @@ -442,8 +408,8 @@ def scan_documents( scan_batch_thread_func, documents_to_scan, progress_bar=progress_bar ) - progress_bar.set_section_length(ProgressBarSection.GENERATE_REPORT, 1) - progress_bar.update(ProgressBarSection.GENERATE_REPORT) + progress_bar.set_section_length(ScanProgressBarSection.GENERATE_REPORT, 1) + progress_bar.update(ScanProgressBarSection.GENERATE_REPORT) progress_bar.stop() set_issue_detected_by_scan_results(context, local_scan_results) @@ -471,19 +437,15 @@ def scan_commit_range_documents( to_commit_zipped_documents = InMemoryZip() try: - progress_bar.set_section_length(ProgressBarSection.SCAN, 1) + progress_bar.set_section_length(ScanProgressBarSection.SCAN, 1) scan_result = init_default_scan_result(scan_id) if should_scan_documents(from_documents_to_scan, to_documents_to_scan): logger.debug('Preparing from-commit zip') - from_commit_zipped_documents = zip_documents_to_scan( - scan_type, from_commit_zipped_documents, from_documents_to_scan - ) + from_commit_zipped_documents = zip_documents(scan_type, from_documents_to_scan) logger.debug('Preparing to-commit zip') - to_commit_zipped_documents = zip_documents_to_scan( - scan_type, to_commit_zipped_documents, to_documents_to_scan - ) + to_commit_zipped_documents = zip_documents(scan_type, to_documents_to_scan) scan_result = perform_commit_range_scan_async( cycode_client, @@ -494,15 +456,15 @@ def scan_commit_range_documents( timeout, ) - 
progress_bar.update(ProgressBarSection.SCAN) - progress_bar.set_section_length(ProgressBarSection.GENERATE_REPORT, 1) + progress_bar.update(ScanProgressBarSection.SCAN) + progress_bar.set_section_length(ScanProgressBarSection.GENERATE_REPORT, 1) local_scan_result = create_local_scan_result( scan_result, to_documents_to_scan, scan_command_type, scan_type, severity_threshold ) set_issue_detected_by_scan_results(context, [local_scan_result]) - progress_bar.update(ProgressBarSection.GENERATE_REPORT) + progress_bar.update(ScanProgressBarSection.GENERATE_REPORT) progress_bar.stop() # errors will be handled with try-except block; printing will not occur on errors @@ -513,9 +475,7 @@ def scan_commit_range_documents( _handle_exception(context, e) error_message = str(e) - zip_file_size = getsizeof(from_commit_zipped_documents.in_memory_zip) + getsizeof( - to_commit_zipped_documents.in_memory_zip - ) + zip_file_size = from_commit_zipped_documents.size + to_commit_zipped_documents.size detections_count = relevant_detections_count = 0 if local_scan_result: @@ -577,45 +537,9 @@ def create_local_scan_result( ) -def perform_pre_scan_documents_actions( - context: click.Context, scan_type: str, documents_to_scan: List[Document], is_git_diff: bool = False -) -> None: - if scan_type == consts.SCA_SCAN_TYPE and not context.obj.get(SCA_SKIP_RESTORE_DEPENDENCIES_FLAG): - logger.debug('Perform pre scan document add_dependencies_tree_document action') - sca_code_scanner.add_dependencies_tree_document(context, documents_to_scan, is_git_diff) - - -def zip_documents_to_scan(scan_type: str, zip_file: InMemoryZip, documents: List[Document]) -> InMemoryZip: - start_zip_creation_time = time.time() - - for index, document in enumerate(documents): - zip_file_size = getsizeof(zip_file.in_memory_zip) - validate_zip_file_size(scan_type, zip_file_size) - - logger.debug( - 'adding file to zip, %s', {'index': index, 'filename': document.path, 'unique_id': document.unique_id} - ) - zip_file.append(document.path, document.unique_id, document.content) - zip_file.close() - - end_zip_creation_time = time.time() - zip_creation_time = int(end_zip_creation_time - start_zip_creation_time) - logger.debug('finished to create zip file, %s', {'zip_creation_time': zip_creation_time}) - return zip_file - - -def validate_zip_file_size(scan_type: str, zip_file_size: int) -> None: - if scan_type == consts.SCA_SCAN_TYPE: - if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES) - else: - if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES) - - def perform_scan( cycode_client: 'ScanClient', - zipped_documents: InMemoryZip, + zipped_documents: 'InMemoryZip', scan_type: str, scan_id: str, is_git_diff: bool, @@ -632,7 +556,7 @@ def perform_scan( def perform_scan_async( - cycode_client: 'ScanClient', zipped_documents: InMemoryZip, scan_type: str, scan_parameters: dict + cycode_client: 'ScanClient', zipped_documents: 'InMemoryZip', scan_type: str, scan_parameters: dict ) -> ZippedFileScanResult: scan_async_result = cycode_client.zipped_file_scan_async(zipped_documents, scan_type, scan_parameters) logger.debug('scan request has been triggered successfully, scan id: %s', scan_async_result.scan_id) @@ -642,8 +566,8 @@ def perform_scan_async( def perform_commit_range_scan_async( cycode_client: 'ScanClient', - from_commit_zipped_documents: InMemoryZip, - to_commit_zipped_documents: InMemoryZip, 
+ from_commit_zipped_documents: 'InMemoryZip', + to_commit_zipped_documents: 'InMemoryZip', scan_type: str, scan_parameters: dict, timeout: Optional[int] = None, @@ -759,56 +683,6 @@ def parse_pre_receive_input() -> str: return pre_receive_input.splitlines()[0] -def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]: - end_commit = get_end_commit_from_branch_update_details(branch_update_details) - - # branch is deleted, no need to perform scan - if end_commit == consts.EMPTY_COMMIT_SHA: - return None - - start_commit = get_oldest_unupdated_commit_for_branch(end_commit) - - # no new commit to update found - if not start_commit: - return None - - return f'{start_commit}~1...{end_commit}' - - -def get_end_commit_from_branch_update_details(update_details: str) -> str: - # update details pattern: - _, end_commit, _ = update_details.split() - return end_commit - - -def get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]: - # get a list of commits by chronological order that are not in the remote repository yet - # more info about rev-list command: https://git-scm.com/docs/git-rev-list - not_updated_commits = Repo(os.getcwd()).git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all') - commits = not_updated_commits.splitlines() - if not commits: - return None - return commits[0] - - -def get_diff_file_path(file: 'Diff') -> Optional[str]: - return file.b_path if file.b_path else file.a_path - - -def get_diff_file_content(file: 'Diff') -> str: - return file.diff.decode('UTF-8', errors='replace') - - -def should_process_git_object(obj: 'Blob', _: int) -> bool: - return obj.type == 'blob' and obj.size > 0 - - -def get_git_repository_tree_file_entries( - path: str, branch: str -) -> Union[Iterator['IndexObjUnion'], Iterator['TraversedTreeTup']]: - return Repo(path).tree(branch).traverse(predicate=should_process_git_object) - - def get_default_scan_parameters(context: click.Context) -> dict: return { 'monitor': context.obj.get('monitor'), @@ -839,34 +713,6 @@ def try_get_git_remote_url(path: str) -> Optional[dict]: return None -def exclude_irrelevant_documents_to_scan(context: click.Context, documents_to_scan: List[Document]) -> List[Document]: - logger.debug('Excluding irrelevant documents to scan') - - scan_type = context.obj['scan_type'] - - relevant_documents = [] - for document in documents_to_scan: - if _is_relevant_document_to_scan(scan_type, document.path, document.content): - relevant_documents.append(document) - - return relevant_documents - - -def exclude_irrelevant_files(context: click.Context, filenames: List[str]) -> List[str]: - scan_type = context.obj['scan_type'] - progress_bar = context.obj['progress_bar'] - - relevant_files = [] - for filename in filenames: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - if _is_relevant_file_to_scan(scan_type, filename): - relevant_files.append(filename) - - is_sub_path.cache_clear() # free up memory - - return relevant_files - - def exclude_irrelevant_detections( detections: List[Detection], scan_type: str, command_scan_type: str, severity_threshold: str ) -> List[Detection]: @@ -916,60 +762,6 @@ def _exclude_detections_by_exclusions_configuration(detections: List[Detection], return [detection for detection in detections if not _should_exclude_detection(detection, exclusions)] -def get_pre_commit_modified_documents(progress_bar: 'BaseProgressBar') -> Tuple[List[Document], List[Document]]: - git_head_documents = [] - pre_committed_documents = [] - - repo = Repo(os.getcwd()) - 
diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(diff_files)) - for file in diff_files: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - diff_file_path = get_diff_file_path(file) - file_path = get_path_by_os(diff_file_path) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, consts.GIT_HEAD_COMMIT_REV, diff_file_path) - if file_content is not None: - git_head_documents.append(Document(file_path, file_content)) - - if os.path.exists(file_path): - file_content = get_file_content(file_path) - pre_committed_documents.append(Document(file_path, file_content)) - - return git_head_documents, pre_committed_documents - - -def get_commit_range_modified_documents( - progress_bar: 'BaseProgressBar', path: str, from_commit_rev: str, to_commit_rev: str -) -> Tuple[List[Document], List[Document]]: - from_commit_documents = [] - to_commit_documents = [] - - repo = Repo(path) - diff = repo.commit(from_commit_rev).diff(to_commit_rev) - - modified_files_diff = [ - change for change in diff if change.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE - ] - progress_bar.set_section_length(ProgressBarSection.PREPARE_LOCAL_FILES, len(modified_files_diff)) - for blob in modified_files_diff: - progress_bar.update(ProgressBarSection.PREPARE_LOCAL_FILES) - - diff_file_path = get_diff_file_path(blob) - file_path = get_path_by_os(diff_file_path) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, diff_file_path) - if file_content is not None: - from_commit_documents.append(Document(file_path, file_content)) - - file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path) - if file_content is not None: - to_commit_documents.append(Document(file_path, file_content)) - - return from_commit_documents, to_commit_documents - - def _should_exclude_detection(detection: Detection, exclusions: Dict) -> bool: exclusions_by_value = exclusions.get(consts.EXCLUSIONS_BY_VALUE_SECTION_NAME, []) if _is_detection_sha_configured_in_exclusions(detection, exclusions_by_value): @@ -1014,13 +806,6 @@ def _is_detection_sha_configured_in_exclusions(detection: Detection, exclusions: return detection_sha in exclusions -def _is_path_configured_in_exclusions(scan_type: str, file_path: str) -> bool: - exclusions_by_path = configuration_manager.get_exclusions_by_scan_type(scan_type).get( - consts.EXCLUSIONS_BY_PATH_SECTION_NAME, [] - ) - return any(is_sub_path(exclusion_path, file_path) for exclusion_path in exclusions_by_path) - - def _get_package_name(detection: Detection) -> str: package_name = detection.detection_details.get('vulnerable_component', '') package_version = detection.detection_details.get('vulnerable_component_version', '') @@ -1032,119 +817,6 @@ def _get_package_name(detection: Detection) -> str: return f'{package_name}@{package_version}' -def _is_file_relevant_for_sca_scan(filename: str) -> bool: - if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS): - logger.debug("file is irrelevant because it is from node_modules's inner path, %s", {'filename': filename}) - return False - - return True - - -def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool: - if _is_subpath_of_cycode_configuration_folder(filename): - logger.debug('file is irrelevant because it is in cycode configuration directory, %s', {'filename': filename}) - return False - - if 
_is_path_configured_in_exclusions(scan_type, filename): - logger.debug('file is irrelevant because the file path is in the ignore paths list, %s', {'filename': filename}) - return False - - if not _is_file_extension_supported(scan_type, filename): - logger.debug('file is irrelevant because the file extension is not supported, %s', {'filename': filename}) - return False - - if is_binary_file(filename): - logger.debug('file is irrelevant because it is binary file, %s', {'filename': filename}) - return False - - if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename): - logger.debug('file is irrelevant because its exceeded max size limit, %s', {'filename': filename}) - return False - - if scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename): - return False - - return True - - -def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool: - if _is_subpath_of_cycode_configuration_folder(filename): - logger.debug( - 'document is irrelevant because it is in cycode configuration directory, %s', {'filename': filename} - ) - return False - - if _is_path_configured_in_exclusions(scan_type, filename): - logger.debug( - 'document is irrelevant because the document path is in the ignore paths list, %s', {'filename': filename} - ) - return False - - if not _is_file_extension_supported(scan_type, filename): - logger.debug('document is irrelevant because the file extension is not supported, %s', {'filename': filename}) - return False - - if is_binary_content(content): - logger.debug('document is irrelevant because it is binary, %s', {'filename': filename}) - return False - - if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content): - logger.debug('document is irrelevant because its exceeded max size limit, %s', {'filename': filename}) - return False - return True - - -def _is_file_extension_supported(scan_type: str, filename: str) -> bool: - filename = filename.lower() - - if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: - return filename.endswith(consts.INFRA_CONFIGURATION_SCAN_SUPPORTED_FILES) - - if scan_type == consts.SCA_SCAN_TYPE: - return filename.endswith(consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES) - - return not filename.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE) - - -def _generate_document(file: str, scan_type: str, content: str, is_git_diff: bool) -> Document: - if _is_iac(scan_type) and _is_tfplan_file(file, content): - return _handle_tfplan_file(file, content, is_git_diff) - return Document(file, content, is_git_diff) - - -def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document: - document_name = _generate_tfplan_document_name(file) - tf_content = tf_content_generator.generate_tf_content_from_tfplan(file, content) - return Document(document_name, tf_content, is_git_diff) - - -def _generate_tfplan_document_name(path: str) -> str: - document_name = change_filename_extension(path, 'tf') - timestamp = int(time.time()) - return f'{timestamp}-{document_name}' - - -def _is_iac(scan_type: str) -> bool: - return scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE - - -def _is_tfplan_file(file: str, content: str) -> bool: - if not file.endswith('.json'): - return False - tf_plan = load_json(content) - if not isinstance(tf_plan, dict): - return False - return 'resource_changes' in tf_plan - - -def _does_file_exceed_max_size_limit(filename: str) -> bool: - return get_file_size(filename) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES - - -def 
_does_document_exceed_max_size_limit(content: str) -> bool: - return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES - - def _get_document_by_file_name( documents: List[Document], file_name: str, unique_id: Optional[str] = None ) -> Optional[Document]: @@ -1155,14 +827,6 @@ def _get_document_by_file_name( return None -def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool: - return ( - is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename) - or is_sub_path(configuration_manager.local_config_file_manager.get_config_directory_path(), filename) - or filename.endswith(ConfigFileManager.get_config_file_route()) - ) - - def _handle_exception(context: click.Context, e: Exception, *, return_exception: bool = False) -> Optional[CliError]: context.obj['did_fail'] = True @@ -1372,18 +1036,6 @@ def _does_reach_to_max_commits_to_scan_limit(commit_ids: List[str], max_commits_ return len(commit_ids) >= max_commits_count -def parse_commit_range(commit_range: str, path: str) -> Tuple[str, str]: - from_commit_rev = None - to_commit_rev = None - - for commit in Repo(path).iter_commits(rev=commit_range): - if not to_commit_rev: - to_commit_rev = commit.hexsha - from_commit_rev = commit.hexsha - - return from_commit_rev, to_commit_rev - - def _normalize_file_path(path: str) -> str: if path.startswith('/'): return path[1:] diff --git a/cycode/cli/helpers/__init__.py b/cycode/cli/commands/report/__init__.py similarity index 100% rename from cycode/cli/helpers/__init__.py rename to cycode/cli/commands/report/__init__.py diff --git a/cycode/cli/commands/report/report_command.py b/cycode/cli/commands/report/report_command.py new file mode 100644 index 00000000..4722a6b2 --- /dev/null +++ b/cycode/cli/commands/report/report_command.py @@ -0,0 +1,23 @@ +import click + +from cycode.cli.commands.report.sbom.sbom_command import sbom_command +from cycode.cli.utils.get_api_client import get_report_cycode_client +from cycode.cli.utils.progress_bar import SBOM_REPORT_PROGRESS_BAR_SECTIONS, get_progress_bar + + +@click.group( + commands={ + 'sbom': sbom_command, + }, + short_help='Generate report. 
You'll need to specify which report type to generate.', +) +@click.pass_context +def report_command( + context: click.Context, +) -> int: + """Generate report.""" + + context.obj['client'] = get_report_cycode_client(hide_response_log=False) # TODO disable log + context.obj['progress_bar'] = get_progress_bar(hidden=False, sections=SBOM_REPORT_PROGRESS_BAR_SECTIONS) + + return 1 diff --git a/cycode/cli/helpers/maven/__init__.py b/cycode/cli/commands/report/sbom/__init__.py similarity index 100% rename from cycode/cli/helpers/maven/__init__.py rename to cycode/cli/commands/report/sbom/__init__.py diff --git a/cycode/cli/commands/report/sbom/common.py b/cycode/cli/commands/report/sbom/common.py new file mode 100644 index 00000000..334e7275 --- /dev/null +++ b/cycode/cli/commands/report/sbom/common.py @@ -0,0 +1,94 @@ +import pathlib +import time +from platform import platform +from typing import TYPE_CHECKING, Optional + +from cycode.cli import consts +from cycode.cli.commands.report.sbom.sbom_report_file import SbomReportFile +from cycode.cli.config import configuration_manager +from cycode.cli.exceptions.custom_exceptions import ReportAsyncError +from cycode.cli.utils.progress_bar import SbomReportProgressBarSection +from cycode.cyclient import logger +from cycode.cyclient.models import ReportExecutionSchema + +if TYPE_CHECKING: + from cycode.cli.utils.progress_bar import BaseProgressBar + from cycode.cyclient.report_client import ReportClient + + +def _poll_report_execution_until_completed( + progress_bar: 'BaseProgressBar', + client: 'ReportClient', + report_execution_id: int, + polling_timeout: Optional[int] = None, +) -> ReportExecutionSchema: + if polling_timeout is None: + polling_timeout = configuration_manager.get_report_polling_timeout_in_seconds() + + end_polling_time = time.time() + polling_timeout + while time.time() < end_polling_time: + report_execution = client.get_report_execution(report_execution_id) + report_label = report_execution.error_message or report_execution.status_message + + progress_bar.update_label(report_label) + + if report_execution.status == consts.REPORT_STATUS_COMPLETED: + return report_execution + + if report_execution.status == consts.REPORT_STATUS_ERROR: + raise ReportAsyncError(f'Error occurred while trying to generate report: {report_label}') + + time.sleep(consts.REPORT_POLLING_WAIT_INTERVAL_IN_SECONDS) + + raise ReportAsyncError(f'Timeout exceeded while waiting for report to complete. 
Timeout: {polling_timeout} sec.') + + +def send_report_feedback( + client: 'ReportClient', + start_scan_time: float, + report_type: str, + report_command_type: str, + request_report_parameters: dict, + report_execution_id: int, + error_message: Optional[str] = None, + request_zip_file_size: Optional[int] = None, + **kwargs, +) -> None: + try: + request_report_parameters.update(kwargs) + + end_scan_time = time.time() + scan_status = { + 'report_type': report_type, + 'report_command_type': report_command_type, + 'request_report_parameters': request_report_parameters, + 'operation_system': platform(), + 'error_message': error_message, + 'execution_time': int(end_scan_time - start_scan_time), + 'request_zip_file_size': request_zip_file_size, + } + + client.report_status(report_execution_id, scan_status) + except Exception as e: + logger.debug(f'Failed to send report feedback: {e}') + + +def create_sbom_report( + progress_bar: 'BaseProgressBar', + client: 'ReportClient', + report_execution_id: int, + output_file: Optional[pathlib.Path], + output_format: str, +) -> None: + report_execution = _poll_report_execution_until_completed(progress_bar, client, report_execution_id) + + progress_bar.set_section_length(SbomReportProgressBarSection.GENERATION) + + report_path = report_execution.storage_details.path + report_content = client.get_file_content(report_path) + + progress_bar.set_section_length(SbomReportProgressBarSection.RECEIVE_REPORT) + progress_bar.stop() + + sbom_report = SbomReportFile(report_path, output_format, output_file) + sbom_report.write(report_content) diff --git a/cycode/cli/commands/report/sbom/handle_errors.py b/cycode/cli/commands/report/sbom/handle_errors.py new file mode 100644 index 00000000..b9ca9084 --- /dev/null +++ b/cycode/cli/commands/report/sbom/handle_errors.py @@ -0,0 +1,47 @@ +import traceback +from typing import Optional + +import click + +from cycode.cli.exceptions import custom_exceptions +from cycode.cli.models import CliError, CliErrors +from cycode.cli.printers import ConsolePrinter + + +def handle_report_exception(context: click.Context, err: Exception) -> Optional[CliError]: + if context.obj['verbose']: + click.secho(f'Error: {traceback.format_exc()}', fg='red') + + errors: CliErrors = { + custom_exceptions.NetworkError: CliError( + code='cycode_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), + custom_exceptions.ScanAsyncError: CliError( + code='report_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), + custom_exceptions.ReportAsyncError: CliError( + code='report_error', + message='Cycode was unable to complete this report. ' + 'Please try again by executing the `cycode report` command', + ), + custom_exceptions.HttpUnauthorizedError: CliError( + code='auth_error', + message='Unable to authenticate to Cycode, your token is either invalid or has expired. 
' + 'Please re-generate your token and reconfigure it by running the `cycode configure` command', + ), + } + + if type(err) in errors: + error = errors[type(err)] + + ConsolePrinter(context).print_error(error) + return None + + if isinstance(err, click.ClickException): + raise err + + raise click.ClickException(str(err)) diff --git a/cycode/cli/commands/report/sbom/sbom_command.py b/cycode/cli/commands/report/sbom/sbom_command.py new file mode 100644 index 00000000..ecfd2782 --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_command.py @@ -0,0 +1,84 @@ +import pathlib +from typing import Optional + +import click + +from cycode.cli.commands.report.sbom.sbom_path_command import sbom_path_command +from cycode.cli.commands.report.sbom.sbom_repository_url_command import sbom_repository_url_command +from cycode.cli.config import config +from cycode.cyclient.report_client import ReportParameters + + +@click.group( + commands={ + 'path': sbom_path_command, + 'repository_url': sbom_repository_url_command, + }, + short_help='Generate SBOM report for remote repository by url or local directory by path.', +) +@click.option( + '--format', + '-f', + help='SBOM format.', + type=click.Choice(config['scans']['supported_sbom_formats']), + required=True, +) +@click.option( + '--output-format', + '-o', + default='json', + help='Specify the output file format (the default is json).', + type=click.Choice(['json']), + required=False, +) +@click.option( + '--output-file', + help='Output file (the default is autogenerated filename saved to the current directory).', + default=None, + type=click.Path(resolve_path=True, writable=True, path_type=pathlib.Path), + required=False, +) +@click.option( + '--include-vulnerabilities', + is_flag=True, + default=False, + help='Include vulnerabilities.', + type=bool, + required=False, +) +@click.option( + '--include-dev-dependencies', + is_flag=True, + default=False, + help='Include dev dependencies.', + type=bool, + required=False, +) +@click.pass_context +def sbom_command( + context: click.Context, + format: str, + output_format: Optional[str], + output_file: Optional[pathlib.Path], + include_vulnerabilities: bool, + include_dev_dependencies: bool, +) -> int: + """Generate SBOM report.""" + sbom_format_parts = format.split('-') + if len(sbom_format_parts) != 2: + raise click.ClickException('Invalid SBOM format.') + + sbom_format, sbom_format_version = sbom_format_parts + + report_parameters = ReportParameters( + entity_type='SbomCli', + sbom_report_type=sbom_format, + sbom_version=sbom_format_version, + output_format=output_format, + include_vulnerabilities=include_vulnerabilities, + include_dev_dependencies=include_dev_dependencies, + ) + context.obj['report_parameters'] = report_parameters + context.obj['output_file'] = output_file + + return 1 diff --git a/cycode/cli/commands/report/sbom/sbom_path_command.py b/cycode/cli/commands/report/sbom/sbom_path_command.py new file mode 100644 index 00000000..36b9c4d9 --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_path_command.py @@ -0,0 +1,65 @@ +import time + +import click + +from cycode.cli import consts +from cycode.cli.commands.report.sbom.common import create_sbom_report, send_report_feedback +from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception +from cycode.cli.files_collector.path_documents import get_relevant_document +from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions +from cycode.cli.files_collector.zip_documents import zip_documents 
+from cycode.cli.utils.progress_bar import SbomReportProgressBarSection + + +@click.command(short_help='Generate SBOM report for provided path in the command.') +@click.argument('path', nargs=1, type=click.Path(exists=True, resolve_path=True), required=True) +@click.pass_context +def sbom_path_command(context: click.Context, path: str) -> None: + client = context.obj['client'] + report_parameters = context.obj['report_parameters'] + output_format = report_parameters.output_format + output_file = context.obj['output_file'] + + progress_bar = context.obj['progress_bar'] + progress_bar.start() + + start_scan_time = time.time() + report_execution_id = -1 + + try: + documents = get_relevant_document( + progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, path + ) + # TODO(MarshalX): combine perform_pre_scan_documents_actions with get_relevant_document. + # unhardcode usage of context in perform_pre_scan_documents_actions + perform_pre_scan_documents_actions(context, consts.SCA_SCAN_TYPE, documents) + + zipped_documents = zip_documents(consts.SCA_SCAN_TYPE, documents) + report_execution = client.request_sbom_report_execution(report_parameters, zip_file=zipped_documents) + report_execution_id = report_execution.id + + create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format) + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + report_type='SBOM', + report_command_type='path', + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + request_zip_file_size=zipped_documents.size, + ) + except Exception as e: + progress_bar.stop() + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + report_type='SBOM', + report_command_type='path', + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + error_message=str(e), + ) + + handle_report_exception(context, e) diff --git a/cycode/cli/commands/report/sbom/sbom_report_file.py b/cycode/cli/commands/report/sbom/sbom_report_file.py new file mode 100644 index 00000000..4d58f89f --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_report_file.py @@ -0,0 +1,49 @@ +import os +import pathlib +import re +from typing import Optional + +import click + + +class SbomReportFile: + def __init__(self, storage_path: str, output_format: str, output_file: Optional[pathlib.Path]) -> None: + if output_file is None: + output_file = pathlib.Path(storage_path) + + output_ext = f'.{output_format}' + if output_file.suffix != output_ext: + output_file = output_file.with_suffix(output_ext) + + self._file_path = output_file + + def is_exists(self) -> bool: + return self._file_path.exists() + + def _prompt_overwrite(self) -> bool: + return click.confirm(f'File {self._file_path} already exists. 
Save with a different filename?', default=True) + + def _write(self, content: str) -> None: + with open(self._file_path, 'w', encoding='UTF-8') as f: + f.write(content) + + def _notify_about_saved_file(self) -> None: + click.echo(f'Report saved to {self._file_path}') + + def _find_and_set_unique_filename(self) -> None: + attempt_no = 0 + while self.is_exists(): + attempt_no += 1 + + base, ext = os.path.splitext(self._file_path) + # Remove previous suffix + base = re.sub(r'-\d+$', '', base) + + self._file_path = pathlib.Path(f'{base}-{attempt_no}{ext}') + + def write(self, content: str) -> None: + if self.is_exists() and self._prompt_overwrite(): + self._find_and_set_unique_filename() + + self._write(content) + self._notify_about_saved_file() diff --git a/cycode/cli/commands/report/sbom/sbom_repository_url_command.py b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py new file mode 100644 index 00000000..a3cb2570 --- /dev/null +++ b/cycode/cli/commands/report/sbom/sbom_repository_url_command.py @@ -0,0 +1,55 @@ +import time + +import click + +from cycode.cli.commands.report.sbom.common import create_sbom_report, send_report_feedback +from cycode.cli.commands.report.sbom.handle_errors import handle_report_exception +from cycode.cli.utils.progress_bar import SbomReportProgressBarSection + + +@click.command(short_help='Generate SBOM report for provided repository URI in the command.') +@click.argument('uri', nargs=1, type=str, required=True) +@click.pass_context +def sbom_repository_url_command(context: click.Context, uri: str) -> None: + progress_bar = context.obj['progress_bar'] + progress_bar.start() + progress_bar.set_section_length(SbomReportProgressBarSection.PREPARE_LOCAL_FILES) + + client = context.obj['client'] + report_parameters = context.obj['report_parameters'] + output_file = context.obj['output_file'] + output_format = report_parameters.output_format + + start_scan_time = time.time() + report_execution_id = -1 + + try: + report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri) + report_execution_id = report_execution.id + + create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format) + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + report_type='SBOM', + report_command_type='repository_url', + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + repository_uri=uri, + ) + except Exception as e: + progress_bar.stop() + + send_report_feedback( + client=client, + start_scan_time=start_scan_time, + report_type='SBOM', + report_command_type='repository_url', + request_report_parameters=report_parameters.to_dict(without_entity_type=False), + report_execution_id=report_execution_id, + error_message=str(e), + repository_uri=uri, + ) + + handle_report_exception(context, e) diff --git a/cycode/cli/config.yaml b/cycode/cli/config.yaml index 0ffe7abc..875f37c1 100644 --- a/cycode/cli/config.yaml +++ b/cycode/cli/config.yaml @@ -8,6 +8,10 @@ scans: supported_sca_scans: - package-vulnerabilities - license-compliance + supported_sbom_formats: + - spdx-2.2 + - spdx-2.3 + - cyclonedx-1.4 result_printer: default: lines_to_display: 3 diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index 23b7471a..9479765e 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -138,6 +138,11 @@ SCAN_BATCH_MAX_PARALLEL_SCANS = 5 SCAN_BATCH_SCANS_PER_CPU = 1 +# report with polling +REPORT_POLLING_WAIT_INTERVAL_IN_SECONDS 
= 5 +DEFAULT_REPORT_POLLING_TIMEOUT_IN_SECONDS = 600 +REPORT_POLLING_TIMEOUT_IN_SECONDS_ENV_VAR_NAME = 'REPORT_POLLING_TIMEOUT_IN_SECONDS' + # scan with polling SCAN_POLLING_WAIT_INTERVAL_IN_SECONDS = 5 DEFAULT_SCAN_POLLING_TIMEOUT_IN_SECONDS = 3600 @@ -162,6 +167,10 @@ EXCLUDE_DETECTIONS_IN_DELETED_LINES_ENV_VAR_NAME = 'EXCLUDE_DETECTIONS_IN_DELETED_LINES' DEFAULT_EXCLUDE_DETECTIONS_IN_DELETED_LINES = True +# report statuses +REPORT_STATUS_COMPLETED = 'Completed' +REPORT_STATUS_ERROR = 'Failed' + # scan statuses SCAN_STATUS_COMPLETED = 'Completed' SCAN_STATUS_ERROR = 'Error' diff --git a/cycode/cli/exceptions/custom_exceptions.py b/cycode/cli/exceptions/custom_exceptions.py index ea98a0aa..1b218353 100644 --- a/cycode/cli/exceptions/custom_exceptions.py +++ b/cycode/cli/exceptions/custom_exceptions.py @@ -28,6 +28,10 @@ def __str__(self) -> str: return f'error occurred during the scan. error message: {self.error_message}' +class ReportAsyncError(CycodeError): + pass + + class HttpUnauthorizedError(CycodeError): def __init__(self, error_message: str, response: Response) -> None: self.status_code = 401 diff --git a/cycode/cyclient/scan_config/__init__.py b/cycode/cli/files_collector/__init__.py similarity index 100% rename from cycode/cyclient/scan_config/__init__.py rename to cycode/cli/files_collector/__init__.py diff --git a/cycode/cli/files_collector/excluder.py b/cycode/cli/files_collector/excluder.py new file mode 100644 index 00000000..cbbb358f --- /dev/null +++ b/cycode/cli/files_collector/excluder.py @@ -0,0 +1,134 @@ +from typing import TYPE_CHECKING, List + +from cycode.cli import consts +from cycode.cli.config import configuration_manager +from cycode.cli.user_settings.config_file_manager import ConfigFileManager +from cycode.cli.utils.path_utils import get_file_size, is_binary_file, is_sub_path +from cycode.cli.utils.string_utils import get_content_size, is_binary_content +from cycode.cyclient import logger + +if TYPE_CHECKING: + from cycode.cli.models import Document + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection + + +def exclude_irrelevant_files( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, filenames: List[str] +) -> List[str]: + relevant_files = [] + for filename in filenames: + progress_bar.update(progress_bar_section) + if _is_relevant_file_to_scan(scan_type, filename): + relevant_files.append(filename) + + is_sub_path.cache_clear() # free up memory + + return relevant_files + + +def exclude_irrelevant_documents_to_scan(scan_type: str, documents_to_scan: List['Document']) -> List['Document']: + logger.debug('Excluding irrelevant documents to scan') + + relevant_documents = [] + for document in documents_to_scan: + if _is_relevant_document_to_scan(scan_type, document.path, document.content): + relevant_documents.append(document) + + return relevant_documents + + +def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool: + return ( + is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename) + or is_sub_path(configuration_manager.local_config_file_manager.get_config_directory_path(), filename) + or filename.endswith(ConfigFileManager.get_config_file_route()) + ) + + +def _is_path_configured_in_exclusions(scan_type: str, file_path: str) -> bool: + exclusions_by_path = configuration_manager.get_exclusions_by_scan_type(scan_type).get( + consts.EXCLUSIONS_BY_PATH_SECTION_NAME, [] + ) + return any(is_sub_path(exclusion_path, file_path) for 
exclusion_path in exclusions_by_path) + + +def _does_file_exceed_max_size_limit(filename: str) -> bool: + return get_file_size(filename) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES + + +def _does_document_exceed_max_size_limit(content: str) -> bool: + return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES + + +def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool: + if _is_subpath_of_cycode_configuration_folder(filename): + logger.debug('file is irrelevant because it is in cycode configuration directory, %s', {'filename': filename}) + return False + + if _is_path_configured_in_exclusions(scan_type, filename): + logger.debug('file is irrelevant because the file path is in the ignore paths list, %s', {'filename': filename}) + return False + + if not _is_file_extension_supported(scan_type, filename): + logger.debug('file is irrelevant because the file extension is not supported, %s', {'filename': filename}) + return False + + if is_binary_file(filename): + logger.debug('file is irrelevant because it is binary file, %s', {'filename': filename}) + return False + + if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename): + logger.debug('file is irrelevant because its exceeded max size limit, %s', {'filename': filename}) + return False + + if scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename): + return False + + return True + + +def _is_file_relevant_for_sca_scan(filename: str) -> bool: + if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS): + logger.debug("file is irrelevant because it is from node_modules's inner path, %s", {'filename': filename}) + return False + + return True + + +def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool: + if _is_subpath_of_cycode_configuration_folder(filename): + logger.debug( + 'document is irrelevant because it is in cycode configuration directory, %s', {'filename': filename} + ) + return False + + if _is_path_configured_in_exclusions(scan_type, filename): + logger.debug( + 'document is irrelevant because the document path is in the ignore paths list, %s', {'filename': filename} + ) + return False + + if not _is_file_extension_supported(scan_type, filename): + logger.debug('document is irrelevant because the file extension is not supported, %s', {'filename': filename}) + return False + + if is_binary_content(content): + logger.debug('document is irrelevant because it is binary, %s', {'filename': filename}) + return False + + if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content): + logger.debug('document is irrelevant because its exceeded max size limit, %s', {'filename': filename}) + return False + return True + + +def _is_file_extension_supported(scan_type: str, filename: str) -> bool: + filename = filename.lower() + + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: + return filename.endswith(consts.INFRA_CONFIGURATION_SCAN_SUPPORTED_FILES) + + if scan_type == consts.SCA_SCAN_TYPE: + return filename.endswith(consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES) + + return not filename.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE) diff --git a/cycode/cli/files_collector/iac/__init__.py b/cycode/cli/files_collector/iac/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/helpers/tf_content_generator.py b/cycode/cli/files_collector/iac/tf_content_generator.py similarity index 73% rename from cycode/cli/helpers/tf_content_generator.py rename 
to cycode/cli/files_collector/iac/tf_content_generator.py index 7594a96f..4df6d827 100644 --- a/cycode/cli/helpers/tf_content_generator.py +++ b/cycode/cli/files_collector/iac/tf_content_generator.py @@ -1,13 +1,34 @@ import json +import time from typing import List +from cycode.cli import consts from cycode.cli.exceptions.custom_exceptions import TfplanKeyError from cycode.cli.models import ResourceChange -from cycode.cli.utils.path_utils import load_json +from cycode.cli.utils.path_utils import change_filename_extension, load_json ACTIONS_TO_OMIT_RESOURCE = ['delete'] +def generate_tfplan_document_name(path: str) -> str: + document_name = change_filename_extension(path, 'tf') + timestamp = int(time.time()) + return f'{timestamp}-{document_name}' + + +def is_iac(scan_type: str) -> bool: + return scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE + + +def is_tfplan_file(file: str, content: str) -> bool: + if not file.endswith('.json'): + return False + tf_plan = load_json(content) + if not isinstance(tf_plan, dict): + return False + return 'resource_changes' in tf_plan + + def generate_tf_content_from_tfplan(filename: str, tfplan: str) -> str: planned_resources = _extract_resources(tfplan, filename) return _generate_tf_content(planned_resources) diff --git a/cycode/cli/files_collector/models/__init__.py b/cycode/cli/files_collector/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/zip_file.py b/cycode/cli/files_collector/models/in_memory_zip.py similarity index 74% rename from cycode/cli/zip_file.py rename to cycode/cli/files_collector/models/in_memory_zip.py index 7d659c8e..410d00ca 100644 --- a/cycode/cli/zip_file.py +++ b/cycode/cli/files_collector/models/in_memory_zip.py @@ -1,8 +1,10 @@ -import os.path from io import BytesIO +from sys import getsizeof from typing import Optional from zipfile import ZIP_DEFLATED, ZipFile +from cycode.cli.utils.path_utils import concat_unique_id + class InMemoryZip(object): def __init__(self) -> None: @@ -25,10 +27,6 @@ def read(self) -> bytes: self.in_memory_zip.seek(0) return self.in_memory_zip.read() - -def concat_unique_id(filename: str, unique_id: str) -> str: - if filename.startswith(os.sep): - # remove leading slash to join the path correctly - filename = filename[len(os.sep) :] - - return os.path.join(unique_id, filename) + @property + def size(self) -> int: + return getsizeof(self.in_memory_zip) diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py new file mode 100644 index 00000000..a0df5ac0 --- /dev/null +++ b/cycode/cli/files_collector/path_documents.py @@ -0,0 +1,112 @@ +import os +from typing import TYPE_CHECKING, Iterable, List + +import pathspec + +from cycode.cli.files_collector.excluder import exclude_irrelevant_files +from cycode.cli.files_collector.iac.tf_content_generator import ( + generate_tf_content_from_tfplan, + generate_tfplan_document_name, + is_iac, + is_tfplan_file, +) +from cycode.cli.models import Document +from cycode.cli.utils.path_utils import get_absolute_path, get_file_content +from cycode.cyclient import logger + +if TYPE_CHECKING: + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection + + +def _get_all_existing_files_in_directory(path: str) -> List[str]: + files: List[str] = [] + + for root, _, filenames in os.walk(path): + for filename in filenames: + files.append(os.path.join(root, filename)) + + return files + + +def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> 
List[str]: + absolute_path = get_absolute_path(path) + + if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): + raise FileNotFoundError(f'the specified path was not found, path: {absolute_path}') + + if os.path.isfile(absolute_path): + return [absolute_path] + + all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) + + path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) + excluded_file_paths = set(path_spec.match_files(all_file_paths)) + + relevant_file_paths = all_file_paths - excluded_file_paths + + return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] + + +def _get_relevant_files( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, path: str +) -> List[str]: + all_files_to_scan = _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) + + # we double the progress bar section length because we are going to process the files twice + # first time to get the file list with respect to excluded patterns (excluding takes seconds to execute) + # second time to get the files content + progress_bar_section_len = len(all_files_to_scan) * 2 + progress_bar.set_section_length(progress_bar_section, progress_bar_section_len) + + relevant_files_to_scan = exclude_irrelevant_files(progress_bar, progress_bar_section, scan_type, all_files_to_scan) + + # after finishing the first processing (excluding), + # we must update the progress bar stage with respect to excluded files. + # now it's possible that we will not process 2x the files count + # because some of them were excluded, so we should subtract the excluded files count + # from the progress bar section length + excluded_files_count = len(all_files_to_scan) - len(relevant_files_to_scan) + progress_bar_section_len = progress_bar_section_len - excluded_files_count + progress_bar.set_section_length(progress_bar_section, progress_bar_section_len) + + logger.debug( + 'Found all relevant files for scanning %s', {'path': path, 'file_to_scan_count': len(relevant_files_to_scan)} + ) + + return relevant_files_to_scan + + +def _generate_document(file: str, scan_type: str, content: str, is_git_diff: bool) -> Document: + if is_iac(scan_type) and is_tfplan_file(file, content): + return _handle_tfplan_file(file, content, is_git_diff) + + return Document(file, content, is_git_diff) + + +def _handle_tfplan_file(file: str, content: str, is_git_diff: bool) -> Document: + document_name = generate_tfplan_document_name(file) + tf_content = generate_tf_content_from_tfplan(file, content) + return Document(document_name, tf_content, is_git_diff) + + +def get_relevant_document( + progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + scan_type: str, + path: str, + *, + is_git_diff: bool = False, +) -> List[Document]: + relevant_files = _get_relevant_files(progress_bar, progress_bar_section, scan_type, path) + + documents: List[Document] = [] + for file in relevant_files: + progress_bar.update(progress_bar_section) + + content = get_file_content(file) + if not content: + continue + + documents.append(_generate_document(file, scan_type, content, is_git_diff)) + + return documents diff --git a/cycode/cli/files_collector/repository_documents.py b/cycode/cli/files_collector/repository_documents.py new file mode 100644 index 00000000..acd9c225 --- /dev/null +++ b/cycode/cli/files_collector/repository_documents.py @@ -0,0 +1,140 @@ +import os +from typing import 
TYPE_CHECKING, Iterator, List, Optional, Tuple, Union + +from cycode.cli import consts +from cycode.cli.files_collector.sca import sca_code_scanner +from cycode.cli.models import Document +from cycode.cli.utils.path_utils import get_file_content, get_path_by_os + +if TYPE_CHECKING: + from git import Blob, Diff + from git.objects.base import IndexObjUnion + from git.objects.tree import TraversedTreeTup + + from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection + +from git import Repo + + +def should_process_git_object(obj: 'Blob', _: int) -> bool: + return obj.type == 'blob' and obj.size > 0 + + +def get_git_repository_tree_file_entries( + path: str, branch: str +) -> Union[Iterator['IndexObjUnion'], Iterator['TraversedTreeTup']]: + return Repo(path).tree(branch).traverse(predicate=should_process_git_object) + + +def parse_commit_range(commit_range: str, path: str) -> Tuple[str, str]: + from_commit_rev = None + to_commit_rev = None + + for commit in Repo(path).iter_commits(rev=commit_range): + if not to_commit_rev: + to_commit_rev = commit.hexsha + from_commit_rev = commit.hexsha + + return from_commit_rev, to_commit_rev + + +def get_diff_file_path(file: 'Diff') -> Optional[str]: + return file.b_path if file.b_path else file.a_path + + +def get_diff_file_content(file: 'Diff') -> str: + return file.diff.decode('UTF-8', errors='replace') + + +def get_pre_commit_modified_documents( + progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection' +) -> Tuple[List[Document], List[Document]]: + git_head_documents = [] + pre_committed_documents = [] + + repo = Repo(os.getcwd()) + diff_files = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True) + progress_bar.set_section_length(progress_bar_section, len(diff_files)) + for file in diff_files: + progress_bar.update(progress_bar_section) + + diff_file_path = get_diff_file_path(file) + file_path = get_path_by_os(diff_file_path) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, consts.GIT_HEAD_COMMIT_REV, diff_file_path) + if file_content is not None: + git_head_documents.append(Document(file_path, file_content)) + + if os.path.exists(file_path): + file_content = get_file_content(file_path) + pre_committed_documents.append(Document(file_path, file_content)) + + return git_head_documents, pre_committed_documents + + +def get_commit_range_modified_documents( + progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + path: str, + from_commit_rev: str, + to_commit_rev: str, +) -> Tuple[List[Document], List[Document]]: + from_commit_documents = [] + to_commit_documents = [] + + repo = Repo(path) + diff = repo.commit(from_commit_rev).diff(to_commit_rev) + + modified_files_diff = [ + change for change in diff if change.change_type != consts.COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE + ] + progress_bar.set_section_length(progress_bar_section, len(modified_files_diff)) + for blob in modified_files_diff: + progress_bar.update(progress_bar_section) + + diff_file_path = get_diff_file_path(blob) + file_path = get_path_by_os(diff_file_path) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, from_commit_rev, diff_file_path) + if file_content is not None: + from_commit_documents.append(Document(file_path, file_content)) + + file_content = sca_code_scanner.get_file_content_from_commit(repo, to_commit_rev, diff_file_path) + if file_content is not None: + to_commit_documents.append(Document(file_path, file_content)) + + return from_commit_documents, 
to_commit_documents + + +def calculate_pre_receive_commit_range(branch_update_details: str) -> Optional[str]: + end_commit = _get_end_commit_from_branch_update_details(branch_update_details) + + # branch is deleted, no need to perform scan + if end_commit == consts.EMPTY_COMMIT_SHA: + return None + + start_commit = _get_oldest_unupdated_commit_for_branch(end_commit) + + # no new commit to update found + if not start_commit: + return None + + return f'{start_commit}~1...{end_commit}' + + +def _get_end_commit_from_branch_update_details(update_details: str) -> str: + # update details pattern: + _, end_commit, _ = update_details.split() + return end_commit + + +def _get_oldest_unupdated_commit_for_branch(commit: str) -> Optional[str]: + # get a list of commits by chronological order that are not in the remote repository yet + # more info about rev-list command: https://git-scm.com/docs/git-rev-list + not_updated_commits = Repo(os.getcwd()).git.rev_list(commit, '--topo-order', '--reverse', '--not', '--all') + + commits = not_updated_commits.splitlines() + if not commits: + return None + + return commits[0] diff --git a/cycode/cli/files_collector/sca/__init__.py b/cycode/cli/files_collector/sca/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/files_collector/sca/maven/__init__.py b/cycode/cli/files_collector/sca/maven/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cycode/cli/helpers/maven/base_restore_maven_dependencies.py b/cycode/cli/files_collector/sca/maven/base_restore_maven_dependencies.py similarity index 100% rename from cycode/cli/helpers/maven/base_restore_maven_dependencies.py rename to cycode/cli/files_collector/sca/maven/base_restore_maven_dependencies.py diff --git a/cycode/cli/helpers/maven/restore_gradle_dependencies.py b/cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py similarity index 88% rename from cycode/cli/helpers/maven/restore_gradle_dependencies.py rename to cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py index f8cd2fec..ef975ba5 100644 --- a/cycode/cli/helpers/maven/restore_gradle_dependencies.py +++ b/cycode/cli/files_collector/sca/maven/restore_gradle_dependencies.py @@ -2,7 +2,7 @@ import click -from cycode.cli.helpers.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies +from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies from cycode.cli.models import Document BUILD_GRADLE_FILE_NAME = 'build.gradle' diff --git a/cycode/cli/helpers/maven/restore_maven_dependencies.py b/cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py similarity index 97% rename from cycode/cli/helpers/maven/restore_maven_dependencies.py rename to cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py index d8e6675f..0e21df12 100644 --- a/cycode/cli/helpers/maven/restore_maven_dependencies.py +++ b/cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py @@ -3,7 +3,7 @@ import click -from cycode.cli.helpers.maven.base_restore_maven_dependencies import ( +from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import ( BaseRestoreMavenDependencies, build_dep_tree_path, execute_command, diff --git a/cycode/cli/helpers/sca_code_scanner.py b/cycode/cli/files_collector/sca/sca_code_scanner.py similarity index 88% rename from cycode/cli/helpers/sca_code_scanner.py rename to cycode/cli/files_collector/sca/sca_code_scanner.py index 227b553e..a6aa6b78 100644 --- 
a/cycode/cli/helpers/sca_code_scanner.py +++ b/cycode/cli/files_collector/sca/sca_code_scanner.py @@ -5,14 +5,14 @@ from git import GitCommandError, Repo from cycode.cli import consts -from cycode.cli.helpers.maven.restore_gradle_dependencies import RestoreGradleDependencies -from cycode.cli.helpers.maven.restore_maven_dependencies import RestoreMavenDependencies +from cycode.cli.files_collector.sca.maven.restore_gradle_dependencies import RestoreGradleDependencies +from cycode.cli.files_collector.sca.maven.restore_maven_dependencies import RestoreMavenDependencies from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_file_content, get_file_dir, join_paths from cycode.cyclient import logger if TYPE_CHECKING: - from cycode.cli.helpers.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies + from cycode.cli.files_collector.sca.maven.base_restore_maven_dependencies import BaseRestoreMavenDependencies BUILD_GRADLE_FILE_NAME = 'build.gradle' BUILD_GRADLE_KTS_FILE_NAME = 'build.gradle.kts' @@ -141,3 +141,11 @@ def get_file_content_from_commit(repo: Repo, commit: str, file_path: str) -> Opt return repo.git.show(f'{commit}:{file_path}') except GitCommandError: return None + + +def perform_pre_scan_documents_actions( + context: click.Context, scan_type: str, documents_to_scan: List[Document], is_git_diff: bool = False +) -> None: + if scan_type == consts.SCA_SCAN_TYPE and not context.obj.get(consts.SCA_SKIP_RESTORE_DEPENDENCIES_FLAG): + logger.debug('Perform pre scan document add_dependencies_tree_document action') + add_dependencies_tree_document(context, documents_to_scan, is_git_diff) diff --git a/cycode/cli/files_collector/zip_documents.py b/cycode/cli/files_collector/zip_documents.py new file mode 100644 index 00000000..b2b252f4 --- /dev/null +++ b/cycode/cli/files_collector/zip_documents.py @@ -0,0 +1,40 @@ +import time +from typing import List, Optional + +from cycode.cli import consts +from cycode.cli.exceptions import custom_exceptions +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cli.models import Document +from cycode.cyclient import logger + + +def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None: + if scan_type == consts.SCA_SCAN_TYPE: + if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES: + raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES) + else: + if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES: + raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES) + + +def zip_documents(scan_type: str, documents: List[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip: + if zip_file is None: + zip_file = InMemoryZip() + + start_zip_creation_time = time.time() + + for index, document in enumerate(documents): + _validate_zip_file_size(scan_type, zip_file.size) + + logger.debug( + 'adding file to zip, %s', {'index': index, 'filename': document.path, 'unique_id': document.unique_id} + ) + zip_file.append(document.path, document.unique_id, document.content) + + zip_file.close() + + end_zip_creation_time = time.time() + zip_creation_time = int(end_zip_creation_time - start_zip_creation_time) + logger.debug('finished to create zip file, %s', {'zip_creation_time': zip_creation_time}) + + return zip_file diff --git a/cycode/cli/main.py b/cycode/cli/main.py index 94f3ff29..efa2b200 100644 --- a/cycode/cli/main.py +++ b/cycode/cli/main.py @@ -1,13 +1,14 @@ import json import logging import sys -from typing import 
TYPE_CHECKING, List, Optional, Tuple +from typing import List, Optional import click from cycode import __version__ from cycode.cli import code_scanner from cycode.cli.auth.auth_command import authenticate +from cycode.cli.commands.report.report_command import report_command from cycode.cli.config import config from cycode.cli.consts import ( CLI_CONTEXT_SETTINGS, @@ -18,17 +19,13 @@ ) from cycode.cli.models import Severity from cycode.cli.user_settings.configuration_manager import ConfigurationManager -from cycode.cli.user_settings.credentials_manager import CredentialsManager from cycode.cli.user_settings.user_settings_commands import add_exclusions, set_credentials from cycode.cli.utils import scan_utils -from cycode.cli.utils.progress_bar import get_progress_bar +from cycode.cli.utils.get_api_client import get_scan_cycode_client +from cycode.cli.utils.progress_bar import SCAN_PROGRESS_BAR_SECTIONS, get_progress_bar from cycode.cyclient.config import set_logging_level from cycode.cyclient.cycode_client_base import CycodeClientBase from cycode.cyclient.models import UserAgentOptionScheme -from cycode.cyclient.scan_config.scan_config_creator import create_scan_client - -if TYPE_CHECKING: - from cycode.cyclient.scan_client import ScanClient @click.group( @@ -137,7 +134,7 @@ def code_scan( else: context.obj['soft_fail'] = config['soft_fail'] - context.obj['client'] = get_cycode_client(client_id, secret, not context.obj['show_secret']) + context.obj['client'] = get_scan_cycode_client(client_id, secret, not context.obj['show_secret']) context.obj['scan_type'] = scan_type context.obj['severity_threshold'] = severity_threshold context.obj['monitor'] = monitor @@ -185,6 +182,7 @@ def version(context: click.Context) -> None: @click.group( commands={ 'scan': code_scan, + 'report': report_command, 'configure': set_credentials, 'ignore': add_exclusions, 'auth': authenticate, @@ -234,29 +232,13 @@ def main_cli( if output == 'json': no_progress_meter = True - context.obj['progress_bar'] = get_progress_bar(hidden=no_progress_meter) + context.obj['progress_bar'] = get_progress_bar(hidden=no_progress_meter, sections=SCAN_PROGRESS_BAR_SECTIONS) if user_agent: user_agent_option = UserAgentOptionScheme().loads(user_agent) CycodeClientBase.enrich_user_agent(user_agent_option.user_agent_suffix) -def get_cycode_client(client_id: str, client_secret: str, hide_response_log: bool) -> 'ScanClient': - if not client_id or not client_secret: - client_id, client_secret = _get_configured_credentials() - if not client_id: - raise click.ClickException('Cycode client id needed.') - if not client_secret: - raise click.ClickException('Cycode client secret is needed.') - - return create_scan_client(client_id, client_secret, hide_response_log) - - -def _get_configured_credentials() -> Tuple[str, str]: - credentials_manager = CredentialsManager() - return credentials_manager.get_credentials() - - def _should_fail_scan(context: click.Context) -> bool: return scan_utils.is_scan_failed(context) diff --git a/cycode/cli/user_settings/configuration_manager.py b/cycode/cli/user_settings/configuration_manager.py index 98e62e07..65da08fc 100644 --- a/cycode/cli/user_settings/configuration_manager.py +++ b/cycode/cli/user_settings/configuration_manager.py @@ -103,6 +103,13 @@ def get_scan_polling_timeout_in_seconds(self) -> int: ) ) + def get_report_polling_timeout_in_seconds(self) -> int: + return int( + self._get_value_from_environment_variables( + consts.REPORT_POLLING_TIMEOUT_IN_SECONDS_ENV_VAR_NAME, 
consts.DEFAULT_REPORT_POLLING_TIMEOUT_IN_SECONDS + ) + ) + def get_sca_pre_commit_timeout_in_seconds(self) -> int: return int( self._get_value_from_environment_variables( diff --git a/cycode/cli/utils/get_api_client.py b/cycode/cli/utils/get_api_client.py new file mode 100644 index 00000000..7bbfa2d9 --- /dev/null +++ b/cycode/cli/utils/get_api_client.py @@ -0,0 +1,40 @@ +from typing import TYPE_CHECKING, Optional, Tuple, Union + +import click + +from cycode.cli.user_settings.credentials_manager import CredentialsManager +from cycode.cyclient.client_creator import create_report_client, create_scan_client + +if TYPE_CHECKING: + from cycode.cyclient.report_client import ReportClient + from cycode.cyclient.scan_client import ScanClient + + +def _get_cycode_client( + create_client_func: callable, client_id: Optional[str], client_secret: Optional[str], hide_response_log: bool +) -> Union['ScanClient', 'ReportClient']: + if not client_id or not client_secret: + client_id, client_secret = _get_configured_credentials() + if not client_id: + raise click.ClickException('Cycode client id needed.') + if not client_secret: + raise click.ClickException('Cycode client secret is needed.') + + return create_client_func(client_id, client_secret, hide_response_log) + + +def get_scan_cycode_client( + client_id: Optional[str] = None, client_secret: Optional[str] = None, hide_response_log: bool = True +) -> 'ScanClient': + return _get_cycode_client(create_scan_client, client_id, client_secret, hide_response_log) + + +def get_report_cycode_client( + client_id: Optional[str] = None, client_secret: Optional[str] = None, hide_response_log: bool = True +) -> 'ReportClient': + return _get_cycode_client(create_report_client, client_id, client_secret, hide_response_log) + + +def _get_configured_credentials() -> Tuple[str, str]: + credentials_manager = CredentialsManager() + return credentials_manager.get_credentials() diff --git a/cycode/cli/utils/path_utils.py b/cycode/cli/utils/path_utils.py index ad5ce94e..e0cedc88 100644 --- a/cycode/cli/utils/path_utils.py +++ b/cycode/cli/utils/path_utils.py @@ -1,31 +1,11 @@ import json import os from functools import lru_cache -from typing import AnyStr, Iterable, List, Optional +from typing import AnyStr, List, Optional -import pathspec from binaryornot.check import is_binary -def get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]: - absolute_path = get_absolute_path(path) - - if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): - raise FileNotFoundError(f'the specified path was not found, path: {absolute_path}') - - if os.path.isfile(absolute_path): - return [absolute_path] - - all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) - - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - excluded_file_paths = set(path_spec.match_files(all_file_paths)) - - relevant_file_paths = all_file_paths - excluded_file_paths - - return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] - - @lru_cache(maxsize=None) def is_sub_path(path: str, sub_path: str) -> bool: try: @@ -54,16 +34,6 @@ def get_path_by_os(filename: str) -> str: return filename.replace('/', os.sep) -def _get_all_existing_files_in_directory(path: str) -> List[str]: - files: List[str] = [] - - for root, _, filenames in os.walk(path): - for filename in filenames: - files.append(os.path.join(root, filename)) - - return files - - def is_path_exists(path: str) -> bool: return 
os.path.exists(path) @@ -98,3 +68,11 @@ def load_json(txt: str) -> Optional[dict]: def change_filename_extension(filename: str, extension: str) -> str: base_name, _ = os.path.splitext(filename) return f'{base_name}.{extension}' + + +def concat_unique_id(filename: str, unique_id: str) -> str: + if filename.startswith(os.sep): + # remove leading slash to join the path correctly + filename = filename[len(os.sep) :] + + return os.path.join(unique_id, filename) diff --git a/cycode/cli/utils/progress_bar.py b/cycode/cli/utils/progress_bar.py index 083d0715..b0e94d92 100644 --- a/cycode/cli/utils/progress_bar.py +++ b/cycode/cli/utils/progress_bar.py @@ -16,15 +16,11 @@ class ProgressBarSection(AutoCountEnum): - PREPARE_LOCAL_FILES = auto() - SCAN = auto() - GENERATE_REPORT = auto() - def has_next(self) -> bool: - return self.value < len(ProgressBarSection) - 1 + return self.value < len(type(self)) - 1 def next(self) -> 'ProgressBarSection': - return ProgressBarSection(self.value + 1) + return type(self)(self.value + 1) class ProgressBarSectionInfo(NamedTuple): @@ -32,25 +28,62 @@ class ProgressBarSectionInfo(NamedTuple): label: str start_percent: int stop_percent: int + initial: bool = False _PROGRESS_BAR_LENGTH = 100 -_PROGRESS_BAR_SECTIONS = { - ProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( - ProgressBarSection.PREPARE_LOCAL_FILES, 'Prepare local files', start_percent=0, stop_percent=5 +ProgressBarSections = Dict[ProgressBarSection, ProgressBarSectionInfo] + + +class ScanProgressBarSection(ProgressBarSection): + PREPARE_LOCAL_FILES = auto() + SCAN = auto() + GENERATE_REPORT = auto() + + +SCAN_PROGRESS_BAR_SECTIONS: ProgressBarSections = { + ScanProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( + ScanProgressBarSection.PREPARE_LOCAL_FILES, 'Prepare local files', start_percent=0, stop_percent=5, initial=True + ), + ScanProgressBarSection.SCAN: ProgressBarSectionInfo( + ScanProgressBarSection.SCAN, 'Scan in progress', start_percent=5, stop_percent=95 + ), + ScanProgressBarSection.GENERATE_REPORT: ProgressBarSectionInfo( + ScanProgressBarSection.GENERATE_REPORT, 'Generate report', start_percent=95, stop_percent=100 + ), +} + + +class SbomReportProgressBarSection(ProgressBarSection): + PREPARE_LOCAL_FILES = auto() + GENERATION = auto() + RECEIVE_REPORT = auto() + + +SBOM_REPORT_PROGRESS_BAR_SECTIONS: ProgressBarSections = { + SbomReportProgressBarSection.PREPARE_LOCAL_FILES: ProgressBarSectionInfo( + SbomReportProgressBarSection.PREPARE_LOCAL_FILES, + 'Prepare local files', + start_percent=0, + stop_percent=30, + initial=True, ), - ProgressBarSection.SCAN: ProgressBarSectionInfo( - ProgressBarSection.SCAN, 'Scan in progress', start_percent=5, stop_percent=95 + SbomReportProgressBarSection.GENERATION: ProgressBarSectionInfo( + SbomReportProgressBarSection.GENERATION, 'Report generation in progress', start_percent=30, stop_percent=90 ), - ProgressBarSection.GENERATE_REPORT: ProgressBarSectionInfo( - ProgressBarSection.GENERATE_REPORT, 'Generate report', start_percent=95, stop_percent=100 + SbomReportProgressBarSection.RECEIVE_REPORT: ProgressBarSectionInfo( + SbomReportProgressBarSection.RECEIVE_REPORT, 'Receive report', start_percent=90, stop_percent=100 ), } -def _get_section_length(section: 'ProgressBarSection') -> int: - return _PROGRESS_BAR_SECTIONS[section].stop_percent - _PROGRESS_BAR_SECTIONS[section].start_percent +def _get_initial_section(progress_bar_sections: ProgressBarSections) -> ProgressBarSectionInfo: + for section in progress_bar_sections.values(): + 
if section.initial: + return section + + raise ValueError('No initial section found') class BaseProgressBar(ABC): @@ -75,13 +108,17 @@ def stop(self) -> None: ... @abstractmethod - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: ... @abstractmethod def update(self, section: 'ProgressBarSection') -> None: ... + @abstractmethod + def update_label(self, label: Optional[str] = None) -> None: + ... + class DummyProgressBar(BaseProgressBar): def __init__(self, *args, **kwargs) -> None: @@ -99,16 +136,22 @@ def start(self) -> None: def stop(self) -> None: pass - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: pass def update(self, section: 'ProgressBarSection') -> None: pass + def update_label(self, label: Optional[str] = None) -> None: + pass + class CompositeProgressBar(BaseProgressBar): - def __init__(self) -> None: + def __init__(self, progress_bar_sections: ProgressBarSections) -> None: super().__init__() + + self._progress_bar_sections = progress_bar_sections + self._progress_bar_context_manager = click.progressbar( length=_PROGRESS_BAR_LENGTH, item_show_func=self._progress_bar_item_show_func, @@ -121,7 +164,7 @@ def __init__(self) -> None: self._section_values: Dict[ProgressBarSection, int] = {} self._current_section_value = 0 - self._current_section: ProgressBarSectionInfo = _PROGRESS_BAR_SECTIONS[ProgressBarSection.PREPARE_LOCAL_FILES] + self._current_section: ProgressBarSectionInfo = _get_initial_section(self._progress_bar_sections) def __enter__(self) -> 'CompositeProgressBar': self._progress_bar = self._progress_bar_context_manager.__enter__() @@ -140,7 +183,7 @@ def stop(self) -> None: if self._run: self.__exit__(None, None, None) - def set_section_length(self, section: 'ProgressBarSection', length: int) -> None: + def set_section_length(self, section: 'ProgressBarSection', length: int = 0) -> None: logger.debug(f'set_section_length: {section} {length}') self._section_lengths[section] = length @@ -149,8 +192,12 @@ def set_section_length(self, section: 'ProgressBarSection', length: int) -> None else: self._maybe_update_current_section() + def _get_section_length(self, section: 'ProgressBarSection') -> int: + section_info = self._progress_bar_sections[section] + return section_info.stop_percent - section_info.start_percent + def _skip_section(self, section: 'ProgressBarSection') -> None: - self._progress_bar.update(_get_section_length(section)) + self._progress_bar.update(self._get_section_length(section)) self._maybe_update_current_section() def _increment_section_value(self, section: 'ProgressBarSection', value: int) -> None: @@ -164,7 +211,7 @@ def _rerender_progress_bar(self) -> None: """Used to update label right after changing the progress bar section.""" self._progress_bar.update(0) - def _increment_progress(self, section: ProgressBarSection) -> None: + def _increment_progress(self, section: 'ProgressBarSection') -> None: increment_value = self._get_increment_progress_value(section) self._current_section_value += increment_value @@ -177,7 +224,7 @@ def _maybe_update_current_section(self) -> None: max_val = self._section_lengths.get(self._current_section.section, 0) cur_val = self._section_values.get(self._current_section.section, 0) if cur_val >= max_val: - next_section = _PROGRESS_BAR_SECTIONS[self._current_section.section.next()] + 
next_section = self._progress_bar_sections[self._current_section.section.next()] logger.debug(f'_update_current_section: {self._current_section.section} -> {next_section.section}') self._current_section = next_section @@ -188,7 +235,7 @@ def _get_increment_progress_value(self, section: 'ProgressBarSection') -> int: max_val = self._section_lengths[section] cur_val = self._section_values[section] - expected_value = round(_get_section_length(section) * (cur_val / max_val)) + expected_value = round(self._get_section_length(section) * (cur_val / max_val)) return expected_value - self._current_section_value @@ -210,12 +257,19 @@ def update(self, section: 'ProgressBarSection', value: int = 1) -> None: self._increment_progress(section) self._maybe_update_current_section() + def update_label(self, label: Optional[str] = None) -> None: + if not self._progress_bar: + raise ValueError('Progress bar is not initialized. Call start() first or use "with" statement.') + + self._progress_bar.label = label or '' + self._progress_bar.render_progress() -def get_progress_bar(*, hidden: bool) -> BaseProgressBar: + +def get_progress_bar(*, hidden: bool, sections: ProgressBarSections) -> BaseProgressBar: if hidden: return DummyProgressBar() - return CompositeProgressBar() + return CompositeProgressBar(sections) if __name__ == '__main__': @@ -223,15 +277,18 @@ def get_progress_bar(*, hidden: bool) -> BaseProgressBar: import random import time - bar = get_progress_bar(hidden=False) + bar = get_progress_bar(hidden=False, sections=SCAN_PROGRESS_BAR_SECTIONS) bar.start() - for bar_section in ProgressBarSection: + for bar_section in ScanProgressBarSection: section_capacity = random.randint(500, 1000) # noqa: S311 bar.set_section_length(bar_section, section_capacity) for _i in range(section_capacity): time.sleep(0.01) + bar.update_label(f'{bar_section} {_i}/{section_capacity}') bar.update(bar_section) + bar.update_label() + bar.stop() diff --git a/cycode/cli/utils/scan_batch.py b/cycode/cli/utils/scan_batch.py index 4c839440..ede229e2 100644 --- a/cycode/cli/utils/scan_batch.py +++ b/cycode/cli/utils/scan_batch.py @@ -9,7 +9,7 @@ SCAN_BATCH_SCANS_PER_CPU, ) from cycode.cli.models import Document -from cycode.cli.utils.progress_bar import ProgressBarSection +from cycode.cli.utils.progress_bar import ScanProgressBarSection if TYPE_CHECKING: from cycode.cli.models import CliError, LocalScanResult @@ -56,7 +56,7 @@ def run_parallel_batched_scan( max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT, ) -> Tuple[Dict[str, 'CliError'], List['LocalScanResult']]: batches = split_documents_into_batches(documents, max_size_mb, max_files_count) - progress_bar.set_section_length(ProgressBarSection.SCAN, len(batches)) # * 3 + progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3 # TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps: # 1. scan creation # 2. 
scan completion @@ -73,6 +73,6 @@ def run_parallel_batched_scan( if err: cli_errors[scan_id] = err - progress_bar.update(ProgressBarSection.SCAN) + progress_bar.update(ScanProgressBarSection.SCAN) return cli_errors, local_scan_results diff --git a/cycode/cyclient/client_creator.py b/cycode/cyclient/client_creator.py new file mode 100644 index 00000000..da62bd5a --- /dev/null +++ b/cycode/cyclient/client_creator.py @@ -0,0 +1,23 @@ +from cycode.cyclient.config import dev_mode +from cycode.cyclient.config_dev import DEV_CYCODE_API_URL +from cycode.cyclient.cycode_dev_based_client import CycodeDevBasedClient +from cycode.cyclient.cycode_token_based_client import CycodeTokenBasedClient +from cycode.cyclient.report_client import ReportClient +from cycode.cyclient.scan_client import ScanClient +from cycode.cyclient.scan_config_base import DefaultScanConfig, DevScanConfig + + +def create_scan_client(client_id: str, client_secret: str, hide_response_log: bool) -> ScanClient: + if dev_mode: + client = CycodeDevBasedClient(DEV_CYCODE_API_URL) + scan_config = DevScanConfig() + else: + client = CycodeTokenBasedClient(client_id, client_secret) + scan_config = DefaultScanConfig() + + return ScanClient(client, scan_config, hide_response_log) + + +def create_report_client(client_id: str, client_secret: str, hide_response_log: bool) -> ReportClient: + client = CycodeDevBasedClient(DEV_CYCODE_API_URL) if dev_mode else CycodeTokenBasedClient(client_id, client_secret) + return ReportClient(client, hide_response_log) diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py index f5983083..0401e3fb 100644 --- a/cycode/cyclient/models.py +++ b/cycode/cyclient/models.py @@ -344,3 +344,63 @@ def user_agent_suffix(self) -> str: f'EnvName: {self.env_name}; EnvVersion: {self.env_version}' f')' ) + + +@dataclass +class SbomReportStorageDetails: + path: str + folder: str + size: int + + +class SbomReportStorageDetailsSchema(Schema): + class Meta: + unknown = EXCLUDE + + path = fields.String() + folder = fields.String() + size = fields.Integer() + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> SbomReportStorageDetails: + return SbomReportStorageDetails(**data) + + +@dataclass +class ReportExecution: + id: int + status: str + error_message: Optional[str] = None + status_message: Optional[str] = None + storage_details: Optional[SbomReportStorageDetails] = None + + +class ReportExecutionSchema(Schema): + class Meta: + unknown = EXCLUDE + + id = fields.Integer() + status = fields.String() + error_message = fields.String(allow_none=True) + status_message = fields.String(allow_none=True) + storage_details = fields.Nested(SbomReportStorageDetailsSchema, allow_none=True) + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> ReportExecution: + return ReportExecution(**data) + + +@dataclass +class SbomReport: + report_executions: List[ReportExecution] + + +class RequestedSbomReportResultSchema(Schema): + class Meta: + unknown = EXCLUDE + + report_executions = fields.List(fields.Nested(ReportExecutionSchema)) + + @post_load + def build_dto(self, data: Dict[str, Any], **_) -> SbomReport: + return SbomReport(**data) diff --git a/cycode/cyclient/report_client.py b/cycode/cyclient/report_client.py new file mode 100644 index 00000000..ade7d850 --- /dev/null +++ b/cycode/cyclient/report_client.py @@ -0,0 +1,101 @@ +import dataclasses +import json +from typing import List, Optional + +from requests import Response + +from cycode.cli.exceptions.custom_exceptions import CycodeError +from 
cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cyclient import models +from cycode.cyclient.cycode_client_base import CycodeClientBase + + +@dataclasses.dataclass +class ReportParameters: + entity_type: str + sbom_report_type: str + sbom_version: str + output_format: str + include_vulnerabilities: bool + include_dev_dependencies: bool + + def to_dict(self, *, without_entity_type: bool) -> dict: + model_dict = dataclasses.asdict(self) + if without_entity_type: + del model_dict['entity_type'] + return model_dict + + def to_json(self, *, without_entity_type: bool) -> str: + return json.dumps(self.to_dict(without_entity_type=without_entity_type)) + + +class ReportClient: + SERVICE_NAME: str = 'report' + CREATE_SBOM_REPORT_REQUEST_PATH: str = 'api/v2/report/{report_type}/sbom' + GET_EXECUTIONS_STATUS_PATH: str = 'api/v2/report/executions' + REPORT_STATUS_PATH: str = 'api/v2/report/{report_execution_id}/status' + + DOWNLOAD_REPORT_PATH: str = 'files/api/v1/file/sbom/{file_name}' # not in the report service + + def __init__(self, client: CycodeClientBase, hide_response_log: bool = True) -> None: + self.client = client + self._hide_response_log = hide_response_log + + def request_sbom_report_execution( + self, params: ReportParameters, zip_file: InMemoryZip = None, repository_url: Optional[str] = None + ) -> models.ReportExecution: + report_type = 'zipped-file' if zip_file else 'repository-url' + url_path = f'{self.SERVICE_NAME}/{self.CREATE_SBOM_REPORT_REQUEST_PATH}'.format(report_type=report_type) + + # entity type required only for zipped-file + request_data = {'report_parameters': params.to_json(without_entity_type=zip_file is None)} + if repository_url: + request_data['repository_url'] = repository_url + + request_args = { + 'url_path': url_path, + 'data': request_data, + 'hide_response_content_log': self._hide_response_log, + } + + if zip_file: + request_args['files'] = {'file': ('sca_files.zip', zip_file.read())} + + response = self.client.post(**request_args) + sbom_report = self.parse_requested_sbom_report_response(response) + if not sbom_report.report_executions: + raise CycodeError('Failed to get SBOM report. 
No executions found.') + + return sbom_report.report_executions[0] + + def get_report_execution(self, report_execution_id: int) -> models.ReportExecution: + url_path = f'{self.SERVICE_NAME}/{self.GET_EXECUTIONS_STATUS_PATH}' + params = { + 'executions_ids': report_execution_id, + 'include_orphan_executions': True, + } + response = self.client.get(url_path=url_path, params=params) + + report_executions = self.parse_execution_status_response(response) + if not report_executions: + raise CycodeError('Failed to get report execution.') + + return report_executions[0] + + def get_file_content(self, file_name: str) -> str: + response = self.client.get( + url_path=self.DOWNLOAD_REPORT_PATH.format(file_name=file_name), params={'include_hidden': True} + ) + return response.text + + def report_status(self, report_execution_id: int, status: dict) -> None: + url_path = f'{self.SERVICE_NAME}/{self.REPORT_STATUS_PATH}'.format(report_execution_id=report_execution_id) + self.client.post(url_path=url_path, body=status) + + @staticmethod + def parse_requested_sbom_report_response(response: Response) -> models.SbomReport: + return models.RequestedSbomReportResultSchema().load(response.json()) + + @staticmethod + def parse_execution_status_response(response: Response) -> List[models.ReportExecution]: + return models.ReportExecutionSchema().load(response.json(), many=True) diff --git a/cycode/cyclient/scan_client.py b/cycode/cyclient/scan_client.py index f09a96ef..5830e9dc 100644 --- a/cycode/cyclient/scan_client.py +++ b/cycode/cyclient/scan_client.py @@ -1,18 +1,19 @@ import json -from typing import List, Optional +from typing import TYPE_CHECKING, List, Optional from requests import Response -from cycode.cli.zip_file import InMemoryZip +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip +from cycode.cyclient import models +from cycode.cyclient.cycode_client_base import CycodeClientBase -from . 
import models -from .cycode_client_base import CycodeClientBase -from .scan_config.scan_config_base import ScanConfigBase +if TYPE_CHECKING: + from .scan_config_base import ScanConfigBase class ScanClient: def __init__( - self, scan_cycode_client: CycodeClientBase, scan_config: ScanConfigBase, hide_response_log: bool = True + self, scan_cycode_client: CycodeClientBase, scan_config: 'ScanConfigBase', hide_response_log: bool = True ) -> None: self.scan_cycode_client = scan_cycode_client self.scan_config = scan_config diff --git a/cycode/cyclient/scan_config/scan_config_creator.py b/cycode/cyclient/scan_config/scan_config_creator.py deleted file mode 100644 index f17be424..00000000 --- a/cycode/cyclient/scan_config/scan_config_creator.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Tuple - -from cycode.cyclient.config import dev_mode -from cycode.cyclient.config_dev import DEV_CYCODE_API_URL -from cycode.cyclient.cycode_dev_based_client import CycodeDevBasedClient -from cycode.cyclient.cycode_token_based_client import CycodeTokenBasedClient -from cycode.cyclient.scan_client import ScanClient -from cycode.cyclient.scan_config.scan_config_base import DefaultScanConfig, DevScanConfig - - -def create_scan_client(client_id: str, client_secret: str, hide_response_log: bool) -> ScanClient: - if dev_mode: - scan_cycode_client, scan_config = create_scan_for_dev_env() - else: - scan_cycode_client, scan_config = create_scan(client_id, client_secret) - - return ScanClient(scan_cycode_client, scan_config, hide_response_log) - - -def create_scan(client_id: str, client_secret: str) -> Tuple[CycodeTokenBasedClient, DefaultScanConfig]: - scan_cycode_client = CycodeTokenBasedClient(client_id, client_secret) - scan_config = DefaultScanConfig() - return scan_cycode_client, scan_config - - -def create_scan_for_dev_env() -> Tuple[CycodeDevBasedClient, DevScanConfig]: - scan_cycode_client = CycodeDevBasedClient(DEV_CYCODE_API_URL) - scan_config = DevScanConfig() - return scan_cycode_client, scan_config diff --git a/cycode/cyclient/scan_config/scan_config_base.py b/cycode/cyclient/scan_config_base.py similarity index 100% rename from cycode/cyclient/scan_config/scan_config_base.py rename to cycode/cyclient/scan_config_base.py diff --git a/tests/cli/helpers/test_tf_content_generator.py b/tests/cli/helpers/test_tf_content_generator.py index ae19b2f6..7953ed81 100644 --- a/tests/cli/helpers/test_tf_content_generator.py +++ b/tests/cli/helpers/test_tf_content_generator.py @@ -1,6 +1,6 @@ import os -from cycode.cli.helpers import tf_content_generator +from cycode.cli.files_collector.iac import tf_content_generator from cycode.cli.utils.path_utils import get_file_content, get_immediate_subdirectories from tests.conftest import TEST_FILES_PATH diff --git a/tests/cli/test_code_scanner.py b/tests/cli/test_code_scanner.py index b715e9c6..f4fe4f69 100644 --- a/tests/cli/test_code_scanner.py +++ b/tests/cli/test_code_scanner.py @@ -8,8 +8,10 @@ from requests import Response from cycode.cli import consts -from cycode.cli.code_scanner import _generate_document, _handle_exception, _is_file_relevant_for_sca_scan +from cycode.cli.code_scanner import _handle_exception from cycode.cli.exceptions import custom_exceptions +from cycode.cli.files_collector.excluder import _is_file_relevant_for_sca_scan +from cycode.cli.files_collector.path_documents import _generate_document from cycode.cli.models import Document if TYPE_CHECKING: diff --git a/tests/conftest.py b/tests/conftest.py index a763f6bb..fdb02ec2 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -3,9 +3,9 @@ import pytest import responses +from cycode.cyclient.client_creator import create_scan_client from cycode.cyclient.cycode_token_based_client import CycodeTokenBasedClient from cycode.cyclient.scan_client import ScanClient -from cycode.cyclient.scan_config.scan_config_creator import create_scan_client _EXPECTED_API_TOKEN = 'someJWT' diff --git a/tests/cyclient/scan_config/test_default_scan_config.py b/tests/cyclient/scan_config/test_default_scan_config.py index 0945402b..e0a84ad2 100644 --- a/tests/cyclient/scan_config/test_default_scan_config.py +++ b/tests/cyclient/scan_config/test_default_scan_config.py @@ -1,4 +1,4 @@ -from cycode.cyclient.scan_config.scan_config_creator import DefaultScanConfig +from cycode.cyclient.scan_config_base import DefaultScanConfig def test_get_service_name() -> None: diff --git a/tests/cyclient/scan_config/test_dev_scan_config.py b/tests/cyclient/scan_config/test_dev_scan_config.py index 0673d601..3ea3127e 100644 --- a/tests/cyclient/scan_config/test_dev_scan_config.py +++ b/tests/cyclient/scan_config/test_dev_scan_config.py @@ -1,4 +1,4 @@ -from cycode.cyclient.scan_config.scan_config_creator import DevScanConfig +from cycode.cyclient.scan_config_base import DevScanConfig def test_get_service_name() -> None: diff --git a/tests/cyclient/test_scan_client.py b/tests/cyclient/test_scan_client.py index db867a9f..2ca374b2 100644 --- a/tests/cyclient/test_scan_client.py +++ b/tests/cyclient/test_scan_client.py @@ -8,11 +8,11 @@ from requests import Timeout from requests.exceptions import ProxyError -from cycode.cli.code_scanner import zip_documents_to_scan +from cycode.cli.code_scanner import zip_documents from cycode.cli.config import config from cycode.cli.exceptions.custom_exceptions import CycodeError, HttpUnauthorizedError +from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip from cycode.cli.models import Document -from cycode.cli.zip_file import InMemoryZip from cycode.cyclient.scan_client import ScanClient from tests.conftest import TEST_FILES_PATH @@ -42,7 +42,7 @@ def get_test_zip_file(scan_type: str) -> InMemoryZip: with open(path, 'r', encoding='UTF-8') as f: test_documents.append(Document(path, f.read(), is_git_diff_format=False)) - return zip_documents_to_scan(scan_type, InMemoryZip(), test_documents) + return zip_documents(scan_type, test_documents) def get_zipped_file_scan_response(url: str, scan_id: Optional[UUID] = None) -> responses.Response: diff --git a/tests/test_code_scanner.py b/tests/test_code_scanner.py index b1f7e163..6eb494e9 100644 --- a/tests/test_code_scanner.py +++ b/tests/test_code_scanner.py @@ -1,9 +1,9 @@ import os -from cycode.cli import code_scanner +from cycode.cli.files_collector.excluder import _is_relevant_file_to_scan from tests.conftest import TEST_FILES_PATH def test_is_relevant_file_to_scan_sca() -> None: path = os.path.join(TEST_FILES_PATH, 'package.json') - assert code_scanner._is_relevant_file_to_scan('sca', path) is True + assert _is_relevant_file_to_scan('sca', path) is True diff --git a/tests/test_zip_file.py b/tests/test_zip_file.py index f73514c8..15c53c17 100644 --- a/tests/test_zip_file.py +++ b/tests/test_zip_file.py @@ -1,6 +1,6 @@ import os -from cycode.cli import zip_file +from cycode.cli.utils.path_utils import concat_unique_id def test_concat_unique_id_to_file_with_leading_slash() -> None: @@ -10,7 +10,7 @@ def test_concat_unique_id_to_file_with_leading_slash() -> None: expected_path = os.path.join(unique_id, filename) filename 
= os.sep + filename - assert zip_file.concat_unique_id(filename, unique_id) == expected_path + assert concat_unique_id(filename, unique_id) == expected_path def test_concat_unique_id_to_file_without_leading_slash() -> None: @@ -19,4 +19,4 @@ def test_concat_unique_id_to_file_without_leading_slash() -> None: expected_path = os.path.join(unique_id, *filename.split('/')) - assert zip_file.concat_unique_id(filename, unique_id) == expected_path + assert concat_unique_id(filename, unique_id) == expected_path
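
The refactor above keeps the public surface small: collect documents, zip them in memory, and create an API client. The snippet below is a minimal usage sketch, not part of the patch, assembled only from the module paths and functions introduced in this diff; the 'sca' scan type string and the local project path are illustrative values, and the client call assumes credentials were already stored via CredentialsManager.

from cycode.cli.files_collector.path_documents import get_relevant_document
from cycode.cli.files_collector.zip_documents import zip_documents
from cycode.cli.utils.get_api_client import get_scan_cycode_client
from cycode.cli.utils.progress_bar import SCAN_PROGRESS_BAR_SECTIONS, ScanProgressBarSection, get_progress_bar

# A hidden progress bar returns DummyProgressBar, so nothing is rendered to the terminal.
progress_bar = get_progress_bar(hidden=True, sections=SCAN_PROGRESS_BAR_SECTIONS)
progress_bar.start()

# Collect scannable documents from a path; '**/.git/**' and '**/.cycode/**' are excluded internally.
documents = get_relevant_document(
    progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, 'sca', '/path/to/project'  # example values
)

# Zip in memory; zip_documents() checks InMemoryZip.size against the per-scan-type limits.
zip_file = zip_documents('sca', documents)

# Falls back to CredentialsManager when client id/secret are not passed explicitly.
client = get_scan_cycode_client()

progress_bar.stop()

Splitting collection (files_collector), client creation (utils.get_api_client and cyclient.client_creator) and the now-parametrized progress bar sections is what lets the scan flow and the new report flow share the same building blocks, with SBOM_REPORT_PROGRESS_BAR_SECTIONS presumably filling the same role for the report command.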