diff --git a/cycode/cli/commands/scan/code_scanner.py b/cycode/cli/commands/scan/code_scanner.py index 42b305be..b3fddf59 100644 --- a/cycode/cli/commands/scan/code_scanner.py +++ b/cycode/cli/commands/scan/code_scanner.py @@ -301,6 +301,7 @@ def scan_documents( if not scan_parameters: scan_parameters = get_default_scan_parameters(context) + scan_type = context.obj['scan_type'] progress_bar = context.obj['progress_bar'] if not documents_to_scan: @@ -318,13 +319,13 @@ def scan_documents( context, is_git_diff, is_commit_range, scan_parameters ) errors, local_scan_results = run_parallel_batched_scan( - scan_batch_thread_func, documents_to_scan, progress_bar=progress_bar + scan_batch_thread_func, scan_type, documents_to_scan, progress_bar=progress_bar ) if len(local_scan_results) > 1: # if we used more than one batch, we need to fetch aggregate report url aggregation_report_url = _try_get_aggregation_report_url_if_needed( - scan_parameters, context.obj['client'], context.obj['scan_type'] + scan_parameters, context.obj['client'], scan_type ) set_aggregation_report_url(context, aggregation_report_url) diff --git a/cycode/cli/commands/scan/scan_command.py b/cycode/cli/commands/scan/scan_command.py index 5282dfb7..37b0a227 100644 --- a/cycode/cli/commands/scan/scan_command.py +++ b/cycode/cli/commands/scan/scan_command.py @@ -3,6 +3,7 @@ import click +from cycode.cli import consts from cycode.cli.commands.scan.commit_history.commit_history_command import commit_history_command from cycode.cli.commands.scan.path.path_command import path_command from cycode.cli.commands.scan.pre_commit.pre_commit_command import pre_commit_command @@ -34,7 +35,7 @@ @click.option( '--scan-type', '-t', - default='secret', + default=consts.SECRET_SCAN_TYPE, help='Specify the type of scan you wish to execute (the default is Secrets).', type=click.Choice(config['scans']['supported_scans']), ) diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index b4b09a15..3640d82a 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -136,14 +136,16 @@ # 5MB in bytes (in decimal) FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000 -# 20MB in bytes (in binary) -ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20971520 -# 200MB in bytes (in binary) -SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 209715200 +DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024 +ZIP_MAX_SIZE_LIMIT_IN_BYTES = { + SCA_SCAN_TYPE: 200 * 1024 * 1024, + SAST_SCAN_TYPE: 50 * 1024 * 1024, +} # scan in batches -SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024 -SCAN_BATCH_MAX_FILES_COUNT = 1000 +DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES = 9 * 1024 * 1024 +SCAN_BATCH_MAX_SIZE_IN_BYTES = {SAST_SCAN_TYPE: 50 * 1024 * 1024} +DEFAULT_SCAN_BATCH_MAX_FILES_COUNT = 1000 # if we increase this values, the server doesn't allow connecting (ConnectionError) SCAN_BATCH_MAX_PARALLEL_SCANS = 5 SCAN_BATCH_SCANS_PER_CPU = 1 diff --git a/cycode/cli/files_collector/zip_documents.py b/cycode/cli/files_collector/zip_documents.py index 7d57a47c..9547f7fb 100644 --- a/cycode/cli/files_collector/zip_documents.py +++ b/cycode/cli/files_collector/zip_documents.py @@ -10,12 +10,9 @@ def _validate_zip_file_size(scan_type: str, zip_file_size: int) -> None: - if scan_type == consts.SCA_SCAN_TYPE: - if zip_file_size > consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.SCA_ZIP_MAX_SIZE_LIMIT_IN_BYTES) - else: - if zip_file_size > consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES: - raise custom_exceptions.ZipTooLargeError(consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES) + max_size_limit = consts.ZIP_MAX_SIZE_LIMIT_IN_BYTES.get(scan_type, consts.DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES) + if zip_file_size > max_size_limit: + raise custom_exceptions.ZipTooLargeError(max_size_limit) def zip_documents(scan_type: str, documents: List[Document], zip_file: Optional[InMemoryZip] = None) -> InMemoryZip: diff --git a/cycode/cli/utils/scan_batch.py b/cycode/cli/utils/scan_batch.py index ede229e2..1ecfcf49 100644 --- a/cycode/cli/utils/scan_batch.py +++ b/cycode/cli/utils/scan_batch.py @@ -2,12 +2,7 @@ from multiprocessing.pool import ThreadPool from typing import TYPE_CHECKING, Callable, Dict, List, Tuple -from cycode.cli.consts import ( - SCAN_BATCH_MAX_FILES_COUNT, - SCAN_BATCH_MAX_PARALLEL_SCANS, - SCAN_BATCH_MAX_SIZE_IN_BYTES, - SCAN_BATCH_SCANS_PER_CPU, -) +from cycode.cli import consts from cycode.cli.models import Document from cycode.cli.utils.progress_bar import ScanProgressBarSection @@ -18,8 +13,8 @@ def split_documents_into_batches( documents: List[Document], - max_size_mb: int = SCAN_BATCH_MAX_SIZE_IN_BYTES, - max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT, + max_size: int = consts.DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES, + max_files_count: int = consts.DEFAULT_SCAN_BATCH_MAX_FILES_COUNT, ) -> List[List[Document]]: batches = [] @@ -28,7 +23,7 @@ def split_documents_into_batches( for document in documents: document_size = len(document.content.encode('UTF-8')) - if (current_size + document_size > max_size_mb) or (len(current_batch) >= max_files_count): + if (current_size + document_size > max_size) or (len(current_batch) >= max_files_count): batches.append(current_batch) current_batch = [document] @@ -45,17 +40,18 @@ def split_documents_into_batches( def _get_threads_count() -> int: cpu_count = os.cpu_count() or 1 - return min(cpu_count * SCAN_BATCH_SCANS_PER_CPU, SCAN_BATCH_MAX_PARALLEL_SCANS) + return min(cpu_count * consts.SCAN_BATCH_SCANS_PER_CPU, consts.SCAN_BATCH_MAX_PARALLEL_SCANS) def run_parallel_batched_scan( scan_function: Callable[[List[Document]], Tuple[str, 'CliError', 'LocalScanResult']], + scan_type: str, documents: List[Document], progress_bar: 'BaseProgressBar', - max_size_mb: int = SCAN_BATCH_MAX_SIZE_IN_BYTES, - max_files_count: int = SCAN_BATCH_MAX_FILES_COUNT, ) -> Tuple[Dict[str, 'CliError'], List['LocalScanResult']]: - batches = split_documents_into_batches(documents, max_size_mb, max_files_count) + max_size = consts.SCAN_BATCH_MAX_SIZE_IN_BYTES.get(scan_type, consts.DEFAULT_SCAN_BATCH_MAX_SIZE_IN_BYTES) + batches = split_documents_into_batches(documents, max_size) + progress_bar.set_section_length(ScanProgressBarSection.SCAN, len(batches)) # * 3 # TODO(MarshalX): we should multiply the count of batches in SCAN section because each batch has 3 steps: # 1. scan creation diff --git a/cycode/cyclient/scan_client.py b/cycode/cyclient/scan_client.py index b63f49e1..31abba17 100644 --- a/cycode/cyclient/scan_client.py +++ b/cycode/cyclient/scan_client.py @@ -328,11 +328,11 @@ def parse_zipped_file_scan_response(response: Response) -> models.ZippedFileScan @staticmethod def get_service_name(scan_type: str) -> Optional[str]: # TODO(MarshalX): get_service_name should be removed from ScanClient? Because it exists in ScanConfig - if scan_type == 'secret': + if scan_type == consts.SECRET_SCAN_TYPE: return 'secret' - if scan_type == 'iac': + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: return 'iac' - if scan_type == 'sca' or scan_type == 'sast': + if scan_type == consts.SCA_SCAN_TYPE or scan_type == consts.SAST_SCAN_TYPE: return 'scans' return None diff --git a/cycode/cyclient/scan_config_base.py b/cycode/cyclient/scan_config_base.py index e0bdd7ef..1ff1da6c 100644 --- a/cycode/cyclient/scan_config_base.py +++ b/cycode/cyclient/scan_config_base.py @@ -9,9 +9,9 @@ def get_service_name(self, scan_type: str, should_use_scan_service: bool = False @staticmethod def get_async_scan_type(scan_type: str) -> str: - if scan_type == 'secret': + if scan_type == consts.SECRET_SCAN_TYPE: return 'Secrets' - if scan_type == 'iac': + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: return 'InfraConfiguration' return scan_type.upper() @@ -31,9 +31,9 @@ class DevScanConfig(ScanConfigBase): def get_service_name(self, scan_type: str, should_use_scan_service: bool = False) -> str: if should_use_scan_service: return '5004' - if scan_type == 'secret': + if scan_type == consts.SECRET_SCAN_TYPE: return '5025' - if scan_type == 'iac': + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: return '5026' # sca and sast @@ -47,9 +47,9 @@ class DefaultScanConfig(ScanConfigBase): def get_service_name(self, scan_type: str, should_use_scan_service: bool = False) -> str: if should_use_scan_service: return 'scans' - if scan_type == 'secret': + if scan_type == consts.SECRET_SCAN_TYPE: return 'secret' - if scan_type == 'iac': + if scan_type == consts.INFRA_CONFIGURATION_SCAN_TYPE: return 'iac' # sca and sast diff --git a/tests/cli/commands/test_main_command.py b/tests/cli/commands/test_main_command.py index 32a55972..7e588cf2 100644 --- a/tests/cli/commands/test_main_command.py +++ b/tests/cli/commands/test_main_command.py @@ -6,6 +6,7 @@ import responses from click.testing import CliRunner +from cycode.cli import consts from cycode.cli.commands.main_cli import main_cli from cycode.cli.utils.git_proxy import git_proxy from tests.conftest import CLI_ENV_VARS, TEST_FILES_PATH, ZIP_CONTENT_PATH @@ -29,7 +30,7 @@ def _is_json(plain: str) -> bool: @responses.activate @pytest.mark.parametrize('output', ['text', 'json']) def test_passing_output_option(output: str, scan_client: 'ScanClient', api_token_response: responses.Response) -> None: - scan_type = 'secret' + scan_type = consts.SECRET_SCAN_TYPE scan_id = uuid4() mock_scan_responses(responses, scan_type, scan_client, scan_id, ZIP_CONTENT_PATH) @@ -52,8 +53,10 @@ def test_passing_output_option(output: str, scan_client: 'ScanClient', api_token @responses.activate def test_optional_git_with_path_scan(scan_client: 'ScanClient', api_token_response: responses.Response) -> None: - mock_scan_responses(responses, 'secret', scan_client, uuid4(), ZIP_CONTENT_PATH) - responses.add(get_zipped_file_scan_response(get_zipped_file_scan_url('secret', scan_client), ZIP_CONTENT_PATH)) + mock_scan_responses(responses, consts.SECRET_SCAN_TYPE, scan_client, uuid4(), ZIP_CONTENT_PATH) + responses.add( + get_zipped_file_scan_response(get_zipped_file_scan_url(consts.SECRET_SCAN_TYPE, scan_client), ZIP_CONTENT_PATH) + ) responses.add(api_token_response) # fake env without Git executable diff --git a/tests/cyclient/scan_config/test_default_scan_config.py b/tests/cyclient/scan_config/test_default_scan_config.py index e659f71f..75b305b5 100644 --- a/tests/cyclient/scan_config/test_default_scan_config.py +++ b/tests/cyclient/scan_config/test_default_scan_config.py @@ -1,14 +1,15 @@ +from cycode.cli import consts from cycode.cyclient.scan_config_base import DefaultScanConfig def test_get_service_name() -> None: default_scan_config = DefaultScanConfig() - assert default_scan_config.get_service_name('secret') == 'secret' - assert default_scan_config.get_service_name('iac') == 'iac' - assert default_scan_config.get_service_name('sca') == 'scans' - assert default_scan_config.get_service_name('sast') == 'scans' - assert default_scan_config.get_service_name('secret', True) == 'scans' + assert default_scan_config.get_service_name(consts.SECRET_SCAN_TYPE) == 'secret' + assert default_scan_config.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == 'iac' + assert default_scan_config.get_service_name(consts.SCA_SCAN_TYPE) == 'scans' + assert default_scan_config.get_service_name(consts.SAST_SCAN_TYPE) == 'scans' + assert default_scan_config.get_service_name(consts.SECRET_SCAN_TYPE, True) == 'scans' def test_get_detections_prefix() -> None: diff --git a/tests/cyclient/scan_config/test_dev_scan_config.py b/tests/cyclient/scan_config/test_dev_scan_config.py index 7419b002..63c99169 100644 --- a/tests/cyclient/scan_config/test_dev_scan_config.py +++ b/tests/cyclient/scan_config/test_dev_scan_config.py @@ -1,14 +1,15 @@ +from cycode.cli import consts from cycode.cyclient.scan_config_base import DevScanConfig def test_get_service_name() -> None: dev_scan_config = DevScanConfig() - assert dev_scan_config.get_service_name('secret') == '5025' - assert dev_scan_config.get_service_name('iac') == '5026' - assert dev_scan_config.get_service_name('sca') == '5004' - assert dev_scan_config.get_service_name('sast') == '5004' - assert dev_scan_config.get_service_name('secret', should_use_scan_service=True) == '5004' + assert dev_scan_config.get_service_name(consts.SECRET_SCAN_TYPE) == '5025' + assert dev_scan_config.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == '5026' + assert dev_scan_config.get_service_name(consts.SCA_SCAN_TYPE) == '5004' + assert dev_scan_config.get_service_name(consts.SAST_SCAN_TYPE) == '5004' + assert dev_scan_config.get_service_name(consts.SECRET_SCAN_TYPE, should_use_scan_service=True) == '5004' def test_get_detections_prefix() -> None: diff --git a/tests/cyclient/test_scan_client.py b/tests/cyclient/test_scan_client.py index d51e43f6..2b8fc3f3 100644 --- a/tests/cyclient/test_scan_client.py +++ b/tests/cyclient/test_scan_client.py @@ -8,6 +8,7 @@ from requests import Timeout from requests.exceptions import ProxyError +from cycode.cli import consts from cycode.cli.config import config from cycode.cli.exceptions.custom_exceptions import ( CycodeError, @@ -49,10 +50,10 @@ def get_test_zip_file(scan_type: str) -> InMemoryZip: def test_get_service_name(scan_client: ScanClient) -> None: # TODO(MarshalX): get_service_name should be removed from ScanClient? Because it exists in ScanConfig - assert scan_client.get_service_name('secret') == 'secret' - assert scan_client.get_service_name('iac') == 'iac' - assert scan_client.get_service_name('sca') == 'scans' - assert scan_client.get_service_name('sast') == 'scans' + assert scan_client.get_service_name(consts.SECRET_SCAN_TYPE) == 'secret' + assert scan_client.get_service_name(consts.INFRA_CONFIGURATION_SCAN_TYPE) == 'iac' + assert scan_client.get_service_name(consts.SCA_SCAN_TYPE) == 'scans' + assert scan_client.get_service_name(consts.SAST_SCAN_TYPE) == 'scans' @pytest.mark.parametrize('scan_type', config['scans']['supported_scans']) diff --git a/tests/test_code_scanner.py b/tests/test_code_scanner.py index d789312d..10726a65 100644 --- a/tests/test_code_scanner.py +++ b/tests/test_code_scanner.py @@ -4,6 +4,7 @@ import pytest import responses +from cycode.cli import consts from cycode.cli.commands.scan.code_scanner import ( _try_get_aggregation_report_url_if_needed, _try_get_report_url_if_needed, @@ -22,13 +23,13 @@ def test_is_relevant_file_to_scan_sca() -> None: path = os.path.join(TEST_FILES_PATH, 'package.json') - assert _is_relevant_file_to_scan('sca', path) is True + assert _is_relevant_file_to_scan(consts.SCA_SCAN_TYPE, path) is True @pytest.mark.parametrize('scan_type', config['scans']['supported_scans']) def test_try_get_report_url_if_needed_return_none(scan_type: str, scan_client: ScanClient) -> None: scan_id = uuid4().hex - result = _try_get_report_url_if_needed(scan_client, False, scan_id, 'secret') + result = _try_get_report_url_if_needed(scan_client, False, scan_id, consts.SECRET_SCAN_TYPE) assert result is None