From 8d847a288d2744124b6dc9168d520490b57429f5 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 7 Sep 2023 11:12:19 +0900 Subject: [PATCH 1/6] Create run_scanners for API Signed-off-by: Wonjae Park --- src/fosslight_source/cli.py | 76 +++++++++++++++++++--------- src/fosslight_source/run_scancode.py | 7 +-- 2 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index 8489961..6a5e818 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -50,8 +50,6 @@ def main(): selected_scanner = "" correct_mode = True - license_list = [] - scanoss_result = [] time_out = 120 core = -1 @@ -113,27 +111,11 @@ def main(): True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan) if os.path.isdir(path_to_scan): - scancode_result = [] - scanoss_result = [] - merged_result = [] - spdx_downloads = {} - success = True - - if selected_scanner == 'scancode' or selected_scanner == 'all' or selected_scanner == '': - success, _result_log["Scan Result"], scancode_result, license_list = run_scan(path_to_scan, output_file_name, - write_json_file, core, True, - print_matched_text, format, True, - time_out, correct_mode, - correct_filepath) - if selected_scanner == 'scanoss' or selected_scanner == 'all' or selected_scanner == '': - scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file) - if selected_scanner not in SCANNER_TYPE: - print_help_msg_source_scanner() - sys.exit(1) - spdx_downloads = get_spdx_downloads(path_to_scan) - merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads) - create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text, - output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) + result = [] + result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True, + print_matched_text, format, time_out, correct_mode, correct_filepath, output_path, output_file, + output_extension, selected_scanner) + _result_log["Scan Result"] = result[1] try: logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)) @@ -249,5 +231,53 @@ def merge_results(scancode_result=[], scanoss_result=[], spdx_downloads={}): return scancode_result +def run_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, called_by_cli=True, + print_matched_text=False, format="", time_out=120, correct_mode=True, correct_filepath="", + output_path="", output_file="", output_extension="", + selected_scanner='all'): + """ + Run Scancode and scanoss.py for the given path. + + :param path_to_scan: path of sourcecode to scan. + :param output_file_name: path or file name (with path) for the output. + :param _write_json_file: if requested, keep the raw files. + :param num_cores: number of cores used for scancode scanning. + :param called_by_cli: if not called by cli, initialize logger. + :param print_matched_text: if requested, output matched text (only for scancode). + :param format: output format (excel, csv, opossum). + :return success: success or failure of scancode. + :return _result_log["Scan Result"]: + :return merged_result: merged scan result of scancode and scanoss. + :return license_list: matched text.(only for scancode) + """ + _start_time = datetime.now().strftime('%y%m%d_%H%M') + scancode_result = [] + scanoss_result = [] + merged_result = [] + spdx_downloads = {} + success = True + _result_log = {} + + if selected_scanner == 'scancode' or selected_scanner == 'all' or selected_scanner == '': + success, _result_log["Scan Result"], scancode_result, license_list = run_scan(path_to_scan, output_file_name, + _write_json_file, num_cores, True, + print_matched_text, format, called_by_cli, + time_out, correct_mode, correct_filepath) + if selected_scanner == 'scanoss' or selected_scanner == 'all' or selected_scanner == '': + scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, True, _write_json_file) + if selected_scanner not in SCANNER_TYPE: + print_help_msg_source_scanner() + sys.exit(1) + + spdx_downloads = get_spdx_downloads(path_to_scan) + merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads) + + if called_by_cli: + create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text, + output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) + + return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result + + if __name__ == '__main__': main() diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index 53be2cb..1d10ab4 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -106,9 +106,10 @@ def run_scan(path_to_scan, output_file_name="", output_file_without_ext = os.path.join(output_path, output_file) if not called_by_cli: if correct_mode: - success, msg_correct, correct_list = correct_with_yaml(correct_filepath, - path_to_scan, sheet_list) - if not success: + correct_success = True + correct_success, msg_correct, correct_list = correct_with_yaml(correct_filepath, + path_to_scan, sheet_list) + if not correct_success: logger.info(f"No correction with yaml: {msg_correct}") else: sheet_list = correct_list From 37345f711b2009ac9caca5e44dc549252f675bc9 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 7 Sep 2023 14:00:07 +0900 Subject: [PATCH 2/6] Remove deprecated code Signed-off-by: Wonjae Park --- src/fosslight_source/run_scancode.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index 1d10ab4..16d0612 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -103,23 +103,6 @@ def run_scan(path_to_scan, output_file_name="", if need_license: sheet_list["matched_text"] = get_license_list_to_print(license_list) - output_file_without_ext = os.path.join(output_path, output_file) - if not called_by_cli: - if correct_mode: - correct_success = True - correct_success, msg_correct, correct_list = correct_with_yaml(correct_filepath, - path_to_scan, sheet_list) - if not correct_success: - logger.info(f"No correction with yaml: {msg_correct}") - else: - sheet_list = correct_list - logger.info("Success to correct with yaml.") - success_to_write, writing_msg, result_file = write_output_file(output_file_without_ext, - output_extension, sheet_list) - if success_to_write: - logger.info(f"Writing Output file({result_file}, success:{success_to_write}") - else: - logger.error(f"Fail to generate result file. msg:({writing_msg})") except Exception as ex: success = False msg = str(ex) From 72794a406b27dc5066495f45496c5992cdcb589e Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 7 Sep 2023 16:02:57 +0900 Subject: [PATCH 3/6] Set exclude .m4 files Signed-off-by: Wonjae Park --- src/fosslight_source/_scan_item.py | 3 +++ src/fosslight_source/run_scancode.py | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index baa5ae5..7c803d8 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -14,6 +14,7 @@ "aclocal.m4", "configure", "configure.ac", "depcomp", "compile", "missing", "libtool.m4", "makefile"] +_exclude_extension = [".m4"] _exclude_directory = ["test", "tests", "doc", "docs"] _exclude_directory = [os.path.sep + dir_name + os.path.sep for dir_name in _exclude_directory] @@ -104,6 +105,8 @@ def is_exclude_dir(dir_path): def is_exclude_file(file_path, prev_dir=None, prev_dir_exclude_value=None): file_path = file_path.lower() filename = os.path.basename(file_path) + if os.path.splitext(filename)[1] in _exclude_extension: + return True if filename in _exclude_filename: return True diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index 16d0612..458e03c 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -15,8 +15,7 @@ from ._parsing_scancode_file_item import parsing_file_item from ._parsing_scancode_file_item import get_error_from_header from ._license_matched import get_license_list_to_print -from fosslight_util.output_format import check_output_format, write_output_file -from fosslight_util.correct import correct_with_yaml +from fosslight_util.output_format import check_output_format logger = logging.getLogger(constant.LOGGER_NAME) warnings.filterwarnings("ignore", category=FutureWarning) From 11154a65f65c586c653ddee42d7a2e13df3ff2f8 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 7 Sep 2023 17:07:56 +0900 Subject: [PATCH 4/6] Set binary files from ScanCode result Excluded Signed-off-by: Wonjae Park --- src/fosslight_source/run_scancode.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index 458e03c..b3d4a8b 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -16,6 +16,7 @@ from ._parsing_scancode_file_item import get_error_from_header from ._license_matched import get_license_list_to_print from fosslight_util.output_format import check_output_format +from fosslight_binary.binary_analysis import check_binary logger = logging.getLogger(constant.LOGGER_NAME) warnings.filterwarnings("ignore", category=FutureWarning) @@ -98,6 +99,11 @@ def run_scan(path_to_scan, output_file_name="", success = True result_list = sorted( result_list, key=lambda row: (''.join(row.licenses))) + + for scan_item in result_list: + if check_binary(os.path.join(path_to_scan, scan_item.file)): + scan_item.exclude = True + sheet_list["SRC_FL_Source"] = [scan_item.get_row_to_print() for scan_item in result_list] if need_license: sheet_list["matched_text"] = get_license_list_to_print(license_list) From 18f4315488b78824b657eea08fd3dc72900e9f03 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Fri, 8 Sep 2023 11:40:30 +0900 Subject: [PATCH 5/6] Fix requirements Signed-off-by: Wonjae Park --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index e7732e2..8cf9bc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ typecode-libmagic fosslight_util>=1.4.28 PyYAML wheel>=0.38.1 +fosslight_binary From 3a221d7e13a13c389222e700d0e73445b8e10936 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Mon, 11 Sep 2023 16:50:02 +0900 Subject: [PATCH 6/6] Remove condition for creating report Signed-off-by: Wonjae Park --- src/fosslight_source/cli.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index 6a5e818..aa47f48 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -39,7 +39,6 @@ def main(): global logger - success = True _result_log = {} path_to_scan = os.getcwd() @@ -99,22 +98,10 @@ def main(): timer.setDaemon(True) timer.start() - _start_time = datetime.now().strftime('%y%m%d_%H%M') - success, msg, output_path, output_file, output_extension = check_output_format(output_file_name, format) - if output_extension != '.xlsx' and output_extension != "" and print_matched_text: - logger.warning("-m option is only available for excel.") - print_matched_text = False - if not success: - logger.error(f"Format error. {msg}") - sys.exit(1) - logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"), - True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan) - if os.path.isdir(path_to_scan): result = [] result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True, - print_matched_text, format, time_out, correct_mode, correct_filepath, output_path, output_file, - output_extension, selected_scanner) + print_matched_text, format, time_out, correct_mode, correct_filepath, selected_scanner) _result_log["Scan Result"] = result[1] try: @@ -233,7 +220,6 @@ def merge_results(scancode_result=[], scanoss_result=[], spdx_downloads={}): def run_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, called_by_cli=True, print_matched_text=False, format="", time_out=120, correct_mode=True, correct_filepath="", - output_path="", output_file="", output_extension="", selected_scanner='all'): """ Run Scancode and scanoss.py for the given path. @@ -250,13 +236,23 @@ def run_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_ :return merged_result: merged scan result of scancode and scanoss. :return license_list: matched text.(only for scancode) """ + global logger + _start_time = datetime.now().strftime('%y%m%d_%H%M') scancode_result = [] scanoss_result = [] merged_result = [] spdx_downloads = {} - success = True - _result_log = {} + + success, msg, output_path, output_file, output_extension = check_output_format(output_file_name, format) + if output_extension != '.xlsx' and output_extension != "" and print_matched_text: + logger.warning("-m option is only available for excel.") + print_matched_text = False + if not success: + logger.error(f"Format error. {msg}") + sys.exit(1) + logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"), + True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan) if selected_scanner == 'scancode' or selected_scanner == 'all' or selected_scanner == '': success, _result_log["Scan Result"], scancode_result, license_list = run_scan(path_to_scan, output_file_name, @@ -272,9 +268,8 @@ def run_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_ spdx_downloads = get_spdx_downloads(path_to_scan) merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads) - if called_by_cli: - create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text, - output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) + create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text, + output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result