#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2021 LG Electronics Inc.
# SPDX-License-Identifier: Apache-2.0
"""Collect license matches and turn them into rows for a report sheet."""
import logging
import fosslight_util.constant as constant

logger = logging.getLogger(constant.LOGGER_NAME)
# Column titles of the matched-license sheet.
# NOTE(review): rows built by MatchedLicense.get_row_to_print() carry no value
# for the leading 'No' column; presumably the report writer numbers the rows -
# confirm against the writer.
HEADER = ['No', 'Category', 'License',
          'Matched Text', 'File Count', 'Files']
# Categories that are sorted after every other category.
LOW_PRIORITY = ['Permissive', 'Public Domain']


class MatchedLicense:
    """One license / matched-text pair together with the files it occurs in.

    Attributes:
        license: License name given by the caller.
        category: Category given by the caller.
        matched_text: Text that matched the license.
        files: Files containing the match, in first-seen order, each listed
            once.
        priority: Sort rank derived from ``category``: 1 when the category is
            listed in ``LOW_PRIORITY``, otherwise 0.
    """

    def __init__(self, lic, category, text, file):
        """Create an entry for ``lic`` first seen in ``file``.

        Args:
            lic: License name.
            category: License category; also decides ``priority``.
            text: Matched text.
            file: First file in which the match was found.
        """
        # All state lives on the instance; a class-level list would be one
        # object shared by every instance.
        self.license = lic
        self.matched_text = text
        self.files = [file]
        # Mirror of ``files`` so duplicate checks stay cheap for long lists.
        self._known_files = {file}
        self.set_category(category)

    def set_license(self, value):
        """Replace the license name."""
        self.license = value

    def set_files(self, value):
        """Record one more file containing this match.

        Despite the name this adds a single file; it does not replace the
        list. A file that is already recorded is ignored so that the file
        count in the report counts each file once, even when the same text
        matches several times inside one file.
        """
        if value in self._known_files:
            return
        self._known_files.add(value)
        self.files.append(value)

    def set_category(self, value):
        """Replace the category and recompute ``priority`` from it."""
        self.category = value
        self.priority = 1 if value in LOW_PRIORITY else 0

    def set_matched_text(self, value):
        """Replace the matched text."""
        self.matched_text = value

    def get_row_to_print(self):
        """Return the entry as a list of cell values.

        Returns:
            ``[category, license, matched_text, file count as str,
            comma-joined files]``.
        """
        return [self.category, self.license, self.matched_text,
                str(len(self.files)), ','.join(self.files)]


def get_license_list_to_print(license_list):
    """Build the sheet rows for a collection of matched licenses.

    Args:
        license_list: Mapping whose values are ``MatchedLicense`` objects;
            the keys are not used here.

    Returns:
        A list of rows: a copy of ``HEADER`` first, then one row per entry
        ordered by priority, category and license.
    """
    ordered_items = sorted(
        license_list.values(),
        key=lambda item: (item.priority, item.category, item.license))
    # Hand out a copy of HEADER so callers that edit the first row cannot
    # change the module-level constant.
    return [list(HEADER)] + [item.get_row_to_print() for item in ordered_items]
fosslight_util.constant as constant +from ._license_matched import MatchedLicense logger = logging.getLogger(constant.LOGGER_NAME) _replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-"] @@ -116,10 +117,11 @@ def get_error_from_header(header_item): return has_error, str_error -def parsing_file_item(scancode_file_list, has_error): +def parsing_file_item(scancode_file_list, has_error, need_matched_license=False): rc = True scancode_file_item = [] + license_list = {} # Key :[license]+[matched_text], value: MatchedLicense() msg = "TOTAL FILE COUNT: " + str(len(scancode_file_list)) + "\n" prev_dir = "" @@ -199,6 +201,18 @@ def parsing_file_item(scancode_file_list, has_error): license_value = license_value.replace(word, "") license_detected.append(license_value) + # Add matched licenses + if need_matched_license and "category" in lic_item: + lic_category = lic_item["category"] + if "matched_text" in lic_item: + lic_matched_text = lic_item["matched_text"] + lic_matched_key = license_value + lic_matched_text + if lic_matched_key in license_list: + license_list[lic_matched_key].set_files(file_path) + else: + lic_info = MatchedLicense(license_value, lic_category, lic_matched_text, file_path) + license_list[lic_matched_key] = lic_info + matched_rule = lic_item["matched_rule"] if matched_rule["is_license_text"]: result_item.set_is_license_text(True) @@ -221,4 +235,4 @@ def parsing_file_item(scancode_file_list, has_error): rc = False logger.debug(msg) - return rc, scancode_file_item, msg.strip() + return rc, scancode_file_item, msg.strip(), license_list diff --git a/src/fosslight_source/convert_scancode.py b/src/fosslight_source/convert_scancode.py index 9faf178..7ba0af8 100755 --- a/src/fosslight_source/convert_scancode.py +++ b/src/fosslight_source/convert_scancode.py @@ -15,55 +15,65 @@ from ._parsing_scancode_file_item import parsing_file_item, get_error_from_header from fosslight_util.write_excel import write_excel_and_csv from ._help import 
print_help_msg_convert +from ._license_matched import get_license_list_to_print logger = logging.getLogger(constant.LOGGER_NAME) _PKG_NAME = "fosslight_source" -def convert_json_to_excel(scancode_json, excel_name, _result_log): +def convert_json_to_excel(scancode_json, excel_name, result_log, need_license=False): + sheet_license_prefix = "matched_text" + sheet_SRC_prefix = "SRC" file_list = [] + lic_list = {} msg = "" success = True try: sheet_list = {} if os.path.isfile(scancode_json): - file_list = get_detected_licenses_from_scancode( - scancode_json) + file_list, lic_list = get_detected_licenses_from_scancode( + scancode_json, need_license) if len(file_list) > 0: file_list = sorted( file_list, key=lambda row: (''.join(row.licenses))) - sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in file_list] + sheet_list[sheet_SRC_prefix] = [scan_item.get_row_to_print() for scan_item in file_list] + if need_license: + sheet_list[sheet_license_prefix] = get_license_list_to_print(lic_list) elif os.path.isdir(scancode_json): for root, dirs, files in os.walk(scancode_json): for file in files: if file.endswith(".json"): try: result_file = os.path.join(root, file) - file_list = get_detected_licenses_from_scancode( - result_file) + file_list, lic_list = get_detected_licenses_from_scancode( + result_file, need_license) if len(file_list) > 0: file_name = os.path.basename(file) file_list = sorted( file_list, key=lambda row: (''.join(row.licenses))) - sheet_list["SRC_" + file_name] = [scan_item.get_row_to_print() for scan_item in file_list] + sheet_name = sheet_SRC_prefix + "_" + file_name + sheet_list[sheet_name] = [scan_item.get_row_to_print() for scan_item in file_list] + if need_license: + lic_sheet_name = sheet_license_prefix + "_" + file_name + sheet_list[lic_sheet_name] = get_license_list_to_print(lic_list) except Exception as ex: logger.warning("Error parsing "+file+":" + str(ex)) success_to_write, writing_msg = write_excel_and_csv(excel_name, sheet_list) 
logger.info("Writing excel :" + str(success_to_write) + " " + writing_msg) if success_to_write: - _result_log["FOSSLight Report"] = excel_name + ".xlsx" + result_log["FOSSLight Report"] = excel_name + ".xlsx" except Exception as ex: success = False logger.warning(str(ex)) scan_result_msg = str(success) if msg == "" else str(success) + "," + msg - _result_log["Scan Result"] = scan_result_msg + result_log["Scan Result"] = scan_result_msg try: - _str_final_result_log = yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True) + _str_final_result_log = yaml.safe_dump(result_log, allow_unicode=True, sort_keys=True) logger.info(_str_final_result_log) except Exception as ex: logger.warning("Failed to print result log.: " + str(ex)) @@ -71,21 +81,22 @@ def convert_json_to_excel(scancode_json, excel_name, _result_log): return file_list -def get_detected_licenses_from_scancode(scancode_json_file): +def get_detected_licenses_from_scancode(scancode_json_file, need_license): file_list = [] + license_list = {} try: logger.info("Start parsing " + scancode_json_file) with open(scancode_json_file, "r") as st_json: st_python = json.load(st_json) has_error, str_error = get_error_from_header(st_python["headers"]) - rc, file_list, msg = parsing_file_item(st_python["files"], has_error) + rc, file_list, msg, license_list = parsing_file_item(st_python["files"], has_error, need_license) logger.info("|---"+msg) if has_error: logger.info("|---Scan error:"+str_error) except Exception as error: logger.warning("Parsing " + scancode_json_file + ":" + str(error)) logger.info("|---Number of files detected: " + str(len(file_list))) - return file_list + return file_list, license_list def main(): @@ -95,9 +106,10 @@ def main(): path_to_find_bin = os.getcwd() start_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') output_file_name = "" + print_matched_text = False try: - opts, args = getopt.getopt(argv, 'hp:o:') + opts, args = getopt.getopt(argv, 'hmp:o:') for opt, arg in opts: if opt == "-h": 
print_help_msg_convert() @@ -105,6 +117,8 @@ def main(): path_to_find_bin = arg elif opt == "-o": output_file_name = arg + elif opt == "-m": + print_matched_text = True except Exception: print_help_msg_convert() @@ -117,7 +131,7 @@ def main(): logger, _result_log = init_log(os.path.join(output_dir, "fosslight_src_log_" + start_time + ".txt"), True, logging.INFO, logging.DEBUG, _PKG_NAME) - convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log) + convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log, print_matched_text) if __name__ == '__main__': diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index d854df5..2124367 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -20,6 +20,7 @@ from ._parsing_scancode_file_item import get_error_from_header from fosslight_util.write_excel import write_excel_and_csv from ._help import print_help_msg_source +from ._license_matched import get_license_list_to_print logger = logging.getLogger(constant.LOGGER_NAME) warnings.filterwarnings("ignore", category=FutureWarning) @@ -28,33 +29,35 @@ def main(): argv = sys.argv[1:] - _path_to_scan = "" - _write_json_file = False - _output_file = "" + path_to_scan = "" + write_json_file = False + output_file = "" + print_matched_text = False try: - opts, args = getopt.getopt(argv, 'hjp:o:') + opts, args = getopt.getopt(argv, 'hmjp:o:') for opt, arg in opts: if opt == "-h": print_help_msg_source() elif opt == "-p": - _path_to_scan = arg + path_to_scan = arg elif opt == "-j": - _write_json_file = True + write_json_file = True elif opt == "-o": - _output_file = arg - + output_file = arg + elif opt == "-m": + print_matched_text = True except Exception: print_help_msg_source() timer = TimerThread() timer.setDaemon(True) timer.start() - run_scan(_path_to_scan, _output_file, _write_json_file, -1, False) + run_scan(path_to_scan, output_file, write_json_file, -1, False, print_matched_text) 
def run_scan(path_to_scan, output_file_name="", - _write_json_file=False, num_cores=-1, return_results=False): + _write_json_file=False, num_cores=-1, return_results=False, need_license=False): global logger success = True @@ -111,7 +114,7 @@ def run_scan(path_to_scan, output_file_name="", _result_log["Error_files"] = error_msg msg = "Failed to analyze :" + error_msg if "files" in results: - rc, result_list, parsing_msg = parsing_file_item(results["files"], has_error) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license) _result_log["Parsing Log"] = parsing_msg if rc: if not success: @@ -119,6 +122,8 @@ def run_scan(path_to_scan, output_file_name="", result_list = sorted( result_list, key=lambda row: (''.join(row.licenses))) sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in result_list] + if need_license: + sheet_list["matched_text"] = get_license_list_to_print(license_list) success_to_write, writing_msg = write_excel_and_csv( output_file, sheet_list) diff --git a/tox.ini b/tox.ini index da3d272..32f79de 100644 --- a/tox.ini +++ b/tox.ini @@ -20,10 +20,10 @@ filterwarnings = ignore::DeprecationWarning [testenv:test_run] commands = - fosslight_source -p tests/test_files -j -o test_scan/scan_result - cat test_scan/scan_result.csv + fosslight_source -p tests/test_files -j -o test_scan/scan_result -m + cat test_scan/scan_result_SRC.csv fosslight_convert -p tests/json_result/scan_has_error.json -o test_convert/convert_result2 - fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result + fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result -m cat test_convert/convert_result_SRC.csv python tests/cli_test.py @@ -34,10 +34,10 @@ deps = commands = fosslight_source -h fosslight_convert -h - fosslight_source -p tests/test_files -j -o test_scan/scan_result + fosslight_source -p tests/test_files -j -o test_scan/scan_result -m cat test_scan/scan_result_SRC.csv 
fosslight_convert -p tests/json_result/scan_has_error.json -o test_convert/convert_result2 - fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result + fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result -m cat test_convert/convert_result_SRC.csv python tests/cli_test.py pytest -v --flake8