diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 8ace4bb..35d2663 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -7,6 +7,7 @@ import logging import re import fosslight_util.constant as constant +import mmap from ._license_matched import MatchedLicense from ._scan_item import ScanItem from ._scan_item import is_exclude_dir @@ -40,7 +41,7 @@ def get_error_from_header(header_item): return has_error, str_error -def parsing_file_item(scancode_file_list, has_error, need_matched_license=False): +def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False): rc = True scancode_file_item = [] @@ -50,6 +51,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) prev_dir = "" prev_dir_value = False regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) + find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE) if scancode_file_list: for file in scancode_file_list: @@ -71,6 +73,18 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) result_item = ScanItem(file_path) + fullpath = os.path.join(path_to_scan, file_path) + + urls = file.get("urls", []) + url_list = [] + + if urls: + with open(fullpath, "r") as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj: + for word in find_word.findall(mmap_obj): + url_list.append(word.decode('utf-8')) + result_item.download_location = url_list + if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: @@ -165,7 +179,6 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if is_exclude_file(file_path, prev_dir, prev_dir_value): result_item.exclude = True scancode_file_item.append(result_item) - except Exception as ex: msg.append(f"Error Parsing item: {ex}") rc = False diff --git a/src/fosslight_source/_parsing_scanoss_file.py b/src/fosslight_source/_parsing_scanoss_file.py index de7d5e1..a39384f 100644 --- a/src/fosslight_source/_parsing_scanoss_file.py +++ b/src/fosslight_source/_parsing_scanoss_file.py @@ -48,7 +48,7 @@ def parsing_scanResult(scanoss_report): if 'version' in findings[0]: result_item.oss_version = findings[0]['version'] if 'url' in findings[0]: - result_item.download_location = findings[0]['url'] + result_item.download_location = list([findings[0]['url']]) license_detected = [] license_w_source = {"component_declared": [], "file_spdx_tag": [], diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 213fecc..5a830c9 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -27,7 +27,7 @@ class ScanItem: is_license_text = False oss_name = "" oss_version = "" - download_location = "" + download_location = [] matched_lines = "" # Only for SCANOSS results fileURL = "" # Only for SCANOSS results license_reference = "" @@ -36,6 +36,7 @@ def __init__(self, value): self.file = value self._copyright = [] self._licenses = [] + self.download_location = [] self.comment = "" self.exclude = False self.is_license_text = False @@ -63,23 +64,42 @@ def licenses(self, value): if len(self._licenses) > 0: self._licenses = list(set(self._licenses)) + def get_file(self): + return self.file + def get_row_to_print(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", - self.comment] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), + "", "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), + url, "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) return print_rows def get_row_to_print_for_scanoss(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", self.comment] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) return print_rows def get_row_to_print_for_all_scanner(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", self.comment, self.license_reference] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment, + self.license_reference]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment, + self.license_reference]) return print_rows def merge_scan_item(self, other): @@ -104,7 +124,7 @@ def merge_scan_item(self, other): if not self.oss_version: self.oss_version = other.oss_version if not self.download_location: - self.download_location = other.download_location + self.download_location = list(other.download_location) if not self.matched_lines: self.matched_lines = other.matched_lines if not self.fileURL: diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index a30dcde..84aa5c4 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -52,6 +52,7 @@ def main(): scanned_result = [] license_list = [] + scanoss_result = [] time_out = 120 core = -1 @@ -117,19 +118,16 @@ def main(): success, _result_log["Scan Result"], scanned_result, license_list = run_scan(path_to_scan, output_file_name, write_json_file, core, True, print_matched_text, format, True, - time_out, correct_mode, - correct_filepath) + time_out, correct_mode, correct_filepath) elif selected_scanner == 'scanoss': scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file) elif selected_scanner == 'all' or selected_scanner == '': - success, _result_log["Scan Result"], scanned_result, license_list = run_all_scanners(path_to_scan, output_file_name, - write_json_file, core, - print_matched_text, format, True, - time_out) + success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners( + path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out) else: print_help_msg_source_scanner() sys.exit(1) - create_report_file(_start_time, scanned_result, license_list, selected_scanner, print_matched_text, + create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, print_matched_text, output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) try: logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)) @@ -140,7 +138,7 @@ def main(): sys.exit(1) -def create_report_file(_start_time, scanned_result, license_list, selected_scanner, need_license=False, +def create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, need_license=False, output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="", path_to_scan=""): """ @@ -173,24 +171,33 @@ def create_report_file(_start_time, scanned_result, license_list, selected_scann scanned_result = sorted(scanned_result, key=lambda row: (''.join(row.licenses))) if selected_scanner == 'scancode' or output_extension == _json_ext: - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print(): + sheet_list[SCANOSS_SHEET_NAME].append(row) elif selected_scanner == 'scanoss': - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_scanoss() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print_for_scanoss(): + sheet_list[SCANOSS_SHEET_NAME].append(row) extended_header = SCANOSS_HEADER else: - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_all_scanner() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print_for_all_scanner(): + sheet_list[SCANOSS_SHEET_NAME].append(row) extended_header = MERGED_HEADER if need_license: if selected_scanner == 'scancode' or output_extension == _json_ext: sheet_list["scancode_reference"] = get_license_list_to_print(license_list) elif selected_scanner == 'scanoss': - sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result) + sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result) else: sheet_list["scancode_reference"] = get_license_list_to_print(license_list) - sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result) + sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result) if correct_mode: success, msg_correct, correct_list = correct_with_yaml(correct_filepath, path_to_scan, sheet_list) @@ -242,16 +249,17 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, False, "") scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file) + scanoss_result_for_merging = copy.deepcopy(scanoss_result) for file_in_scancode_result in scancode_result: per_file_result = copy.deepcopy(file_in_scancode_result) - if per_file_result in scanoss_result: - per_file_result.merge_scan_item(scanoss_result.pop(scanoss_result.index(file_in_scancode_result))) + if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist + scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result)) merged_result.append(per_file_result) - if scanoss_result: - for file_left_in_scanoss_result in scanoss_result: + if scanoss_result_for_merging: + for file_left_in_scanoss_result in scanoss_result_for_merging: merged_result.append(file_left_in_scanoss_result) - return success, _result_log["Scan Result"], merged_result, license_list + return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result if __name__ == '__main__': diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index ba8c608..6289710 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -75,7 +75,7 @@ def run_scan(path_to_scan, output_file_name="", processes=num_cores, output_json_pp=output_json_file, only_findings=True, license_text=True, - timeout=time_out) + url=True, timeout=time_out) if not rc: msg = "Source code analysis failed." @@ -90,7 +90,8 @@ def run_scan(path_to_scan, output_file_name="", _result_log["Error_files"] = error_msg msg = "Failed to analyze :" + error_msg if "files" in results: - rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], + has_error, path_to_scan, need_license) if parsing_msg: _result_log["Parsing Log"] = parsing_msg if rc: diff --git a/src/fosslight_source/run_scanoss.py b/src/fosslight_source/run_scanoss.py index e788df6..4e7e178 100755 --- a/src/fosslight_source/run_scanoss.py +++ b/src/fosslight_source/run_scanoss.py @@ -68,7 +68,7 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F if num_threads > 0: scan_command += " -T " + str(num_threads) else: - scan_command += " -T " + "30" + scan_command += " -T " + "10" try: os.system(scan_command) diff --git a/tests/test_files/run_scancode.py b/tests/test_files/run_scancode.py index ef16700..252065d 100755 --- a/tests/test_files/run_scancode.py +++ b/tests/test_files/run_scancode.py @@ -3,6 +3,9 @@ # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 +# SPDX-PackageDownloadLocation: https://dummy_url_for_test.com +# The code is not licensed under GPL-2.0. + import sys import os import multiprocessing diff --git a/tests/test_files/run_scancode2.py b/tests/test_files/run_scancode2.py new file mode 100755 index 0000000..1e38bf8 --- /dev/null +++ b/tests/test_files/run_scancode2.py @@ -0,0 +1,155 @@ +# This file is a sample code for testing. +# It should not be listed at Scancode result and +# should be listed at SCANOSS result. + +import sys +import os +import multiprocessing +import warnings +import platform +import getopt +import logging +import yaml +from scancode import cli +from datetime import datetime +import fosslight_util.constant as constant +from fosslight_util.set_log import init_log +from fosslight_util.timer_thread import TimerThread +from ._parsing_scancode_file_item import parsing_file_item +from ._parsing_scancode_file_item import get_error_from_header +from fosslight_util.write_excel import write_excel_and_csv +from ._help import print_help_msg_source_scanner +from ._license_matched import get_license_list_to_print + +logger = logging.getLogger(constant.LOGGER_NAME) +warnings.filterwarnings("ignore", category=FutureWarning) +_PKG_NAME = "fosslight_source" + + +def main(): + argv = sys.argv[1:] + path_to_scan = "" + write_json_file = False + output_file = "" + print_matched_text = False + + try: + opts, args = getopt.getopt(argv, 'hmjp:o:') + for opt, arg in opts: + if opt == "-h": + print_help_msg_source_scanner() + elif opt == "-p": + path_to_scan = arg + elif opt == "-j": + write_json_file = True + elif opt == "-o": + output_file = arg + elif opt == "-m": + print_matched_text = True + except Exception: + print_help_msg_source_scanner() + + timer = TimerThread() + timer.setDaemon(True) + timer.start() + run_scan(path_to_scan, output_file, write_json_file, -1, False, print_matched_text) + + +def run_scan(path_to_scan, output_file_name="", + _write_json_file=False, num_cores=-1, return_results=False, need_license=False): + global logger + + success = True + msg = "" + _str_final_result_log = "" + _result_log = {} + result_list = [] + + _windows = platform.system() == "Windows" + _start_time = datetime.now().strftime('%y%m%d_%H%M') + + if output_file_name == "": + output_file = f"fosslight_report_{_start_time}" + output_json_file = f"scancode_{_start_time}" + output_dir = os.getcwd() + else: + output_file = output_file_name + output_json_file = output_file_name + output_dir = os.path.dirname(os.path.abspath(output_file_name)) + + logger, _result_log = init_log(os.path.join(output_dir, f"fosslight_log_{_start_time}.txt"), + True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan) + + if path_to_scan == "": + if _windows: + path_to_scan = os.getcwd() + else: + print_help_msg_source_scanner() + + num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores + + if os.path.isdir(path_to_scan): + try: + output_json_file = f"{output_json_file}.json" if _write_json_file\ + else "" + + rc, results = cli.run_scan(path_to_scan, max_depth=100, + strip_root=True, license=True, + copyright=True, return_results=True, + processes=num_cores, + output_json_pp=output_json_file, + only_findings=True, license_text=True) + + if not rc: + msg = "Source code analysis failed." + success = False + + if results: + sheet_list = {} + has_error = False + if "headers" in results: + has_error, error_msg = get_error_from_header(results["headers"]) + if has_error: + _result_log["Error_files"] = error_msg + msg = "Failed to analyze :" + error_msg + if "files" in results: + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license) + _result_log["Parsing Log"] = parsing_msg + if rc: + if not success: + success = True + result_list = sorted( + result_list, key=lambda row: (''.join(row.licenses))) + sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in result_list] + if need_license: + sheet_list["matched_text"] = get_license_list_to_print(license_list) + + success_to_write, writing_msg = write_excel_and_csv( + output_file, sheet_list) + logger.info(f"Writing excel : {success_to_write} {writing_msg}") + if success_to_write: + _result_log["FOSSLight Report"] = f"{output_file}.xlsx" + except Exception as ex: + success = False + msg = str(ex) + logger.error(f"Analyze {path_to_scan}: {msg}") + else: + success = False + msg = f"Check the path to scan. : {path_to_scan}" + + if not return_results: + result_list = [] + + scan_result_msg = str(success) if msg == "" else str(success) + "," + msg + _result_log["Scan Result"] = scan_result_msg + _result_log["Output Directory"] = output_dir + try: + _str_final_result_log = yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True) + logger.info(_str_final_result_log) + except Exception as ex: + logger.warning(f"Failed to print result log. {ex}") + return success, _result_log["Scan Result"], result_list + + +if __name__ == '__main__': + main() diff --git a/tests/test_files/sample.cpp b/tests/test_files/sample.cpp index 1ffaca7..53377be 100644 --- a/tests/test_files/sample.cpp +++ b/tests/test_files/sample.cpp @@ -5,6 +5,9 @@ * SPDX-FileCopyrightText: Copyright 2017 Free Software Foundation Europe e.V. * SPDX-License-Identifier: MIT * DownloadLocation: https://github.com/fsfe/reuse-tool + * SPDX-PackageDownloadLocation: https://dummy_url_for_test.com + * SPDX-PackageDownloadLocation: https://second_dummy_url_for_test.com + * SPDX-PackageDownloadLocation: https://third_dummy_url_for_test.com */ #include