From 9b531d8addfbbe3da9e74a7cc66ae24f29efa419 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 25 May 2023 18:44:45 +0900 Subject: [PATCH 01/13] Read download location --- src/fosslight_source/_help.py | 1 + .../_parsing_scancode_file_item.py | 44 ++++- src/fosslight_source/_scan_item.py | 8 + src/fosslight_source/cli.py | 50 ++++-- src/fosslight_source/run_scancode.py | 6 +- tests/test_files/run_scancode.py | 2 + tests/test_files/run_scancode2.py | 151 ++++++++++++++++++ tests/test_files/sample.cpp | 1 + tox.ini | 3 + 9 files changed, 246 insertions(+), 20 deletions(-) create mode 100755 tests/test_files/run_scancode2.py diff --git a/src/fosslight_source/_help.py b/src/fosslight_source/_help.py index 3b77321..ee7a056 100644 --- a/src/fosslight_source/_help.py +++ b/src/fosslight_source/_help.py @@ -29,6 +29,7 @@ -j\t\t\t Generate raw result of scanners in json format -t \t\t Stop scancode scanning if scanning takes longer than a timeout in seconds. -c \t\t Select the number of cores to be scanned with ScanCode. + -u \t\t Print scanned url information --no_correction\t Enter if you don't want to correct OSS information with sbom-info.yaml --correct_fpath Path to the sbom-info.yaml file""" diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 8ace4bb..1b804d1 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -7,11 +7,13 @@ import logging import re import fosslight_util.constant as constant +import mmap from ._license_matched import MatchedLicense from ._scan_item import ScanItem from ._scan_item import is_exclude_dir from ._scan_item import is_exclude_file from ._scan_item import replace_word +import copy logger = logging.getLogger(constant.LOGGER_NAME) _exclude_directory = ["test", "tests", "doc", "docs"] @@ -40,7 +42,7 @@ def get_error_from_header(header_item): return has_error, str_error -def parsing_file_item(scancode_file_list, has_error, need_matched_license=False): +def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False, need_url=False): rc = True scancode_file_item = [] @@ -51,6 +53,8 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) prev_dir_value = False regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) + url_count = 0 + if scancode_file_list: for file in scancode_file_list: try: @@ -70,7 +74,30 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) copyright_list = file.get("copyrights", []) result_item = ScanItem(file_path) - + + if need_url: + fullpath = path_to_scan + '/' + file_path + + urls = file.get("urls", []) + url_list = [] + + if urls: + url_count += 1 + test_file = open(fullpath,"r") + # First method. read the lines brute force + for line in test_file: + if "SPDX-PackageDownloadLocation: " in line: + spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() + url_list.append(spdx_download_location) + # Second method. search with mmap + """mapped_file = mmap.mmap(test_file.fileno(),0,access=mmap.ACCESS_READ) + lines = mapped_file.read().split(b'\n') + for line in lines: + if line.find("SPDX-PackageDownloadLocation: ".encode()) != -1: + spdx_download_location = line.decode() + spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', spdx_download_location) + url_list.append(spdx_download_location)""" + # result_item.download_location = ",".join(url_list) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: @@ -164,10 +191,19 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if is_exclude_file(file_path, prev_dir, prev_dir_value): result_item.exclude = True - scancode_file_item.append(result_item) - + if need_url: + if url_list: + for url in url_list: + temp_result_item = copy.deepcopy(result_item) + temp_result_item.download_location = url + scancode_file_item.append(temp_result_item) + else: + scancode_file_item.append(result_item) + else: + scancode_file_item.append(result_item) except Exception as ex: msg.append(f"Error Parsing item: {ex}") rc = False + logger.info(f"URL FILE COUNT : {url_count}") msg = list(set(msg)) return rc, scancode_file_item, msg, license_list diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 213fecc..d849105 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -28,6 +28,7 @@ class ScanItem: oss_name = "" oss_version = "" download_location = "" + #spdx_download_location = "" matched_lines = "" # Only for SCANOSS results fileURL = "" # Only for SCANOSS results license_reference = "" @@ -63,6 +64,8 @@ def licenses(self, value): if len(self._licenses) > 0: self._licenses = list(set(self._licenses)) + def get_file(self): + return self.file def get_row_to_print(self): print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", ','.join(self.copyright), @@ -105,6 +108,11 @@ def merge_scan_item(self, other): self.oss_version = other.oss_version if not self.download_location: self.download_location = other.download_location + elif self.download_location != other.download_location: + if self.comment == "": + self.comment = "(SCANOSS URL)" + other.download_location + else: + self.comment = self.comment + "|(SCANOSS URL)" + other.download_location if not self.matched_lines: self.matched_lines = other.matched_lines if not self.fileURL: diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index a30dcde..dbaa579 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -52,8 +52,13 @@ def main(): scanned_result = [] license_list = [] + scanoss_result = [] time_out = 120 core = -1 + print_url = False + + start_time = datetime.now() + formatted_start_time = start_time.strftime("%Y-%m-%d %H:%M:%S") parser = argparse.ArgumentParser(description='FOSSLight Source', prog='fosslight_source', add_help=False) parser.add_argument('-h', '--help', action='store_true', required=False) @@ -68,6 +73,7 @@ def main(): parser.add_argument('-c', '--cores', type=int, required=False, default=-1) parser.add_argument('--no_correction', action='store_true', required=False) parser.add_argument('--correct_fpath', nargs=1, type=str, required=False) + parser.add_argument('-u', '--url', action='store_true', required=False) args = parser.parse_args() @@ -93,6 +99,8 @@ def main(): correct_filepath = path_to_scan if args.correct_fpath: correct_filepath = ''.join(args.correct_fpath) + if args.url: + print_url = True time_out = args.timeout core = args.cores @@ -118,29 +126,42 @@ def main(): write_json_file, core, True, print_matched_text, format, True, time_out, correct_mode, - correct_filepath) + correct_filepath,print_url) elif selected_scanner == 'scanoss': scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file) elif selected_scanner == 'all' or selected_scanner == '': - success, _result_log["Scan Result"], scanned_result, license_list = run_all_scanners(path_to_scan, output_file_name, + success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners(path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, - time_out) + time_out,print_url) else: print_help_msg_source_scanner() sys.exit(1) - create_report_file(_start_time, scanned_result, license_list, selected_scanner, print_matched_text, + create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, print_matched_text, output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan) try: logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)) except Exception as ex: logger.debug(f"Failed to print log.: {ex}") + end_time = datetime.now() + formatted_end_time = end_time.strftime("%Y-%m-%d %H:%M:%S") + + duration = end_time - start_time + total_seconds = int(duration.total_seconds()) + + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + seconds = total_seconds % 60 + + logger.info(f"Start time : {formatted_start_time}") + logger.info(f"End time : {formatted_end_time}") + logger.info(f"Duration : {hours}:{minutes}:{seconds}") else: logger.error(f"Check the path to scan. : {path_to_scan}") sys.exit(1) -def create_report_file(_start_time, scanned_result, license_list, selected_scanner, need_license=False, +def create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, need_license=False, output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="", path_to_scan=""): """ @@ -187,10 +208,10 @@ def create_report_file(_start_time, scanned_result, license_list, selected_scann if selected_scanner == 'scancode' or output_extension == _json_ext: sheet_list["scancode_reference"] = get_license_list_to_print(license_list) elif selected_scanner == 'scanoss': - sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result) + sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result) else: sheet_list["scancode_reference"] = get_license_list_to_print(license_list) - sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result) + sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result) if correct_mode: success, msg_correct, correct_list = correct_with_yaml(correct_filepath, path_to_scan, sheet_list) @@ -213,7 +234,7 @@ def create_report_file(_start_time, scanned_result, license_list, selected_scann def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, - need_license=False, format="", called_by_cli=True, time_out=120): + need_license=False, format="", called_by_cli=True, time_out=120, print_url=False): """ Run Scancode and scanoss.py for the given path. @@ -239,19 +260,20 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, _write_json_file, num_cores, True, need_license, format, called_by_cli, time_out, - False, "") + False, "", print_url) scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file) + scanoss_result_for_merging = copy.deepcopy(scanoss_result) for file_in_scancode_result in scancode_result: per_file_result = copy.deepcopy(file_in_scancode_result) - if per_file_result in scanoss_result: - per_file_result.merge_scan_item(scanoss_result.pop(scanoss_result.index(file_in_scancode_result))) + if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist + scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result)) merged_result.append(per_file_result) - if scanoss_result: - for file_left_in_scanoss_result in scanoss_result: + if scanoss_result_for_merging: + for file_left_in_scanoss_result in scanoss_result_for_merging: merged_result.append(file_left_in_scanoss_result) - return success, _result_log["Scan Result"], merged_result, license_list + return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result if __name__ == '__main__': diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index ba8c608..f5c749a 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -25,7 +25,7 @@ def run_scan(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, return_results=False, need_license=False, format="", - called_by_cli=False, time_out=120, correct_mode=True, correct_filepath=""): + called_by_cli=False, time_out=120, correct_mode=True, correct_filepath="", need_url=False): if not called_by_cli: global logger @@ -75,6 +75,7 @@ def run_scan(path_to_scan, output_file_name="", processes=num_cores, output_json_pp=output_json_file, only_findings=True, license_text=True, + url=need_url, timeout=time_out) if not rc: @@ -90,7 +91,8 @@ def run_scan(path_to_scan, output_file_name="", _result_log["Error_files"] = error_msg msg = "Failed to analyze :" + error_msg if "files" in results: - rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license) + print("path to scan : ", path_to_scan) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, path_to_scan, need_license, need_url) if parsing_msg: _result_log["Parsing Log"] = parsing_msg if rc: diff --git a/tests/test_files/run_scancode.py b/tests/test_files/run_scancode.py index ef16700..8e9faa5 100755 --- a/tests/test_files/run_scancode.py +++ b/tests/test_files/run_scancode.py @@ -2,6 +2,8 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 +# SPDX-PackageDownloadLocation: https://dummy_url_for_test.com +# GPL-2.0 import sys import os diff --git a/tests/test_files/run_scancode2.py b/tests/test_files/run_scancode2.py new file mode 100755 index 0000000..ca04045 --- /dev/null +++ b/tests/test_files/run_scancode2.py @@ -0,0 +1,151 @@ +import sys +import os +import multiprocessing +import warnings +import platform +import getopt +import logging +import yaml +from scancode import cli +from datetime import datetime +import fosslight_util.constant as constant +from fosslight_util.set_log import init_log +from fosslight_util.timer_thread import TimerThread +from ._parsing_scancode_file_item import parsing_file_item +from ._parsing_scancode_file_item import get_error_from_header +from fosslight_util.write_excel import write_excel_and_csv +from ._help import print_help_msg_source_scanner +from ._license_matched import get_license_list_to_print + +logger = logging.getLogger(constant.LOGGER_NAME) +warnings.filterwarnings("ignore", category=FutureWarning) +_PKG_NAME = "fosslight_source" + + +def main(): + argv = sys.argv[1:] + path_to_scan = "" + write_json_file = False + output_file = "" + print_matched_text = False + + try: + opts, args = getopt.getopt(argv, 'hmjp:o:') + for opt, arg in opts: + if opt == "-h": + print_help_msg_source_scanner() + elif opt == "-p": + path_to_scan = arg + elif opt == "-j": + write_json_file = True + elif opt == "-o": + output_file = arg + elif opt == "-m": + print_matched_text = True + except Exception: + print_help_msg_source_scanner() + + timer = TimerThread() + timer.setDaemon(True) + timer.start() + run_scan(path_to_scan, output_file, write_json_file, -1, False, print_matched_text) + + +def run_scan(path_to_scan, output_file_name="", + _write_json_file=False, num_cores=-1, return_results=False, need_license=False): + global logger + + success = True + msg = "" + _str_final_result_log = "" + _result_log = {} + result_list = [] + + _windows = platform.system() == "Windows" + _start_time = datetime.now().strftime('%y%m%d_%H%M') + + if output_file_name == "": + output_file = f"fosslight_report_{_start_time}" + output_json_file = f"scancode_{_start_time}" + output_dir = os.getcwd() + else: + output_file = output_file_name + output_json_file = output_file_name + output_dir = os.path.dirname(os.path.abspath(output_file_name)) + + logger, _result_log = init_log(os.path.join(output_dir, f"fosslight_log_{_start_time}.txt"), + True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan) + + if path_to_scan == "": + if _windows: + path_to_scan = os.getcwd() + else: + print_help_msg_source_scanner() + + num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores + + if os.path.isdir(path_to_scan): + try: + output_json_file = f"{output_json_file}.json" if _write_json_file\ + else "" + + rc, results = cli.run_scan(path_to_scan, max_depth=100, + strip_root=True, license=True, + copyright=True, return_results=True, + processes=num_cores, + output_json_pp=output_json_file, + only_findings=True, license_text=True) + + if not rc: + msg = "Source code analysis failed." + success = False + + if results: + sheet_list = {} + has_error = False + if "headers" in results: + has_error, error_msg = get_error_from_header(results["headers"]) + if has_error: + _result_log["Error_files"] = error_msg + msg = "Failed to analyze :" + error_msg + if "files" in results: + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license) + _result_log["Parsing Log"] = parsing_msg + if rc: + if not success: + success = True + result_list = sorted( + result_list, key=lambda row: (''.join(row.licenses))) + sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in result_list] + if need_license: + sheet_list["matched_text"] = get_license_list_to_print(license_list) + + success_to_write, writing_msg = write_excel_and_csv( + output_file, sheet_list) + logger.info(f"Writing excel : {success_to_write} {writing_msg}") + if success_to_write: + _result_log["FOSSLight Report"] = f"{output_file}.xlsx" + except Exception as ex: + success = False + msg = str(ex) + logger.error(f"Analyze {path_to_scan}: {msg}") + else: + success = False + msg = f"Check the path to scan. : {path_to_scan}" + + if not return_results: + result_list = [] + + scan_result_msg = str(success) if msg == "" else str(success) + "," + msg + _result_log["Scan Result"] = scan_result_msg + _result_log["Output Directory"] = output_dir + try: + _str_final_result_log = yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True) + logger.info(_str_final_result_log) + except Exception as ex: + logger.warning(f"Failed to print result log. {ex}") + return success, _result_log["Scan Result"], result_list + + +if __name__ == '__main__': + main() diff --git a/tests/test_files/sample.cpp b/tests/test_files/sample.cpp index 1ffaca7..d0b99ae 100644 --- a/tests/test_files/sample.cpp +++ b/tests/test_files/sample.cpp @@ -5,6 +5,7 @@ * SPDX-FileCopyrightText: Copyright 2017 Free Software Foundation Europe e.V. * SPDX-License-Identifier: MIT * DownloadLocation: https://github.com/fsfe/reuse-tool + * SPDX-PackageDownloadLocation: https://github.com/fosslight/fosslight_util */ #include diff --git a/tox.ini b/tox.ini index 5d1b558..d65c539 100644 --- a/tox.ini +++ b/tox.ini @@ -42,6 +42,9 @@ commands = fosslight_source -p tests/test_files -m -j -o test_scan2/ ls test_scan2/ + fosslight_source -p tests/test_files -m -j -o test_scan3/ -s all -u + ls test_scan3/ + fosslight_convert -p tests/scancode_raw.json -o test_convert -f opossum python tests/cli_test.py From f701672e96f13609470180d8f1f96316c733e290 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Fri, 9 Jun 2023 17:34:59 +0900 Subject: [PATCH 02/13] fix method2 --- .../_parsing_scancode_file_item.py | 22 ++++++++++++++++--- src/fosslight_source/cli.py | 9 ++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 1b804d1..96ee169 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -76,19 +76,19 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ result_item = ScanItem(file_path) if need_url: - fullpath = path_to_scan + '/' + file_path + fullpath = os.path.join(path_to_scan, file_path) urls = file.get("urls", []) url_list = [] if urls: url_count += 1 - test_file = open(fullpath,"r") # First method. read the lines brute force + """test_file = open(fullpath,"r") for line in test_file: if "SPDX-PackageDownloadLocation: " in line: spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() - url_list.append(spdx_download_location) + url_list.append(spdx_download_location)""" # Second method. search with mmap """mapped_file = mmap.mmap(test_file.fileno(),0,access=mmap.ACCESS_READ) lines = mapped_file.read().split(b'\n') @@ -97,6 +97,22 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ spdx_download_location = line.decode() spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', spdx_download_location) url_list.append(spdx_download_location)""" + # Redo the mmap + search_term = "SPDX-PackageDownloadLocation: ".encode() + with open(fullpath, "r") as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + start = 0 + while True: + # Find the next occurrence of the search term + start = m.find(search_term, start) + if start == -1: + break + # Extract the line that contains the search term + line = m[start:].split(b"\n")[0].decode() + spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line) + url_list.append(spdx_download_location) + # Move the start position to the end of the line + start += len(line) # result_item.download_location = ",".join(url_list) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index dbaa579..4fac2fd 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -8,7 +8,10 @@ import warnings import logging import copy + from datetime import datetime +import pytz + import fosslight_util.constant as constant from fosslight_util.set_log import init_log from fosslight_util.timer_thread import TimerThread @@ -57,7 +60,9 @@ def main(): core = -1 print_url = False - start_time = datetime.now() + # Set the timezone to KST + kst = pytz.timezone('Asia/Seoul') + start_time = datetime.now(kst) formatted_start_time = start_time.strftime("%Y-%m-%d %H:%M:%S") parser = argparse.ArgumentParser(description='FOSSLight Source', prog='fosslight_source', add_help=False) @@ -143,7 +148,7 @@ def main(): logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)) except Exception as ex: logger.debug(f"Failed to print log.: {ex}") - end_time = datetime.now() + end_time = datetime.now(kst) formatted_end_time = end_time.strftime("%Y-%m-%d %H:%M:%S") duration = end_time - start_time From b8ed8030a341c38faf531fd151cab3c9eb31400e Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Fri, 9 Jun 2023 17:37:30 +0900 Subject: [PATCH 03/13] reduce thread for scanoss --- src/fosslight_source/run_scanoss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fosslight_source/run_scanoss.py b/src/fosslight_source/run_scanoss.py index e788df6..4e7e178 100755 --- a/src/fosslight_source/run_scanoss.py +++ b/src/fosslight_source/run_scanoss.py @@ -68,7 +68,7 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F if num_threads > 0: scan_command += " -T " + str(num_threads) else: - scan_command += " -T " + "30" + scan_command += " -T " + "10" try: os.system(scan_command) From be343a0e417b5685c1b1911864c7fded072dbabb Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Mon, 12 Jun 2023 16:47:34 +0900 Subject: [PATCH 04/13] cleanup --- .../_parsing_scancode_file_item.py | 8 +++---- src/fosslight_source/cli.py | 21 ------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 96ee169..452c3f9 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -84,11 +84,11 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ if urls: url_count += 1 # First method. read the lines brute force - """test_file = open(fullpath,"r") + test_file = open(fullpath,"r") for line in test_file: if "SPDX-PackageDownloadLocation: " in line: spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() - url_list.append(spdx_download_location)""" + url_list.append(spdx_download_location) # Second method. search with mmap """mapped_file = mmap.mmap(test_file.fileno(),0,access=mmap.ACCESS_READ) lines = mapped_file.read().split(b'\n') @@ -98,7 +98,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', spdx_download_location) url_list.append(spdx_download_location)""" # Redo the mmap - search_term = "SPDX-PackageDownloadLocation: ".encode() + """search_term = "SPDX-PackageDownloadLocation: ".encode() with open(fullpath, "r") as f: with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: start = 0 @@ -112,7 +112,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line) url_list.append(spdx_download_location) # Move the start position to the end of the line - start += len(line) + start += len(line)""" # result_item.download_location = ",".join(url_list) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index 4fac2fd..af1d690 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -8,10 +8,7 @@ import warnings import logging import copy - from datetime import datetime -import pytz - import fosslight_util.constant as constant from fosslight_util.set_log import init_log from fosslight_util.timer_thread import TimerThread @@ -60,11 +57,6 @@ def main(): core = -1 print_url = False - # Set the timezone to KST - kst = pytz.timezone('Asia/Seoul') - start_time = datetime.now(kst) - formatted_start_time = start_time.strftime("%Y-%m-%d %H:%M:%S") - parser = argparse.ArgumentParser(description='FOSSLight Source', prog='fosslight_source', add_help=False) parser.add_argument('-h', '--help', action='store_true', required=False) parser.add_argument('-v', '--version', action='store_true', required=False) @@ -148,19 +140,6 @@ def main(): logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)) except Exception as ex: logger.debug(f"Failed to print log.: {ex}") - end_time = datetime.now(kst) - formatted_end_time = end_time.strftime("%Y-%m-%d %H:%M:%S") - - duration = end_time - start_time - total_seconds = int(duration.total_seconds()) - - hours = total_seconds // 3600 - minutes = (total_seconds % 3600) // 60 - seconds = total_seconds % 60 - - logger.info(f"Start time : {formatted_start_time}") - logger.info(f"End time : {formatted_end_time}") - logger.info(f"Duration : {hours}:{minutes}:{seconds}") else: logger.error(f"Check the path to scan. : {path_to_scan}") sys.exit(1) From e4fb6ecc4ca6881813229019be4cdd6cb53ec7d6 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Mon, 12 Jun 2023 17:11:01 +0900 Subject: [PATCH 05/13] fix for tox --- .../_parsing_scancode_file_item.py | 24 +++++++------------ src/fosslight_source/_scan_item.py | 9 ++++--- src/fosslight_source/cli.py | 10 ++++---- src/fosslight_source/run_scancode.py | 3 ++- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 452c3f9..0d8157b 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -7,7 +7,7 @@ import logging import re import fosslight_util.constant as constant -import mmap +# import mmap from ._license_matched import MatchedLicense from ._scan_item import ScanItem from ._scan_item import is_exclude_dir @@ -74,30 +74,23 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ copyright_list = file.get("copyrights", []) result_item = ScanItem(file_path) - + if need_url: fullpath = os.path.join(path_to_scan, file_path) urls = file.get("urls", []) url_list = [] - + if urls: url_count += 1 # First method. read the lines brute force - test_file = open(fullpath,"r") + test_file = open(fullpath, "r") for line in test_file: if "SPDX-PackageDownloadLocation: " in line: - spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() + spdx_download_location = re.sub( + r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() url_list.append(spdx_download_location) # Second method. search with mmap - """mapped_file = mmap.mmap(test_file.fileno(),0,access=mmap.ACCESS_READ) - lines = mapped_file.read().split(b'\n') - for line in lines: - if line.find("SPDX-PackageDownloadLocation: ".encode()) != -1: - spdx_download_location = line.decode() - spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', spdx_download_location) - url_list.append(spdx_download_location)""" - # Redo the mmap """search_term = "SPDX-PackageDownloadLocation: ".encode() with open(fullpath, "r") as f: with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: @@ -109,11 +102,11 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ break # Extract the line that contains the search term line = m[start:].split(b"\n")[0].decode() - spdx_download_location = re.sub(r'.*?SPDX-PackageDownloadLocation: ', '', line) + spdx_download_location = re.sub( + r'.*?SPDX-PackageDownloadLocation: ', '', line) url_list.append(spdx_download_location) # Move the start position to the end of the line start += len(line)""" - # result_item.download_location = ",".join(url_list) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: @@ -220,6 +213,5 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ except Exception as ex: msg.append(f"Error Parsing item: {ex}") rc = False - logger.info(f"URL FILE COUNT : {url_count}") msg = list(set(msg)) return rc, scancode_file_item, msg, license_list diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index d849105..a438ec5 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -28,7 +28,6 @@ class ScanItem: oss_name = "" oss_version = "" download_location = "" - #spdx_download_location = "" matched_lines = "" # Only for SCANOSS results fileURL = "" # Only for SCANOSS results license_reference = "" @@ -66,11 +65,11 @@ def licenses(self, value): def get_file(self): return self.file + def get_row_to_print(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", - self.comment] + print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), + self.download_location, "", ','.join(self.copyright), + "Exclude" if self.exclude else "", self.comment] return print_rows def get_row_to_print_for_scanoss(self): diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index af1d690..319af27 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -123,14 +123,12 @@ def main(): write_json_file, core, True, print_matched_text, format, True, time_out, correct_mode, - correct_filepath,print_url) + correct_filepath, print_url) elif selected_scanner == 'scanoss': scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file) elif selected_scanner == 'all' or selected_scanner == '': - success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners(path_to_scan, output_file_name, - write_json_file, core, - print_matched_text, format, True, - time_out,print_url) + success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners( + path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out, print_url) else: print_help_msg_source_scanner() sys.exit(1) @@ -250,7 +248,7 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, scanoss_result_for_merging = copy.deepcopy(scanoss_result) for file_in_scancode_result in scancode_result: per_file_result = copy.deepcopy(file_in_scancode_result) - if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist + if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result)) merged_result.append(per_file_result) if scanoss_result_for_merging: diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index f5c749a..e41babe 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -92,7 +92,8 @@ def run_scan(path_to_scan, output_file_name="", msg = "Failed to analyze :" + error_msg if "files" in results: print("path to scan : ", path_to_scan) - rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, path_to_scan, need_license, need_url) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, + path_to_scan, need_license, need_url) if parsing_msg: _result_log["Parsing Log"] = parsing_msg if rc: From 40fda046e7edfef3ff4332f05b0eef522a2d5ca9 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 09:02:06 +0900 Subject: [PATCH 06/13] modify tests samples --- tests/test_files/run_scancode.py | 2 +- tests/test_files/run_scancode2.py | 4 ++++ tests/test_files/sample.cpp | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_files/run_scancode.py b/tests/test_files/run_scancode.py index 8e9faa5..d69fb48 100755 --- a/tests/test_files/run_scancode.py +++ b/tests/test_files/run_scancode.py @@ -3,7 +3,7 @@ # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 # SPDX-PackageDownloadLocation: https://dummy_url_for_test.com -# GPL-2.0 +# The code is not licensed under GPL-2.0. import sys import os diff --git a/tests/test_files/run_scancode2.py b/tests/test_files/run_scancode2.py index ca04045..1e38bf8 100755 --- a/tests/test_files/run_scancode2.py +++ b/tests/test_files/run_scancode2.py @@ -1,3 +1,7 @@ +# This file is a sample code for testing. +# It should not be listed at Scancode result and +# should be listed at SCANOSS result. + import sys import os import multiprocessing diff --git a/tests/test_files/sample.cpp b/tests/test_files/sample.cpp index d0b99ae..53377be 100644 --- a/tests/test_files/sample.cpp +++ b/tests/test_files/sample.cpp @@ -5,7 +5,9 @@ * SPDX-FileCopyrightText: Copyright 2017 Free Software Foundation Europe e.V. * SPDX-License-Identifier: MIT * DownloadLocation: https://github.com/fsfe/reuse-tool - * SPDX-PackageDownloadLocation: https://github.com/fosslight/fosslight_util + * SPDX-PackageDownloadLocation: https://dummy_url_for_test.com + * SPDX-PackageDownloadLocation: https://second_dummy_url_for_test.com + * SPDX-PackageDownloadLocation: https://third_dummy_url_for_test.com */ #include From 400a2d3ead4afa54da1f855a97d479d9bc0cc3c3 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 13:23:15 +0900 Subject: [PATCH 07/13] reflect review --- .../_parsing_scancode_file_item.py | 17 +++-------------- src/fosslight_source/_scan_item.py | 5 ----- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 0d8157b..acf5594 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -7,7 +7,7 @@ import logging import re import fosslight_util.constant as constant -# import mmap +import mmap from ._license_matched import MatchedLicense from ._scan_item import ScanItem from ._scan_item import is_exclude_dir @@ -53,8 +53,6 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ prev_dir_value = False regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) - url_count = 0 - if scancode_file_list: for file in scancode_file_list: try: @@ -82,16 +80,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ url_list = [] if urls: - url_count += 1 - # First method. read the lines brute force - test_file = open(fullpath, "r") - for line in test_file: - if "SPDX-PackageDownloadLocation: " in line: - spdx_download_location = re.sub( - r'.*?SPDX-PackageDownloadLocation: ', '', line).strip() - url_list.append(spdx_download_location) - # Second method. search with mmap - """search_term = "SPDX-PackageDownloadLocation: ".encode() + search_term = "SPDX-PackageDownloadLocation: ".encode() with open(fullpath, "r") as f: with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: start = 0 @@ -106,7 +95,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ r'.*?SPDX-PackageDownloadLocation: ', '', line) url_list.append(spdx_download_location) # Move the start position to the end of the line - start += len(line)""" + start += len(line) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index a438ec5..25202af 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -107,11 +107,6 @@ def merge_scan_item(self, other): self.oss_version = other.oss_version if not self.download_location: self.download_location = other.download_location - elif self.download_location != other.download_location: - if self.comment == "": - self.comment = "(SCANOSS URL)" + other.download_location - else: - self.comment = self.comment + "|(SCANOSS URL)" + other.download_location if not self.matched_lines: self.matched_lines = other.matched_lines if not self.fileURL: From 697d4b05cc904d5deee9ce31622ea0df5e264a5f Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 14:29:19 +0900 Subject: [PATCH 08/13] remove -u option --- .../_parsing_scancode_file_item.py | 61 +++++++++---------- src/fosslight_source/cli.py | 17 +++--- src/fosslight_source/run_scancode.py | 9 +-- 3 files changed, 40 insertions(+), 47 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index acf5594..ddbe4dd 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -42,7 +42,7 @@ def get_error_from_header(header_item): return has_error, str_error -def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False, need_url=False): +def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False): rc = True scancode_file_item = [] @@ -73,29 +73,29 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ result_item = ScanItem(file_path) - if need_url: - fullpath = os.path.join(path_to_scan, file_path) - - urls = file.get("urls", []) - url_list = [] - - if urls: - search_term = "SPDX-PackageDownloadLocation: ".encode() - with open(fullpath, "r") as f: - with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: - start = 0 - while True: - # Find the next occurrence of the search term - start = m.find(search_term, start) - if start == -1: - break - # Extract the line that contains the search term - line = m[start:].split(b"\n")[0].decode() - spdx_download_location = re.sub( - r'.*?SPDX-PackageDownloadLocation: ', '', line) - url_list.append(spdx_download_location) - # Move the start position to the end of the line - start += len(line) + fullpath = os.path.join(path_to_scan, file_path) + + urls = file.get("urls", []) + url_list = [] + print("!!!!!!!! : ",fullpath) + + if urls: + search_term = "SPDX-PackageDownloadLocation:".encode() + with open(fullpath, "r") as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + start = 0 + while True: + # Find the next occurrence of the search term + start = m.find(search_term, start) + if start == -1: + break + # Extract the line that contains the search term + line = m[start:].split(b"\n")[0].decode() + spdx_download_location = re.sub( + r'.*?SPDX-PackageDownloadLocation: ', '', line) + url_list.append(spdx_download_location) + # Move the start position to the end of the line + start += len(line) if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: @@ -189,14 +189,11 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ if is_exclude_file(file_path, prev_dir, prev_dir_value): result_item.exclude = True - if need_url: - if url_list: - for url in url_list: - temp_result_item = copy.deepcopy(result_item) - temp_result_item.download_location = url - scancode_file_item.append(temp_result_item) - else: - scancode_file_item.append(result_item) + if url_list: + for url in url_list: + temp_result_item = copy.deepcopy(result_item) + temp_result_item.download_location = url + scancode_file_item.append(temp_result_item) else: scancode_file_item.append(result_item) except Exception as ex: diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index 319af27..c61bda3 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -55,7 +55,7 @@ def main(): scanoss_result = [] time_out = 120 core = -1 - print_url = False + # print_url = False parser = argparse.ArgumentParser(description='FOSSLight Source', prog='fosslight_source', add_help=False) parser.add_argument('-h', '--help', action='store_true', required=False) @@ -70,7 +70,7 @@ def main(): parser.add_argument('-c', '--cores', type=int, required=False, default=-1) parser.add_argument('--no_correction', action='store_true', required=False) parser.add_argument('--correct_fpath', nargs=1, type=str, required=False) - parser.add_argument('-u', '--url', action='store_true', required=False) + # parser.add_argument('-u', '--url', action='store_true', required=False) args = parser.parse_args() @@ -96,8 +96,8 @@ def main(): correct_filepath = path_to_scan if args.correct_fpath: correct_filepath = ''.join(args.correct_fpath) - if args.url: - print_url = True + #if args.url: + #print_url = True time_out = args.timeout core = args.cores @@ -122,13 +122,12 @@ def main(): success, _result_log["Scan Result"], scanned_result, license_list = run_scan(path_to_scan, output_file_name, write_json_file, core, True, print_matched_text, format, True, - time_out, correct_mode, - correct_filepath, print_url) + time_out, correct_mode, correct_filepath) elif selected_scanner == 'scanoss': scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file) elif selected_scanner == 'all' or selected_scanner == '': success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners( - path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out, print_url) + path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out) else: print_help_msg_source_scanner() sys.exit(1) @@ -216,7 +215,7 @@ def create_report_file(_start_time, scanned_result, license_list, scanoss_result def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, - need_license=False, format="", called_by_cli=True, time_out=120, print_url=False): + need_license=False, format="", called_by_cli=True, time_out=120): """ Run Scancode and scanoss.py for the given path. @@ -242,7 +241,7 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, _write_json_file, num_cores, True, need_license, format, called_by_cli, time_out, - False, "", print_url) + False, "") scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file) scanoss_result_for_merging = copy.deepcopy(scanoss_result) diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index e41babe..438901c 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -25,7 +25,7 @@ def run_scan(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1, return_results=False, need_license=False, format="", - called_by_cli=False, time_out=120, correct_mode=True, correct_filepath="", need_url=False): + called_by_cli=False, time_out=120, correct_mode=True, correct_filepath=""): if not called_by_cli: global logger @@ -75,8 +75,7 @@ def run_scan(path_to_scan, output_file_name="", processes=num_cores, output_json_pp=output_json_file, only_findings=True, license_text=True, - url=need_url, - timeout=time_out) + url=True, timeout=time_out) if not rc: msg = "Source code analysis failed." @@ -91,9 +90,7 @@ def run_scan(path_to_scan, output_file_name="", _result_log["Error_files"] = error_msg msg = "Failed to analyze :" + error_msg if "files" in results: - print("path to scan : ", path_to_scan) - rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, - path_to_scan, need_license, need_url) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, path_to_scan, need_license) if parsing_msg: _result_log["Parsing Log"] = parsing_msg if rc: From 48fe83dc0ffee6aafcf937922043eae1fdad27bf Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 16:02:12 +0900 Subject: [PATCH 09/13] fix use of mmap --- .../_parsing_scancode_file_item.py | 23 +++++-------------- src/fosslight_source/cli.py | 3 --- src/fosslight_source/run_scancode.py | 3 ++- tests/test_files/run_scancode.py | 1 + tox.ini | 3 --- 5 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index ddbe4dd..8c6dde7 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -52,6 +52,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ prev_dir = "" prev_dir_value = False regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) + find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE) if scancode_file_list: for file in scancode_file_list: @@ -73,29 +74,17 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ result_item = ScanItem(file_path) - fullpath = os.path.join(path_to_scan, file_path) + fullpath = os.path.join(path_to_scan, file_path) urls = file.get("urls", []) url_list = [] - print("!!!!!!!! : ",fullpath) if urls: - search_term = "SPDX-PackageDownloadLocation:".encode() with open(fullpath, "r") as f: - with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: - start = 0 - while True: - # Find the next occurrence of the search term - start = m.find(search_term, start) - if start == -1: - break - # Extract the line that contains the search term - line = m[start:].split(b"\n")[0].decode() - spdx_download_location = re.sub( - r'.*?SPDX-PackageDownloadLocation: ', '', line) - url_list.append(spdx_download_location) - # Move the start position to the end of the line - start += len(line) + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj: + for word in find_word.findall(mmap_obj): + url_list.append(word.decode('utf-8')) + if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) if len(error_msg) > 0: diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index c61bda3..df997c0 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -70,7 +70,6 @@ def main(): parser.add_argument('-c', '--cores', type=int, required=False, default=-1) parser.add_argument('--no_correction', action='store_true', required=False) parser.add_argument('--correct_fpath', nargs=1, type=str, required=False) - # parser.add_argument('-u', '--url', action='store_true', required=False) args = parser.parse_args() @@ -96,8 +95,6 @@ def main(): correct_filepath = path_to_scan if args.correct_fpath: correct_filepath = ''.join(args.correct_fpath) - #if args.url: - #print_url = True time_out = args.timeout core = args.cores diff --git a/src/fosslight_source/run_scancode.py b/src/fosslight_source/run_scancode.py index 438901c..6289710 100755 --- a/src/fosslight_source/run_scancode.py +++ b/src/fosslight_source/run_scancode.py @@ -90,7 +90,8 @@ def run_scan(path_to_scan, output_file_name="", _result_log["Error_files"] = error_msg msg = "Failed to analyze :" + error_msg if "files" in results: - rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, path_to_scan, need_license) + rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], + has_error, path_to_scan, need_license) if parsing_msg: _result_log["Parsing Log"] = parsing_msg if rc: diff --git a/tests/test_files/run_scancode.py b/tests/test_files/run_scancode.py index d69fb48..252065d 100755 --- a/tests/test_files/run_scancode.py +++ b/tests/test_files/run_scancode.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- # Copyright (c) 2020 LG Electronics Inc. # SPDX-License-Identifier: Apache-2.0 + # SPDX-PackageDownloadLocation: https://dummy_url_for_test.com # The code is not licensed under GPL-2.0. diff --git a/tox.ini b/tox.ini index d65c539..5d1b558 100644 --- a/tox.ini +++ b/tox.ini @@ -42,9 +42,6 @@ commands = fosslight_source -p tests/test_files -m -j -o test_scan2/ ls test_scan2/ - fosslight_source -p tests/test_files -m -j -o test_scan3/ -s all -u - ls test_scan3/ - fosslight_convert -p tests/scancode_raw.json -o test_convert -f opossum python tests/cli_test.py From 6e93c1501701a6e3706ac6ee84a68854bf8ba66e Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 16:25:02 +0900 Subject: [PATCH 10/13] fix help message --- src/fosslight_source/_help.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fosslight_source/_help.py b/src/fosslight_source/_help.py index ee7a056..3b77321 100644 --- a/src/fosslight_source/_help.py +++ b/src/fosslight_source/_help.py @@ -29,7 +29,6 @@ -j\t\t\t Generate raw result of scanners in json format -t \t\t Stop scancode scanning if scanning takes longer than a timeout in seconds. -c \t\t Select the number of cores to be scanned with ScanCode. - -u \t\t Print scanned url information --no_correction\t Enter if you don't want to correct OSS information with sbom-info.yaml --correct_fpath Path to the sbom-info.yaml file""" From 2ec7ed75da05022d379ba1a34f81929dcd707ac3 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 14 Jun 2023 18:14:17 +0900 Subject: [PATCH 11/13] change download_location as list --- .../_parsing_scancode_file_item.py | 10 ++---- src/fosslight_source/_parsing_scanoss_file.py | 2 +- src/fosslight_source/_scan_item.py | 36 ++++++++++++++----- src/fosslight_source/cli.py | 15 ++++++-- 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 8c6dde7..35d2663 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -13,7 +13,6 @@ from ._scan_item import is_exclude_dir from ._scan_item import is_exclude_file from ._scan_item import replace_word -import copy logger = logging.getLogger(constant.LOGGER_NAME) _exclude_directory = ["test", "tests", "doc", "docs"] @@ -84,6 +83,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj: for word in find_word.findall(mmap_obj): url_list.append(word.decode('utf-8')) + result_item.download_location = url_list if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) @@ -178,13 +178,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_ if is_exclude_file(file_path, prev_dir, prev_dir_value): result_item.exclude = True - if url_list: - for url in url_list: - temp_result_item = copy.deepcopy(result_item) - temp_result_item.download_location = url - scancode_file_item.append(temp_result_item) - else: - scancode_file_item.append(result_item) + scancode_file_item.append(result_item) except Exception as ex: msg.append(f"Error Parsing item: {ex}") rc = False diff --git a/src/fosslight_source/_parsing_scanoss_file.py b/src/fosslight_source/_parsing_scanoss_file.py index de7d5e1..a39384f 100644 --- a/src/fosslight_source/_parsing_scanoss_file.py +++ b/src/fosslight_source/_parsing_scanoss_file.py @@ -48,7 +48,7 @@ def parsing_scanResult(scanoss_report): if 'version' in findings[0]: result_item.oss_version = findings[0]['version'] if 'url' in findings[0]: - result_item.download_location = findings[0]['url'] + result_item.download_location = list([findings[0]['url']]) license_detected = [] license_w_source = {"component_declared": [], "file_spdx_tag": [], diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 25202af..086eeef 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -27,7 +27,7 @@ class ScanItem: is_license_text = False oss_name = "" oss_version = "" - download_location = "" + download_location = [] matched_lines = "" # Only for SCANOSS results fileURL = "" # Only for SCANOSS results license_reference = "" @@ -70,18 +70,38 @@ def get_row_to_print(self): print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), + "", "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), + url, "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) return print_rows def get_row_to_print_for_scanoss(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", self.comment] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment]) return print_rows def get_row_to_print_for_all_scanner(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "", - ','.join(self.copyright), - "Exclude" if self.exclude else "", self.comment, self.license_reference] + print_rows = [] + if not self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment, + self.license_reference]) + else: + for url in self.download_location: + print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "", + ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment, + self.license_reference]) return print_rows def merge_scan_item(self, other): @@ -106,7 +126,7 @@ def merge_scan_item(self, other): if not self.oss_version: self.oss_version = other.oss_version if not self.download_location: - self.download_location = other.download_location + self.download_location = list(other.download_location) if not self.matched_lines: self.matched_lines = other.matched_lines if not self.fileURL: diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index df997c0..bb59dd8 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -172,14 +172,23 @@ def create_report_file(_start_time, scanned_result, license_list, scanoss_result scanned_result = sorted(scanned_result, key=lambda row: (''.join(row.licenses))) if selected_scanner == 'scancode' or output_extension == _json_ext: - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print(): + sheet_list[SCANOSS_SHEET_NAME].append(row) elif selected_scanner == 'scanoss': - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_scanoss() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print_for_scanoss(): + sheet_list[SCANOSS_SHEET_NAME].append(row) extended_header = SCANOSS_HEADER else: - sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_all_scanner() for scan_item in scanned_result] + sheet_list[SCANOSS_SHEET_NAME] = [] + for scan_item in scanned_result: + for row in scan_item.get_row_to_print_for_all_scanner(): + sheet_list[SCANOSS_SHEET_NAME].append(row) extended_header = MERGED_HEADER if need_license: From 919cfae72271b6995f7532eadaa8f797d27e966d Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 15 Jun 2023 10:34:33 +0900 Subject: [PATCH 12/13] remove useless comment --- src/fosslight_source/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fosslight_source/cli.py b/src/fosslight_source/cli.py index bb59dd8..84aa5c4 100755 --- a/src/fosslight_source/cli.py +++ b/src/fosslight_source/cli.py @@ -55,7 +55,6 @@ def main(): scanoss_result = [] time_out = 120 core = -1 - # print_url = False parser = argparse.ArgumentParser(description='FOSSLight Source', prog='fosslight_source', add_help=False) parser.add_argument('-h', '--help', action='store_true', required=False) From 39b4a178a3d4e1119ebc6faa7d63a43f61008eb4 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Thu, 15 Jun 2023 13:13:59 +0900 Subject: [PATCH 13/13] fix --- src/fosslight_source/_scan_item.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 086eeef..5a830c9 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -36,6 +36,7 @@ def __init__(self, value): self.file = value self._copyright = [] self._licenses = [] + self.download_location = [] self.comment = "" self.exclude = False self.is_license_text = False @@ -67,9 +68,6 @@ def get_file(self): return self.file def get_row_to_print(self): - print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), - self.download_location, "", ','.join(self.copyright), - "Exclude" if self.exclude else "", self.comment] print_rows = [] if not self.download_location: print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses),