From 5dcc95b6717ff8205ec1429528e343c0fed9d6bb Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:00:07 +0900 Subject: [PATCH 1/9] Add file for testing dual license --- tests/test_files/dual.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/test_files/dual.txt diff --git a/tests/test_files/dual.txt b/tests/test_files/dual.txt new file mode 100644 index 00000000..53714641 --- /dev/null +++ b/tests/test_files/dual.txt @@ -0,0 +1 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT From 776f53c86031a170aa8703377adace3b3abd2433 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:04:51 +0900 Subject: [PATCH 2/9] Fix a bug when reading a non-existent key --- .../_parsing_scancode_file_item.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 7e1c7776..25253517 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -44,7 +44,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) rc = True scancode_file_item = [] license_list = {} # Key :[license]+[matched_text], value: MatchedLicense() - msg = "" + msg = [] prev_dir = "" prev_dir_value = False @@ -74,12 +74,12 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if has_error and "scan_errors" in file: error_msg = file["scan_errors"] if len(error_msg) > 0: - logger.debug(f"Test_msg {file_path}:{error_msg}") result_item.comment = ",".join(error_msg) scancode_file_item.append(result_item) continue - - copyright_value_list = [x["value"] for x in copyright_list] + copyright_value_list = [] + for x in copyright_list: + copyright_value_list.append(x.get("value", "")) result_item.copyright = copyright_value_list # Set the license value @@ -87,7 +87,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if licenses is None or licenses == "": continue - license_expression_list = file["license_expressions"] + license_expression_list = file.get("license_expressions",{}) if len(license_expression_list) > 0: license_expression_list = [ x.lower() for x in license_expression_list @@ -153,8 +153,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) scancode_file_item.append(result_item) except Exception as ex: - msg = f"* Error Parsing item: {ex}" + msg.append(f"Error Parsing item: {ex}") rc = False - logger.debug(msg) - - return rc, scancode_file_item, msg.strip(), license_list + msg = list(set(msg)) + return rc, scancode_file_item, msg, license_list From 033df5176410578de3ef3222adf0cb1bd84fc92e Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:16:25 +0900 Subject: [PATCH 3/9] Add null handling when parsing scancode items --- .../_parsing_scancode_file_item.py | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 25253517..26477613 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -53,12 +53,9 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if scancode_file_list: for file in scancode_file_list: try: - is_binary = False is_dir = False - file_path = file["path"] - - if "is_binary" in file: - is_binary = file["is_binary"] + file_path = file.get("path", "") + is_binary = file.get("is_binary", False) if "type" in file: is_dir = file["type"] == "directory" if is_dir: @@ -66,13 +63,13 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) prev_dir = file_path if not is_binary and not is_dir: - licenses = file["licenses"] - copyright_list = file["copyrights"] + licenses = file.get("licenses", []) + copyright_list = file.get("copyrights", []) result_item = ScanItem(file_path) if has_error and "scan_errors" in file: - error_msg = file["scan_errors"] + error_msg = file.get("scan_errors", []) if len(error_msg) > 0: result_item.comment = ",".join(error_msg) scancode_file_item.append(result_item) @@ -95,8 +92,8 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) for lic_item in licenses: license_value = "" - key = lic_item["key"] - spdx = lic_item["spdx_license_key"] + key = lic_item.get("key", "") + spdx = lic_item.get("spdx_license_key", "") # logger.debug("LICENSE_KEY:"+str(key)+",SPDX:"+str(spdx)) if key is not None and key != "": @@ -111,11 +108,10 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if license_value != "": if key == "unknown-spdx": try: - if "matched_text" in lic_item: - matched_txt = lic_item["matched_text"].lower() - matched = regex.search(matched_txt) - if matched: - license_value = str(matched.group()) + matched_txt = lic_item.get("matched_text", "").lower() + matched = regex.search(matched_txt) + if matched: + license_value = str(matched.group()) except Exception: pass @@ -136,9 +132,8 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) lic_info = MatchedLicense(license_value, lic_category, lic_matched_text, file_path) license_list[lic_matched_key] = lic_info - matched_rule = lic_item["matched_rule"] - if matched_rule["is_license_text"]: - result_item.is_license_text = True + matched_rule = lic_item.get("matched_rule", {}) + result_item.is_license_text = matched_rule.get("is_license_text", False) if len(license_detected) > 0: result_item.licenses = license_detected From f510d3dc41b28198b3a914d141958864b0302001 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:21:20 +0900 Subject: [PATCH 4/9] Add copyrights to test various copyrights --- tests/test_files/dual.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_files/dual.txt b/tests/test_files/dual.txt index 53714641..b663f37f 100644 --- a/tests/test_files/dual.txt +++ b/tests/test_files/dual.txt @@ -1 +1,3 @@ +# Copyright (c) 2020 LG Electronics Inc. +# Copyright (c) 2021 LG Electronics Inc. # SPDX-License-Identifier: GPL-2.0 or MIT From 4be39cb56ce6a08a64f33b60214175f8cac81adc Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:25:31 +0900 Subject: [PATCH 5/9] Add file for testing licenseref format --- tests/test_files/spdx_lic.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/test_files/spdx_lic.txt diff --git a/tests/test_files/spdx_lic.txt b/tests/test_files/spdx_lic.txt new file mode 100644 index 00000000..d9d084b4 --- /dev/null +++ b/tests/test_files/spdx_lic.txt @@ -0,0 +1 @@ +# SPDX-License-Identifier: licenseref-TEMP_LICENSE From c3dfd0f6c01876ba9c591a72d29ab0f4f9708910 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:34:29 +0900 Subject: [PATCH 6/9] Remove licenseref- for spdx format and print --- src/fosslight_source/_parsing_scancode_file_item.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 26477613..af59f80c 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -48,7 +48,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) prev_dir = "" prev_dir_value = False - regex = re.compile(r'licenseref-(\S)+') + regex = re.compile(r'.*licenseref-(\S+)', re.IGNORECASE) if scancode_file_list: for file in scancode_file_list: @@ -108,10 +108,10 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if license_value != "": if key == "unknown-spdx": try: - matched_txt = lic_item.get("matched_text", "").lower() + matched_txt = lic_item.get("matched_text", "") matched = regex.search(matched_txt) if matched: - license_value = str(matched.group()) + license_value = str(matched.group(1)) except Exception: pass From 519e73afbd1aa10d68fe91476e56b280a58ba477 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:39:30 +0900 Subject: [PATCH 7/9] Replace unnecessary lower --- src/fosslight_source/_parsing_scancode_file_item.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index af59f80c..716cb0d1 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -48,7 +48,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) prev_dir = "" prev_dir_value = False - regex = re.compile(r'.*licenseref-(\S+)', re.IGNORECASE) + regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE) if scancode_file_list: for file in scancode_file_list: @@ -111,7 +111,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) matched_txt = lic_item.get("matched_text", "") matched = regex.search(matched_txt) if matched: - license_value = str(matched.group(1)) + license_value = str(matched.group()) except Exception: pass From 00fe6f8e7af2b192622191625043913c5fd8e2e6 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 17:51:14 +0900 Subject: [PATCH 8/9] Add comments for excluding reuse --- src/fosslight_source/_parsing_scancode_file_item.py | 2 +- tests/test_files/dual.txt | 2 ++ tests/test_files/spdx_lic.txt | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 716cb0d1..206db1a3 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -84,7 +84,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) if licenses is None or licenses == "": continue - license_expression_list = file.get("license_expressions",{}) + license_expression_list = file.get("license_expressions", {}) if len(license_expression_list) > 0: license_expression_list = [ x.lower() for x in license_expression_list diff --git a/tests/test_files/dual.txt b/tests/test_files/dual.txt index b663f37f..2ec8fc7d 100644 --- a/tests/test_files/dual.txt +++ b/tests/test_files/dual.txt @@ -1,3 +1,5 @@ # Copyright (c) 2020 LG Electronics Inc. # Copyright (c) 2021 LG Electronics Inc. +# REUSE-IgnoreStart # SPDX-License-Identifier: GPL-2.0 or MIT +# REUSE-IgnoreEnd diff --git a/tests/test_files/spdx_lic.txt b/tests/test_files/spdx_lic.txt index d9d084b4..1714193c 100644 --- a/tests/test_files/spdx_lic.txt +++ b/tests/test_files/spdx_lic.txt @@ -1 +1,3 @@ +# REUSE-IgnoreStart # SPDX-License-Identifier: licenseref-TEMP_LICENSE +# REUSE-IgnoreEnd From a9c59de5b515f4649555399b92774f9fa1c666f0 Mon Sep 17 00:00:00 2001 From: Soim Kim Date: Wed, 31 Aug 2022 18:54:30 +0900 Subject: [PATCH 9/9] Handle an exception if path is null --- src/fosslight_source/_parsing_scancode_file_item.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 206db1a3..ed1b370b 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -55,6 +55,8 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False) try: is_dir = False file_path = file.get("path", "") + if not file_path: + continue is_binary = file.get("is_binary", False) if "type" in file: is_dir = file["type"] == "directory"