Skip to content
17 changes: 15 additions & 2 deletions src/fosslight_source/_parsing_scancode_file_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import re
import fosslight_util.constant as constant
import mmap
from ._license_matched import MatchedLicense
from ._scan_item import ScanItem
from ._scan_item import is_exclude_dir
Expand Down Expand Up @@ -40,7 +41,7 @@ def get_error_from_header(header_item):
return has_error, str_error


def parsing_file_item(scancode_file_list, has_error, need_matched_license=False):
def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False):

rc = True
scancode_file_item = []
Expand All @@ -50,6 +51,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)
prev_dir = ""
prev_dir_value = False
regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE)
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)

if scancode_file_list:
for file in scancode_file_list:
Expand All @@ -71,6 +73,18 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)

result_item = ScanItem(file_path)

fullpath = os.path.join(path_to_scan, file_path)

urls = file.get("urls", [])
url_list = []

if urls:
with open(fullpath, "r") as f:
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
for word in find_word.findall(mmap_obj):
url_list.append(word.decode('utf-8'))
result_item.download_location = url_list

if has_error and "scan_errors" in file:
error_msg = file.get("scan_errors", [])
if len(error_msg) > 0:
Expand Down Expand Up @@ -165,7 +179,6 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)
if is_exclude_file(file_path, prev_dir, prev_dir_value):
result_item.exclude = True
scancode_file_item.append(result_item)

except Exception as ex:
msg.append(f"Error Parsing item: {ex}")
rc = False
Expand Down
2 changes: 1 addition & 1 deletion src/fosslight_source/_parsing_scanoss_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def parsing_scanResult(scanoss_report):
if 'version' in findings[0]:
result_item.oss_version = findings[0]['version']
if 'url' in findings[0]:
result_item.download_location = findings[0]['url']
result_item.download_location = list([findings[0]['url']])

license_detected = []
license_w_source = {"component_declared": [], "file_spdx_tag": [],
Expand Down
44 changes: 32 additions & 12 deletions src/fosslight_source/_scan_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ScanItem:
is_license_text = False
oss_name = ""
oss_version = ""
download_location = ""
download_location = []
matched_lines = "" # Only for SCANOSS results
fileURL = "" # Only for SCANOSS results
license_reference = ""
Expand All @@ -36,6 +36,7 @@ def __init__(self, value):
self.file = value
self._copyright = []
self._licenses = []
self.download_location = []
self.comment = ""
self.exclude = False
self.is_license_text = False
Expand Down Expand Up @@ -63,23 +64,42 @@ def licenses(self, value):
if len(self._licenses) > 0:
self._licenses = list(set(self._licenses))

def get_file(self):
    """Return the value stored in ``self.file`` for this item."""
    return self.file

def get_row_to_print(self):
    """Build the report rows for this item.

    Returns:
        A list of rows, one per entry in ``self.download_location``. When no
        download location is recorded, a single row with an empty
        download-location cell is returned. Each row is a list of nine cells:
        file, OSS name, OSS version, comma-joined licenses, download location,
        an always-empty cell, comma-joined copyrights, ``"Exclude"`` (or ``""``)
        and the comment.
    """
    # The empty placeholder guarantees exactly one row for items that have
    # no download location at all.
    urls = self.download_location or [""]
    joined_licenses = ','.join(self.licenses)
    joined_copyrights = ','.join(self.copyright)
    exclude_mark = "Exclude" if self.exclude else ""
    return [[self.file, self.oss_name, self.oss_version, joined_licenses, url, "",
             joined_copyrights, exclude_mark, self.comment] for url in urls]

def get_row_to_print_for_scanoss(self):
    """Build the report rows for this item when only SCANOSS results are printed.

    Returns:
        A list of rows, one per entry in ``self.download_location``. When no
        download location is recorded, a single row with an empty
        download-location cell is returned. Each row is a list of nine cells:
        file, OSS name, OSS version, comma-joined licenses, download location,
        an always-empty cell, comma-joined copyrights, ``"Exclude"`` (or ``""``)
        and the comment.
    """
    # The empty placeholder guarantees exactly one row for items that have
    # no download location at all.
    urls = self.download_location or [""]
    joined_licenses = ','.join(self.licenses)
    joined_copyrights = ','.join(self.copyright)
    exclude_mark = "Exclude" if self.exclude else ""
    return [[self.file, self.oss_name, self.oss_version, joined_licenses, url, "",
             joined_copyrights, exclude_mark, self.comment] for url in urls]

def get_row_to_print_for_all_scanner(self):
    """Build the report rows for this item when results of all scanners are printed.

    Returns:
        A list of rows, one per entry in ``self.download_location``. When no
        download location is recorded, a single row with an empty
        download-location cell is returned. Each row is a list of ten cells:
        file, OSS name, OSS version, comma-joined licenses, download location,
        an always-empty cell, comma-joined copyrights, ``"Exclude"`` (or ``""``),
        the comment and ``self.license_reference``.
    """
    # The empty placeholder guarantees exactly one row for items that have
    # no download location at all.
    urls = self.download_location or [""]
    joined_licenses = ','.join(self.licenses)
    joined_copyrights = ','.join(self.copyright)
    exclude_mark = "Exclude" if self.exclude else ""
    return [[self.file, self.oss_name, self.oss_version, joined_licenses, url, "",
             joined_copyrights, exclude_mark, self.comment, self.license_reference]
            for url in urls]

def merge_scan_item(self, other):
Expand All @@ -104,7 +124,7 @@ def merge_scan_item(self, other):
if not self.oss_version:
self.oss_version = other.oss_version
if not self.download_location:
self.download_location = other.download_location
self.download_location = list(other.download_location)
if not self.matched_lines:
self.matched_lines = other.matched_lines
if not self.fileURL:
Expand Down
44 changes: 26 additions & 18 deletions src/fosslight_source/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def main():

scanned_result = []
license_list = []
scanoss_result = []
time_out = 120
core = -1

Expand Down Expand Up @@ -117,19 +118,16 @@ def main():
success, _result_log["Scan Result"], scanned_result, license_list = run_scan(path_to_scan, output_file_name,
write_json_file, core, True,
print_matched_text, format, True,
time_out, correct_mode,
correct_filepath)
time_out, correct_mode, correct_filepath)
elif selected_scanner == 'scanoss':
scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file)
elif selected_scanner == 'all' or selected_scanner == '':
success, _result_log["Scan Result"], scanned_result, license_list = run_all_scanners(path_to_scan, output_file_name,
write_json_file, core,
print_matched_text, format, True,
time_out)
success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners(
path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out)
else:
print_help_msg_source_scanner()
sys.exit(1)
create_report_file(_start_time, scanned_result, license_list, selected_scanner, print_matched_text,
create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, print_matched_text,
output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan)
try:
logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True))
Expand All @@ -140,7 +138,7 @@ def main():
sys.exit(1)


def create_report_file(_start_time, scanned_result, license_list, selected_scanner, need_license=False,
def create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, need_license=False,
output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="",
path_to_scan=""):
"""
Expand Down Expand Up @@ -173,24 +171,33 @@ def create_report_file(_start_time, scanned_result, license_list, selected_scann
scanned_result = sorted(scanned_result, key=lambda row: (''.join(row.licenses)))

if selected_scanner == 'scancode' or output_extension == _json_ext:
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print() for scan_item in scanned_result]
sheet_list[SCANOSS_SHEET_NAME] = []
for scan_item in scanned_result:
for row in scan_item.get_row_to_print():
sheet_list[SCANOSS_SHEET_NAME].append(row)

elif selected_scanner == 'scanoss':
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_scanoss() for scan_item in scanned_result]
sheet_list[SCANOSS_SHEET_NAME] = []
for scan_item in scanned_result:
for row in scan_item.get_row_to_print_for_scanoss():
sheet_list[SCANOSS_SHEET_NAME].append(row)
extended_header = SCANOSS_HEADER

else:
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_all_scanner() for scan_item in scanned_result]
sheet_list[SCANOSS_SHEET_NAME] = []
for scan_item in scanned_result:
for row in scan_item.get_row_to_print_for_all_scanner():
sheet_list[SCANOSS_SHEET_NAME].append(row)
extended_header = MERGED_HEADER

if need_license:
if selected_scanner == 'scancode' or output_extension == _json_ext:
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
elif selected_scanner == 'scanoss':
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result)
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
else:
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result)
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)

if correct_mode:
success, msg_correct, correct_list = correct_with_yaml(correct_filepath, path_to_scan, sheet_list)
Expand Down Expand Up @@ -242,16 +249,17 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False,
False, "")
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file)

scanoss_result_for_merging = copy.deepcopy(scanoss_result)
for file_in_scancode_result in scancode_result:
per_file_result = copy.deepcopy(file_in_scancode_result)
if per_file_result in scanoss_result:
per_file_result.merge_scan_item(scanoss_result.pop(scanoss_result.index(file_in_scancode_result)))
if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist
scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result))
merged_result.append(per_file_result)
if scanoss_result:
for file_left_in_scanoss_result in scanoss_result:
if scanoss_result_for_merging:
for file_left_in_scanoss_result in scanoss_result_for_merging:
merged_result.append(file_left_in_scanoss_result)

return success, _result_log["Scan Result"], merged_result, license_list
return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result


if __name__ == '__main__':
Expand Down
5 changes: 3 additions & 2 deletions src/fosslight_source/run_scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def run_scan(path_to_scan, output_file_name="",
processes=num_cores,
output_json_pp=output_json_file,
only_findings=True, license_text=True,
timeout=time_out)
url=True, timeout=time_out)

if not rc:
msg = "Source code analysis failed."
Expand All @@ -90,7 +90,8 @@ def run_scan(path_to_scan, output_file_name="",
_result_log["Error_files"] = error_msg
msg = "Failed to analyze :" + error_msg
if "files" in results:
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license)
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"],
has_error, path_to_scan, need_license)
if parsing_msg:
_result_log["Parsing Log"] = parsing_msg
if rc:
Expand Down
2 changes: 1 addition & 1 deletion src/fosslight_source/run_scanoss.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F
if num_threads > 0:
scan_command += " -T " + str(num_threads)
else:
scan_command += " -T " + "30"
scan_command += " -T " + "10"

try:
os.system(scan_command)
Expand Down
3 changes: 3 additions & 0 deletions tests/test_files/run_scancode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# Copyright (c) 2020 LG Electronics Inc.
# SPDX-License-Identifier: Apache-2.0

# SPDX-PackageDownloadLocation: https://dummy_url_for_test.com
# The code is not licensed under GPL-2.0.

import sys
import os
import multiprocessing
Expand Down
Loading