In [1]:
import io
from os import path
from os import walk

In [2]:
from packagedcode.debian_copyright import parse_copyright_file

In [3]:
from scancode_analyzer.license_analyzer import LicenseDetectionIssue
from scancode_analyzer.summary import SummaryLicenseIssues

from scancode_analyzer.analyzer_plugin import from_license_match_object
from scancode_analyzer.analyzer_plugin import ScancodeDataChangedError

In [4]:
def relative_walk(dir_path):
    """
    Yield the path of each file found under `dir_path`, relative to `dir_path`.
    Files whose name ends with '.yml' are skipped.
    """
    for parent_dir, _subdirs, names in walk(dir_path):
        for name in names:
            if not name.endswith('.yml'):
                full_path = path.join(parent_dir, name)
                # Drop the first occurrence of `dir_path`, then any path
                # separators left at either end.
                yield full_path.replace(dir_path, '', 1).strip(path.sep)


In [5]:
def get_copyright_files(test_dir_loc):
    """
    Return a list of the locations of all files under `test_dir_loc` whose
    location ends with "copyright".
    """
    # relative_walk() yields paths relative to `test_dir_loc`; join them back
    # so that each location is usable from the current working directory.
    locations = (
        path.join(test_dir_loc, relative_path)
        for relative_path in relative_walk(test_dir_loc)
    )
    return [location for location in locations if location.endswith("copyright")]

In [6]:
# Directory that is walked for debian copyright files.
# NOTE(review): the leading "path/to/" looks like a placeholder — point this at
# a local scancode-toolkit checkout before running the notebook.
test_dir_loc = "path/to/scancode-toolkit/tests/packagedcode/data/debian/copyright/debian-slim-2021-04-07/"

In [7]:
copyright_files = get_copyright_files(test_dir_loc)

In [8]:
len(copyright_files)

85

In [9]:
# Accumulators for the analysis: one list of license detection issues per kind
# of copyright file, plus a counter of files with at least one issue.
count_files_with_issues = 0
unstructued_license_issues = list()
structued_license_issues = list()

In [10]:
def _collect_license_issues(license_matches, copyright_file):
    """
    Return a list of the LicenseDetectionIssue objects reported for
    `license_matches`, which were detected in the file at `copyright_file`.
    """
    license_matches_in_format = from_license_match_object(license_matches)
    return list(LicenseDetectionIssue.from_license_matches(
        license_matches=license_matches_in_format,
        is_license_text=False,
        is_legal=False,
        path=copyright_file,
    ))


for copyright_file in copyright_files:
    dc = parse_copyright_file(copyright_file)

    # Unstructured files: the parsed object carries its matches directly.
    if hasattr(dc, "license_matches"):
        issues = _collect_license_issues(dc.license_matches, copyright_file)
        if issues:
            count_files_with_issues += 1
            unstructued_license_issues.extend(issues)

    # Structured files: matches are grouped in several license detections.
    elif hasattr(dc, "license_detections"):
        file_has_issues = False
        for license_detection in dc.license_detections:
            if not license_detection.license_matches:
                continue

            issues = _collect_license_issues(
                license_detection.license_matches,
                copyright_file,
            )
            if issues:
                file_has_issues = True
                structued_license_issues.extend(issues)

        # Count the file once, however many of its detections have issues,
        # so that the counter never exceeds the number of files.
        if file_has_issues:
            count_files_with_issues += 1

    else:
        raise TypeError(
            "Cannot analyze {!r}: parsed object of type {} has neither "
            "'license_matches' nor 'license_detections'".format(
                copyright_file, type(dc).__name__,
            )
        )

  remove_tags = re.compile(
  self._regexps = [(re.compile(regexp), tag,) for regexp, tag in regexps]


In [11]:
len(structued_license_issues)

81

In [12]:
# Summarize the issues collected from structured copyright files.
# NOTE(review): both counts passed here span all copyright files, structured
# and unstructured alike — confirm this is what the summary expects.
summary_structured_license = SummaryLicenseIssues.summarize(
    license_issues=structued_license_issues,
    count_has_license=len(copyright_files),
    count_files_with_issues=count_files_with_issues,
)

In [13]:
len(unstructued_license_issues)

133

In [14]:
# Summarize the issues collected from unstructured copyright files.
# NOTE(review): both counts passed here span all copyright files, structured
# and unstructured alike — confirm this is what the summary expects.
summary_unstructured_license = SummaryLicenseIssues.summarize(
    license_issues=unstructued_license_issues,
    count_has_license=len(copyright_files),
    count_files_with_issues=count_files_with_issues,
)

In [16]:
import json


def load_json(path):
    """
    Return the data parsed from the JSON file at `path`.

    The file is decoded as UTF-8 rather than with the platform default
    encoding, so that non-ASCII content loads the same way everywhere.
    """
    with open(path, 'r', encoding='utf-8') as file_handler:
        return json.load(file_handler)


def write_json(data, path):
    """
    Serialize `data` as JSON, indented by two spaces, to the file at `path`.
    An existing file at `path` is overwritten.
    """
    with open(path, mode='w') as out_file:
        json.dump(obj=data, fp=out_file, indent=2)


In [17]:
# Output locations for the two JSON summaries.
# NOTE(review): the leading "path/to/" looks like a placeholder — set real
# output paths before running the cells that write these files.
results_json_structured_path = "path/to/structured_debian_result_summary.json"
results_json_unstructured_path = "path/to/unstructured_debian_result_summary.json"

In [18]:
write_json(summary_structured_license.to_dict(), results_json_structured_path)

In [19]:
write_json(summary_unstructured_license.to_dict(), results_json_unstructured_path)