Add scripts that parse and compare static analyzer HTML output

https://bugs.webkit.org/show_bug.cgi?id=269390 rdar://problem/122962791 Reviewed by David Kilzer. These scripts will be used by a static analyzer bot to parse and compare HTML reports from static analysis. * Tools/Scripts/compare-static-analysis-results.py: Added. (parser): (find_diff): Compares two files to find regressions and fixes. (compare_project_results): Consolidates results per project. (create_filtered_results_dir): Uses scan-build --generate-index-only to generate new index.html for new issues per project. (main): * Tools/Scripts/generate-dirty-files.py: Added. Takes in output from the static analyzer and extracts the issue hash, file name, and line number per project and checker type. (parser): (parse_results_file): Extracts data from HTML comments. (find_project_results): Writes data per checker. (find_all_results): Summarizes all results. (main): Canonical link: https://commits.webkit.org/276495@main
WebKit · Mar 21, 2024 · 49272b1 · 49272b1
1 parent b289d5f
commit 49272b1
Show file tree

Hide file tree

Showing 2 changed files with 309 additions and 0 deletions.
diff --git a/Tools/Scripts/compare-static-analysis-results.py b/Tools/Scripts/compare-static-analysis-results.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# Copyright (C) 2024 Apple Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1.  Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+# 2.  Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+import argparse
+import sys
+
+# Checker names. For every project, each checker has a '<checker>-issues'
+# and a '<checker>-files' list that the comparison reads.
+CHECKERS = [
+    'UncountedCallArgsChecker',
+    'UncountedLocalVarsChecker',
+]
+# Projects to compare; each one is a subdirectory of both input directories.
+PROJECTS = [
+    'WebKit',
+    'WebCore',
+]
+
+
+def parser():
+    """Parse the command line of the comparison script.
+
+    Returns the argparse namespace with `archived_dir`, `new_dir`,
+    `build_output` (required) and `scan_build` (None when not given).
+    """
+    arg_parser = argparse.ArgumentParser(description='compare dirty file lists')
+
+    # Positional arguments: the two directories of dirty lists to compare.
+    arg_parser.add_argument('archived_dir', help='directory of dirty lists from previous build')
+    arg_parser.add_argument('new_dir', help='directory of dirty lists from new build')
+
+    # Options.
+    arg_parser.add_argument('--build-output', dest='build_output', required=True, help='output from new build')
+    arg_parser.add_argument('--scan-build-path', dest='scan_build', help='path to scan-build')
+
+    return arg_parser.parse_args()
+
+
+def find_diff(file1, file2, mode):
+    """Compare two dirty lists and report what was added and what was removed.
+
+    The lists are the text files `{file1}-{mode}` (archived build) and
+    `{file2}-{mode}` (new build), holding one entry per line.
+
+    Entries are compared as whole lines inside this process. No shell command
+    is built from the paths, so paths containing spaces or shell
+    metacharacters are safe, and an entry that merely contains another entry
+    as a substring is not mistaken for it. Blank lines are ignored.
+
+    Returns a tuple `(new_entries, fixed_entries)` of sets: entries only in
+    the new list, and entries only in the archived list. If either list
+    cannot be read, the error is written to stderr and two empty sets are
+    returned.
+    """
+    def read_entries(path):
+        with open(path, 'r') as f:
+            # A blank line is not an entry; drop it so it never shows up as a diff.
+            return {line.rstrip('\r\n') for line in f} - {''}
+
+    try:
+        archived_entries = read_entries(f'{file1}-{mode}')
+        new_entries = read_entries(f'{file2}-{mode}')
+    except OSError as e:
+        # An unreadable list is reported and produces no differences at all.
+        sys.stderr.write(f'{e}\n')
+        return set(), set()
+
+    return new_entries - archived_entries, archived_entries - new_entries
+
+
+def compare_project_results(args, archive_path, new_path, project):
+    """Print a per-checker summary for one project and collect its new issues.
+
+    For every checker in CHECKERS the issue and file lists under
+    `archive_path` and `new_path` are diffed and the counts are printed.
+    When any new issue is found, a 'StaticAnalyzerRegressions' results
+    directory is created for the project.
+
+    Returns the set of new issues across all checkers.
+    """
+    regressions = set()
+
+    for checker in CHECKERS:
+        print(f'{checker}:')
+        archived_list = f'{archive_path}/{checker}'
+        new_list = f'{new_path}/{checker}'
+        added_issues, resolved_issues = find_diff(archived_list, new_list, 'issues')
+        added_files, resolved_files = find_diff(archived_list, new_list, 'files')
+        regressions |= added_issues
+
+        summary = (
+            f'    Fixed {len(resolved_issues)} issue(s).',
+            f'    Fixed {len(resolved_files)} file(s).',
+            f'    {len(added_issues)} new issue(s).',
+            f'    {len(added_files)} new file(s) with issues.\n',
+        )
+        for summary_line in summary:
+            print(summary_line)
+
+    if not regressions:
+        return regressions
+
+    create_filtered_results_dir(args, project, regressions, 'StaticAnalyzerRegressions')
+    return regressions
+
+
+def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
+    """Build a results directory that holds only the reports for `issues`.
+
+    A symlink is created in
+    `{build_output}/{category}/{project}/StaticAnalyzerReports` for each
+    issue, pointing at the report of the same name under
+    `{build_output}/StaticAnalyzer/{project}/StaticAnalyzerReports`. Then
+    `scan-build --generate-index-only` is run on the project directory.
+
+    args: namespace providing `build_output` and `scan_build`.
+    project: project name used as a path component.
+    issues: iterable of issue hashes.
+    category: name of the directory created under `build_output`.
+
+    Links left over from an earlier run are kept. When no scan-build path
+    was given, the links are still created and index generation is skipped
+    with a message on stderr.
+    """
+    print(f'Creating {category} and linking results...')
+    path_to_project = os.path.abspath(os.path.join(args.build_output, category, project))
+    path_to_reports = os.path.join(path_to_project, 'StaticAnalyzerReports')
+    try:
+        os.makedirs(path_to_reports, exist_ok=True)
+    except OSError as e:
+        sys.stderr.write(f'{e}\n')
+        return
+
+    # Link only the new issues so that scan-build indexes nothing else.
+    source_reports = os.path.abspath(
+        os.path.join(args.build_output, 'StaticAnalyzer', project, 'StaticAnalyzerReports'))
+    for issue_hash in issues:
+        # NOTE(review): assumes reports are named after the first six
+        # characters of the issue hash - confirm against scan-build output.
+        report = f'report-{issue_hash[:6]}.html'
+        try:
+            os.symlink(os.path.join(source_reports, report), os.path.join(path_to_reports, report))
+        except FileExistsError:
+            # Already linked by a previous run; keep the existing link.
+            pass
+        except OSError as e:
+            sys.stderr.write(f'{e}\n')
+
+    if not args.scan_build:
+        sys.stderr.write('No scan-build path given (--scan-build-path); skipping index generation.\n')
+        return
+    subprocess.run([args.scan_build, '--generate-index-only', path_to_project])
+
+
+def main():
+    """Compare the archived and new results of every project in PROJECTS.
+
+    Prints a section per project and, when there is at least one new issue,
+    the total number of new issues. Always returns 0.
+    """
+    args = parser()
+    all_regressions = set()
+
+    for project in PROJECTS:
+        archived_project_dir = os.path.abspath(f'{args.archived_dir}/{project}')
+        new_project_dir = os.path.abspath(f'{args.new_dir}/{project}')
+        print(f'\n------ {project} ------\n')
+        all_regressions |= compare_project_results(args, archived_project_dir, new_project_dir, project)
+
+    if not all_regressions:
+        return 0
+
+    print(f'\nTotal new issues: {len(all_regressions)}')
+    return 0
+
+
+if __name__ == '__main__':
+    # main() returns an exit status; pass it to the shell instead of dropping it.
+    sys.exit(main())
diff --git a/Tools/Scripts/generate-dirty-files.py b/Tools/Scripts/generate-dirty-files.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+# Copyright (C) 2024 Apple Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1.  Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+# 2.  Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+import argparse
+import json
+import sys
+
+# Maps the BUGTYPE text found in a report to the checker name that is used
+# as the prefix of the '-issues' and '-files' output lists.
+CHECKER_MAP = {
+    'Uncounted call argument for a raw pointer/reference parameter':
+        'UncountedCallArgsChecker',
+    'Uncounted raw pointer or reference not provably backed by ref-counted variable':
+        'UncountedLocalVarsChecker',
+}
+
+# Projects whose reports are collected; each is a subdirectory of
+# '<results_dir>/StaticAnalyzer' and of the output directory.
+PROJECTS = [
+    'WebKit',
+    'WebCore',
+]
+
+
+def parser():
+    """Parse the command line of the dirty-file generator.
+
+    Returns the argparse namespace with `results_dir`, `output_dir`
+    (defaults to 'smart-pointer-result-archive') and `build_dir`
+    (None when not given).
+    """
+    arg_parser = argparse.ArgumentParser(description='analyze clang results')
+
+    arg_parser.add_argument('results_dir', help='directory of results to parse')
+    arg_parser.add_argument(
+        '--output-dir', dest='output_dir', default='smart-pointer-result-archive',
+        help='output directory for dirty files list')
+    arg_parser.add_argument(
+        '--build-dir', dest='build_dir',
+        help='path to build directory, used to standardize file paths')
+
+    return arg_parser.parse_args()
+
+
+def parse_results_file(args, file_path):
+    """Extract the bug metadata embedded as HTML comments in one report.
+
+    The file is scanned line by line for comments of the form
+    `<!-- KEY value -->` with the keys BUGFILE,
+    ISSUEHASHCONTENTOFLINEINCONTEXT, BUGTYPE and BUGLINE. Scanning stops as
+    soon as all four have a non-empty value. Whitespace around the comment
+    and a missing newline at the end of the file do not affect the values.
+
+    args: namespace providing `build_dir`; when set, that directory prefix
+        (with or without a trailing slash) is removed from the bug file path.
+    file_path: path of the HTML report to read.
+
+    Returns `(bug_file, issue_hash, bug_type, bug_line)` as strings, or
+    `(None, None, None, None)` when any of the four values is missing.
+    """
+    import re
+
+    field_pattern = re.compile(
+        r'<!--\s*(BUGFILE|ISSUEHASHCONTENTOFLINEINCONTEXT|BUGTYPE|BUGLINE)\s+(.*?)\s*-->'
+    )
+    fields = {}
+    # Undecodable bytes elsewhere in the report must not abort the scan.
+    with open(file_path, 'r', errors='replace') as f:
+        for line in f:
+            for key, value in field_pattern.findall(line):
+                if key == 'BUGFILE' and args.build_dir:
+                    value = value.removeprefix(args.build_dir.rstrip('/') + '/')
+                fields[key] = value
+            if len(fields) == 4 and all(fields.values()):
+                return (
+                    fields['BUGFILE'],
+                    fields['ISSUEHASHCONTENTOFLINEINCONTEXT'],
+                    fields['BUGTYPE'],
+                    fields['BUGLINE'],
+                )
+    return None, None, None, None
+
+
+def find_project_results(args, project, file_list, results_data):
+    """Record the issue of every report in `file_list` for one project.
+
+    Each report is parsed; for every report whose bug type is listed in
+    CHECKER_MAP, the issue is added to `results_data` under its file name,
+    the issue hash is appended to `<checker>-issues` and the file name to
+    `<checker>-files` inside `{args.output_dir}/{project}`.
+
+    Reports that cannot be parsed are skipped. Reports whose bug type is not
+    in CHECKER_MAP are skipped as well instead of raising KeyError; their
+    number is written to stderr.
+
+    Prints the number of issues per bug type and returns `results_data`,
+    which is also modified in place.
+    """
+    bug_counts = {bug_type: 0 for bug_type in CHECKER_MAP}
+    project_dir = os.path.abspath(f'{args.output_dir}/{project}')
+    unknown_type_count = 0
+
+    for result_file in file_list:
+        if not result_file:
+            continue
+        file_name, issue_hash, bug_type, bug_line = parse_results_file(args, result_file)
+        if not file_name:
+            continue
+
+        checker = CHECKER_MAP.get(bug_type)
+        if checker is None:
+            unknown_type_count += 1
+            continue
+
+        bug_counts[bug_type] += 1
+        issue_obj = {"hash": issue_hash, "bugtype": bug_type, "line": bug_line}
+        results_data.setdefault(file_name, []).append(issue_obj)
+
+        # Append to the per-checker lists of issue hashes and file names.
+        with open(os.path.join(project_dir, f'{checker}-issues'), 'a') as f:
+            f.write(f'{issue_hash}\n')
+        with open(os.path.join(project_dir, f'{checker}-files'), 'a') as f:
+            f.write(f'{file_name}\n')
+
+    for bug_type, count in bug_counts.items():
+        print(f'    {bug_type}: {count}')
+    if unknown_type_count:
+        sys.stderr.write(f'    Skipped {unknown_type_count} report(s) with a bug type not in CHECKER_MAP\n')
+    return results_data
+
+
+def find_all_results(args):
+    """Collect the reports of every project and write the summary files.
+
+    For each project in PROJECTS, all files matching `report*.html` below
+    `{args.results_dir}/StaticAnalyzer/{project}` are located (recursively,
+    in sorted order) and handed to find_project_results. The combined data
+    is then written to `{args.output_dir}/dirty_file_data.json` and the
+    totals are printed.
+
+    The reports are located inside this process rather than through a shell
+    command, so directory names with spaces or shell metacharacters work.
+
+    Returns 0 on success, or -1 (after a message on stderr) when the results
+    directory of a project does not exist.
+    """
+    import fnmatch
+
+    results_data = {}
+    result_counts = {}
+
+    for project in PROJECTS:
+        os.makedirs(os.path.abspath(f'{args.output_dir}/{project}'), exist_ok=True)
+        path = os.path.abspath(os.path.join(args.results_dir, 'StaticAnalyzer', project))
+        if not os.path.isdir(path):
+            sys.stderr.write(f'Could not find results for {project}\n')
+            return -1
+        project_files = sorted(
+            os.path.join(root, name)
+            for root, _, names in os.walk(path)
+            for name in fnmatch.filter(names, 'report*.html')
+        )
+        result_counts[project] = len(project_files)
+
+        print(f'\n------ {project} ------\n')
+        print(f'TOTAL ISSUES: {len(project_files)}')
+        find_project_results(args, project, project_files, results_data)
+
+    print("\nWriting results files...")
+    results_data_file = os.path.abspath(f'{args.output_dir}/dirty_file_data.json')
+    with open(results_data_file, "w") as f:
+        json.dump(results_data, f, indent=4)
+    print(f'Done! Find them in {os.path.abspath(args.output_dir)}\n')
+
+    results_msg = f'Total ({sum(result_counts.values())}) '
+    results_msg += ''.join(f'{proj} ({count}) ' for proj, count in result_counts.items())
+    print(results_msg)
+    return 0
+
+
+def main():
+    """Create the output directory and generate the dirty-file lists.
+
+    Returns the process exit status: 0 on success, 1 when the output
+    directory cannot be created, and otherwise the non-zero status reported
+    by find_all_results.
+    """
+    args = parser()
+    try:
+        os.makedirs(args.output_dir, exist_ok=True)
+    except OSError as e:
+        sys.stderr.write(f'{e}\n')
+        return 1
+
+    if args.results_dir:
+        # find_all_results may return None on success; normalize that to 0.
+        return find_all_results(args) or 0
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())