Add support for post-commit smart pointer workflow
https://bugs.webkit.org/show_bug.cgi?id=273896
rdar://127762312

Reviewed by Jonathan Bedard.

Revamps the smart pointer static analysis scripts to find new regressions and
fixes by comparing each project's per-checker results against expectation files.

* Tools/Scripts/compare-static-analysis-results:
(parser): Adds --check-expectations argument.
(find_diff): Removes grep commands and uses set operations instead.
(compare_project_results_to_expectations): New method to find unexpected regressions and fixes.
(compare_project_results_by_run): Renames method that finds new issues and fixes per run.
(main): Adds new logging.
(compare_project_results): Deleted.

* Tools/Scripts/generate-dirty-files:
(parse_results_file): Always strips the build directory prefix from file paths, since --build-dir is now required.
(find_project_results): Changes the saved data structure to lists of issues per file, per checker.
(find_all_results): Creates issues_per_file.json, which compare-static-analysis-results consumes (see the sketch below).

* Tools/Scripts/generate-static-analysis-archive: Adds new templates for the unexpected results page.
(parse_command_line):
(generate_results_page):
(create_results_file):
(main):
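
For reference, a minimal sketch of the shape of issues_per_file.json; the checker name, path, and hashes below are illustrative, not from a real run:

    # Per checker: a map from project-relative file path to the issue
    # hashes reported in that file.
    issues_per_file = {
        'UncountedCallArgsChecker': {
            'dom/Document.cpp': ['9f86d0818847', '0a4d55a8d778'],
        },
    }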

Canonical link: https://commits.webkit.org/278808@main
briannafan committed May 15, 2024
1 parent 5dd01ba commit fa1fd78
Showing 3 changed files with 163 additions and 101 deletions.
Tools/Scripts/compare-static-analysis-results: 116 additions & 67 deletions
@@ -24,77 +24,125 @@
 import os
 import subprocess
 import argparse
-import sys
+import json

 CHECKERS = ['NoUncountedMemberChecker', 'RefCntblBaseVirtualDtor', 'UncountedCallArgsChecker', 'UncountedLocalVarsChecker']
 PROJECTS = ['WebCore', 'WebKit']


 def parser():
-    parser = argparse.ArgumentParser(description='compare dirty file lists')
+    parser = argparse.ArgumentParser(description='Finds new regressions and fixes between two smart pointer static analysis results')
     parser.add_argument(
-        'archived_dir',
-        help='directory of dirty lists from previous build'
+        'new_dir',
+        help='Path to directory of results from new build'
     )
     parser.add_argument(
-        'new_dir',
-        help='directory of dirty lists from new build'
+        '--archived-dir',
+        dest='archived_dir',
+        help='Path to directory of previous results for comparison'
     )
     parser.add_argument(
         '--build-output',
         dest='build_output',
-        help='output from new build',
+        help='Path to the static analyzer output from the new build',
     )
     parser.add_argument(
         '--scan-build-path',
         dest='scan_build',
-        help='path to scan-build'
+        help='Path to scan-build executable'
     )
+    parser.add_argument(
+        '--check-expectations',
+        dest='check_expectations',
+        action='store_true',
+        default=False,
+        help='Compare new results to expectations (instead of a previous run)'
+    )
     return parser.parse_args()


-def find_diff(file1, file2, mode):
-    # Create empty file if the corresponding one doesn't exist - this happens if a checker is added or removed
-    if not os.path.exists(f'{file1}-{mode}'):
-        f = open(f'{file1}-{mode}', 'a')
-        f.close()
-    if not os.path.exists(f'{file2}-{mode}'):
-        f = open(f'{file2}-{mode}', 'a')
-        f.close()
-
-    # Find new regressions
-    new_lines_list = []
-    find_issues_cmd = f"/usr/bin/grep -F -v -f {file1}-{mode} {file2}-{mode}"
-    try:
-        new_lines = subprocess.check_output(find_issues_cmd, shell=True, stderr=subprocess.STDOUT, text=True)
-        new_lines_list = new_lines.splitlines()
-    except subprocess.CalledProcessError as e:
-        if not e.returncode == 1:
-            sys.stderr.write(f'{e.output}')
-
-    # Find all fixes
-    fixed_lines_list = []
-    find_fixes_cmd = f'grep -F -v -f {file2}-{mode} {file1}-{mode}'
-    try:
-        fixed_lines = subprocess.check_output(find_fixes_cmd, shell=True, text=True, stderr=subprocess.STDOUT)
-        fixed_lines_list = fixed_lines.splitlines()
-    except subprocess.CalledProcessError as e:
-        if not e.returncode == 1:
-            sys.stderr.write(f'{e.output}')
-
-    return set(new_lines_list), set(fixed_lines_list)
-
-
-def compare_project_results(args, archive_path, new_path, project):
+def find_diff(args, file1, file2):
+    if not args.check_expectations:
+        # Create empty file if the corresponding one doesn't exist - this happens if a checker is added or removed
+        if not os.path.exists(file1):
+            f = open(file1, 'a')
+            f.close()
+        if not os.path.exists(file2):
+            f = open(file2, 'a')
+            f.close()
+
+    with open(file1) as baseline_file, open(file2) as new_file:
+        baseline_list = baseline_file.read().splitlines()
+        new_file_list = new_file.read().splitlines()
+        # Find new regressions
+        diff_new_from_baseline = set(new_file_list) - set(baseline_list)
+        # Find fixes
+        diff_baseline_from_new = set(baseline_list) - set(new_file_list)
+
+    return set(diff_new_from_baseline), set(diff_baseline_from_new)
+
+
+def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
+    # Create symlinks to new issues only so that we can run scan-build to generate new index.html files
+    path_to_reports = os.path.abspath(f'{args.build_output}/{category}/{project}/StaticAnalyzerReports')
+    subprocess.run(['mkdir', '-p', path_to_reports])
+    for issue_hash in issues:
+        report = f"report-{issue_hash[:6]}.html"
+        path_to_report = f'{args.build_output}/StaticAnalyzer/{project}/StaticAnalyzerReports/{report}'
+        path_to_report_new = os.path.join(path_to_reports, report)
+        subprocess.run(['ln', '-s', os.path.abspath(path_to_report), path_to_report_new])
+
+    path_to_project = f'{args.build_output}/{category}/{project}'
+    subprocess.run([args.scan_build, '--generate-index-only', os.path.abspath(path_to_project)])
+
+
+def compare_project_results_to_expectations(args, new_path, project):
+    unexpected_issues_total = set()
+    unexpected_buggy_files = set()
+    unexpected_clean_files = set()
+
+    # Compare the list of dirty files to the expectations list of files
+    for checker in CHECKERS:
+        # Get unexpected clean and buggy files per checker
+        buggy_files, clean_files = find_diff(args, os.path.abspath(f'Source/{project}/{checker}Expectations.txt'), f'{new_path}/{project}/{checker}-files.txt')
+        unexpected_clean_files.update(clean_files)
+        unexpected_buggy_files.update(buggy_files)
+
+        # Get unexpected issues per checker
+        unexpected_issues = set()
+        with open(f'{new_path}/issues_per_file.json') as f:
+            issues_per_file = json.load(f)
+        for file_name in buggy_files:
+            unexpected_issues.update(issues_per_file[checker][file_name])
+        unexpected_issues_total.update(unexpected_issues)
+
+        with open(f'{new_path}/{project}/UnexpectedCleanFiles{checker}.txt', 'a') as f:
+            f.write('\n'.join(clean_files))
+        with open(f'{new_path}/{project}/UnexpectedFailedFiles{checker}.txt', 'a') as f:
+            f.write('\n'.join(buggy_files))
+        with open(f'{new_path}/{project}/UnexpectedIssues{checker}.txt', 'a') as f:
+            f.write('\n'.join(unexpected_issues))
+
+        print(f'{checker}:')
+        print(f' Unexpected passing files: {len(clean_files)}')
+        print(f' Unexpected failing files: {len(buggy_files)}')
+        print(f' Unexpected issues: {len(unexpected_issues)}\n')
+
+    if unexpected_issues_total and args.scan_build:
+        create_filtered_results_dir(args, project, unexpected_issues_total, 'StaticAnalyzerUnexpectedRegressions')
+    return unexpected_buggy_files, unexpected_clean_files, unexpected_issues_total
+
+
+def compare_project_results_by_run(args, archive_path, new_path, project):
     new_issues_total = set()
     new_files_total = set()
     fixed_issues_total = set()
     fixed_files_total = set()

     for checker in CHECKERS:
         print(f'{checker}:')
-        new_issues, fixed_issues = find_diff(f'{archive_path}/{checker}', f'{new_path}/{checker}', 'issues')
-        new_files, fixed_files = find_diff(f'{archive_path}/{checker}', f'{new_path}/{checker}', 'files')
+        new_issues, fixed_issues = find_diff(args, f'{archive_path}/{checker}-issues', f'{new_path}/{project}/{checker}-issues.txt')
+        new_files, fixed_files = find_diff(args, f'{archive_path}/{checker}-files', f'{new_path}/{project}/{checker}-files.txt')
         fixed_issues_total.update(fixed_issues)
         fixed_files_total.update(fixed_files)
         new_issues_total.update(new_issues)
@@ -111,37 +159,38 @@ def compare_project_results(args, archive_path, new_path, project):
     return new_issues_total, new_files_total


-def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
-    # Create symlinks to new issues only so that we can run scan-build to generate new index.html files
-    path_to_reports = os.path.abspath(f'{args.build_output}/{category}/{project}/StaticAnalyzerReports')
-    subprocess.run(['mkdir', '-p', path_to_reports])
-    for issue_hash in issues:
-        report = f"report-{issue_hash[:6]}.html"
-        path_to_report = f'{args.build_output}/StaticAnalyzer/{project}/StaticAnalyzerReports/{report}'
-        path_to_report_new = os.path.join(path_to_reports, report)
-        subprocess.run(['ln', '-s', os.path.abspath(path_to_report), path_to_report_new])
-
-    path_to_project = f'{args.build_output}/{category}/{project}'
-    subprocess.run([args.scan_build, '--generate-index-only', os.path.abspath(path_to_project)])
-
-
 def main():
     args = parser()
     new_issues_total = set()
     new_files_total = set()
+    unexpected_passes_total = set()
+    unexpected_failures_total = set()
+    unexpected_issues_total = set()

     for project in PROJECTS:
-        archive_path = os.path.abspath(f'{args.archived_dir}/{project}')
-        new_path = os.path.abspath(f'{args.new_dir}/{project}')
         print(f'\n------ {project} ------\n')
-        new_issues, new_files = compare_project_results(args, archive_path, new_path, project)
-        new_issues_total.update(new_issues)
-        new_files_total.update(new_files)
-
-    if new_issues_total:
-        print(f'\nTotal new issues: {len(new_issues_total)}')
-    if new_files_total:
-        print(f'Total new files: {len(new_files_total)}')
+        new_path = os.path.abspath(f'{args.new_dir}')
+        if args.check_expectations:
+            unexpected_failures, unexpected_passes, unexpected_issues = compare_project_results_to_expectations(args, new_path, project)
+            unexpected_failures_total.update(unexpected_failures)
+            unexpected_passes_total.update(unexpected_passes)
+            unexpected_issues_total.update(unexpected_issues)
+        else:
+            archive_path = os.path.abspath(f'{args.archived_dir}/{project}')
+            new_issues, new_files = compare_project_results_by_run(args, archive_path, new_path, project)
+            new_issues_total.update(new_issues)
+            new_files_total.update(new_files)

+    print('\n')
+    for type, type_total in {
+        'new issues': new_issues_total,
+        'new files': new_files_total,
+        'unexpected failing files': unexpected_failures_total,
+        'unexpected passing files': unexpected_passes_total,
+        'unexpected issues': unexpected_issues_total
+    }.items():
+        if type_total:
+            print(f'Total {type}: {len(type_total)}')

     return 0

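For clarity, a small self-contained sketch of how the expectations comparison above falls out of find_diff's set differences; the file names are hypothetical:

    # Source/{project}/{checker}Expectations.txt: files expected to have issues.
    expected_failures = {'dom/Document.cpp', 'page/Frame.cpp'}
    # {new_dir}/{project}/{checker}-files.txt: files that currently have issues.
    current_failures = {'dom/Document.cpp', 'dom/Node.cpp'}

    unexpected_buggy = current_failures - expected_failures   # {'dom/Node.cpp'}
    unexpected_clean = expected_failures - current_failures   # {'page/Frame.cpp'}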
Tools/Scripts/generate-dirty-files: 26 additions & 20 deletions
@@ -38,21 +38,22 @@ PROJECTS = ['WebCore', 'WebKit']


 def parser():
-    parser = argparse.ArgumentParser(description='analyze clang results')
+    parser = argparse.ArgumentParser(description='Generates files listing new regressions and files per checker type')
     parser.add_argument(
         'results_dir',
-        help='directory of results to parse'
+        help='Path to directory of results to parse'
     )
     parser.add_argument(
         '--output-dir',
         dest='output_dir',
-        help='output directory for dirty files list',
+        help='Path to output directory for generated files',
         default='smart-pointer-result-archive'
     )
     parser.add_argument(
         '--build-dir',
         dest='build_dir',
-        help='path to build directory, used to standardize file paths'
+        help='Path to build directory, used to standardize file paths',
+        required=True
     )

     return parser.parse_args()
@@ -69,8 +70,7 @@ def parse_results_file(args, file_path):
         if 'BUGFILE' in line:
             bug_file = line.removeprefix('<!-- BUGFILE ')
             bug_file = bug_file.removesuffix(' -->\n')
-            if args.build_dir:
-                bug_file = bug_file.removeprefix(f'{args.build_dir}/')
+            bug_file = bug_file.removeprefix(f'{args.build_dir}/')
         if 'ISSUEHASHCONTENTOFLINEINCONTEXT' in line:
             issue_hash = line.removeprefix('<!-- ISSUEHASHCONTENTOFLINEINCONTEXT ')
             issue_hash = issue_hash.removesuffix(' -->\n')
@@ -94,24 +94,30 @@ def find_project_results(args, project, file_list, results_data):
         if not file_name:
             continue

-        # Create files listing issue hashes and file names.
         if bug_type not in bug_counts:
             bug_counts[bug_type] = 0
         bug_counts[bug_type] += 1
-        issue_obj = {"hash": issue_hash, "bugtype": bug_type, "line": bug_line}
-        list_of_issues = results_data.get(file_name, [])
-        list_of_issues.append(issue_obj)
-        results_data[file_name] = list_of_issues

-        output_file_name = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-issues')
-        f = open(output_file_name, 'a')
-        f.write(f'{issue_hash}\n')
-        f.close()
+        # Truncates path after project name to match the expectations file
+        try:
+            file_name = file_name.split(f'{project}/')[1]
+        except IndexError:
+            continue
+
+        # Maps filename to issues for each bug type
+        file_dict = results_data.get(CHECKER_MAP[bug_type], {})
+        if not file_name in file_dict:
+            file_dict[file_name] = []
+        file_dict[file_name].append(issue_hash)
+        results_data[CHECKER_MAP[bug_type]] = file_dict
+
+        output_file_name = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-issues.txt')
+        with open(output_file_name, 'a') as f:
+            f.write(f'{issue_hash}\n')

-        output_file_name_2 = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-files')
-        f = open(output_file_name_2, 'a')
-        f.write(f'{file_name}\n')
-        f.close()
+        output_file_name_2 = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-files.txt')
+        with open(output_file_name_2, 'a') as f:
+            f.write(f'{file_name}\n')

     for type, count in bug_counts.items():
         print(f' {type}: {count}')
@@ -142,7 +148,7 @@ def find_all_results(args):
         find_project_results(args, project, project_files, results_data)

     print("\nWriting results files...")
-    results_data_file = os.path.abspath(f'{args.output_dir}/dirty_file_data.json')
+    results_data_file = os.path.abspath(f'{args.output_dir}/issues_per_file.json')
     with open(results_data_file, "w") as f:
         results_data_obj = json.dumps(results_data, indent=4)
         f.write(results_data_obj)
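
The path truncation above keeps only the portion of each path after the project directory, so entries line up with the per-checker expectations files. A quick illustration with a made-up build path:

    file_name = '/Volumes/Data/worker/build/Source/WebCore/dom/Document.cpp'
    file_name = file_name.split('WebCore/')[1]
    print(file_name)  # dom/Document.cpp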
Tools/Scripts/generate-static-analysis-archive: 21 additions & 14 deletions
@@ -42,6 +42,9 @@ INDEX_TEMPLATE = """
 </html>
 """
 PROJECT_TEMPLATE = '<li><a href="{project_file_url}">{project_name}</a> ({project_issue_count})</li>'
+STATIC_ANALYZER = 'StaticAnalyzer'
+STATIC_ANALYZER_REGRESSIONS = 'StaticAnalyzerRegressions'
+STATIC_ANALYZER_UNEXPECTED = 'StaticAnalyzerUnexpectedRegressions'


 def parse_command_line(args):
@@ -54,7 +57,7 @@ def parse_command_line(args):
     return parser.parse_args(args)


-def generate_results_page(project_dirs, id_string, output_root, static_analysis_dir, is_new_result=False):
+def generate_results_page(project_dirs, id_string, output_root, static_analysis_dir, result_type=None):
     project_list = ''
     for project_name in sorted(project_dirs):
         static_analyzer_index_path = os.path.join(static_analysis_dir, project_name, INDEX_HTML)
@@ -63,10 +66,10 @@ def generate_results_page(project_dirs, id_string, output_root, static_analysis_
             project_issue_count=get_project_issue_count_as_string(static_analyzer_index_path),
             project_name=project_name)

-    if is_new_result:
-        heading = 'New issues in {}'.format(id_string)
+    if result_type:
+        heading = f'{result_type.capitalize()} issues in {id_string}'
     else:
-        heading = 'Results for {}'.format(id_string)
+        heading = f'Results for {id_string}'

     return INDEX_TEMPLATE.format(
         heading=heading,
@@ -75,14 +78,14 @@ def generate_results_page(project_dirs, id_string, output_root, static_analysis_
     )


-def create_results_file(options, output_root, static_analysis_dir, is_new_result=False):
+def create_results_file(options, output_root, static_analysis_dir, result_type=None):
     project_dirs = list(filter(lambda x: x[0] != '.' and os.path.isdir(os.path.join(static_analysis_dir, x)),
                                os.listdir(static_analysis_dir)))

     if options.id_string:
-        results_page = generate_results_page(project_dirs, options.id_string, output_root, static_analysis_dir, is_new_result)
-        if is_new_result:
-            f = open(output_root + '/new-results.html', 'w')
+        results_page = generate_results_page(project_dirs, options.id_string, output_root, static_analysis_dir, result_type)
+        if result_type:
+            f = open(output_root + f'/{result_type}-results.html', 'w')
         else:
             f = open(output_root + '/results.html', 'w')
         f.write(results_page)
@@ -105,14 +108,18 @@ def main(options):
     output_root = options.output_root
     static_analysis_dir = output_root

-    if os.path.isdir(os.path.join(static_analysis_dir, 'StaticAnalyzer')):
-        static_analysis_dir = os.path.join(static_analysis_dir, 'StaticAnalyzer')
+    if os.path.isdir(os.path.join(static_analysis_dir, STATIC_ANALYZER)):
+        static_analysis_dir = os.path.join(static_analysis_dir, STATIC_ANALYZER)

     create_results_file(options, output_root, static_analysis_dir)

-    if os.path.isdir(os.path.join(output_root, 'StaticAnalyzerRegressions')):
-        static_analysis_new_dir = os.path.join(output_root, 'StaticAnalyzerRegressions')
-        create_results_file(options, output_root, static_analysis_new_dir, is_new_result=True)
+    if os.path.isdir(os.path.join(output_root, STATIC_ANALYZER_REGRESSIONS)):
+        static_analysis_new_dir = os.path.join(output_root, STATIC_ANALYZER_REGRESSIONS)
+        create_results_file(options, output_root, static_analysis_new_dir, result_type='new')
+
+    if os.path.isdir(os.path.join(output_root, STATIC_ANALYZER_UNEXPECTED)):
+        static_analysis_new_dir = os.path.join(output_root, STATIC_ANALYZER_UNEXPECTED)
+        create_results_file(options, output_root, static_analysis_new_dir, result_type='unexpected')

     if options.destination:
         if os.path.isfile(options.destination):
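
With is_new_result generalized into result_type, one code path now renders both report flavors; the values below are illustrative:

    result_type = 'unexpected'
    print(f'{result_type.capitalize()} issues in 278808@main')  # Unexpected issues in 278808@main
    print(f'/{result_type}-results.html')                       # /unexpected-results.html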
