Add support for post-commit smart pointer workflow
https://bugs.webkit.org/show_bug.cgi?id=273896
rdar://127762312

Reviewed by Jonathan Bedard.

Revamps the smart pointer static analysis scripts to find new regressions and
fixes by comparing each project's per-checker results against expectation files.

* Tools/Scripts/compare-static-analysis-results:
(parser): Adds --check-expectations argument.
(find_diff): Removes grep commands and uses set operations instead.
(compare_project_results_to_expectations): New method to find unexpected regressions and fixes.
(compare_project_results_by_run): Renames method that finds new issues and fixes per run.
(main): Adds new logging.
(compare_project_results): Deleted.

* Tools/Scripts/generate-dirty-files:
(parse_results_file): Always strips the build directory prefix from file paths, since --build-dir is now required.
(find_project_results): Changes the saved data structure to lists of issues per file, per checker.
(find_all_results): Creates issues_per_file.json, which compare-static-analysis-results consumes (see the sketch below).

* Tools/Scripts/generate-static-analysis-archive: Adds new templates for the unexpected results page.
(parse_command_line):
(generate_results_page):
(create_results_file):
(main):
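
For reference, a minimal sketch of the shape of issues_per_file.json; the checker name, path, and hashes below are illustrative, not from a real run:

    # Per checker: a map from project-relative file path to the issue
    # hashes reported in that file.
    issues_per_file = {
        'UncountedCallArgsChecker': {
            'dom/Document.cpp': ['9f86d0818847', '0a4d55a8d778'],
        },
    }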

Canonical link: https://commits.webkit.org/278808@main
briannafan committed May 15, 2024
1 parent 5dd01ba commit fa1fd78
Showing 3 changed files with 163 additions and 101 deletions.
Tools/Scripts/compare-static-analysis-results: 116 additions & 67 deletions
@@ -24,77 +24,125 @@
 import os
 import subprocess
 import argparse
-import sys
+import json

 CHECKERS = ['NoUncountedMemberChecker', 'RefCntblBaseVirtualDtor', 'UncountedCallArgsChecker', 'UncountedLocalVarsChecker']
 PROJECTS = ['WebCore', 'WebKit']


 def parser():
-    parser = argparse.ArgumentParser(description='compare dirty file lists')
+    parser = argparse.ArgumentParser(description='Finds new regressions and fixes between two smart pointer static analysis results')
     parser.add_argument(
-        'archived_dir',
-        help='directory of dirty lists from previous build'
+        'new_dir',
+        help='Path to directory of results from new build'
     )
     parser.add_argument(
-        'new_dir',
-        help='directory of dirty lists from new build'
+        '--archived-dir',
+        dest='archived_dir',
+        help='Path to directory of previous results for comparison'
     )
     parser.add_argument(
         '--build-output',
         dest='build_output',
-        help='output from new build',
+        help='Path to the static analyzer output from the new build',
     )
     parser.add_argument(
         '--scan-build-path',
         dest='scan_build',
-        help='path to scan-build'
+        help='Path to scan-build executable'
     )
+    parser.add_argument(
+        '--check-expectations',
+        dest='check_expectations',
+        action='store_true',
+        default=False,
+        help='Compare new results to expectations (instead of a previous run)'
+    )
     return parser.parse_args()


-def find_diff(file1, file2, mode):
-    # Create empty file if the corresponding one doesn't exist - this happens if a checker is added or removed
-    if not os.path.exists(f'{file1}-{mode}'):
-        f = open(f'{file1}-{mode}', 'a')
-        f.close()
-    if not os.path.exists(f'{file2}-{mode}'):
-        f = open(f'{file2}-{mode}', 'a')
-        f.close()
-
-    # Find new regressions
-    new_lines_list = []
-    find_issues_cmd = f"/usr/bin/grep -F -v -f {file1}-{mode} {file2}-{mode}"
-    try:
-        new_lines = subprocess.check_output(find_issues_cmd, shell=True, stderr=subprocess.STDOUT, text=True)
-        new_lines_list = new_lines.splitlines()
-    except subprocess.CalledProcessError as e:
-        if not e.returncode == 1:
-            sys.stderr.write(f'{e.output}')
-
-    # Find all fixes
-    fixed_lines_list = []
-    find_fixes_cmd = f'grep -F -v -f {file2}-{mode} {file1}-{mode}'
-    try:
-        fixed_lines = subprocess.check_output(find_fixes_cmd, shell=True, text=True, stderr=subprocess.STDOUT)
-        fixed_lines_list = fixed_lines.splitlines()
-    except subprocess.CalledProcessError as e:
-        if not e.returncode == 1:
-            sys.stderr.write(f'{e.output}')
-
-    return set(new_lines_list), set(fixed_lines_list)
-
-
-def compare_project_results(args, archive_path, new_path, project):
+def find_diff(args, file1, file2):
+    if not args.check_expectations:
+        # Create empty file if the corresponding one doesn't exist - this happens if a checker is added or removed
+        if not os.path.exists(file1):
+            f = open(file1, 'a')
+            f.close()
+        if not os.path.exists(file2):
+            f = open(file2, 'a')
+            f.close()
+
+    with open(file1) as baseline_file, open(file2) as new_file:
+        baseline_list = baseline_file.read().splitlines()
+        new_file_list = new_file.read().splitlines()
+        # Find new regressions
+        diff_new_from_baseline = set(new_file_list) - set(baseline_list)
+        # Find fixes
+        diff_baseline_from_new = set(baseline_list) - set(new_file_list)
+
+    return set(diff_new_from_baseline), set(diff_baseline_from_new)
+
+
+def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
+    # Create symlinks to new issues only so that we can run scan-build to generate new index.html files
+    path_to_reports = os.path.abspath(f'{args.build_output}/{category}/{project}/StaticAnalyzerReports')
+    subprocess.run(['mkdir', '-p', path_to_reports])
+    for issue_hash in issues:
+        report = f"report-{issue_hash[:6]}.html"
+        path_to_report = f'{args.build_output}/StaticAnalyzer/{project}/StaticAnalyzerReports/{report}'
+        path_to_report_new = os.path.join(path_to_reports, report)
+        subprocess.run(['ln', '-s', os.path.abspath(path_to_report), path_to_report_new])
+
+    path_to_project = f'{args.build_output}/{category}/{project}'
+    subprocess.run([args.scan_build, '--generate-index-only', os.path.abspath(path_to_project)])
+
+
+def compare_project_results_to_expectations(args, new_path, project):
+    unexpected_issues_total = set()
+    unexpected_buggy_files = set()
+    unexpected_clean_files = set()
+
+    # Compare the list of dirty files to the expectations list of files
+    for checker in CHECKERS:
+        # Get unexpected clean and buggy files per checker
+        buggy_files, clean_files = find_diff(args, os.path.abspath(f'Source/{project}/{checker}Expectations.txt'), f'{new_path}/{project}/{checker}-files.txt')
+        unexpected_clean_files.update(clean_files)
+        unexpected_buggy_files.update(buggy_files)
+
+        # Get unexpected issues per checker
+        unexpected_issues = set()
+        with open(f'{new_path}/issues_per_file.json') as f:
+            issues_per_file = json.load(f)
+        for file_name in buggy_files:
+            unexpected_issues.update(issues_per_file[checker][file_name])
+        unexpected_issues_total.update(unexpected_issues)
+
+        with open(f'{new_path}/{project}/UnexpectedCleanFiles{checker}.txt', 'a') as f:
+            f.write('\n'.join(clean_files))
+        with open(f'{new_path}/{project}/UnexpectedFailedFiles{checker}.txt', 'a') as f:
+            f.write('\n'.join(buggy_files))
+        with open(f'{new_path}/{project}/UnexpectedIssues{checker}.txt', 'a') as f:
+            f.write('\n'.join(unexpected_issues))
+
+        print(f'{checker}:')
+        print(f' Unexpected passing files: {len(clean_files)}')
+        print(f' Unexpected failing files: {len(buggy_files)}')
+        print(f' Unexpected issues: {len(unexpected_issues)}\n')
+
+    if unexpected_issues_total and args.scan_build:
+        create_filtered_results_dir(args, project, unexpected_issues_total, 'StaticAnalyzerUnexpectedRegressions')
+    return unexpected_buggy_files, unexpected_clean_files, unexpected_issues_total
+
+
+def compare_project_results_by_run(args, archive_path, new_path, project):
     new_issues_total = set()
     new_files_total = set()
     fixed_issues_total = set()
     fixed_files_total = set()

     for checker in CHECKERS:
         print(f'{checker}:')
-        new_issues, fixed_issues = find_diff(f'{archive_path}/{checker}', f'{new_path}/{checker}', 'issues')
-        new_files, fixed_files = find_diff(f'{archive_path}/{checker}', f'{new_path}/{checker}', 'files')
+        new_issues, fixed_issues = find_diff(args, f'{archive_path}/{checker}-issues', f'{new_path}/{project}/{checker}-issues.txt')
+        new_files, fixed_files = find_diff(args, f'{archive_path}/{checker}-files', f'{new_path}/{project}/{checker}-files.txt')
         fixed_issues_total.update(fixed_issues)
         fixed_files_total.update(fixed_files)
         new_issues_total.update(new_issues)
@@ -111,37 +159,38 @@ def compare_project_results(args, archive_path, new_path, project):
     return new_issues_total, new_files_total


-def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
-    # Create symlinks to new issues only so that we can run scan-build to generate new index.html files
-    path_to_reports = os.path.abspath(f'{args.build_output}/{category}/{project}/StaticAnalyzerReports')
-    subprocess.run(['mkdir', '-p', path_to_reports])
-    for issue_hash in issues:
-        report = f"report-{issue_hash[:6]}.html"
-        path_to_report = f'{args.build_output}/StaticAnalyzer/{project}/StaticAnalyzerReports/{report}'
-        path_to_report_new = os.path.join(path_to_reports, report)
-        subprocess.run(['ln', '-s', os.path.abspath(path_to_report), path_to_report_new])
-
-    path_to_project = f'{args.build_output}/{category}/{project}'
-    subprocess.run([args.scan_build, '--generate-index-only', os.path.abspath(path_to_project)])
-
-
 def main():
     args = parser()
     new_issues_total = set()
     new_files_total = set()
+    unexpected_passes_total = set()
+    unexpected_failures_total = set()
+    unexpected_issues_total = set()

     for project in PROJECTS:
-        archive_path = os.path.abspath(f'{args.archived_dir}/{project}')
-        new_path = os.path.abspath(f'{args.new_dir}/{project}')
         print(f'\n------ {project} ------\n')
-        new_issues, new_files = compare_project_results(args, archive_path, new_path, project)
-        new_issues_total.update(new_issues)
-        new_files_total.update(new_files)
-
-    if new_issues_total:
-        print(f'\nTotal new issues: {len(new_issues_total)}')
-    if new_files_total:
-        print(f'Total new files: {len(new_files_total)}')
+        new_path = os.path.abspath(f'{args.new_dir}')
+        if args.check_expectations:
+            unexpected_failures, unexpected_passes, unexpected_issues = compare_project_results_to_expectations(args, new_path, project)
+            unexpected_failures_total.update(unexpected_failures)
+            unexpected_passes_total.update(unexpected_passes)
+            unexpected_issues_total.update(unexpected_issues)
+        else:
+            archive_path = os.path.abspath(f'{args.archived_dir}/{project}')
+            new_issues, new_files = compare_project_results_by_run(args, archive_path, new_path, project)
+            new_issues_total.update(new_issues)
+            new_files_total.update(new_files)

+    print('\n')
+    for type, type_total in {
+        'new issues': new_issues_total,
+        'new files': new_files_total,
+        'unexpected failing files': unexpected_failures_total,
+        'unexpected passing files': unexpected_passes_total,
+        'unexpected issues': unexpected_issues_total
+    }.items():
+        if type_total:
+            print(f'Total {type}: {len(type_total)}')

     return 0

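For clarity, a small self-contained sketch of how the expectations comparison above falls out of find_diff's set differences; the file names are hypothetical:

    # Source/{project}/{checker}Expectations.txt: files expected to have issues.
    expected_failures = {'dom/Document.cpp', 'page/Frame.cpp'}
    # {new_dir}/{project}/{checker}-files.txt: files that currently have issues.
    current_failures = {'dom/Document.cpp', 'dom/Node.cpp'}

    unexpected_buggy = current_failures - expected_failures   # {'dom/Node.cpp'}
    unexpected_clean = expected_failures - current_failures   # {'page/Frame.cpp'}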
Tools/Scripts/generate-dirty-files: 26 additions & 20 deletions
@@ -38,21 +38,22 @@ PROJECTS = ['WebCore', 'WebKit']


 def parser():
-    parser = argparse.ArgumentParser(description='analyze clang results')
+    parser = argparse.ArgumentParser(description='Generates files listing new regressions and files per checker type')
     parser.add_argument(
         'results_dir',
-        help='directory of results to parse'
+        help='Path to directory of results to parse'
     )
     parser.add_argument(
         '--output-dir',
         dest='output_dir',
-        help='output directory for dirty files list',
+        help='Path to output directory for generated files',
         default='smart-pointer-result-archive'
     )
     parser.add_argument(
         '--build-dir',
         dest='build_dir',
-        help='path to build directory, used to standardize file paths'
+        help='Path to build directory, used to standardize file paths',
+        required=True
     )

     return parser.parse_args()
@@ -69,8 +70,7 @@ def parse_results_file(args, file_path):
         if 'BUGFILE' in line:
             bug_file = line.removeprefix('<!-- BUGFILE ')
             bug_file = bug_file.removesuffix(' -->\n')
-            if args.build_dir:
-                bug_file = bug_file.removeprefix(f'{args.build_dir}/')
+            bug_file = bug_file.removeprefix(f'{args.build_dir}/')
         if 'ISSUEHASHCONTENTOFLINEINCONTEXT' in line:
             issue_hash = line.removeprefix('<!-- ISSUEHASHCONTENTOFLINEINCONTEXT ')
             issue_hash = issue_hash.removesuffix(' -->\n')
@@ -94,24 +94,30 @@ def find_project_results(args, project, file_list, results_data):
         if not file_name:
             continue

-        # Create files listing issue hashes and file names.
         if bug_type not in bug_counts:
             bug_counts[bug_type] = 0
         bug_counts[bug_type] += 1
-        issue_obj = {"hash": issue_hash, "bugtype": bug_type, "line": bug_line}
-        list_of_issues = results_data.get(file_name, [])
-        list_of_issues.append(issue_obj)
-        results_data[file_name] = list_of_issues

-        output_file_name = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-issues')
-        f = open(output_file_name, 'a')
-        f.write(f'{issue_hash}\n')
-        f.close()
+        # Truncates path after project name to match the expectations file
+        try:
+            file_name = file_name.split(f'{project}/')[1]
+        except IndexError:
+            continue
+
+        # Maps filename to issues for each bug type
+        file_dict = results_data.get(CHECKER_MAP[bug_type], {})
+        if not file_name in file_dict:
+            file_dict[file_name] = []
+        file_dict[file_name].append(issue_hash)
+        results_data[CHECKER_MAP[bug_type]] = file_dict
+
+        output_file_name = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-issues.txt')
+        with open(output_file_name, 'a') as f:
+            f.write(f'{issue_hash}\n')

-        output_file_name_2 = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-files')
-        f = open(output_file_name_2, 'a')
-        f.write(f'{file_name}\n')
-        f.close()
+        output_file_name_2 = os.path.abspath(f'{args.output_dir}/{project}/{CHECKER_MAP[bug_type]}-files.txt')
+        with open(output_file_name_2, 'a') as f:
+            f.write(f'{file_name}\n')

     for type, count in bug_counts.items():
         print(f' {type}: {count}')
@@ -142,7 +148,7 @@ def find_all_results(args):
         find_project_results(args, project, project_files, results_data)

     print("\nWriting results files...")
-    results_data_file = os.path.abspath(f'{args.output_dir}/dirty_file_data.json')
+    results_data_file = os.path.abspath(f'{args.output_dir}/issues_per_file.json')
     with open(results_data_file, "w") as f:
         results_data_obj = json.dumps(results_data, indent=4)
         f.write(results_data_obj)
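
The path truncation above keeps only the portion of each path after the project directory, so entries line up with the per-checker expectations files. A quick illustration with a made-up build path:

    file_name = '/Volumes/Data/worker/build/Source/WebCore/dom/Document.cpp'
    file_name = file_name.split('WebCore/')[1]
    print(file_name)  # dom/Document.cpp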
Tools/Scripts/generate-static-analysis-archive: 21 additions & 14 deletions
@@ -42,6 +42,9 @@ INDEX_TEMPLATE = """
 </html>
 """
 PROJECT_TEMPLATE = '<li><a href="{project_file_url}">{project_name}</a> ({project_issue_count})</li>'
+STATIC_ANALYZER = 'StaticAnalyzer'
+STATIC_ANALYZER_REGRESSIONS = 'StaticAnalyzerRegressions'
+STATIC_ANALYZER_UNEXPECTED = 'StaticAnalyzerUnexpectedRegressions'


 def parse_command_line(args):
@@ -54,7 +57,7 @@ def parse_command_line(args):
     return parser.parse_args(args)


-def generate_results_page(project_dirs, id_string, output_root, static_analysis_dir, is_new_result=False):
+def generate_results_page(project_dirs, id_string, output_root, static_analysis_dir, result_type=None):
     project_list = ''
     for project_name in sorted(project_dirs):
         static_analyzer_index_path = os.path.join(static_analysis_dir, project_name, INDEX_HTML)
@@ -63,10 +66,10 @@ def generate_results_page(project_dirs, id_string, output_root, static_analysis_
             project_issue_count=get_project_issue_count_as_string(static_analyzer_index_path),
             project_name=project_name)

-    if is_new_result:
-        heading = 'New issues in {}'.format(id_string)
+    if result_type:
+        heading = f'{result_type.capitalize()} issues in {id_string}'
     else:
-        heading = 'Results for {}'.format(id_string)
+        heading = f'Results for {id_string}'

     return INDEX_TEMPLATE.format(
         heading=heading,
@@ -75,14 +78,14 @@ def generate_results_page(project_dirs, id_string, output_root, static_analysis_
     )


-def create_results_file(options, output_root, static_analysis_dir, is_new_result=False):
+def create_results_file(options, output_root, static_analysis_dir, result_type=None):
     project_dirs = list(filter(lambda x: x[0] != '.' and os.path.isdir(os.path.join(static_analysis_dir, x)),
                                os.listdir(static_analysis_dir)))

     if options.id_string:
-        results_page = generate_results_page(project_dirs, options.id_string, output_root, static_analysis_dir, is_new_result)
-        if is_new_result:
-            f = open(output_root + '/new-results.html', 'w')
+        results_page = generate_results_page(project_dirs, options.id_string, output_root, static_analysis_dir, result_type)
+        if result_type:
+            f = open(output_root + f'/{result_type}-results.html', 'w')
         else:
             f = open(output_root + '/results.html', 'w')
         f.write(results_page)
@@ -105,14 +108,18 @@ def main(options):
     output_root = options.output_root
     static_analysis_dir = output_root

-    if os.path.isdir(os.path.join(static_analysis_dir, 'StaticAnalyzer')):
-        static_analysis_dir = os.path.join(static_analysis_dir, 'StaticAnalyzer')
+    if os.path.isdir(os.path.join(static_analysis_dir, STATIC_ANALYZER)):
+        static_analysis_dir = os.path.join(static_analysis_dir, STATIC_ANALYZER)

     create_results_file(options, output_root, static_analysis_dir)

-    if os.path.isdir(os.path.join(output_root, 'StaticAnalyzerRegressions')):
-        static_analysis_new_dir = os.path.join(output_root, 'StaticAnalyzerRegressions')
-        create_results_file(options, output_root, static_analysis_new_dir, is_new_result=True)
+    if os.path.isdir(os.path.join(output_root, STATIC_ANALYZER_REGRESSIONS)):
+        static_analysis_new_dir = os.path.join(output_root, STATIC_ANALYZER_REGRESSIONS)
+        create_results_file(options, output_root, static_analysis_new_dir, result_type='new')
+
+    if os.path.isdir(os.path.join(output_root, STATIC_ANALYZER_UNEXPECTED)):
+        static_analysis_new_dir = os.path.join(output_root, STATIC_ANALYZER_UNEXPECTED)
+        create_results_file(options, output_root, static_analysis_new_dir, result_type='unexpected')

     if options.destination:
         if os.path.isfile(options.destination):
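
With is_new_result generalized into result_type, one code path now renders both report flavors; the values below are illustrative:

    result_type = 'unexpected'
    print(f'{result_type.capitalize()} issues in 278808@main')  # Unexpected issues in 278808@main
    print(f'/{result_type}-results.html')                       # /unexpected-results.html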
