Skip to content

Commit

Permalink
Add scripts that parse and compare static analyzer HTML output
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=269390
rdar://problem/122962791

Reviewed by David Kilzer.

These scripts will be used by a static analyzer bot to parse and compare HTML reports from static analysis.

* Tools/Scripts/compare-static-analysis-results.py: Added.
(parser):
(find_diff): Compares two files to find regressions and fixes.
(compare_project_results): Consolidates results per project.
(create_filtered_results_dir): Uses scan-build --generate-index-only to generate new index.html for new issues per project.
(main):
* Tools/Scripts/generate-dirty-files.py: Added. Takes in output from the static analyzer and extracts
the issue hash, file name, and line number per project and checker type.
(parser):
(parse_results_file): Extracts data from HTML comments.
(find_project_results): Writes data per checker.
(find_all_results): Summarizes all results.
(main):

Canonical link: https://commits.webkit.org/276495@main
  • Loading branch information
briannafan committed Mar 21, 2024
1 parent b289d5f commit 49272b1
Show file tree
Hide file tree
Showing 2 changed files with 309 additions and 0 deletions.
141 changes: 141 additions & 0 deletions Tools/Scripts/compare-static-analysis-results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import subprocess
import argparse
import sys

# Checker names. For every project, each checker has a '<checker>-issues' and a
# '<checker>-files' list that is compared between the archived and new build.
CHECKERS = ['UncountedCallArgsChecker', 'UncountedLocalVarsChecker']
# Project sub-directories looked up under both the archived and the new directory.
PROJECTS = ['WebKit', 'WebCore']


def parser(argv=None):
    """Parse the command-line arguments of the comparison script.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], so existing callers that
            invoke parser() with no arguments behave exactly as before.

    Returns:
        argparse.Namespace with the attributes archived_dir, new_dir,
        build_output (required option) and scan_build (None when
        --scan-build-path is not given).
    """
    # Named arg_parser so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description='compare dirty file lists')
    arg_parser.add_argument(
        'archived_dir',
        help='directory of dirty lists from previous build'
    )
    arg_parser.add_argument(
        'new_dir',
        help='directory of dirty lists from new build'
    )
    arg_parser.add_argument(
        '--build-output',
        dest='build_output',
        help='output from new build',
        required=True
    )
    arg_parser.add_argument(
        '--scan-build-path',
        dest='scan_build',
        help='path to scan-build'
    )

    return arg_parser.parse_args(argv)


def find_diff(file1, file2, mode):
    """Compare two dirty lists and report regressions and fixes.

    The lists compared are the files named f'{file1}-{mode}' (archived build)
    and f'{file2}-{mode}' (new build), each holding one entry per line.

    Entries are compared as whole lines in-process instead of via
    `grep -F -v -f` run through a shell. That avoids three pitfalls of the
    grep form: paths containing spaces or shell metacharacters, substring
    matches (e.g. 'Foo.cpp' hiding 'BarFoo.cpp'), and a blank line in the
    pattern file matching every entry and thereby hiding all differences.

    Args:
        file1: Path prefix of the archived list.
        file2: Path prefix of the new list.
        mode: List suffix, e.g. 'issues' or 'files'.

    Returns:
        A tuple (new_entries, fixed_entries) of sets: entries only present in
        the new list, and entries only present in the archived list. If either
        list cannot be read, the error is written to stderr and both sets are
        empty.
    """
    try:
        archived = _read_dirty_list(f'{file1}-{mode}')
        current = _read_dirty_list(f'{file2}-{mode}')
    except OSError as e:
        sys.stderr.write(f'{e}\n')
        return set(), set()

    return current - archived, archived - current


def _read_dirty_list(path):
    """Return the set of non-empty lines stored in the file at path."""
    with open(path, 'r') as f:
        return {line for line in f.read().splitlines() if line}


def compare_project_results(args, archive_path, new_path, project):
    """Diff every checker's dirty lists for one project and print a summary.

    Args:
        args: Parsed command-line arguments, forwarded to
            create_filtered_results_dir.
        archive_path: Directory holding the project's archived lists.
        new_path: Directory holding the project's new lists.
        project: Project name, forwarded to create_filtered_results_dir.

    Returns:
        The set of new issue entries found across all checkers.
    """
    regressions, regressed_files = set(), set()
    # The fixed totals and the file totals are accumulated but only the
    # regressions set is used after the loop.
    resolved, resolved_files = set(), set()

    for checker in CHECKERS:
        print(f'{checker}:')
        old_prefix = f'{archive_path}/{checker}'
        new_prefix = f'{new_path}/{checker}'
        issue_delta = find_diff(old_prefix, new_prefix, 'issues')
        file_delta = find_diff(old_prefix, new_prefix, 'files')
        added_issues, removed_issues = issue_delta
        added_files, removed_files = file_delta

        resolved |= removed_issues
        resolved_files |= removed_files
        regressions |= added_issues
        regressed_files |= added_files

        print(f' Fixed {len(removed_issues)} issue(s).')
        print(f' Fixed {len(removed_files)} file(s).')
        print(f' {len(added_issues)} new issue(s).')
        print(f' {len(added_files)} new file(s) with issues.\n')

    if not regressions:
        return regressions

    create_filtered_results_dir(args, project, regressions, 'StaticAnalyzerRegressions')
    return regressions


def create_filtered_results_dir(args, project, issues, category='StaticAnalyzerRegressions'):
    """Build a results directory that contains only the given issues.

    Symlinks f'report-{issue_hash[:6]}.html' for every issue from
    <build_output>/StaticAnalyzer/<project>/StaticAnalyzerReports into
    <build_output>/<category>/<project>/StaticAnalyzerReports, then runs
    scan-build with --generate-index-only on <build_output>/<category>/<project>.

    Args:
        args: Namespace providing build_output and scan_build.
        project: Project name used as a path component.
        issues: Iterable of issue hash strings.
        category: Name of the directory created under build_output.
    """
    print(f'Creating {category} and linking results...')
    project_dir = os.path.abspath(os.path.join(args.build_output, category, project))
    filtered_reports = os.path.join(project_dir, 'StaticAnalyzerReports')
    all_reports = os.path.abspath(
        os.path.join(args.build_output, 'StaticAnalyzer', project, 'StaticAnalyzerReports'))
    os.makedirs(filtered_reports, exist_ok=True)

    # Create symlinks to new issues only so that we can run scan-build to generate new index.html files
    for issue_hash in issues:
        report = f'report-{issue_hash[:6]}.html'
        try:
            os.symlink(os.path.join(all_reports, report), os.path.join(filtered_reports, report))
        except FileExistsError:
            # Left over from an earlier run, or two hashes share their first
            # six characters; the existing link is kept.
            pass

    if not args.scan_build:
        # --scan-build-path is optional; passing None to subprocess would raise TypeError.
        sys.stderr.write('No scan-build path given; skipping index generation.\n')
        return
    subprocess.run([args.scan_build, '--generate-index-only', project_dir])


def main():
    """Compare archived and new results for every project.

    Returns:
        0, used as the process exit status.
    """
    args = parser()
    new_issues_total = set()

    for project in PROJECTS:
        archive_path = os.path.abspath(f'{args.archived_dir}/{project}')
        new_path = os.path.abspath(f'{args.new_dir}/{project}')
        print(f'\n------ {project} ------\n')
        new_issues_total.update(compare_project_results(args, archive_path, new_path, project))

    if new_issues_total:
        print(f'\nTotal new issues: {len(new_issues_total)}')

    return 0


if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status
    # instead of discarding it.
    sys.exit(main())
168 changes: 168 additions & 0 deletions Tools/Scripts/generate-dirty-files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import subprocess
import argparse
import json
import sys

# Maps the bug type text read from a report's BUGTYPE comment to the checker
# name used as the prefix of the '<checker>-issues' / '<checker>-files' lists.
CHECKER_MAP = {
'Uncounted call argument for a raw pointer/reference parameter': 'UncountedCallArgsChecker',
'Uncounted raw pointer or reference not provably backed by ref-counted variable': 'UncountedLocalVarsChecker'
}

# Project sub-directories searched under <results_dir>/StaticAnalyzer.
PROJECTS = ['WebKit', 'WebCore']


def parser(argv=None):
    """Parse the command-line arguments of the dirty-file generator.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], so existing callers that
            invoke parser() with no arguments behave exactly as before.

    Returns:
        argparse.Namespace with the attributes results_dir, output_dir
        (default 'smart-pointer-result-archive') and build_dir (None when
        --build-dir is not given).
    """
    # Named arg_parser so the local does not shadow this function's own name.
    arg_parser = argparse.ArgumentParser(description='analyze clang results')
    arg_parser.add_argument(
        'results_dir',
        help='directory of results to parse'
    )
    arg_parser.add_argument(
        '--output-dir',
        dest='output_dir',
        help='output directory for dirty files list',
        default='smart-pointer-result-archive'
    )
    arg_parser.add_argument(
        '--build-dir',
        dest='build_dir',
        help='path to build directory, used to standardize file paths'
    )

    return arg_parser.parse_args(argv)


def parse_results_file(args, file_path):
    """Extract issue metadata from the HTML comments of one report file.

    Only lines that are themselves an HTML comment of the form
    '<!-- TAG value -->' are considered, so other lines that merely contain
    a tag name cannot overwrite a field. Reading stops as soon as all four
    fields have a non-empty value.

    Args:
        args: Namespace providing build_dir. When set, that directory prefix
            is removed from the reported bug file path.
        file_path: Path of the report file to read.

    Returns:
        A tuple (bug_file, issue_hash, bug_type, bug_line) of strings, or
        (None, None, None, None) when the file does not provide all four.
    """
    wanted = ('BUGFILE', 'ISSUEHASHCONTENTOFLINEINCONTEXT', 'BUGTYPE', 'BUGLINE')
    found = dict.fromkeys(wanted)

    # errors='replace' keeps a stray undecodable byte elsewhere in the report
    # from aborting the parse.
    with open(file_path, 'r', errors='replace') as f:
        for line in f:
            text = line.strip()
            if not (text.startswith('<!--') and text.endswith('-->')):
                continue
            tag, _, value = text[4:-3].strip().partition(' ')
            if tag not in found:
                continue
            value = value.strip()
            if tag == 'BUGFILE' and args.build_dir:
                # Accept the build directory with or without a trailing slash.
                prefix = args.build_dir if args.build_dir.endswith('/') else f'{args.build_dir}/'
                value = value.removeprefix(prefix)
            found[tag] = value
            if all(found.values()):
                return tuple(found[name] for name in wanted)
    return None, None, None, None


def find_project_results(args, project, file_list, results_data):
    """Record every parsed report of one project and print per-type counts.

    For each report whose metadata can be parsed, the issue is appended to
    results_data under its file name, and the issue hash and file name are
    appended to '<checker>-issues' and '<checker>-files' inside
    <output_dir>/<project>.

    Args:
        args: Namespace providing output_dir (and whatever
            parse_results_file reads).
        project: Project name used as the output sub-directory.
        file_list: Iterable of report file paths; empty entries are ignored.
        results_data: Dict mapping file name to a list of issue dicts; it is
            updated in place.

    Returns:
        The same results_data dict.
    """
    bug_counts = dict.fromkeys(CHECKER_MAP, 0)
    project_dir = os.path.abspath(f'{args.output_dir}/{project}')

    for result_file in file_list:
        if not result_file:
            continue
        file_name, issue_hash, bug_type, bug_line = parse_results_file(args, result_file)
        if not file_name:
            continue

        checker = CHECKER_MAP.get(bug_type)
        if checker is None:
            # A report with a bug type outside CHECKER_MAP has no list to go
            # into; report it instead of failing with KeyError.
            sys.stderr.write(f'Skipping {result_file}: unrecognized bug type "{bug_type}"\n')
            continue

        # Create files listing issue hashes and file names.
        bug_counts[bug_type] += 1
        issue_obj = {"hash": issue_hash, "bugtype": bug_type, "line": bug_line}
        results_data.setdefault(file_name, []).append(issue_obj)

        with open(os.path.join(project_dir, f'{checker}-issues'), 'a') as f:
            f.write(f'{issue_hash}\n')
        with open(os.path.join(project_dir, f'{checker}-files'), 'a') as f:
            f.write(f'{file_name}\n')

    for name, count in bug_counts.items():
        print(f' {name}: {count}')
    return results_data


def find_all_results(args):
    """Collect the reports of every project and write the summary files.

    For each project, files named report*.html are searched recursively under
    <results_dir>/StaticAnalyzer/<project> and handed to
    find_project_results. Afterwards the combined data is written as JSON to
    <output_dir>/dirty_file_data.json and the totals are printed.

    The directory is walked in-process rather than through a shell `find`
    command, so result paths containing spaces or shell metacharacters work,
    and the reports are processed in sorted order.

    Args:
        args: Namespace providing results_dir and output_dir.

    Returns:
        0 on success, -1 when a project's results directory does not exist.
    """
    results_data = {}
    result_counts = {}

    for project in PROJECTS:
        os.makedirs(os.path.abspath(f'{args.output_dir}/{project}'), exist_ok=True)
        path = os.path.abspath(os.path.join(args.results_dir, 'StaticAnalyzer', project))
        if not os.path.isdir(path):
            sys.stderr.write(f'Could not find results for {project}\n')
            return -1
        project_files = sorted(
            os.path.join(root, name)
            for root, _dirs, names in os.walk(path)
            for name in names
            if name.startswith('report') and name.endswith('.html')
        )
        result_counts[project] = len(project_files)

        print(f'\n------ {project} ------\n')
        print(f'TOTAL ISSUES: {len(project_files)}')
        find_project_results(args, project, project_files, results_data)

    print("\nWriting results files...")
    results_data_file = os.path.abspath(f'{args.output_dir}/dirty_file_data.json')
    with open(results_data_file, "w") as f:
        f.write(json.dumps(results_data, indent=4))
    print(f'Done! Find them in {os.path.abspath(args.output_dir)}\n')

    per_project = ''.join(f'{proj} ({count}) ' for proj, count in result_counts.items())
    print(f'Total ({sum(result_counts.values())}) {per_project}')
    return 0


def main():
    """Create the output directory and generate the dirty-file lists.

    Returns:
        The process exit status: 0 on success, 1 when the output directory
        cannot be created, otherwise the non-zero status reported by
        find_all_results.
    """
    args = parser()
    try:
        os.makedirs(args.output_dir, exist_ok=True)
    except OSError as e:
        sys.stderr.write(f'{e}\n')
        return 1

    if not args.results_dir:
        return 0
    # find_all_results signals failure with -1; treat a missing status as success.
    return find_all_results(args) or 0


if __name__ == '__main__':
    # Propagate failures to the caller through the exit status.
    sys.exit(main())

0 comments on commit 49272b1

Please sign in to comment.