Skip to content

Commit

Permalink
Testing: Correct perf parsing of C++ output
Browse files Browse the repository at this point in the history
This patch fixes errors in the parser of perf output.  Previously,
each record was assumed to be separated by spaces, but this failed for
more generic records (usually from C++) which included signatures (such
as `f(a, b)`) or templates (`f<a, b>`).  Nested constructs were also
possible.

This is fixed by introducing a simple tokenizer which extracts <, >, (, ), and
whitespace from the output, then rebuilds the records by combining any
whitespace which appears inside of delimiters.

This patch should hopefully resolve the CI errors in GitHub Actions.
  • Loading branch information
marshallward authored and adcroft committed May 15, 2024
1 parent e253883 commit 6272bbc
Showing 1 changed file with 55 additions and 5 deletions.
60 changes: 55 additions & 5 deletions .testing/tools/parse_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@
import collections
import json
import os
import re
import shlex
import subprocess
import sys

# Lexer for a single line of perf output.  Angle brackets, parentheses and
# runs of spaces/tabs each become their own token; any other run of
# characters is emitted as one token.  Every action hands back the matched
# text unchanged, so scan() yields the line split into raw string pieces.
perf_scanner = re.Scanner([
    (pattern, lambda scanner, token: token)
    for pattern in (
        r'<',
        r'>',
        r'\(',
        r'\)',
        r'[ \t]+',
        r'[^<>() \t]+',
    )
])


def main():
desc = 'Parse perf.data and return in JSON format.'
Expand Down Expand Up @@ -58,15 +68,55 @@ def parse_perf_report(perf_data_path):

# get per-symbol count
else:
tokens, remainder = perf_scanner.scan(line)
if remainder:
print('Line could not be tokenized', file=sys.stderr)
print(' line:', repr(line), file=sys.stderr)
print(' tokens:', tokens, file=sys.stderr)
print(' remainder:', remainder, file=sys.stderr)
sys.exit(os.EX_DATAERR)

# Construct record from tokens
# (NOTE: Not a proper grammar, just dumb bracket counting)
record = []
bracks = 0
parens = 0

for tok in tokens:
if tok == '<':
bracks += 1

if tok == '(':
parens += 1

rec = record[-1] if record else None

inside_bracket = rec and (bracks > 0 or parens > 0)
lead_rec = tok in '<(' and rec and not rec.isspace()
tail_rec = not tok.isspace() and rec and rec[-1] in '>)'

if inside_bracket or lead_rec or tail_rec:
record[-1] += tok
else:
record.append(tok)

if tok == '>':
bracks -= 1
if tok == '(':
parens -= 1

# Strip any whitespace tokens
record = [rec for rec in record if not rec.isspace()]

try:
tokens = line.split()
symbol = tokens[2]
period = int(tokens[3])
except ValueError:
symbol = record[2]
period = int(record[3])
except:
print("parse_perf.py: Error extracting symbol count",
file=sys.stderr)
file=sys.stderr)
print("line:", repr(line), file=sys.stderr)
print("tokens:", tokens, file=sys.stderr)
print("record:", record, file=sys.stderr)
raise

profile[event_name]['symbol'][symbol] = period
Expand Down

0 comments on commit 6272bbc

Please sign in to comment.