From 885e5adce76ea5ab0a6aa40873699ded908b5ee2 Mon Sep 17 00:00:00 2001 From: vsoch Date: Tue, 19 Nov 2024 10:00:09 -0700 Subject: [PATCH] feat: add close and perfetto support Problem: if we want to generate an accurate perfetto plot, we need both open and close events. The "close" event is technically close(2) and in go-fuse this is called a Flush. Signed-off-by: vsoch --- pkg/fs/loopback.go | 9 +++ python/compatlib/compatlib/client/__init__.py | 9 ++- .../compatlib/client/analyze_recording.py | 2 + python/compatlib/compatlib/client/models.py | 2 + python/compatlib/compatlib/client/perfetto.py | 30 +++++++ .../compatlib/client/plot_recording.py | 2 + python/compatlib/compatlib/traces/traces.py | 79 +++++++++++++++++++ python/compatlib/compatlib/utils.py | 5 ++ 8 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 python/compatlib/compatlib/client/perfetto.py diff --git a/pkg/fs/loopback.go b/pkg/fs/loopback.go index a061de1..5b5c583 100644 --- a/pkg/fs/loopback.go +++ b/pkg/fs/loopback.go @@ -16,6 +16,7 @@ import ( // We need to implement a custom LoopbackNode Open function var _ = (fs.NodeOpener)((*CompatLoopbackNode)(nil)) var _ = (fs.NodeLookuper)((*CompatLoopbackNode)(nil)) +var _ = (fs.NodeFlusher)((*CompatLoopbackNode)(nil)) type CompatLoopbackNode struct { fs.LoopbackNode @@ -40,6 +41,14 @@ func (n *CompatLoopbackNode) Lookup(ctx context.Context, name string, out *fuse. return ch, 0 } +// Flush is called for the close(2) call, and may be called multiple times. 
See: +// https://github.com/hanwen/go-fuse/blob/aff07cbd88fef6a2561a87a1e43255516ba7d4b6/fs/api.go#L369 +func (n *CompatLoopbackNode) Flush(ctx context.Context, fh fs.FileHandle) syscall.Errno { + p := n.path() + logger.LogEvent("Close", p) + return 0 +} + // https://github.com/hanwen/go-fuse/blob/f5b6d1b67f4a4d0f4c3c88b4491185b3685e8383/fs/loopback.go#L48 func idFromStat(rootNode *fs.LoopbackRoot, st *syscall.Stat_t) fs.StableAttr { swapped := (uint64(st.Dev) << 32) | (uint64(st.Dev) >> 32) diff --git a/python/compatlib/compatlib/client/__init__.py b/python/compatlib/compatlib/client/__init__.py index f895136..136db01 100644 --- a/python/compatlib/compatlib/client/__init__.py +++ b/python/compatlib/compatlib/client/__init__.py @@ -83,8 +83,13 @@ def get_parser(): description="Build models for fs recordings", formatter_class=argparse.RawTextHelpFormatter, ) + perfetto = subparsers.add_parser( + "to-perfetto", + description="generate perfetto recording for events", + formatter_class=argparse.RawTextHelpFormatter, + ) - for command in analyze_recording, plot_recording, run_models: + for command in analyze_recording, plot_recording, run_models, perfetto: command.add_argument( "-d", "--outdir", @@ -149,6 +154,8 @@ def help(return_code=0): from .plot_recording import main if args.command == "run-models": from .models import main + if args.command == "to-perfetto": + from .perfetto import main # Pass on to the correct parser return_code = 0 diff --git a/python/compatlib/compatlib/client/analyze_recording.py b/python/compatlib/compatlib/client/analyze_recording.py index 52018de..4a1cd92 100644 --- a/python/compatlib/compatlib/client/analyze_recording.py +++ b/python/compatlib/compatlib/client/analyze_recording.py @@ -23,6 +23,8 @@ def main(args, parser, extra, subparser): # A trace set is a collection of event files traceset = TraceSet(events) + if not traceset.files: + logger.exit("No event files were found.") # Define output files and paths image_outdir = 
os.path.join(args.outdir, "img") diff --git a/python/compatlib/compatlib/client/models.py b/python/compatlib/compatlib/client/models.py index faea802..3ba6b47 100644 --- a/python/compatlib/compatlib/client/models.py +++ b/python/compatlib/compatlib/client/models.py @@ -24,6 +24,8 @@ def main(args, parser, extra, subparser): # A trace set is a collection of event files traceset = TraceSet(events) + if not traceset.files: + logger.exit("No event files were found.") df = traceset.to_dataframe() # Define output files and paths diff --git a/python/compatlib/compatlib/client/perfetto.py b/python/compatlib/compatlib/client/perfetto.py new file mode 100644 index 0000000..01334c2 --- /dev/null +++ b/python/compatlib/compatlib/client/perfetto.py @@ -0,0 +1,30 @@ +import os + +from compatlib.logger import logger +from compatlib.traces import TraceSet + + +def main(args, parser, extra, subparser): + """ + The "extra" here is the list of events + + compatlib to-perfetto $(find ../recording -name *.out) + """ + # Extra events here should be one or more result event files to parse + events = extra + + # An output directory is required + if not args.outdir: + logger.exit("Please specify an output directory with -d/--outdir") + + # Define output files and paths + outfile = os.path.join(args.outdir, "perfetto-trace.pfw") + logger.info(f"Output will be saved to: {outfile}") + if not os.path.exists(args.outdir): + os.makedirs(args.outdir) + + # A trace set is a collection of event files + traceset = TraceSet(events) + if not traceset.files: + logger.exit("No event files were found.") + traceset.to_perfetto(outfile) diff --git a/python/compatlib/compatlib/client/plot_recording.py b/python/compatlib/compatlib/client/plot_recording.py index 22dd71e..4ffa157 100644 --- a/python/compatlib/compatlib/client/plot_recording.py +++ b/python/compatlib/compatlib/client/plot_recording.py @@ -24,6 +24,8 @@ def main(args, parser, extra, subparser): # A trace set is a collection of event files 
traceset = TraceSet(events) + if not traceset.files: + logger.exit("No event files were found.") # Define output files and paths image_outdir = os.path.join(args.outdir, "img") diff --git a/python/compatlib/compatlib/traces/traces.py b/python/compatlib/compatlib/traces/traces.py index 034c61f..dfeee9f 100644 --- a/python/compatlib/compatlib/traces/traces.py +++ b/python/compatlib/compatlib/traces/traces.py @@ -1,3 +1,4 @@ +import json import os import pandas @@ -43,6 +44,84 @@ def check(self): events.append(filename) self.files = events + def to_perfetto(self, outfile): + """ + Generate perfetto json output file for events. + + # See format at: + # https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0 + """ + df = self.to_dataframe() + + # Give an arbitrary id to each filename + ids = {} + count = 0 + + # We will treat the pids as the different runs of lammps, + # and the thread ids as the library ids + + # TODO: this can be cleaned up and moved into own perfetto.py + # I won't do this until I've added the close event and tested again + with open(outfile, "w") as fd: + fd.write("[") + + for pid, tag in enumerate(df.basename.unique()): + subset = df[df.basename == tag] + + # Subtract the minimum timestamp for each run so we always start at 0 + start_time = subset.timestamp.min() + subset.loc[:, "timestamp"] = subset.timestamp - start_time + + for row in subset.iterrows(): + # Get a faux thread id for this path + if row[1].normalized_path not in ids: + ids[row[1].normalized_path] = count + count += 1 + identifier = ids[row[1].normalized_path] + if row[1].ms_in_state is not None: + fd.write( + json.dumps( + { + "name": row[1].normalized_path, + "pid": pid, + "tid": identifier, + "ts": row[1].timestamp, + "dur": row[1].ms_in_state, + # Complete event (has a duration) + "ph": "X", + "cat": tag, + "args": { + "name": row[1].normalized_path, + "path": row[1].path, + "result": row[1].basename, + "function": row[1].function, + }, + } + ) + ) + else: + 
fd.write( + json.dumps( + { + "name": row[1].normalized_path, + "pid": pid, + "tid": identifier, + "ts": row[1].timestamp, + # Beginning of phase event + "ph": "B", + "cat": tag, + "args": { + "name": row[1].normalized_path, + "path": row[1].path, + "result": row[1].basename, + "function": row[1].function, + }, + } + ) + ) + fd.write("\n") + fd.write("]") + def iter_events(self, operation="Open"): """ Iterate through files and yield event object diff --git a/python/compatlib/compatlib/utils.py b/python/compatlib/compatlib/utils.py index 183085d..aa5cc92 100644 --- a/python/compatlib/compatlib/utils.py +++ b/python/compatlib/compatlib/utils.py @@ -24,6 +24,11 @@ def read_file(filename): return content +def write_json(content, filename): + with open(filename, "w") as fd: + fd.write(json.dumps(content, indent=4)) + + def write_file(content, filename, executable=False): with open(filename, "w") as fd: fd.write(content)