Skip to content

Commit

Permalink
trace2: add stopwatch timers
Browse files Browse the repository at this point in the history
Add stopwatch timer mechanism to Trace2.

Timers are an alternative to Trace2 Regions.  Regions are useful for
measuring the time spent in various computation phases, such as the
time to read the index, time to scan for unstaged files, time to scan
for untracked files, and etc.

However, regions are not appropriate in all places.  For example,
during a checkout, it would be very inefficient to use regions to
measure the total time spent inflating objects from the ODB from
across the entire lifetime of the process; a per-unzip() region would
flood the output and significantly slow the command; and some form of
post-processing would be requried to compute the time spent in unzip().

Timers can be used to measure a series of timer intervals and emit
a single summary event (at thread and/or process exit).

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
  • Loading branch information
jeffhostetler committed Oct 20, 2022
1 parent 8a89e1a commit 8e70110
Show file tree
Hide file tree
Showing 15 changed files with 786 additions and 0 deletions.
90 changes: 90 additions & 0 deletions Documentation/technical/api-trace2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,42 @@ The "value" field may be an integer or a string.
}
------------

`"th_timer"`::
This event logs the amount of time that a stopwatch timer was
running in the thread. This event is generated when a thread
exits for timers that requested per-thread events.
+
------------
{
"event":"th_timer",
...
"category":"my_category",
"name":"my_timer",
"intervals":5, # number of time it was started/stopped
"t_total":0.052741, # total time in seconds it was running
"t_min":0.010061, # shortest interval
"t_max":0.011648 # longest interval
}
------------

`"timer"`::
This event logs the amount of time that a stopwatch timer was
running aggregated across all threads. This event is generated
when the process exits.
+
------------
{
"event":"timer",
...
"category":"my_category",
"name":"my_timer",
"intervals":5, # number of time it was started/stopped
"t_total":0.052741, # total time in seconds it was running
"t_min":0.010061, # shortest interval
"t_max":0.011648 # longest interval
}
------------

== Example Trace2 API Usage

Here is a hypothetical usage of the Trace2 API showing the intended
Expand Down Expand Up @@ -1200,6 +1236,60 @@ d0 | main | data | r0 | 0.002126 | 0.002126 | fsy
d0 | main | exit | | 0.000470 | | | code:0
d0 | main | atexit | | 0.000477 | | | code:0
----------------

Stopwatch Timer Events::

Measure the time spent in a function call or span of code
that might be called from many places within the code
throughout the life of the process.
+
----------------
static void expensive_function(void)
{
trace2_timer_start(TRACE2_TIMER_ID_TEST1);
...
sleep_millisec(1000); // Do something expensive
...
trace2_timer_stop(TRACE2_TIMER_ID_TEST1);
}

static int ut_100timer(int argc, const char **argv)
{
...

expensive_function();

// Do something else 1...

expensive_function();

// Do something else 2...

expensive_function();

return 0;
}
----------------
+
In this example, we measure the total time spent in
`expensive_function()` regardless of when it is called
in the overall flow of the program.
+
----------------
$ export GIT_TRACE2_PERF_BRIEF=1
$ export GIT_TRACE2_PERF=~/log.perf
$ t/helper/test-tool trace2 100timer 3 1000
...
$ cat ~/log.perf
d0 | main | version | | | | | ...
d0 | main | start | | 0.001453 | | | t/helper/test-tool trace2 100timer 3 1000
d0 | main | cmd_name | | | | | trace2 (trace2)
d0 | main | exit | | 3.003667 | | | code:0
d0 | main | timer | | | | test | name:test1 intervals:3 total:3.001686 min:1.000254 max:1.000929
d0 | main | atexit | | 3.003796 | | | code:0
----------------


== Future Work

=== Relationship to the Existing Trace Api (api-trace.txt)
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ LIB_OBJS += trace2/tr2_tgt_event.o
LIB_OBJS += trace2/tr2_tgt_normal.o
LIB_OBJS += trace2/tr2_tgt_perf.o
LIB_OBJS += trace2/tr2_tls.o
LIB_OBJS += trace2/tr2_tmr.o
LIB_OBJS += trailer.o
LIB_OBJS += transport-helper.o
LIB_OBJS += transport.o
Expand Down
98 changes: 98 additions & 0 deletions t/helper/test-trace2.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,101 @@ static int ut_010bug_BUG(int argc, const char **argv)
BUG("a %s message", "BUG");
}

/*
* Single-threaded timer test. Create several intervals using the
* TEST1 timer. The test script can verify that an aggregate Trace2
* "timer" event is emitted indicating that we started+stopped the
* timer the requested number of times.
*/
static int ut_100timer(int argc, const char **argv)
{
const char *usage_error =
"expect <count> <ms_delay>";

int count = 0;
int delay = 0;
int k;

if (argc != 2)
die("%s", usage_error);
if (get_i(&count, argv[0]))
die("%s", usage_error);
if (get_i(&delay, argv[1]))
die("%s", usage_error);

for (k = 0; k < count; k++) {
trace2_timer_start(TRACE2_TIMER_ID_TEST1);
sleep_millisec(delay);
trace2_timer_stop(TRACE2_TIMER_ID_TEST1);
}

return 0;
}

struct ut_101_data {
int count;
int delay;
};

static void *ut_101timer_thread_proc(void *_ut_101_data)
{
struct ut_101_data *data = _ut_101_data;
int k;

trace2_thread_start("ut_101");

for (k = 0; k < data->count; k++) {
trace2_timer_start(TRACE2_TIMER_ID_TEST2);
sleep_millisec(data->delay);
trace2_timer_stop(TRACE2_TIMER_ID_TEST2);
}

trace2_thread_exit();
return NULL;
}

/*
* Multi-threaded timer test. Create several threads that each create
* several intervals using the TEST2 timer. The test script can verify
* that an individual Trace2 "th_timer" events for each thread and an
* aggregate "timer" event are generated.
*/
static int ut_101timer(int argc, const char **argv)
{
const char *usage_error =
"expect <count> <ms_delay> <threads>";

struct ut_101_data data = { 0, 0 };
int nr_threads = 0;
int k;
pthread_t *pids = NULL;

if (argc != 3)
die("%s", usage_error);
if (get_i(&data.count, argv[0]))
die("%s", usage_error);
if (get_i(&data.delay, argv[1]))
die("%s", usage_error);
if (get_i(&nr_threads, argv[2]))
die("%s", usage_error);

CALLOC_ARRAY(pids, nr_threads);

for (k = 0; k < nr_threads; k++) {
if (pthread_create(&pids[k], NULL, ut_101timer_thread_proc, &data))
die("failed to create thread[%d]", k);
}

for (k = 0; k < nr_threads; k++) {
if (pthread_join(pids[k], NULL))
die("failed to join thread[%d]", k);
}

free(pids);

return 0;
}

/*
* Usage:
* test-tool trace2 <ut_name_1> <ut_usage_1>
Expand All @@ -248,6 +343,9 @@ static struct unit_test ut_table[] = {
{ ut_008bug, "008bug", "" },
{ ut_009bug_BUG, "009bug_BUG","" },
{ ut_010bug_BUG, "010bug_BUG","" },

{ ut_100timer, "100timer", "<count> <ms_delay>" },
{ ut_101timer, "101timer", "<count> <ms_delay> <threads>" },
};
/* clang-format on */

Expand Down
49 changes: 49 additions & 0 deletions t/t0211-trace2-perf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,53 @@ test_expect_success 'using global config, perf stream, return code 0' '
test_cmp expect actual
'

# Exercise the stopwatch timers in a loop and confirm that we have
# as many start/stop intervals as expected. We cannot really test the
# actual (total, min, max) timer values, so we have to assume that they
# are good, but we can verify the interval count.
#
# The timer "test/test1" should only emit a global summary "timer" event.
# The timer "test/test2" should emit per-thread "th_timer" events and a
# global summary "timer" event.

have_timer_event () {
thread=$1 event=$2 category=$3 name=$4 intervals=$5 file=$6 &&

pattern="d0|${thread}|${event}||||${category}|name:${name} intervals:${intervals}" &&

grep "${pattern}" ${file}
}

test_expect_success 'stopwatch timer test/test1' '
test_when_finished "rm trace.perf actual" &&
test_config_global trace2.perfBrief 1 &&
test_config_global trace2.perfTarget "$(pwd)/trace.perf" &&
# Use the timer "test1" 5 times from "main".
test-tool trace2 100timer 5 10 &&
perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual &&
have_timer_event "main" "timer" "test" "test1" 5 actual
'

test_expect_success 'stopwatch timer test/test2' '
test_when_finished "rm trace.perf actual" &&
test_config_global trace2.perfBrief 1 &&
test_config_global trace2.perfTarget "$(pwd)/trace.perf" &&
# Use the timer "test2" 5 times each in 3 threads.
test-tool trace2 101timer 5 10 3 &&
perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual &&
# So we should have 3 per-thread events of 5 each.
have_timer_event "th01:ut_101" "th_timer" "test" "test2" 5 actual &&
have_timer_event "th02:ut_101" "th_timer" "test" "test2" 5 actual &&
have_timer_event "th03:ut_101" "th_timer" "test" "test2" 5 actual &&
# And we should have 15 total uses.
have_timer_event "main" "timer" "test" "test2" 15 actual
'

test_done
6 changes: 6 additions & 0 deletions t/t0211/scrub_perf.perl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@
goto SKIP_LINE;
}
}
elsif ($tokens[$col_event] =~ m/timer/) {
# This also captures "th_timer" events
$tokens[$col_rest] =~ s/ total:\d+\.\d*/ total:_T_TOTAL_/;
$tokens[$col_rest] =~ s/ min:\d+\.\d*/ min:_T_MIN_/;
$tokens[$col_rest] =~ s/ max:\d+\.\d*/ max:_T_MAX_/;
}

# t_abs and t_rel are either blank or a float. Replace the float
# with a constant for matching the HEREDOC in the test script.
Expand Down

0 comments on commit 8e70110

Please sign in to comment.