Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Per-pipeline-invocation profiling #8153

Merged
merged 15 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/BoundsInference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,13 @@ Stmt bounds_inference(Stmt s,
Expr marker = Call::make(Int(32), Call::skip_stages_marker, {}, Call::Intrinsic);
s = Block::make(Evaluate::make(marker), s);

if (target.has_feature(Target::Profile) || target.has_feature(Target::ProfileByTimer)) {
// Add a note in the IR for what profiling should cover, so that it doesn't
// include bounds queries as pipeline executions.
marker = Call::make(Int(32), Call::profiling_marker, {}, Call::Intrinsic);
s = Block::make(Evaluate::make(marker), s);
}

// Add a note in the IR for where assertions on input images
// should go. Those are handled by a later lowering pass.
marker = Call::make(Int(32), Call::add_image_checks_marker, {}, Call::Intrinsic);
Expand Down
4 changes: 2 additions & 2 deletions src/CodeGen_Internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ bool function_takes_user_context(const std::string &name) {
"halide_print",
"halide_profiler_memory_allocate",
"halide_profiler_memory_free",
"halide_profiler_pipeline_start",
"halide_profiler_pipeline_end",
"halide_profiler_instance_start",
"halide_profiler_instance_end",
"halide_profiler_stack_peak_update",
"halide_spawn_thread",
"halide_device_release",
Expand Down
1 change: 1 addition & 0 deletions src/IR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,7 @@ const char *const intrinsic_op_names[] = {
"mux",
"popcount",
"prefetch",
"profiling_marker",
abadams marked this conversation as resolved.
Show resolved Hide resolved
"promise_clamped",
"random",
"register_destructor",
Expand Down
1 change: 1 addition & 0 deletions src/IR.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,7 @@ struct Call : public ExprNode<Call> {
mux,
popcount,
prefetch,
profiling_marker,
promise_clamped,
random,
register_destructor,
Expand Down
227 changes: 144 additions & 83 deletions src/Profiling.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/Profiling.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class Function;
* storage flattening, but after all bounds inference.
*
*/
Stmt inject_profiling(Stmt, const std::string &, const std::map<std::string, Function> &env);
Stmt inject_profiling(const Stmt &, const std::string &, const std::map<std::string, Function> &env);

} // namespace Internal
} // namespace Halide
Expand Down
112 changes: 76 additions & 36 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1858,9 +1858,6 @@ struct HALIDE_ATTRIBUTE_ALIGN(8) halide_profiler_pipeline_stats {
/** The number of funcs in this pipeline. */
int num_funcs;

/** An internal base id used to identify the funcs in this pipeline. */
int first_func_id;

/** The number of times this pipeline has been run. */
int runs;

Expand All @@ -1871,48 +1868,97 @@ struct HALIDE_ATTRIBUTE_ALIGN(8) halide_profiler_pipeline_stats {
int num_allocs;
};

/** The global state of the profiler. */
/** Per-invocation-of-a-pipeline state. Lives on the stack of the Halide
* code. Exists in a doubly-linked list so that it can be cleanly
* removed. */
struct halide_profiler_instance_state {
/** Time spent in this instance. */
uint64_t time;
abadams marked this conversation as resolved.
Show resolved Hide resolved

struct halide_profiler_state {
/** Guards access to the fields below. If not locked, the sampling
* profiler thread is free to modify things below (including
* reordering the linked list of pipeline stats). */
struct halide_mutex lock;
/** The current memory allocation of funcs in this instance. */
uint64_t memory_current;

/** The amount of time the profiler thread sleeps between samples
* in milliseconds. Defaults to 1 */
int sleep_time;
/** The peak memory allocation of funcs in this instance. */
uint64_t memory_peak;

/** The total memory allocation of funcs in this instance. */
uint64_t memory_total;

/** The average number of thread pool worker threads doing useful
* work while computing this instance. */
uint64_t active_threads_numerator, active_threads_denominator;

/** An internal id used for bookkeeping. */
int first_free_id;
/** A pointer to the next running instance, so that the running instances
* can exist in a linked list. */
struct halide_profiler_instance_state *next;

/** A pointer to the address of the next pointer of the previous instance,
* so that this can be removed from the linked list when the instance
* terminates. */
struct halide_profiler_instance_state **prev_next;

/** Information shared across all instances. The stats above are merged into
* it when the instance is retired. */
struct halide_profiler_pipeline_stats *pipeline_stats;

/** An array containing states for each Func in this instance of this pipeline. */
struct halide_profiler_func_stats *funcs;

/** The id of the current running Func. Set by the pipeline, read
* periodically by the profiler thread. */
int current_func;

/** The number of threads currently doing work. */
/** The number of threads currently doing work on this pipeline instance. */
int active_threads;

/** The number of samples taken by this instance. */
int samples;

/** The total number of memory allocations of funcs in this instance. */
int num_allocs;

/** Whether or not this instance should count towards pipeline
* statistics. */
int should_collect_statistics;

/** Make the size of the struct a multiple of 8 */
int padding;
abadams marked this conversation as resolved.
Show resolved Hide resolved
};

/** The global state of the profiler. */
struct halide_profiler_state {
/** Guards access to the fields below. If not locked, the sampling
* profiler thread is free to modify things below (including
* reordering the linked list of pipeline stats). */
struct halide_mutex lock;

/** A linked list of stats gathered for each pipeline. */
struct halide_profiler_pipeline_stats *pipelines;

/** Retrieve remote profiler state. Used so that the sampling
* profiler can follow along with execution that occurs elsewhere,
* e.g. on a DSP. If null, it reads from the int above instead. */
void (*get_remote_profiler_state)(int *func, int *active_workers);

/** Sampling thread reference to be joined at shutdown. */
struct halide_thread *sampling_thread;
};

/** Profiler func ids with special meanings. */
enum {
/// current_func takes on this value when not inside Halide code
halide_profiler_outside_of_halide = -1,
/// Set current_func to this value to tell the profiling thread to
/// halt. It will start up again next time you run a pipeline with
/// profiling enabled.
halide_profiler_please_stop = -2
/** The running instances of Halide pipelines. */
struct halide_profiler_instance_state *instances;

/** If this callback is defined, the profiler asserts that there is a single
* live instance, and then uses it to get the current func and number of
* active threads instead of reading the fields in the instance. This is used
* so that the profiler can follow along with execution that occurs
* elsewhere (e.g. on an accelerator). */
void (*get_remote_profiler_state)(int *func, int *active_workers);

/** The amount of time the profiler thread sleeps between samples
* in milliseconds. Defaults to 1 */
int sleep_time;
abadams marked this conversation as resolved.
Show resolved Hide resolved

/** Set to 1 when you want the profiler to wait for all running instances to
* finish and then stop gracefully. */
int shutdown;
};

/** Get a pointer to the global profiler state for programmatic
Expand All @@ -1930,19 +1976,13 @@ extern struct halide_profiler_pipeline_stats *halide_profiler_get_pipeline_state
* accurate time interval if desired. */
extern int halide_profiler_sample(struct halide_profiler_state *s, uint64_t *prev_t);

/** Reset profiler state cheaply. May leave threads running or some
* memory allocated but all accumulated statistics are reset.
* WARNING: Do NOT call this method while any halide pipeline is
* running; halide_profiler_memory_allocate/free and
* halide_profiler_stack_peak_update update the profiler pipeline's
* state without grabbing the global profiler state's lock. */
/** Reset profiler state cheaply. May leave threads running or some memory
* allocated but all accumulated statistics are reset. Blocks until all running
* profiled Halide pipelines exit. */
extern void halide_profiler_reset(void);

/** Reset all profiler state.
* WARNING: Do NOT call this method while any halide pipeline is
* running; halide_profiler_memory_allocate/free and
* halide_profiler_stack_peak_update update the profiler pipeline's
* state without grabbing the global profiler state's lock. */
/** Reset all profiler state. Blocks until all running profiled Halide
* pipelines exit. */
void halide_profiler_shutdown(void);

/** Print out timing statistics for everything run since the last
Expand Down
9 changes: 8 additions & 1 deletion src/runtime/hexagon_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,14 @@ WEAK int halide_hexagon_run(void *user_context,
if (remote_poll_profiler_state) {
halide_profiler_get_state()->get_remote_profiler_state = get_remote_profiler_state;
if (remote_profiler_set_current_func) {
remote_profiler_set_current_func(halide_profiler_get_state()->current_func);
const halide_profiler_instance_state *instance = halide_profiler_get_state()->instances;
// The instance that called this runtime function should be registered.
halide_abort_if_false(user_context, instance);
if (instance->next) {
error(user_context) << "Hexagon: multiple simultaneous profiled pipelines is unsupported.";
return halide_error_code_generic_error;
}
remote_profiler_set_current_func(instance->current_func);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/runtime/hexagon_remote/qurt/halide_remote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ int halide_hexagon_remote_profiler_set_current_func(int current_func) {
halide_profiler_get_state()->current_func = current_func;
return 0;
}
halide_profiler_state *halide_profiler_get_state() {
static halide_profiler_state hvx_profiler_state;
halide_profiler_instance_state *halide_profiler_get_state() {
static halide_profiler_instance_state hvx_profiler_state;
return &hvx_profiler_state;
}

Expand Down
6 changes: 3 additions & 3 deletions src/runtime/hexagon_remote/qurt/sim_remote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,12 @@ int release_library(handle_t module_ptr) {
}

extern "C" {
halide_profiler_state profiler_state;
halide_profiler_instance_state profiler_state;
int *profiler_current_func_addr = &profiler_state.current_func;
}

halide_profiler_state *halide_profiler_get_state() {
return (halide_profiler_state *)(&profiler_state);
halide_profiler_instance_state *halide_profiler_get_state() {
return (halide_profiler_instance_state *)(&profiler_state);
}

extern "C" {
Expand Down
Loading
Loading