Permalink
Browse files

Merge branch 'pan/r16b01/system_monitor_long_schedule/OTP-11067' into…

… maint

* pan/r16b01/system_monitor_long_schedule/OTP-11067:
  Minor spelling correction
  Add system_monitor of long_schedule
  • Loading branch information...
bufflig committed Jun 5, 2013
2 parents 620b7e8 + eb4d4c1 commit eedd89fcb0b892117b798b8b097cf63b9ab59db7
View
@@ -6058,6 +6058,49 @@ ok
notice.
</p>
</item>
+ <tag><c>{long_schedule, Time}</c></tag>
+ <item>
+ <p>If a process or port in the system runs uninterrupted
+ for at least <c>Time</c> wall clock milliseconds, a
+ message <c>{monitor, PidOrPort, long_schedule, Info}</c>
+ is sent to <c>MonitorPid</c>. <c>PidOrPort</c> is the
+ process or port that was running and <c>Info</c> is a
+ list of two-element tuples describing the event. In case
+ of a <c>pid()</c>, the tuples <c>{timeout, Millis}</c>,
+ <c>{in, Location}</c> and <c>{out, Location}</c> will be
+ present, where <c>Location</c> is either an MFA
+ (<c>{Module, Function, Arity}</c>) describing the
+ function where the process was scheduled in/out, or the
+ atom <c>undefined</c>. In case of a <c>port()</c>, the
+ tuples <c>{timeout, Millis}</c> and <c>{port_op,Op}</c>
+ will be present. <c>Op</c> will be one of <c>proc_sig</c>,
+ <c>timeout</c>, <c>input</c>, <c>output</c>,
+ <c>event</c> or <c>dist_cmd</c>, depending on which
+ driver callback was executing. <c>proc_sig</c> is an
+ internal operation and should never appear, while the
+ others represent the corresponding driver callbacks
+ <c>timeout</c>, <c>ready_input</c>, <c>ready_output</c>,
+ <c>event</c> and finally <c>outputv</c> (when the port
+ is used by distribution). The <c>Millis</c> value in
+ the <c>timeout</c> tuple will tell you the actual
+ uninterrupted execution time of the process or port,
+ which will always be <c>&gt;=</c> the <c>Time</c> value
+ supplied when starting the trace. New tuples may be
+ added to the <c>Info</c> list in the future, and the
+ order of the tuples in the list may be changed at any
+ time without prior notice.
+ </p>
+ <p>This can be used to detect problems with NIF's or
+ drivers that take too long to execute. Generally, 1 ms
+ is considered a good maximum time for a driver callback
+ or a NIF. However, a time sharing system should usually
+ consider everything below 100 ms as "possible" and
+ fairly "normal". Schedule times above that might however
+ indicate swapping or a NIF/driver that is
+ misbehaving. Misbehaving NIF's and drivers could cause
+ bad resource utilization and bad overall performance of
+ the system.</p>
+ </item>
<tag><c>{large_heap, Size}</c></tag>
<item>
<p>If a garbage collection in the system results in
@@ -178,6 +178,7 @@ atom disable_trace
atom disabled
atom display_items
atom dist
+atom dist_cmd
atom Div='/'
atom div
atom dlink
@@ -313,6 +314,7 @@ atom load_cancelled
atom load_failure
atom local
atom long_gc
+atom long_schedule
atom low
atom Lt='<'
atom machine
@@ -432,6 +434,7 @@ atom port
atom ports
atom port_count
atom port_limit
+atom port_op
atom print
atom priority
atom private
@@ -443,6 +446,7 @@ atom process_display
atom process_limit
atom process_dump
atom procs
+atom proc_sig
atom profile
atom protected
atom protection
@@ -924,6 +924,7 @@ extern int count_instructions;
# define NOINLINE
#endif
+
/*
* The following functions are called directly by process_main().
* Don't inline them.
@@ -1153,6 +1154,9 @@ void process_main(void)
Eterm pt_arity; /* Used by do_put_tuple */
+ Uint64 start_time = 0; /* Monitor long schedule */
+ BeamInstr* start_time_i = NULL;
+
ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */
@@ -1175,6 +1179,16 @@ void process_main(void)
do_schedule:
reds_used = REDS_IN(c_p) - FCALLS;
do_schedule1:
+
+ if (start_time != 0) {
+ Sint64 diff = erts_timestamp_millis() - start_time;
+ if (diff > 0 && (Uint) diff > erts_system_monitor_long_schedule) {
+ BeamInstr *inptr = find_function_from_pc(start_time_i);
+ BeamInstr *outptr = find_function_from_pc(c_p->i);
+ monitor_long_schedule_proc(c_p,inptr,outptr,(Uint) diff);
+ }
+ }
+
PROCESS_MAIN_CHK_LOCKS(c_p);
ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p);
#if HALFWORD_HEAP
@@ -1183,11 +1197,18 @@ void process_main(void)
ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
c_p = schedule(c_p, reds_used);
ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
+ start_time = 0;
#ifdef DEBUG
pid = c_p->common.id; /* Save for debugging purpouses */
#endif
ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p);
PROCESS_MAIN_CHK_LOCKS(c_p);
+
+ if (erts_system_monitor_long_schedule != 0) {
+ start_time = erts_timestamp_millis();
+ start_time_i = c_p->i;
+ }
+
reg = ERTS_PROC_GET_SCHDATA(c_p)->x_reg_array;
freg = ERTS_PROC_GET_SCHDATA(c_p)->f_reg_array;
#if !HEAP_ON_C_STACK
@@ -6151,6 +6172,7 @@ apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg)
}
+
static Eterm
new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free)
{
@@ -2012,6 +2012,7 @@ void erts_system_monitor_clear(Process *c_p) {
#endif
erts_set_system_monitor(NIL);
erts_system_monitor_long_gc = 0;
+ erts_system_monitor_long_schedule = 0;
erts_system_monitor_large_heap = 0;
erts_system_monitor_flags.busy_port = 0;
erts_system_monitor_flags.busy_dist_port = 0;
@@ -2036,12 +2037,17 @@ static Eterm system_monitor_get(Process *p)
Uint hsz = 3 + (erts_system_monitor_flags.busy_dist_port ? 2 : 0) +
(erts_system_monitor_flags.busy_port ? 2 : 0);
Eterm long_gc = NIL;
+ Eterm long_schedule = NIL;
Eterm large_heap = NIL;
if (erts_system_monitor_long_gc != 0) {
hsz += 2+3;
(void) erts_bld_uint(NULL, &hsz, erts_system_monitor_long_gc);
}
+ if (erts_system_monitor_long_schedule != 0) {
+ hsz += 2+3;
+ (void) erts_bld_uint(NULL, &hsz, erts_system_monitor_long_schedule);
+ }
if (erts_system_monitor_large_heap != 0) {
hsz += 2+3;
(void) erts_bld_uint(NULL, &hsz, erts_system_monitor_large_heap);
@@ -2051,6 +2057,10 @@ static Eterm system_monitor_get(Process *p)
if (erts_system_monitor_long_gc != 0) {
long_gc = erts_bld_uint(&hp, NULL, erts_system_monitor_long_gc);
}
+ if (erts_system_monitor_long_schedule != 0) {
+ long_schedule = erts_bld_uint(&hp, NULL,
+ erts_system_monitor_long_schedule);
+ }
if (erts_system_monitor_large_heap != 0) {
large_heap = erts_bld_uint(&hp, NULL, erts_system_monitor_large_heap);
}
@@ -2059,6 +2069,10 @@ static Eterm system_monitor_get(Process *p)
Eterm t = TUPLE2(hp, am_long_gc, long_gc); hp += 3;
res = CONS(hp, t, res); hp += 2;
}
+ if (long_schedule != NIL) {
+ Eterm t = TUPLE2(hp, am_long_schedule, long_schedule); hp += 3;
+ res = CONS(hp, t, res); hp += 2;
+ }
if (large_heap != NIL) {
Eterm t = TUPLE2(hp, am_large_heap, large_heap); hp += 3;
res = CONS(hp, t, res); hp += 2;
@@ -2113,7 +2127,7 @@ system_monitor(Process *p, Eterm monitor_pid, Eterm list)
}
if (is_not_list(list)) goto error;
else {
- Uint long_gc, large_heap;
+ Uint long_gc, long_schedule, large_heap;
int busy_port, busy_dist_port;
system_blocked = 1;
@@ -2123,7 +2137,8 @@ system_monitor(Process *p, Eterm monitor_pid, Eterm list)
if (!erts_pid2proc(p, ERTS_PROC_LOCK_MAIN, monitor_pid, 0))
goto error;
- for (long_gc = 0, large_heap = 0, busy_port = 0, busy_dist_port = 0;
+ for (long_gc = 0, long_schedule = 0, large_heap = 0,
+ busy_port = 0, busy_dist_port = 0;
is_list(list);
list = CDR(list_val(list))) {
Eterm t = CAR(list_val(list));
@@ -2133,6 +2148,9 @@ system_monitor(Process *p, Eterm monitor_pid, Eterm list)
if (tp[1] == am_long_gc) {
if (! term_to_Uint(tp[2], &long_gc)) goto error;
if (long_gc < 1) long_gc = 1;
+ } else if (tp[1] == am_long_schedule) {
+ if (! term_to_Uint(tp[2], &long_schedule)) goto error;
+ if (long_schedule < 1) long_schedule = 1;
} else if (tp[1] == am_large_heap) {
if (! term_to_Uint(tp[2], &large_heap)) goto error;
if (large_heap < 16384) large_heap = 16384;
@@ -2148,6 +2166,7 @@ system_monitor(Process *p, Eterm monitor_pid, Eterm list)
prev = system_monitor_get(p);
erts_set_system_monitor(monitor_pid);
erts_system_monitor_long_gc = long_gc;
+ erts_system_monitor_long_schedule = long_schedule;
erts_system_monitor_large_heap = large_heap;
erts_system_monitor_flags.busy_port = !!busy_port;
erts_system_monitor_flags.busy_dist_port = !!busy_dist_port;
@@ -1594,6 +1594,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int fpe_was_unmasked;
erts_aint32_t state;
int active;
+ Uint64 start_time = 0;
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq));
@@ -1655,6 +1656,10 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
reset_handle(ptp);
+ if (erts_system_monitor_long_schedule != 0) {
+ start_time = erts_timestamp_millis();
+ }
+
ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(pp));
ERTS_SMP_CHK_NO_PROC_LOCKS;
ASSERT(pp->drv_ptr);
@@ -1723,6 +1728,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
reds += erts_port_driver_callback_epilogue(pp, &state);
+ if (start_time != 0) {
+ Sint64 diff = erts_timestamp_millis() - start_time;
+ if (diff > 0 && (Uint) diff > erts_system_monitor_long_schedule) {
+ monitor_long_schedule_port(pp,ptp->type,(Uint) diff);
+ }
+ }
+ start_time = 0;
+
aborted_port_task:
schedule_port_task_free(ptp);
@@ -267,6 +267,7 @@ static Uint last_exact_reductions;
Uint erts_default_process_flags;
Eterm erts_system_monitor;
Eterm erts_system_monitor_long_gc;
+Uint erts_system_monitor_long_schedule;
Eterm erts_system_monitor_large_heap;
struct erts_system_monitor_flags_t erts_system_monitor_flags;
@@ -1009,6 +1009,7 @@ extern erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
*/
extern Eterm erts_system_monitor;
extern Uint erts_system_monitor_long_gc;
+extern Uint erts_system_monitor_long_schedule;
extern Uint erts_system_monitor_large_heap;
struct erts_system_monitor_flags_t {
unsigned int busy_port : 1;
Oops, something went wrong.

0 comments on commit eedd89f

Please sign in to comment.