Permalink
Browse files

Tweak okay -> suspect config

Make the suspect value just a number of watchdog timeouts, without a new DWR being sent at each timeout.
  • Loading branch information...
1 parent 8b947be commit 924716b9bcecbda07ec630c705ede820363e92bc Anders Svensson committed Mar 4, 2013
@@ -1132,10 +1132,10 @@ corresponding timeout (see below) or all fail.</p>
Specifies configuration that alters the behaviour of the watchdog
state machine.
On key <c>okay</c>, the non-negative number of answered DWR
-messages required before transitioning from REOPEN to OKAY.
-On key <c>suspect</c>, the positive number of unanswered DWR messages
-before transitioning from OKAY to SUSPECT, or 0 to never make this
-transition.</p>
+messages before transitioning from REOPEN to OKAY.
+On key <c>suspect</c>, the number of watchdog timeouts before
+transitioning from OKAY to SUSPECT when DWR is unanswered, or 0 to
+not make the transition.</p>
<p>
Defaults to <c>[{okay, 3}, {suspect, 1}]</c>.
@@ -49,6 +49,8 @@
-define(IS_NATURAL(N), (is_integer(N) andalso 0 =< N)).
+-define(CHOOSE(B,T,F), if (B) -> T; true -> F end).
+
-record(config,
{suspect = 1 :: non_neg_integer(), %% OKAY -> SUSPECT
okay = 3 :: non_neg_integer()}). %% REOPEN -> OKAY
@@ -61,7 +63,7 @@
%% {M,F,A} -> integer() >= 0
num_dwa = 0 :: -1 | non_neg_integer(),
%% number of DWAs received in reopen,
- %% or to send in okay before moving to suspect
+ %% or number of timeouts before okay -> suspect
%% end PCB
parent = self() :: pid(), %% service process
transport :: pid() | undefined, %% peer_fsm process
@@ -424,7 +426,7 @@ transition({'DOWN', _, process, TPid, _Reason},
#watchdog{transport = TPid,
status = T}
= S) ->
- set_watchdog(S#watchdog{status = case T of initial -> T; _ -> down end,
+ set_watchdog(S#watchdog{status = ?CHOOSE(initial == T, T, down),
pending = false,
transport = undefined});
@@ -668,9 +670,10 @@ timeout(#watchdog{status = okay,
case N of
1 ->
S#watchdog{status = suspect};
- _ -> %% non-standard
- send_watchdog(S#watchdog{pending = false,
- num_dwa = decr(N)})
+ 0 -> %% non-standard: never move to suspect
+ S;
+ N -> %% non-standard: more timeouts before moving
+ S#watchdog{num_dwa = N-1}
end;
%% SUSPECT Timer expires CloseConnection()
@@ -725,11 +728,6 @@ timeout(#watchdog{status = T} = S)
T == down ->
restart(S).
-decr(0 = N) ->
- N;
-decr(N) ->
- N-1.
-
%% restart/1
restart(#watchdog{transport = undefined} = S) ->
@@ -89,16 +89,23 @@
-define(INFO(T), #diameter_event{info = T}).
%% Receive an event message from diameter.
--define(EVENT(T),
- apply(fun() -> %% apply to not bind T_
- receive #diameter_event{info = T = T_} ->
- log_event(T_)
- end
+-define(EVENT(T), %% apply to not bind T_
+ apply(fun() ->
+ receive ?INFO(T = T_) -> log_event(T_) end
end,
[])).
%% Receive a watchdog event.
-define(WD_EVENT(Ref), log_wd(element(4, ?EVENT({watchdog, Ref, _, _, _})))).
+-define(WD_EVENT(Ref, Ms),
+ apply(fun() ->
+ receive ?INFO({watchdog, Ref, _, T_, _}) ->
+ log_wd(T_)
+ after Ms ->
+ false
+ end
+ end,
+ [])).
%% Log to make failures identifiable.
-define(LOG(T), ?LOG("~p", [T])).
@@ -376,8 +383,8 @@ tpid(Ref, [[{ref, Ref},
%% # suspect/1
%% ===========================================================================
-%% Configure transports to require a set number of watchdogs before
-%% moving from OKAY to SUSPECT.
+%% Configure transports to require a set number of watchdog timeouts
+%% before moving from OKAY to SUSPECT.
suspect(_) ->
[] = run([[abuse, [suspect, N]] || N <- [0,1,3]]).
@@ -394,19 +401,21 @@ suspect(TRef, true, SvcName, _) ->
{okay, _} = ?WD_EVENT(TRef);
suspect(TRef, false, SvcName, 0) -> %% SUSPECT disabled
- %% Wait 2+ watchdogs and see that two unanswered watchdogs have
- %% been sent.
- [2,0,0,0] = receive
- ?INFO({watchdog, TRef, _, _, _} = T) -> T
- after 28000 ->
- wd_counts(SvcName)
- end;
+ %% Wait 2+ watchdogs and see that only one watchdog has been sent.
+ false = ?WD_EVENT(TRef, 28000),
+ [1,0,0,0] = wd_counts(SvcName);
suspect(TRef, false, SvcName, N) ->
- {okay, suspect} = ?WD_EVENT(TRef),
- [N,0,0,0] = wd_counts(SvcName),
- {suspect, down} = ?WD_EVENT(TRef),
- [N,0,0,0] = wd_counts(SvcName).
+ %% Check that no watchdog transition takes place within N+
+ %% watchdogs ...
+ false = ?WD_EVENT(TRef, N*10000+8000),
+ [1,0,0,0] = wd_counts(SvcName),
+ %% ... but that the connection then becomes suspect ...
+ {okay, suspect} = ?WD_EVENT(TRef, 10000),
+ [1,0,0,0] = wd_counts(SvcName),
+ %% ... and goes down.
+ {suspect, down} = ?WD_EVENT(TRef, 18000),
+ [1,0,0,0] = wd_counts(SvcName).
%% abuse/1
@@ -470,13 +479,9 @@ ok(TRef, SvcName, Down, 0) ->
%% Connection comes up without watchdog exchange.
{Down, okay} = ?WD_EVENT(TRef),
[1,0,0,0] = wd_counts(SvcName),
- %% Wait 2+ watchdog timeout to see that the connection stays up and
- %% two watchdogs are exchanged.
- ok = receive ?INFO({watchdog, TRef, _, _, _} = T) ->
- T
- after 28000 ->
- ok
- end,
+ %% Wait 2+ watchdog timeouts to see that the connection stays up
+ %% and two watchdogs are exchanged.
+ false = ?WD_EVENT(TRef, 28000),
[3,0,0,2] = wd_counts(SvcName);
ok(TRef, SvcName, Down, N) ->

0 comments on commit 924716b

Please sign in to comment.