Skip to content
This repository

Kill orphaned workers and prune monitor state space. #4

Merged
merged 3 commits over 2 years ago

2 participants

Andy Skelton Devin Torres
Andy Skelton

If a process checks out a worker and then dies, the DOWN message dismisses the worker without regard for the pool state. This can leave a pool with too few workers. There are also places where monitors are not removed from state. This branch fixes both problems, adds a new test, and adds monitor state checking to other tests.

Andy Skelton

Sorry for the mess... I'm still green at git.

Andy Skelton Fix demonitored pids being left in state space and check monitor list…
… length in all tests. Add user_death test to check that an orphaned worker is killed when a monitored user is reported 'DOWN'.
d27c012
Devin Torres
Owner

Looking into this as well...

Devin Torres devinus merged commit d27c012 on September 30, 2011
Devin Torres devinus closed this September 30, 2011
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 3 unique commits by 1 author.

Sep 26, 2011
Andy Skelton Remove down monitor from state. 675015e
Andy Skelton Merge branch 'remove-down-monitor' of https://github.com/skeltoac/poo…
…lboy into remove-down-monitor
0fd8de7
Sep 27, 2011
Andy Skelton Fix demonitored pids being left in state space and check monitor list…
… length in all tests. Add user_death test to check that an orphaned worker is killed when a monitored user is reported 'DOWN'.
d27c012
This page is out of date. Refresh to see the latest.
44  src/poolboy.erl
@@ -47,7 +47,7 @@ ready({checkin, Pid}, State) ->
47 47
     Workers = queue:in(Pid, State#state.workers),
48 48
     Monitors = case lists:keytake(Pid, 1, State#state.monitors) of
49 49
         {value, {_, Ref}, Left} -> erlang:demonitor(Ref), Left;
50  
-        false -> []
  50
+        false -> State#state.monitors
51 51
     end,
52 52
     {next_state, ready, State#state{workers=Workers, monitors=Monitors}};
53 53
 ready(_Event, State) ->
@@ -80,11 +80,19 @@ ready(_Event, _From, State) ->
80 80
     {reply, ok, ready, State}.
81 81
 
82 82
 overflow({checkin, Pid}, #state{overflow=1}=State) ->
83  
-    dismiss_worker(Pid),
84  
-    {next_state, ready, State#state{overflow=0}};
  83
+    dismiss_worker(Pid), %% TODO add demonitor to all checkins and check get_all_monitors in other tests!
  84
+    Monitors = case lists:keytake(Pid, 1, State#state.monitors) of
  85
+        {value, {_, Ref}, Left} -> erlang:demonitor(Ref), Left;
  86
+        false -> []
  87
+    end,
  88
+    {next_state, ready, State#state{overflow=0, monitors=Monitors}};
85 89
 overflow({checkin, Pid}, #state{overflow=Overflow}=State) ->
86 90
     dismiss_worker(Pid),
87  
-    {next_state, overflow, State#state{overflow=Overflow-1}};
  91
+    Monitors = case lists:keytake(Pid, 1, State#state.monitors) of
  92
+        {value, {_, Ref}, Left} -> erlang:demonitor(Ref), Left;
  93
+        false -> State#state.monitors
  94
+    end,
  95
+    {next_state, overflow, State#state{overflow=Overflow-1, monitors=Monitors}};
88 96
 overflow(_Event, State) ->
89 97
     {next_state, overflow, State}.
90 98
 
@@ -109,25 +117,25 @@ overflow(_Event, _From, State) ->
109 117
 
110 118
 full({checkin, Pid}, #state{waiting=Waiting, max_overflow=MaxOverflow,
111 119
     overflow=Overflow}=State) ->
  120
+    Monitors = case lists:keytake(Pid, 1, State#state.monitors) of
  121
+        {value, {_, Ref0}, Left0} -> erlang:demonitor(Ref0), Left0;
  122
+        false -> State#state.monitors
  123
+    end,
112 124
     case queue:out(Waiting) of
113 125
         {{value, {FromPid, _}=From}, Left} ->
114 126
             Ref = erlang:monitor(process, FromPid),
115  
-            Monitors = [{Pid, Ref} | State#state.monitors],
  127
+            Monitors1 = [{Pid, Ref} | Monitors],
116 128
             gen_fsm:reply(From, Pid),
117 129
             {next_state, full, State#state{waiting=Left,
118  
-                                           monitors=Monitors}};
  130
+                                           monitors=Monitors1}};
119 131
         {empty, Empty} when MaxOverflow < 1 ->
120 132
             Workers = queue:in(Pid, State#state.workers),
121  
-            Monitors = case lists:keytake(Pid, 1, State#state.monitors) of
122  
-                {value, {_, Ref}, Left} -> erlang:demonitor(Ref), Left;
123  
-                false -> []
124  
-            end,
125 133
             {next_state, ready, State#state{workers=Workers, waiting=Empty,
126 134
                                             monitors=Monitors}};
127 135
         {empty, Empty} ->
128 136
             dismiss_worker(Pid),
129 137
             {next_state, overflow, State#state{waiting=Empty,
130  
-                overflow=Overflow-1}}
  138
+                overflow=Overflow-1, monitors=Monitors}}
131 139
     end;
132 140
 full(_Event, State) ->
133 141
     {next_state, full, State}.
@@ -148,6 +156,8 @@ handle_sync_event(get_avail_workers, _From, StateName, #state{workers=Workers}=S
148 156
 handle_sync_event(get_all_workers, _From, StateName, #state{worker_sup=Sup}=State) ->
149 157
   WorkerList = supervisor:which_children(Sup),
150 158
   {reply, WorkerList, StateName, State};
  159
+handle_sync_event(get_all_monitors, _From, StateName, #state{monitors=Monitors}=State) ->
  160
+  {reply, Monitors, StateName, State};
151 161
 handle_sync_event(stop, _From, _StateName, State) ->
152 162
   {stop, normal, ok, State};
153 163
 handle_sync_event(_Event, _From, StateName, State) ->
@@ -155,11 +165,13 @@ handle_sync_event(_Event, _From, StateName, State) ->
155 165
   {reply, Reply, StateName, State}.
156 166
 
157 167
 handle_info({'DOWN', Ref, _, _, _}, StateName, State) ->
158  
-    Monitors = case lists:keytake(Ref, 2, State#state.monitors) of
159  
-                   {value, {Pid, _}, Left} -> dismiss_worker(Pid), Left;
160  
-                   false -> State#state.monitors
161  
-	       end,
162  
-    {next_state, StateName, State#state{monitors=Monitors}};
  168
+    case lists:keyfind(Ref, 2, State#state.monitors) of
  169
+        {Pid, Ref} ->
  170
+            exit(Pid, kill),
  171
+            {next_state, StateName, State};
  172
+        false ->
  173
+            {next_state, StateName, State}
  174
+    end;
163 175
 handle_info({'EXIT', Pid, _}, StateName, #state{worker_sup=Sup,
164 176
                                                 overflow=Overflow,
165 177
                                                 waiting=Waiting,
43  test/poolboy_tests.erl
@@ -42,6 +42,9 @@ pool_test_() ->
42 42
             },
43 43
             {"Non-blocking pool behaves when it's full",
44 44
                 fun pool_full_nonblocking/0
  45
+            },
  46
+            {"Pool behaves right on user death",
  47
+                fun user_death/0
45 48
             }
46 49
         ]
47 50
     }.
@@ -78,6 +81,8 @@ pool_startup() ->
78 81
     checkin_worker(Pid, Worker),
79 82
     ?assertEqual(9, length(gen_fsm:sync_send_all_state_event(Pid,
80 83
                 get_avail_workers))),
  84
+    ?assertEqual(1, length(gen_fsm:sync_send_all_state_event(Pid,
  85
+                get_all_monitors))),
81 86
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
82 87
 
83 88
 pool_overflow() ->
@@ -113,6 +118,8 @@ pool_overflow() ->
113 118
                 get_avail_workers))),
114 119
     ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
115 120
                 get_all_workers))),
  121
+    ?assertEqual(0, length(gen_fsm:sync_send_all_state_event(Pid,
  122
+                get_all_monitors))),
116 123
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
117 124
 
118 125
 pool_empty() ->
@@ -167,6 +174,8 @@ pool_empty() ->
167 174
                 get_avail_workers))),
168 175
     ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
169 176
                 get_all_workers))),
  177
+    ?assertEqual(0, length(gen_fsm:sync_send_all_state_event(Pid,
  178
+                get_all_monitors))),
170 179
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
171 180
 
172 181
 pool_empty_no_overflow() ->
@@ -215,6 +224,8 @@ pool_empty_no_overflow() ->
215 224
                 get_avail_workers))),
216 225
     ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
217 226
                 get_all_workers))),
  227
+    ?assertEqual(0, length(gen_fsm:sync_send_all_state_event(Pid,
  228
+                get_all_monitors))),
218 229
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
219 230
 
220 231
 
@@ -244,6 +255,8 @@ worker_death() ->
244 255
                 get_avail_workers))),
245 256
     ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
246 257
                 get_all_workers))),
  258
+    ?assertEqual(4, length(gen_fsm:sync_send_all_state_event(Pid,
  259
+                get_all_monitors))),
247 260
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
248 261
 
249 262
 worker_death_while_full() ->
@@ -287,6 +300,8 @@ worker_death_while_full() ->
287 300
                 get_avail_workers))),
288 301
     ?assertEqual(6, length(gen_fsm:sync_send_all_state_event(Pid,
289 302
                 get_all_workers))),
  303
+    ?assertEqual(6, length(gen_fsm:sync_send_all_state_event(Pid,
  304
+                get_all_monitors))),
290 305
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
291 306
 
292 307
 
@@ -335,6 +350,8 @@ worker_death_while_full_no_overflow() ->
335 350
                 get_avail_workers))),
336 351
     ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
337 352
                 get_all_workers))),
  353
+    ?assertEqual(3, length(gen_fsm:sync_send_all_state_event(Pid,
  354
+                get_all_monitors))),
338 355
 
339 356
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
340 357
 
@@ -354,6 +371,8 @@ pool_full_nonblocking_no_overflow() ->
354 371
     A = hd(Workers),
355 372
     checkin_worker(Pid, A),
356 373
     ?assertEqual(A, poolboy:checkout(Pid)),
  374
+    ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
  375
+                get_all_monitors))),
357 376
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
358 377
 
359 378
 pool_full_nonblocking() ->
@@ -374,7 +393,31 @@ pool_full_nonblocking() ->
374 393
     ?assertEqual(false, is_process_alive(A)), %% overflow workers get shut down
375 394
     ?assert(is_pid(NewWorker)),
376 395
     ?assertEqual(full, poolboy:checkout(Pid)),
  396
+    ?assertEqual(10, length(gen_fsm:sync_send_all_state_event(Pid,
  397
+                get_all_monitors))),
  398
+    ok = gen_fsm:sync_send_all_state_event(Pid, stop).
  399
+
  400
+user_death() ->
  401
+    %% check that a dead user (a process that died with a worker checked out)
  402
+    %% causes the pool to dismiss the worker and prune the state space.
  403
+    {ok, Pid} = poolboy:start_link([{name, {local, poolboy_test}},
  404
+            {worker_module, poolboy_test_worker},
  405
+            {size, 5}, {max_overflow, 5}, {checkout_blocks, false}]),
  406
+    spawn(fun() ->
  407
+                  %% you'll have to pry it from my cold, dead hands
  408
+                  poolboy:checkout(Pid),
  409
+                  receive after 500 -> exit(normal) end
  410
+          end),
  411
+    %% on a long enough timeline, the survival rate for everyone drops to zero.
  412
+    receive after 1000 -> ok end,
  413
+    ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
  414
+                get_avail_workers))),
  415
+    ?assertEqual(5, length(gen_fsm:sync_send_all_state_event(Pid,
  416
+                get_all_workers))),
  417
+    ?assertEqual(0, length(gen_fsm:sync_send_all_state_event(Pid,
  418
+                get_all_monitors))),
377 419
     ok = gen_fsm:sync_send_all_state_event(Pid, stop).
  420
+    
378 421
 
379 422
 -endif.
380 423
 
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.