Permalink
Browse files

Fix dead lock case in the os process pool

Part of this patch was done by Paul Davis.
The patch also introduces a test case to validate that
the os process pool (couch_query_servers) operates as it
should.
Closes COUCHDB-1246.



git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1159045 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information...
1 parent 6b4c778 commit 95da6f6f4246d2e8e86a3cf92ddf6487e46c10e9 @fdmanana fdmanana committed Aug 18, 2011
Showing with 179 additions and 13 deletions.
  1. +1 −5 src/couchdb/couch_os_process.erl
  2. +15 −7 src/couchdb/couch_query_servers.erl
  3. +161 −0 test/etap/210-os-proc-pool.t
  4. +2 −1 test/etap/Makefile.am
@@ -133,7 +133,6 @@ pick_command1(_) ->
% gen_server API
init([Command, Options, PortOptions]) ->
- process_flag(trap_exit, true),
PrivDir = couch_util:priv_dir(),
Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
BaseProc = #os_proc{
@@ -202,10 +201,7 @@ handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) ->
{stop, normal, OsProc};
handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) ->
?LOG_ERROR("OS Process died with status: ~p", [Status]),
- {stop, {exit_status, Status}, OsProc};
-handle_info(Msg, OsProc) ->
- ?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]),
- {noreply, OsProc}.
+ {stop, {exit_status, Status}, OsProc}.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
@@ -23,7 +23,8 @@
-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).
-% -export([test/0]).
+% For 210-os-proc-pool.t
+-export([get_os_process/1, ret_os_process/1]).
-include("couch_db.hrl").
@@ -338,24 +339,30 @@ handle_call({get_proc, Lang}, From, Server) ->
Error ->
{reply, Error, Server}
end;
-handle_call({unlink_proc, Pid}, _From, #qserver{pid_procs=PidProcs}=Server) ->
- rem_value(PidProcs, Pid),
+handle_call({unlink_proc, Pid}, _From, Server) ->
unlink(Pid),
{reply, ok, Server};
handle_call({ret_proc, Proc}, _From, #qserver{
pid_procs=PidProcs,
lang_procs=LangProcs}=Server) ->
% Along with max process limit, here we should check
% if we're over the limit and discard when we are.
- add_value(PidProcs, Proc#proc.pid, Proc),
- add_to_list(LangProcs, Proc#proc.lang, Proc),
- link(Proc#proc.pid),
+ case is_process_alive(Proc#proc.pid) of
+ true ->
+ add_value(PidProcs, Proc#proc.pid, Proc),
+ add_to_list(LangProcs, Proc#proc.lang, Proc),
+ link(Proc#proc.pid);
+ false ->
+ ok
+ end,
{reply, true, service_waitlist(Server)}.
handle_cast(_Whatever, Server) ->
{noreply, Server}.
-handle_info({'EXIT', Pid, Status}, #qserver{
+handle_info({'EXIT', _, _}, Server) ->
+ {noreply, Server};
+handle_info({'DOWN', _, process, Pid, Status}, #qserver{
pid_procs=PidProcs,
lang_procs=LangProcs,
lang_limits=LangLimits}=Server) ->
@@ -466,6 +473,7 @@ new_process(Langs, LangLimits, Lang) ->
case ets:lookup(Langs, Lang) of
[{Lang, Mod, Func, Arg}] ->
{ok, Pid} = apply(Mod, Func, Arg),
+ erlang:monitor(process, Pid),
true = ets:insert(LangLimits, {Lang, Lim, Current+1}),
{ok, #proc{lang=Lang,
pid=Pid,
@@ -0,0 +1,161 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+main(_) ->
+ test_util:init_code_path(),
+
+ etap:plan(19),
+ case (catch test()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+ etap:bail(Other)
+ end,
+ ok.
+
+
+test() ->
+ couch_server_sup:start_link(test_util:config_files()),
+ couch_config:set("query_server_config", "os_process_limit", "3", false),
+
+ test_pool_full(),
+ test_client_unexpected_exit(),
+
+ couch_server_sup:stop(),
+ ok.
+
+
+test_pool_full() ->
+ Client1 = spawn_client(),
+ Client2 = spawn_client(),
+ Client3 = spawn_client(),
+
+ etap:diag("Check that we can spawn the max number of processes."),
+ etap:is(ping_client(Client1), ok, "Client 1 started ok."),
+ etap:is(ping_client(Client2), ok, "Client 2 started ok."),
+ etap:is(ping_client(Client3), ok, "Client 3 started ok."),
+
+ Proc1 = get_client_proc(Client1, "1"),
+ Proc2 = get_client_proc(Client2, "2"),
+ Proc3 = get_client_proc(Client3, "3"),
+ etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
+ etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),
+
+ etap:diag("Check that client 4 blocks waiting for a process."),
+ Client4 = spawn_client(),
+ etap:is(ping_client(Client4), timeout, "Client 4 blocked while waiting."),
+
+ etap:diag("Check that stopping a client gives up its process."),
+ etap:is(stop_client(Client1), ok, "First client stopped."),
+
+ etap:diag("And check that our blocked process has been unblocked."),
+ etap:is(ping_client(Client4), ok, "Client was unblocked."),
+
+ Proc4 = get_client_proc(Client4, "4"),
+ etap:is(Proc4, Proc1, "Client 4 got proc that client 1 got before."),
+
+ lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).
+
+
+test_client_unexpected_exit() ->
+ Client1 = spawn_client(),
+ Client2 = spawn_client(),
+ Client3 = spawn_client(),
+
+ etap:diag("Check that up to os_process_limit clients started."),
+ etap:is(ping_client(Client1), ok, "Client 1 started ok."),
+ etap:is(ping_client(Client2), ok, "Client 2 started ok."),
+ etap:is(ping_client(Client3), ok, "Client 3 started ok."),
+
+ Proc1 = get_client_proc(Client1, "1"),
+ Proc2 = get_client_proc(Client2, "2"),
+ Proc3 = get_client_proc(Client3, "3"),
+ etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
+ etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),
+
+ etap:diag("Check that killing a client frees an os_process."),
+ etap:is(kill_client(Client1), ok, "Client 1 died all right."),
+
+ etap:diag("Check that a new client is not blocked on boot."),
+ Client4 = spawn_client(),
+ etap:is(ping_client(Client4), ok, "New client booted without blocking."),
+
+ Proc4 = get_client_proc(Client4, "4"),
+ etap:isnt(Proc4, Proc1,
+ "Client 4 got a proc different from the one client 1 got before."),
+ etap:isnt(Proc4, Proc2, "Client 4's proc different from client 2's proc."),
+ etap:isnt(Proc4, Proc3, "Client 4's proc different from client 3's proc."),
+
+ lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).
+
+
+spawn_client() ->
+ Parent = self(),
+ Ref = make_ref(),
+ Pid = spawn(fun() ->
+ Proc = couch_query_servers:get_os_process(<<"javascript">>),
+ loop(Parent, Ref, Proc)
+ end),
+ {Pid, Ref}.
+
+
+ping_client({Pid, Ref}) ->
+ Pid ! ping,
+ receive
+ {pong, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+get_client_proc({Pid, Ref}, ClientName) ->
+ Pid ! get_proc,
+ receive
+ {proc, Ref, Proc} -> Proc
+ after 3000 ->
+ etap:bail("Timeout getting client " ++ ClientName ++ " proc.")
+ end.
+
+
+stop_client({Pid, Ref}) ->
+ Pid ! stop,
+ receive
+ {stop, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+kill_client({Pid, Ref}) ->
+ Pid ! die,
+ receive
+ {die, Ref} -> ok
+ after 3000 -> timeout
+ end.
+
+
+loop(Parent, Ref, Proc) ->
+ receive
+ ping ->
+ Parent ! {pong, Ref},
+ loop(Parent, Ref, Proc);
+ get_proc ->
+ Parent ! {proc, Ref, Proc},
+ loop(Parent, Ref, Proc);
+ stop ->
+ couch_query_servers:ret_os_process(Proc),
+ Parent ! {stop, Ref};
+ die ->
+ Parent ! {die, Ref},
+ exit(some_error)
+ end.
View
@@ -83,4 +83,5 @@ EXTRA_DIST = \
180-http-proxy.ini \
180-http-proxy.t \
190-json-stream-parse.t \
- 200-view-group-no-db-leaks.t
+ 200-view-group-no-db-leaks.t \
+ 210-os-proc-pool.t

0 comments on commit 95da6f6

Please sign in to comment.