Skip to content

Commit

Permalink
Fix dead lock case in the os process pool
Browse files Browse the repository at this point in the history
Part of this patch was done by Paul Davis.
The patch also introduces a test case to validate that
the os process pool (couch_query_servers) operates as it
should.
Closes COUCHDB-1246.



git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1159045 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
fdmanana committed Aug 18, 2011
1 parent 6b4c778 commit 95da6f6
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 13 deletions.
6 changes: 1 addition & 5 deletions src/couchdb/couch_os_process.erl
Expand Up @@ -133,7 +133,6 @@ pick_command1(_) ->

% gen_server API
init([Command, Options, PortOptions]) ->
process_flag(trap_exit, true),
PrivDir = couch_util:priv_dir(),
Spawnkiller = filename:join(PrivDir, "couchspawnkillable"),
BaseProc = #os_proc{
Expand Down Expand Up @@ -202,10 +201,7 @@ handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) ->
{stop, normal, OsProc};
handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) ->
?LOG_ERROR("OS Process died with status: ~p", [Status]),
{stop, {exit_status, Status}, OsProc};
handle_info(Msg, OsProc) ->
?LOG_DEBUG("OS Proc: Unknown info: ~p", [Msg]),
{noreply, OsProc}.
{stop, {exit_status, Status}, OsProc}.

code_change(_OldVsn, State, _Extra) ->
{ok, State}.
Expand Down
22 changes: 15 additions & 7 deletions src/couchdb/couch_query_servers.erl
Expand Up @@ -23,7 +23,8 @@

-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).

% -export([test/0]).
% For 210-os-proc-pool.t
-export([get_os_process/1, ret_os_process/1]).

-include("couch_db.hrl").

Expand Down Expand Up @@ -338,24 +339,30 @@ handle_call({get_proc, Lang}, From, Server) ->
Error ->
{reply, Error, Server}
end;
handle_call({unlink_proc, Pid}, _From, #qserver{pid_procs=PidProcs}=Server) ->
rem_value(PidProcs, Pid),
handle_call({unlink_proc, Pid}, _From, Server) ->
unlink(Pid),
{reply, ok, Server};
handle_call({ret_proc, Proc}, _From, #qserver{
pid_procs=PidProcs,
lang_procs=LangProcs}=Server) ->
% Along with max process limit, here we should check
% if we're over the limit and discard when we are.
add_value(PidProcs, Proc#proc.pid, Proc),
add_to_list(LangProcs, Proc#proc.lang, Proc),
link(Proc#proc.pid),
case is_process_alive(Proc#proc.pid) of
true ->
add_value(PidProcs, Proc#proc.pid, Proc),
add_to_list(LangProcs, Proc#proc.lang, Proc),
link(Proc#proc.pid);
false ->
ok
end,
{reply, true, service_waitlist(Server)}.

handle_cast(_Whatever, Server) ->
{noreply, Server}.

handle_info({'EXIT', Pid, Status}, #qserver{
handle_info({'EXIT', _, _}, Server) ->
{noreply, Server};
handle_info({'DOWN', _, process, Pid, Status}, #qserver{
pid_procs=PidProcs,
lang_procs=LangProcs,
lang_limits=LangLimits}=Server) ->
Expand Down Expand Up @@ -466,6 +473,7 @@ new_process(Langs, LangLimits, Lang) ->
case ets:lookup(Langs, Lang) of
[{Lang, Mod, Func, Arg}] ->
{ok, Pid} = apply(Mod, Func, Arg),
erlang:monitor(process, Pid),
true = ets:insert(LangLimits, {Lang, Lim, Current+1}),
{ok, #proc{lang=Lang,
pid=Pid,
Expand Down
161 changes: 161 additions & 0 deletions test/etap/210-os-proc-pool.t
@@ -0,0 +1,161 @@
#!/usr/bin/env escript
%% -*- erlang -*-
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

main(_) ->
test_util:init_code_path(),

etap:plan(19),
case (catch test()) of
ok ->
etap:end_tests();
Other ->
etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
etap:bail(Other)
end,
ok.


test() ->
couch_server_sup:start_link(test_util:config_files()),
couch_config:set("query_server_config", "os_process_limit", "3", false),

test_pool_full(),
test_client_unexpected_exit(),

couch_server_sup:stop(),
ok.


test_pool_full() ->
Client1 = spawn_client(),
Client2 = spawn_client(),
Client3 = spawn_client(),

etap:diag("Check that we can spawn the max number of processes."),
etap:is(ping_client(Client1), ok, "Client 1 started ok."),
etap:is(ping_client(Client2), ok, "Client 2 started ok."),
etap:is(ping_client(Client3), ok, "Client 3 started ok."),

Proc1 = get_client_proc(Client1, "1"),
Proc2 = get_client_proc(Client2, "2"),
Proc3 = get_client_proc(Client3, "3"),
etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),

etap:diag("Check that client 4 blocks waiting for a process."),
Client4 = spawn_client(),
etap:is(ping_client(Client4), timeout, "Client 4 blocked while waiting."),

etap:diag("Check that stopping a client gives up its process."),
etap:is(stop_client(Client1), ok, "First client stopped."),

etap:diag("And check that our blocked process has been unblocked."),
etap:is(ping_client(Client4), ok, "Client was unblocked."),

Proc4 = get_client_proc(Client4, "4"),
etap:is(Proc4, Proc1, "Client 4 got proc that client 1 got before."),

lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).


test_client_unexpected_exit() ->
Client1 = spawn_client(),
Client2 = spawn_client(),
Client3 = spawn_client(),

etap:diag("Check that up to os_process_limit clients started."),
etap:is(ping_client(Client1), ok, "Client 1 started ok."),
etap:is(ping_client(Client2), ok, "Client 2 started ok."),
etap:is(ping_client(Client3), ok, "Client 3 started ok."),

Proc1 = get_client_proc(Client1, "1"),
Proc2 = get_client_proc(Client2, "2"),
Proc3 = get_client_proc(Client3, "3"),
etap:isnt(Proc1, Proc2, "Clients 1 and 2 got different procs."),
etap:isnt(Proc2, Proc3, "Clients 2 and 3 got different procs."),

etap:diag("Check that killing a client frees an os_process."),
etap:is(kill_client(Client1), ok, "Client 1 died all right."),

etap:diag("Check that a new client is not blocked on boot."),
Client4 = spawn_client(),
etap:is(ping_client(Client4), ok, "New client booted without blocking."),

Proc4 = get_client_proc(Client4, "4"),
etap:isnt(Proc4, Proc1,
"Client 4 got a proc different from the one client 1 got before."),
etap:isnt(Proc4, Proc2, "Client 4's proc different from client 2's proc."),
etap:isnt(Proc4, Proc3, "Client 4's proc different from client 3's proc."),

lists:map(fun(C) -> ok = stop_client(C) end, [Client2, Client3, Client4]).


spawn_client() ->
Parent = self(),
Ref = make_ref(),
Pid = spawn(fun() ->
Proc = couch_query_servers:get_os_process(<<"javascript">>),
loop(Parent, Ref, Proc)
end),
{Pid, Ref}.


ping_client({Pid, Ref}) ->
Pid ! ping,
receive
{pong, Ref} -> ok
after 3000 -> timeout
end.


get_client_proc({Pid, Ref}, ClientName) ->
Pid ! get_proc,
receive
{proc, Ref, Proc} -> Proc
after 3000 ->
etap:bail("Timeout getting client " ++ ClientName ++ " proc.")
end.


stop_client({Pid, Ref}) ->
Pid ! stop,
receive
{stop, Ref} -> ok
after 3000 -> timeout
end.


kill_client({Pid, Ref}) ->
Pid ! die,
receive
{die, Ref} -> ok
after 3000 -> timeout
end.


loop(Parent, Ref, Proc) ->
receive
ping ->
Parent ! {pong, Ref},
loop(Parent, Ref, Proc);
get_proc ->
Parent ! {proc, Ref, Proc},
loop(Parent, Ref, Proc);
stop ->
couch_query_servers:ret_os_process(Proc),
Parent ! {stop, Ref};
die ->
Parent ! {die, Ref},
exit(some_error)
end.
3 changes: 2 additions & 1 deletion test/etap/Makefile.am
Expand Up @@ -83,4 +83,5 @@ EXTRA_DIST = \
180-http-proxy.ini \
180-http-proxy.t \
190-json-stream-parse.t \
200-view-group-no-db-leaks.t
200-view-group-no-db-leaks.t \
210-os-proc-pool.t

0 comments on commit 95da6f6

Please sign in to comment.