CBD-165 Add design doc cache for set views

For every query and index access, we used to read the corresponding
design document from disk.
Design documents live in a dedicated database (the master database),
which is normally small (1 or 2 btree nodes in the id and seq btrees).
In the best (and most common) scenario we therefore do 2 disk reads:
1 btree node read (a single node can easily hold 10 to 20 design docs)
plus 1 design doc body read. We then decompress the body read from
disk and JSON decode it.
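
For reference, the old (uncached) read path was essentially this (a
minimal sketch, mirroring the code removed from open_set_group/2 below):

    {ok, Db} = couch_db:open_int(?master_dbname(SetName), []),
    try
        {ok, DDoc} = couch_db:open_doc(Db, DDocId, [ejson_body]),
        {ok, DDoc}
    after
        ok = couch_db:close(Db)
    end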

When the system is under high activity, this operation can easily take
between 0.5ms and 10ms (high concurrency, very busy Erlang IO
subsystem), as observed on both Linux and Mac OS X, with both hard
disks and SSDs.

This design document cache avoids the disk reads, the decompression
and the JSON decoding of design document bodies.
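
Lookups now go through couch_set_view_ddoc_cache:get_ddoc/2 instead
(a sketch of the new call sites; use_ddoc/1 and handle_error/1 are
hypothetical placeholders):

    case couch_set_view_ddoc_cache:get_ddoc(SetName, DDocId) of
    {ok, DDoc} ->
        % cache hit, or a miss that was just read from disk and cached
        use_ddoc(DDoc);
    {db_open_error, Error} ->
        handle_error(Error);
    {doc_open_error, Error} ->
        handle_error(Error)
    end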

Change-Id: I5e7a647aa2d73a15e0c3d3f9a132a6f449763114
Reviewed-on: http://review.couchbase.org/16184
Reviewed-by: Volker Mische <volker.mische@gmail.com>
Tested-by: buildbot <build@couchbase.com>
Tested-by: Filipe David Borba Manana <fdmanana@gmail.com>
commit aa8af2472422f0ffc99e40adf579b5c35d42a405, 1 parent 44e1535
Authored and committed by @fdmanana
3  etc/couchdb/default.ini.tpl.in
@@ -102,6 +102,7 @@ stats_aggregator={couch_stats_aggregator, start, []}
stats_collector={couch_stats_collector, start, []}
uuids={couch_uuids, start, []}
auth_cache={couch_auth_cache, start_link, []}
+couch_set_view_ddoc_cache={couch_set_view_ddoc_cache, start_link, []}
replication_manager={couch_replication_manager, start_link, []}
os_daemons={couch_os_daemons, start_link, []}
compaction_daemon={couch_compaction_daemon, start_link, []}
@@ -304,3 +305,5 @@ min_file_size = 131072
;version = 2.0.0
;url = http://www.couchbase.com/
+[set_views]
+ddoc_cache_size = 1048576
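
The limit can also be changed at runtime, since the cache server registers
a config listener; assuming the usual couch_config API, something like

    ok = couch_config:set("set_views", "ddoc_cache_size", "2097152")

makes the server shrink (or grow) the cache via free_old_entries/2.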
2  src/couch_set_view/Makefile.am
@@ -31,6 +31,7 @@ source_files = \
src/couch_set_view_mapreduce.erl \
src/couch_db_set.erl \
src/couch_index_barrier.erl \
+ src/couch_set_view_ddoc_cache.erl \
test/couch_set_view_test_util.erl
test_files = \
@@ -63,6 +64,7 @@ compiled_files = \
ebin/couch_set_view_mapreduce.beam \
ebin/couch_db_set.beam \
ebin/couch_index_barrier.beam \
+ ebin/couch_set_view_ddoc_cache.beam \
test/couch_set_view_test_util.beam
EXTRA_DIST = $(include_files) $(source_files) $(test_files) test/run.tpl
2  src/couch_set_view/src/couch_set_view.erl
@@ -830,7 +830,7 @@ handle_db_event({created, _DbName}) ->
% TODO: deal with this
% ok = gen_server:cast(?MODULE, {reset_indexes, DbName});
ok;
-handle_db_event({ddoc_updated, {DbName, DDocId}}) ->
+handle_db_event({Event, {DbName, DDocId}}) when Event == ddoc_updated; Event == ddoc_deleted ->
case string:tokens(?b2l(DbName), "/") of
[SetNameStr, "master"] ->
SetName = ?l2b(SetNameStr),
238 src/couch_set_view/src/couch_set_view_ddoc_cache.erl
@@ -0,0 +1,238 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_set_view_ddoc_cache).
+-behaviour(gen_server).
+
+% public API
+-export([start_link/0]).
+-export([get_ddoc/2]).
+
+% gen_server API
+-export([init/1, handle_call/3, handle_info/2, handle_cast/2]).
+-export([code_change/3, terminate/2]).
+
+-include("couch_db.hrl").
+-include_lib("couch_set_view/include/couch_set_view.hrl").
+
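+% ?BY_DDOC_ID maps {SetName, DDocId} to {Key, ATime, #doc{}, Size} for fast
+% lookups; ?BY_ATIME is ordered by access time and drives LRU eviction.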
+-define(BY_DDOC_ID, set_view_by_ddoc_id_ets).
+-define(BY_ATIME, set_view_by_atime_ets).
+
+-record(state, {
+ max_cache_size = 0,
+ byte_size = 0,
+ db_notifier = nil
+}).
+
+
+start_link() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+
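+% Fetch a design doc, first from the ETS cache and otherwise from the master
+% database, in which case the gen_server is asked to cache the result.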
+get_ddoc(SetName, DDocId) ->
+ case ets:lookup(?BY_DDOC_ID, {SetName, DDocId}) of
+ [{_, _ATime, DDoc, _DDocSize}] ->
+ ok = gen_server:cast(?MODULE, {cache_hit, SetName, DDocId}),
+ {ok, DDoc};
+ [] ->
+ case couch_db:open_int(?master_dbname(SetName), []) of
+ {ok, Db} ->
+ try
+ case couch_db:open_doc(Db, DDocId, [ejson_body]) of
+ {ok, DDoc} ->
+ Size = erlang:external_size(DDoc),
+ ok = gen_server:call(?MODULE, {add_ddoc, SetName, DDoc, Size}, infinity),
+ {ok, DDoc};
+ DocOpenError ->
+ {doc_open_error, DocOpenError}
+ end
+ after
+ ok = couch_db:close(Db)
+ end;
+ DbOpenError ->
+ {db_open_error, DbOpenError}
+ end
+ end.
+
+
+init(_) ->
+ ?BY_DDOC_ID = ets:new(?BY_DDOC_ID,
+ [set, protected, named_table, {read_concurrency, true}]),
+ ?BY_ATIME = ets:new(?BY_ATIME,
+ [ordered_set, private, named_table]),
+ process_flag(trap_exit, true),
+ ok = couch_config:register(fun handle_config_change/3),
+ {ok, Notifier} = couch_db_update_notifier:start_link(fun handle_db_event/1),
+ MaxSize = couch_config:get("set_views", "ddoc_cache_size", "1048576"),
+ State = #state{
+ db_notifier = Notifier,
+ max_cache_size = list_to_integer(MaxSize)
+ },
+ {ok, State}.
+
+
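+% Cache a design doc after a miss. The reply is sent before the bookkeeping
+% so the caller is not blocked by LRU maintenance and eviction.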
+handle_call({add_ddoc, SetName, DDoc, DDocSize}, From, State) ->
+ gen_server:reply(From, ok),
+ #doc{id = Id, rev = Rev} = DDoc,
+ Key = {SetName, Id},
+ case ets:lookup(?BY_DDOC_ID, Key) of
+ [] ->
+ Now = os:timestamp(),
+ true = ets:insert(?BY_ATIME, {Now, Key, DDocSize}),
+ true = ets:insert(?BY_DDOC_ID, {Key, Now, DDoc, DDocSize}),
+ NewSize = State#state.byte_size + DDocSize;
+ [{_, _ATime, #doc{rev = OldRev}, _OldDDocSize}] when OldRev > Rev ->
+ NewSize = State#state.byte_size;
+ [{_, ATime, _OldDDoc, OldDDocSize}] ->
+ Now = os:timestamp(),
+ true = ets:delete(?BY_ATIME, ATime),
+ true = ets:insert(?BY_ATIME, {Now, Key, DDocSize}),
+ true = ets:insert(?BY_DDOC_ID, {Key, Now, DDoc, DDocSize}),
+ NewSize = State#state.byte_size + DDocSize - OldDDocSize
+ end,
+ CacheSize = free_old_entries(State#state.max_cache_size, NewSize),
+ {noreply, State#state{byte_size = CacheSize}};
+
+handle_call({update_ddoc, SetName, DDoc, DDocSize}, From, State) ->
+ gen_server:reply(From, ok),
+ #doc{id = Id, rev = Rev} = DDoc,
+ Key = {SetName, Id},
+ case ets:lookup(?BY_DDOC_ID, Key) of
+ [] ->
+ {noreply, State};
+ [{_, _ATime, #doc{rev = OldRev}, _OldDDocSize}] when OldRev > Rev ->
+ {noreply, State};
+ [{_, ATime, _OldDDoc, OldDDocSize}] ->
+ % ddoc updated; keep its current access timestamp
+ true = ets:update_element(?BY_ATIME, ATime, {3, DDocSize}),
+ true = ets:insert(?BY_DDOC_ID, {Key, ATime, DDoc, DDocSize}),
+ NewSize = State#state.byte_size + DDocSize - OldDDocSize,
+ CacheSize = free_old_entries(State#state.max_cache_size, NewSize),
+ {noreply, State#state{byte_size = CacheSize}}
+ end;
+
+handle_call({delete_ddoc, SetName, Id}, From, State) ->
+ gen_server:reply(From, ok),
+ Key = {SetName, Id},
+ case ets:lookup(?BY_DDOC_ID, Key) of
+ [] ->
+ {noreply, State};
+ [{_, ATime, _DDoc, DDocSize}] ->
+ true = ets:delete(?BY_DDOC_ID, Key),
+ true = ets:delete(?BY_ATIME, ATime),
+ NewSize = State#state.byte_size - DDocSize,
+ {noreply, State#state{byte_size = NewSize}}
+ end;
+
+handle_call({set_deleted, SetName}, From, State) ->
+ gen_server:reply(From, ok),
+ Entries = ets:match_object(?BY_DDOC_ID, {{SetName, '_'}, '_', '_', '_'}),
+ lists:foreach(fun({Key, ATime, _DDoc, _DDocSize}) ->
+ true = ets:delete(?BY_DDOC_ID, Key),
+ true = ets:delete(?BY_ATIME, ATime)
+ end, Entries),
+ {noreply, State};
+
+handle_call({new_max_cache_size, NewMaxSize}, _From, State) ->
+ Size = free_old_entries(NewMaxSize, State#state.byte_size),
+ {reply, ok, State#state{byte_size = Size}}.
+
+
+handle_cast({cache_hit, SetName, DDocId}, State) ->
+ Key = {SetName, DDocId},
+ case ets:lookup(?BY_DDOC_ID, Key) of
+ [] ->
+ ok;
+ [{_, OldATime, _DDoc, DDocSize}] ->
+ NewATime = os:timestamp(),
+ true = ets:delete(?BY_ATIME, OldATime),
+ true = ets:insert(?BY_ATIME, {NewATime, Key, DDocSize}),
+ true = ets:update_element(?BY_DDOC_ID, Key, {2, NewATime})
+ end,
+ {noreply, State}.
+
+
+handle_info(shutdown, State) ->
+ {stop, shutdown, State}.
+
+
+terminate(_Reason, #state{db_notifier = Notifier}) ->
+ couch_db_update_notifier:stop(Notifier),
+ true = ets:delete(?BY_DDOC_ID),
+ true = ets:delete(?BY_ATIME),
+ ok.
+
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+
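+% Evict the least recently accessed entries until the cache byte size is
+% within MaxSize; returns the resulting cache size.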
+free_old_entries(MaxSize, CurSize) when CurSize =< MaxSize ->
+ CurSize;
+free_old_entries(MaxSize, CurSize) ->
+ ATime = ets:first(?BY_ATIME),
+ [{_, {_SetName, _DDocId} = Key, DDocSize}] = ets:lookup(?BY_ATIME, ATime),
+ true = ets:delete(?BY_ATIME, ATime),
+ true = ets:delete(?BY_DDOC_ID, Key),
+ free_old_entries(MaxSize, CurSize - DDocSize).
+
+
+handle_config_change("set_views", "ddoc_cache_size", NewSizeList) ->
+ NewSize = list_to_integer(NewSizeList),
+ ok = gen_server:call(?MODULE, {new_max_cache_size, NewSize}, infinity).
+
+
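+% Keep the cache coherent with db events: drop all entries of a deleted set,
+% refresh an entry when its ddoc is updated, and drop it when it is deleted.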
+handle_db_event({deleted, DbName}) ->
+ case string:tokens(?b2l(DbName), "/") of
+ [SetNameList, "master"] ->
+ ok = gen_server:call(?MODULE, {set_deleted, ?l2b(SetNameList)}, infinity);
+ _ ->
+ ok
+ end;
+handle_db_event({ddoc_updated, {DbName, Id}}) ->
+ case string:tokens(?b2l(DbName), "/") of
+ [SetNameList, "master"] ->
+ SetName = ?l2b(SetNameList),
+ case couch_db:open_int(DbName, []) of
+ {ok, Db} ->
+ try
+ case couch_db:open_doc(Db, Id, [ejson_body]) of
+ {ok, Doc} ->
+ Size = erlang:external_size(Doc),
+ ok = gen_server:call(?MODULE, {update_ddoc, SetName, Doc, Size}, infinity);
+ _ ->
+ % Maybe the ddoc got deleted in the meantime. If not, a subsequent request
+ % will add it to the cache again. This approach keeps the code simpler.
+ ok = gen_server:call(?MODULE, {delete_ddoc, SetName, Id}, infinity)
+ end
+ after
+ ok = couch_db:close(Db)
+ end;
+ _ ->
+ % Maybe the db just got deleted, or we ran out of file descriptors, etc.
+ % Just let future cache misses populate the cache again; this keeps things
+ % simpler for an uncommon case.
+ ok = gen_server:call(?MODULE, {set_deleted, SetName}, infinity)
+ end;
+ _ ->
+ ok
+ end;
+handle_db_event({ddoc_deleted, {DbName, Id}}) ->
+ case string:tokens(?b2l(DbName), "/") of
+ [SetNameList, "master"] ->
+ ok = gen_server:call(?MODULE, {delete_ddoc, ?l2b(SetNameList), Id}, infinity);
+ _ ->
+ ok
+ end;
+handle_db_event(_) ->
+ ok.
20 src/couch_set_view/src/couch_set_view_group.erl
@@ -1115,20 +1115,16 @@ do_open_index_file(Filepath) ->
open_set_group(SetName, GroupId) ->
- case couch_db:open_int(?master_dbname(SetName), []) of
- {ok, Db} ->
- case couch_db:open_doc(Db, GroupId, [ejson_body]) of
- {ok, Doc} ->
- couch_db:close(Db),
- {ok, couch_set_view_util:design_doc_to_set_view_group(SetName, Doc)};
- Else ->
- couch_db:close(Db),
- Else
- end;
- Else ->
- Else
+ case couch_set_view_ddoc_cache:get_ddoc(SetName, GroupId) of
+ {ok, DDoc} ->
+ {ok, couch_set_view_util:design_doc_to_set_view_group(SetName, DDoc)};
+ {doc_open_error, Error} ->
+ Error;
+ {db_open_error, Error} ->
+ Error
end.
+
get_group_info(State) ->
#state{
group = Group,
2  src/couchdb/couch_db.erl
@@ -441,8 +441,6 @@ update_docs(#db{name=DbName}=Db, Docs, Options0) ->
FullCommit},
case get_result(Db#db.update_pid, MRef) of
ok ->
- [couch_db_update_notifier:notify({ddoc_updated, {DbName, Id}})
- || #doc{id = <<?DESIGN_DOC_PREFIX, _/binary>> = Id} <- Docs],
ok;
retry ->
% This can happen if the db file we wrote to was swapped out by
5 src/couchdb/couch_db_update_notifier.erl
@@ -22,7 +22,7 @@
-behaviour(gen_event).
--export([start_link/1, notify/1]).
+-export([start_link/1, notify/1, sync_notify/1]).
-export([init/1, terminate/2, handle_event/2, handle_call/2, handle_info/2, code_change/3,stop/1]).
-include("couch_db.hrl").
@@ -33,6 +33,9 @@ start_link(Exec) ->
notify(Event) ->
gen_event:notify(couch_db_update, Event).
+sync_notify(Event) ->
+ gen_event:sync_notify(couch_db_update, Event).
+
stop(Pid) ->
couch_event_sup:stop(Pid).
8 src/couchdb/couch_db_updater.erl
@@ -283,6 +283,14 @@ handle_info({update_docs, Client, Docs, NonRepDocs, FullCommit}, Db) ->
couch_db_update_notifier:notify({updated, Db2#db.name});
true -> ok
end,
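+ % Synchronously notify listeners (e.g. the set view ddoc cache) about
+ % design doc updates and deletions, before acking the client.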
+ lists:foreach(
+ fun(#doc_update_info{id = <<?DESIGN_DOC_PREFIX, _/binary>> = Id, deleted = false}) ->
+ couch_db_update_notifier:sync_notify({ddoc_updated, {Db#db.name, Id}});
+ (#doc_update_info{id = <<?DESIGN_DOC_PREFIX, _/binary>> = Id, deleted = true}) ->
+ couch_db_update_notifier:sync_notify({ddoc_deleted, {Db#db.name, Id}});
+ (_) ->
+ ok
+ end, Docs),
catch(Client ! {done, self()}),
{noreply, Db2}
catch
18 src/couchdb/couch_index_merger.erl
@@ -28,6 +28,7 @@
-include("couch_index_merger.hrl").
% needed for #set_view_spec{}
-include("couch_view_merger.hrl").
+-include_lib("couch_set_view/include/couch_set_view.hrl").
-import(couch_util, [
get_value/2,
@@ -243,16 +244,21 @@ get_first_ddoc([#simple_index_spec{ddoc_id = nil} = Spec | _],
#simple_index_spec{index_name = <<"_all_docs">>} = Spec,
{ok, nil, <<"_all_docs">>};
-get_first_ddoc([#set_view_spec{} = Spec | _], UserCtx, Timeout) ->
+get_first_ddoc([#set_view_spec{} = Spec | _], _UserCtx, _Timeout) ->
#set_view_spec {
name = SetName, ddoc_id = Id, view_name = ViewName
} = Spec,
- {ok, Db} = open_db(<<SetName/binary, "/master">>, UserCtx, Timeout),
- {ok, DDoc} = get_ddoc(Db, Id),
- close_db(Db),
-
- {ok, DDoc, ViewName};
+ case couch_set_view_ddoc_cache:get_ddoc(SetName, Id) of
+ {ok, DDoc} ->
+ {ok, DDoc, ViewName};
+ {db_open_error, {not_found, _}} ->
+ throw({not_found, db_not_found_msg(?master_dbname(SetName))});
+ {db_open_error, Error} ->
+ throw(Error);
+ {doc_open_error, {not_found, _}} ->
+ throw({not_found, ddoc_not_found_msg(?master_dbname(SetName), Id)})
+ end;
get_first_ddoc([#simple_index_spec{} = Spec | _], UserCtx, Timeout) ->
#simple_index_spec{