From 414fa2eae1f7379640b7c13f8115eae4f4a66cf5 Mon Sep 17 00:00:00 2001 From: Benjamin Bastian Date: Mon, 29 Sep 2014 13:09:48 -0700 Subject: [PATCH] Add per-db configurable hash function capability This commit makes the algorithm used for consistent/distributed hashing configurable at per-database granularity. A user can add a query-string option on database creation (called `hash_algorithm`) which specifies the hash function. Currently, the only available hash algorithm is crc32. --- src/mem3.app.src | 5 +++++ src/mem3.erl | 10 ++++++---- src/mem3_shards.erl | 4 ++-- src/mem3_util.erl | 34 +++++++++++++++++++++++++++++----- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/mem3.app.src b/src/mem3.app.src index 87eda0d..50b7813 100644 --- a/src/mem3.app.src +++ b/src/mem3.app.src @@ -48,5 +48,10 @@ couch_log, couch_event, couch_stats + ]}, + {env, [ + {hash_algorithms, [ + {"crc32", {erlang, crc32}} + ]} ]} ]}. diff --git a/src/mem3.erl b/src/mem3.erl index 5b7f640..1e5ff09 100644 --- a/src/mem3.erl +++ b/src/mem3.erl @@ -229,15 +229,17 @@ dbname(_) -> %% @doc Determine if DocId belongs in shard (identified by record or filename) belongs(#shard{}=Shard, DocId) when is_binary(DocId) -> [Begin, End] = range(Shard), - belongs(Begin, End, DocId); + belongs(Shard, Begin, End, DocId); belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) -> [Begin, End] = range(ShardName), - belongs(Begin, End, DocId); + belongs(ShardName, Begin, End, DocId); belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> true. -belongs(Begin, End, DocId) -> - HashKey = mem3_util:hash(DocId), +-spec belongs(#shard{} | binary(), integer(), integer(), binary()) -> boolean(). +belongs(Shard, Begin, End, DocId) -> + DbName = dbname(Shard), + HashKey = mem3_util:hash(DbName, DocId), Begin =< HashKey andalso HashKey =< End. 
range(#shard{range = Range}) -> diff --git a/src/mem3_shards.erl b/src/mem3_shards.erl index 11cf376..ab7d620 100644 --- a/src/mem3_shards.erl +++ b/src/mem3_shards.erl @@ -61,7 +61,7 @@ for_docid(DbName, DocId) -> for_docid(DbName, DocId, []). for_docid(DbName, DocId, Options) -> - HashKey = mem3_util:hash(DocId), + HashKey = mem3_util:hash(DbName, DocId), ShardHead = #shard{ name = '_', node = '_', @@ -283,7 +283,7 @@ load_shards_from_db(#db{} = ShardDb, DbName) -> load_shards_from_disk(DbName, DocId)-> Shards = load_shards_from_disk(DbName), - HashKey = mem3_util:hash(DocId), + HashKey = mem3_util:hash(DbName, DocId), [S || S <- Shards, in_range(S, HashKey)]. in_range(Shard, HashKey) -> diff --git a/src/mem3_util.erl b/src/mem3_util.erl index c437d52..e081580 100644 --- a/src/mem3_util.erl +++ b/src/mem3_util.erl @@ -12,7 +12,7 @@ -module(mem3_util). --export([hash/1, name_shard/2, create_partition_map/5, build_shards/2, +-export([hash/2, name_shard/2, create_partition_map/5, build_shards/2, n_val/2, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, shard_info/1, ensure_exists/1, open_db_doc/1]). -export([is_deleted/1, rotate_list/2]). @@ -25,14 +25,38 @@ -deprecated({name_shard, 1, eventually}). -define(RINGTOP, 2 bsl 31). % CRC32 space +-define(HASH_CACHE, hash_fun_lru). -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). -hash(Item) when is_binary(Item) -> - erlang:crc32(Item); -hash(Item) -> - erlang:crc32(term_to_binary(Item)). +-spec hash(binary(), term()) -> integer(). 
+%% @doc Hash Item with the hash function configured for the database DbName.
+%%
+%% The {Module, Function} pair is looked up in the ?HASH_CACHE LRU first. On a
+%% miss it is resolved from the database's document in the shard db and then
+%% cached. Items that are not binaries are converted with term_to_binary/1
+%% before hashing.
+%%
+%% Raises database_does_not_exist when DbName has no document in the shard db,
+%% and {unknown_hash_algorithm, Name} when the document names an algorithm
+%% that is not listed in the mem3 `hash_algorithms' application env.
+hash(DbName, Item) when is_binary(Item) ->
+    {Mod, Fun} = case ets_lru:lookup_d(?HASH_CACHE, DbName) of
+        {ok, Cached} ->
+            couch_stats:increment_counter([mem3, hash_fun_cache, hit]),
+            Cached;
+        _ ->
+            %% Count misses as misses; the hit counter belongs on the
+            %% branch above only.
+            couch_stats:increment_counter([mem3, hash_fun_cache, miss]),
+            load_hash_fun(DbName)
+    end,
+    %% Call Mod:Fun directly rather than apply({Mod, Fun}, Args), so this
+    %% does not depend on tuple-fun support in the runtime.
+    Mod:Fun(Item);
+hash(DbName, Item) ->
+    hash(DbName, term_to_binary(Item)).
+
+%% Read the hash function for DbName from its document in the shard db and
+%% store it in ?HASH_CACHE. The shard db handle is always closed, including
+%% when the lookup raises.
+load_hash_fun(DbName) ->
+    DbsDbName = ?l2b(config:get("mem3", "shard_db", "dbs")),
+    {ok, DbsDb} = mem3_util:ensure_exists(DbsDbName),
+    try
+        case couch_db:open_doc(DbsDb, DbName, [ejson_body]) of
+            {ok, #doc{body = {Props}}} ->
+                MF = hash_fun_from_props(Props),
+                ets_lru:insert(?HASH_CACHE, DbName, MF),
+                MF;
+            {not_found, _} ->
+                erlang:error(database_does_not_exist, ?b2l(DbName))
+        end
+    after
+        couch_db:close(DbsDb)
+    end.
+
+%% Map the document's <<"hash_algorithm">> property (default <<"crc32">>) to
+%% the {Module, Function} registered under that name in the mem3
+%% `hash_algorithms' application env. An unknown name raises here so that
+%% `undefined' is never written into the cache.
+hash_fun_from_props(Props) ->
+    HashAlgo = couch_util:get_value(<<"hash_algorithm">>, Props, <<"crc32">>),
+    {ok, HashAlgos} = application:get_env(mem3, hash_algorithms),
+    case couch_util:get_value(binary_to_list(HashAlgo), HashAlgos) of
+        {_Mod, _Fun} = MF ->
+            MF;
+        undefined ->
+            erlang:error({unknown_hash_algorithm, HashAlgo})
+    end.

 name_shard(Shard) ->
     name_shard(Shard, "").