Permalink
Browse files

Rename "lsm-btree" to "hanoi".

  • Loading branch information...
1 parent fec14a1 commit 43f095b3f0b932b2cfdb4884c9e90c68f5063f7c Gregory Burd committed Apr 21, 2012
Showing with 267 additions and 267 deletions.
  1. +2 −2 Makefile
  2. +18 −18 README.md
  3. +6 −6 TODO
  4. BIN doc/{compare-innodb-vs-lsm_btree.png → compare-innodb-vs-hanoi.png}
  5. +9 −9 enable-lsm_btree → enable-hanoi
  6. +1 −1 include/{lsm_btree.hrl → hanoi.hrl}
  7. +14 −14 src/{basho_bench_driver_lsm_btree.erl → basho_bench_driver_hanoi.erl}
  8. +3 −3 src/{lsm_btree.app.src → hanoi.app.src}
  9. +22 −22 src/{lsm_btree.erl → hanoi.erl}
  10. +1 −1 src/{lsm_btree.hrl → hanoi.hrl}
  11. +3 −3 src/{lsm_btree_app.erl → hanoi_app.erl}
  12. +6 −6 src/{lsm_btree_bbench.config → hanoi_bbench.config}
  13. +3 −3 src/{lsm_btree_fold_worker.erl → hanoi_fold_worker.erl}
  14. +22 −22 src/{lsm_btree_level.erl → hanoi_level.erl}
  15. +24 −24 src/{lsm_btree_merger.erl → hanoi_merger.erl}
  16. +11 −11 src/{lsm_btree_nursery.erl → hanoi_nursery.erl}
  17. +7 −7 src/{lsm_btree_reader.erl → hanoi_reader.erl}
  18. +2 −2 src/{lsm_btree_sup.erl → hanoi_sup.erl}
  19. +3 −3 src/{lsm_btree_temp_riak_kv_backend.erl → hanoi_temp_riak_kv_backend.erl}
  20. +2 −2 src/{lsm_btree_util.erl → hanoi_util.erl}
  21. +8 −8 src/{lsm_btree_writer.erl → hanoi_writer.erl}
  22. +34 −34 src/{riak_kv_lsm_btree_backend.erl → riak_kv_hanoi_backend.erl}
  23. +11 −11 test/{lsm_btree_drv.erl → hanoi_drv.erl}
  24. +9 −9 test/{lsm_btree_merger_tests.erl → hanoi_merger_tests.erl}
  25. +29 −29 test/{lsm_btree_tests.erl → hanoi_tests.erl}
  26. +16 −16 test/{lsm_btree_writer_tests.erl → hanoi_writer_tests.erl}
  27. +1 −1 levelpresence.sh → visualize-hanoi.sh
View
@@ -28,15 +28,15 @@ clean-test-btrees:
rm -fr .eunit/Btree_* .eunit/simple
plt: compile
- $(DIALYZER) --build_plt --output_plt .lsm_btree.plt \
+ $(DIALYZER) --build_plt --output_plt .hanoi.plt \
-pa deps/plain_fsm/ebin \
-pa deps/ebloom/ebin \
deps/plain_fsm/ebin \
deps/ebloom/ebin \
--apps kernel stdlib
analyze: compile
- $(DIALYZER) --plt .lsm_btree.plt \
+ $(DIALYZER) --plt .hanoi.plt \
-pa deps/plain_fsm/ebin \
-pa deps/ebloom/ebin \
ebin
View
@@ -1,22 +1,22 @@
-# LSM B-Tree Storage
+# Hanoi Key/Value Storage Engine
-This erlang-based storage engine implements a structure somewhat like LSM-trees (Log-Structured Merge Trees). The notes below describe how this storage engine work; I have not done extensive studies as how it differs from other storage mechanisms, but a brief brows through available online resources on LSM-trees and Fractal Trees indicates that this storage engine is quite different in several respects.
+This Erlang-based storage engine implements a structure somewhat like LSM-trees (Log-Structured Merge Trees, see docs/10.1.1.44.2782.pdf). The notes below describe how this storage engine work; I have not done extensive studies as how it differs from other storage mechanisms, but a brief brows through available online resources on LSM-trees indicates that this storage engine is quite different in several respects.
-The storage engine may eventually provide an alternative backend for Basho's Riak. Of the existing backends, `lsm_btree` is closest to LevelDB in operational characteristics, but it is implemented in just ~1000 lines of Erlang.
+The storage engine can function as an alternative backend for Basho's Riak/KV.
Here's the bullet list:
- Very fast writes and deletes,
- Reasonably fast reads (N records are stored in log<sub>2</sub>(N) B-trees),
- Operations-friendly "append-only" storage (allows you to backup live system, and crash-recovery is very simple)
-- The cost of evicting stale key/values is amortized into insertion, so you don't need to schedule merge to happen at off-peak hours.
+- The cost of evicting stale key/values is amortized into insertion, so you don't need to schedule merge to happen at off-peak hours.
- Supports range queries (and thus eventually Riak 2i.)
- Doesn't need a boat load of RAM
- All in 1000 lines of pure Erlang code
Once we're a bit more stable, we'll provide a Riak backend.
-## How a LSM-BTree Works
+## How this LSM-BTree Works
If there are N records, there are in log<sub>2</sub>(N) levels (each being a plain B-tree in a file named "A-*level*.data"). The file `A-0.data` has 1 record, `A-1.data` has 2 records, `A-2.data` has 4 records, and so on: `A-n.data` has 2<sup>n</sup> records.
@@ -30,11 +30,11 @@ Lookup is quite simple: starting at `A-0.data`, the sought for Key is searched i
### Insertion
Insertion works by a mechanism known as B-tree injection. Insertion always starts by constructing a fresh B-tree with 1 element in it, and "injecting" that B-tree into level #0. So you always inject a B-tree of the same size as the size of the level you're injecting it into.
-- If the level being injected into empty (there is no A-*level*.data file), then the injected B-tree becomes the contents for that level (we just rename the file).
-- Otherwise,
+- If the level being injected into empty (there is no A-*level*.data file), then the injected B-tree becomes the contents for that level (we just rename the file).
+- Otherwise,
- The injected tree file is renamed to B-*level*.data;
- - The files A-*level*.data and B-*level*.data are merged into a new temporary B-tree (of roughly double size), X-*level*.data.
- - The outcome of the merge is then injected into the next level.
+ - The files A-*level*.data and B-*level*.data are merged into a new temporary B-tree (of roughly double size), X-*level*.data.
+ - The outcome of the merge is then injected into the next level.
While merging, lookups at level *n* first consults the B-*n*.data file, then the A-*n*.data file. At a given level, there can only be one merge operation active.
@@ -48,23 +48,23 @@ Deletes are the same: they are also done by inserting a tombstone (a special val
The really clever thing about this storage mechanism is that merging is guaranteed to be able to "keep up" with insertion. Bitcask for instance has a similar merging phase, but it is separated from insertion. This means that there can suddenly be a lot of catching up to do. The flip side is that you can then decide to do all merging at off-peak hours, but it is yet another thing that need to be configured.
-With LSM B-Trees; back-pressure is provided by the injection mechanism, which only returns when an injection is complete. Thus, every 2nd insert needs to wait for level #0 to finish the required merging; which - assuming merging has linear I/O complexity - is enough to guarantee that the merge mechanism can keep up at higher-numbered levels.
+With LSM B-Trees; back-pressure is provided by the injection mechanism, which only returns when an injection is complete. Thus, every 2nd insert needs to wait for level #0 to finish the required merging; which - assuming merging has linear I/O complexity - is enough to guarantee that the merge mechanism can keep up at higher-numbered levels.
A further trouble is that merging does in fact not have completely linear I/O complexity, because reading from a small file that was recently written is faster that reading from a file that was written a long time ago (because of OS-level caching); thus doing a merge at level #*N+1* is sometimes more than twice as slow as doing a merge at level #*N*. Because of this, sustained insert pressure may produce a situation where the system blocks while merging, though it does require an extremely high level of inserts. We're considering ways to alleviate this.
-Merging can be going on concurrently at each level (in preparation for an injection to the next level), which lets you utilize available multi-core capacity to merge.
+Merging can be going on concurrently at each level (in preparation for an injection to the next level), which lets you utilize available multi-core capacity to merge.
-### Deploying the lsm_btree for testing with Riak/KV
+### Deploying the hanoi for testing with Riak/KV
-You can deploy `lsm_btree` into a Riak devrel cluster using the
-`enable-lsm_btree` script. Clone the `riak` repo, change your working directory
-to it, and then execute the `enable-lsm_btree` script. It adds `lsm_btree` as a
+You can deploy `hanoi` into a Riak devrel cluster using the
+`enable-hanoi` script. Clone the `riak` repo, change your working directory
+to it, and then execute the `enable-hanoi` script. It adds `hanoi` as a
dependency, runs `make all devrel`, and then modifies the configuration
-settings of the resulting dev nodes to use the lsm_btree storage backend.
+settings of the resulting dev nodes to use the hanoi storage backend.
1. `git clone git://github.com/basho/riak.git`
1. `cd riak/deps`
-1. `git clone git://github.com/basho/lsm_btree.git`
+1. `git clone git://github.com/basho/hanoi.git`
1. `cd ..`
-1. `./deps/lsm_btree/enable-lsm_btree` # which does `make all devrel`
+1. `./deps/hanoi/enable-hanoi` # which does `make all devrel`
View
@@ -1,4 +1,4 @@
-* lsm_btree
+* hanoi
* [cleanup] add @doc strings and and -spec's
* [cleanup] check to make sure every error returns with a reason {error, Reason}
* [feature] statistics
@@ -23,24 +23,24 @@
serviced, then picks up from there again. So you could see intermittent
puts in a subsequent batch of results.
-* riak_kv_lsm_tree_backend
+* riak_kv_hanoie_backend
* add support for time-based expiry
* finish support for 2i
* add stats collection
- For each level {#merges, {merge-time-min, max, average}}
PHASE 2:
-* lsm_btree
+* hanoi
* Define a standard struct which is the metadata added at the end of the
file, e.g. [btree-nodes] [meta-data] [offset of meta-data]. This is written
- in lsm_btree_writer:flush_nodes, and read in lsm_btree_reader:open2.
+ in hanoi_writer:flush_nodes, and read in hanoi_reader:open2.
* [feature] compression, encryption on disk
PHASE 3:
* lsm_ixdb
- * lsm_{btree, ctrie, ...} support for sub-databases and associations with
+ * hanoi{btree, trie, ...} support for sub-databases and associations with
different index types
* [major change] add more CAPABILITIES such as
test-and-set(Fun, Key, Value) - to compare a vclock quickly, to speed up
@@ -56,7 +56,7 @@ REVIEW LITERATURE AND OTHER SIMILAR IMPLEMENTATAIONS:
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.44.2782&rep=rep1&type=pdf
-1: make the "first level" have more thatn 2^5 entries (controlled by the constant TOP_LEVEL in lsm_btree.hrl); this means a new set of files is opened/closed/merged for every 32 insert/updates/deletes. Setting this higher will just make the nursery correspondingly larger, which should be absolutely fine.
+1: make the "first level" have more thatn 2^5 entries (controlled by the constant TOP_LEVEL in hanoi.hrl); this means a new set of files is opened/closed/merged for every 32 insert/updates/deletes. Setting this higher will just make the nursery correspondingly larger, which should be absolutely fine.
2: Right now, the streaming btree writer emits a btree page based on number of elements. This could be changed to be based on the size of the node (say, some block-size boudary) and then add padding at the end so that each node read becomes a clean block transfer. Right now, we're probably taking way to many reads.
@@ -1,12 +1,12 @@
#!/bin/sh
-# This script adds lsm_btree to a riak github repo. Run it in the riak repo
+# This script adds hanoi to a riak github repo. Run it in the riak repo
# directory.
#
-# First it adds lsm_btree, then runs "make all devrel" and then enables the
-# lsm_btree storage backend in the resulting dev nodes.
+# First it adds hanoi, then runs "make all devrel" and then enables the
+# hanoi storage backend in the resulting dev nodes.
#
-# This script is intended to be temporary. Once lsm_btree is made into a proper
+# This script is intended to be temporary. Once hanoi is made into a proper
# riak citizen, this script will no longer be needed.
set -e
@@ -35,17 +35,17 @@ fi
./rebar get-deps
file=./deps/riak_kv/src/riak_kv.app.src
-if ! grep -q lsm_btree $file ; then
+if ! grep -q hanoi $file ; then
echo
echo "Modifying $file, saving the original as ${file}.orig ..."
- perl -i.orig -pe '/\bos_mon,/ && print qq( lsm_btree,\n)' $file
+ perl -i.orig -pe '/\bos_mon,/ && print qq( hanoi,\n)' $file
fi
file=./deps/riak_kv/rebar.config
-if ! grep -q lsm_btree $file ; then
+if ! grep -q hanoi $file ; then
echo
echo "Modifying $file, saving the original as ${file}.orig ..."
- perl -i.orig -pe '/\bsext\b/ && print qq( {lsm_btree, ".*", {git, "git\@github.com:basho/lsm_btree.git", "master"}},\n)' $file
+ perl -i.orig -pe '/\bsext\b/ && print qq( {hanoi, ".*", {git, "git\@github.com:basho/hanoi.git", "master"}},\n)' $file
fi
./rebar get-deps
@@ -55,6 +55,6 @@ make all devrel
echo
echo 'Modifying all dev/dev*/etc/app.config files, saving originals with .orig suffix...'
-perl -i.orig -ne 'if (/\bstorage_backend,/) { s/(storage_backend, )[^\}]+/\1riak_kv_lsm_btree_backend/; print } elsif (/\{eleveldb,/) { $eleveldb++; print } elsif ($eleveldb && /^\s+\]\},/) { $eleveldb = 0; print; print qq(\n {lsm_btree, [\n {data_root, "./data/lsm_btree"}\n ]},\n\n) } else { print }' dev/dev*/etc/app.config
+perl -i.orig -ne 'if (/\bstorage_backend,/) { s/(storage_backend, )[^\}]+/\1riak_kv_hanoi_backend/; print } elsif (/\{eleveldb,/) { $eleveldb++; print } elsif ($eleveldb && /^\s+\]\},/) { $eleveldb = 0; print; print qq(\n {hanoi, [\n {data_root, "./data/hanoi"}\n ]},\n\n) } else { print }' dev/dev*/etc/app.config
exit 0
@@ -1,6 +1,6 @@
%% ----------------------------------------------------------------------------
%%
-%% lsm_btree: LSM-trees (Log-Structured Merge Trees) Indexed Storage
+%% hanoi: LSM-trees (Log-Structured Merge Trees) Indexed Storage
%%
%% Copyright 2011-2012 (c) Trifork A/S. All Rights Reserved.
%% http://trifork.com/ info@trifork.com
@@ -1,6 +1,6 @@
%% ----------------------------------------------------------------------------
%%
-%% lsm_btree: LSM-trees (Log-Structured Merge Trees) Indexed Storage
+%% hanoi: LSM-trees (Log-Structured Merge Trees) Indexed Storage
%%
%% Copyright 2011-2012 (c) Trifork A/S. All Rights Reserved.
%% http://trifork.com/ info@trifork.com
@@ -22,7 +22,7 @@
%%
%% ----------------------------------------------------------------------------
--module(basho_bench_driver_lsm_btree).
+-module(basho_bench_driver_hanoi).
-record(state, { tree,
filename,
@@ -33,7 +33,7 @@
-export([new/1,
run/4]).
--include("lsm_btree.hrl").
+-include("hanoi.hrl").
-include_lib("basho_bench/include/basho_bench.hrl").
-record(btree_range, { from_key = <<>> :: binary(),
@@ -48,30 +48,30 @@
new(_Id) ->
%% Make sure bitcask is available
- case code:which(lsm_btree) of
+ case code:which(hanoi) of
non_existing ->
- ?FAIL_MSG("~s requires lsm_btree to be available on code path.\n",
+ ?FAIL_MSG("~s requires hanoi to be available on code path.\n",
[?MODULE]);
_ ->
ok
end,
%% Get the target directory
- Dir = basho_bench_config:get(lsm_btree_dir, "."),
- Filename = filename:join(Dir, "test.lsm_btree"),
+ Dir = basho_bench_config:get(hanoi_dir, "."),
+ Filename = filename:join(Dir, "test.hanoi"),
%% Look for sync interval config
- case basho_bench_config:get(lsm_btree_sync_interval, infinity) of
+ case basho_bench_config:get(hanoi_sync_interval, infinity) of
Value when is_integer(Value) ->
SyncInterval = Value;
infinity ->
SyncInterval = infinity
end,
%% Get any bitcask flags
- case lsm_btree:open(Filename) of
+ case hanoi:open(Filename) of
{error, Reason} ->
- ?FAIL_MSG("Failed to open lsm btree in ~s: ~p\n", [Filename, Reason]);
+ ?FAIL_MSG("Failed to open hanoi in ~s: ~p\n", [Filename, Reason]);
{ok, FBTree} ->
{ok, #state { tree = FBTree,
filename = Filename,
@@ -80,7 +80,7 @@ new(_Id) ->
end.
run(get, KeyGen, _ValueGen, State) ->
- case lsm_btree:lookup(State#state.tree, KeyGen()) of
+ case hanoi:lookup(State#state.tree, KeyGen()) of
{ok, _Value} ->
{ok, State};
not_found ->
@@ -89,14 +89,14 @@ run(get, KeyGen, _ValueGen, State) ->
{error, Reason}
end;
run(put, KeyGen, ValueGen, State) ->
- case lsm_btree:put(State#state.tree, KeyGen(), ValueGen()) of
+ case hanoi:put(State#state.tree, KeyGen(), ValueGen()) of
ok ->
{ok, State};
{error, Reason} ->
{error, Reason}
end;
run(delete, KeyGen, _ValueGen, State) ->
- case lsm_btree:delete(State#state.tree, KeyGen()) of
+ case hanoi:delete(State#state.tree, KeyGen()) of
ok ->
{ok, State};
{error, Reason} ->
@@ -105,7 +105,7 @@ run(delete, KeyGen, _ValueGen, State) ->
run(fold_100, KeyGen, _ValueGen, State) ->
[From,To] = lists:usort([KeyGen(), KeyGen()]),
- case lsm_btree:sync_fold_range(State#state.tree,
+ case hanoi:sync_fold_range(State#state.tree,
fun(_Key,_Value,Count) ->
Count+1
end,
@@ -1,6 +1,6 @@
%% ----------------------------------------------------------------------------
%%
-%% lsm_btree: LSM-trees (Log-Structured Merge Trees) Indexed Storage
+%% hanoi: LSM-trees (Log-Structured Merge Trees) Indexed Storage
%%
%% Copyright 2011-2012 (c) Trifork A/S. All Rights Reserved.
%% http://trifork.com/ info@trifork.com
@@ -22,7 +22,7 @@
%%
%% ----------------------------------------------------------------------------
-{application, lsm_btree,
+{application, hanoi,
[
{description, ""},
{vsn, "1.0.0"},
@@ -31,6 +31,6 @@
kernel,
stdlib
]},
- {mod, {lsm_btree_app, []}},
+ {mod, {hanoi_app, []}},
{env, []}
]}.
Oops, something went wrong.

0 comments on commit 43f095b

Please sign in to comment.