Permalink
Browse files

Merge branch 'master' into bitcask-badcrc-eunit-test

Preparing to bring the bad crc unit test into the present
  • Loading branch information...
engelsanchez committed Jan 31, 2013
2 parents 93e5d4c + 605992e commit c8b3c8728dd13ecb979dc9a70a7a6517b0ceea2c
Showing with 9,807 additions and 5,803 deletions.
  1. +12 −0 .travis.yml
  2. +3 −0 README.org
  3. +64 −0 docs/hashtree.md
  4. BIN docs/hashtree.png
  5. +48 −0 include/riak_kv_dtrace.hrl
  6. +20 −0 include/riak_kv_mrc_sink.hrl
  7. +8 −12 include/riak_kv_vnode.hrl
  8. BIN rebar
  9. +2 −6 rebar.config
  10. +1,097 −0 src/hashtree.erl
  11. +0 −46 src/lk.erl
  12. +11 −2 src/riak.erl
  13. +71 −310 src/riak_client.erl
  14. +37 −5 src/riak_index.erl
  15. +5 −9 src/riak_kv.app.src
  16. +141 −7 src/riak_kv_app.erl
  17. +25 −37 src/riak_kv_bitcask_backend.erl
  18. +2 −1 src/riak_kv_bucket.erl
  19. +22 −31 src/riak_kv_buckets_fsm.erl
  20. +176 −5 src/riak_kv_console.erl
  21. +2 −0 src/riak_kv_coverage_filter.erl
  22. +30 −82 src/riak_kv_delete.erl
  23. +194 −42 src/riak_kv_eleveldb_backend.erl
  24. +9 −11 src/riak_kv_encoding_migrate.erl
  25. +195 −0 src/riak_kv_entropy_info.erl
  26. +677 −0 src/riak_kv_entropy_manager.erl
  27. +287 −0 src/riak_kv_exchange_fsm.erl
  28. +81 −0 src/riak_kv_fsm_timing.erl
  29. +19 −16 src/riak_kv_get_core.erl
  30. +226 −85 src/riak_kv_get_fsm.erl
  31. +84 −0 src/riak_kv_get_put_monitor.erl
  32. +43 −46 src/riak_kv_index_fsm.erl
  33. +682 −0 src/riak_kv_index_hashtree.erl
  34. +6 −10 src/riak_kv_js_manager.erl
  35. +19 −2 src/riak_kv_js_vm.erl
  36. +0 −125 src/riak_kv_keylister_legacy.erl
  37. +0 −48 src/riak_kv_keylister_legacy_sup.erl
  38. +0 −80 src/riak_kv_keylister_master.erl
  39. +33 −46 src/riak_kv_keys_fsm.erl
  40. +0 −284 src/riak_kv_keys_fsm_legacy.erl
  41. +0 −49 src/riak_kv_keys_fsm_legacy_sup.erl
  42. +5 −6 src/riak_kv_legacy_vnode.erl
  43. +0 −352 src/riak_kv_lru.erl
  44. +0 −262 src/riak_kv_map_master.erl
  45. +0 −301 src/riak_kv_map_phase.erl
  46. +0 −311 src/riak_kv_mapper.erl
  47. +0 −48 src/riak_kv_mapper_sup.erl
  48. +0 −80 src/riak_kv_mapred_cache.erl
  49. +0 −75 src/riak_kv_mapred_planner.erl
  50. +0 −214 src/riak_kv_mapred_query.erl
  51. +1 −1 src/riak_kv_mapred_term.erl
  52. +4 −4 src/riak_kv_mapreduce.erl
  53. +272 −80 src/riak_kv_memory_backend.erl
  54. +11 −6 src/riak_kv_mrc_map.erl
  55. +296 −46 src/riak_kv_mrc_pipe.erl
  56. +434 −0 src/riak_kv_mrc_sink.erl
  57. +83 −0 src/riak_kv_mrc_sink_sup.erl
  58. +11 −5 src/riak_kv_multi_backend.erl
  59. +131 −0 src/riak_kv_pb_bucket.erl
  60. +102 −0 src/riak_kv_pb_index.erl
  61. +0 −61 src/riak_kv_pb_listener.erl
  62. +222 −0 src/riak_kv_pb_mapred.erl
  63. +303 −0 src/riak_kv_pb_object.erl
  64. +0 −646 src/riak_kv_pb_socket.erl
  65. +0 −44 src/riak_kv_pb_socket_sup.erl
  66. +0 −35 src/riak_kv_phase_proto.erl
  67. +59 −17 src/riak_kv_pipe_get.erl
  68. +58 −19 src/riak_kv_pipe_index.erl
  69. +57 −19 src/riak_kv_pipe_listkeys.erl
  70. +11 −1 src/riak_kv_put_core.erl
  71. +205 −78 src/riak_kv_put_fsm.erl
  72. +0 −112 src/riak_kv_reduce_phase.erl
  73. +313 −692 src/riak_kv_stat.erl
  74. +410 −0 src/riak_kv_stat_bc.erl
  75. +2 −7 src/riak_kv_status.erl
  76. +13 −45 src/riak_kv_sup.erl
  77. +183 −7 src/riak_kv_test_util.erl
  78. +84 −22 src/riak_kv_util.erl
  79. +298 −142 src/riak_kv_vnode.erl
  80. +3 −5 src/riak_kv_w_reduce.erl
  81. +3 −0 src/riak_kv_wm_buckets.erl
  82. +5 −35 src/riak_kv_wm_index.erl
  83. +3 −0 src/riak_kv_wm_keylist.erl
  84. +15 −16 src/riak_kv_wm_link_walker.erl
  85. +127 −225 src/riak_kv_wm_mapred.erl
  86. +48 −20 src/riak_kv_wm_object.erl
  87. +24 −5 src/riak_kv_wm_props.erl
  88. +5 −9 src/riak_kv_wm_stats.erl
  89. +48 −1 src/riak_kv_wm_utils.erl
  90. +350 −0 src/riak_kv_yessir_backend.erl
  91. +5 −2 src/riak_object.erl
  92. +227 −25 test/backend_eqc.erl
  93. +61 −0 test/fsm_eqc_util.erl
  94. +60 −28 test/get_fsm_qc.erl
  95. +292 −0 test/get_put_monitor_eqc.erl
  96. +227 −0 test/hashtree_eqc.erl
  97. +62 −138 test/keys_fsm_eqc.erl
  98. +314 −152 test/mapred_test.erl
  99. +29 −7 test/put_fsm_eqc.erl
View
@@ -0,0 +1,12 @@
+language: erlang
+script: (rebar compile && rebar eunit skip_deps=true) || (find . -name "*.log" -print -exec cat \{\} \; && sh -c "exit 1")
+notifications:
+ webhooks: http://basho-engbot.herokuapp.com/travis?key=ad9a6e51d706903e1fd0963c7b8e064b93e85b56
+ email: eng@basho.com
+before_script:
+ - "ulimit -n 4096"
+otp_release:
+ - R15B01
+ - R15B
+ - R14B04
+ - R14B03
View
@@ -1,5 +1,8 @@
* riak_kv
** Overview
+
+[[http://travis-ci.org/basho/riak_kv][Travis-CI]] :: [[https://secure.travis-ci.org/basho/riak_kv.png]]
+
Riak KV is an open source Erlang application that is distributed using the [[https://github.com/basho/riak_core][riak_core]] Erlang
library. Riak KV provides a key/value datastore and features MapReduce, lightweight data relations, and several different client APIs.
View
@@ -0,0 +1,64 @@
+`hashtree.erl` implements a fixed-sized hash tree, avoiding any need
+for rebalancing. The tree consists of a fixed number of on-disk
+`segments` and a hash tree constructed over these `segments`. Each
+level of the tree is grouped into buckets based on a fixed `tree
+width`. Each hash at level `i` corresponds to the hash of a bucket of
+hashes at level `i+1`. The following figure depicts a tree with 16
+segments and a tree-width of 4:
+
+![image](https://github.com/basho/riak_kv/raw/jdb-hashtree/docs/hashtree.png)
+
+To insert a new `(key, hash)` pair, the key is hashed and mapped to
+one of the segments. The `(key, hash)` pair is then stored in the
+appropriate segment, which is an ordered `(key, hash)` dictionary. The
+given segment is then marked as dirty. Whenever `update_tree` is
+called, the hash for each dirty segment is re-computed, the
+appropriate leaf node in the hash tree updated, and the hash tree is
+updated bottom-up as necessary. Only paths along which hashes have
+been changed are re-computed.
+
+The current implementation uses LevelDB for the heavy lifting. Rather
+than reading/writing the on-disk segments as a unit, `(key, hash)`
+pairs are written to LevelDB as simple key-value pairs. The LevelDB
+key written is the binary `<<$s, SegmentId:64/integer,
+Key/binary>>`. Thus, inserting a new key-value hash is nothing more
+than a single LevelDB write. Likewise, key-hash pairs for a segment
+are laided on sequentially on-disk based on key sorting. An in-memory
+bitvector is used to track dirty segments, although a `gb_sets` was
+formerly used.
+
+When updating the segment hashes, a LevelDB iterator is used to access
+the segment keys in-order. The iterator seeks to the beginning of the
+segment and then iterators through all of the key-hash pairs. As an
+optimization, the iteration process is designed to read in multiple
+segments when possible. For example, if the list of dirty segments was
+`[1, 2, 3, 5, 6, 10]`, the code will seek an iterator to the beginning
+of segment 1, iterator through all of its keys, compute the
+appropriate segment 1 hash, then continue to traverse through segment
+2 and segment 3's keys, updating those hashes as well. After segment
+3, a new iterator will be created to seek to the beginning of segment
+5, and handle both 5, and 6; and then a final iterator used to access
+segment 10. This design works very well when constructing a new tree
+from scratch. There's a phase of inserting a bunch of key-hash pairs
+(all writes), followed by an in-order traversal of the LevelDB
+database (all reads).
+
+Trees are compared using standard hash tree approach, comparing the
+hash at each level, and recursing to the next level down when
+different. After reaching the leaf nodes, any differing hashes results
+in a key exchange of the keys in the associated differing segments.
+
+By default, the hash tree itself is entirely in-memory. However, the
+code provides a `MEM_LEVEL` paramemter that specifics that levels
+greater than the parameter should be stored on-disk instead. These
+buckets are simply stored on disk in the same LevelDB structure as
+`{$b, Level, Bucket} -> orddict(Key, Hash)}` objects.
+
+The default settings use `1024*1024` segments with a tree width of
+`1024`. Thus, the resulting tree is only 3 levels deep. And there
+are only `1+1024+1024*1024` hashs stored in memory -- so, a few
+MB per hash tree. Given `1024*1024` on-disk segments, and assuming
+the code uniformly hashes keys to each segment, you end up with ~1000
+keys per segment with a 1 billion key hash tree. Thus, a single key
+difference would require 3 hash exchanges and a key exchange of
+1000 keys to determine the differing key.
View
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
@@ -0,0 +1,48 @@
+-include_lib("riak_core/include/riak_core_dtrace.hrl").
+
+%% Main wrapper macro for DTrace/SystemTap probe annotations
+%% NOTE: We assume there will be per-module dtrace_int() and dtrace() funcs!
+
+-define(DTRACE(Category, Ints, Strings),
+ dtrace_int(Category, Ints, Strings)).
+
+%% Probe categories
+-define(C_GET_FSM_INIT, 500).
+-define(C_GET_FSM_PREPARE, 501).
+-define(C_GET_FSM_VALIDATE, 502).
+-define(C_GET_FSM_EXECUTE, 503).
+-define(C_GET_FSM_PREFLIST, 504).
+-define(C_GET_FSM_WAITING_R, 505).
+-define(C_GET_FSM_WAITING_R_TIMEOUT, 506).
+-define(C_GET_FSM_CLIENT_REPLY, 507).
+-define(C_GET_FSM_FINALIZE, 508).
+-define(C_GET_FSM_MAYBE_DELETE, 509).
+-define(C_GET_FSM_RR, 510).
+-define(C_GET_FSM_WAITING_RR, 511).
+-define(C_GET_FSM_WAITING_RR_TIMEOUT, 512).
+
+-define(C_PUT_FSM_INIT, 520).
+-define(C_PUT_FSM_PREPARE, 521).
+-define(C_PUT_FSM_VALIDATE, 522).
+-define(C_PUT_FSM_PRECOMMIT, 523).
+-define(C_PUT_FSM_EXECUTE_LOCAL, 524).
+-define(C_PUT_FSM_WAITING_LOCAL_VNODE, 525).
+-define(C_PUT_FSM_EXECUTE_REMOTE, 526).
+-define(C_PUT_FSM_WAITING_REMOTE_VNODE, 527).
+-define(C_PUT_FSM_PROCESS_REPLY, 528).
+-define(C_PUT_FSM_POSTCOMMIT, 529).
+-define(C_PUT_FSM_FINISH, 530).
+-define(C_PUT_FSM_DECODE_PRECOMMIT, 531). % errors only
+-define(C_PUT_FSM_DECODE_POSTCOMMIT, 532). % errors only
+
+-define(C_DELETE_INIT1, 535).
+-define(C_DELETE_INIT2, 536).
+-define(C_DELETE_REAPER_GET_DONE, 537).
+
+-define(C_BUCKETS_INIT, 540).
+-define(C_BUCKETS_PROCESS_RESULTS, 541).
+-define(C_BUCKETS_FINISH, 542).
+
+-define(C_KEYS_INIT, 545).
+-define(C_KEYS_PROCESS_RESULTS, 546).
+-define(C_KEYS_FINISH, 547).
@@ -0,0 +1,20 @@
+%% used to communicate from riak_kv_mrc_sink to riak_kv_wm_mapred and
+%% riak_kv_pb_mapred
+-record(kv_mrc_sink,
+ {
+ ref :: reference(), % the pipe ref
+ results :: [{PhaseId::integer(), Result::term()}],
+ logs :: [{PhaseId::integer(), Message::term()}],
+ done :: boolean()
+ }).
+
+%% used by riak_kv_mrc_sink:mapred_stream_sink
+-record(mrc_ctx,
+ {
+ ref :: reference(), % the pipe ref (so we don't have to dig)
+ pipe :: riak_pipe:pipe(),
+ sink :: {pid(), reference()}, % sink and monitor
+ sender :: {pid(), reference()}, % async sender and monitor
+ timer :: {reference(), reference()}, % timeout timer and pipe ref
+ keeps :: integer()
+ }).
View
@@ -11,15 +11,6 @@
bkey :: {binary(), binary()},
req_id :: non_neg_integer()}).
--record(riak_kv_mget_req_v1, {
- bkeys :: list({binary(), binary()}),
- req_id :: non_neg_integer(),
- from :: term()}).
-
--record(riak_kv_listkeys_req_v1, {
- bucket :: binary(),
- req_id :: non_neg_integer()}).
-
-record(riak_kv_listkeys_req_v2, {
bucket :: binary()|'_'|tuple(),
req_id :: non_neg_integer(),
@@ -39,7 +30,13 @@
-record(riak_kv_index_req_v1, {
bucket :: binary() | tuple(),
- item_filter :: function(),
+ item_filter :: riak_kv_coverage_filter:filter(),
+ qry :: riak_index:query_def()}).
+
+%% same as _v1, but triggers ack-based backpressure
+-record(riak_kv_index_req_v2, {
+ bucket :: binary() | tuple(),
+ item_filter :: riak_kv_coverage_filter:filter(),
qry :: riak_index:query_def()}).
-record(riak_kv_vnode_status_req_v1, {}).
@@ -60,10 +57,9 @@
-define(KV_PUT_REQ, #riak_kv_put_req_v1).
-define(KV_GET_REQ, #riak_kv_get_req_v1).
--define(KV_MGET_REQ, #riak_kv_mget_req_v1).
-define(KV_LISTBUCKETS_REQ, #riak_kv_listbuckets_req_v1).
-define(KV_LISTKEYS_REQ, #riak_kv_listkeys_req_v4).
--define(KV_INDEX_REQ, #riak_kv_index_req_v1).
+-define(KV_INDEX_REQ, #riak_kv_index_req_v2).
-define(KV_VNODE_STATUS_REQ, #riak_kv_vnode_status_req_v1).
-define(KV_DELETE_REQ, #riak_kv_delete_req_v1).
-define(KV_MAP_REQ, #riak_kv_map_req_v1).
View
BIN rebar
Binary file not shown.
View
@@ -1,7 +1,7 @@
-{require_otp_vsn, "R13B04|R14"}.
{cover_enabled, true}.
{edoc_opts, [{preprocess, true}]}.
{erl_opts, [warnings_as_errors, {parse_transform, lager_transform}]}.
+{eunit_opts, [verbose]}.
{erl_first_files, [
"src/riak_kv_backend.erl",
@@ -10,9 +10,6 @@
{deps, [
{riak_core, ".*", {git, "git://github.com/basho/riak_core", "master"}},
- {riakc, ".*", {git, "git://github.com/basho/riak-erlang-client",
- "master"}},
- {luke, ".*", {git, "git://github.com/basho/luke", "master"}},
{erlang_js, ".*", {git, "git://github.com/basho/erlang_js", "master"}},
{bitcask, ".*", {git, "git://github.com/basho/bitcask", "master"}},
{merge_index, ".*", {git, "git://github.com/basho/merge_index",
@@ -24,6 +21,5 @@
{sext, ".*", {git, "git://github.com/esl/sext", "master"}},
{riak_pipe, ".*", {git, "git://github.com/basho/riak_pipe.git",
"master"}},
- {basho_metrics, ".*", {git, "git://github.com/basho/basho_metrics.git",
- "master"}}
+ {riak_api, ".*", {git, "git://github.com/basho/riak_api.git", "master"}}
]}.
Oops, something went wrong.

0 comments on commit c8b3c87

Please sign in to comment.