Permalink
Browse files

Merge branch '2.0'

  • Loading branch information...
2 parents 333169e + ffd5d8a commit 8a0d80547fe8ee2fb95f7f657050e4232fe92612 Jared Morrow committed Sep 4, 2014
Showing with 13,427 additions and 4,167 deletions.
  1. +6 −1 .gitignore
  2. +3 −2 .travis.yml
  3. +3 −25 Makefile
  4. +1 −1 README.md
  5. +17 −0 dialyzer.ignore-warnings
  6. +64 −0 docs/hashtree.md
  7. BIN docs/hashtree.png
  8. +0 −139 ebin/riak_core.app
  9. +50 −0 include/riak_core_bg_manager.hrl
  10. +3 −0 include/riak_core_bucket_type.hrl
  11. +0 −100 include/riak_core_connection.hrl
  12. +2 −2 include/riak_core_handoff.hrl
  13. +17 −0 include/riak_core_metadata.hrl
  14. +7 −3 include/riak_core_vnode.hrl
  15. +165 −0 priv/riak_core.schema
  16. BIN rebar
  17. +12 −12 rebar.config
  18. +3 −3 src/chash.erl
  19. +13 −8 src/chashbin.erl
  20. +461 −0 src/dvvset.erl
  21. +30 −16 src/gen_nb_server.erl
  22. +1,235 −0 src/hashtree.erl
  23. +565 −0 src/hashtree_tree.erl
  24. +0 −90 src/json_pp.erl
  25. +0 −374 src/merkerl.erl
  26. +68 −0 src/riak_core.app.src
  27. +24 −5 src/riak_core.erl
  28. +0 −6 src/riak_core.proto
  29. +18 −16 src/riak_core_apl.erl
  30. +16 −21 src/riak_core_app.erl
  31. +947 −0 src/riak_core_bg_manager.erl
  32. +626 −0 src/riak_core_broadcast.erl
  33. +43 −0 src/riak_core_broadcast_handler.erl
  34. +101 −56 src/riak_core_bucket.erl
  35. +256 −0 src/riak_core_bucket_props.erl
  36. +238 −0 src/riak_core_bucket_type.erl
  37. +34 −9 src/riak_core_capability.erl
  38. +3 −2 src/riak_core_claim.erl
  39. +10 −14 src/riak_core_claim_sim.erl
  40. +1 −1 src/riak_core_claim_util.erl
  41. +375 −25 src/riak_core_claimant.erl
  42. +0 −176 src/riak_core_config.erl
  43. +0 −244 src/riak_core_connection.erl
  44. +0 −622 src/riak_core_connection_mgr.erl
  45. +0 −284 src/riak_core_connection_mgr_stats.erl
  46. +345 −24 src/riak_core_console.erl
  47. +165 −0 src/riak_core_console_table.erl
  48. +3 −3 src/riak_core_coverage_fsm.erl
  49. +1 −1 src/riak_core_coverage_plan.erl
  50. +104 −0 src/riak_core_dist_mon.erl
  51. +8 −13 src/riak_core_dtrace.erl
  52. +4 −4 src/riak_core_format.erl
  53. +35 −21 src/riak_core_gen_server.erl
  54. +50 −34 src/riak_core_gossip.erl
  55. +3 −2 src/riak_core_gossip_legacy.erl
  56. +1 −1 src/riak_core_handoff_listener.erl
  57. +1 −5 src/riak_core_handoff_manager.erl
  58. +28 −10 src/riak_core_handoff_receiver.erl
  59. +143 −49 src/riak_core_handoff_sender.erl
  60. +389 −0 src/riak_core_metadata.erl
  61. +301 −0 src/riak_core_metadata_exchange_fsm.erl
  62. +364 −0 src/riak_core_metadata_hashtree.erl
  63. +685 −0 src/riak_core_metadata_manager.erl
  64. +149 −0 src/riak_core_metadata_object.erl
  65. +107 −0 src/riak_core_mochiglobal.erl
  66. +154 −0 src/riak_core_net_ticktime.erl
  67. +19 −13 src/riak_core_node_watcher.erl
  68. +50 −0 src/riak_core_pw_auth.erl
  69. +49 −36 src/riak_core_ring.erl
  70. +35 −12 src/riak_core_ring_handler.erl
  71. +45 −28 src/riak_core_ring_manager.erl
  72. +1,332 −0 src/riak_core_security.erl
  73. +0 −498 src/riak_core_service_mgr.erl
  74. +113 −42 src/riak_core_ssl_util.erl
  75. +5 −5 src/riak_core_stat.erl
  76. +10 −9 src/riak_core_stat_cache.erl
  77. +40 −5 src/riak_core_stat_calc_proc.erl
  78. +3 −3 src/riak_core_stat_calc_sup.erl
  79. +7 −3 src/riak_core_stat_q.erl
  80. +2 −2 src/riak_core_stats_sup.erl
  81. +1 −1 src/riak_core_status.erl
  82. +27 −24 src/riak_core_sup.erl
  83. +1 −1 src/riak_core_sysmon_handler.erl
  84. +87 −14 src/riak_core_tcp_mon.erl
  85. +22 −1 src/riak_core_test_util.erl
  86. +14 −14 src/riak_core_tracer.erl
  87. +159 −31 src/riak_core_util.erl
  88. +107 −45 src/riak_core_vnode.erl
  89. +17 −20 src/riak_core_vnode_manager.erl
  90. +17 −17 src/riak_core_vnode_master.erl
  91. +25 −19 src/riak_core_vnode_proxy.erl
  92. +3 −3 src/riak_core_vnode_proxy_sup.erl
  93. +4 −3 src/riak_core_vnode_worker_pool.erl
  94. +0 −102 src/riak_core_web.erl
  95. +0 −81 src/riak_core_wm_urlmap.erl
  96. +6 −7 src/supervisor_pre_r14b04.erl
  97. +98 −8 src/vclock.erl
  98. +892 −0 test/bg_manager_eqc.erl
  99. +163 −0 test/bg_manager_tests.erl
  100. +365 −0 test/btypes_eqc.erl
  101. +48 −114 test/bucket_fixup_test.erl
  102. +18 −14 test/core_vnode_eqc.erl
  103. +219 −0 test/hashtree_eqc.erl
  104. +28 −7 test/mock_vnode.erl
  105. +1 −1 test/node_watcher_qc.erl
  106. +0 −252 test/riak_core_connection_mgr_tests.erl
  107. +0 −136 test/riak_core_connection_tests.erl
  108. +108 −0 test/riak_core_schema_tests.erl
  109. +247 −0 test/riak_core_security_tests.erl
  110. +0 −172 test/riak_core_service_mgr_tests.erl
  111. +60 −0 test/site1-cert.pem
  112. +16 −0 test/site1-key.pem
  113. +60 −0 test/site2-cert.pem
  114. +16 −0 test/site2-key.pem
  115. +112 −0 test/sync_command_test.erl
  116. +207 −0 test/vclock_qc.erl
  117. +112 −0 tools.mk
View
@@ -1,8 +1,13 @@
.eunit/*
deps/*
priv/*
+ebin/*
*.o
-*.beam
include/*_pb.hrl
*~
doc/*
+/.eqc-info
+/current_counterexample.eqc
+.local_dialyzer_plt
+dialyzer_warnings
+dialyzer_unhandled_warnings
View
@@ -3,8 +3,9 @@ notifications:
webhooks: http://basho-engbot.herokuapp.com/travis?key=72ce513a4a26166521f60f72511bfb905329db87
email: eng@basho.com
otp_release:
+ - R16B
+ - R15B03
+ - R15B02
- R15B01
- R15B
- - R14B04
- - R14B03
View
@@ -1,13 +1,12 @@
-APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \
+DIALYZER_APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \
public_key mnesia syntax_tools compiler
-COMBO_PLT = $(HOME)/.riak_core_combo_dialyzer_plt
PULSE_TESTS = worker_pool_pulse
.PHONY: deps test
all: deps compile
-compile:
+compile: deps
./rebar compile
deps:
@@ -19,31 +18,10 @@ clean:
distclean: clean
./rebar delete-deps
-test: all
- ./rebar skip_deps=true eunit
-
# You should 'clean' before your first run of this target
# so that deps get built with PULSE where needed.
pulse:
./rebar compile -D PULSE
./rebar eunit -D PULSE skip_deps=true suite=$(PULSE_TESTS)
-docs: deps
- ./rebar skip_deps=true doc
-
-build_plt: compile
- dialyzer --build_plt --output_plt $(COMBO_PLT) --apps $(APPS) \
- deps/*/ebin
-
-check_plt: compile
- dialyzer --check_plt --plt $(COMBO_PLT) --apps $(APPS) \
- deps/*/ebin
-
-dialyzer: compile
- @echo
- @echo Use "'make check_plt'" to check PLT prior to using this target.
- @echo Use "'make build_plt'" to build PLT prior to using this target.
- @echo
- dialyzer --plt $(COMBO_PLT) ebin
-
-
+include tools.mk
View
@@ -35,7 +35,7 @@ There are numerous ways to file issues or start conversations around
something Core related
* The
- [Riak Users List](lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
+ [Riak Users List](http://lists.basho.com/mailman/listinfo/riak-users_lists.basho.com)
is the main place for all discussion around Riak.
* There is a
[Riak Core-specific mailing list](http://lists.basho.com/mailman/listinfo/riak-core_lists.basho.com)
@@ -0,0 +1,17 @@
+riak_core.erl:85: The pattern 'true' can never match the type 'false'
+riak_core.erl:187: The pattern 'true' can never match the type 'false'
+riak_core.erl:205: The pattern <'true', _> can never match the type <'false',_>
+riak_core.erl:241: The pattern 'true' can never match the type 'false'
+riak_core.erl:264: Function legacy_remove/1 will never be called
+riak_core_claimant.erl:370: The pattern 'legacy' can never match the type {'error','invalid_resize_claim'} | {'ok',[{_,_},...]}
+riak_core_claimant.erl:407: The pattern {'legacy', _} can never match the type {'error','invalid_resize_claim'} | {'ok',{'chstate_v2',_,'undefined' | [any()],{_,_},'undefined' | dict(),'undefined' | {_,_},'undefined' | [any()],[any()],_,'undefined' | [any()],'undefined' | [any()]}}
+riak_core_claimant.erl:913: The pattern {'legacy', _} can never match the type {'error','invalid_resize_claim'} | {'ok',{'chstate_v2',_,'undefined' | [any()],{_,_},'undefined' | dict(),'undefined' | {_,_},'undefined' | [any()],[any()],_,'undefined' | [any()],'undefined' | [any()]}}
+riak_core_claimant.erl:941: The pattern {'true', 'true'} can never match the type {'true','false'}
+riak_core_console.erl:102: The pattern 'true' can never match the type 'false'
+riak_core_gossip.erl:214: The pattern 1 can never match the type 2
+riak_core_gossip.erl:229: The pattern 'true' can never match the type 'false'
+riak_core_gossip.erl:245: The pattern 'true' can never match the type 'false'
+riak_core_ring_handler.erl:73: The pattern {'true', _, _, _} can never match the type {'false',boolean(),[integer()],'down' | 'exiting' | 'invalid' | 'joining' | 'leaving' | 'valid'}
+Unknown functions:
+ cluster_info:format/3
+ cluster_info:register_app/1
View
@@ -0,0 +1,64 @@
+`hashtree.erl` implements a fixed-sized hash tree, avoiding any need
+for rebalancing. The tree consists of a fixed number of on-disk
+`segments` and a hash tree constructed over these `segments`. Each
+level of the tree is grouped into buckets based on a fixed `tree
+width`. Each hash at level `i` corresponds to the hash of a bucket of
+hashes at level `i+1`. The following figure depicts a tree with 16
+segments and a tree-width of 4:
+
+![image](https://github.com/basho/riak_kv/raw/jdb-hashtree/docs/hashtree.png)
+
+To insert a new `(key, hash)` pair, the key is hashed and mapped to
+one of the segments. The `(key, hash)` pair is then stored in the
+appropriate segment, which is an ordered `(key, hash)` dictionary. The
+given segment is then marked as dirty. Whenever `update_tree` is
+called, the hash for each dirty segment is re-computed, the
+appropriate leaf node in the hash tree updated, and the hash tree is
+updated bottom-up as necessary. Only paths along which hashes have
+been changed are re-computed.
+
+The current implementation uses LevelDB for the heavy lifting. Rather
+than reading/writing the on-disk segments as a unit, `(key, hash)`
+pairs are written to LevelDB as simple key-value pairs. The LevelDB
+key written is the binary `<<$s, SegmentId:64/integer,
+Key/binary>>`. Thus, inserting a new key-value hash is nothing more
+than a single LevelDB write. Likewise, key-hash pairs for a segment
+are laid out sequentially on-disk based on key sorting. An in-memory
+bitvector is used to track dirty segments, although a `gb_sets` was
+formerly used.
+
+When updating the segment hashes, a LevelDB iterator is used to access
+the segment keys in-order. The iterator seeks to the beginning of the
+segment and then iterates through all of the key-hash pairs. As an
+optimization, the iteration process is designed to read in multiple
+segments when possible. For example, if the list of dirty segments was
+`[1, 2, 3, 5, 6, 10]`, the code will seek an iterator to the beginning
+of segment 1, iterate through all of its keys, compute the
+appropriate segment 1 hash, then continue to traverse through segment
+2 and segment 3's keys, updating those hashes as well. After segment
+3, a new iterator will be created to seek to the beginning of segment
+5, and handle both 5 and 6; and then a final iterator used to access
+segment 10. This design works very well when constructing a new tree
+from scratch. There's a phase of inserting a bunch of key-hash pairs
+(all writes), followed by an in-order traversal of the LevelDB
+database (all reads).
+
+Trees are compared using the standard hash tree approach, comparing the
+hash at each level, and recursing to the next level down when
+different. After reaching the leaf nodes, any differing hashes result
+in a key exchange of the keys in the associated differing segments.
+
+By default, the hash tree itself is entirely in-memory. However, the
+code provides a `MEM_LEVEL` parameter that specifies that levels
+greater than the parameter should be stored on-disk instead. These
+buckets are simply stored on disk in the same LevelDB structure as
+`{$b, Level, Bucket} -> orddict(Key, Hash)` objects.
+
+The default settings use `1024*1024` segments with a tree width of
+`1024`. Thus, the resulting tree is only 3 levels deep. And there
+are only `1+1024+1024*1024` hashes stored in memory -- so, a few
+MB per hash tree. Given `1024*1024` on-disk segments, and assuming
+the code uniformly hashes keys to each segment, you end up with ~1000
+keys per segment with a 1 billion key hash tree. Thus, a single key
+difference would require 3 hash exchanges and a key exchange of
+1000 keys to determine the differing key.
View
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
@@ -1,139 +0,0 @@
-%% -*- tab-width: 4;erlang-indent-level: 4;indent-tabs-mode: nil -*-
-%% ex: ts=4 sw=4 et
-{application, riak_core,
-[
- {description, "Riak Core"},
- {vsn, "1.4.1"},
- {modules, [
- app_helper,
- bloom,
- chash,
- chashbin,
- gen_nb_server,
- riak_core_gen_server,
- json_pp,
- merkerl,
- riak_core_priority_queue,
- process_proxy,
- riak_core_gossip_legacy,
- riak_core,
- riak_core_apl,
- riak_core_app,
- riak_core_bucket,
- riak_core_cinfo_core,
- riak_core_claimant,
- riak_core_claim,
- riak_core_claim_sim,
- riak_core_claim_util,
- riak_core_new_claim,
- riak_core_capability,
- riak_core_config,
- riak_core_console,
- riak_core_coverage_fsm,
- riak_core_coverage_plan,
- riak_core_dtrace,
- riak_core_eventhandler_guard,
- riak_core_eventhandler_sup,
- riak_core_format,
- riak_core_gossip,
- riak_core_handoff_listener,
- riak_core_handoff_listener_sup,
- riak_core_handoff_manager,
- riak_core_handoff_receiver,
- riak_core_handoff_receiver_sup,
- riak_core_handoff_sender,
- riak_core_handoff_sender_sup,
- riak_core_handoff_sup,
- riak_core_nodeid,
- riak_core_node_watcher,
- riak_core_node_watcher_events,
- riak_core_pb,
- riak_core_repair,
- riak_core_ring,
- riak_core_ring_events,
- riak_core_ring_handler,
- riak_core_ring_manager,
- riak_core_ring_util,
- riak_core_send_msg,
- riak_core_stat,
- riak_core_stat_cache,
- riak_core_stat_calc_proc,
- riak_core_stat_calc_sup,
- riak_core_stat_sup,
- riak_core_stat_q,
- riak_core_stats_sup,
- riak_core_status,
- riak_core_sup,
- riak_core_sysmon_handler,
- riak_core_sysmon_minder,
- riak_core_tracer,
- riak_core_test_util,
- riak_core_util,
- riak_core_vnode,
- riak_core_vnode_manager,
- riak_core_vnode_master,
- riak_core_vnode_proxy,
- riak_core_vnode_proxy_sup,
- riak_core_vnode_sup,
- riak_core_vnode_worker,
- riak_core_vnode_worker_pool,
- riak_core_web,
- riak_core_wm_urlmap,
- riak_core_connection_mgr,
- riak_core_connection_mgr_stats,
- riak_core_connection,
- riak_core_service_mgr,
- riak_core_tcp_mon,
- riak_core_ssl_util,
- supervisor_pre_r14b04,
- vclock
- ]},
- {registered, []},
- {included_applications, [folsom]},
- {applications, [
- kernel,
- stdlib,
- lager,
- sasl,
- crypto,
- riak_sysmon,
- webmachine,
- os_mon
- ]},
- {mod, { riak_core_app, []}},
- {env, [
- %% Cluster name
- {cluster_name, "default"},
-
- %% Default location of ringstate
- {ring_state_dir, "data/ring"},
-
- %% Default ring creation size. Make sure it is a power of 2,
- %% e.g. 16, 32, 64, 128, 256, 512 etc
- {ring_creation_size, 64},
-
- %% Default gossip interval (milliseconds)
- {gossip_interval, 60000},
-
- %% Target N value
- {target_n_val, 4},
-
- %% Default claims functions
- {wants_claim_fun, {riak_core_claim, default_wants_claim}},
- {choose_claim_fun, {riak_core_claim, default_choose_claim}},
-
- %% Vnode inactivity timeout (how often to check if fallback vnodes
- %% should return their data) in ms.
- {vnode_inactivity_timeout, 60000},
-
- %% Number of VNodes allowed to do handoff concurrently.
- {handoff_concurrency, 2},
-
- %% Disable Nagle on HTTP sockets
- {disable_http_nagle, true},
-
- %% Handoff IP/port
- {handoff_port, 8099},
- {handoff_ip, "0.0.0.0"}
- ]}
- ]}.
@@ -0,0 +1,50 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2013 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+-type bg_lock() :: any().
+-type bg_token() :: any().
+-type bg_resource() :: bg_token() | bg_lock().
+-type bg_resource_type() :: lock | token.
+
+-type bg_meta() :: undefined | {atom(), any()}. %% meta data to associate with a lock/token
+-type bg_period() :: pos_integer(). %% token refill period in milliseconds
+-type bg_count() :: pos_integer(). %% token refill tokens to count at each refill period
+-type bg_rate() :: undefined | {bg_period(), bg_count()}. %% token refill rate
+-type bg_concurrency_limit() :: non_neg_integer() | infinity. %% max lock concurrency allowed
+-type bg_consumer() :: {undefined | pid(), bg_meta()}. %% a consumer of a resource
+
+%% Results of a "ps" of live given or blocked locks/tokens
+-record(bg_stat_live,
+ {
+ resource :: bg_resource(), %% resource name, e.g. 'aae_hashtree_lock'
+ type :: bg_resource_type(), %% resource type, e.g. 'lock'
+ owner :: bg_consumer() %% this consumer has the lock or token
+ }).
+-type bg_stat_live() :: #bg_stat_live{}.
+
+-define(BG_INFO_ETS_TABLE, background_mgr_info_table). %% name of lock/token manager info ETS table
+-define(BG_INFO_ETS_OPTS, [public, %% creation time properties of info ETS table
+ named_table,
+ set]).
+
+-define(BG_ENTRY_ETS_TABLE, background_mgr_entry_table). %% name of lock/token manager entry ETS table
+-define(BG_ENTRY_ETS_OPTS, [public, %% creation time properties of entry ETS table
+ named_table,
+ bag]).
+
+
@@ -0,0 +1,3 @@
+-define(BUCKET_TYPE_PREFIX, {core, bucket_types}).
+-define(DEFAULT_TYPE, <<"default">>).
+
Oops, something went wrong.

0 comments on commit 8a0d805

Please sign in to comment.