
Merge branch '2.0'

2 parents b5bb9f5 + fb63e7a · commit 916610333a5e065355f606c6ca4f9907126088cd · @jaredmorrow committed Sep 4, 2014
Showing with 12,019 additions and 3,154 deletions.
  1. +6 −0 .gitignore
  2. +7 −15 Makefile
  3. +66 −0 dialyzer.ignore-warnings
  4. +0 −64 docs/hashtree.md
  5. BIN docs/hashtree.png
  6. +7 −0 include/riak_kv_dtrace.hrl
  7. +14 −1 include/riak_kv_index.hrl
  8. +38 −1 include/riak_kv_types.hrl
  9. +5 −0 include/riak_kv_vnode.hrl
  10. +3 −0 include/riak_object.hrl
  11. +66 −0 priv/multi_backend.schema
  12. +635 −0 priv/riak_kv.schema
  13. +77 −0 priv/tracers/tracer_accumulating_time.erl
  14. +197 −0 priv/tracers/tracer_backend_latency.erl
  15. +75 −0 priv/tracers/tracer_eleveldb_put_size.erl
  16. +90 −0 priv/tracers/tracer_fsm_init.erl
  17. +102 −0 priv/tracers/tracer_func_args.erl
  18. +80 −0 priv/tracers/tracer_gc_latency.erl
  19. +105 −0 priv/tracers/tracer_large4.erl
  20. +125 −0 priv/tracers/tracer_latency_histogram.erl
  21. +93 −0 priv/tracers/tracer_merge_and_and_handoff.erl
  22. +68 −0 priv/tracers/tracer_read_bin_trace_file.erl
  23. +116 −0 priv/tracers/tracer_timeit.erl
  24. BIN rebar
  25. +22 −13 rebar.config
  26. +3 −1 rebar.config.script
  27. +0 −1,186 src/hashtree.erl
  28. +90 −0 src/json_pp.erl
  29. +268 −80 src/riak_client.erl
  30. +6 −0 src/riak_core.proto
  31. +161 −82 src/riak_index.erl
  32. +34 −3 src/riak_kv.app.src
  33. +695 −0 src/riak_kv_2i_aae.erl
  34. +13 −4 src/riak_kv_app.erl
  35. +62 −28 src/riak_kv_backend.erl
  36. +14 −26 src/riak_kv_backup.erl
  37. +426 −251 src/riak_kv_bitcask_backend.erl
  38. +762 −10 src/riak_kv_bucket.erl
  39. +39 −16 src/riak_kv_buckets_fsm.erl
  40. +16 −1 src/riak_kv_cinfo.erl
  41. +380 −15 src/riak_kv_console.erl
  42. +10 −159 src/riak_kv_counter.erl
  43. +6 −1 src/riak_kv_coverage_filter.erl
  44. +477 −0 src/riak_kv_crdt.erl
  45. +417 −0 src/riak_kv_crdt_json.erl
  46. +124 −32 src/riak_kv_eleveldb_backend.erl
  47. +243 −0 src/riak_kv_ensemble_backend.erl
  48. +496 −0 src/riak_kv_ensemble_console.erl
  49. +194 −0 src/riak_kv_ensembles.erl
  50. +76 −31 src/riak_kv_entropy_info.erl
  51. +210 −9 src/riak_kv_entropy_manager.erl
  52. +52 −53 src/riak_kv_env.erl
  53. +167 −28 src/riak_kv_exchange_fsm.erl
  54. +23 −21 src/riak_kv_gcounter.erl
  55. +1 −6 src/riak_kv_get_core.erl
  56. +163 −86 src/riak_kv_get_fsm.erl
  57. +2 −2 src/riak_kv_get_put_monitor.erl
  58. +118 −0 src/riak_kv_hooks.erl
  59. +33 −10 src/riak_kv_index_fsm.erl
  60. +367 −127 src/riak_kv_index_hashtree.erl
  61. +6 −6 src/riak_kv_keys_fsm.erl
  62. +81 −16 src/riak_kv_mapred_json.erl
  63. +104 −3 src/riak_kv_mapred_term.erl
  64. +110 −36 src/riak_kv_memory_backend.erl
  65. +12 −9 src/riak_kv_mrc_map.erl
  66. +49 −9 src/riak_kv_mrc_pipe.erl
  67. +1 −1 src/riak_kv_mrc_sink.erl
  68. +43 −18 src/riak_kv_multi_backend.erl
  69. +79 −19 src/riak_kv_pb_bucket.erl
  70. +25 −12 src/riak_kv_pb_counter.erl
  71. +356 −0 src/riak_kv_pb_crdt.erl
  72. +24 −5 src/riak_kv_pb_csbucket.erl
  73. +96 −23 src/riak_kv_pb_index.erl
  74. +29 −2 src/riak_kv_pb_mapred.erl
  75. +61 −8 src/riak_kv_pb_object.erl
  76. +10 −2 src/riak_kv_pipe_get.erl
  77. +9 −2 src/riak_kv_pipe_index.erl
  78. +30 −4 src/riak_kv_pipe_listkeys.erl
  79. +32 −26 src/riak_kv_pncounter.erl
  80. +388 −239 src/riak_kv_put_fsm.erl
  81. +228 −83 src/riak_kv_stat.erl
  82. +192 −2 src/riak_kv_stat_bc.erl
  83. +3 −2 src/riak_kv_status.erl
  84. +6 −0 src/riak_kv_sup.erl
  85. +85 −16 src/riak_kv_test_util.erl
  86. +48 −6 src/riak_kv_util.erl
  87. +798 −177 src/riak_kv_vnode.erl
  88. +41 −22 src/riak_kv_web.erl
  89. +262 −0 src/riak_kv_wm_bucket_type.erl
  90. +77 −31 src/riak_kv_wm_buckets.erl
  91. +53 −13 src/riak_kv_wm_counter.erl
  92. +536 −0 src/riak_kv_wm_crdt.erl
Sorry, we could not display the entire diff because it was too big.
6 .gitignore
@@ -2,9 +2,15 @@
deps/*
priv/*
*.o
+include/*_pb.hrl
*.beam
doc
test.*-temp-data
ebin
/.eqc-info
/current_counterexample.eqc
+.local_dialyzer_plt
+dialyzer_unhandled_warnings
+dialyzer_warnings
+tags
+erln8.config
22 Makefile
@@ -1,8 +1,10 @@
.PHONY: deps test
+DIALYZER_FLAGS =
+
all: deps compile
-compile:
+compile: deps
./rebar compile
deps:
@@ -12,20 +14,10 @@ clean:
./rebar clean
rm -rf test.*-temp-data
-distclean: clean
+distclean: clean
./rebar delete-deps
-test: all
- ./rebar skip_deps=true eunit
-
-deps:
- ./rebar get-deps
-
-
-docs:
- ./rebar skip_deps=true doc
-
-dialyzer: compile
- @dialyzer -Wno_return -c apps/riak_kv/ebin
-
+DIALYZER_APPS = kernel stdlib sasl erts ssl tools os_mon runtime_tools crypto inets \
+ xmerl webtool snmp public_key mnesia eunit syntax_tools compiler
+include tools.mk
66 dialyzer.ignore-warnings
@@ -0,0 +1,66 @@
+riak_core_pb.erl
+riak_kv_bitcask_backend.erl:600: Guard test Dir1TS::nonempty_string() == [] can never succeed
+riak_kv_bitcask_backend.erl:602: Guard test Dir2TS::nonempty_string() == [] can never succeed
+riak_kv_gcounter.erl:64: Invalid type specification for function riak_kv_gcounter:new/2. The success typing is (_,pos_integer()) -> {'ok',riak_kv_gcounter:gcounter()}
+riak_kv_gcounter.erl:75: Invalid type specification for function riak_kv_gcounter:update/3. The success typing is ('increment' | {'increment',pos_integer()},_,riak_kv_gcounter:gcounter()) -> {'ok',riak_kv_gcounter:gcounter()}
+riak_kv_gcounter.erl:85: Function merge/2 has no local return
+riak_kv_gcounter.erl:86: The call riak_kv_gcounter:merge(GCnt1::any(),GCnt2::any(),[]) does not have an opaque term of type riak_kv_gcounter:gcounter() as 3rd argument
+riak_kv_gcounter.erl:99: The call riak_kv_gcounter:merge(Rest::any(),RestOfClock2::[tuple()],[{_,_},...]) does not have opaque terms as 2nd and 3rd arguments
+riak_kv_gcounter.erl:101: The call riak_kv_gcounter:merge(Rest::any(),Clock2::[tuple()],[{_,_},...]) does not have opaque terms as 2nd and 3rd arguments
+riak_kv_pb_crdt.erl:138: The call riak_pb_dt_codec:encode_fetch_response(Type::any(),Value::any(),Ctx::'undefined' | binary(),[{'counter','riak_dt_pncounter'} | {'flag','riak_dt_od_flag'} | {'map','riak_dt_map'} | {'register','riak_dt_lwwreg'} | {'set','riak_dt_orswot'},...]) does not have an opaque term of type riak_pb_dt_codec:context() as 3rd argument
+riak_kv_pb_crdt.erl:142: The call riak_pb_dt_codec:encode_fetch_response(Type::any(),'undefined','undefined') breaks the contract (toplevel_type(),toplevel_value(),context()) -> #dtfetchresp{}
+riak_kv_pb_crdt.erl:189: The call riak_pb_dt_codec:encode_update_response(Type::any(),Value::any(),Key::'undefined' | binary(),'undefined' | binary(),[{'counter','riak_dt_pncounter'} | {'flag','riak_dt_od_flag'} | {'map','riak_dt_map'} | {'register','riak_dt_lwwreg'} | {'set','riak_dt_orswot'},...]) does not have an opaque term of type riak_pb_dt_codec:context() as 4th argument
+riak_kv_pb_crdt.erl:337: The call riak_pb_dt_codec:encode_fetch_response('counter' | 'flag' | 'map' | 'register' | 'set' | 'undefined',Val::any(),'undefined',[{'counter','riak_dt_pncounter'} | {'flag','riak_dt_od_flag'} | {'map','riak_dt_map'} | {'register','riak_dt_lwwreg'} | {'set','riak_dt_orswot'},...]) does not have an opaque term of type riak_pb_dt_codec:context() as 3rd argument
+riak_kv_pb_index.erl:93: Clause guard cannot succeed. The pattern {'ok', {'riak_kv_index_v3', _, _, Start, _, _, _, _, _, Re, _}} was matched against the type {'error',{'field_parsing_failed',{_,_}} | {'unknown_field_type',binary()} | {'downgrade_not_supported',_,{_,_,_} | {_,_,_,_} | {_,_,_,_,_,_,_,_,_} | {_,_,_,_,_,_,_,_,_,_,_}} | {'invalid_continuation',{_,_},{_,_,_,_,_,_,_,_,_,_,_}}} | {'ok',{'eq',binary(),'undefined' | binary()} | {'range',binary(),'undefined' | binary(),'undefined' | binary()} | #riak_kv_index_v2{start_key::binary(),filter_field::binary(),start_term::'undefined' | binary(),end_term::'undefined' | binary(),return_terms::boolean(),start_inclusive::boolean(),end_inclusive::boolean(),return_body::boolean()} | #riak_kv_index_v3{start_key::binary(),filter_field::binary(),start_term::'undefined' | binary(),end_term::'undefined' | binary(),return_terms::boolean(),start_inclusive::boolean(),end_inclusive::boolean(),return_body::boolean(),term_regex::'undefined' | binary(),max_results::'undefined' | integer()}}
+riak_kv_pb_index.erl:110: The pattern <{'error', Reason}, _Req, State> can never match the type <{'ok',{'eq',binary(),'undefined' | binary()} | {'range',binary(),'undefined' | binary(),'undefined' | binary()} | #riak_kv_index_v2{start_key::binary(),filter_field::binary(),start_term::'undefined' | binary(),end_term::'undefined' | binary(),return_terms::boolean(),start_inclusive::boolean(),end_inclusive::boolean(),return_body::boolean()} | #riak_kv_index_v3{start_key::binary(),filter_field::binary(),start_term::'undefined' | binary(),end_term::'undefined' | binary(),return_terms::boolean(),start_inclusive::boolean(),end_inclusive::boolean(),return_body::boolean(),term_regex::'undefined' | binary(),max_results::'undefined' | integer()}},#rpbindexreq{term_regex::'undefined' | binary() | maybe_improper_list(binary() | maybe_improper_list(any(),binary() | []) | byte(),binary() | [])},_>
+riak_kv_pipe_index.erl:155: Function queue_existing_pipe/4 has no local return
+riak_kv_pipe_listkeys.erl:159: Function queue_existing_pipe/3 has no local return
+riak_kv_pncounter.erl:100: Function merge/2 has no local return
+riak_kv_put_fsm.erl:233: Function init/1 has no local return
+riak_kv_put_fsm.erl:238: Record construction #state{robj::riak_object:riak_object(),options::[any()],bkey::{binary() | {binary(),binary()},binary()},vnode_options::[],precommit::[],postcommit::[],timing::[{atom(),{non_neg_integer(),non_neg_integer(),non_neg_integer()}},...],trace::'undefined' | {'ok',_},tracked_bucket::'false',bad_coordinators::[]} violates the declared type of field trace::boolean()
+riak_kv_put_fsm.erl:269: The created fun has no local return
+riak_kv_put_fsm.erl:822: The pattern <[COP = {'counter_op', _Amt} | T], Acc> can never match the type <[{'details' | 'dw' | 'n_val' | 'pw' | 'retry_put_coordinator_failure' | 'returnbody' | 'sloppy_quorum' | 'timeout' | 'update_last_modified' | 'w','false' | 'infinity' | 'true' | [any()] | non_neg_integer()},...],[{'details' | 'dw' | 'n_val' | 'pw' | 'retry_put_coordinator_failure' | 'returnbody' | 'sloppy_quorum' | 'timeout' | 'update_last_modified' | 'w','false' | 'infinity' | 'true' | [any()] | non_neg_integer()}]>
+riak_kv_put_fsm.erl:825: The pattern <[COP = {'crdt_op', _Op} | T], Acc> can never match the type <[{'details' | 'dw' | 'n_val' | 'pw' | 'retry_put_coordinator_failure' | 'returnbody' | 'sloppy_quorum' | 'timeout' | 'update_last_modified' | 'w','false' | 'infinity' | 'true' | [any()] | non_neg_integer()},...],[{'details' | 'dw' | 'n_val' | 'pw' | 'retry_put_coordinator_failure' | 'returnbody' | 'sloppy_quorum' | 'timeout' | 'update_last_modified' | 'w','false' | 'infinity' | 'true' | [any()] | non_neg_integer()}]>
+riak_kv_util.erl:294: The pattern 'error' can never match the type 'ok' | 'undefined'
+riak_kv_vnode.erl:23: Callback info about the riak_core_vnode behaviour is not available
+riak_kv_vnode.erl:839: The pattern Q = {'riak_kv_index_v3', _, _, _, _, _, _, _, _, RE, _} can never match the type 'undefined' | {_,_}
+riak_kv_vnode.erl:849: The pattern <{'riak_kv_index_v3', _, _, _, _, _, _, _, _, _, N}, DefaultSize> can never match the type <'undefined' | {_,_},'undefined' | pos_integer()>
+riak_kv_vnode.erl:1092: The call riak_kv_vnode:raw_put(HOTarget::atom(),Key::{binary() | {binary(),binary()},binary()},Obj::riak_object:riak_object()) will never return since it differs in the 1st argument from the success typing arguments: ({_,_},any(),any())
+riak_kv_vnode.erl:1214: Function raw_put/3 has no local return
+riak_kv_vnode.erl:1214: The pattern <{Idx, Node}, Key, Obj> can never match the type <atom(),{binary() | {binary(),binary()},binary()},riak_object:riak_object()>
+riak_kv_vnode.erl:1254: Function do_backend_delete/3 has no local return
+riak_kv_vnode.erl:1922: Function delete_from_hashtree/3 has no local return
+riak_kv_vnode.erl:1926: The call riak_kv_index_hashtree:async_delete(Items::[{'object',{_,_}},...],Trees::'undefined' | pid()) breaks the contract ({binary(),binary()} | [{binary(),binary()}],pid()) -> 'ok'
+riak_kv_vnode.erl:1929: The call riak_kv_index_hashtree:delete(Items::[{'object',{_,_}},...],Trees::'undefined' | pid()) breaks the contract ([{binary(),binary()}],pid()) -> 'ok'
+riak_kv_vnode.erl:2065: Function update_index_delete_stats/1 will never be called
+riak_kv_wm_crdt.erl:387: Function produce_json/2 has no local return
+riak_kv_wm_crdt.erl:391: The call riak_kv_crdt_json:fetch_response_to_json(Type::'counter' | 'flag' | 'map' | 'register' | 'set' | 'undefined',Value::any(),'undefined' | binary(),[{'counter','riak_dt_pncounter'} | {'flag','riak_dt_od_flag'} | {'map','riak_dt_map'} | {'register','riak_dt_lwwreg'} | {'set','riak_dt_orswot'},...]) does not have an opaque term of type riak_kv_crdt_json:context() as 3rd argument
+riak_kv_wm_index.erl:214: Guard test is_integer(NormStart::'undefined' | binary()) can never succeed
+riak_kv_wm_utils.erl:288: The pattern {Scheme, _, Host, Port, _, _} can never match the type {'error','no_scheme' | {_,atom(),_}}
+cluster_info:dump_all_connected/1
+cluster_info:dump_nodes/2
+cluster_info:format/3
+cluster_info:register_app/1
+riak_kv_console.erl:154: The pattern {'error', 'legacy_mode'} can never match the type {'error','is_up' | 'not_member' | 'only_member'}
+# Callback info not available
+Callback info about the riak_pipe_vnode_worker behaviour is not available
+Callback info about the riak_core_vnode_worker behaviour is not available
+Callback info about the riak_core_vnode behaviour is not available
+Callback info about the riak_core_coverage_fsm behaviour is not available
+Callback info about the riak_kv_backend behaviour is not available
+Unknown functions:
+ dtrace:init/0
+ yz_kv:index/3
+ yz_kv:should_handoff/1
+ yz_stat:search_stats/0
+Unknown types:
+ base64:ascii_binary/0
+ calendar:t_now/0
+ mochijson2:json_object/0
+ mochijson2:json_string/0
+ mochijson2:json_term/0
+ mochijson2:json_value/0
+ proplist:proplist/0
+ riak_core_apl:preflist2/0
+ riak_dt:operation/0
+ riak_dt:value/0
64 docs/hashtree.md
@@ -1,64 +0,0 @@
-`hashtree.erl` implements a fixed-sized hash tree, avoiding any need
-for rebalancing. The tree consists of a fixed number of on-disk
-`segments` and a hash tree constructed over these `segments`. Each
-level of the tree is grouped into buckets based on a fixed `tree
-width`. Each hash at level `i` corresponds to the hash of a bucket of
-hashes at level `i+1`. The following figure depicts a tree with 16
-segments and a tree-width of 4:
-
-![image](https://github.com/basho/riak_kv/raw/jdb-hashtree/docs/hashtree.png)
-
-To insert a new `(key, hash)` pair, the key is hashed and mapped to
-one of the segments. The `(key, hash)` pair is then stored in the
-appropriate segment, which is an ordered `(key, hash)` dictionary. The
-given segment is then marked as dirty. Whenever `update_tree` is
-called, the hash for each dirty segment is re-computed, the
-appropriate leaf node in the hash tree updated, and the hash tree is
-updated bottom-up as necessary. Only paths along which hashes have
-been changed are re-computed.
-
-The current implementation uses LevelDB for the heavy lifting. Rather
-than reading/writing the on-disk segments as a unit, `(key, hash)`
-pairs are written to LevelDB as simple key-value pairs. The LevelDB
-key written is the binary `<<$s, SegmentId:64/integer,
-Key/binary>>`. Thus, inserting a new key-value hash is nothing more
-than a single LevelDB write. Likewise, key-hash pairs for a segment
-are laid out sequentially on disk based on key sorting. An in-memory
-bitvector is used to track dirty segments, although a `gb_sets` was
-formerly used.
-
-When updating the segment hashes, a LevelDB iterator is used to access
-the segment keys in-order. The iterator seeks to the beginning of the
-segment and then iterates through all of the key-hash pairs. As an
-optimization, the iteration process is designed to read in multiple
-segments when possible. For example, if the list of dirty segments was
-`[1, 2, 3, 5, 6, 10]`, the code will seek an iterator to the beginning
-of segment 1, iterate through all of its keys, compute the
-appropriate segment 1 hash, then continue to traverse through segment
-2 and segment 3's keys, updating those hashes as well. After segment
-3, a new iterator will be created to seek to the beginning of segment
-5, and handle both 5, and 6; and then a final iterator used to access
-segment 10. This design works very well when constructing a new tree
-from scratch. There's a phase of inserting a bunch of key-hash pairs
-(all writes), followed by an in-order traversal of the LevelDB
-database (all reads).
-
-Trees are compared using the standard hash tree approach, comparing the
-hash at each level, and recursing to the next level down when
-different. After reaching the leaf nodes, any differing hashes result
-in a key exchange of the keys in the associated differing segments.
-
-By default, the hash tree itself is entirely in-memory. However, the
-code provides a `MEM_LEVEL` parameter that specifies that levels
-greater than the parameter should be stored on-disk instead. These
-buckets are simply stored on disk in the same LevelDB structure as
-`{$b, Level, Bucket} -> orddict(Key, Hash)}` objects.
-
-The default settings use `1024*1024` segments with a tree width of
-`1024`. Thus, the resulting tree is only 3 levels deep. And there
-are only `1+1024+1024*1024` hashes stored in memory -- so, a few
-MB per hash tree. Given `1024*1024` on-disk segments, and assuming
-the code uniformly hashes keys to each segment, you end up with ~1000
-keys per segment with a 1 billion key hash tree. Thus, a single key
-difference would require 3 hash exchanges and a key exchange of
-1000 keys to determine the differing key.
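
The deleted document above quotes the LevelDB key layout used for segments. As a minimal Erlang sketch of that encoding, assuming nothing beyond the `<<$s, SegmentId:64/integer, Key/binary>>` layout quoted in the text:

    %% Build the LevelDB key under which a (key, hash) pair is stored.
    %% Inserting a new pair is then a single LevelDB write of this key.
    segment_key(SegmentId, Key) when is_integer(SegmentId), is_binary(Key) ->
        <<$s, SegmentId:64/integer, Key/binary>>.
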
BIN docs/hashtree.png
Deleted file not rendered
7 include/riak_kv_dtrace.hrl
@@ -6,6 +6,13 @@
-define(DTRACE(Category, Ints, Strings),
dtrace_int(Category, Ints, Strings)).
+-define(DTRACE(Cond, Category, Ints, Strings),
+ case Cond of
+ true ->
+ dtrace_int(Category, Ints, Strings);
+ _ -> ok
+ end).
+
%% Probe categories
-define(C_GET_FSM_INIT, 500).
-define(C_GET_FSM_PREPARE, 501).
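
A hedged sketch of the new 4-arity macro in use; the function name and flag below are hypothetical, and, like the existing 3-arity form, it assumes the including module defines dtrace_int/3:

    -include("riak_kv_dtrace.hrl").

    %% Fire the GET-FSM-init probe only when dtrace support is enabled;
    %% the guarded ?DTRACE/4 added above expands to a case on the flag.
    maybe_trace_get(Enabled, Bucket) ->
        ?DTRACE(Enabled, ?C_GET_FSM_INIT, [1], [binary_to_list(Bucket)]).
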
15 include/riak_kv_index.hrl
@@ -32,4 +32,17 @@
return_body=false ::boolean() %% Note, only for riak cs bucket folds
}).
--define(KV_INDEX_Q, #riak_kv_index_v2).
+-record(riak_kv_index_v3, {
+ start_key= <<>> :: binary(),
+ filter_field :: binary() | undefined,
+ start_term :: integer() | binary() | undefined, %% Note, in a $key query, start_key==start_term
+ end_term :: integer() | binary() | undefined, %% Note, in an eq query, start==end
+ return_terms=true :: boolean(), %% Note, should be false for an equals query
+ start_inclusive=true :: boolean(),
+ end_inclusive=true :: boolean(),
+ return_body=false ::boolean(), %% Note, only for riak cs bucket folds
+ term_regex :: binary() | undefined,
+ max_results :: integer() | undefined
+ }).
+
+-define(KV_INDEX_Q, #riak_kv_index_v3).
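
For illustration, a v3 range query built through the redefined ?KV_INDEX_Q macro; the index name and bounds are made up, but the two fields marked below are the ones this record adds over v2:

    example_query() ->
        ?KV_INDEX_Q{filter_field = <<"age_int">>,
                    start_term   = <<"21">>,
                    end_term     = <<"65">>,
                    term_regex   = <<"^2">>,   %% new in v3
                    max_results  = 100}.       %% new in v3
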
39 include/riak_kv_types.hrl
@@ -1 +1,38 @@
--define(COUNTER_TYPE, "application/riak_pncounter").
+-record(crdt, {mod, ctype, value}).
+-record(crdt_op, {mod, op, ctx}).
+
+-define(CRDT, #crdt).
+-define(CRDT_OP, #crdt_op).
+
+%% Top Level Key->Type Types
+-define(V1_COUNTER_TYPE, riak_kv_pncounter).
+-define(V1_COUNTER_TYPE(Val), #crdt{mod=?V1_COUNTER_TYPE, ctype="application/riak_counter", value=Val}).
+-define(COUNTER_TYPE, riak_dt_pncounter).
+-define(COUNTER_TYPE(Val), #crdt{mod=?COUNTER_TYPE, ctype="application/riak_counter", value=Val}).
+
+-define(SET_TYPE, riak_dt_orswot).
+-define(SET_TYPE(Val), #crdt{mod=?SET_TYPE, ctype="application/riak_set", value=Val}).
+
+-define(MAP_TYPE, riak_dt_map).
+-define(MAP_TYPE(Val), #crdt{mod=?MAP_TYPE, ctype="application/riak_map", value=Val}).
+
+%% Internal Only Key->Map->Field->Type types
+-define(FLAG_TYPE, riak_dt_od_flag).
+-define(REG_TYPE, riak_dt_lwwreg).
+-define(EMCNTR_TYPE, riak_dt_emcntr).
+
+-define(V1_TOP_LEVEL_TYPES, [pncounter]).
+-define(V2_TOP_LEVEL_TYPES, [?COUNTER_TYPE, ?SET_TYPE, ?MAP_TYPE]).
+-define(TOP_LEVEL_TYPES, ?V1_TOP_LEVEL_TYPES ++ ?V2_TOP_LEVEL_TYPES).
+-define(ALL_TYPES, ?TOP_LEVEL_TYPES ++ [?FLAG_TYPE, ?REG_TYPE]).
+-define(EMBEDDED_TYPES, [{map, ?MAP_TYPE}, {set, ?SET_TYPE},
+ {counter, ?EMCNTR_TYPE}, {flag, ?FLAG_TYPE},
+ {register, ?REG_TYPE}]).
+
+-define(MOD_MAP, [{map, ?MAP_TYPE}, {set, ?SET_TYPE},
+ {counter, ?COUNTER_TYPE}]).
+
+-define(DATATYPE_STATS_DEFAULTS, [actor_count]).
+
+-type crdt() :: ?CRDT{}.
+-type crdt_op() :: ?CRDT_OP{}.
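
A small sketch of the new envelope macros in use, assuming the riak_dt library's riak_dt_pncounter:new/0:

    %% Wrap a freshly created counter in the #crdt{} record; the macro
    %% fills in mod = riak_dt_pncounter and ctype = "application/riak_counter".
    new_wrapped_counter() ->
        ?COUNTER_TYPE(riak_dt_pncounter:new()).
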
5 include/riak_kv_vnode.hrl
@@ -64,3 +64,8 @@
-define(KV_DELETE_REQ, #riak_kv_delete_req_v1).
-define(KV_MAP_REQ, #riak_kv_map_req_v1).
-define(KV_VCLOCK_REQ, #riak_kv_vclock_req_v1).
+
+%% @doc vnode_lock(PartitionIndex) is a kv per-vnode lock, potentially
+%% used by AAE tree rebuilds, fullsync, and handoff.
+%% See @link riak_core_background_mgr:get_lock/1
+-define(KV_VNODE_LOCK(Idx), {vnode_lock, Idx}).
3 include/riak_object.hrl
@@ -0,0 +1,3 @@
+-define(DOT, <<"dot">>). %% The event at which a value was written, stored in metadata
+
+
66 priv/multi_backend.schema
@@ -0,0 +1,66 @@
+%%-*- mode: erlang -*-
+
+%% @doc The default name of a backend when one is not specified.
+{mapping, "multi_backend.default", "riak_kv.multi_backend_default", [
+ hidden
+]}.
+
+{translation,
+ "riak_kv.multi_backend_default",
+ fun(Conf) ->
+ list_to_binary(cuttlefish:conf_get(["multi_backend", "default"], Conf))
+ end}.
+
+%% @doc Storage_backend specifies the Erlang module defining the storage
+%% mechanism that will be used on this node.
+{mapping, "multi_backend.$name.storage_backend", "riak_kv.multi_backend", [
+ {default, bitcask},
+ {datatype, {enum, [bitcask, leveldb, memory]}},
+ hidden
+]}.
+
+{translation,
+ "riak_kv.multi_backend",
+ fun(Conf, Schema) ->
+ GenerateSubConfig = fun(Name, Prefix, ProplistKey, ModuleName) ->
+ BackendConfigName = ["multi_backend", Name],
+ BackendConfigPrefix = BackendConfigName ++ [Prefix],
+ SubConf = [ begin
+ {lists:nthtail(2, Key), Value}
+ end || {Key, Value} <- cuttlefish_variable:filter_by_prefix(BackendConfigPrefix, Conf)],
+
+ case cuttlefish_generator:map(Schema, SubConf) of
+ {error, _Phase, _Errors} ->
+ cuttlefish:invalid(
+ lists:flatten(io_lib:format(
+ "Error processing multi_backend configuration for backend ~s", [Name])));
+ BackendProplist ->
+ Proplist = lists:foldl(
+ fun(K, Acc) ->
+ proplists:get_value(K, Acc, [])
+ end,
+ BackendProplist, ProplistKey),
+ {ModuleName, Proplist}
+ end
+ end,
+ %% group by $name into list, also cut the "multi_backend.$name" off every key
+ BackendNames = cuttlefish_variable:fuzzy_matches(["multi_backend","$name","storage_backend"], Conf),
+ %% for each in list, case statement on backend type
+ Backends = [ begin
+ BackendConfigName = ["multi_backend", Name],
+ {BackendModule, BackendConfig} = case cuttlefish:conf_get(BackendConfigName ++ ["storage_backend"], Conf) of
+ bitcask ->
+ GenerateSubConfig(Name, "bitcask", [bitcask], riak_kv_bitcask_backend);
+ leveldb ->
+ GenerateSubConfig(Name, "leveldb", [eleveldb], riak_kv_eleveldb_backend);
+ memory ->
+ GenerateSubConfig(Name, "memory_backend", [riak_kv, memory_backend], riak_kv_memory_backend)
+ end,
+ {list_to_binary(Name), BackendModule, BackendConfig}
+ end || {"$name", Name} <- BackendNames],
+ case Backends of
+ [] -> throw(unset);
+ _ -> Backends
+ end
+ end
+}.
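
Given the mappings above, a riak.conf fragment this schema would accept might look like the following (the backend names "fast" and "slow" are invented for illustration):

    multi_backend.default = fast
    multi_backend.fast.storage_backend = memory
    multi_backend.slow.storage_backend = leveldb
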
635 priv/riak_kv.schema
@@ -0,0 +1,635 @@
+%%-*- mode: erlang -*-
+
+%% @doc How Riak will repair out-of-sync keys. Some features require
+%% this to be set to 'active', including search.
+%%
+%% * active: out-of-sync keys will be repaired in the background
+%% * passive: out-of-sync keys are only repaired on read
+%% * active-debug: like active, but outputs verbose debugging
+%% information
+{mapping, "anti_entropy", "riak_kv.anti_entropy", [
+ {datatype, {enum, [active, passive, 'active-debug']}},
+ {default, active}
+]}.
+
+{translation,
+ "riak_kv.anti_entropy",
+ fun(Conf) ->
+ Setting = cuttlefish:conf_get("anti_entropy", Conf),
+ case Setting of
+ active -> {on, []};
+ 'active-debug' -> {on, [debug]};
+ passive -> {off, []};
+ _Default -> {on, []}
+ end
+ end
+}.
+
+%% @doc Specifies the storage engine used for Riak's key-value data
+%% and secondary indexes (if supported).
+{mapping, "storage_backend", "riak_kv.storage_backend", [
+ {default, {{storage_backend}} },
+ {datatype, {enum, [bitcask, leveldb, memory, multi]}}
+]}.
+
+{translation,
+ "riak_kv.storage_backend",
+ fun(Conf) ->
+ Setting = cuttlefish:conf_get("storage_backend", Conf),
+ case Setting of
+ bitcask -> riak_kv_bitcask_backend;
+ leveldb -> riak_kv_eleveldb_backend;
+ memory -> riak_kv_memory_backend;
+ multi -> riak_kv_multi_backend;
+ _Default -> riak_kv_bitcask_backend
+ end
+ end}.
+
+%% @doc Restrict how fast AAE can build hash trees. Building the tree
+%% for a given partition requires a full scan over that partition's
+%% data. Once built, trees stay built until they are expired.
+%% * .number is the number of builds
+%% * .per_timespan is the amount of time in which that .number of builds
+%% occurs
+%%
+%% Default is 1 build per hour.
+{mapping, "anti_entropy.tree.build_limit.number", "riak_kv.anti_entropy_build_limit", [
+ {default, 1},
+ {datatype, integer},
+ hidden
+]}.
+
+%% @see anti_entropy.tree.build_limit.number
+{mapping, "anti_entropy.tree.build_limit.per_timespan", "riak_kv.anti_entropy_build_limit", [
+ {default, "1h"},
+ {datatype, {duration, ms}},
+ hidden
+]}.
+
+{translation,
+ "riak_kv.anti_entropy_build_limit",
+ fun(Conf) ->
+ {cuttlefish:conf_get("anti_entropy.tree.build_limit.number", Conf),
+ cuttlefish:conf_get("anti_entropy.tree.build_limit.per_timespan", Conf)}
+ end}.
+
+%% @doc Determine how often hash trees are expired after being built.
+%% Periodically expiring a hash tree ensures the on-disk hash tree
+%% data stays consistent with the actual k/v backend data. It also
+%% helps Riak identify silent disk failures and bit rot. However,
+%% expiration is not needed for normal AAE operation and should be
+%% infrequent for performance reasons. The time is specified in
+%% milliseconds.
+{mapping, "anti_entropy.tree.expiry", "riak_kv.anti_entropy_expire", [
+ {default, "1w"},
+ {datatype, [{duration, ms}, {atom, never}]},
+ hidden
+]}.
+
+%% @doc Limit how many AAE exchanges or builds can happen concurrently.
+{mapping, "anti_entropy.concurrency_limit", "riak_kv.anti_entropy_concurrency", [
+ {default, 2},
+ {datatype, integer},
+ hidden
+]}.
+
+%% @doc The tick determines how often the AAE manager looks for work
+%% to do (building/expiring trees, triggering exchanges, etc).
+%% The default is every 15 seconds. Lowering this value will speed up
+%% the rate at which replicas are synced across the cluster.
+%% Increasing the value is not recommended.
+{mapping, "anti_entropy.trigger_interval", "riak_kv.anti_entropy_tick", [
+ {default, "15s"},
+ {datatype, {duration, ms}},
+ hidden
+]}.
+
+%% @doc The directory where AAE hash trees are stored.
+{mapping, "anti_entropy.data_dir", "riak_kv.anti_entropy_data_dir", [
+ {default, "$(platform_data_dir)/anti_entropy"},
+ hidden,
+ {datatype, directory}
+]}.
+
+%% @doc The LevelDB options used by AAE to generate the LevelDB-backed
+%% on-disk hashtrees.
+%% @see leveldb.write_buffer_size
+{mapping, "anti_entropy.write_buffer_size", "riak_kv.anti_entropy_leveldb_opts.write_buffer_size", [
+ {default, "4MB"},
+ {datatype, bytesize},
+ hidden
+]}.
+
+{mapping, "anti_entropy.max_open_files", "riak_kv.anti_entropy_leveldb_opts.max_open_files", [
+ {default, 20},
+ {datatype, integer},
+ hidden
+]}.
+
+%% @doc Whether the distributed throttle for active anti-entropy is
+%% enabled.
+{mapping, "anti_entropy.throttle", "riak_kv.aae_throttle_kill_switch", [
+ {default, on},
+ {datatype, {flag, off, on}},
+ hidden
+]}.
+
+%% @doc Sets the throttling tiers for active anti-entropy. Each tier
+%% is a minimum vnode mailbox size and a time-delay that the throttle
+%% should observe at that size and above. For example:
+%%
+%% anti_entropy.throttle.tier1.mailbox_size = 0
+%% anti_entropy.throttle.tier1.delay = 0ms
+%% anti_entropy.throttle.tier2.mailbox_size = 40
+%% anti_entropy.throttle.tier2.delay = 5ms
+%%
+%% If configured, there must be a tier which includes a mailbox size
+%% of 0. Both .mailbox_size and .delay must be set for each tier.
+%% @see anti_entropy.throttle
+{mapping,
+ "anti_entropy.throttle.$tier.mailbox_size",
+ "riak_kv.aae_throttle_limits", [
+ {datatype, integer},
+ hidden,
+ {validators, ["non_negative"]}
+]}.
+
+%% @see anti_entropy.throttle.$tier.mailbox_size
+{mapping,
+ "anti_entropy.throttle.$tier.delay",
+ "riak_kv.aae_throttle_limits", [
+ {datatype, {duration, ms}},
+ hidden
+]}.
+
+{validator,
+ "non_negative",
+ "must be greater than or equal to 0",
+ fun(Value) -> Value >= 0 end}.
+
+{translation,
+ "riak_kv.aae_throttle_limits",
+ fun(Conf) ->
+ %% Grab all of the possible names of tiers so we can ensure that
+ %% both mailbox_size and delay are included for each tier.
+ TierNamesM = cuttlefish_variable:fuzzy_matches(["anti_entropy", "throttle", "$tier", "mailbox_size"], Conf),
+ TierNamesD = cuttlefish_variable:fuzzy_matches(["anti_entropy", "throttle", "$tier", "delay"], Conf),
+ TierNames = lists:usort(TierNamesM ++ TierNamesD),
+ Throttles = lists:sort(lists:foldl(
+ fun({"$tier", Tier}, Settings) ->
+ Mbox = cuttlefish:conf_get(["anti_entropy", "throttle", Tier, "mailbox_size"], Conf),
+ Delay = cuttlefish:conf_get(["anti_entropy", "throttle", Tier, "delay"], Conf),
+ [{Mbox - 1, Delay}|Settings]
+ end, [], TierNames)),
+ case Throttles of
+ %% -1 is a magic "minimum" bound and must be included, so if it
+ %% isn't present we call it invalid
+ [{-1,_}|_] -> Throttles;
+ _ -> cuttlefish:invalid("anti_entropy.throttle tiers must include a tier with mailbox_size 0")
+ end
+end
+}.
+
+%% @see leveldb.bloomfilter
+{mapping, "anti_entropy.bloomfilter", "riak_kv.anti_entropy_leveldb_opts.use_bloomfilter", [
+ {default, on},
+ {datatype, flag},
+ hidden
+]}.
+
+%% @doc How many JavaScript virtual machines are available for
+%% executing map functions.
+{mapping, "javascript.map_pool_size", "riak_kv.map_js_vm_count", [
+ {default, {{map_js_vms}} },
+ {datatype, integer},
+ hidden
+]}.
+
+%% @doc How many JavaScript virtual machines are available for
+%% executing reduce functions.
+{mapping, "javascript.reduce_pool_size", "riak_kv.reduce_js_vm_count", [
+ {default, {{reduce_js_vms}} },
+ {datatype, integer},
+ hidden
+]}.
+
+%% @doc How many JavaScript virtual machines are available for
+%% executing pre-commit hook functions.
+{mapping, "javascript.hook_pool_size", "riak_kv.hook_js_vm_count", [
+ {default, {{hook_js_vms}} },
+ {datatype, integer},
+ hidden
+]}.
+
+%% @doc The maximum amount of memory allocated to each JavaScript
+%% virtual machine.
+{mapping, "javascript.maximum_heap_size", "riak_kv.js_max_vm_mem", [
+ {default, "8MB"},
+ {datatype, bytesize},
+ hidden
+]}.
+
+{translation,
+ "riak_kv.js_max_vm_mem",
+ fun(Conf) ->
+ cuttlefish_util:ceiling(cuttlefish:conf_get("javascript.maximum_heap_size", Conf) / 1048576)
+ end}.
+
+%% @doc The maximum amount of thread stack memory to allocate
+%% to each JavaScript virtual machine.
+{mapping, "javascript.maximum_stack_size", "riak_kv.js_thread_stack", [
+ {default, "16MB"},
+ {datatype, bytesize},
+ hidden
+]}.
+
+{translation,
+ "riak_kv.js_thread_stack",
+ fun(Conf) ->
+ cuttlefish_util:ceiling(cuttlefish:conf_get("javascript.maximum_stack_size", Conf) / 1048576)
+ end}.
+
+%% @doc A directory containing Javascript source files which will be
+%% loaded by Riak when it initializes Javascript VMs.
+{mapping, "javascript.source_dir", "riak_kv.js_source_dir", [
+ {commented, "/tmp/js_source"},
+ {datatype, directory},
+ hidden
+]}.
+
+%% We left riak_kv.add_paths out on purpose.
+
+%% @doc The maximum number of concurrent requests of each type (get or
+%% put) that is allowed. Setting this value to infinite disables
+%% overload protection. The 'erlang.process_limit' should be at least
+%% 3 times this setting.
+%% @see erlang.process_limit
+{mapping, "max_concurrent_requests", "riak_kv.fsm_limit", [
+ {default, 50000},
+ {datatype, [integer, {atom, infinite}]},
+ hidden
+]}.
+
+{translation, "riak_kv.fsm_limit",
+ fun(Conf) ->
+ TheLimit = cuttlefish:conf_get("max_concurrent_requests", Conf),
+ case TheLimit of
+ infinite -> undefined;
+ Int when is_integer(Int) -> Int;
+ _ ->
+ cuttlefish:invalid("max_concurrent_requests must be an integer or 'infinite'")
+ end
+ end
+}.
+
+%% @doc If forwarding a PUT to a replica-local coordinator fails,
+%% Riak will retry the operation when this is set to 'on'.
+%% * on = Riak 2.0 behavior (strongly recommended)
+%% * off = Riak 1.x behavior
+{mapping, "retry_put_coordinator_failure", "riak_kv.retry_put_coordinator_failure", [
+ {default, on},
+ {datatype, flag},
+ hidden
+]}.
+
+%% @doc Controls which binary representation of a riak value is stored
+%% on disk.
+%% * 0: Original erlang:term_to_binary format. Higher space overhead.
+%% * 1: New format for more compact storage of small values.
+{mapping, "object.format", "riak_kv.object_format", [
+ {default, 1},
+ {datatype, [{integer, 1}, {integer, 0}]}
+]}.
+
+{translation, "riak_kv.object_format",
+ fun(Conf) ->
+ case cuttlefish:conf_get("object.format", Conf) of
+ 0 -> v0;
+ 1 -> v1;
+ _ -> cuttlefish:invalid("invalid object format version")
+ end
+ end
+}.
+
+%% @doc Controls the size of the metadata cache for each vnode. Set to
+%% 'off' to disable the cache. This shouldn't be necessary for on-disk
+%% backends, but can help performance in some cases (e.g. memory
+%% backend, data fits in block cache, etc). Note that this is the size
+%% of the ETS table, rather than the actual data, to keep the size
+%% calculation simple, thus more space may be used than the simple
+%% size * vnode_count calculation would imply.
+%%
+%% Caution: Do not use without extensive benchmarking.
+{mapping, "metadata_cache_size", "riak_kv.vnode_md_cache_size", [
+ {datatype, [{atom, off}, bytesize]},
+ {default, off}, %% disabled by default, 256KB is a reasonable value
+ hidden
+]}.
+
+{ translation,
+ "riak_kv.vnode_md_cache_size",
+ fun(Conf) ->
+ case cuttlefish:conf_get("metadata_cache_size", Conf) of
+ off -> 0;
+ Size -> Size
+ end
+ end
+}.
+
+%%%% Memory backend section
+%% @doc The maximum amount of memory consumed per vnode by the memory
+%% storage backend. Minimum: 1MB
+{mapping, "memory_backend.max_memory_per_vnode", "riak_kv.memory_backend.max_memory", [
+ {datatype, bytesize},
+ hidden
+]}.
+
+%% @see memory_backend.max_memory
+{mapping, "multi_backend.$name.memory_backend.max_memory_per_vnode", "riak_kv.multi_backend", [
+ {datatype, bytesize},
+ hidden
+]}.
+
+{translation,
+ "riak_kv.memory_backend.max_memory",
+ fun(Conf) ->
+ Bytes = cuttlefish:conf_get("memory_backend.max_memory_per_vnode", Conf),
+ cuttlefish_util:ceiling(Bytes / 1048576)
+ end
+}.
+
+%% @doc Each value will be written with this "time to
+%% live". Once that object's time is up, it will be deleted on the
+%% next read of its key. Minimum: 1s
+{mapping, "memory_backend.ttl", "riak_kv.memory_backend.ttl", [
+ {datatype, {duration, s}},
+ hidden
+]}.
+
+%% @see memory_backend.ttl
+{mapping, "multi_backend.$name.memory_backend.ttl", "riak_kv.multi_backend", [
+ {datatype, {duration, s}},
+ hidden
+]}.
+
+%% @doc Measures were added in Riak 1.2 to counteract cross-site
+%% scripting and request-forgery attacks. Some reverse proxies cannot
+%% remove the Referer header, which makes serving data directly from
+%% Riak impossible. Setting secure_referer_check = off disables this
+%% security check.
+{mapping, "secure_referer_check", "riak_kv.secure_referer_check", [
+ {datatype, flag},
+ {default, on},
+ hidden
+]}.
+
+%% @doc Reading or writing objects bigger than this size will write a
+%% warning in the logs.
+{mapping, "object.size.warning_threshold", "riak_kv.warn_object_size", [
+ {datatype, bytesize},
+ {default, "5MB"}
+]}.
+
+%% @doc Writing an object bigger than this will send a failure to the
+%% client.
+{mapping, "object.size.maximum", "riak_kv.max_object_size", [
+ {datatype, bytesize},
+ {default, "50MB"}
+]}.
+
+%% @doc Writing an object with more than this number of siblings will
+%% generate a warning in the logs.
+{mapping, "object.siblings.warning_threshold", "riak_kv.warn_siblings", [
+ {datatype, integer},
+ {default, 25}
+]}.
+
+%% @doc Writing an object with more than this number of siblings will
+%% send a failure to the client.
+{mapping, "object.siblings.maximum", "riak_kv.max_siblings", [
+ {datatype, integer},
+ {default, 100}
+]}.
+
+%% @doc The strategy used when merging objects that potentially have
+%% conflicts.
+%%
+%% * 2: Riak 2.0 typed bucket default - reduces sibling creation through additional
+%% metadata on each sibling (also known as dotted version vectors)
+%% * 1: Riak 1.4, default buckets, and earlier default - may duplicate siblings
+%% from interleaved writes (sibling explosion).
+{mapping, "buckets.default.merge_strategy", "riak_core.default_bucket_props.dvv_enabled", [
+ {default, '1'},
+ {datatype, {flag, '2', '1'}},
+ hidden
+]}.
+
+%% @doc The number of primary replicas (non-fallback) that must reply
+%% to a read request.
+{mapping, "buckets.default.pr", "riak_core.default_bucket_props.pr", [
+ {datatype, [integer, {enum, [quorum, all]}]},
+ {default, 0},
+ hidden
+]}.
+
+%% @doc The number of replicas which must reply to a read request.
+{mapping, "buckets.default.r", "riak_core.default_bucket_props.r", [
+ {datatype, [{enum, [quorum, all]}, integer]},
+ {default, quorum},
+ hidden
+]}.
+
+%% @doc The number of replicas which must reply to a write request,
+%% indicating that the write was received.
+{mapping, "buckets.default.w", "riak_core.default_bucket_props.w", [
+ {datatype, [{enum, [quorum, all]}, integer]},
+ {default, quorum},
+ hidden
+]}.
+
+%% @doc The number of primary replicas (non-fallback) which must reply
+%% to a write request.
+{mapping, "buckets.default.pw", "riak_core.default_bucket_props.pw", [
+ {datatype, [integer, {enum, [quorum, all]}]},
+ {default, 0},
+ hidden
+]}.
+
+%% @doc The number of replicas which must reply to a write request,
+%% indicating that the write was committed to durable storage.
+{mapping, "buckets.default.dw", "riak_core.default_bucket_props.dw", [
+ {datatype, [{enum, [quorum, all]}, integer]},
+ {default, quorum},
+ hidden
+]}.
+
+%% @doc The number of replicas which must reply to a delete request.
+{mapping, "buckets.default.rw", "riak_core.default_bucket_props.rw", [
+ {datatype, [{enum, [quorum, all]}, integer]},
+ {default, quorum},
+ hidden
+]}.
+
+%% @doc Whether not-founds will count toward a quorum of reads.
+{mapping,
+ "buckets.default.notfound_ok",
+ "riak_core.default_bucket_props.notfound_ok", [
+ {default, true},
+ {datatype, {enum, [true, false]}},
+ hidden
+]}.
+
+%% @doc Whether not-founds will invoke the "basic quorum"
+%% optimization. This setting will short-circuit fetches where the
+%% majority of replicas report that the key is not found. Only used
+%% when notfound_ok = false.
+{mapping,
+ "buckets.default.basic_quorum",
+ "riak_core.default_bucket_props.basic_quorum", [
+ {default, false},
+ {datatype, {enum, [true, false]}},
+ hidden
+]}.
+
+%% @doc Whether or not siblings are allowed, by default, for untyped buckets.
+%% Note: See Vector Clocks for a discussion of sibling resolution.
+{mapping, "buckets.default.allow_mult", "riak_core.default_bucket_props.allow_mult", [
+ {datatype, {enum, [true, false]}},
+ {default, false},
+ hidden
+]}.
+
+%% @doc Whether conflicting writes resolve via timestamp.
+{mapping,
+ "buckets.default.last_write_wins",
+ "riak_core.default_bucket_props.last_write_wins", [
+ {datatype, {enum, [true, false]}},
+ {default, false},
+ hidden
+]}.
+
+%% @doc A space-delimited list of functions that will be run before a
+%% value is stored, and that can abort the write. For Erlang
+%% functions, use "module:function" and for JavaScript, use
+%% "functionName".
+{mapping, "buckets.default.precommit", "riak_core.default_bucket_props.precommit", [
+ hidden
+]}.
+
+{translation, "riak_core.default_bucket_props.precommit",
+ fun(Conf) ->
+ RawString = cuttlefish:conf_get("buckets.default.precommit", Conf, []),
+ StringList = string:tokens(RawString, " "),
+ [ begin
+ case string:tokens(String, ":") of
+            %% A JavaScript hook becomes: {struct, [{<<"name">>, <<"SomeJS.nonsense">>}]}
+ [JavascriptFunction] ->
+ {struct, [{<<"name">>, list_to_binary(JavascriptFunction)}]};
+            %% An Erlang hook becomes: {struct, [{<<"mod">>, <<"module">>}, {<<"fun">>,<<"function">>}]}
+ [Module, Function] ->
+ {struct, [
+ {<<"mod">>, list_to_binary(Module)},
+ {<<"fun">>, list_to_binary(Function)}
+ ]};
+ _ -> cuttlefish:invalid("incorrect hook format '" ++ String ++ "'")
+ end
+ end || String <- StringList]
+ end
+}.
+
+%% @doc A space-delimited list of functions that will be run after a
+%% value is stored. Only Erlang functions are allowed, using the
+%% "module:function" format.
+{mapping, "buckets.default.postcommit", "riak_core.default_bucket_props.postcommit", [
+ hidden
+]}.
+
+{translation, "riak_core.default_bucket_props.postcommit",
+ fun(Conf) ->
+ RawString = cuttlefish:conf_get("buckets.default.postcommit", Conf, []),
+ StringList = string:tokens(RawString, " "),
+ [ begin
+ case string:tokens(String, ":") of
+ [Module, Function] ->
+ {struct, [
+ {<<"mod">>, list_to_binary(Module)},
+ {<<"fun">>, list_to_binary(Function)}
+ ]};
+ _ -> cuttlefish:invalid("incorrect hook format '" ++ String ++ "'")
+ end
+ end || String <- StringList]
+ end
+}.
+
+%% @doc Whether serialized datatypes will use compression, and at what
+%% level. When an integer, this refers to the aggressiveness (and
+%% slowness) of compression, on a scale from 0 to 9. 'on' is
+%% equivalent to 6, 'off' is equivalent to 0.
+{mapping, "datatypes.compression_level", "riak_dt.binary_compression", [
+ {datatype, [integer, flag]},
+ {default, 1},
+ {validators, ["is_compression_value"]},
+ hidden
+]}.
+
+{validator, "is_compression_value", "must be on/off or a value between 0 and 9",
+ fun(Value)->
+ is_boolean(Value) orelse (is_integer(Value) andalso Value =< 9 andalso Value >= 0)
+ end}.
+
+%% @doc Whether to use the background manager to limit KV handoff.
+%% This will help to prevent system response degradation under times
+%% of heavy load from multiple background tasks that contend for the
+%% same resources.
+%% @see background_manager
+{mapping, "handoff.use_background_manager", "riak_kv.handoff_use_background_manager", [
+ {datatype, flag},
+ {default, off},
+ hidden
+]}.
+
+%% @doc The maximum number of times that a secondary system like Riak
+%% Search 2.0 can block handoff of primary key-value data. The
+%% approximate maximum duration that handoff of a vnode can be blocked
+%% is this number multiplied by the value of
+%% "vnode_management_timer". To prevent handoff from ever being
+%% blocked by a secondary system set this value to 0.
+%% @see vnode_management_timer
+{mapping, "handoff.max_rejects", "riak_kv.handoff_rejected_max", [
+ {datatype, integer},
+  {default, 6},
+ hidden
+]}.
+
+%% @doc Whether to use the background manager to limit AAE tree
+%% rebuilds. This will help to prevent system response degradation
+%% under times of heavy load from multiple background tasks that
+%% contend for the same resources.
+%% @see background_manager
+{mapping, "anti_entropy.use_background_manager", "riak_kv.aae_use_background_manager", [
+ {datatype, flag},
+ {default, off},
+ hidden
+]}.
+
+%% @doc Time in between the checks that trigger Bitcask merges.
+{mapping, "bitcask.merge_check_interval", "riak_kv.bitcask_merge_check_interval", [
+ {default, "3m"},
+ {datatype, {duration, ms}},
+ hidden
+]}.
+
+%% @doc Jitter used to randomize the time in between the checks that trigger
+%% Bitcask merges.
+{mapping, "bitcask.merge_check_jitter", "riak_kv.bitcask_merge_check_jitter", [
+ {default, "30%"},
+ {datatype, {percent, float}},
+ hidden
+]}.
+
+%% @doc Maximum amount of data to merge in one go in the Bitcask backend.
+{mapping, "bitcask.max_merge_size", "riak_kv.bitcask_max_merge_size", [
+ {default, "100GB"},
+ {datatype, bytesize},
+ hidden
+]}.
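
Pulling a few of the settings above together, a plausible riak.conf fragment (values are illustrative, not recommendations):

    anti_entropy = active
    anti_entropy.throttle.tier1.mailbox_size = 0
    anti_entropy.throttle.tier1.delay = 0ms
    anti_entropy.throttle.tier2.mailbox_size = 40
    anti_entropy.throttle.tier2.delay = 5ms
    object.format = 1
    object.size.warning_threshold = 5MB
    object.siblings.maximum = 100
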
77 priv/tracers/tracer_accumulating_time.erl
@@ -0,0 +1,77 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_accumulating_time).
+-compile(export_all).
+
+start(Pid_list, MFA_list, IntervalMS) ->
+ dbg:tracer(process, {fun trace/2, new_stats()}),
+ [dbg:p(Pid, [call, timestamp, arity]) || Pid <- Pid_list],
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ {Mod, Func, Arity} <- MFA_list],
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ timer:send_interval(IntervalMS, TPid, print_report),
+ {started, TPid}.
+
+stop() ->
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ stopped.
+
+trace({trace_ts, Pid, call, {Mod, Func, Arity}, TS}, {Dict}) ->
+ MFA = {Mod, Func, Arity},
+ DKey = {Pid, MFA},
+ {dict:store(DKey, TS, Dict)};
+trace({trace_ts, Pid, return_from, {Mod, Func, Arity}, _Res, TS}, {Dict}) ->
+ MFA = {Mod, Func, Arity},
+ DKey = {Pid, MFA},
+ Start = case dict:find(DKey, Dict) of
+ {ok, StTime} -> StTime;
+ error -> now()
+ end,
+ Elapsed = timer:now_diff(TS, Start),
+ SumKey = {sum, MFA},
+ {OldCount, OldTime} = case dict:find(SumKey, Dict) of
+ error ->
+ {0, 0};
+ {ok, Else} ->
+ Else
+ end,
+ Dict2 = dict:erase(DKey, Dict),
+ {dict:store(SumKey, {OldCount+1, OldTime+Elapsed}, Dict2)};
+trace(print_report, {Dict}) ->
+ print_stats(Dict),
+ {dict:from_list([X || {K, _V} = X <- dict:to_list(Dict),
+ element(1, K) /= sum])};
+trace(Unknown, {Dict}) ->
+ erlang:display(wha),
+ io:format("Unknown! ~P\n", [Unknown, 20]),
+ {Dict}.
+
+new_stats() ->
+ {dict:new()}.
+
+print_stats(Dict) ->
+ Reports = lists:sort([{MFA, X} || {{sum, MFA}, X} <- dict:to_list(Dict)]),
+ [io:format("~p MFA ~p count ~p elapsed_msec ~p\n",
+ [time(), MFA, Count, Sum div 1000]) ||
+ {MFA, {Count, Sum}} <- Reports].
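
A usage sketch from an attached shell; the pids and MFA below are placeholders for whatever you want timed:

    %% Accumulate time spent in riak_kv_vnode:handle_command/3 by two
    %% vnode pids, printing a report every 10 seconds:
    1> tracer_accumulating_time:start([Pid1, Pid2],
                                      [{riak_kv_vnode, handle_command, 3}],
                                      10000).
    2> tracer_accumulating_time:stop().
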
197 priv/tracers/tracer_backend_latency.erl
@@ -0,0 +1,197 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_backend_latency).
+-compile(export_all).
+
+start() ->
+ start(500).
+
+start(LatencyMS) ->
+ start(LatencyMS, [get_fsm, put_fsm,
+ bitcask, eleveldb, file, prim_file, riak_kv_fs2_backend]).
+
+start(LatencyMS, Modules) ->
+ %% catch folsom_metrics:delete_metric(foo),
+ %% folsom_metrics:new_histogram(foo, uniform, 9981239823),
+
+ dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}),
+ dbg:p(all, [call, timestamp, arity]),
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) ||
+ lists:member(put_fsm, Modules),
+ Mod <- [riak_kv_put_fsm],
+ {Func, Arity} <- [{init, 1}, {finish, 2}]],
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) ||
+ lists:member(get_fsm, Modules),
+ Mod <- [riak_kv_get_fsm],
+ {Func, Arity} <- [{init, 1}, {finalize, 1}]],
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ lists:member(bitcask, Modules),
+ Mod <- [bitcask],
+ {Func, Arity} <- [
+ {open,1}, {open,2},
+ {close,1},
+ {close_write_file,1},
+ {get,2},
+ {put,3},
+ {delete,2},
+ {sync,1},
+ {iterator,3}, {iterator_next,1}, {iterator_release,1},
+ {needs_merge,1},
+ {is_empty_estimate,1},
+ {status,1}]],
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ lists:member(eleveldb, Modules),
+ Mod <- [eleveldb],
+ {Func, Arity} <- [
+ {open,2},
+ {close,1},
+ {get,3},
+ {put,4},
+ {delete,3},
+ {write,3},
+ {status,2},
+ {destroy,2},
+ {is_empty,1},
+ {iterator,2},
+ {iterator,3},
+ {iterator_move,2},
+ {iterator_close,1}]],
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ lists:member(file, Modules),
+ Mod <- [file],
+ {Func, Arity} <- [
+ {open,2},
+ {close,1},
+ {pread,2},
+ {pread,3},
+ {read,2},
+ {write,2},
+ {pwrite,2},
+ {pwrite,3},
+ {truncate,1},
+ {delete,1},
+ {position,2},
+ {sync,1}
+ ]],
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ lists:member(prim_file, Modules),
+ Mod <- [prim_file],
+ {Func, Arity} <- [
+ {list_dir,2},
+ {read_file_info,1},
+ {write_file_info,1}
+ ]],
+
+ [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) ||
+ lists:member(riak_kv_fs2_backend, Modules),
+ Mod <- [riak_kv_fs2_backend],
+ {Func, Arity} <- [
+ {get_object,4},
+ {put_object,5},
+ {delete,4}]],
+
+ %% Don't need return_trace events for this use case, but here's
+ %% how to do it if needed.
+ %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]).
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ %% timer:send_interval(Interval, TPid, print_report),
+ io:format("Not using timer:send_interval...\n"),
+ {started, TPid}.
+
+stop() ->
+ %% io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]),
+ %% catch folsom_metrics:delete_metric(foo),
+
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ stopped.
+
+trace({trace_ts, Pid, call, {riak_kv_put_fsm, init, _}, TS}, {Dict, LMS}) ->
+ {dict:store({put, Pid}, TS, Dict), LMS};
+trace({trace_ts, Pid, call, {riak_kv_put_fsm, finish, _}, TS}, {Dict, LatencyMS}) ->
+ Start = case dict:find({put, Pid}, Dict) of
+ {ok, StTime} -> StTime;
+ error -> now()
+ end,
+ case timer:now_diff(TS, Start) div 1000 of
+ Elapsed when Elapsed > LatencyMS ->
+ io:format("~p ~p: put_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]);
+ _Elapsed ->
+ ok
+ end,
+ {dict:erase(Pid, Dict), LatencyMS};
+trace({trace_ts, Pid, call, {riak_kv_get_fsm, init, _}, TS}, {Dict, LMS}) ->
+ {dict:store({get, Pid}, TS, Dict), LMS};
+trace({trace_ts, Pid, call, {riak_kv_get_fsm, finalize, _}, TS}, {Dict, LatencyMS}) ->
+ Start = case dict:find({get, Pid}, Dict) of
+ {ok, StTime} -> StTime;
+ error -> now()
+ end,
+ case timer:now_diff(TS, Start) div 1000 of
+ Elapsed when Elapsed > LatencyMS ->
+ io:format("~p ~p: get_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]);
+ _Elapsed ->
+ ok
+ end,
+ {dict:erase(Pid, Dict), LatencyMS};
+trace({trace_ts, Pid, call, {Mod, _, _}, TS}, {Dict, LMS}) ->
+ {dict:store({Mod, Pid}, TS, Dict), LMS};
+trace({trace_ts, Pid, return_from, {Mod, Func, _}, _Res, TS}, {Dict, LatencyMS}) ->
+ DKey = {Mod, Pid},
+ Start = case dict:find(DKey, Dict) of
+ {ok, StTime} -> StTime;
+ error -> now()
+ end,
+ case timer:now_diff(TS, Start) div 1000 of
+ Elapsed when Elapsed > LatencyMS ->
+ io:format("~p ~p: ~p ~p: ~p msec\n", [date(), time(), Mod,
+ Func, Elapsed]);
+ _Elapsed ->
+ ok
+ end,
+ %% if Mod == file, Func == pread ->
+ %% folsom_metrics_histogram:update(foo, Elapsed);
+ %% true ->
+ %% ok
+ %% end,
+ {dict:erase(DKey, Dict), LatencyMS};
+trace(print_report, DictStats) ->
+ %% print_stats(DictStats),
+ %% new_stats();
+ DictStats;
+trace(Unknown, DictStats) ->
+ erlang:display(wha),
+ io:format("Unknown! ~P\n", [Unknown, 20]),
+ DictStats.
+
+new_stats(LatencyMS) ->
+ {dict:new(), LatencyMS}.
+
+print_stats(_DictStats) ->
+ ok.
+
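
Usage sketch: report any traced call slower than a threshold, using the arities defined above:

    %% Complain about any bitcask/eleveldb/file operation, or get/put FSM
    %% span, that takes longer than 200 ms:
    1> tracer_backend_latency:start(200).
    %% Or restrict tracing to eleveldb only:
    2> tracer_backend_latency:start(200, [eleveldb]).
    3> tracer_backend_latency:stop().
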
75 priv/tracers/tracer_eleveldb_put_size.erl
@@ -0,0 +1,75 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_eleveldb_put_size).
+-compile(export_all).
+
+start() ->
+ start(10*1000).
+
+start(Interval) ->
+ Stats = {StatName, _} = new_stats(),
+ reset_metric(StatName),
+
+ dbg:tracer(process, {fun trace/2, Stats}),
+ dbg:p(all, [call]),
+ dbg:tpl(eleveldb, write, 3, [{'_', [], []}]),
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ timer:send_interval(Interval, TPid, print_report),
+ {started, TPid}.
+
+stop() ->
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ stopped.
+
+trace({trace, _Pid, call, {eleveldb, write, [_, PutList, _]}},
+ {StatName, SumBytes}) ->
+ Bs = [begin
+ Bs = size(K) + size(V),
+ folsom_metrics_histogram:update(StatName, Bs),
+ Bs
+ end || {put, K, V} <- PutList],
+ {StatName, SumBytes + lists:sum(Bs)};
+trace(print_report, Stats = {StatName, _}) ->
+ print_stats(Stats),
+ reset_metric(StatName),
+ new_stats();
+trace(_Unknown, Stats) ->
+ erlang:display(wha),
+ %% io:format("Unknown! ~P\n", [Unknown, 20]),
+ Stats.
+
+new_stats() ->
+ {foo, 0}.
+
+print_stats({StatName, SumBytes}) ->
+ if SumBytes == 0 ->
+ io:format("~p ~p: 0 bytes\n", [date(), time()]);
+ true ->
+ Ps = folsom_metrics:get_histogram_statistics(StatName),
+ io:format("~p ~p: ~p bytes\n ~p\n", [date(), time(), SumBytes, Ps])
+ end.
+
+reset_metric(Stats) ->
+ catch folsom_metrics:delete_metric(Stats),
+ folsom_metrics:new_histogram(Stats, uniform, 9981239823).
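
Usage sketch (interval in milliseconds, as handed to timer:send_interval/3 above; folsom must be running for the histogram calls to succeed):

    1> tracer_eleveldb_put_size:start(5000).   %% histogram report every 5s
    2> tracer_eleveldb_put_size:stop().
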
90 priv/tracers/tracer_fsm_init.erl
@@ -0,0 +1,90 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_fsm_init).
+-compile(export_all).
+
+start() ->
+ start(1*1000).
+
+start(Interval) ->
+ %%% Count the get, put, buckets, keys, exchange, and index FSM init() calls
+ dbg:tracer(process, {fun trace/2, new_stats()}),
+ dbg:p(all, call),
+ [dbg:tpl(Mod, init, 1, [{'_', [], []}]) ||
+ Mod <- [riak_kv_buckets_fsm, riak_kv_exchange_fsm, riak_kv_get_fsm, riak_kv_index_fsm, riak_kv_keys_fsm, riak_kv_put_fsm]],
+ dbg:tpl(riak_kv_put_fsm, start_link, 3, [{'_', [], []}]),
+
+ %% Don't need return_trace events for this use case, but here's
+ %% how to do it if needed.
+ %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]).
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ timer:send_interval(Interval, TPid, print_report),
+ {started, TPid}.
+
+stop() ->
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ stopped.
+
+trace({trace, _Pid, call, {riak_kv_put_fsm, start_link, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link+1, R, P, B, E, I, K};
+trace({trace, _Pid, call, {riak_kv_get_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R+1, P, B, E, I, K};
+trace({trace, _Pid, call, {riak_kv_put_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R, P+1, B, E, I, K};
+trace({trace, _Pid, call, {riak_kv_buckets_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R, P, B+1, E, I, K};
+trace({trace, _Pid, call, {riak_kv_exchange_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R, P, B, E+1, I, K};
+trace({trace, _Pid, call, {riak_kv_index_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R, P, B, E, I+1, K};
+trace({trace, _Pid, call, {riak_kv_keys_fsm, init, _}},
+ {Pstart_link, R, P, B, E, I, K}) ->
+ {Pstart_link, R, P, B, E, I, K+1};
+trace(print_report, Stats) ->
+ print_stats(Stats),
+ new_stats();
+trace(Unknown, Stats) ->
+ erlang:display(wha),
+ io:format("Unknown! ~P\n", [Unknown, 20]),
+ Stats.
+
+new_stats() ->
+    {0, 0, 0, 0, 0, 0, 0}.
+
+print_stats({Pstart_link, Get, Put, Buckets, Exchange, Index, Keys}) ->
+ Stats = [{put_start, Pstart_link},
+ {get, Get},
+ {put, Put},
+ {buckets, Buckets},
+ {exchange, Exchange},
+ {index, Index},
+ {keys, Keys}],
+ io:format("~p ~p: ~p\n", [date(), time(), Stats]).
102 priv/tracers/tracer_func_args.erl
@@ -0,0 +1,102 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% For example: which ETS tables are looked up most often via
+%% ets:lookup/2? The first argument of ets:lookup/2 is the table name.
+%% Watch for 10 seconds.
+%%
+%% > tracer_func_args:start(ets, lookup, 2, 10, fun(Args) -> hd(Args) end).
+%%
+%% Tracer pid: <0.16102.15>, use tracer_func_args:stop() to stop
+%% Otherwise, tracing stops in 10 seconds
+%% Current date & time: {2013,9,19} {18,5,48}
+%% {started,<0.16102.15>}
+%% Total calls: 373476
+%% Call stats:
+%% [{folsom_histograms,114065},
+%% {ac_tab,69689},
+%% {ets_riak_core_ring_manager,67147},
+%% {folsom_spirals,57076},
+%% {riak_capability_ets,48862},
+%% {riak_core_node_watcher,8149},
+%% {riak_api_pb_registrations,8144},
+%% {folsom,243},
+%% {folsom_meters,43},
+%% {folsom_durations,20},
+%% {timer_tab,18},
+%% {folsom_gauges,8},
+%% {riak_core_stat_cache,5},
+%% {sys_dist,3},
+%% {inet_db,1},
+%% {21495958,1},
+%% {3145765,1},
+%% {3407910,1}]
+%%
+
+-module(tracer_func_args).
+-compile(export_all).
+
+start(Mod, Func, Arity, RunSeconds) ->
+ start(Mod, Func, Arity, RunSeconds, fun(Args) -> Args end).
+
+start(Mod, Func, Arity, RunSeconds, ArgMangler) ->
+ catch ets:delete(foo),
+ ets:new(foo, [named_table, public, set]),
+ dbg:tracer(process, {fun trace/2, new_stats({foo, ArgMangler})}),
+ dbg:p(all, call),
+ dbg:tpl(Mod, Func, Arity, [{'_', [], []}]),
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]),
+ io:format("Current date & time: ~p ~p\n", [date(), time()]),
+ spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end),
+ {started, TPid}.
+
+stop() ->
+    Sort = fun({_, A}, {_, B}) -> A >= B end,
+ Res = ets:tab2list(foo),
+ TotalCalls = lists:sum([Count || {_Arg, Count} <- Res]),
+ io:format("Total calls: ~p\n", [TotalCalls]),
+ io:format("Call stats:\n~p\n", [catch lists:sort(Sort, Res)]),
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ timer:sleep(100),
+ stopped.
+
+trace({trace, _Pid, call, {_, _, Args}}, {Tab, ArgMangler} = Acc) ->
+ Args2 = ArgMangler(Args),
+ try
+ ets:update_counter(Tab, Args2, {2, 1})
+ catch _:_ ->
+ ets:insert(Tab, {Args2, 1})
+ end,
+ Acc;
+trace(Unknown, DictStats) ->
+ io:format("Unknown! ~P\n", [Unknown, 20]),
+ DictStats.
+
+new_stats({Tab, _ArgMangler} = Acc) ->
+ ets:delete_all_objects(Tab),
+ Acc.
+
+print_stats(_DictStats) ->
+ ok.
+
80 priv/tracers/tracer_gc_latency.erl
@@ -0,0 +1,80 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_gc_latency).
+-compile(export_all).
+
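+%% For example: report any process whose garbage-collection pause
+%% exceeds 100 ms (assumes folsom is available on the node).
+%%
+%% > tracer_gc_latency:start(100).
+%% > tracer_gc_latency:stop().
+%%
+%% Each pid is timestamped at gc_start and checked at gc_end; pauses
+%% over the threshold are printed with the pid's message queue length
+%% and GC statistics.
+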
+start(LatencyMS) ->
+ catch folsom_metrics:delete_metric(foo),
+ folsom_metrics:new_histogram(foo, uniform, 50*1000*1000),
+ dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}),
+ {ok, _} = dbg:p(all, [timestamp, garbage_collection, running]),
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ io:format("Current date & time: ~p ~p local time\n", [date(), time()]),
+ {started, TPid}.
+
+stop() ->
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ timer:sleep(100),
+ catch folsom_metrics:delete_metric(foo),
+ stopped.
+
+trace({trace_ts, Pid, gc_start, _Stats, TS}, {Dict, LMS}) ->
+ {dict:store(Pid, TS, Dict), LMS};
+trace({trace_ts, Pid, gc_end, GcStats, TS}, {Dict, LMS}=Acc) ->
+ DKey = Pid,
+ case dict:find(DKey, Dict) of
+ {ok, GcStart} ->
+ Elapsed = erlang:max(-1, (timer:now_diff(TS, GcStart) div 1000)),
+ if Elapsed > LMS ->
+ io:format("~p: GC of ~p elapsed time ~p > threshold ~p\n",
+ [time(), Pid, Elapsed, LMS]),
+                   io:format("    ~w,~w\n", [process_info(Pid, message_queue_len), GcStats]);
+ true ->
+ ok
+ end,
+ {dict:erase(DKey, Dict), LMS};
+ error ->
+ Acc
+ end;
+trace({trace_ts, Pid, InOrOut, _MFA, TS}, {Dict, _LMS}=Acc) ->
+ DKey = Pid,
+ case dict:find(DKey, Dict) of
+ {ok, GcStart} ->
+ io:format("Hey, pid ~p scheduled ~p but started GC ~p msec ago\n",
+                      [Pid, InOrOut, timer:now_diff(TS, GcStart) div 1000]);
+ _ ->
+ ok
+ end,
+ Acc;
+trace(Unknown, DictStats) ->
+ erlang:display(wha),
+    io:format("Unknown! ~P\n\t~P\n", [Unknown, 20, DictStats, 7]),
+ DictStats.
+
+new_stats(LatencyMS) ->
+ {dict:new(), LatencyMS}.
+
+print_stats(_DictStats) ->
+ ok.
+
105 priv/tracers/tracer_large4.erl
@@ -0,0 +1,105 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_large4).
+-compile(export_all).
+
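+%% For example: log any object of roughly 1 MB or larger that is read
+%% (via riak_kv_get_fsm:calculate_objsize/2) or written (via a backend
+%% put/5) during the next minute, or until 1000 trace events arrive,
+%% whichever comes first:
+%%
+%% > tracer_large4:go(60*1000, 1000, 1024*1024).
+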
+-record(r_object, {bucket = '_',
+ key = '_',
+ contents = '_',
+ vclock = '_',
+ updatemetadata = '_',
+ updatevalue = '_'
+ }).
+
+go(Time, Count, Size) ->
+ ss(),
+ %% gets
+ GetMS = [{['_',
+ #r_object{bucket='$1',
+ key='$2'}],
+ [],
+ [{message,{{'$1','$2'}}}, {return_trace}]}],
+ erlang:trace_pattern({riak_kv_get_fsm, calculate_objsize, 2}, GetMS, [local]),
+
+ %% puts
+ PutMS = [{['$1','$2','_','$3','_'],
+ [{'>',{size,'$3'},Size}],
+ [{message,{{'$1','$2',{size,'$3'}}}}]}],
+ erlang:trace_pattern({riak_kv_eleveldb_backend, put, 5}, PutMS, [local]),
+ erlang:trace_pattern({riak_kv_bitcask_backend, put, 5}, PutMS, [local]),
+ erlang:trace_pattern({riak_kv_memory_backend, put, 5}, PutMS, [local]),
+
+ {Tracer, _} = spawn_monitor(?MODULE, tracer, [0, Count, Size, dict:new()]),
+ erlang:trace(all, true, [call, arity, {tracer, Tracer}]),
+ receive
+ {'DOWN', _, process, Tracer, _} ->
+ ok
+ after Time ->
+ exit(Tracer, kill),
+ receive
+ {'DOWN', _, process, Tracer, _} ->
+ ok
+ end
+ end,
+ ss(),
+ io:format("object trace stopped~n").
+
+tracer(Limit, Limit, _, _) ->
+ ok;
+tracer(Count, Limit, Threshold, Objs) ->
+ receive
+ {trace,Pid,call,{riak_kv_get_fsm,calculate_objsize,2},{Bucket,Key}} ->
+ Objs2 = dict:store(Pid, {Bucket,Key}, Objs),
+ tracer(Count+1, Limit, Threshold, Objs2);
+ {trace,Pid,return_from,{riak_kv_get_fsm,calculate_objsize,2},Size} ->
+ case Size >= Threshold of
+ true ->
+ case dict:find(Pid, Objs) of
+ {ok, {Bucket, Key}} ->
+ io:format("~p: get: ~p~n", [ts(), {Bucket, Key, Size}]);
+ _ ->
+ ok
+ end;
+ false ->
+ ok
+ end,
+ Objs2 = dict:erase(Pid, Objs),
+ tracer(Count+1, Limit, Threshold, Objs2);
+ {trace,_Pid,call,{riak_kv_eleveldb_backend,put,5},{Bucket,Key,Size}} ->
+ io:format("~p: put(l): ~p~n", [ts(), {Bucket, Key, Size}]),
+ tracer(Count+1, Limit, Threshold, Objs);
+ {trace,_Pid,call,{riak_kv_bitcask_backend,put,5},{Bucket,Key,Size}} ->
+ io:format("~p: put(b): ~p~n", [ts(), {Bucket, Key, Size}]),
+ tracer(Count+1, Limit, Threshold, Objs);
+ {trace,_Pid,call,{riak_kv_memory_backend,put,5},{Bucket,Key,Size}} ->
+ io:format("~p: put(m): ~p~n", [ts(), {Bucket, Key, Size}]),
+ tracer(Count+1, Limit, Threshold, Objs);
+ Msg ->
+ io:format("tracer: ~p~n", [Msg]),
+ tracer(Count+1, Limit, Threshold, Objs)
+ end.
+
+ts() ->
+ calendar:now_to_datetime(os:timestamp()).
+
+ss() ->
+ erlang:trace_pattern({'_','_','_'}, false, [local]),
+ erlang:trace(all, false, [call, arity]).
125 priv/tracers/tracer_latency_histogram.erl
@@ -0,0 +1,125 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% For example: create a histogram of call latencies for bitcask:put/3.
+%% Watch for 10 seconds.
+%%
+%% > tracer_latency_histogram:start(bitcask, put, 3, 10).
+%%
+%% Tracer pid: <0.2108.18>, use tracer_latency_histogram:stop() to stop
+%% Otherwise, tracing stops in 10 seconds
+%% Current date & time: {2013,9,19} {18,14,13}
+%% {started,<0.2108.18>}
+%% Histogram stats:
+%% [{min,0},
+%% {max,48},
+%% {arithmetic_mean,2.765411819271055},
+%% {geometric_mean,2.527103493663478},
+%% {harmonic_mean,2.2674039086593973},
+%% {median,3},
+%% {variance,3.5629207473971585},
+%% {standard_deviation,1.8875700642352746},
+%% {skewness,2.0360354571500774},
+%% {kurtosis,18.529695846728423},
+%% {percentile,[{50,3},{75,4},{90,5},{95,6},{99,8},{999,14}]},
+%% {histogram,[{1,13436},
+%% {2,12304},
+%% {3,10789},
+%% {4,7397},
+%% {5,4191},
+%% {6,1929},
+%% {7,873},
+%% {8,420},
+%% {9,163},
+%% {10,79},
+%% {11,42},
+%% {12,47},
+%% {13,11},
+%% {14,16},
+%% {15,7},
+%% {16,5},
+%% {17,3},
+%% {18,4},
+%% {19,2},
+%% {20,4},
+%% {21,1},
+%% {22,11},
+%% {23,2},
+%% {24,1},
+%% {25,2},
+%% {26,1},
+%% {27,0},
+%% {28,1},
+%% {29,2},
+%% {30,0},
+%% {31,0},
+%% {40,2},
+%% {50,1}]},
+%% {n,51746}]
+
+-module(tracer_latency_histogram).
+-compile(export_all).
+
+start(Mod, Func, Arity, RunSeconds) ->
+ catch folsom_metrics:delete_metric(foo),
+ folsom_metrics:new_histogram(foo, uniform, 50*1000*1000),
+ dbg:tracer(process, {fun trace/2, new_stats(0)}),
+ dbg:p(all, [call, timestamp, arity]),
+ dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]),
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]),
+ io:format("Current date & time: ~p ~p\n", [date(), time()]),
+ spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end),
+ {started, TPid}.
+
+stop() ->
+ io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]),
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ timer:sleep(100),
+ catch folsom_metrics:delete_metric(foo),
+ stopped.
+
+trace({trace_ts, Pid, call, {_, _, _}, TS}, {Dict, LMS}) ->
+ {dict:store(Pid, TS, Dict), LMS};
+trace({trace_ts, Pid, return_from, {_, _, _}, _Res, TS}, {Dict, LatencyMS}) ->
+ DKey = Pid,
+ Start = case dict:find(DKey, Dict) of
+ {ok, StTime} -> StTime;
+ error -> now()
+ end,
+ Elapsed = timer:now_diff(TS, Start) div 1000,
+ folsom_metrics_histogram:update(foo, Elapsed),
+ {dict:erase(DKey, Dict), LatencyMS};
+trace(print_report, DictStats) ->
+ DictStats;
+trace(Unknown, DictStats) ->
+ erlang:display(wha),
+ io:format("Unknown! ~P\n", [Unknown, 20]),
+ DictStats.
+
+new_stats(LatencyMS) ->
+ {dict:new(), LatencyMS}.
+
+print_stats(_DictStats) ->
+ ok.
+
93 priv/tracers/tracer_merge_and_and_handoff.erl
@@ -0,0 +1,93 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_merge_and_and_handoff).
+-compile(export_all).
+
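+%% For example: report bytes moved by bitcask merges (grouped by
+%% bitcask directory) and by handoff (one total for all sends, plus a
+%% per-partition total on the receive side).
+%%
+%% > tracer_merge_and_and_handoff:start().       % report every second
+%% > tracer_merge_and_and_handoff:start(5*1000). % ... or every 5 seconds
+%% > tracer_merge_and_and_handoff:stop().
+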
+start() ->
+ start(1*1000).
+
+start(Interval) ->
+ dbg:tracer(process, {fun trace/2, {orddict:new(), orddict:new()}}),
+ dbg:p(all, call),
+ dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], []}]),
+ dbg:tpl(riak_kv_vnode, encode_handoff_item, 2, [{'_', [], []}]),
+ dbg:tpl(riak_core_handoff_receiver, process_message, 3, [{'_', [], []}]),
+
+ %% Don't need return_trace events for this use case, but here's
+ %% how to do it if needed.
+ %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]).
+
+ {ok, TPid} = dbg:get_tracer(),
+ io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]),
+ timer:send_interval(Interval, TPid, print_report),
+ {started, TPid}.
+
+stop() ->
+ dbg:stop_clear(),
+ catch exit(element(2,dbg:get_tracer()), kill),
+ stopped.
+
+trace({trace, _Pid, call, {bitcask, merge_single_entry,
+ [K, V, _TS, _FId, {File,_,_,_}, _State]}},
+ {MDict, HDict}) ->
+ Dir = re:replace(File, "/[^/]*\$", "", [{return, binary}]),
+ Bytes = size(K) + size(V),
+ MDict2 = increment_cbdict(MDict, Dir, Bytes),
+ {MDict2, HDict};
+trace({trace, _Pid, call, {riak_kv_vnode, encode_handoff_item,
+ [{B, K}, V]}},
+ {MDict, HDict}) ->
+ Bytes = size(B) + size(K) + size(V),
+ Key = "all-sending-handoff",
+ HDict2 = increment_cbdict(HDict, Key, Bytes),
+ {MDict, HDict2};
+trace({trace, _Pid, call, {riak_core_handoff_receiver, process_message,
+ [_Type, Msg, State]}},
+ {MDict, HDict}) ->
+ Bytes = size(Msg),
+ Partition = element(5, State), % ugly hack
+ Key = Partition,
+ HDict2 = increment_cbdict(HDict, Key, Bytes),
+ {MDict, HDict2};
+trace(print_report, {MDict, HDict}) ->
+    print_stats(MDict, merge),
+    print_stats(HDict, handoff),
+    {orddict:new(), orddict:new()};
+trace(_Unknown, Acc) ->
+    %% Ignore unexpected trace messages instead of crashing the tracer.
+    Acc.
+
+%% "cb" = count + bytes
+increment_cbdict(Dict, Key, Bytes) ->
+ orddict:update(Key, fun({Count, Bs}) -> {Count + 1, Bs + Bytes} end,
+ {1, Bytes}, Dict).
+
+print_stats(Dict, Type) ->
+ F = fun(Key, {Count, Bytes}, {SumC, SumB}) when Count > 0 ->
+ io:format("~p ~p: ~p items ~p bytes ~p avg-size ~p\n",
+ [date(), time(), Count, Bytes, Bytes div Count, Key]),
+ {SumC + Count, SumB + Bytes};
+ (_, _, Acc) ->
+ Acc
+ end,
+ {Count, Bytes} = orddict:fold(F, {0, 0}, Dict),
+ Avg = if Count > 0 -> Bytes div Count;
+ true -> 0
+ end,
+ io:format("~p ~p: ~p total: ~p items ~p bytes ~p avg-size\n",
+ [date(), time(), Type, Count, Bytes, Avg]).
68 priv/tracers/tracer_read_bin_trace_file.erl
@@ -0,0 +1,68 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_read_bin_trace_file).
+-compile(export_all).
+
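+%% For example: read a binary trace file (each term preceded by a
+%% 5-byte size header, e.g. as written by a dbg trace port) and print
+%% every traced call that took longer than 50 msec:
+%%
+%% > tracer_read_bin_trace_file:read("/tmp/trace.bin", 50).
+%%
+%% read/1 defaults the latency threshold to 1 msec.
+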
+read(Path) ->
+ read(Path, 1).
+
+read(Path, LatencyMS) ->
+ {ok, FH} = file:open(Path, [read, binary, raw]),
+ read(file:read(FH, 5), FH, LatencyMS, []).
+
+read(eof, _FH, _, _) ->
+ ok;
+read({ok, <<Size:40>>}, FH, LatencyMS, Hist) ->
+ {ok, Bin} = file:read(FH, Size),
+ case binary_to_term(Bin) of
+ {trace_ts, _, call, {M,F,A}, Time} ->
+ %%io:format("call MFA = ~p:~p/~p, ", [M, F, length(A)]),
+ read(file:read(FH, 5), FH, LatencyMS, [{{M,F,length(A)}, Time, A}|Hist]);
+ {trace_ts, _, return_from, MFA, Res, EndTime} ->
+ %%io:format("MFA ~p Hist ~p\n", [MFA, Hist]),
+ try
+ {value, {_, StartTime, A}, NewHist} = lists:keytake(MFA, 1, Hist),
+ MSec = timer:now_diff(EndTime, StartTime)/1000,
+ if MSec > LatencyMS ->
+ io:format("~p ~p msec\nArgs: (~p/~p) ~P\nRes: ~P\n\n",
+ [MFA, MSec,
+ erts_debug:flat_size(A), erts_debug:size(A),
+ A, 20, Res, 20]);
+ true ->
+ ok
+ end,
+ read(file:read(FH, 5), FH, LatencyMS, NewHist)
+ catch
+ error:{badmatch,false} ->
+ read(file:read(FH, 5), FH, LatencyMS, Hist);
+ X:Y ->
+ io:format("ERR ~p ~p @ ~p\n", [X, Y, erlang:get_stacktrace()]),
+ read(file:read(FH, 5), FH, LatencyMS, Hist)
+ end
+ end.
+
+%% read(eof, _FH) ->
+%% ok;
+%% read({ok, <<Size:40>>}, FH) ->
+%% {ok, Bin} = file:read(FH, Size),
+%% io:format("~P\n", [binary_to_term(Bin), 15]),
+%% read(file:read(FH, 5), FH).
+
116 priv/tracers/tracer_timeit.erl
@@ -0,0 +1,116 @@
+%% -------------------------------------------------------------------
+%%
+%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(tracer_timeit).
+-compile(export_all).
+
+%% @doc Dynamically add latency timing to calls of Mod:Fun/Arity.
+%% Three types of timing are supported:
+%%
+%% {all, Max} - time the latency of every call to the MFA
+%%
+%% {sample, N, Max} - sample every Nth call and stop sampling after Max
+%%
+%% {threshold, Millis, Max} - count calls whose latency exceeds Millis
+%% as well as the total number of calls, giving the percentage of
+%% calls over the threshold
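+%%
+%% For example, time every 10th call to riak_kv_put_fsm:init/1,
+%% stopping after 1000 samples:
+%%
+%%   > tracer_timeit:timeit(riak_kv_put_fsm, init, 1, {sample, 10, 1000}).
+%%   > tracer_timeit:stop().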
+timeit(Mod, Fun, Arity, Type) ->
+ Type2 = case Type of
+ {sample, N, Max} -> {sample, {N, Max}, {0, 0, 0}};
+ {threshold, Millis, Max} -> {threshold, {Millis, Max}, {0, 0}};
+ {all, Max} -> {all, {0, Max}}
+ end,
+ dbg:tracer(process, {fun trace/2, {orddict:new(), Type2}}),
+ dbg:p(all, call),
+ dbg:tpl(Mod, Fun, Arity, [{'_', [], [{return_trace}]}]).
+
+stop() -> dbg:stop_clear().
+
+trace({trace, Pid, call, {Mod, Fun, _}}, {D, {all, {Count, Max}}}) ->
+ D2 = orddict:store({Pid, Mod, Fun}, now(), D),
+ {D2, {all, {Count, Max}}};
+trace({trace, Pid, call, {Mod, Fun, _}},
+ {D, {sample, {N, Max}, {M, K, Total}}}) ->
+ M2 = M+1,
+ Total2 = Total+1,
+ if N == M2 ->
+ D2 = orddict:store({Pid, Mod, Fun}, now(), D),
+ {D2, {sample, {N, Max}, {0, K, Total2}}};
+ true ->
+ {D, {