Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved location claim algorithm #1008

Merged
merged 32 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
234e863
Configuration and module alignment
martinsumner Mar 27, 2023
e1ce784
Remove deprecated v1 claim/wants
martinsumner Mar 27, 2023
28fbcf9
Update framework for Claim
martinsumner Mar 27, 2023
eb13a48
Add choose_claim_v4
martinsumner Mar 27, 2023
35b9e2f
Location claim improvements
martinsumner Mar 29, 2023
4e2c963
Refinements to claim_v4
martinsumner Mar 30, 2023
3ab0f4b
Correction following removal of log
martinsumner Mar 30, 2023
8fe7fc7
Count remove list not Excess to determine loops
martinsumner Mar 31, 2023
58d6264
Better order of initial striping
martinsumner Mar 31, 2023
bfe605b
A new claim algorithm (#1003)
ThomasArts May 3, 2023
278ffa4
Claim API requires export of 2-arity choose function as well as 3-arity
martinsumner May 3, 2023
e13f4e0
Always return indices
martinsumner May 9, 2023
e5083f1
Calculate swaps only once
martinsumner May 9, 2023
6820c7b
Remember v4 solutions via claimant
martinsumner May 11, 2023
6527033
Long-running tests
martinsumner May 15, 2023
0da5c68
Adding an extra test (#1004)
ThomasArts May 17, 2023
be0451c
Revert "Long-running tests"
martinsumner May 17, 2023
e7329f0
Merge branch 'mas-i1001-claimrefactor' of https://github.com/basho/ri…
martinsumner May 17, 2023
c43d0a6
Test adjustments
martinsumner May 17, 2023
b6dcbc2
Test adjustments
martinsumner May 18, 2023
2dd845f
Add support for configured target_location_n_val
martinsumner May 19, 2023
0c05dc4
Memoise fixes
martinsumner May 19, 2023
057b17e
Example configurations saves in source format (#1005)
ThomasArts May 23, 2023
c9ca336
Add full-rebalance for v4
martinsumner May 23, 2023
ba4ef70
Merge branch 'mas-i1001-claimrefactor' of https://github.com/basho/ri…
martinsumner May 23, 2023
470a57f
Support leave in prop_claim
martinsumner May 23, 2023
c6a3dd5
Update - to use correct claim_fun on leave
martinsumner May 23, 2023
a49697c
Use application env to read target_n_val (#1007)
ThomasArts May 25, 2023
60e7199
Add warning if simple_transfer produces unbalanced result
martinsumner May 25, 2023
57675fb
only_swap/swap_only confusion
martinsumner May 26, 2023
bf1e668
Update riak_core_claim_eqc.erl
martinsumner May 27, 2023
5d8912f
Mas i1001 docupdate (#1009)
martinsumner Jun 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
224 changes: 224 additions & 0 deletions docs/claim-version4.md

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions eqc/hashtree_eqc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ command(_S = #state{started = true, tree_id = TreeId,
%% to make sure the iterator code is fully exercised.
%%
%% Store the hashtree records in the process dictionary under keys 't1' and 't2'.
%%
%%
start(Params, [TreeId | ExtraIds], Tree1OpenOrEmpty, Tree2OpenOrEmpty) ->
{Segments, Width, MemLevels} = Params,
%% Return now so we can store symbolic value in procdict in next_state call
Expand Down Expand Up @@ -271,7 +271,7 @@ update_snapshot(T, S) ->
ok.


%%
%%
%% Wrap the hashtree:update_perform call and erase the snapshot hashtree state.
%% Should only happen if a snapshot state exists.
%%
Expand Down Expand Up @@ -490,7 +490,7 @@ next_state(S,_R,{call, _, local_compare1, []}) ->
%%
prop_correct() ->
?SETUP(fun() ->
application:set_env(lager, handlers, [{lager_console_backend, info}]),
application:set_env(lager, handlers, [{level, info}]),
application:ensure_started(syntax_tools),
application:ensure_started(compiler),
application:ensure_started(goldrush),
Expand Down Expand Up @@ -531,7 +531,7 @@ prop_correct() ->
Res0
end,
%% Clean up after the test
case Res of
case Res of
ok -> % if all went well, remove leveldb files
catch cleanup_hashtree(get(t1)),
catch cleanup_hashtree(get(t2));
Expand Down
6 changes: 3 additions & 3 deletions eqc/new_cluster_membership_model_eqc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1608,14 +1608,14 @@ handle_down_nodes(CState, Next) ->

claim_until_balanced(Ring, Node) ->
%%{WMod, WFun} = app_helper:get_env(riak_core, wants_claim_fun),
{WMod, WFun} = {riak_core_claim, default_wants_claim},
{WMod, WFun} = {riak_core_membership_claim, default_wants_claim},
NeedsIndexes = apply(WMod, WFun, [Ring, Node]),
case NeedsIndexes of
no ->
Ring;
{yes, _NumToClaim} ->
%%{CMod, CFun} = app_helper:get_env(riak_core, choose_claim_fun),
{CMod, CFun} = {riak_core_claim, default_choose_claim},
{CMod, CFun} = {riak_core_membership_claim, default_choose_claim},
NewRing = CMod:CFun(Ring, Node),
claim_until_balanced(NewRing, Node)
end.
Expand Down Expand Up @@ -1682,7 +1682,7 @@ remove_from_cluster(Ring, ExitingNode) ->
end,
Ring,
AllOwners),
riak_core_claim:claim_rebalance_n(TempRing, Other)
riak_core_membership_claim:claim_rebalance_n(TempRing, Other)
end,

ExitRing.
Expand Down
80 changes: 80 additions & 0 deletions priv/riak_core.schema
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,83 @@
hidden
]}.

%% @doc Choose claim function
%% Claim function to be used when handling joins to the cluster.
%% There are three supported functions:
%% - choose_claim_v2 (the default) designed for environments without location
%% awareness as a requirement
%% - choose_claim_v3 (deprecated) a claim function which treats claim as an
%% optimisation problem. It creates a number of possible claim plans and
%% evaluates them for violations, balance and diversity, choosing the 'best'
%% plan. claim_v3 is not location aware
%% - choose_claim_v4 a claim algorithm which refactors v2 to improve location
%% awareness
{mapping, "choose_claim_fun", "riak_core.choose_claim_fun", [
{commented, "choose_claim_v2"},
{datatype, {enum, [choose_claim_v2, choose_claim_v3, choose_claim_v4]}},
merge
]}.

%% @doc Target N Val for Cluster Administration
%% Cluster change operations such as joins and leaves will use a target_n_val
%% to control spacing of preflists across physical nodes. The default value
%% is 4, which is the default bucket property for n_val + 1. This means that
%% the target for a cluster change operation is to make sure that all preflists
%% of n_val 3 are on 3 separate physical devices, even when a single failure
%% has occurred.
%% If the target_n_val is not met by a cluster change operation, the failure is
%% not blocking - a warning will be printed in the cluster plan, but the plan
%% will not be prevented from being committed.
%% In some cases, by reducing the target_n_val it may be possible to reduce the
%% number of transfers necessary to complete a cluster change operation.
%% In clusters with a large number of nodes, larger target_n_val values can be
%% supported, and may result in a better spread of load across the cluster
%% when node failure occurs.
{mapping, "target_n_val", "riak_core.target_n_val", [
{datatype, integer},
{default, 4},
{validators, ["target_nval_max", "target_nval_min"]},
{commented, 4}
]}.

%% target_n_val and target_location_n_val validators
%% Upper-bound validator listed by both the target_n_val and
%% target_location_n_val mappings: a value passes only when it is 6 or less.
{validator, "target_nval_max",
"7 and larger are supported, but considered advanced config",
fun(Size) ->
Size =< 6
end}.

%% Lower-bound validator listed by both the target_n_val and
%% target_location_n_val mappings: a value passes only when it is 1 or more.
{validator, "target_nval_min", "must be at least 1",
fun(Size) ->
Size >= 1
end}.

%% @doc Target Location N Val for Cluster Administration
%% Cluster change operations such as joins and leaves will use a
%% target_location_n_val to control spacing of preflists across locations. This
%% is to support clusters which have a concept of `location` failure as well as
%% Node failure (e.g. rack awareness is required, or support for AWS placement
%% groups).
%% In this case, nodes are assigned to locations, and as well as supporting
%% the splitting of data replicas across nodes, attempts will also be made
%% during cluster change operations to split preflists across locations.
%% If the target_location_n_val is not met by a cluster change operation, the failure is
%% not blocking - a warning will be printed in the cluster plan, but the plan
%% will not be prevented from being committed.
%% In some cases, by reducing the target_location_n_val it may be possible to
%% reduce the number of transfers necessary to complete a cluster change
%% operation.
%% In clusters with a large number of nodes, larger target_location_n_val
%% values can be supported.
%% If the target_location_n_val is greater than the target_n_val, the
%% target_n_val will be used.
{mapping, "target_location_n_val", "riak_core.target_location_n_val", [
{datatype, integer},
{default, 3},
{validators, ["target_nval_max", "target_nval_min"]},
{commented, 3}
]}.

%% @doc On cluster leave - force full rebalance partitions
%% By default on a cluster leave there will first be an attempt to handoff
%% vnodes to safe (in terms of target_n_val) locations. In small clusters,
Expand All @@ -237,6 +314,9 @@
%% all nodes.
%% Please carefully consider any cluster plan created with this option before
%% committing
%% If cluster planning is performed with locations enabled, then `full_rebalance_onleave`
%% should also be enabled. With claim_v4 this should result in a cluster
%% plan which is correct, but also relatively efficient.
{mapping, "full_rebalance_onleave", "riak_core.full_rebalance_onleave", [
{datatype, flag},
{default, off}
Expand Down
6 changes: 4 additions & 2 deletions src/riak_core.app.src
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@
{target_n_val, 4},

%% Default claims functions
{wants_claim_fun, {riak_core_claim, default_wants_claim}},
{choose_claim_fun, {riak_core_claim, default_choose_claim}},
{wants_claim_fun,
{riak_core_membership_claim, default_wants_claim}},
{choose_claim_fun,
{riak_core_membership_claim, default_choose_claim}},

%% Vnode inactivity timeout (how often to check if fallback vnodes
%% should return their data) in ms.
Expand Down