Skip to content

Commit

Permalink
MB-50712: Use the greedy vbmap algorithm by default
Browse files Browse the repository at this point in the history
This patch changes the vbmap generation logic to use the greedy approach
by default in all cases. However, it leaves the old logic in place too -
it is possible to swtich back to use the old logic with the following
diag/eval:

    ns_config:set(use_vbmap_greedy_optimization,false)

Change-Id: I7e37c22984c7470a4b0004667fd323cb18bc1cc1
Reviewed-on: https://review.couchbase.org/c/ns_server/+/175370
Well-Formed: Restriction Checker
Well-Formed: Build Bot <build@couchbase.com>
Tested-by: Hareen Kancharla <hareen.kancharla@couchbase.com>
Reviewed-by: Dave Finlay <dave.finlay@couchbase.com>
  • Loading branch information
hareen-kancharla committed May 31, 2022
1 parent 807bd6b commit b3810da
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 35 deletions.
148 changes: 114 additions & 34 deletions src/mb_map.erl
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ promote_replicas_for_graceful_failover_for_chain(Chain, RemoveNodes) ->
ChangedChain ++ RemoveNodesChain ++ Undefineds.

vbucket_movements_rec(AccMasters, AccReplicas, [], []) ->
{AccMasters, AccReplicas};
{AccReplicas, AccMasters};
vbucket_movements_rec(AccMasters, AccReplicas,
[[MasterSrc|_] = SrcChain | RestSrcChains],
[[MasterDst|RestDst] | RestDstChains]) ->
[[MasterDst|_] = DstChain | RestDstChains]) ->
true = (MasterDst =/= undefined),
AccMasters2 = case MasterSrc =:= MasterDst of
true ->
Expand All @@ -84,7 +84,7 @@ vbucket_movements_rec(AccMasters, AccReplicas,
true -> Acc;
false -> Acc+1
end
end, AccReplicas, RestDst),
end, AccReplicas, DstChain),
vbucket_movements_rec(AccMasters2, AccReplicas2, RestSrcChains, RestDstChains).

%% returns 'score' for difference between Src and Dst map. It's a
Expand Down Expand Up @@ -297,8 +297,9 @@ matching_renamings_same_vbuckets_count(KeepNodesSet, CurrentTags,
generate_map(Map, NumReplicas, Nodes, Options) ->
Tags = proplists:get_value(tags, Options),
UseOldCode = (Tags =:= undefined) andalso (NumReplicas =< 1),
UseGreedy = proplists:get_bool(use_vbmap_greedy_optimization, Options),

case UseOldCode of
case UseOldCode andalso not UseGreedy of
true ->
generate_map_old(Map, NumReplicas, Nodes, Options);
false ->
Expand All @@ -317,17 +318,16 @@ generate_map_new(Map, NumReplicas, Nodes, Options) ->
NumVBuckets = length(Map),
NumSlaves = proplists:get_value(max_slaves, Options, 10),
Tags = proplists:get_value(tags, Options),
UseGreedy = proplists:get_bool(use_vbmap_greedy_optimization, Options),

MapsFromPast0 = find_matching_past_maps(Nodes, Map, Options, MapsHistory),
MapsFromPast = score_maps(Map, MapsFromPast0),
?log_debug("Scores for past maps:~n~p", [[S || {_, S} <- MapsFromPast]]),

GeneratedMaps0 =
lists:append(
%% vbmap itself randomizes some things internally so let's give it a
%% chance
[[invoke_vbmap(Map, ShuffledNodes, NumVBuckets,
NumSlaves, NumReplicas, Tags) ||
NumSlaves, NumReplicas, Tags, UseGreedy) ||
_ <- lists:seq(1, 3)] ||
ShuffledNodes <- [misc:shuffle(KeepNodes) || _ <- lists:seq(1, 3)]]),

Expand Down Expand Up @@ -693,7 +693,7 @@ slaves([], _, _, Set) ->
testnodes(NumNodes) ->
[list_to_atom([$n | tl(integer_to_list(1000+N))]) || N <- lists:seq(1, NumNodes)].

invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->
invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags, UseGreedy) ->
VbmapName =
case misc:is_windows() of
true ->
Expand All @@ -707,22 +707,26 @@ invoke_vbmap(CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->

try
{ok, Map} = do_invoke_vbmap(VbmapPath, DiagPath, CurrentMap, Nodes,
NumVBuckets, NumSlaves, NumReplicas, Tags),
NumVBuckets, NumSlaves, NumReplicas, Tags,
UseGreedy),
Map
after
file:delete(DiagPath)
end.

do_invoke_vbmap(VbmapPath, DiagPath,
CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags) ->
CurrentMap, Nodes, NumVBuckets, NumSlaves, NumReplicas, Tags,
UseGreedy) ->
misc:executing_on_new_process(
fun () ->
do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
NumVBuckets, NumSlaves, NumReplicas, Tags)
NumVBuckets, NumSlaves, NumReplicas, Tags,
UseGreedy)
end).

do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
NumVBuckets, NumSlaves, NumReplicas, Tags) ->
NumVBuckets, NumSlaves, NumReplicas, Tags,
UseGreedy) ->
NumNodes = length(Nodes),

Args0 = ["--diag", DiagPath,
Expand All @@ -731,8 +735,41 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
"--num-nodes", integer_to_list(NumNodes),
"--num-slaves", integer_to_list(NumSlaves),
"--num-replicas", integer_to_list(NumReplicas),
"--relax-all"],
Args = vbmap_tags_args(Nodes, Tags) ++ Args0,
"--relax-all"] ++
case UseGreedy of
true ->
["--greedy"];
_ ->
[]
end,

MaxNodeId = length(Nodes) - 1,
NodeIdList = lists:zip(Nodes, lists:seq(0, MaxNodeId)),
?log_debug("Node Id Map: ~p", [NodeIdList]),

NodeIdMap = dict:from_list(NodeIdList),

IdVbMap = make_vbmap_with_node_ids(MaxNodeId, NodeIdMap, CurrentMap),

PrevMapFile = path_config:tempfile("prev-vbmap", ".json"),

ChainsWritten =
case write_vbmap_to_file(IdVbMap, PrevMapFile) of
ok ->
?log_debug("Wrote vbmap to ~p", [PrevMapFile]),
ok;
Err ->
?log_debug("Couldn't write to file: ~p, reason: ~p", [PrevMapFile, Err]),
not_ok
end,

Args = vbmap_tags_args(NodeIdMap, Tags) ++ Args0 ++
(case ChainsWritten of
ok ->
["--current-map", PrevMapFile];
_ ->
[]
end),

Port = erlang:open_port({spawn_executable, VbmapPath},
[stderr_to_stdout, binary,
Expand All @@ -750,28 +787,45 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,

case PortResult of
{ok, Output} ->
NodesMapping = dict:from_list(misc:enumerate(Nodes, 0)),
IdNodeMap = dict:from_list(misc:enumerate(Nodes, 0)),

try
Chains0 = ejson:decode(Output),
Chains = lists:map(
fun (Chain) ->
[dict:fetch(N, NodesMapping) || N <- Chain]
[dict:fetch(N, IdNodeMap) || N <- Chain]
end, Chains0),

EffectiveNumCopies = length(hd(Chains)),
S1 = vbucket_movements(CurrentMap, Chains),
?log_debug("Score before simple minimization: ~p", [S1]),

Map0 = simple_minimize_moves(CurrentMap, Chains,
EffectiveNumCopies, Nodes),

S2 = vbucket_movements(CurrentMap, Map0),
?log_debug("Score after simple minimization: ~p", [S2]),

MapToUse =
case S1 < S2 of
true ->
?log_debug("Map from vbmap better before simple
minimization; using it"),
Chains;
_ ->
?log_debug("Map better after simple minimization;
using it"),
Map0
end,

Map =
case EffectiveNumCopies < NumReplicas + 1 of
true ->
N = NumReplicas + 1 - EffectiveNumCopies,
Extension = lists:duplicate(N, undefined),
[Chain ++ Extension || Chain <- Map0];
[Chain ++ Extension || Chain <- MapToUse];
false ->
Map0
MapToUse
end,

{ok, Map}
Expand All @@ -788,14 +842,31 @@ do_invoke_vbmap_body(VbmapPath, DiagPath, CurrentMap, Nodes,
exit({vbmap_error, iolist_to_binary(Output)})
end.

map_tags(Nodes, RawTags) ->
{_, NodeIxMap} =
map_chain(Chain, MaxNodeId, NodeIdMap) ->
{ReversedChain, MaxNodeIdx1, NodeIdxMap1} =
lists:foldl(
fun (Node, {Ix, Acc}) ->
Acc1 = dict:store(Node, Ix, Acc),
{Ix + 1, Acc1}
end, {0, dict:new()}, Nodes),
fun(N, {ChainPart, MaxNId, NIdMap}) ->
case dict:find(N, NIdMap) of
{ok, Idx} ->
{[Idx | ChainPart], MaxNId, NIdMap};
error ->
{[MaxNodeId + 1 | ChainPart], MaxNodeId + 1,
dict:store(N, MaxNId + 1, NIdMap)}
end
end, {[], MaxNodeId, NodeIdMap}, Chain),
{lists:reverse(ReversedChain), MaxNodeIdx1, NodeIdxMap1}.

make_vbmap_with_node_ids(MaxNodeId, NodeIdMap, CurrentMap) ->
NodeIdMapWithUndefined = dict:store(undefined, -1, NodeIdMap),
{ReversedChains, _, _} =
lists:foldl(
fun (Chain, {NewChains, MaxNId, NIdMap}) ->
{Chain1, MaxNId1, NIdMap1} = map_chain(Chain, MaxNId, NIdMap),
{[Chain1 | NewChains], MaxNId1, NIdMap1}
end, {[], MaxNodeId, NodeIdMapWithUndefined}, CurrentMap),
lists:reverse(ReversedChains).

map_tags(NodeIxMap, RawTags) ->
{_, TagIxMap} =
lists:foldl(
fun (Tag, {Ix, Acc}) ->
Expand All @@ -810,16 +881,25 @@ map_tags(Nodes, RawTags) ->

[{dict:fetch(N, NodeIxMap), dict:fetch(T, TagIxMap)} || {N, T} <- RawTags].

vbmap_tags_args(Nodes, RawTags) ->
case RawTags of
undefined ->
[];
_ ->
Tags = map_tags(Nodes, RawTags),
TagsStrings = [?i2l(N) ++ ":" ++ ?i2l(T) || {N, T} <- Tags],
TagsString = string:join(TagsStrings, ","),
["--tags", TagsString]
end.
vbmap_tags_args(NodeIdMap, RawTags) ->
case RawTags of
undefined ->
[];
_ ->
Tags = map_tags(NodeIdMap, RawTags),
TagsStrings = [?i2l(N) ++ ":" ++ ?i2l(T) || {N, T} <- Tags],
TagsString = string:join(TagsStrings, ","),
["--tags", TagsString]
end.

write_vbmap_to_file(VbMap, Filename) ->
try
BinChains = ejson:encode(VbMap),
file:write_file(Filename, BinChains)
catch T1:E1:S1 ->
{error, {T1, E1, S1}}
end.


collect_vbmap_output(Port) ->
do_collect_vbmap_output(Port, []).
Expand Down
4 changes: 3 additions & 1 deletion src/ns_rebalancer.erl
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ generate_vbucket_map_options(KeepNodes, BucketConfig) ->
end,

Opts0 = ns_bucket:config_to_map_options(BucketConfig),
UseGreedy = ns_config:read_key_fast(use_vbmap_greedy_optimization, true),

%% Note that we don't need to have replication_topology here (in fact as
%% of today it's still returned by ns_bucket:config_to_map_options/1), but
Expand All @@ -90,7 +91,8 @@ generate_vbucket_map_options(KeepNodes, BucketConfig) ->
%% wrongly believe that rebalance is needed even when the cluster is
%% balanced. See MB-15543 for details.
misc:update_proplist(Opts0, [{replication_topology, star},
{tags, Tags}]).
{tags, Tags},
{use_vbmap_greedy_optimization, UseGreedy}]).

generate_vbucket_map(CurrentMap, KeepNodes, BucketConfig) ->
Opts = generate_vbucket_map_options(KeepNodes, BucketConfig),
Expand Down

0 comments on commit b3810da

Please sign in to comment.