Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

fix forced_ownership_handoff during resize #331

Merged
merged 1 commit into from

2 participants

@jrwest

All resize operations remain in the ring's list of pending
changes until all complete. Prior to this change transfers would
only be triggered for the first forced_ownership_handoff operations.
Subsequent operations would only be triggered by vnode inactivity.

This commit modifies the use of forced_ownership_handoff during resize
to ensure that only resize operations that are still pending are in
the throttled transfer list.

This addresses the feature blocking issue in the second to last paragraph here


This PR can be verified using basho_bench w/ configs here:

  1. Build a 64-partition, 4-node cluster and run mapred_populate.config. Verify everything is ok with mapred_verify.config.
  2. Kick off a resize with riak-admin cluster resize-ring 128, then plan and commit.
  3. Run mapred_verify.config repeatedly, e.g. with a shell loop such as while true; do ./basho_bench mapred_verify.config; done.

Without this change, the resize will stall during step 3 (the absence of transfer output in the logs is an easy way to verify this; ring-status will also stop progressing). With this change, you will see the resize progress despite the mapreduce traffic.

Note: mapreduce traffic is not necessary; any sufficient load that prevents all vnodes from reaching their inactivity timeout will do.

@jrwest jrwest fix forced_ownership_handoff during resize
All resize operations remain in the ring's list of pending
changes until all complete. Prior to this change transfers would
only be triggered for the first forced_ownership_handoff operations.
Subsequent operations would only be triggered by vnode *inactivity*.

This commit modifies the use of forced_ownership_handoff during resize
to ensure that only resize operations that are still pending are in
the throttled transfer list.
d99549d
@jrwest

Forgot to mention: when running mapred_verify.config after the resize completes, you will see errors from basho_bench because the expected key counts will be wrong, depending on the resize operation. For more information see the last paragraph here. This is expected behaviour for now and is not the subject of this PR.

@jtuple

Reviewed code, nothing of consequence to note, all looks good there.

Retested against master to verify transfer stall. Then tested again using this branch merged to master, and no stall occurred. Awesomeness.

+1 merge away

@jrwest jrwest merged commit 7acc8c9 into master

1 check failed

Details default The Travis CI build failed
@jaredmorrow jaredmorrow deleted the jrw-resize-foh-fix branch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jun 3, 2013
  1. @jrwest

    fix forced_ownership_handoff during resize

    jrwest authored
    All resize operations remain in the ring's list of pending
    changes until all complete. Prior to this change transfers would
    only be triggered for the first forced_ownership_handoff operations.
    Subsequent operations would only be triggered by vnode *inactivity*.
    
    This commit modifies the use of forced_ownership_handoff during resize
    to ensure that only resize operations that are still pending are in
    the throttled transfer list.
This page is out of date. Refresh to see the latest.
Showing with 21 additions and 7 deletions.
  1. +21 −7 src/riak_core_vnode_manager.erl
View
28 src/riak_core_vnode_manager.erl
@@ -421,7 +421,7 @@ handle_info(management_tick, State0) ->
_ ->
Repairs = State#state.repairs,
kill_repairs(Repairs, ownership_change),
- trigger_ownership_handoff(Transfers, Mods, State2),
+ trigger_ownership_handoff(Transfers, Mods, Ring, State2),
State2#state{repairs=[]}
end,
@@ -485,7 +485,7 @@ ring_changed(Ring, CHBin, State) ->
%% Trigger ownership transfers.
Transfers = riak_core_ring:pending_changes(Ring),
- trigger_ownership_handoff(Transfers, Mods, State3),
+ trigger_ownership_handoff(Transfers, Mods, Ring, State3),
State3.
maybe_ensure_vnodes_started(Ring) ->
@@ -514,11 +514,9 @@ schedule_management_timer() ->
10000),
erlang:send_after(ManagementTick, ?MODULE, management_tick).
-trigger_ownership_handoff(Transfers, Mods, State) ->
- Limit = app_helper:get_env(riak_core,
- forced_ownership_handoff,
- ?DEFAULT_OWNERSHIP_TRIGGER),
- Throttle = lists:sublist(Transfers, Limit),
+trigger_ownership_handoff(Transfers, Mods, Ring, State) ->
+ IsResizing = riak_core_ring:is_resizing(Ring),
+ Throttle = limit_ownership_handoff(Transfers, IsResizing),
Awaiting = [{Mod, Idx} || {Idx, Node, _, CMods, S} <- Throttle,
Mod <- Mods,
S =:= awaiting,
@@ -527,6 +525,22 @@ trigger_ownership_handoff(Transfers, Mods, State) ->
[maybe_trigger_handoff(Mod, Idx, State) || {Mod, Idx} <- Awaiting],
ok.
+limit_ownership_handoff(Transfers, IsResizing) ->
+ Limit = app_helper:get_env(riak_core,
+ forced_ownership_handoff,
+ ?DEFAULT_OWNERSHIP_TRIGGER),
+ limit_ownership_handoff(Limit, Transfers, IsResizing).
+
+limit_ownership_handoff(Limit, Transfers, false) ->
+ lists:sublist(Transfers, Limit);
+limit_ownership_handoff(Limit, Transfers, true) ->
+ %% if we are resizing: filter out completed resize operations,
+ %% since they remain in the list until all are complete. then
+ %% treat transfers as normal
+ Filtered = [Transfer || {_,_,_,_,Status}=Transfer <- Transfers,
+ Status =:= awaiting],
+ limit_ownership_handoff(Limit, Filtered, false).
+
%% @private
idx2vnode(Idx, Mod, _State=#state{idxtab=T}) ->
case ets:lookup(T, {Idx, Mod}) of
Something went wrong with that request. Please try again.