Skip to content
This repository
Newer
Older
100644 311 lines (275 sloc) 11.401 kb
90f6ce85 » argv0
2010-07-21 initial import
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
97231e05 » Jon Meredith
2011-03-16 Add vclock:increment variant that accepts a timestamp.
35 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce85 » argv0
2010-07-21 initial import
36
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
37 -ifdef(TEST).
90f6ce85 » argv0
2010-07-21 initial import
38 -include_lib("eunit/include/eunit.hrl").
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
39 -endif.
90f6ce85 » argv0
2010-07-21 initial import
40
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
41 -export_type([vclock/0, timestamp/0, vclock_node/0]).
42
% A vclock is a list of per-node entries; the representation is opaque to
% code outside this module.
-opaque vclock() :: [vc_entry()].

% Each entry pairs a node with its counter and a timestamp. The timestamp
% plays no part in descends/2; it is kept for prune/3, for breaking counter
% ties in merge/1, and for clients that wish to inspect it.
-type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.

% Nodes can have any term() as a name, but they must differ from each other.
-type vclock_node() :: term().
-type counter() :: integer().
-type timestamp() :: integer().
51
% @doc Return a new vclock that contains no entries.
-spec fresh() -> vclock().
fresh() -> [].
56
% @doc Return true if Va is a direct descendant of Vb, else false.
% A vclock is its own descendant, and every vclock descends from the
% empty vclock. Timestamps are ignored; only counters are compared.
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_Va, []) ->
    %% nothing left in Vb to check
    true;
descends(Va, [{NodeB, {CtrB, _TsB}} | RestB]) ->
    %% Every node in Vb must appear in Va with a counter at least as large.
    case lists:keyfind(NodeB, 1, Va) of
        {_NodeA, {CtrA, _TsA}} when CtrA >= CtrB ->
            descends(Va, RestB);
        {_NodeA, {_CtrA, _TsA}} ->
            false;
        false ->
            false
    end.
90f6ce85 » argv0
2010-07-21 initial import
70
% @doc Combine all VClocks in the input list into their least possible
% common descendant.
% An empty list yields the empty vclock. A list holding a single vclock
% returns that vclock as given (it is not re-sorted). With two or more
% vclocks, each is key-sorted by node and folded into the result.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([]) ->
    [];
merge([Only]) ->
    Only;
merge([Head | Tail]) ->
    SortedHead = lists:keysort(1, Head),
    merge(Tail, SortedHead).
77
% @private
% Fold every vclock in VClocks into NClock, which must already be key-sorted
% by node. Each incoming vclock is key-sorted before being merged pairwise.
merge(VClocks, NClock) ->
    lists:foldl(fun(AClock, Merged) ->
                        merge(lists:keysort(1, AClock), Merged, [])
                end,
                NClock,
                VClocks).
81
bc17d412 » jonmeredith
2011-08-30 Change vclock:merge to give deterministic result for an id with same …
% @private
% Merge two node-sorted entry lists. AccClock collects the output in reverse
% order and is reversed onto whatever remains once either input runs out.
% When both lists hold the same node, the entry with the larger counter wins;
% on equal counters the larger timestamp is kept so the result does not
% depend on argument order.
merge([], Remaining, AccClock) ->
    lists:reverse(AccClock, Remaining);
merge(Remaining, [], AccClock) ->
    lists:reverse(AccClock, Remaining);
merge([{Node1, {_, _}} = Entry1 | VClock],
      [{Node2, {_, _}} | _] = N,
      AccClock) when Node1 < Node2 ->
    merge(VClock, N, [Entry1 | AccClock]);
merge([{Node1, {_, _}} | _] = V,
      [{Node2, {_, _}} = Entry2 | NClock],
      AccClock) when Node1 > Node2 ->
    merge(V, NClock, [Entry2 | AccClock]);
merge([{Node1, {Ctr1, TS1} = CT1} | VClock],
      [{_Node2, {Ctr2, TS2} = CT2} | NClock],
      AccClock) ->
    %% Same node on both sides: pick the winning {Counter, Timestamp} pair.
    Winner = if
                 Ctr1 > Ctr2 -> CT1;
                 Ctr1 < Ctr2 -> CT2;
                 true -> {Ctr1, erlang:max(TS1, TS2)}
             end,
    merge(VClock, NClock, [{Node1, Winner} | AccClock]).
98
% @doc Get the counter value in VClock set from Node.
% Returns 0 when VClock holds no entry for Node.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter().
get_counter(Node, VClock) ->
    Found = lists:keyfind(Node, 1, VClock),
    case Found of
        false -> 0;
        {_Node, {Counter, _Timestamp}} -> Counter
    end.
106
% @doc Get the timestamp value in a VClock set from Node.
% Returns the atom undefined when VClock holds no entry for Node.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    Found = lists:keyfind(Node, 1, VClock),
    case Found of
        false -> undefined;
        {_Node, {_Counter, Timestamp}} -> Timestamp
    end.
114
% @doc Increment VClock at Node, stamping the entry with the current
% value of timestamp/0.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    IncTs = timestamp(),
    increment(Node, IncTs, VClock).
119
% @doc Increment VClock at Node, recording IncTs as the entry's timestamp.
% A node that is not yet present starts at counter 1. The updated entry is
% always placed at the head of the returned vclock.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        false ->
            [{Node, {1, IncTs}} | VClock];
        {value, {_N, {Counter, _OldTs}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others]
    end.
131
97231e05 » Jon Meredith
2011-03-16 Add vclock:increment variant that accepts a timestamp.
132
90f6ce85 » argv0
2010-07-21 initial import
% @doc Return the list of all nodes that have ever incremented VClock,
% in the order their entries appear in VClock.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
137
213eec2c » eriksoe
2011-07-08 vclock: faster timestamp().
%% Number of days from the start of the Gregorian calendar (year 0) to
%% 1970-01-01.
-define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, (1970*365+478)).
%% The same offset in seconds;
%% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}})
-define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH,
        (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH * 24*60*60)).
143
97231e05 » Jon Meredith
2011-03-16 Add vclock:increment variant that accepts a timestamp.
% @doc Return a timestamp for a vector clock: the current time expressed in
% Gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    %% Same as calendar:datetime_to_gregorian_seconds(erlang:universaltime()),
    %% but significantly faster.
    {Mega, Secs, _Micro} = os:timestamp(),
    SecondsSinceEpoch = Mega * 1000000 + Secs,
    ?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH + SecondsSinceEpoch.
90f6ce85 » argv0
2010-07-21 initial import
151
% @doc Compares two VClocks for equality.
% Entry order is irrelevant, but whole entries are compared, so two vclocks
% with the same counters and different timestamps are not equal.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    SortedA = lists:sort(VA),
    SortedB = lists:sort(VB),
    SortedA =:= SortedB.
90f6ce85 » argv0
2010-07-21 initial import
156
% @doc Possibly shrink the size of a vclock, depending on current age and size.
% V is the vclock to prune, Now is the current time on the same scale as the
% entry timestamps, and BucketProps is a key/value list read for the
% small_vclock, young_vclock, big_vclock and old_vclock thresholds.
% Entries are ordered oldest-first and pruning removes entries from that end.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% This sort needs to be deterministic, to avoid spurious merge conflicts later.
    %% We achieve this by using the node ID as secondary key.
    %% lists:sort/2 expects the ordering fun to return true when the first
    %% argument is less than *or equal to* the second, hence =< rather than <.
    OldestFirst = fun({N1, {_, T1}}, {N2, {_, T2}}) -> {T1, N1} =< {T2, N2} end,
    SortV = lists:sort(OldestFirst, V),
    prune_vclock1(SortV, Now, BucketProps).
% @private
% First pruning stage. V must be sorted oldest-first. V is returned unchanged
% when it has no more than small_vclock entries, or when its oldest entry is
% younger than young_vclock; otherwise the big/old checks in
% prune_vclock1/4 decide. A threshold missing from BProps reads as the atom
% undefined, which compares greater than any integer, so the corresponding
% check keeps V as is.
prune_vclock1(V, Now, BProps) ->
    AboveSmall = length(V) > get_property(small_vclock, BProps),
    case AboveSmall of
        false ->
            V;
        true ->
            {_, {_, HeadTime}} = hd(V),
            Age = Now - HeadTime,
            case Age >= get_property(young_vclock, BProps) of
                false -> V;
                true -> prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.
% @private
% Second pruning stage; has a precondition that V is longer than small and
% older than young. Drops the oldest (head) entry and restarts the checks
% when V has more than big_vclock entries or its head is older than
% old_vclock; otherwise V is returned as is.
prune_vclock1(V, Now, BProps, HeadTime) ->
    TooBig = length(V) > get_property(big_vclock, BProps),
    ShouldDrop = TooBig orelse
        ((Now - HeadTime) > get_property(old_vclock, BProps)),
    case ShouldDrop of
        false -> V;
        true -> prune_vclock1(tl(V), Now, BProps)
    end.
183
c4fe87f5 » eriksoe
2011-04-08 vclock: Replacing proplists:get_value() calls with far faster lists:k…
% @private
% Look up Key in a list of {Key, Value} pairs. Returns the value of the
% first matching pair, or the atom undefined when there is none.
get_property(Key, PairList) ->
    Found = lists:keyfind(Key, 1, PairList),
    case Found of
        false -> undefined;
        {_Key, Value} -> Value
    end.
c4fe87f5 » eriksoe
2011-04-08 vclock: Replacing proplists:get_value() calls with far faster lists:k…
191
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
192 %% ===================================================================
193 %% EUnit tests
194 %% ===================================================================
195 -ifdef(TEST).
196
e2759dc5 » matehat
2011-11-15 A few changes to make the docs compile
% Serves as both a trivial test and some example code.
example_test() ->
    A = vclock:fresh(),
    B = vclock:fresh(),
    A1 = vclock:increment(a, A),
    B1 = vclock:increment(b, B),
    %% an incremented clock descends from its ancestor ...
    ?assert(vclock:descends(A1, A)),
    ?assert(vclock:descends(B1, B)),
    %% ... but not from a sibling
    ?assertNot(vclock:descends(A1, B1)),
    A2 = vclock:increment(a, A1),
    C = vclock:merge([A2, B1]),
    C1 = vclock:increment(c, C),
    %% a merged clock descends from everything merged into it
    ?assert(vclock:descends(C1, A2)),
    ?assert(vclock:descends(C1, B1)),
    ?assertNot(vclock:descends(B1, C1)),
    ?assertNot(vclock:descends(B1, A1)),
    ok.
214
90f6ce85 » argv0
2010-07-21 initial import
prune_small_test() ->
    %% a vclock with fewer entries than small_vclock will be untouched
    Now = riak_core_util:moment(),
    OldTime = Now - 32000000,
    SmallVC = [{Node, {Ctr, OldTime}}
               || {Node, Ctr} <- [{<<"1">>, 1}, {<<"2">>, 2}, {<<"3">>, 3}]],
    Pruned = prune(SmallVC, Now, [{small_vclock,4}]),
    ?assertEqual(lists:sort(SmallVC), lists:sort(Pruned)).
224
prune_young_test() ->
    %% a vclock whose entries are all younger than young_vclock will be untouched
    Now = riak_core_util:moment(),
    NewTime = Now - 1,
    VC = [{Node, {Ctr, NewTime}}
          || {Node, Ctr} <- [{<<"1">>, 1}, {<<"2">>, 2}, {<<"3">>, 3}]],
    Pruned = prune(VC, Now, [{small_vclock,1},{young_vclock,1000}]),
    ?assertEqual(lists:sort(VC), lists:sort(Pruned)).
234
prune_big_test() ->
    %% a vclock not preserved by small or young will be pruned down to
    %% no more than big_vclock entries
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    VC = [{Node, {Ctr, NewTime}}
          || {Node, Ctr} <- [{<<"1">>, 1}, {<<"2">>, 2}, {<<"3">>, 3}]],
    Props = [{small_vclock,1}, {young_vclock,1},
             {big_vclock,2}, {old_vclock,100000}],
    Pruned = prune(VC, Now, Props),
    ?assertEqual(2, length(Pruned)).
246
prune_old_test() ->
    %% a vclock not preserved by small or young will be pruned down to
    %% no more than big_vclock entries, none of them older than old_vclock
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    OldTime = Now - 100000,
    Recent = {<<"1">>, {1, NewTime}},
    Stale = [{<<"2">>, {2, OldTime}}, {<<"3">>, {3, OldTime}}],
    VC = [Recent | Stale],
    Props = [{small_vclock,1}, {young_vclock,1},
             {big_vclock,2}, {old_vclock,10000}],
    Pruned = prune(VC, Now, Props),
    ?assertEqual(1, length(Pruned)).
259
7ce0ed9c » jonmeredith
2011-03-03 Added test for determinstic vclock pruning for nodes with same timest…
prune_order_test() ->
    %% two nodes sharing a timestamp must be pruned to the same survivor,
    %% whatever order the entries arrive in
    Now = riak_core_util:moment(),
    OldTime = Now - 100000,
    Forward = [{<<"1">>, {1, OldTime}},
               {<<"2">>, {2, OldTime}}],
    Backward = lists:reverse(Forward),
    Props = [{small_vclock,1}, {young_vclock,1},
             {big_vclock,2}, {old_vclock,10000}],
    ?assertEqual(prune(Forward, Now, Props), prune(Backward, Now, Props)).
271
90f6ce85 » argv0
2010-07-21 initial import
accessor_test() ->
    %% counters and timestamps are read per node; a missing node reads as
    %% counter 0 and timestamp undefined
    One = <<"1">>,
    Two = <<"2">>,
    Missing = <<"3">>,
    VC = [{One, {1, 1}},
          {Two, {2, 2}}],
    ?assertEqual(1, get_counter(One, VC)),
    ?assertEqual(1, get_timestamp(One, VC)),
    ?assertEqual(2, get_counter(Two, VC)),
    ?assertEqual(2, get_timestamp(Two, VC)),
    ?assertEqual(0, get_counter(Missing, VC)),
    ?assertEqual(undefined, get_timestamp(Missing, VC)),
    ?assertEqual([One, Two], all_nodes(VC)).
282
merge_test() ->
    %% node <<"4">> is in both inputs; the higher counter (4) must win
    VC1 = [{<<"1">>, {1, 1}},
           {<<"2">>, {2, 2}},
           {<<"4">>, {4, 4}}],
    VC2 = [{<<"3">>, {3, 3}},
           {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>, {1, 1}},
                {<<"2">>, {2, 2}},
                {<<"3">>, {3, 3}},
                {<<"4">>, {4, 4}}],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([VC1, VC2])).
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
292
b58266db » jonmeredith
2011-08-30 Added extra unit tests for vclock.
merge_less_left_test() ->
    %% the first vclock sorts entirely before the second
    Low = [{<<"5">>, {5, 5}}],
    High = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Expected = [{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([Low, High])).
298
merge_less_right_test() ->
    %% the second vclock sorts entirely before the first
    High = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Low = [{<<"5">>, {5, 5}}],
    Expected = [{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([High, Low])).
304
merge_same_id_test() ->
    %% node <<"1">> has the same counter on both sides; the later timestamp
    %% (3) must be kept
    Left = [{<<"1">>, {1, 2}}, {<<"2">>, {1, 4}}],
    Right = [{<<"1">>, {1, 3}}, {<<"3">>, {1, 5}}],
    Expected = [{<<"1">>, {1, 3}}, {<<"2">>, {1, 4}}, {<<"3">>, {1, 5}}],
    ?assertEqual(Expected, vclock:merge([Left, Right])).
310
ec82f3a7 » argv0
2011-04-04 dialyzer: riak_core fixes
311 -endif.
Something went wrong with that request. Please try again.