Skip to content
This repository
Newer
Older
100644 315 lines (277 sloc) 11.487 kb
90f6ce85 »
2010-07-21 initial import
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -author('Justin Sheehy <justin@basho.com>').
35 -author('Andy Gross <andy@basho.com>').
36
37 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
97231e05 »
2011-03-16 Add vclock:increment variant that accepts a timestamp.
38 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce85 »
2010-07-21 initial import
39
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
40 -ifdef(TEST).
90f6ce85 »
2010-07-21 initial import
41 -include_lib("eunit/include/eunit.hrl").
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
42 -endif.
90f6ce85 »
2010-07-21 initial import
43
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
44 -export_type([vclock/0, timestamp/0, vclock_node/0]).
45
46 -opaque vclock() :: [vc_entry()].
90f6ce85 »
2010-07-21 initial import
47 % The timestamp plays no part in ordering (descends/2 ignores it); it is read by prune/3 and by merge/1 on counter ties, and is kept in case a client wishes to inspect it.
48 -type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.
49
50 % Nodes can have any term() as a name, but they must differ from each other.
51 -type vclock_node() :: term().
52 -type counter() :: integer().
53 -type timestamp() :: integer().
54
% @doc Build an empty vclock, i.e. one in which no node has an entry yet.
-spec fresh() -> vclock().
fresh() -> [].
59
% @doc Return true if Va descends from Vb, else false.
% Va descends from Vb when every node listed in Vb also appears in Va with a
% counter that is at least as large. Timestamps are not consulted. Every
% vclock descends from the empty vclock, and a vclock is its own descendant.
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_Va, []) ->
    % nothing left in Vb to account for
    true;
descends(Va, [{Node, {CtrB, _TsB}} | RestB]) ->
    case lists:keyfind(Node, 1, Va) of
        {_, {CtrA, _TsA}} when CtrA >= CtrB ->
            % this node is covered; check the remaining entries of Vb
            descends(Va, RestB);
        {_, {_CtrA, _TsA}} ->
            % Va has seen fewer events from Node than Vb has
            false;
        false ->
            % Va has never heard of Node
            false
    end.
90f6ce85 »
2010-07-21 initial import
73
% @doc Combine all VClocks in the input list into their least possible
% common descendant.
% For a node present in more than one clock the entry with the highest
% counter wins; when the counters tie, the larger of the two timestamps is
% kept so that the outcome is deterministic. A list holding a single clock
% yields that clock untouched (it is not sorted); with two or more clocks
% every input is key-sorted by node before being merged.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([]) ->
    [];
merge([Only]) ->
    Only;
merge([First | Others]) ->
    merge(Others, lists:keysort(1, First)).

% Fold each remaining clock, once key-sorted, into the sorted accumulator.
merge(Clocks, Sorted) ->
    lists:foldl(fun(Clock, Acc) ->
                        merge(lists:keysort(1, Clock), Acc, [])
                end,
                Sorted,
                Clocks).

% Merge two node-sorted clocks; Acc collects the result in reverse order.
merge([], Right, Acc) ->
    lists:reverse(Acc, Right);
merge(Left, [], Acc) ->
    lists:reverse(Acc, Left);
merge([{NodeL, {_, _}} = EntryL | RestL],
      [{NodeR, {_, _}} | _] = Right, Acc) when NodeL < NodeR ->
    merge(RestL, Right, [EntryL | Acc]);
merge([{NodeL, {_, _}} | _] = Left,
      [{NodeR, {_, _}} = EntryR | RestR], Acc) when NodeL > NodeR ->
    merge(Left, RestR, [EntryR | Acc]);
merge([{Node, {_, _} = CountL} | RestL],
      [{_, {_, _} = CountR} | RestR], Acc) ->
    % same node on both sides: keep a single, reconciled entry
    merge(RestL, RestR, [{Node, merge_counts(CountL, CountR)} | Acc]).

% Pick the {Counter, Timestamp} pair with the higher counter; on a counter
% tie keep the counter and take the larger timestamp.
merge_counts({CtrL, _} = Left, {CtrR, _}) when CtrL > CtrR ->
    Left;
merge_counts({CtrL, _}, {CtrR, _} = Right) when CtrL < CtrR ->
    Right;
merge_counts({Ctr, TsL}, {_, TsR}) ->
    {Ctr, erlang:max(TsL, TsR)}.
101
% @doc Look up the counter recorded for Node in VClock.
% A node without an entry is reported as 0.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter().
get_counter(Node, VClock) ->
    case lists:keyfind(Node, 1, VClock) of
        false ->
            0;
        {_Node, {Counter, _Timestamp}} ->
            Counter
    end.
109
% @doc Look up the timestamp recorded for Node in VClock.
% Returns the atom undefined when Node has no entry.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    case lists:keyfind(Node, 1, VClock) of
        false ->
            undefined;
        {_Node, {_Counter, Timestamp}} ->
            Timestamp
    end.
117
% @doc Increment VClock at Node, stamping the entry with the current time
% as returned by timestamp/0.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Now = timestamp(),
    increment(Node, Now, VClock).
122
% @doc Increment VClock at Node, stamping the entry with IncTs.
% A node seen for the first time starts at counter 1; otherwise its counter
% is bumped by one and its previous timestamp is replaced by IncTs. The
% updated entry is placed at the head of the returned vclock.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        {value, {_Node, {Counter, _OldTs}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others];
        false ->
            [{Node, {1, IncTs}} | VClock]
    end.
134
97231e05 »
2011-03-16 Add vclock:increment variant that accepts a timestamp.
135
90f6ce85 »
2010-07-21 initial import
% @doc Return the nodes that have an entry in VClock, in the order in which
% the entries appear. Elements that are not shaped like
% {Node, {Counter, Timestamp}} are skipped.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
140
213eec2c »
2011-07-08 vclock: faster timestamp().
% Days between the start of the Gregorian calendar used by the calendar
% module (year 0) and 1970-01-01: 1970 years of 365 days plus 478 leap days.
-define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, (1970*365+478)).
-define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH,
        (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH * 24*60*60)
        %% == calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}})
       ).

% @doc Return a timestamp for a vector clock: the current time expressed
% in whole gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    %% Yields the same value as
    %% calendar:datetime_to_gregorian_seconds(erlang:universaltime()),
    %% but is significantly faster.
    {MegaSecs, Secs, _MicroSecs} = os:timestamp(),
    UnixSeconds = MegaSecs * 1000000 + Secs,
    ?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH + UnixSeconds.
90f6ce85 »
2010-07-21 initial import
154
% @doc Compares two VClocks for equality.
% Entry order is irrelevant, but node, counter and timestamp of every entry
% must all be exactly equal.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    SortedA = lists:sort(VA),
    SortedB = lists:sort(VB),
    SortedA =:= SortedB.
90f6ce85 »
2010-07-21 initial import
159
% @doc Possibly shrink the size of a vclock, depending on current age and size.
% The entries are first ordered oldest-first, and the oldest entry is then
% dropped repeatedly until one of the following holds:
%   - there are no more than small_vclock entries left,
%   - the oldest remaining entry is younger than young_vclock, or
%   - there are no more than big_vclock entries left and the oldest one is
%     no older than old_vclock.
% The thresholds are read from BucketProps and ages are measured against
% Now. The returned vclock is in the sorted order even when nothing was
% dropped.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% The ordering has to be deterministic, to avoid spurious merge
    %% conflicts later, so the node ID breaks ties between equal timestamps.
    OldestFirst = fun({NodeA, {_, TsA}}, {NodeB, {_, TsB}}) ->
                          {TsA, NodeA} < {TsB, NodeB}
                  end,
    prune_vclock1(lists:sort(OldestFirst, V), Now, BucketProps).

% @private
% Keep V as is when it is small enough or its oldest entry is young enough;
% otherwise move on to the big/old check.
prune_vclock1(V, Now, BProps) ->
    SmallLimit = get_property(small_vclock, BProps),
    case length(V) > SmallLimit of
        false ->
            V;
        true ->
            {_, {_, HeadTime}} = hd(V),
            YoungLimit = get_property(young_vclock, BProps),
            case (Now - HeadTime) >= YoungLimit of
                false -> V;
                true -> prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.

% @private
% Precondition: V is longer than small_vclock and its head is not younger
% than young_vclock. Drops the head when V is too big or the head too old.
prune_vclock1([_Oldest | Newer] = V, Now, BProps, HeadTime) ->
    TooBig = length(V) > get_property(big_vclock, BProps),
    %% old_vclock is only looked up when the size check has not already
    %% decided the outcome
    MustDrop = TooBig orelse
        ((Now - HeadTime) > get_property(old_vclock, BProps)),
    case MustDrop of
        true -> prune_vclock1(Newer, Now, BProps);
        false -> V
    end.

% Fetch the value stored under Key in a list of {Key, Value} pairs, or the
% atom undefined when the key is absent.
get_property(Key, PairList) ->
    case lists:keyfind(Key, 1, PairList) of
        false ->
            undefined;
        {_Key, Value} ->
            Value
    end.
c4fe87f5 »
2011-04-08 vclock: Replacing proplists:get_value() calls with far faster lists:k…
194
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
195 %% ===================================================================
196 %% EUnit tests
197 %% ===================================================================
198 -ifdef(TEST).
199
e2759dc5 »
2011-11-15 A few changes to make the docs compile
% doc Doubles as a smoke test and as a short usage example of the API.
example_test() ->
    EmptyA = vclock:fresh(),
    EmptyB = vclock:fresh(),
    % two independent writers each record one event
    OnlyA = vclock:increment(a, EmptyA),
    OnlyB = vclock:increment(b, EmptyB),
    true = vclock:descends(OnlyA, EmptyA),
    true = vclock:descends(OnlyB, EmptyB),
    false = vclock:descends(OnlyA, OnlyB),
    % a second event at a, then reconcile with b and record an event at c
    TwiceA = vclock:increment(a, OnlyA),
    Merged = vclock:merge([TwiceA, OnlyB]),
    MergedC = vclock:increment(c, Merged),
    true = vclock:descends(MergedC, TwiceA),
    true = vclock:descends(MergedC, OnlyB),
    false = vclock:descends(OnlyB, MergedC),
    false = vclock:descends(OnlyB, OnlyA),
    ok.
217
90f6ce85 »
2010-07-21 initial import
prune_small_test() ->
    % a vclock with fewer entries than small_vclock is left untouched,
    % however old those entries are
    Now = riak_core_util:moment(),
    Ancient = Now - 32000000,
    Clock = [{<<"1">>, {1, Ancient}},
             {<<"2">>, {2, Ancient}},
             {<<"3">>, {3, Ancient}}],
    Props = [{small_vclock,4}],
    Pruned = prune(Clock, Now, Props),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
227
prune_young_test() ->
    % a vclock whose entries are all younger than young_vclock is left
    % untouched, even though it is longer than small_vclock
    Now = riak_core_util:moment(),
    Recent = Now - 1,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Recent}},
             {<<"3">>, {3, Recent}}],
    Props = [{small_vclock,1},{young_vclock,1000}],
    Pruned = prune(Clock, Now, Props),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
237
prune_big_test() ->
    % a vclock that is neither small nor young is cut down until it holds
    % no more than big_vclock entries
    Now = riak_core_util:moment(),
    EntryTime = Now - 1000,
    Clock = [{<<"1">>, {1, EntryTime}},
             {<<"2">>, {2, EntryTime}},
             {<<"3">>, {3, EntryTime}}],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,100000}],
    ?assertEqual(2, length(prune(Clock, Now, Props))).
249
prune_old_test() ->
    % a vclock that is neither small nor young is cut down until it holds
    % no more than big_vclock entries and none older than old_vclock
    Now = riak_core_util:moment(),
    Recent = Now - 1000,
    Stale = Now - 100000,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Stale}},
             {<<"3">>, {3, Stale}}],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(1, length(prune(Clock, Now, Props))).
262
7ce0ed9c »
2011-03-03 Added test for determinstic vclock pruning for nodes with same timest…
prune_order_test() ->
    % two nodes sharing one timestamp must be pruned down to the same
    % survivor whatever order the entries arrive in
    Now = riak_core_util:moment(),
    Stale = Now - 100000,
    Forward = [{<<"1">>, {1, Stale}},
               {<<"2">>, {2, Stale}}],
    Backward = lists:reverse(Forward),
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(prune(Forward, Now, Props), prune(Backward, Now, Props)).
274
90f6ce85 »
2010-07-21 initial import
accessor_test() ->
    Clock = [{<<"1">>, {1, 1}},
             {<<"2">>, {2, 2}}],
    % nodes that are present report their stored counter and timestamp
    ?assertEqual(1, get_counter(<<"1">>, Clock)),
    ?assertEqual(1, get_timestamp(<<"1">>, Clock)),
    ?assertEqual(2, get_counter(<<"2">>, Clock)),
    ?assertEqual(2, get_timestamp(<<"2">>, Clock)),
    % a missing node has counter 0 and no timestamp
    ?assertEqual(0, get_counter(<<"3">>, Clock)),
    ?assertEqual(undefined, get_timestamp(<<"3">>, Clock)),
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(Clock)).
285
merge_test() ->
    Left = [{<<"1">>, {1, 1}},
            {<<"2">>, {2, 2}},
            {<<"4">>, {4, 4}}],
    Right = [{<<"3">>, {3, 3}},
             {<<"4">>, {3, 3}}],
    % fresh() is the empty list, so this is merge/1 applied to no clocks
    ?assertEqual([], merge(vclock:fresh())),
    % node <<"4">> is on both sides; the higher counter (4) is kept
    Expected = [{<<"1">>,{1,1}},{<<"2">>,{2,2}},{<<"3">>,{3,3}},{<<"4">>,{4,4}}],
    ?assertEqual(Expected, merge([Left, Right])).
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
295
b58266db »
2011-08-30 Added extra unit tests for vclock.
merge_less_left_test() ->
    % the first clock runs out before the second one does
    Short = [{<<"5">>, {5, 5}}],
    Long = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Expected = [{<<"5">>, {5, 5}},{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([Short, Long])).
301
merge_less_right_test() ->
    % the second clock runs out before the first one does
    Long = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Short = [{<<"5">>, {5, 5}}],
    Expected = [{<<"5">>, {5, 5}},{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([Long, Short])).
307
merge_same_id_test() ->
    % node <<"1">> has the same counter in both clocks, so the larger
    % timestamp (3) has to win
    Left = [{<<"1">>, {1, 2}},{<<"2">>,{1,4}}],
    Right = [{<<"1">>, {1, 3}},{<<"3">>,{1,5}}],
    Expected = [{<<"1">>, {1, 3}},{<<"2">>,{1,4}},{<<"3">>,{1,5}}],
    ?assertEqual(Expected, vclock:merge([Left, Right])).
313
ec82f3a7 »
2011-04-04 dialyzer: riak_core fixes
314 -endif.
Something went wrong with that request. Please try again.