Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 315 lines (277 sloc) 11.488 kb
90f6ce8 Andy Gross initial import
argv0 authored
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -author('Justin Sheehy <justin@basho.com>').
35 -author('Andy Gross <andy@basho.com>').
36
37 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
38 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce8 Andy Gross initial import
argv0 authored
39
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
40 -ifdef(TEST).
90f6ce8 Andy Gross initial import
argv0 authored
41 -include_lib("eunit/include/eunit.hrl").
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
42 -endif.
90f6ce8 Andy Gross initial import
argv0 authored
43
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
44 -export_type([vclock/0, timestamp/0, vclock_node/0]).
45
46 -opaque vclock() :: [vc_entry()].
90f6ce8 Andy Gross initial import
argv0 authored
47 % The timestamp takes no part in causality checks (descends/2 ignores it); it is used by prune/3 to age entries and by merge/1 as a tie-breaker, and clients may inspect it.
48 -type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.
49
50 % Nodes can have any term() as a name, but they must differ from each other.
51 -type vclock_node() :: term().
52 -type counter() :: integer().
53 -type timestamp() :: integer().
54
% @doc Return a new vclock that contains no entries.
-spec fresh() -> vclock().
fresh() -> [].
59
% @doc Return true if Va descends from Vb, else false.
%
% Va descends from Vb when every node listed in Vb also appears in Va with a
% counter that is at least as large. Timestamps are ignored. Every vclock
% descends from the empty vclock, and a vclock is its own descendant.
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_Va, []) ->
    % Nothing left in Vb to check.
    true;
descends(Va, Vb) ->
    [{NodeB, {CtrB, _TsB}} | MoreB] = Vb,
    case lists:keyfind(NodeB, 1, Va) of
        {_, {CtrA, _TsA}} when CtrA < CtrB ->
            % Va has seen fewer events from NodeB than Vb has.
            false;
        {_, {_CtrA, _TsA}} ->
            descends(Va, MoreB);
        false ->
            % Va has never heard of NodeB.
            false
    end.
90f6ce8 Andy Gross initial import
argv0 authored
73
% @doc Combine all VClocks in the input list into their least possible
%      common descendant.
%
% An empty list yields the empty vclock and a single-element list yields that
% vclock unchanged (it is not re-sorted). With two or more vclocks, each one
% is key-sorted by node and merged pairwise, so the result is ordered by node.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([]) ->
    [];
merge([Only]) ->
    Only;
merge([Head | Tail]) ->
    merge(Tail, lists:keysort(1, Head)).

% @private
% Fold each remaining vclock into the node-sorted running result.
merge([], Merged) ->
    Merged;
merge([Next | Pending], Merged) ->
    merge(Pending, merge(lists:keysort(1, Next), Merged, [])).

% @private
% Merge two node-sorted vclocks. Acc holds the output built so far, in
% reverse order; whatever remains of the longer input is appended as-is.
merge([], Rest, Acc) ->
    lists:reverse(Acc, Rest);
merge(Rest, [], Acc) ->
    lists:reverse(Acc, Rest);
merge([{NodeA, {_, _}} = EntryA | RestA], [{NodeB, {_, _}} | _] = ClockB, Acc)
  when NodeA < NodeB ->
    merge(RestA, ClockB, [EntryA | Acc]);
merge([{NodeA, {_, _}} | _] = ClockA, [{NodeB, {_, _}} = EntryB | RestB], Acc)
  when NodeA > NodeB ->
    merge(ClockA, RestB, [EntryB | Acc]);
merge([{Node, {_, _} = StampA} | RestA], [{_Same, {_, _} = StampB} | RestB], Acc) ->
    % Same node on both sides: keep a single entry for it.
    merge(RestA, RestB, [{Node, merge_counter_ts(StampA, StampB)} | Acc]).

% @private
% Choose the {Counter, Timestamp} pair with the higher counter. When the
% counters tie, keep the larger timestamp so the result is deterministic.
merge_counter_ts({CtrA, _} = StampA, {CtrB, _}) when CtrA > CtrB ->
    StampA;
merge_counter_ts({CtrA, _}, {CtrB, _} = StampB) when CtrA < CtrB ->
    StampB;
merge_counter_ts({Ctr, TsA}, {_, TsB}) ->
    {Ctr, erlang:max(TsA, TsB)}.
101
% @doc Return the counter recorded for Node in VClock, or 0 when Node has no
%      entry in VClock.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter().
get_counter(Node, VClock) ->
    Entry = lists:keyfind(Node, 1, VClock),
    case Entry of
        false -> 0;
        {_Node, {Counter, _Timestamp}} -> Counter
    end.
109
% @doc Return the timestamp recorded for Node in VClock, or the atom
%      undefined when Node has no entry in VClock.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    Entry = lists:keyfind(Node, 1, VClock),
    case Entry of
        false -> undefined;
        {_Node, {_Counter, Timestamp}} -> Timestamp
    end.
117
% @doc Increment VClock at Node, stamping the entry with the value returned
%      by timestamp/0 at the time of the call.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Stamp = timestamp(),
    increment(Node, Stamp, VClock).
122
% @doc Increment VClock at Node, stamping the entry with IncTs.
%
% If Node already has an entry, its counter is bumped by one and its old
% timestamp is replaced; otherwise a new entry with counter 1 is created.
% In both cases Node's entry becomes the head of the returned vclock.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        {value, {_Node, {Counter, _OldTs}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others];
        false ->
            [{Node, {1, IncTs}} | VClock]
    end.
134
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
135
90f6ce8 Andy Gross initial import
argv0 authored
% @doc Return the node of every entry in VClock, in the order the entries
%      appear, i.e. all nodes that have ever incremented VClock.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
140
213eec2 Erik Søe Sørensen vclock: faster timestamp().
eriksoe authored
% Offset between the Gregorian base used by the calendar module and the Unix
% epoch. The seconds value equals
% calendar:datetime_to_gregorian_seconds({{1970,1,1},{0,0,0}}).
-define(DAYS_FROM_GREGORIAN_BASE_TO_EPOCH, (1970*365+478)).
-define(SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH,
        (?DAYS_FROM_GREGORIAN_BASE_TO_EPOCH * 24*60*60)).

% @doc Return a timestamp for a vector clock: the current time in whole
%      Gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    %% Equivalent to
    %% calendar:datetime_to_gregorian_seconds(erlang:universaltime()),
    %% but computed directly from os:timestamp(), which is much faster.
    {Mega, Secs, _Micro} = os:timestamp(),
    UnixSeconds = Mega * 1000000 + Secs,
    UnixSeconds + ?SECONDS_FROM_GREGORIAN_BASE_TO_EPOCH.
90f6ce8 Andy Gross initial import
argv0 authored
154
% @doc Compare two VClocks for equality.
%
% Entry order is irrelevant: both clocks are sorted first and must then match
% exactly, timestamps included.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    SortedA = lists:sort(VA),
    SortedB = lists:sort(VB),
    SortedA =:= SortedB.
90f6ce8 Andy Gross initial import
argv0 authored
159
% @doc Possibly shrink the size of a vclock, depending on current age and size.
%
% The entries are first ordered oldest-first. A clock with at most
% small_vclock entries, or whose oldest entry is younger than young_vclock,
% is returned as is (in that sorted order). Otherwise the oldest entry is
% dropped repeatedly while the clock has more than big_vclock entries or its
% oldest entry is older than old_vclock.
%
% Now is compared against the entry timestamps by subtraction. BucketProps is
% searched as a list of {Key, Value} tuples for the four limits named above.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% This sort needs to be deterministic, to avoid spurious merge conflicts
    %% later. The node ID serves as the secondary key for equal timestamps.
    OldestFirst = fun({NodeA, {_, TsA}}, {NodeB, {_, TsB}}) ->
                          {TsA, NodeA} < {TsB, NodeB}
                  end,
    prune_vclock1(lists:sort(OldestFirst, V), Now, BucketProps).

% @private
% Decide whether the oldest-first clock V is exempt from pruning.
prune_vclock1(V, Now, BProps) ->
    case length(V) > get_property(small_vclock, BProps) of
        false ->
            % Small enough: keep everything.
            V;
        true ->
            {_, {_, HeadTime}} = hd(V),
            case (Now - HeadTime) >= get_property(young_vclock, BProps) of
                false ->
                    % Even the oldest entry is still young: keep everything.
                    V;
                true ->
                    prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.

% @private
% Precondition: V is longer than small_vclock and its head is not younger
% than young_vclock. Drops the head when the clock is too big or too old.
prune_vclock1(V, Now, BProps, HeadTime) ->
    TooBig = length(V) > get_property(big_vclock, BProps),
    MustDrop = TooBig orelse ((Now - HeadTime) > get_property(old_vclock, BProps)),
    case MustDrop of
        false -> V;
        true -> prune_vclock1(tl(V), Now, BProps)
    end.

% @private
% Look up Key in a list of {Key, Value} pairs, like proplists:get_value/2:
% a missing key yields the atom undefined.
get_property(Key, PairList) ->
    case lists:keyfind(Key, 1, PairList) of
        false -> undefined;
        {_Found, Value} -> Value
    end.
c4fe87f Erik Søe Sørensen vclock: Replacing proplists:get_value() calls with far faster lists:keyf...
eriksoe authored
194
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
195 %% ===================================================================
196 %% EUnit tests
197 %% ===================================================================
198 -ifdef(TEST).
199
% @doc A trivial end-to-end test that doubles as example code for the API.
example_test() ->
    Empty = vclock:fresh(),
    A1 = vclock:increment(a, Empty),
    B1 = vclock:increment(b, Empty),
    % Each incremented clock descends from its ancestor, but the two
    % siblings do not descend from one another.
    ?assert(vclock:descends(A1, Empty)),
    ?assert(vclock:descends(B1, Empty)),
    ?assertNot(vclock:descends(A1, B1)),
    A2 = vclock:increment(a, A1),
    Merged = vclock:merge([A2, B1]),
    C1 = vclock:increment(c, Merged),
    % The merged-and-incremented clock descends from both of its parents.
    ?assert(vclock:descends(C1, A2)),
    ?assert(vclock:descends(C1, B1)),
    ?assertNot(vclock:descends(B1, C1)),
    ?assertNot(vclock:descends(B1, A1)),
    ok.
217
90f6ce8 Andy Gross initial import
argv0 authored
% A vclock with fewer entries than small_vclock is left untouched, however
% old its entries are.
prune_small_test() ->
    Now = riak_core_util:moment(),
    Stale = Now - 32000000,
    Clock = [{<<"1">>, {1, Stale}},
             {<<"2">>, {2, Stale}},
             {<<"3">>, {3, Stale}}],
    Pruned = prune(Clock, Now, [{small_vclock, 4}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
227
% A vclock whose entries are all younger than young_vclock is left untouched.
prune_young_test() ->
    Now = riak_core_util:moment(),
    Recent = Now - 1,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Recent}},
             {<<"3">>, {3, Recent}}],
    Limits = [{small_vclock, 1}, {young_vclock, 1000}],
    Pruned = prune(Clock, Now, Limits),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
237
% A vclock that is exempted by neither small_vclock nor young_vclock is cut
% down to at most big_vclock entries.
prune_big_test() ->
    Now = riak_core_util:moment(),
    EntryTime = Now - 1000,
    Clock = [{<<"1">>, {1, EntryTime}},
             {<<"2">>, {2, EntryTime}},
             {<<"3">>, {3, EntryTime}}],
    Limits = [{small_vclock, 1}, {young_vclock, 1},
              {big_vclock, 2}, {old_vclock, 100000}],
    ?assertEqual(2, length(prune(Clock, Now, Limits))).
249
% A vclock that is exempted by neither small_vclock nor young_vclock is cut
% down to at most big_vclock entries, and additionally loses every entry
% stamped more than old_vclock ago.
prune_old_test() ->
    Now = riak_core_util:moment(),
    Recent = Now - 1000,
    Stale = Now - 100000,
    Clock = [{<<"1">>, {1, Recent}},
             {<<"2">>, {2, Stale}},
             {<<"3">>, {3, Stale}}],
    Limits = [{small_vclock, 1}, {young_vclock, 1},
              {big_vclock, 2}, {old_vclock, 10000}],
    ?assertEqual(1, length(prune(Clock, Now, Limits))).
262
7ce0ed9 Jon Meredith Added test for determinstic vclock pruning for nodes with same timestamp...
jonmeredith authored
% Two nodes sharing the same timestamp must be pruned down to the same node,
% regardless of the order in which the entries were supplied.
prune_order_test() ->
    Now = riak_core_util:moment(),
    Stale = Now - 100000,
    Forward = [{<<"1">>, {1, Stale}},
               {<<"2">>, {2, Stale}}],
    Backward = lists:reverse(Forward),
    Limits = [{small_vclock, 1}, {young_vclock, 1},
              {big_vclock, 2}, {old_vclock, 10000}],
    ?assertEqual(prune(Forward, Now, Limits), prune(Backward, Now, Limits)).
274
90f6ce8 Andy Gross initial import
argv0 authored
% Exercises get_counter/2, get_timestamp/2 and all_nodes/1, including the
% results returned for a node that has no entry.
accessor_test() ->
    Clock = [{<<"1">>, {1, 1}},
             {<<"2">>, {2, 2}}],
    % Nodes that are present.
    ?assertEqual(1, get_counter(<<"1">>, Clock)),
    ?assertEqual(1, get_timestamp(<<"1">>, Clock)),
    ?assertEqual(2, get_counter(<<"2">>, Clock)),
    ?assertEqual(2, get_timestamp(<<"2">>, Clock)),
    % A node that is absent.
    ?assertEqual(0, get_counter(<<"3">>, Clock)),
    ?assertEqual(undefined, get_timestamp(<<"3">>, Clock)),
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(Clock)).
285
% Merging nothing gives the empty vclock; merging two clocks keeps the
% highest counter per node and returns the entries ordered by node.
merge_test() ->
    Left = [{<<"1">>, {1, 1}},
            {<<"2">>, {2, 2}},
            {<<"4">>, {4, 4}}],
    Right = [{<<"3">>, {3, 3}},
             {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>, {1, 1}},
                {<<"2">>, {2, 2}},
                {<<"3">>, {3, 3}},
                {<<"4">>, {4, 4}}],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([Left, Right])).
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
295
b58266d Jon Meredith Added extra unit tests for vclock.
jonmeredith authored
% The first clock holds only a node that sorts before every node of the second.
merge_less_left_test() ->
    Left = [{<<"5">>, {5, 5}}],
    Right = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Expected = [{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([Left, Right])).
301
% The second clock holds only a node that sorts before every node of the first.
merge_less_right_test() ->
    Left = [{<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    Right = [{<<"5">>, {5, 5}}],
    Expected = [{<<"5">>, {5, 5}}, {<<"6">>, {6, 6}}, {<<"7">>, {7, 7}}],
    ?assertEqual(Expected, vclock:merge([Left, Right])).
307
% When both clocks hold the same node with the same counter, the merged entry
% carries the larger of the two timestamps.
merge_same_id_test() ->
    Left = [{<<"1">>, {1, 2}}, {<<"2">>, {1, 4}}],
    Right = [{<<"1">>, {1, 3}}, {<<"3">>, {1, 5}}],
    Expected = [{<<"1">>, {1, 3}}, {<<"2">>, {1, 4}}, {<<"3">>, {1, 5}}],
    ?assertEqual(Expected, vclock:merge([Left, Right])).
313
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
314 -endif.
Something went wrong with that request. Please try again.