Skip to content
Newer
Older
100644 288 lines (254 sloc) 10.1 KB
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -author('Justin Sheehy <justin@basho.com>').
35 -author('Andy Gross <andy@basho.com>').
36
37 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored Mar 16, 2011
38 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
39
ec82f3a @argv0 dialyzer: riak_core fixes
argv0 authored Apr 4, 2011
40 -ifdef(TEST).
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
41 -include_lib("eunit/include/eunit.hrl").
ec82f3a @argv0 dialyzer: riak_core fixes
argv0 authored Apr 4, 2011
42 -endif.
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
43
ec82f3a @argv0 dialyzer: riak_core fixes
argv0 authored Apr 4, 2011
44 -export_type([vclock/0, timestamp/0, vclock_node/0]).
45
46 -opaque vclock() :: [vc_entry()].
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
47 % The timestamp is present but not used, in case a client wishes to inspect it.
48 -type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.
49
50 % Nodes can have any term() as a name, but they must differ from each other.
51 -type vclock_node() :: term().
52 -type counter() :: integer().
53 -type timestamp() :: integer().
54
% @doc Create a brand new vclock.
% A fresh clock holds no entries: no node has incremented it yet.
-spec fresh() -> vclock().
fresh() -> [].
59
% @doc Return true if Va is a direct descendant of Vb, else false.
% Va descends from Vb when every node listed in Vb also appears in Va with a
% counter that is greater than or equal to the one in Vb. Timestamps are
% ignored. Remember, a vclock is its own descendant!
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_Va, []) ->
    % all vclocks descend from the empty vclock
    true;
descends(Va, Vb) ->
    [{NodeB, {CtrB, _TsB}} | RestB] = Vb,
    case lists:keyfind(NodeB, 1, Va) of
        {_, {CtrA, _TsA}} when CtrA >= CtrB ->
            % this entry is covered; check the remaining entries of Vb
            descends(Va, RestB);
        {_, {_CtrA, _TsA}} ->
            % Va is behind Vb on this node
            false;
        false ->
            % Va has never seen this node
            false
    end.
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
73
% @doc Combine all VClocks in the input list into their least possible
% common descendant.
%
% For every node the entry with the highest counter wins. When two clocks
% hold the same counter for a node, the larger timestamp is kept, so the
% result does not depend on the order of the clocks in the input list.
%
% An empty input list yields the empty vclock. A single-element list returns
% that clock as-is (it is not re-sorted); with two or more clocks the result
% is built from node-sorted inputs and comes back in node order.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([]) -> [];
merge([SingleVclock]) -> SingleVclock;
merge([First|Rest]) -> merge(Rest, lists:keysort(1, First)).

% Fold the remaining clocks, one at a time, into the node-sorted
% accumulator NClock.
merge([], NClock) -> NClock;
merge([AClock|VClocks], NClock) ->
    merge(VClocks, merge(lists:keysort(1, AClock), NClock, [])).

% Merge two node-sorted clocks. AccClock holds the already merged prefix in
% reverse order; once either side runs out, the rest of the other side is
% appended unchanged.
merge([], Left, AccClock) -> lists:reverse(AccClock, Left);
merge(Left, [], AccClock) -> lists:reverse(AccClock, Left);
merge(V=[{Node1,{Ctr1,TS1}=CT1}=NCT1|VClock],
      N=[{Node2,{Ctr2,TS2}=CT2}=NCT2|NClock], AccClock) ->
    if Node1 < Node2 ->
            merge(VClock, N, [NCT1|AccClock]);
       Node1 > Node2 ->
            merge(V, NClock, [NCT2|AccClock]);
       true ->
            % Same node on both sides: highest counter wins. On a counter
            % tie take the larger timestamp so merge is order-independent.
            CT = if Ctr1 > Ctr2 -> CT1;
                    Ctr1 < Ctr2 -> CT2;
                    true -> {Ctr1, erlang:max(TS1, TS2)}
                 end,
            merge(VClock, NClock, [{Node1,CT}|AccClock])
    end.
99
% @doc Get the counter value in VClock set from Node.
% Returns `undefined' when Node has no entry in VClock.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter() | undefined.
get_counter(Node, VClock) ->
    case lists:keyfind(Node, 1, VClock) of
        false -> undefined;
        {_Node, {Counter, _Timestamp}} -> Counter
    end.
107
% @doc Get the timestamp value in a VClock set from Node.
% Returns `undefined' when Node has no entry in VClock.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    case lists:keyfind(Node, 1, VClock) of
        false -> undefined;
        {_Node, {_Counter, Timestamp}} -> Timestamp
    end.
115
% @doc Increment VClock at Node, stamping the entry with the current time
% as returned by timestamp/0.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Now = timestamp(),
    increment(Node, Now, VClock).
120
% @doc Increment VClock at Node, recording IncTs as the entry's timestamp.
% A node that is not yet present starts at counter 1; otherwise its counter
% is bumped by one. The updated entry is placed at the head of the returned
% clock and the node's previous entry (if any) is removed.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    {Counter, Others} =
        case lists:keytake(Node, 1, VClock) of
            {value, {_Node, {Previous, _OldTs}}, Remaining} ->
                {Previous + 1, Remaining};
            false ->
                {1, VClock}
        end,
    [{Node, {Counter, IncTs}} | Others].
132
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored Mar 17, 2011
133
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
% @doc Return the list of all nodes that have ever incremented VClock.
% Nodes are returned in the order their entries appear in VClock.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
138
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored Mar 17, 2011
% @doc Return a timestamp for a vector clock.
% The value is the current universal time expressed in gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    UniversalNow = erlang:universaltime(),
    calendar:datetime_to_gregorian_seconds(UniversalNow).
143
% @doc Compares two VClocks for equality.
% Entry order is irrelevant: both clocks are sorted before being compared.
% Whole entries are compared, so counters and timestamps must both match.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    SortedA = lists:sort(VA),
    SortedB = lists:sort(VB),
    SortedA =:= SortedB.
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
148
% @doc Possibly shrink the size of a vclock, depending on current age and size.
% The entries are ordered by timestamp (oldest first) and then handed to
% prune_vclock1/3 together with Now and BucketProps.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% This sort needs to be deterministic, to avoid spurious merge conflicts
    %% later. We achieve this by using the node ID as secondary key.
    OldestFirst = fun({NodeA, {_, TimeA}}, {NodeB, {_, TimeB}}) ->
                          {TimeA, NodeA} < {TimeB, NodeB}
                  end,
    prune_vclock1(lists:sort(OldestFirst, V), Now, BucketProps).
% @private
% Decide whether V (its head being the entry examined for age) may be pruned
% at all. V is returned untouched when it has no more than small_vclock
% entries, or when its head entry is younger than young_vclock; otherwise
% the decision is passed on to prune_vclock1/4.
% A property missing from BProps is read as `undefined', which Erlang's term
% order ranks above every integer, so a missing small_vclock or young_vclock
% leaves V untouched.
prune_vclock1(V, Now, BProps) ->
    Size = length(V),
    SmallLimit = get_property(small_vclock, BProps),
    if
        Size =< SmallLimit ->
            V;
        true ->
            {_, {_, HeadTime}} = hd(V),
            HeadAge = Now - HeadTime,
            YoungLimit = get_property(young_vclock, BProps),
            if
                HeadAge < YoungLimit -> V;
                true -> prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.
% @private
% Drop the head entry of V when V has more than big_vclock entries or when
% the head is older than old_vclock, then re-examine the shortened clock
% with prune_vclock1/3; otherwise return V unchanged.
prune_vclock1(V, Now, BProps, HeadTime) ->
    % has a precondition that V is longer than small and older than young
    DropHead = (length(V) > get_property(big_vclock, BProps))
        orelse ((Now - HeadTime) > get_property(old_vclock, BProps)),
    if
        DropHead -> prune_vclock1(tl(V), Now, BProps);
        true -> V
    end.
177
c4fe87f @eriksoe vclock: Replacing proplists:get_value() calls with far faster lists:k…
eriksoe authored Apr 8, 2011
% @private
% Look up Key in a list of {Key, Value} pairs.
% Returns the value of the first matching pair, or `undefined' when no pair
% with that key exists.
get_property(Key, PairList) ->
    case lists:keyfind(Key, 1, PairList) of
        false -> undefined;
        {_Found, Value} -> Value
    end.
c4fe87f @eriksoe vclock: Replacing proplists:get_value() calls with far faster lists:k…
eriksoe authored Apr 8, 2011
185
ec82f3a @argv0 dialyzer: riak_core fixes
argv0 authored Apr 4, 2011
186 %% ===================================================================
187 %% EUnit tests
188 %% ===================================================================
189 -ifdef(TEST).
190
% @doc Serves as both a trivial test and some example code.
% Two clocks evolve independently on nodes a and b, are merged, and the
% merged clock is then incremented on node c.
example_test() ->
    EmptyA = vclock:fresh(),
    EmptyB = vclock:fresh(),
    OnceA = vclock:increment(a, EmptyA),
    OnceB = vclock:increment(b, EmptyB),
    % an incremented clock descends from its predecessor
    true = vclock:descends(OnceA, EmptyA),
    true = vclock:descends(OnceB, EmptyB),
    % clocks advanced on different nodes are unrelated
    false = vclock:descends(OnceA, OnceB),
    TwiceA = vclock:increment(a, OnceA),
    Merged = vclock:merge([TwiceA, OnceB]),
    MergedPlusC = vclock:increment(c, Merged),
    % the merged-and-incremented clock descends from both inputs
    true = vclock:descends(MergedPlusC, TwiceA),
    true = vclock:descends(MergedPlusC, OnceB),
    false = vclock:descends(OnceB, MergedPlusC),
    false = vclock:descends(OnceB, OnceA),
    ok.
208
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
prune_small_test() ->
    % vclock with fewer entries than small_vclock will be untouched
    Now = riak_core_util:moment(),
    OldTime = Now - 32000000,
    SmallVC = [{Node, {Counter, OldTime}}
               || {Node, Counter} <- [{<<"1">>, 1}, {<<"2">>, 2}, {<<"3">>, 3}]],
    Props = [{small_vclock,4}],
    Pruned = prune(SmallVC, Now, Props),
    ?assertEqual(lists:sort(SmallVC), lists:sort(Pruned)).
218
prune_young_test() ->
    % vclock with all entries younger than young_vclock will be untouched
    Now = riak_core_util:moment(),
    NewTime = Now - 1,
    VC = [{Node, {Counter, NewTime}}
          || {Node, Counter} <- [{<<"1">>, 1}, {<<"2">>, 2}, {<<"3">>, 3}]],
    Props = [{small_vclock,1},{young_vclock,1000}],
    Pruned = prune(VC, Now, Props),
    ?assertEqual(lists:sort(VC), lists:sort(Pruned)).
228
prune_big_test() ->
    % vclock not preserved by small or young will be pruned down to
    % no larger than big_vclock entries
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    VC = [{<<"1">>, {1, NewTime}},
          {<<"2">>, {2, NewTime}},
          {<<"3">>, {3, NewTime}}],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,100000}],
    % assertEqual reports the actual length when the expectation fails
    ?assertEqual(2, length(prune(VC, Now, Props))).
240
prune_old_test() ->
    % vclock not preserved by small or young will be pruned down to
    % no larger than big_vclock and no entries more than old_vclock ago
    Now = riak_core_util:moment(),
    NewTime = Now - 1000,
    OldTime = Now - 100000,
    VC = [{<<"1">>, {1, NewTime}},
          {<<"2">>, {2, OldTime}},
          {<<"3">>, {3, OldTime}}],
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    % assertEqual reports the actual length when the expectation fails
    ?assertEqual(1, length(prune(VC, Now, Props))).
253
7ce0ed9 @jonmeredith Added test for determinstic vclock pruning for nodes with same timest…
jonmeredith authored Mar 3, 2011
prune_order_test() ->
    % vclock with two nodes of the same timestamp will be pruned down
    % to the same node
    Now = riak_core_util:moment(),
    OldTime = Now - 100000,
    VC1 = [{<<"1">>, {1, OldTime}},
           {<<"2">>, {2, OldTime}}],
    VC2 = lists:reverse(VC1),
    Props = [{small_vclock,1},{young_vclock,1},
             {big_vclock,2},{old_vclock,10000}],
    % the input order must not influence which entry survives
    PrunedForward = prune(VC1, Now, Props),
    PrunedReversed = prune(VC2, Now, Props),
    ?assertEqual(PrunedForward, PrunedReversed).
265
90f6ce8 @argv0 initial import
argv0 authored Jul 21, 2010
accessor_test() ->
    % two known entries; node <<"3">> is deliberately absent
    First = <<"1">>,
    Second = <<"2">>,
    Absent = <<"3">>,
    VC = [{First, {1, 1}},
          {Second, {2, 2}}],
    ?assertEqual(1, get_counter(First, VC)),
    ?assertEqual(1, get_timestamp(First, VC)),
    ?assertEqual(2, get_counter(Second, VC)),
    ?assertEqual(2, get_timestamp(Second, VC)),
    ?assertEqual(undefined, get_counter(Absent, VC)),
    ?assertEqual(undefined, get_timestamp(Absent, VC)),
    ?assertEqual([First, Second], all_nodes(VC)).
276
merge_test() ->
    % node <<"4">> appears in both clocks; the higher counter (4) must win
    VC1 = [{<<"1">>, {1, 1}},
           {<<"2">>, {2, 2}},
           {<<"4">>, {4, 4}}],
    VC2 = [{<<"3">>, {3, 3}},
           {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>,{1,1}},{<<"2">>,{2,2}},{<<"3">>,{3,3}},{<<"4">>,{4,4}}],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([VC1, VC2])).
ec82f3a @argv0 dialyzer: riak_core fixes
argv0 authored Apr 4, 2011
286
287 -endif.
Something went wrong with that request. Please try again.