Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 301 lines (268 sloc) 10.625 kb
90f6ce8 Andy Gross initial import
argv0 authored
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -author('Justin Sheehy <justin@basho.com>').
35 -author('Andy Gross <andy@basho.com>').
36
37 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
38 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce8 Andy Gross initial import
argv0 authored
39
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
40 -ifdef(TEST).
90f6ce8 Andy Gross initial import
argv0 authored
41 -include_lib("eunit/include/eunit.hrl").
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
42 -endif.
90f6ce8 Andy Gross initial import
argv0 authored
43
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
44 -export_type([vclock/0, timestamp/0, vclock_node/0]).
45
46 -opaque vclock() :: [vc_entry()].
90f6ce8 Andy Gross initial import
argv0 authored
47 % The timestamp is present but not used, in case a client wishes to inspect it.
48 -type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.
49
50 % Nodes can have any term() as a name, but they must differ from each other.
51 -type vclock_node() :: term().
8c5e75e Andy Gross first pass at some better / more complete specs
argv0 authored
52 -type counter() :: non_neg_integer().
53 -type timestamp() :: non_neg_integer().
90f6ce8 Andy Gross initial import
argv0 authored
54
% @doc Build an empty vector clock, i.e. one holding no node entries.
% Every vclock descends from the value returned here.
-spec fresh() -> vclock().
fresh() ->
    [].
59
% @doc Return true if Va descends from Vb, else false.
% Va descends from Vb when every node listed in Vb also has an entry in Va
% whose counter is at least as large. Timestamps are ignored. A vclock is
% its own descendant, and every vclock descends from the empty vclock.
-spec descends(Va :: vclock()|[], Vb :: vclock()|[]) -> boolean().
descends(_Va, []) ->
    %% no entry left in Vb that could contradict Va
    true;
descends(Va, Vb) ->
    [{Node, {CounterB, _TsB}} | Remaining] = Vb,
    case proplists:get_value(Node, Va) of
        undefined ->
            %% Va has no entry for a node that Vb knows about
            false;
        {CounterA, _TsA} when CounterA < CounterB ->
            %% Va is behind Vb on this node
            false;
        {_CounterA, _TsA} ->
            descends(Va, Remaining)
    end.
83
% @doc Combine all VClocks in the input list into their least possible
% common descendant.
% For each node the entry with the highest counter is kept. When two clocks
% carry the same counter for a node, the later of the two timestamps is
% kept, so the result does not depend on the order of the input list.
% An empty input list yields the empty vclock and a single-element list
% yields that vclock unchanged; for two or more clocks the result is
% ordered by node.
-spec merge(VClocks :: [vclock()]) -> vclock() | [].
merge([]) -> [];
merge([SingleVclock]) -> SingleVclock;
merge([First|Rest]) -> merge(Rest, lists:keysort(1, First)).

% @private
% Fold each remaining clock into NClock, which is kept sorted by node.
merge([], NClock) -> NClock;
merge([AClock|VClocks], NClock) ->
    merge(VClocks, merge(lists:keysort(1, AClock), NClock, [])).

% @private
% Merge two node-sorted clocks; AccClock holds the result so far, reversed.
merge([], Right, AccClock) ->
    %% one side exhausted: the rest of the other side follows as is
    lists:reverse(AccClock, Right);
merge(Left, [], AccClock) ->
    lists:reverse(AccClock, Left);
merge(V=[{Node1,{Ctr1,TS1}}=Entry1|VClock],
      N=[{Node2,{Ctr2,TS2}}=Entry2|NClock], AccClock) ->
    if
        Node1 < Node2 ->
            merge(VClock, N, [Entry1|AccClock]);
        Node1 > Node2 ->
            merge(V, NClock, [Entry2|AccClock]);
        true ->
            %% same node on both sides: highest counter wins, and on a
            %% counter tie the later timestamp wins regardless of side
            Winner = if
                         Ctr1 > Ctr2 -> {Ctr1, TS1};
                         Ctr1 < Ctr2 -> {Ctr2, TS2};
                         true -> {Ctr1, erlang:max(TS1, TS2)}
                     end,
            merge(VClock, NClock, [{Node1, Winner}|AccClock])
    end.
110
% @doc Look up the counter recorded for Node in VClock.
% Returns `undefined' when VClock has no entry for Node.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter() | undefined.
get_counter(Node, VClock) ->
    case proplists:get_value(Node, VClock) of
        undefined -> undefined;
        {Counter, _Timestamp} -> Counter
    end.
118
% @doc Look up the timestamp recorded for Node in VClock.
% Returns `undefined' when VClock has no entry for Node.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    case proplists:get_value(Node, VClock) of
        undefined -> undefined;
        {_Counter, Timestamp} -> Timestamp
    end.
126
% @doc Increment VClock at Node, stamping the entry with the current
% time as returned by timestamp/0.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    CurrentTs = timestamp(),
    increment(Node, CurrentTs, VClock).
131
% @doc Increment VClock at Node, stamping the entry with IncTs.
% A node that has no entry yet starts at counter 1; otherwise its counter
% goes up by one and its previous timestamp is replaced. The updated entry
% is placed at the head of the returned vclock, followed by the other
% entries in their existing order.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        {value, {_OldNode, {Counter, _OldTs}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others];
        false ->
            [{Node, {1, IncTs}} | VClock]
    end.
143
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
144
90f6ce8 Andy Gross initial import
argv0 authored
% @doc Return the node of every entry present in VClock, in the order
% the entries appear.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
149
97231e0 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
% @doc Return a timestamp for a vector clock: the current universal time
% converted to gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    UniversalNow = erlang:universaltime(),
    calendar:datetime_to_gregorian_seconds(UniversalNow).
154
% @doc Compares two VClocks for equality.
% The clocks are equal when they hold the same set of entries; an entry
% is compared as a whole (node, counter and timestamp) and the order of
% entries is irrelevant. Not very fast: both clocks are turned into sets.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    EntriesA = sets:from_list(VA),
    EntriesB = sets:from_list(VB),
    %% equal sets are exactly those that contain each other
    sets:is_subset(EntriesA, EntriesB) andalso
        sets:is_subset(EntriesB, EntriesA).
169
% @doc Possibly shrink the size of a vclock, depending on current age and size.
% BucketProps is a property list read for four thresholds: `small_vclock'
% and `big_vclock' are compared with the number of entries, `young_vclock'
% and `old_vclock' with the age (Now minus timestamp) of the oldest entry.
%
% Entries are first ordered oldest-first. Nothing more is removed once the
% clock has at most `small_vclock' entries or its oldest entry is younger
% than `young_vclock'. Otherwise the oldest entry is dropped while the clock
% has more than `big_vclock' entries or that entry is older than
% `old_vclock', and the checks start over on what is left.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% The order has to be deterministic, otherwise two replicas could prune
    %% different entries and cause spurious merge conflicts later. Ties on
    %% the timestamp are therefore broken by node id.
    OldestFirst = fun({NodeA, {_, TimeA}}, {NodeB, {_, TimeB}}) ->
                          {TimeA, NodeA} < {TimeB, NodeB}
                  end,
    prune_vclock1(lists:sort(OldestFirst, V), Now, BucketProps).

% @private
% Stop when the clock is small enough or its oldest entry is still young.
prune_vclock1(V, Now, BProps) ->
    SmallSize = proplists:get_value(small_vclock, BProps),
    case length(V) =< SmallSize of
        true ->
            V;
        false ->
            {_, {_, HeadTime}} = hd(V),
            YoungAge = proplists:get_value(young_vclock, BProps),
            case (Now - HeadTime) < YoungAge of
                true -> V;
                false -> prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.

% @private
% Precondition: V is longer than small_vclock and its head is not young.
% Drops the head when the clock is too big or the head is too old.
prune_vclock1(V, Now, BProps, HeadTime) ->
    TooBigOrTooOld =
        length(V) > proplists:get_value(big_vclock, BProps)
        orelse (Now - HeadTime) > proplists:get_value(old_vclock, BProps),
    case TooBigOrTooOld of
        true -> prune_vclock1(tl(V), Now, BProps);
        false -> V
    end.
199
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
200 %% ===================================================================
201 %% EUnit tests
202 %% ===================================================================
203 -ifdef(TEST).
204
%% Walks two clocks through increments on nodes a and b, merges them,
%% increments the result on node c, and checks descends/2 at each step.
example_test() ->
    EmptyA = vclock:fresh(),
    EmptyB = vclock:fresh(),
    OnceA = vclock:increment(a, EmptyA),
    OnceB = vclock:increment(b, EmptyB),
    ?assert(vclock:descends(OnceA, EmptyA)),
    ?assert(vclock:descends(OnceB, EmptyB)),
    ?assertNot(vclock:descends(OnceA, OnceB)),
    TwiceA = vclock:increment(a, OnceA),
    Merged = vclock:merge([TwiceA, OnceB]),
    MergedPlusC = vclock:increment(c, Merged),
    ?assert(vclock:descends(MergedPlusC, TwiceA)),
    ?assert(vclock:descends(MergedPlusC, OnceB)),
    ?assertNot(vclock:descends(OnceB, MergedPlusC)),
    ?assertNot(vclock:descends(OnceB, OnceA)),
    ok.
221
90f6ce8 Andy Gross initial import
argv0 authored
%% A vclock with fewer entries than small_vclock keeps all of them,
%% however old they are.
prune_small_test() ->
    Now = riak_core_util:moment(),
    LongAgo = Now - 32000000,
    Clock = [{<<"1">>, {1, LongAgo}},
             {<<"2">>, {2, LongAgo}},
             {<<"3">>, {3, LongAgo}}],
    Pruned = prune(Clock, Now, [{small_vclock,4}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
231
%% A vclock whose entries are all younger than young_vclock keeps all of
%% them, even though it is larger than small_vclock.
prune_young_test() ->
    Now = riak_core_util:moment(),
    JustNow = Now - 1,
    Clock = [{<<"1">>, {1, JustNow}},
             {<<"2">>, {2, JustNow}},
             {<<"3">>, {3, JustNow}}],
    Pruned = prune(Clock, Now, [{small_vclock,1},{young_vclock,1000}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
241
%% A vclock that is neither small nor young is cut down until it holds
%% no more than big_vclock entries.
prune_big_test() ->
    Now = riak_core_util:moment(),
    EntryTime = Now - 1000,
    Clock = [{<<"1">>, {1, EntryTime}},
             {<<"2">>, {2, EntryTime}},
             {<<"3">>, {3, EntryTime}}],
    Thresholds = [{small_vclock,1},{young_vclock,1},
                  {big_vclock,2},{old_vclock,100000}],
    ?assertEqual(2, length(prune(Clock, Now, Thresholds))).
253
%% A vclock that is neither small nor young is cut down to big_vclock
%% entries and also loses every entry older than old_vclock.
prune_old_test() ->
    Now = riak_core_util:moment(),
    RecentTime = Now - 1000,
    StaleTime = Now - 100000,
    Clock = [{<<"1">>, {1, RecentTime}},
             {<<"2">>, {2, StaleTime}},
             {<<"3">>, {3, StaleTime}}],
    Thresholds = [{small_vclock,1},{young_vclock,1},
                  {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(1, length(prune(Clock, Now, Thresholds))).
266
7ce0ed9 Jon Meredith Added test for determinstic vclock pruning for nodes with same timestamp...
jonmeredith authored
%% Two entries that share a timestamp must be pruned down to the same
%% surviving node, whatever order they were listed in.
prune_order_test() ->
    Now = riak_core_util:moment(),
    StaleTime = Now - 100000,
    Forward = [{<<"1">>, {1, StaleTime}},
               {<<"2">>, {2, StaleTime}}],
    Backward = lists:reverse(Forward),
    Thresholds = [{small_vclock,1},{young_vclock,1},
                  {big_vclock,2},{old_vclock,10000}],
    ?assertEqual(prune(Forward, Now, Thresholds),
                 prune(Backward, Now, Thresholds)).
278
90f6ce8 Andy Gross initial import
argv0 authored
%% get_counter/2 and get_timestamp/2 return the stored values for known
%% nodes and undefined for an unknown one; all_nodes/1 lists the nodes.
accessor_test() ->
    Clock = [{<<"1">>, {1, 1}},
             {<<"2">>, {2, 2}}],
    ?assertEqual(1, get_counter(<<"1">>, Clock)),
    ?assertEqual(1, get_timestamp(<<"1">>, Clock)),
    ?assertEqual(2, get_counter(<<"2">>, Clock)),
    ?assertEqual(2, get_timestamp(<<"2">>, Clock)),
    ?assertEqual(undefined, get_counter(<<"3">>, Clock)),
    ?assertEqual(undefined, get_timestamp(<<"3">>, Clock)),
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(Clock)).
289
%% Merging nothing gives the empty clock; merging two clocks keeps every
%% node and, for the node they share, the entry with the higher counter.
merge_test() ->
    Left = [{<<"1">>, {1, 1}},
            {<<"2">>, {2, 2}},
            {<<"4">>, {4, 4}}],
    Right = [{<<"3">>, {3, 3}},
             {<<"4">>, {3, 3}}],
    Expected = [{<<"1">>,{1,1}},{<<"2">>,{2,2}},{<<"3">>,{3,3}},{<<"4">>,{4,4}}],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([Left, Right])).
ec82f3a Andy Gross dialyzer: riak_core fixes
argv0 authored
299
300 -endif.
Something went wrong with that request. Please try again.