Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 279 lines (250 sloc) 9.866 kB
90f6ce8 @argv0 initial import
argv0 authored
1 %% -------------------------------------------------------------------
2 %%
3 %% riak_core: Core Riak Application
4 %%
5 %% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
6 %%
7 %% This file is provided to you under the Apache License,
8 %% Version 2.0 (the "License"); you may not use this file
9 %% except in compliance with the License. You may obtain
10 %% a copy of the License at
11 %%
12 %% http://www.apache.org/licenses/LICENSE-2.0
13 %%
14 %% Unless required by applicable law or agreed to in writing,
15 %% software distributed under the License is distributed on an
16 %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 %% KIND, either express or implied. See the License for the
18 %% specific language governing permissions and limitations
19 %% under the License.
20 %%
21 %% -------------------------------------------------------------------
22
23 %% @doc A simple Erlang implementation of vector clocks as inspired by Lamport logical clocks.
24 %%
25 %% @reference Leslie Lamport (1978). "Time, clocks, and the ordering of events
26 %% in a distributed system". Communications of the ACM 21 (7): 558-565.
27 %%
28 %% @reference Friedemann Mattern (1988). "Virtual Time and Global States of
29 %% Distributed Systems". Workshop on Parallel and Distributed Algorithms:
30 %% pp. 215-226
31
32 -module(vclock).
33
34 -author('Justin Sheehy <justin@basho.com>').
35 -author('Andy Gross <andy@basho.com>').
36
37 -export([fresh/0,descends/2,merge/1,get_counter/2,get_timestamp/2,
c4a56f6 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
38 increment/2,increment/3,all_nodes/1,equal/2,prune/3,timestamp/0]).
90f6ce8 @argv0 initial import
argv0 authored
39
40 -include_lib("eunit/include/eunit.hrl").
41
42 -type vclock() :: [vc_entry()].
43 % The timestamp does not affect causal ordering (descends/2 ignores it); prune/3 uses it to age out old entries, and clients may inspect it.
44 -type vc_entry() :: {vclock_node(), {counter(), timestamp()}}.
45
46 % Nodes can have any term() as a name, but they must differ from each other.
47 -type vclock_node() :: term().
48 -type counter() :: integer().
49 -type timestamp() :: integer().
50
%% @doc Return a new, empty vector clock: one that no node has incremented yet.
-spec fresh() -> vclock().
fresh() -> [].
55
%% @todo Use common_test or other good test framework, and write more tests.
%%
%% @doc Doubles as a smoke test and as usage documentation: two clocks are
%% incremented by different nodes, shown to be concurrent, merged, and the
%% merged clock is shown to descend from both parents.
example_test() ->
    EmptyA = vclock:fresh(),
    EmptyB = vclock:fresh(),
    OnceA = vclock:increment(a, EmptyA),
    OnceB = vclock:increment(b, EmptyB),
    %% An incremented clock descends from the clock it was built from.
    true = vclock:descends(OnceA, EmptyA),
    true = vclock:descends(OnceB, EmptyB),
    %% Clocks touched by different nodes are not descendants of each other.
    false = vclock:descends(OnceA, OnceB),
    TwiceA = vclock:increment(a, OnceA),
    Merged = vclock:merge([TwiceA, OnceB]),
    MergedPlusC = vclock:increment(c, Merged),
    %% The merged-and-incremented clock descends from both inputs...
    true = vclock:descends(MergedPlusC, TwiceA),
    true = vclock:descends(MergedPlusC, OnceB),
    %% ...but neither input descends from it, nor from the other branch.
    false = vclock:descends(OnceB, MergedPlusC),
    false = vclock:descends(OnceB, OnceA),
    ok.
75
%% @doc Return true if Va descends from Vb, else false. Va descends from Vb
%% when every node listed in Vb is also present in Va with a counter that is
%% at least as large. Timestamps are ignored. Remember, a vclock is its own
%% descendant!
-spec descends(Va :: vclock(), Vb :: vclock()) -> boolean().
descends(_, []) ->
    %% Every vclock descends from the empty vclock.
    true;
descends(Va, Vb) ->
    [{NodeB, {CtrB, _T}} | RestB] = Vb,
    case proplists:get_value(NodeB, Va) of
        undefined ->
            %% Va has never seen this node, so it cannot descend from Vb.
            false;
        {CtrA, _TSA} when CtrA < CtrB ->
            false;
        {_CtrA, _TSA} ->
            %% This node is satisfied; check the remaining entries of Vb.
            descends(Va, RestB)
    end.
99
%% @doc Combine all VClocks in the input list into their least possible
%% common descendant. An empty list yields the empty clock and a single
%% clock is returned exactly as given; otherwise the result is sorted by node.
-spec merge(VClocks :: [vclock()]) -> vclock().
merge([]) ->
    [];
merge([SingleVclock]) ->
    SingleVclock;
merge([First | Rest]) ->
    %% Fold every remaining clock into the running (node-sorted) result.
    lists:foldl(
        fun(Next, Merged) -> merge(lists:keysort(1, Next), Merged, []) end,
        lists:keysort(1, First),
        Rest
    ).

%% Merge two node-sorted clocks. Acc holds the merged entries in reverse order.
merge([], Right, Acc) ->
    lists:reverse(Acc, Right);
merge(Left, [], Acc) ->
    lists:reverse(Acc, Left);
merge([{NodeL, {_, _}} = EntryL | RestL], [{NodeR, {_, _}} | _] = Right, Acc) when NodeL < NodeR ->
    merge(RestL, Right, [EntryL | Acc]);
merge([{NodeL, {_, _}} | _] = Left, [{NodeR, {_, _}} = EntryR | RestR], Acc) when NodeL > NodeR ->
    merge(Left, RestR, [EntryR | Acc]);
merge([{Node, {CtrL, TSL}} | RestL], [{_SameNode, {CtrR, TSR}} | RestR], Acc) ->
    %% Same node on both sides: keep the larger counter together with its
    %% timestamp. On a tie the right-hand entry is kept.
    Winner =
        case CtrL > CtrR of
            true -> {CtrL, TSL};
            false -> {CtrR, TSR}
        end,
    merge(RestL, RestR, [{Node, Winner} | Acc]).
126
%% @doc Return the counter recorded for Node in VClock, or `undefined' when
%% Node has no entry.
-spec get_counter(Node :: vclock_node(), VClock :: vclock()) -> counter() | undefined.
get_counter(Node, VClock) ->
    Entry = proplists:get_value(Node, VClock),
    case Entry of
        undefined -> undefined;
        {Counter, _Timestamp} -> Counter
    end.
134
%% @doc Return the timestamp recorded for Node in VClock, or `undefined' when
%% Node has no entry.
-spec get_timestamp(Node :: vclock_node(), VClock :: vclock()) -> timestamp() | undefined.
get_timestamp(Node, VClock) ->
    Entry = proplists:get_value(Node, VClock),
    case Entry of
        undefined -> undefined;
        {_Counter, Timestamp} -> Timestamp
    end.
142
%% @doc Increment VClock at Node, stamping the entry with the current value
%% of timestamp/0.
-spec increment(Node :: vclock_node(), VClock :: vclock()) -> vclock().
increment(Node, VClock) ->
    Now = timestamp(),
    increment(Node, Now, VClock).
147
%% @doc Increment VClock at Node using the caller-supplied timestamp IncTs.
%% A node without an entry starts at counter 1; otherwise its counter is
%% bumped by one. The updated entry is placed at the head of the result and
%% the remaining entries keep their relative order.
-spec increment(Node :: vclock_node(), IncTs :: timestamp(),
                VClock :: vclock()) -> vclock().
increment(Node, IncTs, VClock) ->
    case lists:keytake(Node, 1, VClock) of
        false ->
            [{Node, {1, IncTs}} | VClock];
        {value, {_N, {Counter, _OldTs}}, Others} ->
            [{Node, {Counter + 1, IncTs}} | Others]
    end.
159
c4a56f6 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
160
90f6ce8 @argv0 initial import
argv0 authored
%% @doc Return the nodes that have an entry in VClock (that is, every node
%% that has ever incremented it), in the order the entries are stored.
-spec all_nodes(VClock :: vclock()) -> [vclock_node()].
all_nodes(VClock) ->
    [Node || {Node, {_Counter, _Timestamp}} <- VClock].
165
c4a56f6 Add vclock:increment variant that accepts a timestamp.
Jon Meredith authored
%% @doc Return a timestamp for a vector clock: the current universal time
%% expressed in Gregorian seconds.
-spec timestamp() -> timestamp().
timestamp() ->
    UtcNow = erlang:universaltime(),
    calendar:datetime_to_gregorian_seconds(UtcNow).
170
%% @doc Compare two VClocks for equality. Two clocks are equal when they
%% contain the same set of entries (node, counter and timestamp), regardless
%% of the order in which the entries are listed. Duplicate entries are
%% collapsed before comparing.
-spec equal(VClockA :: vclock(), VClockB :: vclock()) -> boolean().
equal(VA, VB) ->
    %% Sorting and de-duplicating gives each clock a canonical form, so a
    %% single exact comparison replaces two set subtractions.
    lists:usort(VA) =:= lists:usort(VB).
185
%% @doc Possibly shrink the size of a vclock, depending on current age and size.
%%
%% Entries are considered oldest-first. BucketProps is a property list read
%% for the following keys:
%%   small_vclock - a clock with at most this many entries is never pruned;
%%   young_vclock - a clock whose oldest entry is younger than this (relative
%%                  to Now) is never pruned;
%%   big_vclock   - while the clock has more entries than this, the oldest
%%                  entry is dropped;
%%   old_vclock   - while the oldest entry is older than this, it is dropped.
%% The small/young checks are re-applied after every dropped entry.
%% The returned clock is ordered by timestamp, oldest first.
-spec prune(V::vclock(), Now::integer(), BucketProps::term()) -> vclock().
prune(V, Now, BucketProps) ->
    %% The sort must be deterministic: the node is a secondary key so that
    %% entries sharing a timestamp are always dropped in the same order, no
    %% matter how the input list happened to be ordered. `=<' satisfies the
    %% ordering-fun contract of lists:sort/2.
    SortV = lists:sort(
        fun({N1, {_, T1}}, {N2, {_, T2}}) -> {T1, N1} =< {T2, N2} end,
        V
    ),
    prune_vclock1(SortV, Now, BucketProps).

%% @private
%% Stop pruning as soon as the clock is small enough or its oldest entry is
%% young enough; otherwise decide whether the oldest entry must go.
prune_vclock1(V, Now, BProps) ->
    case length(V) =< proplists:get_value(small_vclock, BProps) of
        true ->
            V;
        false ->
            {_, {_, HeadTime}} = hd(V),
            case (Now - HeadTime) < proplists:get_value(young_vclock, BProps) of
                true -> V;
                false -> prune_vclock1(V, Now, BProps, HeadTime)
            end
    end.

%% @private
%% Precondition: V is longer than small_vclock and its head is older than
%% young_vclock. Drop the head when the clock is too big or the head too old.
prune_vclock1(V, Now, BProps, HeadTime) ->
    TooBig = length(V) > proplists:get_value(big_vclock, BProps),
    case TooBig orelse (Now - HeadTime) > proplists:get_value(old_vclock, BProps) of
        true -> prune_vclock1(tl(V), Now, BProps);
        false -> V
    end.
213
prune_small_test() ->
    %% A vclock with fewer entries than small_vclock must come back untouched,
    %% however old its entries are.
    Now = riak_core_util:moment(),
    Stamp = Now - 32000000,
    Clock = [
        {<<"1">>, {1, Stamp}},
        {<<"2">>, {2, Stamp}},
        {<<"3">>, {3, Stamp}}
    ],
    Pruned = prune(Clock, Now, [{small_vclock, 4}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
223
prune_young_test() ->
    %% A vclock whose entries are all younger than young_vclock must come
    %% back untouched, even though it is larger than small_vclock.
    Now = riak_core_util:moment(),
    Stamp = Now - 1,
    Clock = [
        {<<"1">>, {1, Stamp}},
        {<<"2">>, {2, Stamp}},
        {<<"3">>, {3, Stamp}}
    ],
    Pruned = prune(Clock, Now, [{small_vclock, 1}, {young_vclock, 1000}]),
    ?assertEqual(lists:sort(Clock), lists:sort(Pruned)).
233
prune_big_test() ->
    %% A vclock that is neither small nor young is cut down until it has no
    %% more than big_vclock entries.
    Now = riak_core_util:moment(),
    Stamp = Now - 1000,
    Clock = [
        {<<"1">>, {1, Stamp}},
        {<<"2">>, {2, Stamp}},
        {<<"3">>, {3, Stamp}}
    ],
    Limits = [
        {small_vclock, 1},
        {young_vclock, 1},
        {big_vclock, 2},
        {old_vclock, 100000}
    ],
    Pruned = prune(Clock, Now, Limits),
    ?assert(length(Pruned) =:= 2).
245
prune_old_test() ->
    %% A vclock that is neither small nor young is cut down to at most
    %% big_vclock entries, and entries older than old_vclock are dropped too.
    Now = riak_core_util:moment(),
    Recent = Now - 1000,
    Ancient = Now - 100000,
    Clock = [
        {<<"1">>, {1, Recent}},
        {<<"2">>, {2, Ancient}},
        {<<"3">>, {3, Ancient}}
    ],
    Limits = [
        {small_vclock, 1},
        {young_vclock, 1},
        {big_vclock, 2},
        {old_vclock, 10000}
    ],
    Pruned = prune(Clock, Now, Limits),
    ?assert(length(Pruned) =:= 1).
258
accessor_test() ->
    %% get_counter/2 and get_timestamp/2 read the matching entry and return
    %% `undefined' for an unknown node; all_nodes/1 lists nodes in entry order.
    Clock = [
        {<<"1">>, {1, 1}},
        {<<"2">>, {2, 2}}
    ],
    ?assertEqual(1, get_counter(<<"1">>, Clock)),
    ?assertEqual(1, get_timestamp(<<"1">>, Clock)),
    ?assertEqual(2, get_counter(<<"2">>, Clock)),
    ?assertEqual(2, get_timestamp(<<"2">>, Clock)),
    ?assertEqual(undefined, get_counter(<<"3">>, Clock)),
    ?assertEqual(undefined, get_timestamp(<<"3">>, Clock)),
    ?assertEqual([<<"1">>, <<"2">>], all_nodes(Clock)).
269
merge_test() ->
    %% Merging keeps every node and, for a node present in both clocks
    %% (<<"4">>), the entry with the larger counter.
    Left = [
        {<<"1">>, {1, 1}},
        {<<"2">>, {2, 2}},
        {<<"4">>, {4, 4}}
    ],
    Right = [
        {<<"3">>, {3, 3}},
        {<<"4">>, {3, 3}}
    ],
    Expected = [
        {<<"1">>, {1, 1}},
        {<<"2">>, {2, 2}},
        {<<"3">>, {3, 3}},
        {<<"4">>, {4, 4}}
    ],
    ?assertEqual([], merge(vclock:fresh())),
    ?assertEqual(Expected, merge([Left, Right])).
Something went wrong with that request. Please try again.