Skip to content
Browse files

Add CRC to hintfile and check during startup.

Calculate the CRC of the hintfile as it is written and write it to a
special backward-compatible trailer record at the end of
the hint file.

Changed the hint folding code to return {error, Reason} more
reliably and added a new fold_keys variant that will
switch to scanning the cask files if there are any issues
with the hint file.
  • Loading branch information...
1 parent d989a49 commit 46ebecdaec1e919887bdeb2c48c9d6ae3bb654cb @jonmeredith jonmeredith committed Feb 1, 2012
Showing with 80 additions and 23 deletions.
  1. +2 −0 include/bitcask.hrl
  2. +3 −3 src/bitcask.erl
  3. +57 −14 src/bitcask_fileops.erl
  4. +18 −6 test/bitcask_qc_fsm.erl
View
2 include/bitcask.hrl
@@ -12,6 +12,7 @@
tstamp, % Tstamp portion of filename
fd, % File handle
hintfd, % File handle for hints
+ hintcrc=0,% CRC-32 of current hint
ofs }). % Current offset for writing
-record(file_status, { filename,
@@ -34,3 +35,4 @@
-define(HEADER_SIZE, 14). % 4 + 4 + 2 + 4 bytes
-define(MAXKEYSIZE, 2#1111111111111111).
-define(MAXVALSIZE, 2#11111111111111111111111111111111).
+-define(MAXOFFSET, 16#ffffffffffffffff). % max 64-bit unsigned
View
6 src/bitcask.erl
@@ -150,8 +150,8 @@ close(Ref) ->
ok;
fresh ->
ok;
- _ ->
- ok = bitcask_fileops:close(State#bc_state.write_file),
+ WriteFile ->
+ bitcask_fileops:close_for_writing(WriteFile),
ok = bitcask_lockops:release(State#bc_state.write_lock)
end.
@@ -758,7 +758,7 @@ scan_key_files([Filename | Rest], KeyDir, Acc) ->
Offset,
Tstamp)
end,
- bitcask_fileops:fold_keys(File, F, undefined),
+ bitcask_fileops:fold_keys(File, F, undefined, recovery),
scan_key_files(Rest, KeyDir, [File | Acc]).
%%
View
71 src/bitcask_fileops.erl
@@ -97,7 +97,15 @@ close(#filestate{ fd = FD, hintfd = HintFd }) ->
close_for_writing(fresh) -> ok;
close_for_writing(undefined) -> ok;
close_for_writing(State =
- #filestate{ mode = read_write, fd = Fd, hintfd = HintFd }) ->
+ #filestate{ mode = read_write, fd = Fd,
+ hintfd = HintFd, hintcrc = HintCRC }) ->
+ %% Write out CRC check at end of hint file. Write with an empty key,
+ %% zero timestamp and offset as large as the file format supports so
+ %% opening with an older version of bitcask will just reject the
+ %% record at the end of the hintfile and otherwise work normally.
+ Iolist = hintfile_entry(<<>>, 0, {?MAXOFFSET, HintCRC}),
+ ok = bitcask_nifs:file_write(HintFd, Iolist),
+
bitcask_nifs:file_sync(Fd),
bitcask_nifs:file_sync(HintFd),
bitcask_nifs:file_close(HintFd),
@@ -131,7 +139,8 @@ delete(#filestate{ filename = FN } = State) ->
{error, read_only}.
write(#filestate { mode = read_only }, _K, _V, _Tstamp) ->
{error, read_only};
-write(Filestate=#filestate{fd = FD, hintfd = HintFD, ofs = Offset},
+write(Filestate=#filestate{fd = FD, hintfd = HintFD,
+ hintcrc = HintCRC0, ofs = Offset},
Key, Value, Tstamp) ->
KeySz = size(Key),
true = (KeySz =< ?MAXKEYSIZE),
@@ -150,7 +159,9 @@ write(Filestate=#filestate{fd = FD, hintfd = HintFD, ofs = Offset},
ok = bitcask_nifs:file_write(HintFD, Iolist),
%% Record our final offset
TotalSz = iolist_size(Bytes),
- {ok, Filestate#filestate{ofs = Offset + TotalSz}, Offset, TotalSz}.
+ HintCRC = erlang:crc32(HintCRC0, Iolist), % compute crc of hint
+ {ok, Filestate#filestate{ofs = Offset + TotalSz,
+ hintcrc = HintCRC}, Offset, TotalSz}.
%% @doc Given an Offset and Size, get the corresponding k/v from Filename.
@@ -237,6 +248,20 @@ fold_keys(#filestate { fd = Fd } = State, Fun, Acc, Mode) ->
fold_hintfile(State, Fun, Acc);
false ->
fold_keys_loop(Fd, 0, Fun, Acc)
+ end;
+ recovery -> % if hint files are corrupt, restart scanning cask files
+ % Fun should be side-effect free or tolerant of being
+ % called twice
+ case has_hintfile(State) of
+ true ->
+ case fold_hintfile(State, Fun, Acc) of
+ {error, _} ->
+ fold_keys_loop(Fd, 0, Fun, Acc);
+ Acc1 ->
+ Acc1
+ end;
+ false ->
+ fold_keys_loop(Fd, 0, Fun, Acc)
end
end.
@@ -378,24 +403,25 @@ fold_keys_loop(Fd, Offset, Fun, Acc0) ->
fold_hintfile(State, Fun, Acc) ->
- case bitcask_nifs:file_open(hintfile_name(State), [readonly]) of
+ HintFile = hintfile_name(State),
+ case bitcask_nifs:file_open(HintFile, [readonly]) of
{ok, HintFd} ->
try
{ok, DataI} = file:read_file_info(State#filestate.filename),
DataSize = DataI#file_info.size,
case bitcask_nifs:file_read(HintFd, ?HINT_RECORD_SZ) of
{ok, <<H:?HINT_RECORD_SZ/bytes>>} ->
- fold_hintfile_loop(DataSize, hintfile_name(State),
- HintFd, H, Fun, Acc);
+ fold_hintfile_loop(DataSize, HintFile,
+ HintFd, 0, H, Fun, Acc);
{ok, Bytes} ->
error_logger:error_msg("~s:fold_hintfile: ~s: expected "
"~p bytes but got only ~p "
"bytes, skipping\n",
- [?MODULE, hintfile_name(State),
+ [?MODULE, HintFile,
?HINT_RECORD_SZ, size(Bytes)]),
- Acc;
+ {error, {incomplete_hint, 1}};
eof ->
- Acc;
+ {error, empty_hintfile}; % hint files should never be empty
{error, Reason} ->
{error, {fold_hintfile, Reason}}
end
@@ -406,7 +432,20 @@ fold_hintfile(State, Fun, Acc) ->
{error, {fold_hintfile, Reason}}
end.
-fold_hintfile_loop(DataSize, HintFile, Fd, HintRecord, Fun, Acc0) ->
+
+fold_hintfile_loop(_DataSize, HintFile, _Fd, HintCRC,
+ <<0:?TSTAMPFIELD, 0:?KEYSIZEFIELD,
+ ExpectCRC:?TOTALSIZEFIELD, (?MAXOFFSET):?OFFSETFIELD>>, _Fun, Acc0) ->
+ case HintCRC of
+ ExpectCRC ->
+ Acc0;
+ _ ->
+ error_logger:error_msg("Hintfile '~s' has bad CRC ~p expected ~p\n",
+ [HintFile, HintCRC, ExpectCRC]),
+ {eror, {bad_crc, HintCRC, ExpectCRC}}
+ end;
+fold_hintfile_loop(DataSize, HintFile, Fd, HintCRC0,
+ HintRecord, Fun, Acc0) ->
<<Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
TotalSz:?TOTALSIZEFIELD, Offset:?OFFSETFIELD>> = HintRecord,
ReadSz = KeySz + ?HINT_RECORD_SZ,
@@ -417,19 +456,23 @@ fold_hintfile_loop(DataSize, HintFile, Fd, HintRecord, Fun, Acc0) ->
Acc = Fun(Key, Tstamp, PosInfo, Acc0),
case Rest of
<<NextRecord:?HINT_RECORD_SZ/bytes>> ->
- fold_hintfile_loop(DataSize, HintFile,
- Fd, NextRecord, Fun, Acc);
+ HintCRC=erlang:crc32(HintCRC0, [HintRecord, Key]),
+ fold_hintfile_loop(DataSize, HintFile, Fd,
+ HintCRC, NextRecord, Fun, Acc);
<<>> ->
+ %% Hint files without CRCs will end on a record boundary.
+ %% No way to know whether to expect a crc or not.
+ %% Over time, merges will add CRCs to all hint files.
Acc;
X ->
error_logger:error_msg("Bad hintfile data 1: ~p\n", [X]),
- Acc
+ {error, {incomplete_hint, 2}}
end;
{ok, _} ->
error_logger:error_msg("Hintfile '~s' contains pointer ~p ~p "
"that is greater than total data size ~p\n",
[HintFile, Offset, TotalSz, DataSize]),
- Acc0;
+ {error, {incomplete_hint, 3}};
eof ->
{error, incomplete_key};
{error, Reason} ->
View
24 test/bitcask_qc_fsm.erl
@@ -88,10 +88,20 @@ precondition(_From,_To,_S,{call,_,_,_}) ->
true.
-postcondition(opened, opened, S, {call, bitcask, get, [_, Key]}, not_found) ->
- not orddict:is_key(Key, S#state.data);
-postcondition(opened, opened, S, {call, bitcask, get, [_, Key]}, {ok, Value}) ->
- Value == orddict:fetch(Key, S#state.data);
+postcondition(opened, opened, S, {call, _, get, [_, Key]}, not_found) ->
+ case orddict:find(Key, S#state.data) of
+ error ->
+ true;
+ {ok, Exp} ->
+ {expected, Exp, got, not_found}
+ end;
+postcondition(opened, opened, S, {call, _, get, [_, Key]}, {ok, Value}) ->
+ case orddict:find(Key, S#state.data) of
+ {ok, Value} ->
+ true;
+ Exp ->
+ {expected, Exp, got, Value}
+ end;
postcondition(opened, opened, _S, {call, _, merge, [_TestDir]}, Res) ->
Res == ok;
postcondition(_From,_To,_S,{call,_,_,_},_Res) ->
@@ -120,6 +130,8 @@ prop_bitcask() ->
%% Specify how often each transition should be chosen
weight(_From, _To,{call,_,close,_}) ->
10;
+weight(_From, _To,{call,_,truncate_hint,_}) ->
+ 10;
weight(_From,_To,{call,_,_,_}) ->
100.
@@ -152,8 +164,8 @@ truncate_hint(Seed, TruncBy0) ->
{ok, Fi} = file:read_file_info(Hint),
{ok, Fh} = file:open(Hint, [read, write]),
TruncBy = (1 + abs(TruncBy0)) rem (Fi#file_info.size+1),
- io:format(user, "Truncating ~p by ~p\n", [Hint, TruncBy]),
- {ok, _} = file:position(Fh, {eof, -TruncBy}),
+ {ok, To} = file:position(Fh, {eof, -TruncBy}),
+ io:format(user, "Truncating ~p by ~p to ~p\n", [Hint, TruncBy, To]),
file:truncate(Fh),
file:close(Fh)
end.

0 comments on commit 46ebecd

Please sign in to comment.
Something went wrong with that request. Please try again.