Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fix log spam introduced by branch 'gh62-badrecord-mstate'

When I wrote the changes in branch 'gh62-badrecord-mstate', I worried
a little that they might spam the logs, but I did not worry enough.
After a pull-the-power-cord failure, the next Riak start logged tens
of thousands of error messages. See the example below.

Fix: Introduce a cutoff of 20 CRC errors when folding over any
single file. Once the cutoff is reached, the fold logs one final
message and stops processing that file.

    ( echo "line-count path bytes-skipped" ; grep 'skip' ALL.console.log | awk '{print $11, $15}' | uniq -c | sort -k 2 | sed 's/^  *//' ) | sed 's/ /, /g'
    42918, /var/lib/riak/bitcask/1019054852592055355075616182186939527377281089536/3.bitcask.data, 14
    1, /var/lib/riak/bitcask/1183188337253240741397319068673631468061296951296/3.bitcask.data, 1049176
    34757, /var/lib/riak/bitcask/1218869529570889738423776217909868846470865616896/3.bitcask.data, 14
    23, /var/lib/riak/bitcask/1247414483425008936044941937298858749198520549376/3.bitcask.data, 14
    1, /var/lib/riak/bitcask/1283095675742657933071399086535096127608089214976/3.bitcask.data, 1049177
    23444, /var/lib/riak/bitcask/1283095675742657933071399086535096127608089214976/3.bitcask.data, 14
    1, /var/lib/riak/bitcask/1290231914206187732476690516382343603290002948096/3.bitcask.data, 1049177
    36198, /var/lib/riak/bitcask/1290231914206187732476690516382343603290002948096/3.bitcask.data, 14
    292, /var/lib/riak/bitcask/248341098530837019304141758684212153730597912576/3.bitcask.data, 14
    258, /var/lib/riak/bitcask/398202106264962806815261785476409143050786308096/3.bitcask.data, 14
    165, /var/lib/riak/bitcask/419610821655552205031136075018151570096527507456/3.bitcask.data, 14
    156, /var/lib/riak/bitcask/433883298582611803841718934712646521460354973696/3.bitcask.data, 14
    1, /var/lib/riak/bitcask/533790637072028995515798952574111181007147237376/3.bitcask.data, 1049176
    3954, /var/lib/riak/bitcask/533790637072028995515798952574111181007147237376/3.bitcask.data, 14
    1, /var/lib/riak/bitcask/626561737097916387784587540588328364872025767936/3.bitcask.data, 1049174
    77531, /var/lib/riak/bitcask/62798898479062234766564582655777786000840851456/3.bitcask.data, 14
    41394, /var/lib/riak/bitcask/783558983295571974700998997227772829874127896576/3.bitcask.data, 14
  • Loading branch information...
commit 3c16f7592fc39808c12d5ab0dd39b99522caf7aa 1 parent 15ab02e
@slfritchie slfritchie authored
Showing with 41 additions and 6 deletions.
  1. +27 −0 src/bitcask.erl
  2. +14 −6 src/bitcask_fileops.erl
View
27 src/bitcask.erl
@@ -1665,6 +1665,33 @@ truncated_datafile_test() ->
{1, [{_, _, _, 513}]} = bitcask:status(B2),
ok.
+trailing_junk_big_datafile_test() -> % Regression test: a data file whose tail is overwritten with zero bytes must still merge, open, and fold.
+ Dir = "/tmp/bc.test.trailingdata",
+ NumKeys = 400,
+ os:cmd("rm -rf " ++ Dir), % start from an empty scratch directory
+ os:cmd("mkdir " ++ Dir),
+ B1 = bitcask:open(Dir, [read_write, {max_file_size, 1024*1024*1024}]), % 1 GiB limit, presumably so every put lands in one data file (confirm)
+ [ok = bitcask:put(B1, <<"k", X:32>>, <<X:1024>>) || X <- lists:seq(1, NumKeys)], % 5-byte keys, 128-byte values
+ ok = bitcask:close(B1),
+
+ [DataFile|_] = filelib:wildcard(Dir ++ "/*.data"), % first data file found in Dir
+ {ok, FH} = file:open(DataFile, [read, write]),
+ {ok, _} = file:position(FH, 40*1024), % seek to byte offset 40 KiB
+ ok = file:write(FH, <<0:(40*1024*8)>>), % write 40 KiB of zero bytes from that offset onward
+ ok = file:close(FH),
+
+ %% Merge the directory; this must return ok despite the zeroed region
+ ok = merge(Dir),
+
+ B2 = bitcask:open(Dir, [read_write]),
+ KeyList = bitcask:fold(B2, fun(K, _V, Acc0) -> [K|Acc0] end, []), % collect every key the fold still yields
+ true = length(KeyList) < NumKeys, % some keys must be missing after the overwrite
+ ArbKey = 5, % arbitrary key written near the start of the file, before the zeroed region
+ {ok, <<ArbKey:1024>>} = bitcask:get(B2, <<"k", ArbKey:32>>), % an early key must still be readable with its original value
+ ok = bitcask:close(B2),
+
+ ok.
+
truncated_merge_test() ->
Dir = "/tmp/bc.test.truncmerge",
os:cmd("rm -rf " ++ Dir),
View
20 src/bitcask_fileops.erl
@@ -210,7 +210,7 @@ fold(#filestate { fd=Fd, filename=Filename, tstamp=FTStamp }, Fun, Acc) ->
case bitcask_nifs:file_read(Fd, ?HEADER_SIZE) of
{ok, <<_Crc:?CRCSIZEFIELD, _Tstamp:?TSTAMPFIELD, _KeySz:?KEYSIZEFIELD,
_ValueSz:?VALSIZEFIELD>> = H} ->
- fold_loop(Fd, Filename, FTStamp, H, 0, Fun, Acc);
+ fold_loop(Fd, Filename, FTStamp, H, 0, Fun, Acc, 0);
{ok, OtherBytes} ->
error_logger:error_msg("~s:fold: ~s: expected ~p bytes but got "
"only ~p bytes, skipping\n",
@@ -322,18 +322,23 @@ has_valid_hintfile(State) ->
%% Internal functions
%% ===================================================================
-fold_loop(Fd, Filename, FTStamp, Header, Offset, Fun, Acc0) ->
+fold_loop(_Fd, Filename, _FTStamp, _Header, Offset, _Fun, Acc, 20) ->
+ error_logger:error_msg("fold_loop: CRC error limit at file ~p offset ~p\n",
+ [Filename, Offset]),
+ Acc;
+fold_loop(Fd, Filename, FTStamp, Header, Offset, Fun, Acc0, CrcSkipCount) ->
<<Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
ValueSz:?VALSIZEFIELD>> = Header,
<<_:4/binary, HeaderMinusCRC/binary>> = Header,
TotalSz = KeySz + ValueSz + ?HEADER_SIZE,
case bitcask_nifs:file_read(Fd, TotalSz) of
{ok, <<Key:KeySz/bytes, Value:ValueSz/bytes, Rest/binary>>} ->
- Acc = case erlang:crc32([HeaderMinusCRC, Key, Value]) of
- Crc32 ->
+ CrcMatch = erlang:crc32([HeaderMinusCRC, Key, Value]) =:= Crc32,
+ Acc = case CrcMatch of
+ true ->
PosInfo = {Filename, FTStamp, Offset, TotalSz},
Fun(Key, Value, Tstamp, PosInfo, Acc0);
- _ ->
+ false ->
error_logger:error_msg(
"fold_loop: CRC error at file ~s offset ~p, "
"skipping ~p bytes\n", [Filename, Offset, TotalSz]),
@@ -341,8 +346,11 @@ fold_loop(Fd, Filename, FTStamp, Header, Offset, Fun, Acc0) ->
end,
case Rest of
<<NextHeader:?HEADER_SIZE/bytes>> ->
+ NewCrcSkipCount = if (not CrcMatch) -> CrcSkipCount + 1;
+ true -> CrcSkipCount
+ end,
fold_loop(Fd, Filename, FTStamp, NextHeader,
- Offset + TotalSz, Fun, Acc);
+ Offset + TotalSz, Fun, Acc, NewCrcSkipCount);
<<>> ->
Acc;
Tail ->
Please sign in to comment.
Something went wrong with that request. Please try again.