Permalink
Browse files

Speed up open and various fold operations. This is comprised of

two major changes:
  - replace the visitor fold with an explict crc check.
  - rewrite all folds in terms a new large file fold function that
    adaptively prefetches (and jumps around when prefetching is
    the wrong thing to do) data so that we're operating on larger
    chunks of data, instead of doing a large number of small reads.
  • Loading branch information...
1 parent 83ec5b4 commit 709413104db7b6f9b446ff0cdfc0bc24ce248626 @evanmcc evanmcc committed Mar 7, 2013
Showing with 507 additions and 176 deletions.
  1. +2 −0 .gitignore
  2. +29 −0 c_src/bitcask_nifs.c
  3. +5 −0 include/bitcask.hrl
  4. +16 −8 src/bitcask.erl
  5. +10 −3 src/bitcask_file.erl
  6. +277 −165 src/bitcask_fileops.erl
  7. +4 −0 src/bitcask_io.erl
  8. +8 −0 src/bitcask_nifs.erl
  9. +104 −0 test/bcfold_perf
  10. +52 −0 test/bcfold_setup
View
2 .gitignore
@@ -4,3 +4,5 @@ ebin
priv/*.so
*.o
*.beam
+*~
+#*#
View
29 c_src/bitcask_nifs.c
@@ -215,6 +215,7 @@ ERL_NIF_TERM bitcask_nifs_file_pread(ErlNifEnv* env, int argc, const ERL_NIF_TER
ERL_NIF_TERM bitcask_nifs_file_pwrite(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_read(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM errno_atom(ErlNifEnv* env, int error);
@@ -257,6 +258,7 @@ static ErlNifFunc nif_funcs[] =
{"file_pwrite_int", 3, bitcask_nifs_file_pwrite},
{"file_read_int", 2, bitcask_nifs_file_read},
{"file_write_int", 2, bitcask_nifs_file_write},
+ {"file_position_int", 2, bitcask_nifs_file_position},
{"file_seekbof_int", 1, bitcask_nifs_file_seekbof}
};
@@ -1601,6 +1603,33 @@ ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TER
}
}
+ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ bitcask_file_handle* handle;
+ unsigned long offset_ul;
+
+ if (enif_get_resource(env, argv[0], bitcask_file_RESOURCE, (void**)&handle) &&
+ enif_get_ulong(env, argv[1], &offset_ul))
+ {
+
+ off_t offset = offset_ul;
+ off_t new_offset = lseek(handle->fd, offset, SEEK_SET);
+ if (new_offset != -1)
+ {
+ return enif_make_tuple2(env, ATOM_OK, enif_make_ulong(env, new_offset));
+ }
+ else
+ {
+ /* Write failed altogether */
+ return enif_make_tuple2(env, ATOM_ERROR, errno_atom(env, errno));
+ }
+ }
+ else
+ {
+ return enif_make_badarg(env);
+ }
+}
+
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
bitcask_file_handle* handle;
View
5 include/bitcask.hrl
@@ -37,3 +37,8 @@
-define(MAXKEYSIZE, 2#1111111111111111).
-define(MAXVALSIZE, 2#11111111111111111111111111111111).
-define(MAXOFFSET, 16#ffffffffffffffff). % max 64-bit unsigned
+
+%% for hintfile validation
+-define(CHUNK_SIZE, 65535).
+-define(MIN_CHUNK_SIZE, 1024).
+-define(MAX_CHUNK_SIZE, 134217728).
View
24 src/bitcask.erl
@@ -315,21 +315,27 @@ fold_keys(Ref, Fun, Acc0, MaxAge, MaxPut) ->
%% @doc fold over all K/V pairs in a bitcask datastore.
%% Fun is expected to take F(K,V,Acc0) -> Acc
--spec fold(reference(), fun((binary(), binary(), any()) -> any()), any()) -> any() | {error, any()}.
-fold(Ref, Fun, Acc0) ->
+-spec fold(reference() | record(),
+ fun((binary(), binary(), any()) -> any()),
+ any()) -> any() | {error, any()}.
+fold(Ref, Fun, Acc0) when is_reference(Ref)->
State = get_state(Ref),
+ fold(State, Fun, Acc0);
+fold(State, Fun, Acc0) ->
MaxAge = get_opt(max_fold_age, State#bc_state.opts) * 1000, % convert from ms to us
MaxPuts = get_opt(max_fold_puts, State#bc_state.opts),
- fold(Ref, Fun, Acc0, MaxAge, MaxPuts).
+ fold(State, Fun, Acc0, MaxAge, MaxPuts).
%% @doc fold over all K/V pairs in a bitcask datastore specifying max age/updates of
%% the frozen keystore.
%% Fun is expected to take F(K,V,Acc0) -> Acc
--spec fold(reference(), fun((binary(), binary(), any()) -> any()), any(),
- non_neg_integer() | undefined, non_neg_integer() | undefined) ->
+-spec fold(reference() | record(), fun((binary(), binary(), any()) -> any()), any(),
+ non_neg_integer() | undefined, non_neg_integer() | undefined) ->
any() | {error, any()}.
-fold(Ref, Fun, Acc0, MaxAge, MaxPut) ->
+fold(Ref, Fun, Acc0, MaxAge, MaxPut) when is_reference(Ref)->
State = get_state(Ref),
+ fold(State, Fun, Acc0, MaxAge, MaxPut);
+fold(State, Fun, Acc0, MaxAge, MaxPut) ->
FrozenFun =
fun() ->
case open_fold_files(State#bc_state.dirname, 3) of
@@ -853,13 +859,15 @@ scan_key_files([Filename | Rest], KeyDir, Acc, CloseFile, EnoentOK) ->
%% Restrictive pattern matching below is intentional
case bitcask_fileops:open_file(Filename) of
{ok, File} ->
+ FileTstamp = bitcask_fileops:file_tstamp(File),
F = fun(K, Tstamp, {Offset, TotalSz}, _) ->
bitcask_nifs:keydir_put(KeyDir,
K,
- bitcask_fileops:file_tstamp(File),
+ FileTstamp,
TotalSz,
Offset,
- Tstamp)
+ Tstamp,
+ false)
end,
bitcask_fileops:fold_keys(File, F, undefined, recovery),
if CloseFile == true ->
View
13 src/bitcask_file.erl
@@ -64,6 +64,9 @@ file_read(Pid, Size) ->
file_write(Pid, Bytes) ->
file_request(Pid, {file_write, Bytes}).
+file_position(Pid, Position) ->
+ file_request(Pid, {file_position, Position}).
+
file_seekbof(Pid) ->
file_request(Pid, file_seekbof).
@@ -106,11 +109,11 @@ handle_call({file_open, Owner, Filename, Opts}, _From, State) ->
IsReadOnly = proplists:get_bool(readonly, Opts),
Mode = case {IsReadOnly, IsCreate} of
{true, _} ->
- [read, raw, binary, read_ahead];
+ [read, raw, binary];
{_, false} ->
- [read, write, raw, binary, read_ahead];
+ [read, write, raw, binary];
{_, true} ->
- [read, write, exclusive, raw, binary, read_ahead]
+ [read, write, exclusive, raw, binary]
end,
[warn("Bitcask file option '~p' not supported~n", [Opt])
|| Opt <- [o_sync],
@@ -146,6 +149,10 @@ handle_call({file_write, Bytes}, From, State=#state{fd=Fd}) ->
check_owner(From, State),
Reply = file:write(Fd, Bytes),
{reply, Reply, State};
+handle_call({file_position, Position}, From, State=#state{fd=Fd}) ->
+ check_owner(From, State),
+ Reply = file:position(Fd, Position),
+ {reply, Reply, State};
handle_call(file_seekbof, From, State=#state{fd=Fd}) ->
check_owner(From, State),
{ok, _} = file:position(Fd, bof),
View
442 src/bitcask_fileops.erl
@@ -207,27 +207,14 @@ sync(#filestate { mode = read_write, fd = Fd, hintfd = HintFd }) ->
any()) ->
any() | {error, any()}.
fold(fresh, _Fun, Acc) -> Acc;
-fold(#filestate { fd=Fd, filename=Filename, tstamp=FTStamp }, Fun, Acc) ->
+fold(#filestate { fd=Fd, filename=Filename, tstamp=FTStamp }, Fun, Acc0) ->
%% TODO: Add some sort of check that this is a read-only file
- %% TODO: Need to position+read?!
ok = bitcask_io:file_seekbof(Fd),
- case bitcask_io:file_read(Fd, ?HEADER_SIZE) of
- {ok, <<_Crc:?CRCSIZEFIELD, _Tstamp:?TSTAMPFIELD, _KeySz:?KEYSIZEFIELD,
- _ValueSz:?VALSIZEFIELD>> = H} ->
- fold_loop(Fd, Filename, FTStamp, H, 0, Fun, Acc, 0);
- {ok, OtherBytes} ->
- error_logger:error_msg("~s:fold: ~s: expected ~p bytes but got "
- "only ~p bytes, skipping\n",
- [?MODULE, Filename, ?HEADER_SIZE,
- size(OtherBytes)]),
- Acc;
- eof ->
- Acc;
+ case fold_file_loop(Fd, fun fold_int_loop/6, Fun, Acc0,
+ {Filename, FTStamp, 0, 0}) of
{error, Reason} ->
- %% A truncated file would yield {ok, OtherBytes} or eof.
- %% Something really bad has happened, either an allocation
- %% error or something Truly Bad, e.g. EBADF, EIO.
- throw({fold_error, {read_read, Filename, 0, ?HEADER_SIZE, Reason}, Acc})
+ {error, Reason};
+ Acc -> Acc
end.
-spec fold_keys(fresh | #filestate{}, fun((binary(), integer(), {integer(), integer()}, any()) -> any()), any()) ->
@@ -257,12 +244,21 @@ fold_keys(#filestate { fd = Fd } = State, Fun, Acc, Mode) ->
case has_valid_hintfile(State) of
true ->
case fold_hintfile(State, Fun, Acc) of
- {error, _} ->
+ {error, {trunc_hintfile, Acc0}} ->
+ Acc0;
+ {error, Reason} ->
+ HintFile = hintfile_name(State),
+ error_logger:error_msg("Hintfile '~s' failed fold: ~p\n",
+ [HintFile, Reason]),
fold_keys_loop(Fd, 0, Fun, Acc);
Acc1 ->
Acc1
end;
false ->
+ HintFile = hintfile_name(State),
+ error_logger:error_msg("Hintfile '~s' invalid\n",
+ [HintFile]),
+
fold_keys_loop(Fd, 0, Fun, Acc)
end
end.
@@ -310,126 +306,163 @@ has_hintfile(#filestate { filename = Fname }) ->
has_valid_hintfile(State) ->
case has_hintfile(State) of
true ->
- case fold_hintfile(State, fun has_valid_hintfile_visitor/4, 0) of
- N when is_number(N) ->
- true;
+ HintFile = hintfile_name(State),
+ case bitcask_io:file_open(HintFile, [readonly, read_ahead]) of
+ {ok, HintFd} ->
+ {ok, HintI} = file:read_file_info(HintFile),
+ HintSize = HintI#file_info.size,
+ hintfile_validate_loop(HintFd, 0, HintSize);
_ ->
false
end;
- X ->
- X
+ Else -> Else
end.
-
+hintfile_validate_loop(Fd, CRC0, Rem) ->
+ {ReadLen, HasCRC} =
+ case Rem =< ?CHUNK_SIZE of
+ true ->
+ case Rem < ?HINT_RECORD_SZ of
+ true ->
+ {0, error};
+ false ->
+ {Rem - ?HINT_RECORD_SZ, true}
+ end;
+ false ->
+ {?CHUNK_SIZE, false}
+ end,
+
+ case bitcask_io:file_read(Fd, ReadLen) of
+ {ok, Bytes} ->
+ case HasCRC of
+ true ->
+ ExpectCRC = read_crc(Fd),
+ CRC = erlang:crc32(CRC0, Bytes),
+ ExpectCRC =:= CRC;
+ false ->
+ hintfile_validate_loop(Fd,
+ erlang:crc32(CRC0, Bytes),
+ Rem - ReadLen);
+ error ->
+ false
+ end;
+ _ -> false
+ end.
+
+read_crc(Fd) ->
+ case bitcask_io:file_read(Fd, ?HINT_RECORD_SZ) of
+ {ok, <<0:?TSTAMPFIELD,
+ 0:?KEYSIZEFIELD,
+ ExpectCRC:?TOTALSIZEFIELD,
+ (?MAXOFFSET):?OFFSETFIELD>>} ->
+ ExpectCRC;
+ _ -> error
+ end.
+
%% ===================================================================
%% Internal functions
%% ===================================================================
-fold_loop(_Fd, Filename, _FTStamp, _Header, Offset, _Fun, Acc, 20) ->
+fold_int_loop(_Bytes, _Fun, Acc, _Consumed, {Filename, _, Offset, 20}, _EOI) ->
error_logger:error_msg("fold_loop: CRC error limit at file ~p offset ~p\n",
[Filename, Offset]),
- Acc;
-fold_loop(Fd, Filename, FTStamp, Header, Offset, Fun, Acc0, CrcSkipCount) ->
- <<Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
- ValueSz:?VALSIZEFIELD>> = Header,
- <<_:4/binary, HeaderMinusCRC/binary>> = Header,
+ {done, Acc};
+fold_int_loop(<<Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD,
+ KeySz:?KEYSIZEFIELD, ValueSz:?VALSIZEFIELD,
+ Key:KeySz/bytes, Value:ValueSz/bytes, Rest/binary>>,
+ Fun, Acc0, Consumed0,
+ {Filename, FTStamp, Offset, CrcSkipCount},
+ EOI) ->
TotalSz = KeySz + ValueSz + ?HEADER_SIZE,
- case bitcask_io:file_read(Fd, TotalSz) of
- {ok, <<Key:KeySz/bytes, Value:ValueSz/bytes, Rest/binary>>} ->
- CrcMatch = erlang:crc32([HeaderMinusCRC, Key, Value]) =:= Crc32,
- Acc = case CrcMatch of
- true ->
- PosInfo = {Filename, FTStamp, Offset, TotalSz},
- Fun(Key, Value, Tstamp, PosInfo, Acc0);
- false ->
- error_logger:error_msg(
- "fold_loop: CRC error at file ~s offset ~p, "
- "skipping ~p bytes\n", [Filename, Offset, TotalSz]),
- Acc0
- end,
- case Rest of
- <<NextHeader:?HEADER_SIZE/bytes>> ->
- NewCrcSkipCount = if (not CrcMatch) -> CrcSkipCount + 1;
- true -> CrcSkipCount
- end,
- fold_loop(Fd, Filename, FTStamp, NextHeader,
- Offset + TotalSz, Fun, Acc, NewCrcSkipCount);
- <<>> ->
- Acc;
- Tail ->
- error_logger:error_msg(
- "Trailing data, discarding (~p bytes)\n",
- [size(Tail)]),
- Acc
- end;
- {ok, X} ->
- error_logger:error_msg("Bad datafile entry, discarding"
- "(~p/~p bytes)\n", [size(X),TotalSz]),
- Acc0;
- eof ->
- error_logger:error_msg("Unexpected EOF, ignore (~p bytes)\n",
- [TotalSz]),
- Acc0;
- {error, Reason} ->
- %% Again, either we had an allocation error in NIF land,
- %% or something Truly Bad happened, e.g. EBADF, EIO
- throw({fold_error, {file_read, Filename, Offset, TotalSz, Reason}, Acc0})
- end.
+ case erlang:crc32([<<Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
+ ValueSz:?VALSIZEFIELD>>, Key, Value]) of
+ Crc32 ->
+ PosInfo = {Filename, FTStamp, Offset, TotalSz},
+ Acc = Fun(Key, Value, Tstamp, PosInfo, Acc0),
+ fold_int_loop(Rest, Fun, Acc, Consumed0 + TotalSz,
+ {Filename, FTStamp, Offset + TotalSz,
+ CrcSkipCount}, EOI);
+ _ ->
+ error_logger:error_msg("fold_loop: CRC error at file ~s offset ~p, "
+ "skipping ~p bytes\n",
+ [Filename, Offset, TotalSz]),
+ fold_int_loop(Rest, Fun, Acc0, Consumed0 + TotalSz,
+ {Filename, FTStamp, Offset + TotalSz,
+ CrcSkipCount + 1}, EOI)
+ end;
+fold_int_loop(<<>>, _Fun, Acc, Consumed, _Args, EOI) when EOI =:= true ->
+ {done, Acc, Consumed};
+fold_int_loop(_Bytes, _Fun, Acc, Consumed, Args, EOI) when EOI =:= false ->
+ {more, Acc, Consumed, Args};
+fold_int_loop(Bytes, _Fun, Acc, _Consumed, _Args, EOI) when EOI =:= true ->
+ error_logger:error_msg("Trailing data, discarding (~p bytes)\n",
+ [size(Bytes)]),
+ {done, Acc}.
fold_keys_loop(Fd, Offset, Fun, Acc0) ->
- case bitcask_io:file_pread(Fd, Offset, ?HEADER_SIZE) of
- {ok, Header} when erlang:size(Header) =:= ?HEADER_SIZE ->
- <<_Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
- ValueSz:?VALSIZEFIELD>> = Header,
- TotalSz = KeySz + ValueSz + ?HEADER_SIZE,
- PosInfo = {Offset, TotalSz},
- %% NOTE: We are intentionally *not* reading the value blob,
- %% so we cannot check the checksum.
- case bitcask_io:file_pread(Fd, Offset + ?HEADER_SIZE, KeySz) of
- {ok, Key} when erlang:size(Key) =:= KeySz ->
- Acc = Fun(Key, Tstamp, PosInfo, Acc0),
- fold_keys_loop(Fd, Offset + TotalSz, Fun, Acc);
- eof ->
- Acc0;
- {error, Reason} ->
- {error, Reason};
- X ->
- error_logger:error_msg("Bad datafile entry 1: ~p\n", [X]),
- Acc0
- end;
- eof ->
- Acc0;
+ case bitcask_io:file_position(Fd, Offset) of
+ {ok, Offset} -> ok;
+ Other -> error(Other)
+ end,
+
+ case fold_file_loop(Fd, fun fold_keys_int_loop/6, Fun, Acc0, {Offset, 0}) of
{error, Reason} ->
{error, Reason};
- X ->
- error_logger:error_msg("Bad datafile entry 2: ~p\n", [X]),
- Acc0
+ Acc -> Acc
end.
-
-fold_hintfile(State, Fun, Acc) ->
+fold_keys_int_loop(<<_Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD,
+ KeySz:?KEYSIZEFIELD, ValueSz:?VALSIZEFIELD,
+ Key:KeySz/bytes, _:ValueSz/bytes,
+ Rest/binary>>,
+ Fun, Acc0, Consumed0,
+ {Offset, AvgValSz0},
+ EOI) ->
+ TotalSz = KeySz + ValueSz + ?HEADER_SIZE,
+ PosInfo = {Offset, TotalSz},
+ Consumed = Consumed0 + TotalSz,
+ AvgValSz = (AvgValSz0 + ValueSz) div 2,
+ Acc = Fun(Key, Tstamp, PosInfo, Acc0),
+ fold_keys_int_loop(Rest, Fun, Acc, Consumed,
+ {Offset + TotalSz, AvgValSz}, EOI);
+%% in the case where values are very large, we don't actually want to
+%% get a larger binary if we don't have to, so just issue a skip.
+fold_keys_int_loop(<<_Crc32:?CRCSIZEFIELD, Tstamp:?TSTAMPFIELD,
+ KeySz:?KEYSIZEFIELD, ValueSz:?VALSIZEFIELD,
+ Key:KeySz/bytes,
+ _Rest/binary>>,
+ Fun, Acc0, _Consumed0,
+ {Offset, AvgValSz0},
+ _EOI) when AvgValSz0 > ?CHUNK_SIZE ->
+ TotalSz = KeySz + ValueSz + ?HEADER_SIZE,
+ PosInfo = {Offset, TotalSz},
+ Acc = Fun(Key, Tstamp, PosInfo, Acc0),
+ AvgValSz = (AvgValSz0 + ValueSz) div 2,
+ NewPos = Offset + TotalSz,
+ {skip, Acc, NewPos, {NewPos, AvgValSz}};
+fold_keys_int_loop(<<>>, _Fun, Acc, Consumed, _Args, EOI) when EOI =:= true ->
+ {done, Acc, Consumed};
+fold_keys_int_loop(_Bytes, _Fun, Acc, Consumed, Args, EOI) when EOI =:= false ->
+ {more, Acc, Consumed, Args};
+fold_keys_int_loop(Bytes, _Fun, Acc, _Consumed, _Args, EOI) when EOI =:= true ->
+ error_logger:error_msg("Bad datafile entry 1: ~p\n", [Bytes]),
+ {done, Acc}.
+
+
+fold_hintfile(State, Fun, Acc0) ->
HintFile = hintfile_name(State),
- case bitcask_io:file_open(HintFile, [readonly]) of
+ case bitcask_io:file_open(HintFile, [readonly, read_ahead]) of
{ok, HintFd} ->
try
{ok, DataI} = file:read_file_info(State#filestate.filename),
DataSize = DataI#file_info.size,
- case bitcask_io:file_read(HintFd, ?HINT_RECORD_SZ) of
- {ok, <<H:?HINT_RECORD_SZ/bytes>>} ->
- fold_hintfile_loop(DataSize, HintFile,
- HintFd, 0, H, Fun, Acc);
- {ok, Bytes} ->
- error_logger:error_msg("~s:fold_hintfile: ~s: expected "
- "~p bytes but got only ~p "
- "bytes, skipping\n",
- [?MODULE, HintFile,
- ?HINT_RECORD_SZ, size(Bytes)]),
- {error, {incomplete_hint, 1}};
- eof ->
- {error, empty_hintfile}; % shoudld never be empty hintfiles
+ case fold_file_loop(HintFd, fun fold_hintfile_loop/6, Fun, Acc0,
+ {DataSize, HintFile}) of
{error, Reason} ->
- {error, {fold_hintfile, Reason}}
+ {error, Reason};
+ Acc ->
+ Acc
end
after
bitcask_io:file_close(HintFd)
@@ -438,55 +471,142 @@ fold_hintfile(State, Fun, Acc) ->
{error, {fold_hintfile, Reason}}
end.
-
-fold_hintfile_loop(_DataSize, HintFile, _Fd, HintCRC,
- <<0:?TSTAMPFIELD, 0:?KEYSIZEFIELD,
- ExpectCRC:?TOTALSIZEFIELD, (?MAXOFFSET):?OFFSETFIELD>>, _Fun, Acc0) ->
- case HintCRC of
- ExpectCRC ->
- Acc0;
- _ ->
- error_logger:error_msg("Hintfile '~s' has bad CRC ~p expected ~p\n",
- [HintFile, HintCRC, ExpectCRC]),
- {error, {bad_crc, HintCRC, ExpectCRC}}
- end;
-fold_hintfile_loop(DataSize, HintFile, Fd, HintCRC0,
- HintRecord, Fun, Acc0) ->
- <<Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
- TotalSz:?TOTALSIZEFIELD, Offset:?OFFSETFIELD>> = HintRecord,
- ReadSz = KeySz + ?HINT_RECORD_SZ,
- case bitcask_io:file_read(Fd, ReadSz) of
- {ok, <<Key:KeySz/bytes, Rest/binary>>} when
- Offset + TotalSz =< DataSize ->
+%% conditional end match here, checking that we get the expected CRC-containing
+%% hint record, three-tuple done indicates that we've exhausted all bytes, or
+%% it's an error
+fold_hintfile_loop(<<0:?TSTAMPFIELD, 0:?KEYSIZEFIELD,
+ _ExpectCRC:?TOTALSIZEFIELD, (?MAXOFFSET):?OFFSETFIELD>>,
+ _Fun, Acc, Consumed, _Args, EOI) when EOI =:= true ->
+ {done, Acc, Consumed + ?HINT_RECORD_SZ};
+%% main work loop here, containing the full match of hint record and key.
+%% if it gets a match, it proceeds to recurse over the rest of the big
+%% binary
+fold_hintfile_loop(<<Tstamp:?TSTAMPFIELD, KeySz:?KEYSIZEFIELD,
+ TotalSz:?TOTALSIZEFIELD, Offset:?OFFSETFIELD,
+ Key:KeySz/bytes, Rest/binary>>,
+ Fun, Acc0, Consumed0, {DataSize, HintFile} = Args,
+ EOI) ->
+ case Offset + TotalSz =< DataSize + 1 of
+ true ->
PosInfo = {Offset, TotalSz},
Acc = Fun(Key, Tstamp, PosInfo, Acc0),
- case Rest of
- <<NextRecord:?HINT_RECORD_SZ/bytes>> ->
- HintCRC=erlang:crc32(HintCRC0, [HintRecord, Key]),
- fold_hintfile_loop(DataSize, HintFile, Fd,
- HintCRC, NextRecord, Fun, Acc);
- <<>> ->
- %% Hint files without CRCs will end on a record boundary.
- %% No way to know whether to expect a crc or not.
- %% Over time, merges will add CRCs to all hint files and
- %% we can set this as the default.
- case application:get_env(bitcask, require_hint_crc) of
- {ok, true} ->
- {error, {incomplete_hint, 4}};
- _ ->
- <<>>
- end;
- X ->
- error_logger:error_msg("Bad hintfile data 1: ~p\n", [X]),
- {error, {incomplete_hint, 2}}
- end;
- {ok, _} ->
+ Consumed = KeySz + ?HINT_RECORD_SZ + Consumed0,
+ fold_hintfile_loop(Rest, Fun, Acc,
+ Consumed, Args, EOI);
+ false ->
error_logger:error_msg("Hintfile '~s' contains pointer ~p ~p "
"that is greater than total data size ~p\n",
[HintFile, Offset, TotalSz, DataSize]),
- {error, {incomplete_hint, 3}};
+ {error, {trunc_hintfile, Acc0}}
+ end;
+%% error case where we've gotten to the end of the file without the CRC match
+fold_hintfile_loop(<<>>, _Fun, Acc, _Consumed, _Args, EOI) when EOI =:= true ->
+ case application:get_env(bitcask, require_hint_crc) of
+ {ok, true} ->
+ {error, {incomplete_hint, 4}};
+ _ ->
+ {done, Acc}
+ end;
+%% catchall case where we don't get enough bytes from fold_file_loop
+fold_hintfile_loop(_Bytes, _Fun, _Acc0, _Consumed0, _Args, EOI)
+ when EOI =:= true ->
+ {error, {incomplete_hint, 5}};
+fold_hintfile_loop(_Bytes, _Fun, Acc0, Consumed0, Args, _EOI) ->
+ {more, Acc0, Consumed0, Args}.
+
+
+%% @doc scaffolding for faster folds over large files.
+%% The somewhat tricky thing here is the FoldFn, which is a /6
+%% that does all the actual work. see fold_hintfile_loop as a
+%% commented example
+-spec fold_file_loop(port(),
+ fun((binary(), fun(), any(), integer(),
+ any(), true | false) ->
+ {more, any(), integer(), any()} |
+ {done, any()} |
+ {done, any(), integer()} |
+ {skip, any(), integer(), any()} |
+ {error, any()}),
+ fun(), any(), any()) ->
+ {error, any()} | any().
+fold_file_loop(Fd, FoldFn, IntFoldFn, Acc, Args) ->
+ fold_file_loop(Fd, FoldFn, IntFoldFn, Acc, Args, none, ?CHUNK_SIZE).
+
+fold_file_loop(Fd, FoldFn, IntFoldFn, Acc0, Args0, Prev0, ChunkSz0) ->
+ %% analyze what happened in the last loop to determine whether or
+ %% not to change the read size. This is an optimization for large values
+ %% in datafile folds and key folds
+ {Prev, ChunkSz}
+ = case Prev0 of
+ none -> {<<>>, ChunkSz0};
+ %if we're skipping around, we're likely too big
+ skip ->
+ CS = case ChunkSz0 >= (?MIN_CHUNK_SIZE * 2) of
+ true -> ChunkSz0 div 2;
+ false -> ?MIN_CHUNK_SIZE
+ end,
+ {<<>>, CS};
+ Other ->
+ CS = case byte_size(Other) of
+ %% to avoid having to rescan the same
+ %% binaries over and over again.
+ N when N >= ?MAX_CHUNK_SIZE ->
+ ?MAX_CHUNK_SIZE;
+ N when N > ChunkSz0 ->
+ ChunkSz0 * 2;
+ _ -> ChunkSz0
+ end,
+ {Other, CS}
+ end,
+ case bitcask_io:file_read(Fd, ChunkSz) of
+ {ok, <<Bytes0/binary>>} ->
+ Bytes = <<Prev/binary, Bytes0/binary>>,
+ case FoldFn(Bytes, IntFoldFn, Acc0, 0, Args0,
+ byte_size(Bytes0) /= ChunkSz) of
+ %% foldfuns should return more when they don't have enough
+ %% bytes to satisfy their main binary match.
+ {more, Acc, Consumed, Args} ->
+ Rest =
+ case Consumed > byte_size(Bytes) of
+ true -> <<>>;
+ false ->
+ <<_:Consumed/bytes, R/binary>> = Bytes,
+ R
+ end,
+ fold_file_loop(Fd, FoldFn, IntFoldFn,
+ Acc, Args, Rest, ChunkSz);
+ %% foldfuns should return skip when they have no need for
+ %% the rest of the binary that they've been handed.
+ %% see fold_int_loop for proper usage.
+ {skip, Acc, SkipTo, Args} ->
+ case bitcask_io:file_position(Fd, SkipTo) of
+ {ok, SkipTo} ->
+ fold_file_loop(Fd, FoldFn, IntFoldFn,
+ Acc, Args, skip, ChunkSz);
+ {error, Reason} ->
+ {error, Reason};
+ Other1 ->
+ {error, {file_fold_error, Other1}}
+ end;
+ %% the done two tuple is returned when we want to be
+ %% unconditionally successfully finished, i.e. trailing data
+ %% is a non-fatal error
+ {done, Acc} ->
+ Acc;
+ %% three tuple done requires full consumption of all bytes given
+ %% to the internal fold function, to satisfy the pre-existing
+ %% semantics of hintfile folds.
+ {done, Acc, Consumed} ->
+ case Consumed =:= byte_size(Bytes) of
+ true -> Acc;
+ false ->
+ {error, {partial_fold, Consumed, Bytes, Bytes0}}
+ end;
+ {error, Reason} ->
+ {error, Reason}
+ end;
eof ->
- {error, incomplete_key};
+ Acc0;
{error, Reason} ->
{error, Reason}
end.
@@ -531,14 +651,6 @@ hintfile_entry(Key, Tstamp, {Offset, TotalSz}) ->
[<<Tstamp:?TSTAMPFIELD>>, <<KeySz:?KEYSIZEFIELD>>,
<<TotalSz:?TOTALSIZEFIELD>>, <<Offset:?OFFSETFIELD>>, Key].
-%% Visitor function for hintfile validation - broken out as a
-%% separate function to make tracing with dbg easier if there is
-%% ever an issue with hintfiles.
-has_valid_hintfile_visitor(_Key, _Tstamp, _PosInfo, Acc0) when is_number(Acc0) ->
- Acc0+1;
-has_valid_hintfile_visitor(_Key, _Tstamp, _PosInfo, Acc0) ->
- Acc0.
-
%% Find the most recent timestamp for a .data file
most_recent_tstamp(DirName) ->
lists:foldl(fun({TS,_},Acc) ->
View
4 src/bitcask_io.erl
@@ -54,6 +54,10 @@ file_seekbof(Ref) ->
M = file_module(),
M:file_seekbof(Ref).
+file_position(Ref, Position) ->
+ M = file_module(),
+ M:file_position(Ref, Position).
+
file_module() ->
case get(bitcask_file_mod) of
undefined ->
View
8 src/bitcask_nifs.erl
@@ -49,6 +49,7 @@
file_pwrite/3,
file_read/2,
file_write/2,
+ file_position/2,
file_seekbof/1]).
-on_load(init/0).
@@ -382,6 +383,13 @@ file_write(Ref, Bytes) ->
file_write_int(_Ref, _Bytes) ->
erlang:nif_error({error, not_loaded}).
+file_position(Ref, Position) ->
+ bitcask_bump:big(),
+ file_position_int(Ref, Position).
+
+file_position_int(_Ref, _Position) ->
+ erlang:nif_error({error, not_loaded}).
+
file_seekbof(Ref) ->
bitcask_bump:big(),
file_seekbof_int(Ref).
View
104 test/bcfold_perf
@@ -0,0 +1,104 @@
+#!/usr/bin/env escript
+
+-mode(compile).
+
+main(Args) when length(Args) >= 2 ->
+ [DataDir | List] = Args,
+ [begin
+ test_small(DataDir, D),
+ test_medium(DataDir,D),
+ test_large(DataDir,D),
+ test_cs(DataDir,D)
+ end ||
+ D <- List];
+main(_) ->
+ usage().
+
+usage() ->
+ io:format("bcfold_perf <data_dir> <branch dir>+~n"),
+ io:format("run bcfold_setup <data_dir> before running this~n").
+
+test(DataDir, BranchDir, Subdir) ->
+ Modlist = ensure_and_load_bitcask(BranchDir),
+ time_folds(DataDir, Subdir),
+ cleanup(Modlist).
+
+test_small(DataDir, BranchDir) ->
+ test(DataDir, BranchDir, "small").
+
+test_medium(DataDir, BranchDir) ->
+ test(DataDir, BranchDir, "medium").
+
+test_large(DataDir, BranchDir) ->
+ test(DataDir, BranchDir, "large").
+
+test_cs(DataDir, BranchDir) ->
+ test(DataDir, BranchDir, "cs").
+
+sum(_, _, A0) ->
+ A0 + 1.
+
+sum(_, _, _, A0) ->
+ A0 + 1.
+
+
+time_folds(DataDir, SubDir) ->
+ clean_env(),
+ %% this is basically a proxy for hintfile folds and
+ %% the CRC check
+ {OpenTime, Ref} = timer:tc(bitcask, open,
+ [DataDir++SubDir++"/"]),
+ %%bitcask:close(Ref),
+
+ clean_env(),
+ {ok, Fd1} =
+ bitcask_fileops:open_file(DataDir++SubDir++"/1.bitcask.data"),
+ {FoldDatafileKeyTime, Count} =
+ timer:tc(bitcask_fileops, fold_keys,
+ [Fd1, fun sum/4, 0, datafile]),
+ bitcask_io:file_close(Fd1),
+
+ clean_env(),
+ {FoldDatafileTime, Count2} =
+ timer:tc(bitcask, fold, [Ref, fun sum/3, 0]),
+
+
+ io:format("Report for run: ~s~n", [SubDir]),
+ io:format("Open: ~15w~n", [OpenTime]),
+ io:format("Datafile Key Fold: ~15w~10w~n", [FoldDatafileKeyTime, Count]),
+ io:format("Datafile Fold: ~15w~10w~n~n", [FoldDatafileTime, Count2]).
+
+clean_env() ->
+ case os:type() of
+ {unix, darwin} ->
+ os:cmd("purge");
+ {unix, linux} ->
+ io:format("if not root or called with sudo, this may fail~n"),
+ os:cmd("echo 3 >/proc/sys/vm/drop_caches");
+ _ ->
+ halt("unknown os")
+ end.
+
+
+ensure_and_load_bitcask(BranchDir) ->
+ Path = BranchDir ++ "/ebin/",
+ Modlist = filelib:fold_files(Path,
+ ".*.beam", false,
+ fun(X, Acc) ->
+ Mod0 = filename:rootname(filename:basename(X)),
+ Mod = list_to_atom(Mod0),
+ [Mod | Acc]
+ end, []),
+ code:add_path(Path),
+ lists:map(fun code:load_abs/1, Modlist),
+ code:del_path(Path),
+ application:start(bitcask),
+ application:set_env(bitcask, io_mode, erlang),
+ Modlist.
+
+cleanup(Modlist) ->
+ [begin
+ code:delete(M),
+ code:purge(M)
+ end ||
+ M <- Modlist].
View
52 test/bcfold_setup
@@ -0,0 +1,52 @@
+#! /usr/bin/env escript
+
+main([BitcaskDir, DataDir]) ->
+ load_bitcask(BitcaskDir),
+ make_small(DataDir),
+ make_medium(DataDir),
+ make_large(DataDir),
+ make_cs(DataDir).
+
+
+load_bitcask(Dir) ->
+ Path = Dir ++ "/ebin/",
+ Modlist = filelib:fold_files(Path,
+ ".*.beam", false,
+ fun(X, Acc) ->
+ Mod0 = filename:rootname(filename:basename(X)),
+ Mod = list_to_atom(Mod0),
+ [Mod | Acc]
+ end, []),
+ code:add_path(Path),
+ lists:map(fun code:load_abs/1, Modlist).
+
+
+make_small(Dir) ->
+ make_dir(Dir++"/small/", 32, 100).
+
+make_medium(Dir) ->
+ make_dir(Dir++"/medium/", 32, 2048).
+
+make_large(Dir) ->
+ make_dir(Dir++"/large/", 32, 50*1024).
+
+make_cs(Dir) ->
+ make_dir(Dir++"/cs/", 32, 1024*1024).
+
+make_dir(Dir, KeySz, BinSz) ->
+ Ref = bitcask:open(Dir, [read_write]),
+
+ %% header size + key size + binary size + ext term format overhead
+ BinarySize = 14 + KeySz + BinSz + 12,
+ %% 1GB
+ NumObjs = (1073741824 div BinarySize) + 1,
+ Key = crypto:rand_bytes(KeySz),
+ Bin = crypto:rand_bytes(BinSz),
+ [bitcask:put(Ref, <<Key/binary,X:32>>, Bin) ||
+ X <- lists:seq(1, NumObjs)],
+ bitcask:close(Ref).
+
+
+
+
+

0 comments on commit 7094131

Please sign in to comment.