Permalink
Browse files

Merge pull request #83 from basho/pevm-fast-fold

Improve fold speed for large files filled with small objects.
  • Loading branch information...
2 parents 83ec5b4 + 7094131 commit 2d8f1b90d707d9cfe1c3488e42adc6c91cfb259b @evanmcc evanmcc committed Apr 23, 2013
Showing with 507 additions and 176 deletions.
  1. +2 −0 .gitignore
  2. +29 −0 c_src/bitcask_nifs.c
  3. +5 −0 include/bitcask.hrl
  4. +16 −8 src/bitcask.erl
  5. +10 −3 src/bitcask_file.erl
  6. +277 −165 src/bitcask_fileops.erl
  7. +4 −0 src/bitcask_io.erl
  8. +8 −0 src/bitcask_nifs.erl
  9. +104 −0 test/bcfold_perf
  10. +52 −0 test/bcfold_setup
View
@@ -4,3 +4,5 @@ ebin
priv/*.so
*.o
*.beam
+*~
+#*#
View
@@ -215,6 +215,7 @@ ERL_NIF_TERM bitcask_nifs_file_pread(ErlNifEnv* env, int argc, const ERL_NIF_TER
ERL_NIF_TERM bitcask_nifs_file_pwrite(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_read(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM errno_atom(ErlNifEnv* env, int error);
@@ -257,6 +258,7 @@ static ErlNifFunc nif_funcs[] =
{"file_pwrite_int", 3, bitcask_nifs_file_pwrite},
{"file_read_int", 2, bitcask_nifs_file_read},
{"file_write_int", 2, bitcask_nifs_file_write},
+ {"file_position_int", 2, bitcask_nifs_file_position},
{"file_seekbof_int", 1, bitcask_nifs_file_seekbof}
};
@@ -1601,6 +1603,33 @@ ERL_NIF_TERM bitcask_nifs_file_write(ErlNifEnv* env, int argc, const ERL_NIF_TER
}
}
+ERL_NIF_TERM bitcask_nifs_file_position(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{ /* Seek the handle in argv[0] to the offset in argv[1]; returns {ok, NewOffset}, {error, Errno}, or badarg. */
+ bitcask_file_handle* handle;
+ unsigned long offset_ul;
+
+ if (enif_get_resource(env, argv[0], bitcask_file_RESOURCE, (void**)&handle) &&
+ enif_get_ulong(env, argv[1], &offset_ul))
+ {
+
+ off_t offset = offset_ul; /* NOTE(review): unsigned long -> off_t; widths may differ on some platforms -- confirm large offsets survive */
+ off_t new_offset = lseek(handle->fd, offset, SEEK_SET); /* SEEK_SET: offset is measured from the start of the file */
+ if (new_offset != -1)
+ {
+ return enif_make_tuple2(env, ATOM_OK, enif_make_ulong(env, new_offset));
+ }
+ else
+ {
+ /* lseek failed; report the errno as an atom */
+ return enif_make_tuple2(env, ATOM_ERROR, errno_atom(env, errno));
+ }
+ }
+ else
+ {
+ return enif_make_badarg(env); /* argv[0] is not a bitcask file resource, or argv[1] is not an unsigned long */
+ }
+}
+
ERL_NIF_TERM bitcask_nifs_file_seekbof(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
bitcask_file_handle* handle;
View
@@ -37,3 +37,8 @@
-define(MAXKEYSIZE, 2#1111111111111111).
-define(MAXVALSIZE, 2#11111111111111111111111111111111).
-define(MAXOFFSET, 16#ffffffffffffffff). % max 64-bit unsigned
+
+%% for hintfile validation
+-define(CHUNK_SIZE, 65535).
+-define(MIN_CHUNK_SIZE, 1024).
+-define(MAX_CHUNK_SIZE, 134217728).
View
@@ -315,21 +315,27 @@ fold_keys(Ref, Fun, Acc0, MaxAge, MaxPut) ->
%% @doc fold over all K/V pairs in a bitcask datastore.
%% Fun is expected to take F(K,V,Acc0) -> Acc
--spec fold(reference(), fun((binary(), binary(), any()) -> any()), any()) -> any() | {error, any()}.
-fold(Ref, Fun, Acc0) ->
+-spec fold(reference() | record(),
+ fun((binary(), binary(), any()) -> any()),
+ any()) -> any() | {error, any()}.
+fold(Ref, Fun, Acc0) when is_reference(Ref)->
State = get_state(Ref),
+ fold(State, Fun, Acc0);
+fold(State, Fun, Acc0) ->
MaxAge = get_opt(max_fold_age, State#bc_state.opts) * 1000, % convert from ms to us
MaxPuts = get_opt(max_fold_puts, State#bc_state.opts),
- fold(Ref, Fun, Acc0, MaxAge, MaxPuts).
+ fold(State, Fun, Acc0, MaxAge, MaxPuts).
%% @doc fold over all K/V pairs in a bitcask datastore specifying max age/updates of
%% the frozen keystore.
%% Fun is expected to take F(K,V,Acc0) -> Acc
--spec fold(reference(), fun((binary(), binary(), any()) -> any()), any(),
- non_neg_integer() | undefined, non_neg_integer() | undefined) ->
+-spec fold(reference() | record(), fun((binary(), binary(), any()) -> any()), any(),
+ non_neg_integer() | undefined, non_neg_integer() | undefined) ->
any() | {error, any()}.
-fold(Ref, Fun, Acc0, MaxAge, MaxPut) ->
+fold(Ref, Fun, Acc0, MaxAge, MaxPut) when is_reference(Ref)->
State = get_state(Ref),
+ fold(State, Fun, Acc0, MaxAge, MaxPut);
+fold(State, Fun, Acc0, MaxAge, MaxPut) ->
FrozenFun =
fun() ->
case open_fold_files(State#bc_state.dirname, 3) of
@@ -853,13 +859,15 @@ scan_key_files([Filename | Rest], KeyDir, Acc, CloseFile, EnoentOK) ->
%% Restrictive pattern matching below is intentional
case bitcask_fileops:open_file(Filename) of
{ok, File} ->
+ FileTstamp = bitcask_fileops:file_tstamp(File),
F = fun(K, Tstamp, {Offset, TotalSz}, _) ->
bitcask_nifs:keydir_put(KeyDir,
K,
- bitcask_fileops:file_tstamp(File),
+ FileTstamp,
TotalSz,
Offset,
- Tstamp)
+ Tstamp,
+ false)
end,
bitcask_fileops:fold_keys(File, F, undefined, recovery),
if CloseFile == true ->
View
@@ -64,6 +64,9 @@ file_read(Pid, Size) ->
file_write(Pid, Bytes) ->
file_request(Pid, {file_write, Bytes}).
+file_position(Pid, Position) -> % Issue a {file_position, Position} request to Pid via file_request/2.
+ file_request(Pid, {file_position, Position}).
+
file_seekbof(Pid) ->
file_request(Pid, file_seekbof).
@@ -106,11 +109,11 @@ handle_call({file_open, Owner, Filename, Opts}, _From, State) ->
IsReadOnly = proplists:get_bool(readonly, Opts),
Mode = case {IsReadOnly, IsCreate} of
{true, _} ->
- [read, raw, binary, read_ahead];
+ [read, raw, binary];
{_, false} ->
- [read, write, raw, binary, read_ahead];
+ [read, write, raw, binary];
{_, true} ->
- [read, write, exclusive, raw, binary, read_ahead]
+ [read, write, exclusive, raw, binary]
end,
[warn("Bitcask file option '~p' not supported~n", [Opt])
|| Opt <- [o_sync],
@@ -146,6 +149,10 @@ handle_call({file_write, Bytes}, From, State=#state{fd=Fd}) ->
check_owner(From, State),
Reply = file:write(Fd, Bytes),
{reply, Reply, State};
+handle_call({file_position, Position}, From, State=#state{fd=Fd}) ->
+ check_owner(From, State),
+ Reply = file:position(Fd, Position),
+ {reply, Reply, State};
handle_call(file_seekbof, From, State=#state{fd=Fd}) ->
check_owner(From, State),
{ok, _} = file:position(Fd, bof),
Oops, something went wrong.

0 comments on commit 2d8f1b9

Please sign in to comment.