Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

bucket_importer: rewrite #19

Open
wants to merge 1 commit into from

1 participant

@helllamer

Currently, bucket_importer has several really big issues:

  • original keys are lost if they contain $/ (for example, the key <<"a/b/c/d">> will be imported as <<"d">>);
  • severe memory bloat: tens of thousands of filename strings occupy several GB, and running lists:map over such a list adds roughly another 500 MB of stack frames;
  • a useless return value.

This pull request rewrites bucket_importer:

  • keys containing $/ are imported correctly
  • the memory footprint is minimal
  • returns ok
  • minimized garbage for the GC
  • it runs much faster, because GC now consumes very little CPU (no long_gc warnings, even with the default VM configuration).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on May 15, 2012
  1. @helllamer
This page is out of date. Refresh to see the latest.
Showing with 30 additions and 16 deletions.
  1. +30 −16 other/erlang/bucket_importer.erl
View
46 other/erlang/bucket_importer.erl
@@ -1,6 +1,5 @@
%% -------------------------------------------------------------------
%%
-%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
@@ -22,19 +21,34 @@
-export([import_data/4]).
-
%% @doc Import every file found under Directory (recursively) into
%% Bucket on the riak node ToServer, storing each file's bytes as the
%% object value with the given ContentType.
%%
%% A list Bucket is first normalized to the binary form riak expects.
%% The riak key for a file is its path relative to Directory, with the
%% munged shard prefix removed by unmunge_directory/1 and a trailing
%% ".<alphanumerics>" extension stripped. Files are visited one at a
%% time via filelib:fold_files/5, so only one file's contents is live
%% per iteration. Prints a dot per stored object, or an error line for
%% files that cannot be read, and returns ok.
import_data(ToServer, Bucket, Directory, ContentType) when is_list(Bucket) ->
    import_data(ToServer, list_to_binary(Bucket), Directory, ContentType);
import_data(ToServer, Bucket, Directory, ContentType) ->
    {ok, Client} = riak:client_connect(ToServer),
    %% Compiled once up front; strips a final ".ext" (letters/digits,
    %% case-insensitive) from the key.
    {ok, ExtensionRe} = re:compile("\\.[a-z0-9]+$", [caseless]),
    PrefixLen = length(Directory),
    StoreFile =
        fun(Path, Acc) ->
                case file:read_file(Path) of
                    {ok, Bytes} ->
                        %% Path relative to the import root, then the
                        %% shard prefix and extension are removed to
                        %% recover the original key.
                        Relative = lists:nthtail(PrefixLen, Path),
                        BareKey = unmunge_directory(Relative),
                        Key = re:replace(BareKey, ExtensionRe, "", [{return,binary}]),
                        Object = riak_object:new(Bucket, Key, Bytes, ContentType),
                        Client:put(Object, 1),
                        io:format(".");
                    {error, Reason} ->
                        io:format("Error reading ~p:~p~n", [Path, Reason])
                end,
                %% The accumulator is never used; thread it through unchanged.
                Acc
        end,
    %% The fold threads [] through untouched; assert that on the way out.
    [] = filelib:fold_files(Directory, ".*", true, StoreFile, []),
    ok.
+
+
%% Strip the munged shard prefix from a path relative to the import root.
%%
%% The on-disk layout apparently stores key "abc..." under
%% "a/b/c/abc..." — the first three characters of the key repeated as
%% three single-character directory levels (assumption inferred from
%% the match below — TODO confirm against the exporting code).
%%
%% Clause 1 drops any leading slashes (e.g. left over from joining the
%% root path). Clause 2 binds C1, C2, C3 in the prefix and repeats them
%% in the tail pattern, so a "X/Y/Z/" prefix is removed only when the
%% remainder begins with "XYZ"; the remainder is returned as the key
%% with any further slashes intact, which is how keys containing $/
%% survive the import. Any other shape crashes with function_clause —
%% intentional, since the data is expected to match the layout.
unmunge_directory([$/ | Rest]) ->
    unmunge_directory(Rest);
unmunge_directory([C1,$/,C2,$/,C3,$/ | [C1,C2,C3 | _] = Rest]) ->
    Rest.
+
Something went wrong with that request. Please try again.