Skip to content

Commit

Permalink
Initial import
Browse files Browse the repository at this point in the history
  • Loading branch information
fdmanana committed Mar 2, 2011
0 parents commit 59bc101
Show file tree
Hide file tree
Showing 19 changed files with 2,798 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/ebin
/tmp
*~
/src/*~
/support/*~
/tests/*~
couchfoo
29 changes: 29 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Build script for couchfoo.
#
# Directories used by the rules below (compiled beams, sources, tests, and a
# scratch directory for test runs) and the Erlang compiler command.
EBIN_DIR = ebin
SRC_DIR = src
TESTS_DIR = tests
TEST_TMP_DIR = tmp
ERLC = erlc

# Default target: wipe previous build output, then build the executable.
all: clean couchfoo

# Assemble the `couchfoo` executable: an escript header (shebang line plus an
# emulator-arguments line naming the target as the escript main module)
# followed by a zip archive of every compiled .beam file. The temporary
# archive is removed afterwards.
# NOTE(review): the first recipe line uses the literal `ebin` rather than
# $(EBIN_DIR); the two only agree while EBIN_DIR keeps its default value.
couchfoo: ebin
cd ebin && zip -9 ../ebin.zip *.beam && cd ..
echo '#!/usr/bin/env escript' > $@
echo "%%! -smp enable -escript main $@" >> $@
cat ebin.zip >> $@
chmod +x $@
rm -f ebin.zip

# Compile all source and test modules into $(EBIN_DIR). Both the source and
# the tests directories are on the include path.
ebin: $(SRC_DIR)/*.* $(TESTS_DIR)/*.*
mkdir -p $(EBIN_DIR)
$(ERLC) -o $(EBIN_DIR) -I $(SRC_DIR) -I $(TESTS_DIR) $(SRC_DIR)/*.erl $(TESTS_DIR)/*.erl

# Remove every build artifact: compiled beams, test scratch files, the
# temporary archive and the executable.
clean:
rm -fr $(EBIN_DIR) $(TEST_TMP_DIR)
rm -f ebin.zip couchfoo

# Run the test suite against the compiled beams. The scratch directory is
# emptied both before and after the run.
test: ebin
mkdir -p $(TEST_TMP_DIR)
rm -fr $(TEST_TMP_DIR)/*
./support/run_tests.escript $(EBIN_DIR)
rm -fr $(TEST_TMP_DIR)/*
130 changes: 130 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# couchfoo

couchfoo is a standalone command line tool to analyse offline Apache CouchDB database
files. It also allows you to grab an existing header from a database file and append a copy
of it to the end of that same database file. These two main features make it a useful
tool to help recover corrupted database files and hack on the core database engine.

Some of the things it currently does:

* scan a database file (or just the region delimited by a given offset range) for
valid headers and report meaningful information about them and the state of the
database when each header is the current header. It also reports which headers
are corrupted and why they are considered corrupted

* count the number of existing valid headers and corrupted headers in a database
(or just in a region delimited by a given offset range)

* verify that each valid header points to valid BTree root offsets

* extract a valid header from a database file, make a copy of it and append it to the
end of that same database file

* analyze the BTrees pointed to by each header and report some useful information about
them such as: depth, # of kp_nodes and # of kv_nodes

## TODOs

* Output all the information in a JSON format so that it can be used by other tools,
for example a tool to build a UI graph or a heat map

* Add more useful BTree statistics, examples: maximum and minimum number of values per
kv_node and kp_node, maximum file offset distance between consecutive levels in the
BTree, etc


# Usage

<pre>
$ ./couchfoo -h
Usage:

./couchfoo [options] database_file

Available options are:

-h, --help Print help and then exit.

-N, --headers COUNT The number of headers to extract and report.
Defaults to 3.

-a, --start-offset OFFSET_A The starting file offset (in bytes) from which headers will be
searched backwards. Defaults to the file length (EOF).

-b, --end-offset OFFSET_B The file offset (in bytes) at which the header search or count operation
will stop at. Defaults to 0 (the beginning of the file).

--count-headers Count the number of headers in the file, in the range between OFFSET_A
(supplied by --start-offset) and OFFSET_B (supplied by --end-offset).

-C, --copy-header OFFSET_H Grab the header at offset OFFSET_H and append a copy of it to the end of
the given database file.

-S, --btree-stats Report statistics for each BTree pointed by each reported header.
Warning: this can be very slow for large databases.

$
</pre>

Examples:

<pre>
$ ./couchfoo -S -N 1 /mnt/cm/fdmanana/test_dbs/large1kb.couch
Database file `/mnt/cm/fdmanana/test_dbs/large1kb.couch` has 117272 blocks and is 480342114 bytes long.

Found header at offset 480342016 (block 117271), 77 bytes, details:

version : 5
update seq : 341301
unused : 0
by ID BTree root offset : 480339898
# not deleted documents : 341298
# deleted documents : 0
BTree stats
depth : 5
# kp_nodes : 1686
# kv_nodes : 31956
by Seq BTree root offset : 480336370
# doc_info records : 341298
BTree stats
depth : 4
# kp_nodes : 353
# kv_nodes : 17961
local docs BTree root offset : 480317540
BTree stats
depth : 1
# kp_nodes : 0
# kv_nodes : 1
purge seq : 1
purge docs offset : 480340016
purged docs : {"docfoo1":["1-967a00dff5e02add41819138abb3284d"]}
_security object offset : 480321636
security_object : {"admins":{"names":[],"roles":["boss","foobar"]},"members":{"names":[],"roles":[]}}
revs limit : 1000


1 headers shown.
$
</pre>


<pre>
$ ./couchfoo --count-headers test_dbs/foo.couch
Database file `test_dbs/foo.couch` has 13 blocks and is 49241 bytes long.

Found 10 valid headers and 1 corrupted headers.
$
</pre>


# Build and testing

<pre>
$ make
</pre>

and

<pre>
$ make test
</pre>
56 changes: 56 additions & 0 deletions src/couch_btree.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
% Copyright 2011, Filipe David Manana <fdmanana@apache.org>
% Web site: http://github.com/fdmanana/couchfoo
%
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_btree).

-export([valid_node/1]).
-export([depth_first_traverse/4]).

-include("couchfoo.hrl").


%% Tells whether a term has the shape of a BTree node: a 2-tuple tagged
%% `kp_node' or `kv_node' whose second element is a non-empty list.
%% Returns `true' for such terms and `false' for anything else.
valid_node({Tag, [_ | _]}) when Tag =:= kp_node; Tag =:= kv_node ->
    true;
valid_node(_Other) ->
    false.


%% Walks a BTree depth first, starting from a root pointer.
%%
%% The first argument is either `nil' (no tree: Acc is returned untouched) or
%% a 2-tuple whose first element is the file offset of the node to read; the
%% second element is ignored here.
%%   * For a kv_node the result is Fun(map, Node, Acc).
%%   * For a kp_node every child pointer is traversed with the same incoming
%%     Acc, and the result is Fun(reduce, Node, ChildAccs), where ChildAccs
%%     holds the children's results in the order they appear in the node.
%% A node with any other tag raises a case_clause error. Read failures
%% propagate as the {btree_error, _} throw raised by get_node/2.
depth_first_traverse(nil, _File, _Fun, Acc) ->
    Acc;
depth_first_traverse({Offset, _Red}, File, Fun, Acc) ->
    Node = get_node(File, Offset),
    case Node of
        {kp_node, Children} ->
            %% A child entry that is not a 2-tuple crashes with
            %% function_clause rather than being skipped.
            VisitChild = fun({_Key, ChildPtr}) ->
                depth_first_traverse(ChildPtr, File, Fun, Acc)
            end,
            Fun(reduce, Node, lists:map(VisitChild, Children));
        {kv_node, _Values} ->
            Fun(map, Node, Acc)
    end.


%% Reads the BTree node stored at Offset in File and returns it as a
%% {NodeType, NodeList} tuple.
%%
%% Anything that goes wrong (an exception from couch_file:pread_term/2, or a
%% result that is not {ok, {_, _}}) is converted into a
%% {btree_error, Message} throw, where Message is a binary naming the offset
%% together with the class and reason of the underlying failure.
get_node(File, Offset) ->
    try
        %% The match is deliberately inside the protected section so that an
        %% unexpected result is reported as a btree_error as well.
        {ok, {_NodeType, _NodeList} = Node} = couch_file:pread_term(File, Offset),
        Node
    catch Class:Reason ->
        Message = io_lib:format(
            "Error reading BTree node at offset ~p: ~p:~p~n",
            [Offset, Class, Reason]),
        throw({btree_error, iolist_to_binary(Message)})
    end.
97 changes: 97 additions & 0 deletions src/couch_db_header.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
% Copyright 2011, Filipe David Manana <fdmanana@apache.org>
% Web site: http://github.com/fdmanana/couchfoo
%
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_db_header).

-export([headerbin_to_tuple/1]).
-export([upgrade_header/1]).

-include("couchfoo.hrl").


%% Brings a decoded `db_header' tuple up to the #db_header{} record layout
%% used by this tool and validates its disk version (element 2 of the tuple).
%%
%% A header with fewer elements than the current record is first padded with
%% the record's default values (see simple_upgrade_record/2). Then:
%%   * version < 4: throws {database_header_version_unsupported, Msg};
%%   * version 4 (0.10 and pre 0.11): returned with security_ptr set to nil;
%%   * version ?LATEST_DISK_VERSION: returned unchanged;
%%   * version > ?LATEST_DISK_VERSION: throws
%%     {database_header_version_unsupported, Msg}.
%% Msg is a binary describing why the version is rejected.
%% NOTE(review): a version strictly between 4 and ?LATEST_DISK_VERSION matches
%% no clause and raises case_clause - confirm that no such version exists.
upgrade_header(Header) ->
    UpgradedHeader = simple_upgrade_record(Header, #db_header{}),
    case element(2, UpgradedHeader) of
        Old when Old < 4 ->
            throw({
                database_header_version_unsupported,
                <<"CouchDB 0.9 (or older) database headers are not supported">>
            });
        4 ->
            % 0.10 and pre 0.11
            UpgradedHeader#db_header{security_ptr = nil};
        ?LATEST_DISK_VERSION ->
            UpgradedHeader;
        V when V > ?LATEST_DISK_VERSION ->
            % The message used to read "is not yet unsupported", a double
            % negative stating the opposite of what is meant.
            throw({
                database_header_version_unsupported,
                list_to_binary("database header version " ++ integer_to_list(V) ++
                    " is not yet supported")
            })
    end.


%% simple_upgrade_record/2 copied from Apache CouchDB's couch_db_updater.erl

%% Pads the tuple Old up to the size of the tuple New by appending New's
%% trailing elements (those at positions beyond Old's size). When Old is
%% already at least as large as New - or when either argument is not a tuple,
%% which makes the guard fail - Old is returned unchanged.
simple_upgrade_record(Old, New) when tuple_size(Old) < tuple_size(New) ->
    MissingTail = lists:nthtail(tuple_size(Old), tuple_to_list(New)),
    list_to_tuple(tuple_to_list(Old) ++ MissingTail);
simple_upgrade_record(Old, _New) ->
    Old.


%% Decodes a serialized database header and upgrades it to the current
%% record layout.
%%
%% Returns {ok, Header} on success, or {corrupted_header, Reason} where
%% Reason is a binary explaining which check failed: the binary is not a
%% serialized term, the term is not a tuple, the tuple has fewer than two
%% elements, its tag is not `db_header', or upgrade_header/1 failed.
headerbin_to_tuple(HeaderBin) ->
    case headerbin_to_term(HeaderBin) of
        {ok, HeaderTerm} ->
            header_term_to_tuple(HeaderTerm);
        _ ->
            {corrupted_header, <<"Header binary is not a serialized Erlang term.">>}
    end.

%% Validates the shape of a decoded header term, one check per clause, and
%% upgrades it once the shape is known to be a db_header tuple.
header_term_to_tuple(Term) when not is_tuple(Term) ->
    {corrupted_header, <<"Header term is not a tuple.">>};
header_term_to_tuple(Term) when tuple_size(Term) < 2 ->
    {corrupted_header, <<"Header term is a tuple with size < 2.">>};
header_term_to_tuple(Term) when element(1, Term) =/= db_header ->
    TagBin = iolist_to_binary(io_lib:format("~p", [element(1, Term)])),
    {corrupted_header,
        <<"Header tuple tag is not db_header but it's `", TagBin/binary, "'.">>};
header_term_to_tuple(Header) ->
    try
        {ok, upgrade_header(Header)}
    catch
        throw:{database_header_version_unsupported, Reason} ->
            {corrupted_header, Reason};
        _:Error ->
            ErrorBin = iolist_to_binary(io_lib:format("~p", [Error])),
            {corrupted_header, <<"Header record upgrade failed: ", ErrorBin/binary, ".">>}
    end.


%% Attempts to decode HeaderBin with binary_to_term/1.
%% Returns {ok, Term} when decoding succeeds and `false' when it raises any
%% exception (for example because the input is not in the external term
%% format).
%% NOTE(review): binary_to_term/1 is used without the `safe' option; if the
%% input can come from an untrusted or damaged file, consider whether
%% binary_to_term(Bin, [safe]) would be acceptable here.
headerbin_to_term(HeaderBin) ->
    try binary_to_term(HeaderBin) of
        Term -> {ok, Term}
    catch
        _:_ -> false
    end.
39 changes: 39 additions & 0 deletions src/couch_doc.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
% Copyright 2011, Filipe David Manana <fdmanana@apache.org>
% Web site: http://github.com/fdmanana/couchfoo
%
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_doc).

-export([rev_to_str/1, revs_to_strs/1]).

-include("couchfoo.hrl").


%% All the following functions were copied, or are slight variations, from
%% Apache CouchDB's couch_doc.erl

%% Renders a revision id for display. A rev id whose size/1 is 16 is
%% converted with couchfoo_util:to_hex/1 and wrapped by ?l2b; any other
%% value is returned unchanged.
revid_to_str(RawId) when size(RawId) =:= 16 ->
    ?l2b(couchfoo_util:to_hex(RawId));
revid_to_str(OtherId) ->
    OtherId.


%% Formats a {Pos, RevId} revision as "Pos-RevId": the integer position in
%% decimal, a dash, then the rev id as rendered by revid_to_str/1. The pieces
%% are joined by ?l2b (presumably list_to_binary - defined in couchfoo.hrl).
rev_to_str({Pos, RevId}) ->
    PosStr = integer_to_list(Pos),
    RevIdStr = revid_to_str(RevId),
    ?l2b([PosStr, "-", RevIdStr]).


%% Converts a list of {Pos, RevId} revisions into a list of their string
%% forms (see rev_to_str/1), preserving order. An element that is not a
%% 2-tuple raises function_clause.
revs_to_strs(Revs) ->
    lists:map(fun({_Pos, _RevId} = Rev) -> rev_to_str(Rev) end, Revs).
Loading

0 comments on commit 59bc101

Please sign in to comment.