Skip to content

Commit

Permalink
Provide Content-MD5 header support for attachments.
Browse files Browse the repository at this point in the history
Fixes COUCHDB-558.

Thanks to Filipe Manana we now have checks for attachment transfer integrity
using the Content-MD5 header (or trailer). Use of this integrity check is
triggered by specifying a Content-MD5 header in your request with a value that
is a base64 encoded md5. For requests that are using a chunked Transfer-Encoding
it is also possible to use a trailer so that the Content-MD5 doesn't need to be
known before transfer. This works by specifying a header "Trailer:
Content-MD5" and then in the final chunk (the one with a size of zero) you can
specify a Content-MD5 with exactly the same format as in the request headers.

See the ETap test 130-attachments-md5.t for explicit examples of the request
messages.



git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@891077 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
davisp committed Dec 16, 2009
1 parent 04404e2 commit 22c551b
Show file tree
Hide file tree
Showing 7 changed files with 378 additions and 8 deletions.
1 change: 1 addition & 0 deletions THANKS
Expand Up @@ -40,5 +40,6 @@ suggesting improvements or submitting changes. Some of these people are:
* Joshua Bronson <jabronson@gmail.com> * Joshua Bronson <jabronson@gmail.com>
* Kostis Sagonas <kostis@cs.ntua.gr> * Kostis Sagonas <kostis@cs.ntua.gr>
* Matthew Hooker <mwhooker@gmail.com> * Matthew Hooker <mwhooker@gmail.com>
* Filipe Manana <fdmanana@gmail.com>


For a list of authors see the `AUTHORS` file. For a list of authors see the `AUTHORS` file.
34 changes: 29 additions & 5 deletions src/couchdb/couch_db.erl
Expand Up @@ -687,7 +687,7 @@ doc_flush_atts(Doc, Fd) ->


% Compares two MD5 values; with_stream/3 calls this with the MD5 returned by
% couch_stream:close/1 first and the MD5 requested for the attachment second.
% An empty binary as the second argument passes unconditionally, equal values
% pass, and anything else throws (data_corruption in the left-hand, pre-commit
% column; md5_mismatch in the right-hand, post-commit column).
check_md5(_NewSig, <<>>) -> ok; check_md5(_NewSig, <<>>) -> ok;
check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok; check_md5(Sig1, Sig2) when Sig1 == Sig2 -> ok;
check_md5(_, _) -> throw(data_corruption). check_md5(_, _) -> throw(md5_mismatch).


flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd -> flush_att(Fd, #att{data={Fd0, _}}=Att) when Fd0 == Fd ->
% already written to our file, nothing to write % already written to our file, nothing to write
Expand All @@ -713,8 +713,14 @@ flush_att(Fd, #att{data=Fun,len=undefined}=Att) when is_function(Fun) ->
% WriterFun({0, _Footers}, State) % WriterFun({0, _Footers}, State)
% Called with Length == 0 on the last time. % Called with Length == 0 on the last time.
% WriterFun returns NewState. % WriterFun returns NewState.
fun({0, _Footers}, _) -> fun({0, Footers}, _) ->
ok; F = mochiweb_headers:from_binary(Footers),
case mochiweb_headers:get_value("Content-MD5", F) of
undefined ->
ok;
Md5 ->
{md5, base64:decode(Md5)}
end;
({_Length, Chunk}, _) -> ({_Length, Chunk}, _) ->
couch_stream:write(OutputStream, Chunk) couch_stream:write(OutputStream, Chunk)
end, ok) end, ok)
Expand All @@ -725,11 +731,29 @@ flush_att(Fd, #att{data=Fun,len=Len}=Att) when is_function(Fun) ->
write_streamed_attachment(OutputStream, Fun, Len) write_streamed_attachment(OutputStream, Fun, Len)
end). end).


% From RFC 2616 3.6.1 - Chunked Transfer Coding
%
% In other words, the origin server is willing to accept
% the possibility that the trailer fields might be silently
% discarded along the path to the client.
%
% I take this to mean that if "Trailer: Content-MD5\r\n"
% is present in the request, but there is no Content-MD5
% trailer, we're free to ignore this inconsistency and
% pretend that no Content-MD5 exists.
% Writes an attachment through a fresh couch_stream on Fd and checks its MD5.
%
% Fun(OutputStream) performs the actual writes. Its result is only inspected
% for the shape {md5, FooterMd5}, which carries a digest taken from a chunked
% trailer.
%
% The digest to verify against (ReqMd5) is chosen as follows:
%   - #att.md5 is md5_in_footer and Fun returned {md5, FooterMd5}: FooterMd5.
%   - #att.md5 is md5_in_footer but Fun returned no footer digest: <<>>, which
%     check_md5/2 accepts unconditionally. Without this case the atom
%     md5_in_footer itself would be compared against the stream digest, could
%     never match, and every such upload would fail with md5_mismatch.
%   - anything else: #att.md5 as given (a request-header digest wins over a
%     trailer digest).
%
% Returns Att with data, len and md5 replaced by the values reported by
% couch_stream:close/1. check_md5/2 throws when the digests differ.
with_stream(Fd, #att{md5=InMd5}=Att, Fun) ->
    {ok, OutputStream} = couch_stream:open(Fd),
    ReqMd5 = case {Fun(OutputStream), InMd5} of
        {{md5, FooterMd5}, md5_in_footer} ->
            FooterMd5;
        {_, md5_in_footer} ->
            % Trailer was announced but never delivered: skip the check.
            <<>>;
        {_, _} ->
            InMd5
    end,
    {StreamInfo, Len, Md5} = couch_stream:close(OutputStream),
    check_md5(Md5, ReqMd5),
    Att#att{data={Fd,StreamInfo},len=Len,md5=Md5}.




Expand Down
3 changes: 3 additions & 0 deletions src/couchdb/couch_httpd.erl
Expand Up @@ -541,6 +541,9 @@ error_info({bad_request, Reason}) ->
{400, <<"bad_request">>, Reason}; {400, <<"bad_request">>, Reason};
error_info({query_parse_error, Reason}) -> error_info({query_parse_error, Reason}) ->
{400, <<"query_parse_error">>, Reason}; {400, <<"query_parse_error">>, Reason};
% Prior art for md5 mismatch resulting in a 400 is from AWS S3
error_info(md5_mismatch) ->
{400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>};
error_info(not_found) -> error_info(not_found) ->
{404, <<"not_found">>, <<"missing">>}; {404, <<"not_found">>, <<"missing">>};
error_info({not_found, Reason}) -> error_info({not_found, Reason}) ->
Expand Down
26 changes: 24 additions & 2 deletions src/couchdb/couch_httpd_db.erl
Expand Up @@ -1043,8 +1043,9 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
undefined; undefined;
Length -> Length ->
list_to_integer(Length) list_to_integer(Length)
end end,
}] md5 = get_md5_header(Req)
}]
end, end,


Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of Doc = case extract_header_rev(Req, couch_httpd:qs_value(Req, "rev")) of
Expand Down Expand Up @@ -1084,6 +1085,27 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts)
db_attachment_req(Req, _Db, _DocId, _FileNameParts) -> db_attachment_req(Req, _Db, _DocId, _FileNameParts) ->
send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT"). send_method_not_allowed(Req, "DELETE,GET,HEAD,PUT").



% Works out which MD5 digest, if any, the client declared for the request body.
%
% Returns:
%   - the base64-decoded Content-MD5 request header, when that header is
%     present (it takes precedence over any trailer announcement);
%   - the atom md5_in_footer, when couch_httpd:body_length/1 reports chunked
%     and the Trailer header mentions Content-MD5 (matched case-insensitively
%     on word boundaries);
%   - <<>> in every other case.
%
% Throws {bad_request, Reason} when the Content-MD5 header is present but is
% not valid base64. The header is client-controlled, so a malformed value is
% reported as a request error instead of letting the decode error escape.
get_md5_header(Req) ->
    ContentMD5 = couch_httpd:header_value(Req, "Content-MD5"),
    Length = couch_httpd:body_length(Req),
    Trailer = couch_httpd:header_value(Req, "Trailer"),
    case {ContentMD5, Length, Trailer} of
        _ when is_list(ContentMD5) orelse is_binary(ContentMD5) ->
            try
                base64:decode(ContentMD5)
            catch
                error:_ ->
                    throw({bad_request,
                        <<"Content-MD5 header is not valid base64.">>})
            end;
        {_, chunked, undefined} ->
            <<>>;
        {_, chunked, _} ->
            case re:run(Trailer, "\\bContent-MD5\\b", [caseless]) of
                {match, _} ->
                    md5_in_footer;
                _ ->
                    <<>>
            end;
        _ ->
            <<>>
    end.

parse_doc_format(FormatStr) when is_binary(FormatStr) -> parse_doc_format(FormatStr) when is_binary(FormatStr) ->
parse_doc_format(?b2l(FormatStr)); parse_doc_format(?b2l(FormatStr));
parse_doc_format(FormatStr) when is_list(FormatStr) -> parse_doc_format(FormatStr) when is_list(FormatStr) ->
Expand Down
5 changes: 4 additions & 1 deletion src/couchdb/couch_util.erl
Expand Up @@ -419,4 +419,7 @@ json_encode(V) ->


% Decodes V with a mochijson2 decoder whose object hook rewrites every
% {struct, L} term to {L}. Any exception raised while decoding, of any class,
% is replaced by throw({invalid_json, V}).
json_decode(V) -> json_decode(V) ->
try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V) try (mochijson2:decoder([{object_hook, fun({struct,L}) -> {L} end}]))(V)
catch _:_ -> throw({invalid_json,V}) end. catch
_Type:_Error ->
throw({invalid_json,V})
end.
65 changes: 65 additions & 0 deletions src/mochiweb/mochiweb_headers.erl
Expand Up @@ -9,6 +9,7 @@
-export([delete_any/2, get_primary_value/2]). -export([delete_any/2, get_primary_value/2]).
-export([default/3, enter_from_list/2, default_from_list/2]). -export([default/3, enter_from_list/2, default_from_list/2]).
-export([to_list/1, make/1]). -export([to_list/1, make/1]).
-export([from_binary/1]).
-export([test/0]). -export([test/0]).


%% @type headers(). %% @type headers().
Expand Down Expand Up @@ -37,6 +38,36 @@ test() ->
"content-type", H4), "content-type", H4),
H4 = ?MODULE:delete_any("nonexistent-header", H4), H4 = ?MODULE:delete_any("nonexistent-header", H4),
H3 = ?MODULE:delete_any("content-type", H4), H3 = ?MODULE:delete_any("content-type", H4),
HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
H_HB = ?MODULE:from_binary(HB),
H_HB = ?MODULE:from_binary(binary_to_list(HB)),
"47" = ?MODULE:get_value("Content-Length", H_HB),
"text/plain" = ?MODULE:get_value("Content-Type", H_HB),
L_H_HB = ?MODULE:to_list(H_HB),
2 = length(L_H_HB),
true = lists:member({'Content-Length', "47"}, L_H_HB),
true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
H_HL = ?MODULE:from_binary(HL),
H_HL = ?MODULE:from_binary(HL2),
H_HL = ?MODULE:from_binary(HL3),
"47" = ?MODULE:get_value("Content-Length", H_HL),
"text/plain" = ?MODULE:get_value("Content-Type", H_HL),
L_H_HL = ?MODULE:to_list(H_HL),
2 = length(L_H_HL),
true = lists:member({'Content-Length', "47"}, L_H_HL),
true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
[] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
[] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
[] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
[] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
[] = ?MODULE:to_list(?MODULE:from_binary("")),
[] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
[] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
[] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
[] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
ok. ok.


%% @spec empty() -> headers() %% @spec empty() -> headers()
Expand All @@ -52,6 +83,40 @@ make(L) when is_list(L) ->
make(T) when is_tuple(T) -> make(T) when is_tuple(T) ->
T. T.


%% @type raw_http_header() = string() | binary() | [string() | binary()].
%% @spec from_binary(raw_http_header()) -> headers()
%%
%% @doc Build a headers() structure from raw HTTP header text.
%%
%% Two input shapes are accepted:
%%
%% 1) One string or binary holding a complete header section, terminated
%%    by a double CRLF:
%%      "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
%%      <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>
%%
%% 2) A list whose elements are strings and/or binaries, each one a single
%%    header line terminated by one CRLF:
%%      [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ]
%%      [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ]
%%      [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ]
%%
%% Anything that is not already a binary is converted to one with an extra
%% CRLF appended, so that a list of single lines ends with the blank line
%% that closes a header section. Parsing stops at the first packet that
%% does not decode as a header line; the headers read so far are returned.
from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
    from_binary(RawHttpHeader, []);
from_binary(RawHttpHeaderList) ->
    Terminated = list_to_binary([RawHttpHeaderList, "\r\n"]),
    from_binary(Terminated).

%% Decode one header line at a time, collecting {Name, Value} pairs.
from_binary(Remaining, Parsed) ->
    case erlang:decode_packet(httph, Remaining, []) of
        {ok, {http_header, _, Name, _, Value}, Tail} ->
            from_binary(Tail, [{Name, Value} | Parsed]);
        _ ->
            make(Parsed)
    end.

%% @spec from_list([{key(), value()}]) -> headers() %% @spec from_list([{key(), value()}]) -> headers()
%% @doc Construct a headers() from the given list. %% @doc Construct a headers() from the given list.
from_list(List) -> from_list(List) ->
Expand Down

0 comments on commit 22c551b

Please sign in to comment.