COUCHDB-259 allow arbitrary data to be stored along with attachments #54

@@ -9,6 +9,8 @@ Apache CouchDB CHANGES
# HTTP Interface:
#
# * Fixed bug with CORS and replication (COUCHDB-1689).
+# * Allow update of content_type in attachments (COUCHDB-259).
+# * Allow arbitrary data to be stored in attachments (COUCHDB-259).
#
# Test Suite:
#
@@ -650,6 +650,82 @@ The JSON returned will include the updated revision number:
For information on batched writes, which can provide improved
performance, see :ref:`api-batch-writes`.
+
+Inline Attachments
+------------------
+
+It is possible to store attachments inline, along with the main
+JSON structure. Attachments go into the special ``_attachments``
+attribute of the document. Each attachment is stored under its name
+as a JSON object that holds the ``content_type`` and the
+base64-encoded ``data``.
+
+Creating a document with an attachment:
+
+.. code-block:: javascript
+
+    {
+      "_id":"attachment_doc",
+      "_attachments":
+      {
+        "foo.txt":
+        {
+          "content_type":"text/plain",
+          "data": "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ="
+        }
+      }
+    }
+
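+A quick way to see the full round trip is the JavaScript client used
+by the Futon test suite. The following is only an illustrative sketch,
+not part of this change; it assumes the test suite helpers
+(``CouchDB``, ``TEquals``, ``CouchDB.request``) and a database named
+``test_suite_db``:
+
+.. code-block:: javascript
+
+    // minimal sketch using the Futon test suite helpers (assumed)
+    var db = new CouchDB("test_suite_db");
+    var doc = {
+      "_id":"attachment_doc",
+      "_attachments":
+      {
+        "foo.txt":
+        {
+          "content_type":"text/plain",
+          // base64 encoding of "This is a base64 encoded text"
+          "data": "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ="
+        }
+      }
+    };
+    TEquals(true, db.save(doc).ok);
+
+    // the attachment is served back decoded
+    var xhr = CouchDB.request("GET", "/test_suite_db/attachment_doc/foo.txt");
+    TEquals("This is a base64 encoded text", xhr.responseText);
+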
+While metadata for the attachment (length, MD5 checksum, etc.) is
+maintained by the server, it is possible to change the ``content_type``
+field without resending the attachment data. This is useful when the
+originally supplied content type cannot be trusted (e.g. direct
+uploads from a browser).
+
+To change the content type, PUT the document with a stub attachment
+structure that carries the updated ``content_type`` field:
+
+.. code-block:: javascript
+
+    {
+      "_id":"attachment_doc",
+      "_attachments":
+      {
+        "foo.txt":
+        {
+          "stub": true,
+          "content_type":"text/x-markdown"
+        }
+      }
+    }
+
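+Using the same test suite helpers, the update can be sketched as
+follows (``db.open`` and ``db.save`` are assumed, as above):
+
+.. code-block:: javascript
+
+    // sketch: change the stored content type without resending the data
+    var doc = db.open("attachment_doc");
+    doc._attachments["foo.txt"] = {
+      "stub": true,                       // keep the stored attachment data
+      "content_type": "text/x-markdown"   // only this field is updated
+    };
+    TEquals(true, db.save(doc).ok);
+    TEquals("text/x-markdown",
+      db.open("attachment_doc")._attachments["foo.txt"].content_type);
+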
+Additional Arbitrary Data in Attachments
+----------------------------------------
+
+It is possible to store additional arbitrary data inside the
+``_attachments`` attribute, as long as it does not clash with the
+standard attachment attributes such as ``stub``, ``content_type``
+and ``encoding``. Deleting an attachment also removes its associated
+data, as one would expect.
+
+A typical use case would be to store an ``uploaded_by`` field along
+with every attachment. Another would be to store state data for
+post-processing workers alongside the attachment.
+
+.. code-block:: javascript
+
+    {
+      "_id":"attachment_doc",
+      "_attachments":
+      {
+        "foo.txt":
+        {
+          "data": "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=",
+          "content_type":"text/plain",
+          "uploaded_by": "Sam Simple",
+          "spell_checked": false
+        }
+      }
+    }
+
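+The extra fields are returned together with the attachment stubs
+whenever the document is fetched, and they disappear with the
+attachment. A minimal sketch, again assuming the test suite helpers:
+
+.. code-block:: javascript
+
+    // arbitrary fields survive the round trip next to the stub fields
+    var doc = db.open("attachment_doc");
+    TEquals("Sam Simple", doc._attachments["foo.txt"].uploaded_by);
+    TEquals(false, doc._attachments["foo.txt"].spell_checked);
+
+    // deleting the attachment also deletes the associated data
+    delete doc._attachments["foo.txt"];
+    TEquals(true, db.save(doc).ok);
+    var atts = db.open("attachment_doc")._attachments || {};
+    TEquals(undefined, atts["foo.txt"]);
+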
.. _api-del-doc:
``DELETE /db/doc``
@@ -0,0 +1,71 @@
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+couchTests.attachment_arbitrary_data = function(debug) {
+
+  var db = new CouchDB("test_suite_db", {"X-Couch-Full-Commit":"false"});
+  db.deleteDb();
+  db.createDb();
+
+  // test COUCHDB-259 - allow arbitrary data to be stored along with the attachment
+  var bin_doc9 = {
+    _id: "bin_doc9",
+    name: "Don Draper",
+    _attachments:{
+      "foo.txt": {
+        "content_type":"text/plain",
+        "data": "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=",
+        "field1": "I am a simple string.",
+        "field2": 1234,
+        "field3": [1234, "string", null, {"hey":"ho", "tic":"tac"}]
+      },
+      "foo2.txt": {
+        "content_type":"text/plain",
+        "data": "SGV5IHRoZXJlCg=="
+      }
+    }
+  };
+
+  TEquals(true, db.save(bin_doc9).ok);
+  bin_doc9 = db.open("bin_doc9");
+
+  var stub_data9 = bin_doc9._attachments["foo.txt"];
+  TEquals("I am a simple string.", stub_data9.field1);
+  TEquals(1234, stub_data9.field2);
+
+  TEquals(4, stub_data9.field3.length);
+  TEquals(1234, stub_data9.field3[0]);
+  TEquals("string", stub_data9.field3[1]);
+  TEquals(null, stub_data9.field3[2]);
+  TEquals("tac", stub_data9.field3[3].tic);
+
+
+  // update a field via stub structure
+  var oldRev = bin_doc9._rev;
+  stub_data9.field1 = "I am a better string.";
+  delete stub_data9.field2;
+  bin_doc9._attachments["foo.txt"] = stub_data9;
+  TEquals(true, db.save(bin_doc9).ok);
+  bin_doc9 = db.open("bin_doc9");
+  TEquals("I am a better string.", bin_doc9._attachments["foo.txt"].field1);
+  // has the revision changed properly?
+  T(bin_doc9._rev != oldRev);
+  // is the deleted field2 gone?
+  TEquals(undefined, bin_doc9._attachments["foo.txt"].field2);
+
+  // remove the attachment
+  delete bin_doc9._attachments["foo.txt"];
+  TEquals(true, db.save(bin_doc9).ok);
+  bin_doc9 = db.open("bin_doc9");
+  TEquals(undefined, bin_doc9._attachments["foo.txt"]);
+
+};
@@ -298,4 +298,37 @@ couchTests.attachments= function(debug) {
  var xhr = CouchDB.request("GET", "/test_suite_db/bin_doc7/attachment.txt");
  TEquals('MntvB0NYESObxH4VRDUycw==', xhr.getResponseHeader("Content-MD5"));
+  // test COUCHDB-259 - allow update of content type via stubs
+  var bin_doc8 = {
+    _id: "bin_doc8",
+    _attachments:{
+      "foo.txt": {
+        content_type:"application/octet-stream",
+        data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ="
+      }
+    }
+  };
+
+  TEquals(true, db.save(bin_doc8).ok);
+
+  // reopen the document, change the content type and update
+  bin_doc8 = db.open("bin_doc8");
+  bin_doc8._attachments["foo.txt"] = {stub: true, content_type: "text/plain"};
+  TEquals(true, db.save(bin_doc8).ok);
+  // reopen the document, check if the new content type persisted
+  bin_doc8 = db.open("bin_doc8");
+  TEquals("text/plain", bin_doc8._attachments["foo.txt"].content_type);
+
+  // try setting to null, this should be ignored by the server
+  bin_doc8._attachments["foo.txt"] = {stub: true, content_type: null};
+  TEquals(true, db.save(bin_doc8).ok);
+  bin_doc8 = db.open("bin_doc8");
+  TEquals("text/plain", bin_doc8._attachments["foo.txt"].content_type);
+
+  // try setting to undefined, this should be ignored by the server
+  bin_doc8._attachments["foo.txt"] = {stub: true, content_type: undefined};
+  TEquals(true, db.save(bin_doc8).ok);
+  bin_doc8 = db.open("bin_doc8");
+  TEquals("text/plain", bin_doc8._attachments["foo.txt"].content_type);
+
};
@@ -30,7 +30,8 @@ couchTests.attachments_multipart= function(debug) {
"foo.txt": {
"follows":true,
"content_type":"application/test",
- "length":21
+ "length":21,
+ "arbitrary_field": "arbitrary content"
},
"bar.txt": {
"follows":true,
@@ -176,8 +177,8 @@ couchTests.attachments_multipart= function(debug) {
  // parse out the multipart
  var sections = parseMultipart(xhr);
-  TEquals("790", xhr.getResponseHeader("Content-Length"),
+  TEquals("828", xhr.getResponseHeader("Content-Length"),
    "Content-Length should be correct");
  T(sections.length == 3);
// The first section is the json doc. Check it's content-type.
// Each part carries their own meta data.
@@ -40,7 +40,8 @@
    md5= <<>>,
    revpos=0,
    data,
-    encoding=identity
+    encoding=identity,
+    body={[]}
}).
@@ -773,9 +773,7 @@ update_docs(Db, Docs, Options, interactive_edit) ->
            check_dup_atts(Doc)), Db#db.updater_fd), Ref}
            || {Doc, Ref} <- B] || B <- DocBuckets2],
        {DocBuckets4, IdRevs} = new_revs(DocBuckets3, [], []),
-
        {ok, CommitResults} = write_and_commit(Db, DocBuckets4, NonRepDocs, Options2),
-
        ResultsDict = dict:from_list(IdRevs ++ CommitResults ++ PreCommitFailures),
        {ok, lists:map(
            fun({#doc{}, Ref}) ->
@@ -864,9 +862,9 @@ write_and_commit(#db{update_pid=UpdatePid}=Db, DocBuckets1,
prepare_doc_summaries(Db, BucketList) ->
    [lists:map(
        fun({#doc{body = Body, atts = Atts} = Doc, Ref}) ->
-            DiskAtts = [{N, T, P, AL, DL, R, M, E} ||
+            DiskAtts = [{N, T, P, AL, DL, R, M, E, AttBody} ||
                #att{name = N, type = T, data = {_, P}, md5 = M, revpos = R,
-                    att_len = AL, disk_len = DL, encoding = E} <- Atts],
+                    att_len = AL, disk_len = DL, encoding = E, body = AttBody} <- Atts],
            AttsFd = case Atts of
            [#att{data = {Fd, _}} | _] ->
                Fd;
@@ -1271,7 +1269,29 @@ make_doc(#db{updater_fd = Fd} = Db, Id, Deleted, Bp, RevisionPath) ->
        end,
        {BodyData0,
        lists:map(
-            fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) ->
+            fun({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc,AttBody}) ->
+                #att{name=Name,
+                    type=Type,
+                    att_len=AttLen,
+                    disk_len=DiskLen,
+                    md5=Md5,
+                    revpos=RevPos,
+                    data={Fd,Sp},
+                    encoding=
+                        case Enc of
+                        true ->
+                            % 0110 UPGRADE CODE
+                            gzip;
+                        false ->
+                            % 0110 UPGRADE CODE
+                            identity;
+                        _ ->
+                            Enc
+                        end,
+                    body=AttBody
+                };
+
+            ({Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) ->
                #att{name=Name,
                    type=Type,
                    att_len=AttLen,
@@ -1298,15 +1318,17 @@ make_doc(#db{updater_fd = Fd} = Db, Id, Deleted, Bp, RevisionPath) ->
                    disk_len=AttLen,
                    md5=Md5,
                    revpos=RevPos,
-                    data={Fd,Sp}};
+                    data={Fd,Sp}
+                };
            ({Name,{Type,Sp,AttLen}}) ->
                #att{name=Name,
                    type=Type,
                    att_len=AttLen,
                    disk_len=AttLen,
                    md5= <<>>,
                    revpos=0,
-                    data={Fd,Sp}}
+                    data={Fd,Sp}
+                }
        end, Atts0)}
    end,
    Doc = #doc{
@@ -134,10 +134,11 @@
    md5= <<>>,
    revpos=0,
    data,
-    encoding=identity % currently supported values are:
+    encoding=identity, % currently supported values are:
                      %     identity, gzip
                      % additional values to support in the future:
                      %     deflate, compress
+    body={[]}
}).
@@ -826,11 +826,11 @@ copy_doc_attachments(#db{updater_fd = SrcFd} = SrcDb, SrcSp, DestFd) ->
    NewBinInfos = lists:map(
        fun({Name, Type, BinSp, AttLen, RevPos, Md5}) ->
            % 010 UPGRADE CODE
-            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
+            {NewBinSp, AttLen, AttLen, Md5, _IdentityMd5} =
                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
            {Name, Type, NewBinSp, AttLen, AttLen, RevPos, Md5, identity};
        ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc1}) ->
-            {NewBinSp, AttLen, _, Md5, _IdentityMd5} =
+            {NewBinSp, AttLen, _, Md5, _IdentityMd5} =
                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
            Enc = case Enc1 of
            true ->
@@ -842,10 +842,15 @@ copy_doc_attachments(#db{updater_fd = SrcFd} = SrcDb, SrcSp, DestFd) ->
            _ ->
                Enc1
            end,
-            {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc}
+            {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc};
+        ({Name, Type, BinSp, AttLen, DiskLen, RevPos, Md5, Enc, AttBody}) ->
+            {NewBinSp, AttLen, _, Md5, _IdentityMd5} =
+                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
+            {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, Md5, Enc, AttBody}
        end, BinInfos),
    {BodyData, NewBinInfos}.
+
copy_docs(Db, #db{updater_fd = DestFd} = NewDb, InfoBySeq0, Retry) ->
% COUCHDB-968, make sure we prune duplicates during compaction
InfoBySeq = lists:usort(fun(#doc_info{id=A}, #doc_info{id=B}) -> A =< B end,
@@ -868,7 +873,10 @@ copy_docs(Db, #db{updater_fd = DestFd} = NewDb, InfoBySeq0, Retry) ->
                {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
                    DestFd, SummaryChunk),
                TotalLeafSize = lists:foldl(
-                    fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
+                    % attachments copied from pre-upgrade disk formats are
+                    % still 8-tuples here, so accept both tuple arities
+                    fun({_, _, _, AttLen, _, _, _, _, _}, S) -> S + AttLen;
+                       ({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen end,
                    SummarySize, AttsInfo),
                {IsDel, Pos, Seq, TotalLeafSize}
            end, RevTree)}