From 7af46047276873cf1411fdbf5861107345af2723 Mon Sep 17 00:00:00 2001
From: Jens Alfke
Date: Mon, 23 Feb 2015 13:08:28 -0800
Subject: [PATCH 01/14] Added delta-compression support for getting attachments

* With GET /db/doc, adding "?deltas=true" will allow delta-compression of
  attachments, provided both ?attachments and ?atts_since are specified (the
  latter is needed so that the gateway knows which versions of attachments
  the client already has). Delta-encoded attachments will be indicated by an
  "encoding" value of "zdelta", and an extra "deltasrc" property whose value
  is the digest of the attachment to use as the delta source.

* The same is true of _bulk_get.

* With GET /db/doc/attachment, adding "?deltas=XXX,YYY,ZZZ", where the values
  are digests of previous versions of the attachment, will allow delta
  compression. The response will have a Content-Encoding of "zdelta" and an
  "X-Delta-Source" header whose value is the digest of the source attachment.

See https://github.com/couchbaselabs/couchbase-lite-api/wiki/Delta-Compression

For #452; see also couchbase/couchbase-lite-ios#168
---
 .../sync_gateway/db/attachment.go         |  85 +++++++++++---
 .../sync_gateway/db/attachment_test.go    |  45 +++++++-
 .../couchbaselabs/sync_gateway/db/crud.go |  82 ++++++++-----
 .../sync_gateway/db/database_test.go      |  16 +--
 .../sync_gateway/rest/api_test.go         | 109 ++++++++++++++++++
 .../sync_gateway/rest/bulk_api.go         |   3 +-
 .../sync_gateway/rest/doc_api.go          |  35 ++++--
 src/github.com/snej/zdelta-go             |   1 +
 sync_gateway.sublime-project              |  24 ----
 9 files changed, 313 insertions(+), 87 deletions(-)
 create mode 160000 src/github.com/snej/zdelta-go

diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go
index 2b2350db46..5f965ce81a 100644
--- a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go
+++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go
@@ -25,11 +25,12 @@ import (
 	"strings"

 	"github.com/couchbaselabs/sync_gateway/base"
+	"github.com/snej/zdelta-go"
 )

 // Attachments shorter than this will be left in the JSON as base64 rather than being a separate
 // MIME part.
-const kMaxInlineAttachmentSize = 200
+var MaxInlineAttachmentSize = 200

 // JSON bodies smaller than this won't be GZip-encoded.
 const kMinCompressedJSONSize = 300
@@ -113,30 +114,80 @@ func (db *Database) storeAttachments(doc *document, body Body, generation int, p
 // marshaler will convert that to base64.
 // If minRevpos is > 0, then only attachments that have been changed in a revision of that
 // generation or later are loaded.
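+// If deltaSrcKeys is non-nil, it maps attachment names to digests of older versions that
+// the client already has; when such a source is available, the attachment body is sent as
+// a zdelta. Illustrative shape of one resulting entry (name and digests hypothetical):
+//
+//	"photo.jpg": {"data": <base64 zdelta>, "encoding": "zdelta",
+//	              "deltasrc": "sha1-OldVersionDigest=", "digest": "sha1-NewDigest=", ...}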
-func (db *Database) loadBodyAttachments(body Body, minRevpos int) (Body, error) { - +func (db *Database) loadBodyAttachments(body Body, minRevpos int, deltaSrcKeys map[string]AttachmentKey) (Body, error) { body = body.ImmutableAttachmentsCopy() - for _, value := range BodyAttachments(body) { + for name, value := range BodyAttachments(body) { meta := value.(map[string]interface{}) revpos, ok := base.ToInt64(meta["revpos"]) if ok && revpos >= int64(minRevpos) { key := AttachmentKey(meta["digest"].(string)) - data, err := db.GetAttachment(key) + var sourceKeys []AttachmentKey + if _, ok := meta["encoding"].(string); !ok { // leave encoded attachment alone + if srcKey, ok := deltaSrcKeys[name]; ok { + sourceKeys = []AttachmentKey{srcKey} + } + } + data, srcKey, err := db.GetAttachmentMaybeAsDelta(key, sourceKeys) if err != nil { return nil, err } meta["data"] = data delete(meta, "stub") + if srcKey != "" { + meta["encoding"] = "zdelta" + meta["deltasrc"] = srcKey + } } } return body, nil } -// Retrieves an attachment, base64-encoded, given its key. +// Retrieves an attachment's body given its key. func (db *Database) GetAttachment(key AttachmentKey) ([]byte, error) { return db.Bucket.GetRaw(attachmentKeyToString(key)) } +// Retrieves an attachment's body, preferably as a delta from one of the versions specified +// in `sourceKeys` +func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []AttachmentKey) (result []byte, sourceKey AttachmentKey, err error) { + target, err := db.GetAttachment(key) + if err != nil { + return + } + for _, sourceKey = range sourceKeys { + var src []byte + src, err = db.Bucket.GetRaw(attachmentKeyToString(sourceKey)) + if err == nil { + //OPT: Cache deltas. For now, this just computes the delta every time. + result, err = zdelta.CreateDelta(src, target) + if err == nil && len(result) < len(target) { + base.LogTo("Attach", "Generated zdelta {%s --> %s} (%d%%)", + sourceKey, key, len(result)*100/len(target)) + return + } + } + if !base.IsDocNotFoundError(err) { + base.Warn("GetAttachmentAsDelta: Error for %q-->%q: %v", sourceKey, key, err) + } + } + // No delta available so return entire attachment: + result = target + sourceKey = "" + return +} + +// Returns the digests of all attachments in a Body, as a map of attachment names to keys. +func (db *Database) getAttachmentDigests(body Body) map[string]AttachmentKey { + keys := map[string]AttachmentKey{} + for name, value := range BodyAttachments(body) { + meta := value.(map[string]interface{}) + if key := AttachmentKey(meta["digest"].(string)); key != "" { + keys[name] = key + } + } + return keys +} + // Stores a base64-encoded attachment and returns the key to get it by. func (db *Database) setAttachment(attachment []byte) (AttachmentKey, error) { key := AttachmentKey(sha1DigestKey(attachment)) @@ -176,12 +227,6 @@ func ReadJSONFromMIME(headers http.Header, input io.Reader, into interface{}) er return nil } -type attInfo struct { - name string - contentType string - data []byte -} - func writeJSONPart(writer *multipart.Writer, contentType string, body Body, compressed bool) (err error) { bytes, err := json.Marshal(body) if err != nil { @@ -213,6 +258,12 @@ func writeJSONPart(writer *multipart.Writer, contentType string, body Body, comp // Writes a revision to a MIME multipart writer, encoding large attachments as separate parts. 
func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, compress bool) { + type attInfo struct { + name string + data []byte + meta map[string]interface{} + } + // First extract the attachments that should follow: following := []attInfo{} for name, value := range BodyAttachments(body) { @@ -220,14 +271,14 @@ func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, if meta["stub"] != true { var err error var info attInfo - info.contentType, _ = meta["content_type"].(string) info.data, err = decodeAttachment(meta["data"]) if info.data == nil { base.Warn("Couldn't decode attachment %q of doc %q: %v", name, body["_id"], err) meta["stub"] = true delete(meta, "data") - } else if len(info.data) > kMaxInlineAttachmentSize { + } else if len(info.data) > MaxInlineAttachmentSize { info.name = name + info.meta = meta following = append(following, info) meta["follows"] = true delete(meta, "data") @@ -241,8 +292,10 @@ func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, // Write the following attachments for _, info := range following { partHeaders := textproto.MIMEHeader{} - if info.contentType != "" { - partHeaders.Set("Content-Type", info.contentType) + if contentType, ok := info.meta["content_type"].(string); ok { + if info.meta["encoding"] == nil { + partHeaders.Set("Content-Type", contentType) + } } partHeaders.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", info.name)) part, _ := writer.CreatePart(partHeaders) diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go index fab722f5c5..753d85e119 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go @@ -51,7 +51,7 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev1output := `{"_attachments":{"bye.txt":{"data":"Z29vZGJ5ZSBjcnVlbCB3b3JsZA==","digest":"sha1-l+N7VpXGnoxMm8xfvtWPbz2YvDc=","length":19,"revpos":1},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"1-54f3a105fb903018c160712ffddb74dc"}` - gotbody, err := db.GetRev("doc1", "", false, []string{}) + gotbody, err := db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") assert.Equals(t, tojson(gotbody), rev1output) @@ -66,13 +66,13 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev2output := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` - gotbody, err = db.GetRev("doc1", "", false, []string{}) + gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") assert.Equals(t, tojson(gotbody), rev2output) log.Printf("Retrieve doc with atts_since...") rev2Aoutput := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1,"stub":true}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` - gotbody, err = db.GetRev("doc1", "", false, []string{"1-54f3a105fb903018c160712ffddb74dc", "1-foo", "993-bar"}) + gotbody, err = 
db.GetRevWithAttachments("doc1", "", false, []string{"1-54f3a105fb903018c160712ffddb74dc", "1-foo", "993-bar"}, false) assertNoError(t, err, "Couldn't get document") assert.Equals(t, tojson(gotbody), rev2Aoutput) @@ -87,7 +87,7 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev3output := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2}},"_id":"doc1","_rev":"3-252b9fa1f306930bffc07e7d75b77faf"}` - gotbody, err = db.GetRev("doc1", "", false, []string{}) + gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") assert.Equals(t, tojson(gotbody), rev3output) @@ -101,3 +101,40 @@ func TestAttachments(t *testing.T) { err = db.PutExistingRev("doc1", body2B, []string{"2-f000", rev1id}) assertNoError(t, err, "Couldn't update document") } + +func TestAttachmentDeltas(t *testing.T) { + context, err := NewDatabaseContext("db", testBucket(), false) + assertNoError(t, err, "Couldn't create context for database 'db'") + defer context.Close() + db, err := CreateDatabase(context) + assertNoError(t, err, "Couldn't create database 'db'") + + // Rev 1: + log.Printf("Create rev 1...") + rev1input := `{"_attachments": {"bye.txt": {"data":"VGhpcyBpcyBhIHN0cmluZyBmb3IgdXNlIGluIHRlc3RpbmcgZGVsdGEgY29tcHJlc3Npb24K"}}}` + var body Body + json.Unmarshal([]byte(rev1input), &body) + revid, err := db.Put("doc1", unjson(rev1input)) + assertNoError(t, err, "Couldn't create document") + assert.Equals(t, revid, "1-c0c61706d3f3692aacc0ec0a91425a65") + + log.Printf("Create rev 2...") + rev2str := `{"_attachments": {"bye.txt": {"data": "VGhpcyBpcyBhIHRlc3QuIFRoaXMgaXMgb25seSBhIHRlc3QuCg=="}}}` + var body2 Body + json.Unmarshal([]byte(rev2str), &body2) + body2["_rev"] = revid + revid, err = db.Put("doc1", body2) + assertNoError(t, err, "Couldn't update document") + + log.Printf("Retrieve doc with delta-encoded attachment...") + rev2output := `{"_attachments":{"bye.txt":{"data":"ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT","deltasrc":"sha1-l5fhr3wrVdXDCNkamTn8KypCswQ=","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","encoding":"zdelta","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` + gotbody, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, true) + assertNoError(t, err, "Couldn't get document") + assert.Equals(t, tojson(gotbody), rev2output) + + log.Printf("Retrieve doc without delta-encoded attachment...") + rev2Boutput := `{"_attachments":{"bye.txt":{"data":"VGhpcyBpcyBhIHRlc3QuIFRoaXMgaXMgb25seSBhIHRlc3QuCg==","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` + gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, false) + assertNoError(t, err, "Couldn't get document") + assert.Equals(t, tojson(gotbody), rev2Boutput) +} diff --git a/src/github.com/couchbaselabs/sync_gateway/db/crud.go b/src/github.com/couchbaselabs/sync_gateway/db/crud.go index b04ec66988..706ff8bb6c 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/crud.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/crud.go @@ -73,12 +73,11 @@ func (context *DatabaseContext) revCacheLoader(id IDAndRev) (body Body, history // Returns the body of the current revision of a document func (db *Database) Get(docid string) (Body, error) { - return db.GetRev(docid, "", 
false, nil) + return db.GetRev(docid, "", false) } -// Returns the body of a revision of a document. Uses the revision cache. -// revid may be "", meaning the current revision. -func (db *Database) GetRev(docid, revid string, listRevisions bool, attachmentsSince []string) (Body, error) { +// Common subroutine of GetRev and GetRevWithAttachments +func (db *Database) getRev(docid, revid string, listRevisions bool) (Body, *document, error) { var doc *document var body Body var revisions Body @@ -93,16 +92,16 @@ func (db *Database) GetRev(docid, revid string, listRevisions bool, attachmentsS if err == nil { err = base.HTTPErrorf(404, "missing") } - return nil, err + return nil, nil, err } } else { // No rev ID given, so load doc and get its current revision: if doc, err = db.GetDoc(docid); doc == nil { - return nil, err + return nil, nil, err } revid = doc.CurrentRev if body, err = db.getRevision(doc, revid); err != nil { - return nil, err + return nil, nil, err } if doc.hasFlag(channels.Deleted) { body["_deleted"] = true @@ -115,7 +114,7 @@ func (db *Database) GetRev(docid, revid string, listRevisions bool, attachmentsS if db.user != nil { if err := db.user.AuthorizeAnyChannel(inChannels); err != nil { if !revIDGiven { - return nil, base.HTTPErrorf(403, "forbidden") + return nil, nil, base.HTTPErrorf(403, "forbidden") } // On access failure, return (only) the doc history and deletion/removal // status instead of returning an error. For justification see the comment in @@ -130,13 +129,13 @@ func (db *Database) GetRev(docid, revid string, listRevisions bool, attachmentsS if listRevisions { redactedBody["_revisions"] = revisions } - return redactedBody, nil + return redactedBody, doc, nil } } if !revIDGiven { if deleted, _ := body["_deleted"].(bool); deleted { - return nil, base.HTTPErrorf(404, "deleted") + return nil, nil, base.HTTPErrorf(404, "deleted") } } @@ -144,27 +143,58 @@ func (db *Database) GetRev(docid, revid string, listRevisions bool, attachmentsS if listRevisions { body["_revisions"] = revisions } + return body, doc, nil +} - // Add attachment bodies: - if attachmentsSince != nil && len(BodyAttachments(body)) > 0 { - minRevpos := 1 - if len(attachmentsSince) > 0 { - if doc == nil { // if rev was in the cache, we don't have the document struct yet - if doc, err = db.GetDoc(docid); doc == nil { - return nil, err - } - } - ancestor := doc.History.findAncestorFromSet(revid, attachmentsSince) - if ancestor != "" { - minRevpos, _ = parseRevID(ancestor) - minRevpos++ +// Returns the body of a revision of a document. Uses the revision cache. +// revid may be "", meaning the current revision. +// If listRevisions is true, a "_revisions" property will be added containing the revision history. +func (db *Database) GetRev(docid, revid string, listRevisions bool) (Body, error) { + body, _, err := db.getRev(docid, revid, listRevisions) + return body, err +} + +// Returns the body of a revision of a document, including attachments. Based on GetRev. +// If attachmentsSince is non-nil, attachment bodies ("data" properties) will be added for all +// revisions newer than the revIDs in attachmentsSince. +// If useDeltas is true, attachments will be delta-compressed based on the versions of the +// attachments in the attachmentsSince revisions. 
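+//
+// Illustrative call (ancestor revision ID borrowed from the tests in this patch):
+//
+//	body, err := db.GetRevWithAttachments("doc1", "", false,
+//	    []string{"1-54f3a105fb903018c160712ffddb74dc"}, true)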
+func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions bool, attachmentsSince []string, useDeltas bool) (Body, error) { + body, doc, err := db.getRev(docid, revid, listRevisions) + if err != nil || attachmentsSince == nil || len(BodyAttachments(body)) == 0 { + return body, err + } + + if revid == "" { + revid = body["_rev"].(string) + } + + // Figure out the min revpos to get bodies of, and the available delta sources: + minRevpos := 1 // Don't include atts whose revpos is < this + var deltaSrcKeys map[string]AttachmentKey // Old versions to use as delta srcs + if len(attachmentsSince) > 0 { + if doc == nil { // if rev was in the cache, we don't have the document struct yet + if doc, err = db.GetDoc(docid); doc == nil { + return nil, err } } - body, err = db.loadBodyAttachments(body, minRevpos) - if err != nil { - return nil, err + ancestorRevID := doc.History.findAncestorFromSet(revid, attachmentsSince) + if ancestorRevID != "" { + minRevpos, _ = parseRevID(ancestorRevID) + minRevpos++ + // Now load the ancestor rev's body to get the attachment digests: + if useDeltas { + if ancestorBody, _ := db.getRevision(doc, ancestorRevID); ancestorBody != nil { + deltaSrcKeys = db.getAttachmentDigests(ancestorBody) + } + } } } + // Add attachment bodies: + body, err = db.loadBodyAttachments(body, minRevpos, deltaSrcKeys) + if err != nil { + return nil, err + } return body, nil } diff --git a/src/github.com/couchbaselabs/sync_gateway/db/database_test.go b/src/github.com/couchbaselabs/sync_gateway/db/database_test.go index ed49d838dd..eb71986257 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/database_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/database_test.go @@ -92,22 +92,22 @@ func TestDatabase(t *testing.T) { assert.DeepEquals(t, gotbody, body) log.Printf("Retrieve rev 1...") - gotbody, err = db.GetRev("doc1", rev1id, false, nil) + gotbody, err = db.GetRev("doc1", rev1id, false) assertNoError(t, err, "Couldn't get document with rev 1") assert.DeepEquals(t, gotbody, Body{"key1": "value1", "key2": 1234, "_id": "doc1", "_rev": rev1id}) log.Printf("Retrieve rev 2...") - gotbody, err = db.GetRev("doc1", rev2id, false, nil) + gotbody, err = db.GetRev("doc1", rev2id, false) assertNoError(t, err, "Couldn't get document with rev") assert.DeepEquals(t, gotbody, body) - gotbody, err = db.GetRev("doc1", "bogusrev", false, nil) + gotbody, err = db.GetRev("doc1", "bogusrev", false) status, _ := base.ErrorAsHTTPStatus(err) assert.Equals(t, status, 404) // Test the _revisions property: log.Printf("Check _revisions...") - gotbody, err = db.GetRev("doc1", rev2id, true, nil) + gotbody, err = db.GetRev("doc1", rev2id, true) revisions := gotbody["_revisions"].(Body) assert.Equals(t, revisions["start"], 2) assert.DeepEquals(t, revisions["ids"], @@ -169,7 +169,7 @@ func TestGetDeleted(t *testing.T) { assertNoError(t, err, "DeleteDoc") // Get the deleted doc with its history; equivalent to GET with ?revs=true - body, err = db.GetRev("doc1", rev2id, true, nil) + body, err = db.GetRev("doc1", rev2id, true) assertNoError(t, err, "GetRev") expectedResult := Body{ "_id": "doc1", @@ -185,7 +185,7 @@ func TestGetDeleted(t *testing.T) { assertNoError(t, err, "GetUser") db.user.SetExplicitChannels(nil) - body, err = db.GetRev("doc1", rev2id, true, nil) + body, err = db.GetRev("doc1", rev2id, true) assertNoError(t, err, "GetRev") assert.DeepEquals(t, body, expectedResult) } @@ -403,10 +403,10 @@ func TestConflicts(t *testing.T) { "channels": []interface{}{"all", "2b"}}) // Verify we 
can still get the other two revisions: - gotBody, err = db.GetRev("doc", "1-a", false, nil) + gotBody, err = db.GetRev("doc", "1-a", false) assert.DeepEquals(t, gotBody, Body{"_id": "doc", "_rev": "1-a", "n": 1, "channels": []string{"all", "1"}}) - gotBody, err = db.GetRev("doc", "2-a", false, nil) + gotBody, err = db.GetRev("doc", "2-a", false) assert.DeepEquals(t, gotBody, Body{"_id": "doc", "_rev": "2-a", "n": 3, "channels": []string{"all", "2a"}}) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go index bb04a2929b..421cc72c59 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go @@ -12,13 +12,19 @@ package rest import ( "bytes" "compress/gzip" + "encoding/base64" "encoding/json" "fmt" + "github.com/snej/zdelta-go" + "io/ioutil" "log" + "mime" + "mime/multipart" "net/http" "net/http/httptest" "runtime" "sort" + "strings" "testing" "time" @@ -157,6 +163,14 @@ func assertStatus(t *testing.T, response *testResponse, expectedStatus int) { } } +func readMultipartResponse(response *testResponse) *multipart.Reader { + contentType, attrs, _ := mime.ParseMediaType(response.HeaderMap.Get("Content-Type")) + if !strings.HasPrefix(contentType, "multipart/") { + panic("Error: response type is " + contentType + ", not multipart") + } + return multipart.NewReader(response.Body, attrs["boundary"]) +} + func (sc *ServerContext) Database(name string) *db.DatabaseContext { db, err := sc.GetDatabase(name) if err != nil { @@ -1665,3 +1679,98 @@ func TestCreateTarget(t *testing.T) { response = rt.sendRequest("PUT", "/foo/", "") assertStatus(t, response, 403) } + +func TestGetAttachmentAsDelta(t *testing.T) { + var rt restTester + + putDocAttach1 := func(queries string, attachmentBody string) string { + response := rt.sendRequest("PUT", "/db/doc1/attach1"+queries, attachmentBody) + assertStatus(t, response, 201) + var body db.Body + json.Unmarshal(response.Body.Bytes(), &body) + revID := body["rev"].(string) + assert.True(t, revID != "") + return revID + } + getDocAttach1 := func(queries string) map[string]interface{} { + headers := map[string]string{"Accept": "application/json"} + response := rt.sendRequestWithHeaders("GET", "/db/doc1"+queries, "", headers) + assertStatus(t, response, 200) + var body db.Body + json.Unmarshal(response.Body.Bytes(), &body) + attachments := body["_attachments"].(map[string]interface{}) + return attachments["attach1"].(map[string]interface{}) + } + + // Create 1st rev of doc with attachment: + attachmentBody := "This is a string for use in testing delta compression. This is only a string. It has two ends." + revID1 := putDocAttach1("", attachmentBody) + attach1 := getDocAttach1("") + digest1 := attach1["digest"].(string) + + // Update doc attachment: + attachmentBody2 := "This is test. This is only a test. The test ends." 
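+	// (Round-trip sketch: the gateway computes delta, _ := zdelta.CreateDelta(old, new);
+	// the client then recovers new via zdelta.ApplyDelta(old, delta), as verified below.)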
+ putDocAttach1("?rev="+revID1, attachmentBody2) + + // Get the doc without deltas enabled, in JSON format: + attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]") + assert.Equals(t, attach1["data"], base64.StdEncoding.EncodeToString([]byte(attachmentBody2))) + assert.Equals(t, attach1["encoding"], nil) + assert.Equals(t, attach1["deltasrc"], nil) + + // Get the doc with deltas enabled, in JSON format: + attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") + assert.Equals(t, attach1["encoding"], "zdelta") + assert.Equals(t, attach1["deltasrc"], digest1) + delta, err := base64.StdEncoding.DecodeString(attach1["data"].(string)) + assert.Equals(t, err, nil) + + // Decode the delta: + result, err := zdelta.ApplyDelta([]byte(attachmentBody), delta) + assert.Equals(t, err, nil) + assert.Equals(t, string(result), attachmentBody2) + + // Get the doc with deltas enabled, in MIME multipart format: + oldMax := db.MaxInlineAttachmentSize + db.MaxInlineAttachmentSize = 0 // Force all attachments to be MIME parts + defer func() { db.MaxInlineAttachmentSize = oldMax }() + headers := map[string]string{"Accept": "multipart/*"} + attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") + response := rt.sendRequestWithHeaders("GET", "/db/doc1?attachments=true&atts_since=[\""+revID1+"\"]&deltas=true", "", headers) + assertStatus(t, response, 200) + mp := readMultipartResponse(response) + // Check the JSON part: + part, err := mp.NextPart() + assert.Equals(t, err, nil) + assert.Equals(t, part.Header["Content-Type"][0], "application/json") + decoder := json.NewDecoder(part) + var body db.Body + decoder.Decode(&body) + attachments := body["_attachments"].(map[string]interface{}) + attach1 = attachments["attach1"].(map[string]interface{}) + assert.Equals(t, attach1["encoding"], "zdelta") + assert.Equals(t, attach1["deltasrc"], digest1) + assert.Equals(t, attach1["follows"], true) + assert.Equals(t, attach1["data"], nil) + // Check the attachment part: + part, err = mp.NextPart() + assert.Equals(t, err, nil) + assert.Equals(t, part.FileName(), "attach1") + assert.DeepEquals(t, part.Header["Content-Type"], []string(nil)) + delta, err = ioutil.ReadAll(part) + assert.Equals(t, err, nil) + // Decode the delta: + result, err = zdelta.ApplyDelta([]byte(attachmentBody), delta) + assert.Equals(t, err, nil) + assert.Equals(t, string(result), attachmentBody2) + + // Now get the attachment on its own, as a delta: + response = rt.sendRequest("GET", "/db/doc1/attach1?deltas="+digest1, "") + assertStatus(t, response, 200) + assert.Equals(t, response.HeaderMap.Get("Content-Encoding"), "zdelta") + delta, err = ioutil.ReadAll(response.Body) + assert.Equals(t, err, nil) + result, err = zdelta.ApplyDelta([]byte(attachmentBody), delta) + assert.Equals(t, err, nil) + assert.Equals(t, string(result), attachmentBody2) +} diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go index d045a06de1..df2d4a8dfd 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go @@ -297,6 +297,7 @@ func (h *handler) handleDumpChannel() error { func (h *handler) handleBulkGet() error { includeRevs := h.getBoolQuery("revs") includeAttachments := h.getBoolQuery("attachments") + sendDeltas := h.getBoolQuery("deltas") canCompress := strings.Contains(h.rq.Header.Get("X-Accept-Part-Encoding"), "gzip") body, err := 
h.readJSON() if err != nil { @@ -341,7 +342,7 @@ func (h *handler) handleBulkGet() error { } if err == nil { - body, err = h.db.GetRev(docid, revid, includeRevs, attsSince) + body, err = h.db.GetRevWithAttachments(docid, revid, includeRevs, attsSince, sendDeltas) } if err != nil { diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go index 16838836ec..f3e6460c2f 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go @@ -28,6 +28,7 @@ func (h *handler) handleGetDoc() error { // What attachment bodies should be included? var attachmentsSince []string = nil + sendDeltas := false if h.getBoolQuery("attachments") { atts := h.getQuery("atts_since") if atts != "" { @@ -38,11 +39,12 @@ func (h *handler) handleGetDoc() error { } else { attachmentsSince = []string{} } + sendDeltas = h.getBoolQuery("deltas") } if openRevs == "" { // Single-revision GET: - value, err := h.db.GetRev(docid, revid, includeRevs, attachmentsSince) + value, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) if err != nil { return err } @@ -86,7 +88,7 @@ func (h *handler) handleGetDoc() error { if h.requestAccepts("multipart/") { err := h.writeMultipart("mixed", func(writer *multipart.Writer) error { for _, revid := range revids { - revBody, err := h.db.GetRev(docid, revid, includeRevs, attachmentsSince) + revBody, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) if err != nil { revBody = db.Body{"missing": revid} //TODO: More specific error } @@ -101,7 +103,7 @@ func (h *handler) handleGetDoc() error { h.response.Write([]byte(`[` + "\n")) separator := []byte(``) for _, revid := range revids { - revBody, err := h.db.GetRev(docid, revid, includeRevs, attachmentsSince) + revBody, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) if err != nil { revBody = db.Body{"missing": revid} //TODO: More specific error } else { @@ -122,7 +124,7 @@ func (h *handler) handleGetAttachment() error { docid := h.PathVar("docid") attachmentName := h.PathVar("attach") revid := h.getQuery("rev") - body, err := h.db.GetRev(docid, revid, false, nil) + body, err := h.db.GetRev(docid, revid, false) if err != nil { return err } @@ -134,18 +136,35 @@ func (h *handler) handleGetAttachment() error { return base.HTTPErrorf(http.StatusNotFound, "missing attachment %s", attachmentName) } digest := meta["digest"].(string) - data, err := h.db.GetAttachment(db.AttachmentKey(digest)) + + var deltaSourceKeys []db.AttachmentKey + if deltasQ := h.getQuery("deltas"); deltasQ != "" { + // The query '?deltas=XXX,YYY' indicates that the client has attachments with + // digests XXX and YYY and prefers to receive the response as a delta from one of them. 
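+		// e.g. GET /db/doc1/attach1?deltas=sha1-AAAA=,sha1-BBBB=  (digests illustrative)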
+ deltaStrs := strings.Split(deltasQ, ",") + deltaSourceKeys = make([]db.AttachmentKey, len(deltaStrs)) + for i, d := range deltaStrs { + deltaSourceKeys[i] = db.AttachmentKey(d) + } + } + + data, deltaSource, err := h.db.GetAttachmentMaybeAsDelta(db.AttachmentKey(digest), deltaSourceKeys) if err != nil { return err } - h.setHeader("Etag", digest) if contentType, ok := meta["content_type"].(string); ok { h.setHeader("Content-Type", contentType) } - if encoding, ok := meta["encoding"].(string); ok { + + if deltaSource != "" { + h.setHeader("Content-Encoding", "zdelta") + h.setHeader("X-Delta-Source", string(deltaSource)) + } else if encoding, ok := meta["encoding"].(string); ok { h.setHeader("Content-Encoding", encoding) } + + h.setHeader("Etag", digest) h.response.Write(data) return nil } @@ -167,7 +186,7 @@ func (h *handler) handlePutAttachment() error { return err } - body, err := h.db.GetRev(docid, revid, false, nil) + body, err := h.db.GetRev(docid, revid, false) if err != nil && base.IsDocNotFoundError(err) { // couchdb creates empty body on attachment PUT // for non-existant doc id diff --git a/src/github.com/snej/zdelta-go b/src/github.com/snej/zdelta-go new file mode 160000 index 0000000000..a13c588aa7 --- /dev/null +++ b/src/github.com/snej/zdelta-go @@ -0,0 +1 @@ +Subproject commit a13c588aa745273a969d89c8399f056f3a1a86f1 diff --git a/sync_gateway.sublime-project b/sync_gateway.sublime-project index 7b4201a24a..3d043aaa82 100644 --- a/sync_gateway.sublime-project +++ b/sync_gateway.sublime-project @@ -6,30 +6,6 @@ "name": "sync_gateway", "path": "src/github.com/couchbaselabs/sync_gateway" }, - { - "file_exclude_patterns": - [ - ".*" - ], - "follow_symlinks": true, - "path": "vendor/src/github.com/couchbaselabs/go-couchbase" - }, - { - "file_exclude_patterns": - [ - ".*" - ], - "follow_symlinks": true, - "path": "vendor/src/github.com/dustin/gomemcached" - }, - { - "file_exclude_patterns": - [ - ".*" - ], - "follow_symlinks": true, - "path": "vendor/src/github.com/couchbaselabs/walrus" - }, { "follow_symlinks": true, "path": "utils" From 0888732e7aa68064ece2923f8652476e2bd13eb8 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Wed, 25 Feb 2015 14:52:48 -0800 Subject: [PATCH 02/14] Added delta-compression support for getting JSON docs When deltas are enabled in a request, the JSON body may now also be delta-compressed. The source of the delta will be the first available revision listed in the `atts_since` parameter. This revision ID will appear in the `X-Delta-Source` header. See https://github.com/couchbaselabs/couchbase-lite-api/wiki/Delta-Compression For #452; see also couchbase/couchbase-lite-ios#168 --- .../sync_gateway/db/attachment.go | 45 ++++++++------ .../sync_gateway/db/attachment_test.go | 24 ++++---- .../couchbaselabs/sync_gateway/db/crud.go | 59 ++++++++++++------- .../sync_gateway/rest/api_test.go | 34 ++++++++--- .../sync_gateway/rest/bulk_api.go | 8 ++- .../sync_gateway/rest/doc_api.go | 34 +++++++---- sync_gateway.sublime-project | 4 ++ 7 files changed, 133 insertions(+), 75 deletions(-) diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go index 5f965ce81a..ded66ab21d 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go @@ -190,7 +190,7 @@ func (db *Database) getAttachmentDigests(body Body) map[string]AttachmentKey { // Stores a base64-encoded attachment and returns the key to get it by. 
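+// (Keys are content digests of the form "sha1-" + base64(SHA-1 of the data);
+// see SHA1DigestKey below.)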
func (db *Database) setAttachment(attachment []byte) (AttachmentKey, error) { - key := AttachmentKey(sha1DigestKey(attachment)) + key := AttachmentKey(SHA1DigestKey(attachment)) _, err := db.Bucket.AddRaw(attachmentKeyToString(key), 0, attachment) if err == nil { base.LogTo("Attach", "\tAdded attachment %q", key) @@ -227,17 +227,28 @@ func ReadJSONFromMIME(headers http.Header, input io.Reader, into interface{}) er return nil } -func writeJSONPart(writer *multipart.Writer, contentType string, body Body, compressed bool) (err error) { - bytes, err := json.Marshal(body) +func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, compressed bool) (err error) { + bytes, err := json.Marshal(r.Body) if err != nil { return err } + + partHeaders := textproto.MIMEHeader{} + partHeaders.Set("Content-Type", contentType) + if len(bytes) < kMinCompressedJSONSize { compressed = false } + if r.OldRevJSON != nil { + delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) + if err == nil && len(delta) < len(bytes) { + bytes = delta + compressed = false + partHeaders.Set("Content-Encoding", "zdelta") + partHeaders.Set("X-Delta-Source", r.OldRevID) + } + } - partHeaders := textproto.MIMEHeader{} - partHeaders.Set("Content-Type", contentType) if compressed { partHeaders.Set("Content-Encoding", "gzip") } @@ -257,7 +268,7 @@ func writeJSONPart(writer *multipart.Writer, contentType string, body Body, comp } // Writes a revision to a MIME multipart writer, encoding large attachments as separate parts. -func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, compress bool) { +func (db *Database) WriteMultipartDocument(r RevResponse, writer *multipart.Writer, compress bool) { type attInfo struct { name string data []byte @@ -266,14 +277,14 @@ func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, // First extract the attachments that should follow: following := []attInfo{} - for name, value := range BodyAttachments(body) { + for name, value := range BodyAttachments(r.Body) { meta := value.(map[string]interface{}) if meta["stub"] != true { var err error var info attInfo info.data, err = decodeAttachment(meta["data"]) if info.data == nil { - base.Warn("Couldn't decode attachment %q of doc %q: %v", name, body["_id"], err) + base.Warn("Couldn't decode attachment %q of doc %q: %v", name, r.Body["_id"], err) meta["stub"] = true delete(meta, "data") } else if len(info.data) > MaxInlineAttachmentSize { @@ -287,7 +298,7 @@ func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, } // Write the main JSON body: - writeJSONPart(writer, "application/json", body, compress) + writeJSONPart(writer, "application/json", r, compress) // Write the following attachments for _, info := range following { @@ -305,16 +316,16 @@ func (db *Database) WriteMultipartDocument(body Body, writer *multipart.Writer, // Adds a new part to the given multipart writer, containing the given revision. // The revision will be written as a nested multipart body if it has attachments. 
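+// Sketch of a resulting part (header values illustrative):
+//
+//	X-Doc-ID: doc1
+//	X-Rev-ID: 2-08b42c51334c0469bd060e6d9e6d797b
+//	Content-Type: multipart/related; boundary="..."
+//
+//	<JSON body part, then one part per "follows" attachment>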
-func (db *Database) WriteRevisionAsPart(revBody Body, isError bool, compress bool, writer *multipart.Writer) error { +func (db *Database) WriteRevisionAsPart(r RevResponse, isError bool, compress bool, writer *multipart.Writer) error { partHeaders := textproto.MIMEHeader{} - docID, _ := revBody["_id"].(string) - revID, _ := revBody["_rev"].(string) + docID, _ := r.Body["_id"].(string) + revID, _ := r.Body["_rev"].(string) if len(docID) > 0 { partHeaders.Set("X-Doc-ID", docID) partHeaders.Set("X-Rev-ID", revID) } - if hasInlineAttachments(revBody) { + if hasInlineAttachments(r.Body) { // Write as multipart, including attachments: // OPT: Find a way to do this w/o having to buffer the MIME body in memory! var buffer bytes.Buffer @@ -322,7 +333,7 @@ func (db *Database) WriteRevisionAsPart(revBody Body, isError bool, compress boo contentType := fmt.Sprintf("multipart/related; boundary=%q", docWriter.Boundary()) partHeaders.Set("Content-Type", contentType) - db.WriteMultipartDocument(revBody, docWriter, compress) + db.WriteMultipartDocument(r, docWriter, compress) docWriter.Close() content := bytes.TrimRight(buffer.Bytes(), "\r\n") @@ -337,7 +348,7 @@ func (db *Database) WriteRevisionAsPart(revBody Body, isError bool, compress boo if isError { contentType += `; error="true"` } - return writeJSONPart(writer, contentType, revBody, compress) + return writeJSONPart(writer, contentType, r, compress) } } @@ -394,7 +405,7 @@ func ReadMultipartDocument(reader *multipart.Reader) (Body, error) { } // Look up the attachment by its digest: - digest := sha1DigestKey(data) + digest := SHA1DigestKey(data) name, meta := findFollowingAttachment(digest) if meta == nil { name, meta = findFollowingAttachment(md5DigestKey(data)) @@ -434,7 +445,7 @@ func ReadMultipartDocument(reader *multipart.Reader) (Body, error) { //////// HELPERS: -func sha1DigestKey(data []byte) string { +func SHA1DigestKey(data []byte) string { digester := sha1.New() digester.Write(data) return "sha1-" + base64.StdEncoding.EncodeToString(digester.Sum(nil)) diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go index 753d85e119..5fb8ccf557 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go @@ -51,9 +51,9 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev1output := `{"_attachments":{"bye.txt":{"data":"Z29vZGJ5ZSBjcnVlbCB3b3JsZA==","digest":"sha1-l+N7VpXGnoxMm8xfvtWPbz2YvDc=","length":19,"revpos":1},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"1-54f3a105fb903018c160712ffddb74dc"}` - gotbody, err := db.GetRevWithAttachments("doc1", "", false, []string{}, false) + response, err := db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev1output) + assert.Equals(t, tojson(response.Body), rev1output) log.Printf("Create rev 2...") rev2str := `{"_attachments": {"hello.txt": {"stub":true, "revpos":1}, "bye.txt": {"data": "YnllLXlh"}}}` @@ -66,15 +66,15 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev2output := 
`{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` - gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) + response, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev2output) + assert.Equals(t, tojson(response.Body), rev2output) log.Printf("Retrieve doc with atts_since...") rev2Aoutput := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1,"stub":true}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` - gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-54f3a105fb903018c160712ffddb74dc", "1-foo", "993-bar"}, false) + response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-54f3a105fb903018c160712ffddb74dc", "1-foo", "993-bar"}, false) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev2Aoutput) + assert.Equals(t, tojson(response.Body), rev2Aoutput) log.Printf("Create rev 3...") rev3str := `{"_attachments": {"bye.txt": {"stub":true,"revpos":2}}}` @@ -87,9 +87,9 @@ func TestAttachments(t *testing.T) { log.Printf("Retrieve doc...") rev3output := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2}},"_id":"doc1","_rev":"3-252b9fa1f306930bffc07e7d75b77faf"}` - gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) + response, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev3output) + assert.Equals(t, tojson(response.Body), rev3output) log.Printf("Expire body of rev 1, then add a child...") // test fix of #498 err = db.Bucket.Delete(oldRevisionKey("doc1", rev1id)) @@ -128,13 +128,13 @@ func TestAttachmentDeltas(t *testing.T) { log.Printf("Retrieve doc with delta-encoded attachment...") rev2output := `{"_attachments":{"bye.txt":{"data":"ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT","deltasrc":"sha1-l5fhr3wrVdXDCNkamTn8KypCswQ=","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","encoding":"zdelta","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` - gotbody, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, true) + response, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, true) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev2output) + assert.Equals(t, tojson(response.Body), rev2output) log.Printf("Retrieve doc without delta-encoded attachment...") rev2Boutput := `{"_attachments":{"bye.txt":{"data":"VGhpcyBpcyBhIHRlc3QuIFRoaXMgaXMgb25seSBhIHRlc3QuCg==","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` - gotbody, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, false) + response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, 
false) assertNoError(t, err, "Couldn't get document") - assert.Equals(t, tojson(gotbody), rev2Boutput) + assert.Equals(t, tojson(response.Body), rev2Boutput) } diff --git a/src/github.com/couchbaselabs/sync_gateway/db/crud.go b/src/github.com/couchbaselabs/sync_gateway/db/crud.go index 706ff8bb6c..21d64ddfdb 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/crud.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/crud.go @@ -154,48 +154,65 @@ func (db *Database) GetRev(docid, revid string, listRevisions bool) (Body, error return body, err } +func (db *Database) GetRevJSON(docid, revid string) ([]byte, error) { + doc, err := db.GetDoc(docid) + if doc == nil { + return nil, err + } + return db.getRevisionJSON(doc, revid) +} + +// Result of GetRevWithAttachments. +type RevResponse struct { + Body Body // The parsed doc body + OldRevJSON []byte // The JSON source of the old revision, to use for a delta + OldRevID string // The revID of the old revision +} + // Returns the body of a revision of a document, including attachments. Based on GetRev. -// If attachmentsSince is non-nil, attachment bodies ("data" properties) will be added for all -// revisions newer than the revIDs in attachmentsSince. +// If knownRevIDs is non-nil, attachment bodies ("data" properties) will be added for all +// revisions newer than the revIDs in knownRevIDs. // If useDeltas is true, attachments will be delta-compressed based on the versions of the -// attachments in the attachmentsSince revisions. -func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions bool, attachmentsSince []string, useDeltas bool) (Body, error) { - body, doc, err := db.getRev(docid, revid, listRevisions) - if err != nil || attachmentsSince == nil || len(BodyAttachments(body)) == 0 { - return body, err +// attachments in the knownRevIDs revisions. 
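+//
+// Illustrative use (revision ID hypothetical):
+//
+//	r, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-abc"}, true)
+//	// r.Body is the parsed body; if r.OldRevJSON is non-nil, the JSON itself may be
+//	// sent as a zdelta with X-Delta-Source set to r.OldRevID.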
+func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions bool, knownRevIDs []string, useDeltas bool) (r RevResponse, err error) { + var doc *document + r.Body, doc, err = db.getRev(docid, revid, listRevisions) + if err != nil || knownRevIDs == nil || (!useDeltas && len(BodyAttachments(r.Body)) == 0) { + return } if revid == "" { - revid = body["_rev"].(string) + revid = r.Body["_rev"].(string) } // Figure out the min revpos to get bodies of, and the available delta sources: minRevpos := 1 // Don't include atts whose revpos is < this var deltaSrcKeys map[string]AttachmentKey // Old versions to use as delta srcs - if len(attachmentsSince) > 0 { + if len(knownRevIDs) > 0 { if doc == nil { // if rev was in the cache, we don't have the document struct yet if doc, err = db.GetDoc(docid); doc == nil { - return nil, err + r.Body = nil + return } } - ancestorRevID := doc.History.findAncestorFromSet(revid, attachmentsSince) - if ancestorRevID != "" { - minRevpos, _ = parseRevID(ancestorRevID) + r.OldRevID = doc.History.findAncestorFromSet(revid, knownRevIDs) + if r.OldRevID != "" { + minRevpos, _ = parseRevID(r.OldRevID) minRevpos++ - // Now load the ancestor rev's body to get the attachment digests: if useDeltas { - if ancestorBody, _ := db.getRevision(doc, ancestorRevID); ancestorBody != nil { - deltaSrcKeys = db.getAttachmentDigests(ancestorBody) + // Now load the ancestor rev's JSON and its attachment digests: + // The JSON is in canonical form with no _id, _rev or _deleted properties + if r.OldRevJSON, _ = db.getRevisionJSON(doc, r.OldRevID); r.OldRevJSON != nil { + var deltaSrcBody Body + json.Unmarshal(r.OldRevJSON, &deltaSrcBody) + deltaSrcKeys = db.getAttachmentDigests(deltaSrcBody) } } } } // Add attachment bodies: - body, err = db.loadBodyAttachments(body, minRevpos, deltaSrcKeys) - if err != nil { - return nil, err - } - return body, nil + r.Body, err = db.loadBodyAttachments(r.Body, minRevpos, deltaSrcKeys) + return } // Returns the body of a revision of a document, as well as the document's current channels diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go index 421cc72c59..6fb30b2e4c 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go @@ -1687,17 +1687,21 @@ func TestGetAttachmentAsDelta(t *testing.T) { response := rt.sendRequest("PUT", "/db/doc1/attach1"+queries, attachmentBody) assertStatus(t, response, 201) var body db.Body - json.Unmarshal(response.Body.Bytes(), &body) + assert.Equals(t, json.Unmarshal(response.Body.Bytes(), &body), nil) revID := body["rev"].(string) assert.True(t, revID != "") return revID } - getDocAttach1 := func(queries string) map[string]interface{} { + getDoc := func(queries string) []byte { headers := map[string]string{"Accept": "application/json"} response := rt.sendRequestWithHeaders("GET", "/db/doc1"+queries, "", headers) assertStatus(t, response, 200) + return response.Body.Bytes() + } + getDocAttach1 := func(queries string) map[string]interface{} { + response := getDoc(queries) var body db.Body - json.Unmarshal(response.Body.Bytes(), &body) + assert.Equals(t, json.Unmarshal(response, &body), nil) attachments := body["_attachments"].(map[string]interface{}) return attachments["attach1"].(map[string]interface{}) } @@ -1705,6 +1709,7 @@ func TestGetAttachmentAsDelta(t *testing.T) { // Create 1st rev of doc with attachment: attachmentBody := "This is a string for use in 
testing delta compression. This is only a string. It has two ends." revID1 := putDocAttach1("", attachmentBody) + bodyData1 := getDoc("") attach1 := getDocAttach1("") digest1 := attach1["digest"].(string) @@ -1718,7 +1723,8 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, attach1["encoding"], nil) assert.Equals(t, attach1["deltasrc"], nil) - // Get the doc with deltas enabled, in JSON format: + // Get the doc with deltas enabled, in JSON format. The JSON itself will not be delta- + // compressed since it has to contain the delta-compressed attachments. attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") assert.Equals(t, attach1["encoding"], "zdelta") assert.Equals(t, attach1["deltasrc"], digest1) @@ -1732,26 +1738,36 @@ func TestGetAttachmentAsDelta(t *testing.T) { // Get the doc with deltas enabled, in MIME multipart format: oldMax := db.MaxInlineAttachmentSize - db.MaxInlineAttachmentSize = 0 // Force all attachments to be MIME parts + db.MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts defer func() { db.MaxInlineAttachmentSize = oldMax }() headers := map[string]string{"Accept": "multipart/*"} attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") response := rt.sendRequestWithHeaders("GET", "/db/doc1?attachments=true&atts_since=[\""+revID1+"\"]&deltas=true", "", headers) assertStatus(t, response, 200) mp := readMultipartResponse(response) - // Check the JSON part: + + // Check the (delta-encoded) JSON part: part, err := mp.NextPart() assert.Equals(t, err, nil) - assert.Equals(t, part.Header["Content-Type"][0], "application/json") - decoder := json.NewDecoder(part) + assert.Equals(t, part.Header.Get("Content-Type"), "application/json") + assert.Equals(t, part.Header.Get("Content-Encoding"), "zdelta") + assert.Equals(t, part.Header.Get("X-Delta-Source"), revID1) + // Decode the delta: + base.Log("Decoding delta with source: %s", bodyData1) + delta, _ = ioutil.ReadAll(part) + bodyData2, err := zdelta.ApplyDelta(bodyData1, delta) + assert.Equals(t, err, nil) + base.Log("Decoded delta: %s", bodyData2) + // Decode the JSON: var body db.Body - decoder.Decode(&body) + assert.Equals(t, json.Unmarshal(bodyData2, &body), nil) attachments := body["_attachments"].(map[string]interface{}) attach1 = attachments["attach1"].(map[string]interface{}) assert.Equals(t, attach1["encoding"], "zdelta") assert.Equals(t, attach1["deltasrc"], digest1) assert.Equals(t, attach1["follows"], true) assert.Equals(t, attach1["data"], nil) + // Check the attachment part: part, err = mp.NextPart() assert.Equals(t, err, nil) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go index df2d4a8dfd..78ed3a656c 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go @@ -341,21 +341,23 @@ func (h *handler) handleBulkGet() error { } } + var responseInfo db.RevResponse if err == nil { - body, err = h.db.GetRevWithAttachments(docid, revid, includeRevs, attsSince, sendDeltas) + responseInfo, err = h.db.GetRevWithAttachments(docid, revid, includeRevs, attsSince, sendDeltas) } if err != nil { // Report error in the response for this doc: status, reason := base.ErrorAsHTTPStatus(err) errStr := base.CouchHTTPErrorName(status) - body = db.Body{"id": docid, "error": errStr, "reason": reason, "status": status} + responseInfo.Body = db.Body{"id": docid, "error": 
errStr, "reason": reason, "status": status} + responseInfo.OldRevJSON = nil if revid != "" { body["rev"] = revid } } - h.db.WriteRevisionAsPart(body, err != nil, canCompress, writer) + h.db.WriteRevisionAsPart(responseInfo, err != nil, canCompress, writer) } return nil }) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go index f3e6460c2f..74dda6d6c2 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go @@ -11,6 +11,7 @@ package rest import ( "encoding/json" + "github.com/snej/zdelta-go" "mime/multipart" "net/http" "strings" @@ -44,24 +45,31 @@ func (h *handler) handleGetDoc() error { if openRevs == "" { // Single-revision GET: - value, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) + responseInfo, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) if err != nil { return err } - if value == nil { + if responseInfo.Body == nil { return kNotFoundError } - h.setHeader("Etag", value["_rev"].(string)) + h.setHeader("Etag", responseInfo.Body["_rev"].(string)) - hasBodies := (attachmentsSince != nil && value["_attachments"] != nil) + hasBodies := (attachmentsSince != nil && responseInfo.Body["_attachments"] != nil) if h.requestAccepts("multipart/") && (hasBodies || !h.requestAccepts("application/json")) { canCompress := strings.Contains(h.rq.Header.Get("X-Accept-Part-Encoding"), "gzip") return h.writeMultipart("related", func(writer *multipart.Writer) error { - h.db.WriteMultipartDocument(value, writer, canCompress) + h.db.WriteMultipartDocument(responseInfo, writer, canCompress) return nil }) + } else if responseInfo.OldRevJSON != nil && !hasBodies { + h.setHeader("Content-Type", "application/json") + h.setHeader("Content-Encoding", "zdelta") + h.setHeader("X-Delta-Source", responseInfo.OldRevID) + target, _ := json.Marshal(responseInfo.Body) + var cmp zdelta.Compressor + cmp.WriteDelta(responseInfo.OldRevJSON, target, h.response) } else { - h.writeJSON(value) + h.writeJSON(responseInfo.Body) } } else { var revids []string @@ -88,11 +96,11 @@ func (h *handler) handleGetDoc() error { if h.requestAccepts("multipart/") { err := h.writeMultipart("mixed", func(writer *multipart.Writer) error { for _, revid := range revids { - revBody, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) + responseInfo, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) if err != nil { - revBody = db.Body{"missing": revid} //TODO: More specific error + responseInfo.Body = db.Body{"missing": revid} //TODO: More specific error } - h.db.WriteRevisionAsPart(revBody, err != nil, false, writer) + h.db.WriteRevisionAsPart(responseInfo, err != nil, false, writer) } return nil }) @@ -103,15 +111,15 @@ func (h *handler) handleGetDoc() error { h.response.Write([]byte(`[` + "\n")) separator := []byte(``) for _, revid := range revids { - revBody, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, sendDeltas) + responseInfo, err := h.db.GetRevWithAttachments(docid, revid, includeRevs, attachmentsSince, false) if err != nil { - revBody = db.Body{"missing": revid} //TODO: More specific error + responseInfo.Body = db.Body{"missing": revid} //TODO: More specific error } else { - revBody = db.Body{"ok": revBody} + responseInfo.Body = db.Body{"ok": responseInfo.Body} } 
h.response.Write(separator) separator = []byte(",") - h.addJSON(revBody) + h.addJSON(responseInfo.Body) } h.response.Write([]byte(`]`)) } diff --git a/sync_gateway.sublime-project b/sync_gateway.sublime-project index 3d043aaa82..e5029dd3f7 100644 --- a/sync_gateway.sublime-project +++ b/sync_gateway.sublime-project @@ -17,6 +17,10 @@ { "follow_symlinks": true, "path": "vendor/src/github.com/couchbaselabs/sync_gateway_admin_ui" + }, + { + "follow_symlinks": true, + "path": "src/github.com/couchbaselabs/walrus" } ], "settings": From 5a00a6f10c62806e9c4d6a0e057656d940171740 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Thu, 26 Feb 2015 09:08:59 -0800 Subject: [PATCH 03/14] Cache attachment deltas After an attachment delta is computed, it's stored in the bucket as a raw doc with a 5-minute expiration time. --- .../sync_gateway/db/attachment.go | 47 +++++++++------ .../sync_gateway/db/attachment_test.go | 12 ++++ .../sync_gateway/db/deltaizer.go | 60 +++++++++++++++++++ .../couchbaselabs/sync_gateway/db/revision.go | 10 ++-- .../sync_gateway/rest/api_test.go | 16 +++-- 5 files changed, 119 insertions(+), 26 deletions(-) create mode 100644 src/github.com/couchbaselabs/sync_gateway/db/deltaizer.go diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go index ded66ab21d..55d252facb 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go @@ -150,27 +150,40 @@ func (db *Database) GetAttachment(key AttachmentKey) ([]byte, error) { // Retrieves an attachment's body, preferably as a delta from one of the versions specified // in `sourceKeys` func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []AttachmentKey) (result []byte, sourceKey AttachmentKey, err error) { + // First, attempt to reuse a cached delta without even having to load the attachment: + for _, sourceKey = range sourceKeys { + if result = db.getCachedAttachmentZDelta(sourceKey, key); result != nil { + // Found a cached delta + if len(result) == 0 { + // ... but it's not worth using + sourceKey = "" + result, err = db.GetAttachment(key) + } + return + } + } + + // No cached deltas, so create one: target, err := db.GetAttachment(key) if err != nil { return } + for _, sourceKey = range sourceKeys { - var src []byte - src, err = db.Bucket.GetRaw(attachmentKeyToString(sourceKey)) - if err == nil { - //OPT: Cache deltas. For now, this just computes the delta every time. - result, err = zdelta.CreateDelta(src, target) - if err == nil && len(result) < len(target) { - base.LogTo("Attach", "Generated zdelta {%s --> %s} (%d%%)", - sourceKey, key, len(result)*100/len(target)) + if src, _ := db.Bucket.GetRaw(attachmentKeyToString(sourceKey)); src != nil { + // Found a previous revision; generate a delta: + result = db.generateAttachmentZDelta(src, target, sourceKey, key) + if result != nil { + if len(result) == 0 { + // ... 
but it's not worth using + break + } return } } - if !base.IsDocNotFoundError(err) { - base.Warn("GetAttachmentAsDelta: Error for %q-->%q: %v", sourceKey, key, err) - } } - // No delta available so return entire attachment: + + // No previous attachments available so return entire body: result = target sourceKey = "" return @@ -227,7 +240,7 @@ func ReadJSONFromMIME(headers http.Header, input io.Reader, into interface{}) er return nil } -func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, compressed bool) (err error) { +func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, gzipCompress bool) (err error) { bytes, err := json.Marshal(r.Body) if err != nil { return err @@ -237,19 +250,19 @@ func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, partHeaders.Set("Content-Type", contentType) if len(bytes) < kMinCompressedJSONSize { - compressed = false + gzipCompress = false } if r.OldRevJSON != nil { delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) if err == nil && len(delta) < len(bytes) { bytes = delta - compressed = false + gzipCompress = false partHeaders.Set("Content-Encoding", "zdelta") partHeaders.Set("X-Delta-Source", r.OldRevID) } } - if compressed { + if gzipCompress { partHeaders.Set("Content-Encoding", "gzip") } part, err := writer.CreatePart(partHeaders) @@ -257,7 +270,7 @@ func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, return err } - if compressed { + if gzipCompress { gz := gzip.NewWriter(part) _, err = gz.Write(bytes) gz.Close() diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go index 5fb8ccf557..9441741a0f 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment_test.go @@ -10,6 +10,7 @@ package db import ( + "encoding/base64" "encoding/json" "fmt" "log" @@ -109,6 +110,11 @@ func TestAttachmentDeltas(t *testing.T) { db, err := CreateDatabase(context) assertNoError(t, err, "Couldn't create database 'db'") + // For this test, ensure delta will be used even if it's not much smaller: + oldSavings := MinDeltaSavings + MinDeltaSavings = 0 + defer func() { MinDeltaSavings = oldSavings }() + // Rev 1: log.Printf("Create rev 1...") rev1input := `{"_attachments": {"bye.txt": {"data":"VGhpcyBpcyBhIHN0cmluZyBmb3IgdXNlIGluIHRlc3RpbmcgZGVsdGEgY29tcHJlc3Npb24K"}}}` @@ -137,4 +143,10 @@ func TestAttachmentDeltas(t *testing.T) { response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, false) assertNoError(t, err, "Couldn't get document") assert.Equals(t, tojson(response.Body), rev2Boutput) + + // Verify contents of delta cache: + cached := db.getCachedAttachmentZDelta(AttachmentKey("sha1-l5fhr3wrVdXDCNkamTn8KypCswQ="), + AttachmentKey("sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=")) + rawDelta, _ := base64.StdEncoding.DecodeString("ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT") + assert.DeepEquals(t, cached, rawDelta) } diff --git a/src/github.com/couchbaselabs/sync_gateway/db/deltaizer.go b/src/github.com/couchbaselabs/sync_gateway/db/deltaizer.go new file mode 100644 index 0000000000..faa8105d50 --- /dev/null +++ b/src/github.com/couchbaselabs/sync_gateway/db/deltaizer.go @@ -0,0 +1,60 @@ +package db + +import ( + "fmt" + "time" + + "github.com/couchbaselabs/sync_gateway/base" + "github.com/snej/zdelta-go" +) + +// How long to cache precomputed deltas +var 
DeltaCacheExpirationTime = 5 * time.Minute + +// How many bytes shorter than the target a delta needs to be, to be worth using +var MinDeltaSavings = 100 + +// Looks up a cached delta between two attachments given their keys (digests). +// If the delta is not worth using (not enough space savings), returns an empty array. +// If no delta is cached, returns nil. +func (db *Database) getCachedAttachmentZDelta(srcKey, dstKey AttachmentKey) []byte { + return db._getCachedZDelta("att", string(srcKey), string(dstKey)) +} + +// Computes & caches the delta between two attachments. +// If the delta is not worth using (not enough space savings), returns an empty array. +func (db *Database) generateAttachmentZDelta(src, dst []byte, srcKey, dstKey AttachmentKey) []byte { + return db._generateZDelta(src, dst, "att", string(srcKey), string(dstKey)) +} + +// INTERNAL: + +func (db *Database) _getCachedZDelta(idType, srcID, dstID string) []byte { + key := _keyForCachedZDelta(idType, srcID, dstID) + delta, _ := db.Bucket.GetRaw(key) + if delta != nil { + base.LogTo("Delta", "Reused cached zdelta %s %s --> %s", + idType, srcID, dstID) + } + return delta +} + +func (db *Database) _generateZDelta(src, dst []byte, idType, srcID, dstID string) []byte { + delta, _ := zdelta.CreateDelta(src, dst) + if delta == nil { + return nil + } + base.LogTo("Delta", "Computed zdelta %s %s --> %s: saved %d bytes", + idType, srcID, dstID, int64(len(dst))-int64(len(delta))) + if len(delta)+MinDeltaSavings > len(dst) { + delta = []byte{} // not worth using + } + // Cache the computed delta: + key := _keyForCachedZDelta(idType, srcID, dstID) + db.Bucket.SetRaw(key, int(DeltaCacheExpirationTime.Seconds()), delta) + return delta +} + +func _keyForCachedZDelta(idType, srcID, dstID string) string { + return fmt.Sprintf("_sync:zd:%s:%d:%s:%s", idType, len(srcID), srcID, dstID) +} diff --git a/src/github.com/couchbaselabs/sync_gateway/db/revision.go b/src/github.com/couchbaselabs/sync_gateway/db/revision.go index 67a775b23e..be1df175ce 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/revision.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/revision.go @@ -17,6 +17,11 @@ import ( "github.com/couchbaselabs/sync_gateway/base" ) +// How long before old revisions expire. Larger values consume more disk space, of course, but +// save replication bandwidth by allowing clients to download revs as deltas. +// Future enhancement to make this a config setting might be appropriate. +const kOldRevisionExpiration = 12 * 60 * 60 + // The body of a CouchDB document/revision as decoded from JSON. type Body map[string]interface{} @@ -68,10 +73,7 @@ func (db *DatabaseContext) getOldRevisionJSON(docid string, revid string) ([]byt func (db *Database) setOldRevisionJSON(docid string, revid string, body []byte) error { base.LogTo("CRUD+", "Saving old revision %q / %q (%d bytes)", docid, revid, len(body)) - - // Set old revisions to expire after 5 minutes. Future enhancement to make this a config - // setting might be appropriate. 
- return db.Bucket.SetRaw(oldRevisionKey(docid, revid), 300, body) + return db.Bucket.SetRaw(oldRevisionKey(docid, revid), kOldRevisionExpiration, body) } //////// UTILITY FUNCTIONS: diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go index 6fb30b2e4c..e72086cca5 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go @@ -1706,6 +1706,15 @@ func TestGetAttachmentAsDelta(t *testing.T) { return attachments["attach1"].(map[string]interface{}) } + oldMax := db.MaxInlineAttachmentSize + oldMinDeltaSavings := db.MinDeltaSavings + db.MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts + db.MinDeltaSavings = 0 + defer func() { + db.MaxInlineAttachmentSize = oldMax + db.MinDeltaSavings = oldMinDeltaSavings + }() + // Create 1st rev of doc with attachment: attachmentBody := "This is a string for use in testing delta compression. This is only a string. It has two ends." revID1 := putDocAttach1("", attachmentBody) @@ -1737,9 +1746,6 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, string(result), attachmentBody2) // Get the doc with deltas enabled, in MIME multipart format: - oldMax := db.MaxInlineAttachmentSize - db.MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts - defer func() { db.MaxInlineAttachmentSize = oldMax }() headers := map[string]string{"Accept": "multipart/*"} attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") response := rt.sendRequestWithHeaders("GET", "/db/doc1?attachments=true&atts_since=[\""+revID1+"\"]&deltas=true", "", headers) @@ -1753,11 +1759,11 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, part.Header.Get("Content-Encoding"), "zdelta") assert.Equals(t, part.Header.Get("X-Delta-Source"), revID1) // Decode the delta: - base.Log("Decoding delta with source: %s", bodyData1) + log.Printf("Decoding delta with source: %s", bodyData1) delta, _ = ioutil.ReadAll(part) bodyData2, err := zdelta.ApplyDelta(bodyData1, delta) assert.Equals(t, err, nil) - base.Log("Decoded delta: %s", bodyData2) + log.Printf("Decoded delta: %s", bodyData2) // Decode the JSON: var body db.Body assert.Equals(t, json.Unmarshal(bodyData2, &body), nil) From 3f016b8d797ac371240cb846250d7fc2a3358cfd Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Thu, 26 Feb 2015 10:01:42 -0800 Subject: [PATCH 04/14] Updated zdelta-go. Minor tweak to JSON-body delta compression. 
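The tweak reorders writeJSONPart's decision-making: a zdelta against the
old revision is attempted first, and only when the body is large enough
for the savings to matter; gzip is then considered only for bodies that
did not end up delta-encoded. A minimal sketch of the resulting policy
(chooseEncoding is illustrative, not shipped code; the thresholds stand
in for MinDeltaSavings and kMinCompressedJSONSize, and it assumes
`import "github.com/snej/zdelta-go"`):

    // Pick the Content-Encoding for a JSON part: zdelta beats gzip,
    // and tiny bodies get neither.
    func chooseEncoding(body, oldRevJSON []byte,
        minDeltaSavings, minGzipSize int) (encoding string, payload []byte) {
        if oldRevJSON != nil && len(body) > minDeltaSavings {
            delta, err := zdelta.CreateDelta(oldRevJSON, body)
            if err == nil && len(delta)+minDeltaSavings < len(body) {
                return "zdelta", delta // saves enough bytes to be worth it
            }
        }
        if len(body) >= minGzipSize {
            return "gzip", body // caller gzips while writing the part
        }
        return "", body
    }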
--- .gitmodules | 3 +++ .../couchbaselabs/sync_gateway/db/attachment.go | 14 ++++++++------ src/github.com/snej/zdelta-go | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1b1f5e0b6a..43c3c7302a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,6 +16,9 @@ [submodule "src/github.com/gorilla/context"] path = src/github.com/gorilla/context url = https://github.com/gorilla/context +[submodule "src/github.com/snej/zdelta-go"] + path = src/github.com/snej/zdelta-go + url = https://github.com/snej/zdelta-go.git [submodule "src/github.com/tleyden/fakehttp"] path = src/github.com/tleyden/fakehttp url = https://github.com/tleyden/fakehttp.git diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go index 55d252facb..d81c0e1271 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go @@ -249,12 +249,9 @@ func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, partHeaders := textproto.MIMEHeader{} partHeaders.Set("Content-Type", contentType) - if len(bytes) < kMinCompressedJSONSize { - gzipCompress = false - } - if r.OldRevJSON != nil { + if r.OldRevJSON != nil && len(bytes) > MinDeltaSavings { delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) - if err == nil && len(delta) < len(bytes) { + if err == nil && len(delta)+MinDeltaSavings < len(bytes) { bytes = delta gzipCompress = false partHeaders.Set("Content-Encoding", "zdelta") @@ -263,8 +260,13 @@ func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, } if gzipCompress { - partHeaders.Set("Content-Encoding", "gzip") + if len(bytes) < kMinCompressedJSONSize { + gzipCompress = false + } else { + partHeaders.Set("Content-Encoding", "gzip") + } } + part, err := writer.CreatePart(partHeaders) if err != nil { return err diff --git a/src/github.com/snej/zdelta-go b/src/github.com/snej/zdelta-go index a13c588aa7..5bc8b0aedf 160000 --- a/src/github.com/snej/zdelta-go +++ b/src/github.com/snej/zdelta-go @@ -1 +1 @@ -Subproject commit a13c588aa745273a969d89c8399f056f3a1a86f1 +Subproject commit 5bc8b0aedfa256908b1b0fea83e5486df34051bf From f44a13e597d458bcc6f933fb1565dc0fecb9a125 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Thu, 26 Feb 2015 13:03:38 -0800 Subject: [PATCH 05/14] Refactoring of multipart MIME handling code Moved all the multipart-related code out of the db package (attachment.go) into the rest package (multipart.go, a new file.) Optimized writing nested multipart documents: the nested document no longer gets written to a temporary in-memory buffer first. 
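The optimization picks the nested boundary up front, advertises it in the
outer part's Content-Type, and then points a second multipart.Writer at
that part via SetBoundary (see createNestedMultipart in multipart.go). A
hedged usage sketch, with a made-up JSON payload (assumes the
"mime/multipart" and "net/textproto" imports):

    // Stream a nested multipart/related body straight into an outer
    // multipart writer, with no intermediate bytes.Buffer.
    func writeNestedSketch(outer *multipart.Writer) error {
        inner, err := createNestedMultipart(outer, "related", textproto.MIMEHeader{})
        if err != nil {
            return err
        }
        hdr := textproto.MIMEHeader{}
        hdr.Set("Content-Type", "application/json")
        part, err := inner.CreatePart(hdr)
        if err == nil {
            _, err = part.Write([]byte(`{"ok":true}`))
        }
        if closeErr := inner.Close(); err == nil { // writes the final boundary
            err = closeErr
        }
        return err
    }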
--- .../sync_gateway/db/attachment.go | 306 +---------------- .../couchbaselabs/sync_gateway/db/crud.go | 2 +- .../sync_gateway/rest/api_test.go | 6 +- .../sync_gateway/rest/bulk_api.go | 2 +- .../sync_gateway/rest/doc_api.go | 4 +- .../sync_gateway/rest/handler.go | 6 +- .../sync_gateway/rest/multipart.go | 313 ++++++++++++++++++ 7 files changed, 338 insertions(+), 301 deletions(-) create mode 100644 src/github.com/couchbaselabs/sync_gateway/rest/multipart.go diff --git a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go index d81c0e1271..980e424278 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/attachment.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/attachment.go @@ -10,32 +10,14 @@ package db import ( - "bytes" - "compress/gzip" - "crypto/md5" "crypto/sha1" "encoding/base64" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "mime/multipart" - "net/http" - "net/textproto" - "strings" "github.com/couchbaselabs/sync_gateway/base" - "github.com/snej/zdelta-go" ) -// Attachments shorter than this will be left in the JSON as base64 rather than being a separate -// MIME part. -var MaxInlineAttachmentSize = 200 - -// JSON bodies smaller than this won't be GZip-encoded. -const kMinCompressedJSONSize = 300 - // Key for retrieving an attachment from Couchbase. +// In practice it's "sha1-" followed by a hex SHA-1 digest. type AttachmentKey string // Given a CouchDB document body about to be stored in the database, goes through the _attachments @@ -55,7 +37,7 @@ func (db *Database) storeAttachments(doc *document, body Body, generation int, p data, exists := meta["data"] if exists { // Attachment contains data, so store it in the db: - attachment, err := decodeAttachment(data) + attachment, err := DecodeAttachment(data) if err != nil { return err } @@ -144,7 +126,7 @@ func (db *Database) loadBodyAttachments(body Body, minRevpos int, deltaSrcKeys m // Retrieves an attachment's body given its key. func (db *Database) GetAttachment(key AttachmentKey) ([]byte, error) { - return db.Bucket.GetRaw(attachmentKeyToString(key)) + return db.Bucket.GetRaw(attachmentKeyToDocKey(key)) } // Retrieves an attachment's body, preferably as a delta from one of the versions specified @@ -170,7 +152,7 @@ func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []At } for _, sourceKey = range sourceKeys { - if src, _ := db.Bucket.GetRaw(attachmentKeyToString(sourceKey)); src != nil { + if src, _ := db.Bucket.GetRaw(attachmentKeyToDocKey(sourceKey)); src != nil { // Found a previous revision; generate a delta: result = db.generateAttachmentZDelta(src, target, sourceKey, key) if result != nil { @@ -190,7 +172,7 @@ func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []At } // Returns the digests of all attachments in a Body, as a map of attachment names to keys. -func (db *Database) getAttachmentDigests(body Body) map[string]AttachmentKey { +func getAttachmentDigests(body Body) map[string]AttachmentKey { keys := map[string]AttachmentKey{} for name, value := range BodyAttachments(body) { meta := value.(map[string]interface{}) @@ -203,294 +185,36 @@ func (db *Database) getAttachmentDigests(body Body) map[string]AttachmentKey { // Stores a base64-encoded attachment and returns the key to get it by. 
func (db *Database) setAttachment(attachment []byte) (AttachmentKey, error) { - key := AttachmentKey(SHA1DigestKey(attachment)) - _, err := db.Bucket.AddRaw(attachmentKeyToString(key), 0, attachment) + key := SHA1DigestKey(attachment) + _, err := db.Bucket.AddRaw(attachmentKeyToDocKey(key), 0, attachment) if err == nil { base.LogTo("Attach", "\tAdded attachment %q", key) } return key, err } -//////// MIME MULTIPART: - -// Parses a JSON MIME body, unmarshaling it into "into". -func ReadJSONFromMIME(headers http.Header, input io.Reader, into interface{}) error { - contentType := headers.Get("Content-Type") - if contentType != "" && !strings.HasPrefix(contentType, "application/json") { - return base.HTTPErrorf(http.StatusUnsupportedMediaType, "Invalid content type %s", contentType) - } - - switch headers.Get("Content-Encoding") { - case "gzip": - var err error - if input, err = gzip.NewReader(input); err != nil { - return err - } - case "": - break - default: - return base.HTTPErrorf(http.StatusUnsupportedMediaType, "Unsupported Content-Encoding; use gzip") - } - - decoder := json.NewDecoder(input) - if err := decoder.Decode(into); err != nil { - base.Warn("Couldn't parse JSON in HTTP request: %v", err) - return base.HTTPErrorf(http.StatusBadRequest, "Bad JSON") - } - return nil -} - -func writeJSONPart(writer *multipart.Writer, contentType string, r RevResponse, gzipCompress bool) (err error) { - bytes, err := json.Marshal(r.Body) - if err != nil { - return err - } - - partHeaders := textproto.MIMEHeader{} - partHeaders.Set("Content-Type", contentType) - - if r.OldRevJSON != nil && len(bytes) > MinDeltaSavings { - delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) - if err == nil && len(delta)+MinDeltaSavings < len(bytes) { - bytes = delta - gzipCompress = false - partHeaders.Set("Content-Encoding", "zdelta") - partHeaders.Set("X-Delta-Source", r.OldRevID) - } - } - - if gzipCompress { - if len(bytes) < kMinCompressedJSONSize { - gzipCompress = false - } else { - partHeaders.Set("Content-Encoding", "gzip") - } - } - - part, err := writer.CreatePart(partHeaders) - if err != nil { - return err - } - - if gzipCompress { - gz := gzip.NewWriter(part) - _, err = gz.Write(bytes) - gz.Close() - } else { - _, err = part.Write(bytes) - } - return -} - -// Writes a revision to a MIME multipart writer, encoding large attachments as separate parts. 
-func (db *Database) WriteMultipartDocument(r RevResponse, writer *multipart.Writer, compress bool) { - type attInfo struct { - name string - data []byte - meta map[string]interface{} - } - - // First extract the attachments that should follow: - following := []attInfo{} - for name, value := range BodyAttachments(r.Body) { - meta := value.(map[string]interface{}) - if meta["stub"] != true { - var err error - var info attInfo - info.data, err = decodeAttachment(meta["data"]) - if info.data == nil { - base.Warn("Couldn't decode attachment %q of doc %q: %v", name, r.Body["_id"], err) - meta["stub"] = true - delete(meta, "data") - } else if len(info.data) > MaxInlineAttachmentSize { - info.name = name - info.meta = meta - following = append(following, info) - meta["follows"] = true - delete(meta, "data") - } - } - } - - // Write the main JSON body: - writeJSONPart(writer, "application/json", r, compress) - - // Write the following attachments - for _, info := range following { - partHeaders := textproto.MIMEHeader{} - if contentType, ok := info.meta["content_type"].(string); ok { - if info.meta["encoding"] == nil { - partHeaders.Set("Content-Type", contentType) - } - } - partHeaders.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", info.name)) - part, _ := writer.CreatePart(partHeaders) - part.Write(info.data) - } -} - -// Adds a new part to the given multipart writer, containing the given revision. -// The revision will be written as a nested multipart body if it has attachments. -func (db *Database) WriteRevisionAsPart(r RevResponse, isError bool, compress bool, writer *multipart.Writer) error { - partHeaders := textproto.MIMEHeader{} - docID, _ := r.Body["_id"].(string) - revID, _ := r.Body["_rev"].(string) - if len(docID) > 0 { - partHeaders.Set("X-Doc-ID", docID) - partHeaders.Set("X-Rev-ID", revID) - } - - if hasInlineAttachments(r.Body) { - // Write as multipart, including attachments: - // OPT: Find a way to do this w/o having to buffer the MIME body in memory! - var buffer bytes.Buffer - docWriter := multipart.NewWriter(&buffer) - contentType := fmt.Sprintf("multipart/related; boundary=%q", - docWriter.Boundary()) - partHeaders.Set("Content-Type", contentType) - db.WriteMultipartDocument(r, docWriter, compress) - docWriter.Close() - content := bytes.TrimRight(buffer.Bytes(), "\r\n") - - part, err := writer.CreatePart(partHeaders) - if err == nil { - _, err = part.Write(content) - } - return err - } else { - // Write as JSON: - contentType := "application/json" - if isError { - contentType += `; error="true"` - } - return writeJSONPart(writer, contentType, r, compress) - } -} - -func ReadMultipartDocument(reader *multipart.Reader) (Body, error) { - // First read the main JSON document body: - mainPart, err := reader.NextPart() - if err != nil { - return nil, err - } - var body Body - err = ReadJSONFromMIME(http.Header(mainPart.Header), mainPart, &body) - mainPart.Close() - if err != nil { - return nil, err - } - - // Collect the attachments with a "follows" property, which will appear as MIME parts: - followingAttachments := map[string]map[string]interface{}{} - for name, value := range BodyAttachments(body) { - if meta := value.(map[string]interface{}); meta["follows"] == true { - followingAttachments[name] = meta - } - } - - // Subroutine to look up a following attachment given its digest. (I used to precompute a - // map from digest->name, which was faster, but that broke down if there were multiple - // attachments with the same contents! 
See #96) - findFollowingAttachment := func(withDigest string) (string, map[string]interface{}) { - for name, meta := range followingAttachments { - if meta["follows"] == true { - if digest, ok := meta["digest"].(string); ok && digest == withDigest { - return name, meta - } - } - } - return "", nil - } - - // Read the parts one by one: - for i := 0; i < len(followingAttachments); i++ { - part, err := reader.NextPart() - if err != nil { - if err == io.EOF { - err = base.HTTPErrorf(http.StatusBadRequest, - "Too few MIME parts: expected %d attachments, got %d", - len(followingAttachments), i) - } - return nil, err - } - data, err := ioutil.ReadAll(part) - part.Close() - if err != nil { - return nil, err - } - - // Look up the attachment by its digest: - digest := SHA1DigestKey(data) - name, meta := findFollowingAttachment(digest) - if meta == nil { - name, meta = findFollowingAttachment(md5DigestKey(data)) - if meta == nil { - return nil, base.HTTPErrorf(http.StatusBadRequest, - "MIME part #%d doesn't match any attachment", i+2) - } - } - - length, ok := base.ToInt64(meta["encoded_length"]) - if !ok { - length, ok = base.ToInt64(meta["length"]) - } - if ok { - if length != int64(len(data)) { - return nil, base.HTTPErrorf(http.StatusBadRequest, "Attachment length mismatch for %q: read %d bytes, should be %g", name, len(data), length) - } - } - - // Stuff the data into the attachment metadata and remove the "follows" property: - delete(meta, "follows") - meta["data"] = data - meta["digest"] = digest - } - - // Make sure there are no unused MIME parts: - if _, err = reader.NextPart(); err != io.EOF { - if err == nil { - err = base.HTTPErrorf(http.StatusBadRequest, - "Too many MIME parts (expected %d)", len(followingAttachments)+1) - } - return nil, err - } - - return body, nil -} - //////// HELPERS: -func SHA1DigestKey(data []byte) string { +// Returns an AttachmentKey for an attachment body, based on its SHA-1 digest. +func SHA1DigestKey(data []byte) AttachmentKey { digester := sha1.New() digester.Write(data) - return "sha1-" + base64.StdEncoding.EncodeToString(digester.Sum(nil)) -} - -func md5DigestKey(data []byte) string { - digester := md5.New() - digester.Write(data) - return "md5-" + base64.StdEncoding.EncodeToString(digester.Sum(nil)) + return AttachmentKey("sha1-" + base64.StdEncoding.EncodeToString(digester.Sum(nil))) } +// Returns the "_attachments" property as a map. 
func BodyAttachments(body Body) map[string]interface{} { atts, _ := body["_attachments"].(map[string]interface{}) return atts } -func hasInlineAttachments(body Body) bool { - for _, value := range BodyAttachments(body) { - if meta, ok := value.(map[string]interface{}); ok && meta["data"] != nil { - return true - } - } - return false -} - -func attachmentKeyToString(key AttachmentKey) string { +// The Couchbase bucket key under which to store an attachment +func attachmentKeyToDocKey(key AttachmentKey) string { return "_sync:att:" + string(key) } -func decodeAttachment(att interface{}) ([]byte, error) { +// Base64-encodes an attachment if it's present as a raw byte array +func DecodeAttachment(att interface{}) ([]byte, error) { switch att := att.(type) { case []byte: return att, nil diff --git a/src/github.com/couchbaselabs/sync_gateway/db/crud.go b/src/github.com/couchbaselabs/sync_gateway/db/crud.go index 21d64ddfdb..79cec6de45 100644 --- a/src/github.com/couchbaselabs/sync_gateway/db/crud.go +++ b/src/github.com/couchbaselabs/sync_gateway/db/crud.go @@ -205,7 +205,7 @@ func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions boo if r.OldRevJSON, _ = db.getRevisionJSON(doc, r.OldRevID); r.OldRevJSON != nil { var deltaSrcBody Body json.Unmarshal(r.OldRevJSON, &deltaSrcBody) - deltaSrcKeys = db.getAttachmentDigests(deltaSrcBody) + deltaSrcKeys = getAttachmentDigests(deltaSrcBody) } } } diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go index e72086cca5..ae45787aaf 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/api_test.go @@ -1706,12 +1706,12 @@ func TestGetAttachmentAsDelta(t *testing.T) { return attachments["attach1"].(map[string]interface{}) } - oldMax := db.MaxInlineAttachmentSize + oldMax := MaxInlineAttachmentSize oldMinDeltaSavings := db.MinDeltaSavings - db.MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts + MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts db.MinDeltaSavings = 0 defer func() { - db.MaxInlineAttachmentSize = oldMax + MaxInlineAttachmentSize = oldMax db.MinDeltaSavings = oldMinDeltaSavings }() diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go index 78ed3a656c..48686fffc6 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/bulk_api.go @@ -357,7 +357,7 @@ func (h *handler) handleBulkGet() error { } } - h.db.WriteRevisionAsPart(responseInfo, err != nil, canCompress, writer) + WriteRevisionAsPart(responseInfo, err != nil, canCompress, writer) } return nil }) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go index 74dda6d6c2..e553326ef0 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/doc_api.go @@ -58,7 +58,7 @@ func (h *handler) handleGetDoc() error { if h.requestAccepts("multipart/") && (hasBodies || !h.requestAccepts("application/json")) { canCompress := strings.Contains(h.rq.Header.Get("X-Accept-Part-Encoding"), "gzip") return h.writeMultipart("related", func(writer *multipart.Writer) error { - h.db.WriteMultipartDocument(responseInfo, writer, canCompress) + WriteMultipartDocument(responseInfo, writer, canCompress) return 
nil }) } else if responseInfo.OldRevJSON != nil && !hasBodies { @@ -100,7 +100,7 @@ func (h *handler) handleGetDoc() error { if err != nil { responseInfo.Body = db.Body{"missing": revid} //TODO: More specific error } - h.db.WriteRevisionAsPart(responseInfo, err != nil, false, writer) + WriteRevisionAsPart(responseInfo, err != nil, false, writer) } return nil }) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/handler.go b/src/github.com/couchbaselabs/sync_gateway/rest/handler.go index 81c7b65f6b..20d3f6384e 100644 --- a/src/github.com/couchbaselabs/sync_gateway/rest/handler.go +++ b/src/github.com/couchbaselabs/sync_gateway/rest/handler.go @@ -310,7 +310,7 @@ func (h *handler) readJSON() (db.Body, error) { // Parses a JSON request body into a custom structure. func (h *handler) readJSONInto(into interface{}) error { - return db.ReadJSONFromMIME(h.rq.Header, h.requestBody, into) + return ReadJSONFromMIME(h.rq.Header, h.requestBody, into) } // Reads & parses the request body, handling either JSON or multipart. @@ -326,7 +326,7 @@ func (h *handler) readDocument() (db.Body, error) { return nil, err } reader := multipart.NewReader(bytes.NewReader(raw), attrs["boundary"]) - body, err := db.ReadMultipartDocument(reader) + body, err := ReadMultipartDocument(reader) if err != nil { ioutil.WriteFile("GatewayPUT.mime", raw, 0600) base.Warn("Error reading MIME data: copied to file GatewayPUT.mime") @@ -334,7 +334,7 @@ func (h *handler) readDocument() (db.Body, error) { return body, err } else { reader := multipart.NewReader(h.requestBody, attrs["boundary"]) - return db.ReadMultipartDocument(reader) + return ReadMultipartDocument(reader) } default: return nil, base.HTTPErrorf(http.StatusUnsupportedMediaType, "Invalid content type %s", contentType) diff --git a/src/github.com/couchbaselabs/sync_gateway/rest/multipart.go b/src/github.com/couchbaselabs/sync_gateway/rest/multipart.go new file mode 100644 index 0000000000..8f4ec9a9e4 --- /dev/null +++ b/src/github.com/couchbaselabs/sync_gateway/rest/multipart.go @@ -0,0 +1,313 @@ +package rest + +import ( + "compress/gzip" + "crypto/md5" + "crypto/rand" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "mime/multipart" + "net/http" + "net/textproto" + "strings" + + "github.com/couchbaselabs/sync_gateway/base" + "github.com/couchbaselabs/sync_gateway/db" + "github.com/snej/zdelta-go" +) + +// Attachments shorter than this will be left in the JSON as base64 rather than being a separate +// MIME part. +var MaxInlineAttachmentSize = 200 + +// JSON bodies smaller than this won't be GZip-encoded. +var MinCompressedJSONSize = 300 + +//////// WRITING: + +// Writes a revision to a MIME multipart writer, encoding large attachments as separate parts. 
+func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) { + type attInfo struct { + name string + data []byte + meta map[string]interface{} + } + + // First extract the attachments that should follow: + following := []attInfo{} + for name, value := range db.BodyAttachments(r.Body) { + meta := value.(map[string]interface{}) + if meta["stub"] != true { + var err error + var info attInfo + info.data, err = db.DecodeAttachment(meta["data"]) + if info.data == nil { + base.Warn("Couldn't decode attachment %q of doc %q: %v", name, r.Body["_id"], err) + meta["stub"] = true + delete(meta, "data") + } else if len(info.data) > MaxInlineAttachmentSize { + info.name = name + info.meta = meta + following = append(following, info) + meta["follows"] = true + delete(meta, "data") + } + } + } + + // Write the main JSON body: + writeJSONPart(r, "application/json", compress, writer) + + // Write the following attachments + for _, info := range following { + partHeaders := textproto.MIMEHeader{} + if contentType, ok := info.meta["content_type"].(string); ok { + if info.meta["encoding"] == nil { + partHeaders.Set("Content-Type", contentType) + } + } + partHeaders.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", info.name)) + part, _ := writer.CreatePart(partHeaders) + part.Write(info.data) + } +} + +// Adds a new part to the given multipart writer, containing the given revision. +// The revision will be written as a nested multipart body if it has attachments. +func WriteRevisionAsPart(r db.RevResponse, isError bool, compress bool, writer *multipart.Writer) error { + partHeaders := textproto.MIMEHeader{} + docID, _ := r.Body["_id"].(string) + revID, _ := r.Body["_rev"].(string) + if len(docID) > 0 { + partHeaders.Set("X-Doc-ID", docID) + partHeaders.Set("X-Rev-ID", revID) + } + + if hasInlineAttachments(r.Body) { + // Write doc as multipart, including attachments: + docWriter, err := createNestedMultipart(writer, "related", partHeaders) + if err == nil { + WriteMultipartDocument(r, docWriter, compress) + err = docWriter.Close() + } + return err + } else { + // Write doc as a single JSON part: + contentType := "application/json" + if isError { + contentType += `; error="true"` + r.OldRevJSON = nil // disable delta compression + compress = false // and gzip compression + } + return writeJSONPart(r, contentType, compress, writer) + } +} + +// Writes the JSON body of a revision as a part to a multipart writer. +func writeJSONPart(r db.RevResponse, contentType string, gzipCompress bool, writer *multipart.Writer) (err error) { + bytes, err := json.Marshal(r.Body) + if err != nil { + return err + } + + partHeaders := textproto.MIMEHeader{} + partHeaders.Set("Content-Type", contentType) + + if r.OldRevJSON != nil && len(bytes) > db.MinDeltaSavings { + delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) + if err == nil && len(delta)+db.MinDeltaSavings < len(bytes) { + bytes = delta + gzipCompress = false + partHeaders.Set("Content-Encoding", "zdelta") + partHeaders.Set("X-Delta-Source", r.OldRevID) + } + } + + if gzipCompress { + if len(bytes) < MinCompressedJSONSize { + gzipCompress = false + } else { + partHeaders.Set("Content-Encoding", "gzip") + } + } + + part, err := writer.CreatePart(partHeaders) + if err != nil { + return err + } + + if gzipCompress { + gz := gzip.NewWriter(part) + _, err = gz.Write(bytes) + gz.Close() + } else { + _, err = part.Write(bytes) + } + return +} + +//////// READING: + +// Parses a JSON MIME body, unmarshaling it into "into". 
+func ReadJSONFromMIME(headers http.Header, input io.Reader, into interface{}) error { + contentType := headers.Get("Content-Type") + if contentType != "" && !strings.HasPrefix(contentType, "application/json") { + return base.HTTPErrorf(http.StatusUnsupportedMediaType, "Invalid content type %s", contentType) + } + + switch headers.Get("Content-Encoding") { + case "gzip": + var err error + if input, err = gzip.NewReader(input); err != nil { + return err + } + case "": + break + default: + return base.HTTPErrorf(http.StatusUnsupportedMediaType, "Unsupported Content-Encoding; use gzip") + } + + decoder := json.NewDecoder(input) + if err := decoder.Decode(into); err != nil { + base.Warn("Couldn't parse JSON in HTTP request: %v", err) + return base.HTTPErrorf(http.StatusBadRequest, "Bad JSON") + } + return nil +} + +// Reads a document from a multipart MIME body: first the JSON part, then any attachments +func ReadMultipartDocument(reader *multipart.Reader) (db.Body, error) { + // First read the main JSON document body: + mainPart, err := reader.NextPart() + if err != nil { + return nil, err + } + var body db.Body + err = ReadJSONFromMIME(http.Header(mainPart.Header), mainPart, &body) + mainPart.Close() + if err != nil { + return nil, err + } + + // Collect the attachments with a "follows" property, which will appear as MIME parts: + followingAttachments := map[string]map[string]interface{}{} + for name, value := range db.BodyAttachments(body) { + if meta := value.(map[string]interface{}); meta["follows"] == true { + followingAttachments[name] = meta + } + } + + // Subroutine to look up a following attachment given its digest. (I used to precompute a + // map from digest->name, which was faster, but that broke down if there were multiple + // attachments with the same contents! 
See #96) + findFollowingAttachment := func(withDigest string) (string, map[string]interface{}) { + for name, meta := range followingAttachments { + if meta["follows"] == true { + if digest, ok := meta["digest"].(string); ok && digest == withDigest { + return name, meta + } + } + } + return "", nil + } + + // Read the parts one by one: + for i := 0; i < len(followingAttachments); i++ { + part, err := reader.NextPart() + if err != nil { + if err == io.EOF { + err = base.HTTPErrorf(http.StatusBadRequest, + "Too few MIME parts: expected %d attachments, got %d", + len(followingAttachments), i) + } + return nil, err + } + data, err := ioutil.ReadAll(part) + part.Close() + if err != nil { + return nil, err + } + + // Look up the attachment by its digest: + digest := string(db.SHA1DigestKey(data)) + name, meta := findFollowingAttachment(digest) + if meta == nil { + name, meta = findFollowingAttachment(md5DigestKey(data)) // CouchDB uses MD5 :-p + if meta == nil { + return nil, base.HTTPErrorf(http.StatusBadRequest, + "MIME part #%d doesn't match any attachment", i+2) + } + } + + length, ok := base.ToInt64(meta["encoded_length"]) + if !ok { + length, ok = base.ToInt64(meta["length"]) + } + if ok { + if length != int64(len(data)) { + return nil, base.HTTPErrorf(http.StatusBadRequest, "Attachment length mismatch for %q: read %d bytes, should be %g", name, len(data), length) + } + } + + // Stuff the data into the attachment metadata and remove the "follows" property: + delete(meta, "follows") + meta["data"] = data + meta["digest"] = digest + } + + // Make sure there are no unused MIME parts: + if _, err = reader.NextPart(); err != io.EOF { + if err == nil { + err = base.HTTPErrorf(http.StatusBadRequest, + "Too many MIME parts (expected %d)", len(followingAttachments)+1) + } + return nil, err + } + + return body, nil +} + +///////// HELPERS: + +// CouchDB-format attachment digest string +func md5DigestKey(data []byte) string { + digester := md5.New() + digester.Write(data) + return "md5-" + base64.StdEncoding.EncodeToString(digester.Sum(nil)) +} + +// Does this Body contain any attachments with a "data" property? +func hasInlineAttachments(body db.Body) bool { + for _, value := range db.BodyAttachments(body) { + if meta, ok := value.(map[string]interface{}); ok && meta["data"] != nil { + return true + } + } + return false +} + +// Creates a multipart writer as a nested part in another writer. 
+func createNestedMultipart(mpWriter *multipart.Writer, multipartSubType string, headers textproto.MIMEHeader) (*multipart.Writer, error) { + boundary := randomBoundary() + headers.Set("Content-Type", fmt.Sprintf("multipart/%s; boundary=%q", multipartSubType, boundary)) + part, err := mpWriter.CreatePart(headers) + if err != nil { + return nil, err + } + partWriter := multipart.NewWriter(part) + partWriter.SetBoundary(boundary) + return partWriter, nil +} + +// copied from Go source: src/mime/multipart/writer.go +func randomBoundary() string { + var buf [30]byte + _, err := io.ReadFull(rand.Reader, buf[:]) + if err != nil { + panic(err) + } + return fmt.Sprintf("%x", buf[:]) +} From 60b2b9f59102536dc30bfcd472089c788bbf62c7 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Thu, 26 Feb 2015 13:33:01 -0800 Subject: [PATCH 06/14] Oops, forgot to rename the imports in the 2 new files couchbaselabs/sync_gateway --> couchbase/sync_gateway --- src/github.com/couchbase/sync_gateway/db/deltaizer.go | 2 +- src/github.com/couchbase/sync_gateway/rest/multipart.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/db/deltaizer.go b/src/github.com/couchbase/sync_gateway/db/deltaizer.go index faa8105d50..aa17353307 100644 --- a/src/github.com/couchbase/sync_gateway/db/deltaizer.go +++ b/src/github.com/couchbase/sync_gateway/db/deltaizer.go @@ -4,7 +4,7 @@ import ( "fmt" "time" - "github.com/couchbaselabs/sync_gateway/base" + "github.com/couchbase/sync_gateway/base" "github.com/snej/zdelta-go" ) diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index 8f4ec9a9e4..a05df66f37 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -14,8 +14,8 @@ import ( "net/textproto" "strings" - "github.com/couchbaselabs/sync_gateway/base" - "github.com/couchbaselabs/sync_gateway/db" + "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/db" "github.com/snej/zdelta-go" ) From e68f4e1f5b2b3e98740493be1765ba4d0730ac72 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Fri, 27 Feb 2015 09:37:26 -0800 Subject: [PATCH 07/14] Added heuristics to detect attachments it's not worth trying to compress Checking for MIME types and filename extensions that indicate already- compressed data ("audio/mp3", ".zip", etc.) 
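The checks run in order: an attachment with an "encoding" property is
left alone; a filename extension that implies pre-compressed data vetoes
compression; otherwise the MIME type decides, where an explicit zip/gzip
type always loses and a known-compressible type (text, xml, html, json,
yaml) overrides the audio/image/video blacklist. With no hints at all
the heuristic stays optimistic. Illustrative calls, mirroring the new
unit test:

    meta := map[string]interface{}{}
    mayCompressAttachment("notes.txt", meta) // true: no hints, be optimistic
    mayCompressAttachment("photo.jpg", meta) // false: extension means compressed

    meta["content_type"] = "image/svg+xml"
    mayCompressAttachment("drawing", meta)   // true: "+xml" outweighs "image/"

    meta["encoding"] = "gzip"
    mayCompressAttachment("drawing", meta)   // false: already encoded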
--- .../couchbase/sync_gateway/db/attachment.go | 33 ++++++++++++++++++- .../sync_gateway/db/attachment_test.go | 23 +++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go index 79e7015163..862f2a4673 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment.go @@ -12,6 +12,7 @@ package db import ( "crypto/sha1" "encoding/base64" + "regexp" "github.com/couchbase/sync_gateway/base" ) @@ -104,7 +105,7 @@ func (db *Database) loadBodyAttachments(body Body, minRevpos int, deltaSrcKeys m if ok && revpos >= int64(minRevpos) { key := AttachmentKey(meta["digest"].(string)) var sourceKeys []AttachmentKey - if _, ok := meta["encoding"].(string); !ok { // leave encoded attachment alone + if mayCompressAttachment(name, meta) { if srcKey, ok := deltaSrcKeys[name]; ok { sourceKeys = []AttachmentKey{srcKey} } @@ -224,3 +225,33 @@ func DecodeAttachment(att interface{}) ([]byte, error) { return nil, base.HTTPErrorf(400, "invalid attachment data (type %T)", att) } } + +var kCompressedTypes, kGoodTypes, kBadTypes, kBadFilenames *regexp.Regexp + +func init() { + // MIME types that explicitly indicate they're compressed: + kCompressedTypes, _ = regexp.Compile(`(?i)\bg?zip\b`) + // MIME types that are compressible: + kGoodTypes, _ = regexp.Compile(`(?i)(^text)|(xml\b)|(\b(html|json|yaml)\b)`) + // ... or generally uncompressible: + kBadTypes, _ = regexp.Compile(`(?i)^(audio|image|video)/`) + // An interesting type is SVG (image/svg+xml) which matches _both_! (It's compressible.) + // See + + // Filename extensions of uncompressible types: + kBadFilenames, _ = regexp.Compile(`(?i)\.(zip|t?gz|rar|7z|jpe?g|png|gif|svgz|mp3|m4a|ogg|wav|aiff|mp4|mov|avi|theora)$`) +} + +// Returns true if this attachment is worth trying to compress. 
+func mayCompressAttachment(filename string, meta map[string]interface{}) bool { + if _, ok := meta["encoding"].(string); ok { + return false // leave encoded attachment alone + } else if kBadFilenames.MatchString(filename) { + return false + } else if contentType, ok := meta["content_type"].(string); ok { + return !kCompressedTypes.MatchString(contentType) && + (kGoodTypes.MatchString(contentType) || + !kBadTypes.MatchString(contentType)) + } + return true // be optimistic by default +} diff --git a/src/github.com/couchbase/sync_gateway/db/attachment_test.go b/src/github.com/couchbase/sync_gateway/db/attachment_test.go index 9441741a0f..ba85e328ba 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment_test.go @@ -150,3 +150,26 @@ func TestAttachmentDeltas(t *testing.T) { rawDelta, _ := base64.StdEncoding.DecodeString("ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT") assert.DeepEquals(t, cached, rawDelta) } + +func TestMayCompress(t *testing.T) { + meta := map[string]interface{}{} + assert.True(t, mayCompressAttachment("foo", meta)) + assert.True(t, mayCompressAttachment("foo.bar", meta)) + assert.True(t, mayCompressAttachment("foo.html", meta)) + assert.False(t, mayCompressAttachment("foo.jpg", meta)) + assert.False(t, mayCompressAttachment("foo.MP3", meta)) + assert.True(t, mayCompressAttachment("a.zippy.movie.txt", meta)) + + meta["content_type"] = "application/json" + assert.True(t, mayCompressAttachment("foo", meta)) + meta["content_type"] = "audio/tincan" + assert.False(t, mayCompressAttachment("foo", meta)) + meta["content_type"] = "image/svg+xml" + assert.True(t, mayCompressAttachment("foo", meta)) // tricky! + meta["content_type"] = "application/json+zip" + assert.False(t, mayCompressAttachment("foo", meta)) // also tricky! + + meta["content_type"] = "application/json" + meta["encoding"] = "gzip" + assert.False(t, mayCompressAttachment("foo", meta)) +} From 9ab0b35b0989eeefeca8fb94e4eba0cc6b0695da Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Sun, 1 Mar 2015 14:50:52 -0800 Subject: [PATCH 08/14] Don't add delta properties to attachment metadata. Also: refactoring * Decided not to represent zdelta as an attachment "encoding" property or add the "deltaSrc" property. Instead that's indicated by the MIME part headers. * As a consequence, documents sent with inline attachments don't use delta encoding. * Refactored db/attachment.go to help make the above possible. There's now an Attachment struct representing an attachment whose body is going to be sent in a response. It has a 'metadata' map that points to the metadata in the Body so it can modify it. 
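A rough sketch of the intended call pattern from inside the db package
(sketchPrepareAttachment and its maxInline parameter are illustrative,
not shipped code; assumes "net/textproto"):

    // Load one attachment and decide how it travels: inline base64 in
    // the JSON body, or as its own MIME part whose headers carry the
    // delta/encoding info instead of the metadata map.
    func sketchPrepareAttachment(att *Attachment, maxInline int) (textproto.MIMEHeader, error) {
        data, err := att.LoadData(true) // true: a zdelta body is acceptable
        if err != nil {
            return nil, err
        }
        if len(data) > maxInline {
            att.SetFollows() // moves the data out of the JSON
            // Part headers: Content-Disposition, plus
            // Content-Encoding/X-Delta-Source when the body is a delta.
            return att.Headers(false), nil
        }
        return nil, nil // small enough to stay inline
    }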
--- .../couchbase/sync_gateway/db/attachment.go | 350 +++++++++++------- .../sync_gateway/db/attachment_test.go | 13 +- .../couchbase/sync_gateway/db/crud.go | 22 +- .../couchbase/sync_gateway/rest/api_test.go | 13 +- .../couchbase/sync_gateway/rest/doc_api.go | 10 +- .../couchbase/sync_gateway/rest/multipart.go | 62 ++-- 6 files changed, 284 insertions(+), 186 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go index 862f2a4673..188e693640 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment.go @@ -12,6 +12,8 @@ package db import ( "crypto/sha1" "encoding/base64" + "fmt" + "net/textproto" "regexp" "github.com/couchbase/sync_gateway/base" @@ -21,108 +23,149 @@ import ( // In practice it's "sha1-" followed by a hex SHA-1 digest. type AttachmentKey string -// Given a CouchDB document body about to be stored in the database, goes through the _attachments -// dict, finds attachments with inline bodies, copies the bodies into the Couchbase db, and replaces -// the bodies with the 'digest' attributes which are the keys to retrieving them. -func (db *Database) storeAttachments(doc *document, body Body, generation int, parentRev string) error { - var parentAttachments map[string]interface{} - atts := BodyAttachments(body) - if atts == nil && body["_attachments"] != nil { - return base.HTTPErrorf(400, "Invalid _attachments") +// Represents an attachment. Contains a references to the metadata map in the Body, and can +// change it as needed. +type Attachment struct { + Name string // Filename (key in _attachments map) + followingData []byte // Data to appear in MIME part + deltaSource AttachmentKey // If data is a delta, this is the source attachment + meta map[string]interface{} // Points at the map inside the Body's _attachments map + db *Database // Database to load the data from +} + +// The MIME content type of the attachment, or an empty string if not set +func (a *Attachment) ContentType() string { + value, _ := a.meta["content_type"].(string) + return value +} + +// The attachment digest as stored in the "digest" metadata property. +func (a *Attachment) Key() AttachmentKey { + key, _ := a.meta["digest"].(string) + return AttachmentKey(key) +} + +// The attachment's MIME headers. If `full` is true, adds headers appropriate for a top-level +// MIME body, else adds ones appropriate for a nested part. 
+func (a *Attachment) Headers(full bool) textproto.MIMEHeader { + h := textproto.MIMEHeader{} + if a.deltaSource != "" { + h.Set("Content-Encoding", "zdelta") + h.Set("X-Delta-Source", string(a.deltaSource)) + } else if encoding, _ := a.meta["encoding"].(string); encoding != "" { + h.Set("Content-Encoding", encoding) } - for name, value := range atts { - meta, ok := value.(map[string]interface{}) - if !ok { - return base.HTTPErrorf(400, "Invalid _attachments") + if full { + if contentType := a.ContentType(); contentType != "" { + h.Set("Content-Type", contentType) } - data, exists := meta["data"] - if exists { - // Attachment contains data, so store it in the db: - attachment, err := DecodeAttachment(data) - if err != nil { - return err - } - key, err := db.setAttachment(attachment) - if err != nil { - return err - } + } else { + h.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", a.Name)) + } + return h +} - newMeta := map[string]interface{}{ - "stub": true, - "digest": string(key), - "revpos": generation, - } - if contentType, ok := meta["content_type"].(string); ok { - newMeta["content_type"] = contentType - } - if encoding := meta["encoding"]; encoding != nil { - newMeta["encoding"] = encoding - newMeta["encoded_length"] = len(attachment) - if length, ok := meta["length"].(float64); ok { - newMeta["length"] = length - } - } else { - newMeta["length"] = len(attachment) - } - atts[name] = newMeta +// The raw data of the attachment, if already loaded. May be gzipped, may be a delta. +func (a *Attachment) Data() []byte { + data := a.followingData + if data == nil { + data, _ = a.meta["data"].([]byte) + } + return data +} - } else { - // Attachment must be a stub that repeats a parent attachment - if meta["stub"] != true { - return base.HTTPErrorf(400, "Missing data of attachment %q", name) - } - if revpos, ok := base.ToInt64(meta["revpos"]); !ok || revpos < 1 { - return base.HTTPErrorf(400, "Missing/invalid revpos in stub attachment %q", name) - } - // Try to look up the attachment in the parent revision: - if parentAttachments == nil { - if parent, _ := db.getAvailableRev(doc, parentRev); parent != nil { - parentAttachments, _ = parent["_attachments"].(map[string]interface{}) - } - } - if parentAttachments != nil { - if parentAttachment := parentAttachments[name]; parentAttachment != nil { - atts[name] = parentAttachment - } - } else if meta["digest"] == nil { - return base.HTTPErrorf(400, "Missing digest in stub attachment %q", name) +// Loads the data of an attachment (inline). +// If `deltaOK` is true, and a.deltaSource is set, may load a delta. +func (a *Attachment) LoadData(deltaOK bool) ([]byte, error) { + data := a.Data() + var err error + if data == nil { + var sourceKeys []AttachmentKey + if deltaOK && a.deltaSource != "" && a.Compressible() { + sourceKeys = []AttachmentKey{a.deltaSource} + } + var deltaSource AttachmentKey + data, deltaSource, err = a.db.GetAttachmentMaybeAsDelta(a.Key(), sourceKeys) + if err == nil { + a.meta["data"] = data + a.deltaSource = deltaSource + if deltaSource != "" { + a.meta["zdeltasrc"] = string(deltaSource) } + delete(a.meta, "stub") } } - return nil + return data, err +} + +// Is an attachment's data to be stored outside the JSON body (i.e. in a MIME part)? 
+func (a *Attachment) Follows() bool { + return a.meta["follows"] == true +} + +// Converts an attachment from inline to following +func (a *Attachment) SetFollows() { + data := a.meta["data"] + if data != nil { + a.followingData, _ = decodeData(data) + delete(a.meta, "data") + delete(a.meta, "zdeltasrc") + a.meta["follows"] = true + } } -// Goes through a revisions '_attachments' map, loads attachments (by their 'digest' properties) -// and adds 'data' properties containing the data. The data is added as raw []byte; the JSON -// marshaler will convert that to base64. +var kCompressedTypes, kGoodTypes, kBadTypes, kBadFilenames *regexp.Regexp + +func init() { + // MIME types that explicitly indicate they're compressed: + kCompressedTypes, _ = regexp.Compile(`(?i)\bg?zip\b`) + // MIME types that are compressible: + kGoodTypes, _ = regexp.Compile(`(?i)(^text)|(xml\b)|(\b(html|json|yaml)\b)`) + // ... or generally uncompressible: + kBadTypes, _ = regexp.Compile(`(?i)^(audio|image|video)/`) + // An interesting type is SVG (image/svg+xml) which matches _both_! (It's compressible.) + // See + + // Filename extensions of uncompressible types: + kBadFilenames, _ = regexp.Compile(`(?i)\.(zip|t?gz|rar|7z|jpe?g|png|gif|svgz|mp3|m4a|ogg|wav|aiff|mp4|mov|avi|theora)$`) +} + +// Returns true if this attachment is worth trying to compress. +func (a *Attachment) Compressible() bool { + if _, ok := a.meta["encoding"].(string); ok { + return false // leave encoded attachment alone + } else if kBadFilenames.MatchString(a.Name) { + return false + } else if contentType := a.ContentType(); contentType != "" { + return !kCompressedTypes.MatchString(contentType) && + (kGoodTypes.MatchString(contentType) || + !kBadTypes.MatchString(contentType)) + } + return true // be optimistic by default +} + +//////// LOADING ATTACHMENTS: + +// Goes through a revisions '_attachments' map and creates an Attachment object for each +// attachment. Also updates the Body to be safely mutable. // If minRevpos is > 0, then only attachments that have been changed in a revision of that -// generation or later are loaded. -func (db *Database) loadBodyAttachments(body Body, minRevpos int, deltaSrcKeys map[string]AttachmentKey) (Body, error) { +// generation or later are returned. +func (db *Database) findAttachments(body Body, minRevpos int, deltaSrcKeys map[string]AttachmentKey) (Body, []*Attachment) { body = body.ImmutableAttachmentsCopy() - for name, value := range BodyAttachments(body) { + var attachments []*Attachment + for name, value := range body.Attachments() { meta := value.(map[string]interface{}) revpos, ok := base.ToInt64(meta["revpos"]) if ok && revpos >= int64(minRevpos) { - key := AttachmentKey(meta["digest"].(string)) - var sourceKeys []AttachmentKey - if mayCompressAttachment(name, meta) { - if srcKey, ok := deltaSrcKeys[name]; ok { - sourceKeys = []AttachmentKey{srcKey} - } - } - data, srcKey, err := db.GetAttachmentMaybeAsDelta(key, sourceKeys) - if err != nil { - return nil, err - } - meta["data"] = data - delete(meta, "stub") - if srcKey != "" { - meta["encoding"] = "zdelta" - meta["deltasrc"] = srcKey - } + attachments = append(attachments, &Attachment{ + Name: name, + meta: meta, + db: db, + deltaSource: deltaSrcKeys[name], + }) } } - return body, nil + return body, attachments } // Retrieves an attachment's body given its key. 
@@ -172,20 +215,81 @@ func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []At return } -// Returns the digests of all attachments in a Body, as a map of attachment names to keys. -func getAttachmentDigests(body Body) map[string]AttachmentKey { - keys := map[string]AttachmentKey{} - for name, value := range BodyAttachments(body) { - meta := value.(map[string]interface{}) - if key := AttachmentKey(meta["digest"].(string)); key != "" { - keys[name] = key +//////// STORING ATTACHMENTS: + +// Given a CouchDB document body about to be stored in the database, goes through the _attachments +// dict, finds attachments with inline bodies, copies the bodies into the Couchbase db, and replaces +// the bodies with the 'digest' attributes which are the keys to retrieving them. +func (db *Database) storeAttachments(doc *document, body Body, generation int, parentRev string) error { + var parentAttachments map[string]interface{} + atts := body.Attachments() + if atts == nil && body["_attachments"] != nil { + return base.HTTPErrorf(400, "Invalid _attachments") + } + for name, value := range atts { + meta, ok := value.(map[string]interface{}) + if !ok { + return base.HTTPErrorf(400, "Invalid _attachments") + } + data, exists := meta["data"] + if exists { + // Attachment contains data, so store it in the db: + attachment, err := decodeData(data) + if err != nil { + return err + } + key, err := db.storeAttachment(attachment) + if err != nil { + return err + } + + newMeta := map[string]interface{}{ + "stub": true, + "digest": string(key), + "revpos": generation, + } + if contentType, ok := meta["content_type"].(string); ok { + newMeta["content_type"] = contentType + } + if encoding := meta["encoding"]; encoding != nil { + newMeta["encoding"] = encoding + newMeta["encoded_length"] = len(attachment) + if length, ok := meta["length"].(float64); ok { + newMeta["length"] = length + } + } else { + newMeta["length"] = len(attachment) + } + atts[name] = newMeta + + } else { + // Attachment must be a stub that repeats a parent attachment + if meta["stub"] != true { + return base.HTTPErrorf(400, "Missing data of attachment %q", name) + } + if revpos, ok := base.ToInt64(meta["revpos"]); !ok || revpos < 1 { + return base.HTTPErrorf(400, "Missing/invalid revpos in stub attachment %q", name) + } + // Try to look up the attachment in the parent revision: + if parentAttachments == nil { + if parent, _ := db.getAvailableRev(doc, parentRev); parent != nil { + parentAttachments, _ = parent["_attachments"].(map[string]interface{}) + } + } + if parentAttachments != nil { + if parentAttachment := parentAttachments[name]; parentAttachment != nil { + atts[name] = parentAttachment + } + } else if meta["digest"] == nil { + return base.HTTPErrorf(400, "Missing digest in stub attachment %q", name) + } } } - return keys + return nil } // Stores a base64-encoded attachment and returns the key to get it by. -func (db *Database) setAttachment(attachment []byte) (AttachmentKey, error) { +func (db *Database) storeAttachment(attachment []byte) (AttachmentKey, error) { key := SHA1DigestKey(attachment) _, err := db.Bucket.AddRaw(attachmentKeyToDocKey(key), 0, attachment) if err == nil { @@ -204,54 +308,36 @@ func SHA1DigestKey(data []byte) AttachmentKey { } // Returns the "_attachments" property as a map. 
-func BodyAttachments(body Body) map[string]interface{} { +func (body Body) Attachments() map[string]interface{} { atts, _ := body["_attachments"].(map[string]interface{}) return atts } +// Returns the digests of all attachments in a Body, as a map of attachment names to keys. +func (body Body) AttachmentDigests() map[string]AttachmentKey { + keys := map[string]AttachmentKey{} + for name, value := range body.Attachments() { + meta := value.(map[string]interface{}) + if key := AttachmentKey(meta["digest"].(string)); key != "" { + keys[name] = key + } + } + return keys +} + // The Couchbase bucket key under which to store an attachment func attachmentKeyToDocKey(key AttachmentKey) string { return "_sync:att:" + string(key) } -// Base64-encodes an attachment if it's present as a raw byte array -func DecodeAttachment(att interface{}) ([]byte, error) { - switch att := att.(type) { +// Base64-decodes attachment data if it's present as a string +func decodeData(data interface{}) ([]byte, error) { + switch data := data.(type) { case []byte: - return att, nil + return data, nil case string: - return base64.StdEncoding.DecodeString(att) + return base64.StdEncoding.DecodeString(data) default: - return nil, base.HTTPErrorf(400, "invalid attachment data (type %T)", att) + return nil, base.HTTPErrorf(400, "invalid attachment data (type %T)", data) } } - -var kCompressedTypes, kGoodTypes, kBadTypes, kBadFilenames *regexp.Regexp - -func init() { - // MIME types that explicitly indicate they're compressed: - kCompressedTypes, _ = regexp.Compile(`(?i)\bg?zip\b`) - // MIME types that are compressible: - kGoodTypes, _ = regexp.Compile(`(?i)(^text)|(xml\b)|(\b(html|json|yaml)\b)`) - // ... or generally uncompressible: - kBadTypes, _ = regexp.Compile(`(?i)^(audio|image|video)/`) - // An interesting type is SVG (image/svg+xml) which matches _both_! (It's compressible.) - // See - - // Filename extensions of uncompressible types: - kBadFilenames, _ = regexp.Compile(`(?i)\.(zip|t?gz|rar|7z|jpe?g|png|gif|svgz|mp3|m4a|ogg|wav|aiff|mp4|mov|avi|theora)$`) -} - -// Returns true if this attachment is worth trying to compress. 
-func mayCompressAttachment(filename string, meta map[string]interface{}) bool { - if _, ok := meta["encoding"].(string); ok { - return false // leave encoded attachment alone - } else if kBadFilenames.MatchString(filename) { - return false - } else if contentType, ok := meta["content_type"].(string); ok { - return !kCompressedTypes.MatchString(contentType) && - (kGoodTypes.MatchString(contentType) || - !kBadTypes.MatchString(contentType)) - } - return true // be optimistic by default -} diff --git a/src/github.com/couchbase/sync_gateway/db/attachment_test.go b/src/github.com/couchbase/sync_gateway/db/attachment_test.go index ba85e328ba..d4efcbe7f9 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment_test.go @@ -54,6 +54,7 @@ func TestAttachments(t *testing.T) { rev1output := `{"_attachments":{"bye.txt":{"data":"Z29vZGJ5ZSBjcnVlbCB3b3JsZA==","digest":"sha1-l+N7VpXGnoxMm8xfvtWPbz2YvDc=","length":19,"revpos":1},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"1-54f3a105fb903018c160712ffddb74dc"}` response, err := db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev1output) log.Printf("Create rev 2...") @@ -69,12 +70,14 @@ func TestAttachments(t *testing.T) { rev2output := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"data":"aGVsbG8gd29ybGQ=","digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` response, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev2output) log.Printf("Retrieve doc with atts_since...") rev2Aoutput := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2},"hello.txt":{"digest":"sha1-Kq5sNclPz7QV2+lfQIuc6R7oRu0=","length":11,"revpos":1,"stub":true}},"_id":"doc1","_rev":"2-08b42c51334c0469bd060e6d9e6d797b"}` response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-54f3a105fb903018c160712ffddb74dc", "1-foo", "993-bar"}, false) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev2Aoutput) log.Printf("Create rev 3...") @@ -90,6 +93,7 @@ func TestAttachments(t *testing.T) { rev3output := `{"_attachments":{"bye.txt":{"data":"YnllLXlh","digest":"sha1-gwwPApfQR9bzBKpqoEYwFmKp98A=","length":6,"revpos":2}},"_id":"doc1","_rev":"3-252b9fa1f306930bffc07e7d75b77faf"}` response, err = db.GetRevWithAttachments("doc1", "", false, []string{}, false) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev3output) log.Printf("Expire body of rev 1, then add a child...") // test fix of #498 @@ -133,15 +137,17 @@ func TestAttachmentDeltas(t *testing.T) { assertNoError(t, err, "Couldn't update document") log.Printf("Retrieve doc with delta-encoded attachment...") - rev2output := 
`{"_attachments":{"bye.txt":{"data":"ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT","deltasrc":"sha1-l5fhr3wrVdXDCNkamTn8KypCswQ=","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","encoding":"zdelta","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` + rev2output := `{"_attachments":{"bye.txt":{"data":"ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","length":37,"revpos":2,"zdeltasrc":"sha1-l5fhr3wrVdXDCNkamTn8KypCswQ="}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` response, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, true) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev2output) log.Printf("Retrieve doc without delta-encoded attachment...") rev2Boutput := `{"_attachments":{"bye.txt":{"data":"VGhpcyBpcyBhIHRlc3QuIFRoaXMgaXMgb25seSBhIHRlc3QuCg==","digest":"sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f134cabf4d9d26b0a5c8a3b566f2c80f"}` response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, false) assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) assert.Equals(t, tojson(response.Body), rev2Boutput) // Verify contents of delta cache: @@ -173,3 +179,8 @@ func TestMayCompress(t *testing.T) { meta["encoding"] = "gzip" assert.False(t, mayCompressAttachment("foo", meta)) } + +func mayCompressAttachment(name string, meta map[string]interface{}) bool { + att := Attachment{Name: name, meta: meta} + return att.Compressible() +} diff --git a/src/github.com/couchbase/sync_gateway/db/crud.go b/src/github.com/couchbase/sync_gateway/db/crud.go index 6c7a8cc477..3ce735826d 100644 --- a/src/github.com/couchbase/sync_gateway/db/crud.go +++ b/src/github.com/couchbase/sync_gateway/db/crud.go @@ -164,9 +164,10 @@ func (db *Database) GetRevJSON(docid, revid string) ([]byte, error) { // Result of GetRevWithAttachments. type RevResponse struct { - Body Body // The parsed doc body - OldRevJSON []byte // The JSON source of the old revision, to use for a delta - OldRevID string // The revID of the old revision + Body Body // The parsed doc body + Attachments []*Attachment // Attachments whose data will be sent + OldRevJSON []byte // The JSON source of the old revision, to use for a delta + OldRevID string // The revID of the old revision } // Returns the body of a revision of a document, including attachments. Based on GetRev. 
@@ -177,7 +178,7 @@ type RevResponse struct { func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions bool, knownRevIDs []string, useDeltas bool) (r RevResponse, err error) { var doc *document r.Body, doc, err = db.getRev(docid, revid, listRevisions) - if err != nil || knownRevIDs == nil || (!useDeltas && len(BodyAttachments(r.Body)) == 0) { + if err != nil || knownRevIDs == nil || (!useDeltas && len(r.Body.Attachments()) == 0) { return } @@ -205,16 +206,25 @@ func (db *Database) GetRevWithAttachments(docid, revid string, listRevisions boo if r.OldRevJSON, _ = db.getRevisionJSON(doc, r.OldRevID); r.OldRevJSON != nil { var deltaSrcBody Body json.Unmarshal(r.OldRevJSON, &deltaSrcBody) - deltaSrcKeys = getAttachmentDigests(deltaSrcBody) + deltaSrcKeys = deltaSrcBody.AttachmentDigests() } } } } // Add attachment bodies: - r.Body, err = db.loadBodyAttachments(r.Body, minRevpos, deltaSrcKeys) + r.Body, r.Attachments = db.findAttachments(r.Body, minRevpos, deltaSrcKeys) return } +func (r *RevResponse) LoadAttachmentsInline(deltaOK bool) error { + for _, att := range r.Attachments { + if _, err := att.LoadData(deltaOK); err != nil { + return err + } + } + return nil +} + // Returns the body of a revision of a document, as well as the document's current channels // and the user/roles it grants channel access to. func (db *Database) GetRevAndChannels(docid, revid string, listRevisions bool) (body Body, channels channels.ChannelMap, access UserAccessMap, roleAccess UserAccessMap, err error) { diff --git a/src/github.com/couchbase/sync_gateway/rest/api_test.go b/src/github.com/couchbase/sync_gateway/rest/api_test.go index 4d7e5386e5..778335f866 100644 --- a/src/github.com/couchbase/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbase/sync_gateway/rest/api_test.go @@ -1734,16 +1734,17 @@ func TestGetAttachmentAsDelta(t *testing.T) { // Get the doc with deltas enabled, in JSON format. The JSON itself will not be delta- // compressed since it has to contain the delta-compressed attachments. 
+ /* (not supported currently) attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") assert.Equals(t, attach1["encoding"], "zdelta") assert.Equals(t, attach1["deltasrc"], digest1) delta, err := base64.StdEncoding.DecodeString(attach1["data"].(string)) assert.Equals(t, err, nil) - // Decode the delta: result, err := zdelta.ApplyDelta([]byte(attachmentBody), delta) assert.Equals(t, err, nil) assert.Equals(t, string(result), attachmentBody2) + */ // Get the doc with deltas enabled, in MIME multipart format: headers := map[string]string{"Accept": "multipart/*"} @@ -1760,7 +1761,7 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, part.Header.Get("X-Delta-Source"), revID1) // Decode the delta: log.Printf("Decoding delta with source: %s", bodyData1) - delta, _ = ioutil.ReadAll(part) + delta, _ := ioutil.ReadAll(part) bodyData2, err := zdelta.ApplyDelta(bodyData1, delta) assert.Equals(t, err, nil) log.Printf("Decoded delta: %s", bodyData2) @@ -1769,8 +1770,6 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, json.Unmarshal(bodyData2, &body), nil) attachments := body["_attachments"].(map[string]interface{}) attach1 = attachments["attach1"].(map[string]interface{}) - assert.Equals(t, attach1["encoding"], "zdelta") - assert.Equals(t, attach1["deltasrc"], digest1) assert.Equals(t, attach1["follows"], true) assert.Equals(t, attach1["data"], nil) @@ -1779,10 +1778,13 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, err, nil) assert.Equals(t, part.FileName(), "attach1") assert.DeepEquals(t, part.Header["Content-Type"], []string(nil)) + assert.Equals(t, part.Header.Get("Content-Encoding"), "zdelta") + assert.Equals(t, part.Header.Get("X-Delta-Source"), digest1) + assert.Equals(t, part.Header.Get("Content-Disposition"), "attachment; filename=\"attach1\"") delta, err = ioutil.ReadAll(part) assert.Equals(t, err, nil) // Decode the delta: - result, err = zdelta.ApplyDelta([]byte(attachmentBody), delta) + result, err := zdelta.ApplyDelta([]byte(attachmentBody), delta) assert.Equals(t, err, nil) assert.Equals(t, string(result), attachmentBody2) @@ -1790,6 +1792,7 @@ func TestGetAttachmentAsDelta(t *testing.T) { response = rt.sendRequest("GET", "/db/doc1/attach1?deltas="+digest1, "") assertStatus(t, response, 200) assert.Equals(t, response.HeaderMap.Get("Content-Encoding"), "zdelta") + assert.Equals(t, response.HeaderMap.Get("X-Delta-Source"), digest1) delta, err = ioutil.ReadAll(response.Body) assert.Equals(t, err, nil) result, err = zdelta.ApplyDelta([]byte(attachmentBody), delta) diff --git a/src/github.com/couchbase/sync_gateway/rest/doc_api.go b/src/github.com/couchbase/sync_gateway/rest/doc_api.go index 10ab51f141..27dd69b58b 100644 --- a/src/github.com/couchbase/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbase/sync_gateway/rest/doc_api.go @@ -58,8 +58,7 @@ func (h *handler) handleGetDoc() error { if h.requestAccepts("multipart/") && (hasBodies || !h.requestAccepts("application/json")) { canCompress := strings.Contains(h.rq.Header.Get("X-Accept-Part-Encoding"), "gzip") return h.writeMultipart("related", func(writer *multipart.Writer) error { - WriteMultipartDocument(responseInfo, writer, canCompress) - return nil + return WriteMultipartDocument(responseInfo, writer, canCompress) }) } else if responseInfo.OldRevJSON != nil && !hasBodies { h.setHeader("Content-Type", "application/json") @@ -69,6 +68,9 @@ func (h *handler) handleGetDoc() error { var cmp zdelta.Compressor cmp.WriteDelta(responseInfo.OldRevJSON, 
target, h.response) } else { + if err := responseInfo.LoadAttachmentsInline(false); err != nil { + return err + } h.writeJSON(responseInfo.Body) } } else { @@ -139,7 +141,7 @@ func (h *handler) handleGetAttachment() error { if body == nil { return kNotFoundError } - meta, ok := db.BodyAttachments(body)[attachmentName].(map[string]interface{}) + meta, ok := body.Attachments()[attachmentName].(map[string]interface{}) if !ok { return base.HTTPErrorf(http.StatusNotFound, "missing attachment %s", attachmentName) } @@ -207,7 +209,7 @@ func (h *handler) handlePutAttachment() error { } // find attachment (if it existed) - attachments := db.BodyAttachments(body) + attachments := body.Attachments() if attachments == nil { attachments = make(map[string]interface{}) } diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index a05df66f37..7425fd6d9c 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -29,50 +29,34 @@ var MinCompressedJSONSize = 300 //////// WRITING: // Writes a revision to a MIME multipart writer, encoding large attachments as separate parts. -func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) { - type attInfo struct { - name string - data []byte - meta map[string]interface{} - } - +func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) error { // First extract the attachments that should follow: - following := []attInfo{} - for name, value := range db.BodyAttachments(r.Body) { - meta := value.(map[string]interface{}) - if meta["stub"] != true { - var err error - var info attInfo - info.data, err = db.DecodeAttachment(meta["data"]) - if info.data == nil { - base.Warn("Couldn't decode attachment %q of doc %q: %v", name, r.Body["_id"], err) - meta["stub"] = true - delete(meta, "data") - } else if len(info.data) > MaxInlineAttachmentSize { - info.name = name - info.meta = meta - following = append(following, info) - meta["follows"] = true - delete(meta, "data") - } + for _, att := range r.Attachments { + if data, err := att.LoadData(true); err != nil { + return err + } else if len(data) > MaxInlineAttachmentSize { + att.SetFollows() } } // Write the main JSON body: - writeJSONPart(r, "application/json", compress, writer) + if err := writeJSONPart(r, "application/json", compress, writer); err != nil { + return err + } // Write the following attachments - for _, info := range following { - partHeaders := textproto.MIMEHeader{} - if contentType, ok := info.meta["content_type"].(string); ok { - if info.meta["encoding"] == nil { - partHeaders.Set("Content-Type", contentType) + for _, att := range r.Attachments { + if att.Follows() { + part, err := writer.CreatePart(att.Headers(false)) + if err == nil { + _, err = part.Write(att.Data()) + } + if err != nil { + return err } } - partHeaders.Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", info.name)) - part, _ := writer.CreatePart(partHeaders) - part.Write(info.data) } + return nil } // Adds a new part to the given multipart writer, containing the given revision. 
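
On the client side, the `?deltas=` form of GET exercised by the tests above can be consumed roughly like this. This is a hypothetical helper, not part of this patch; it assumes imports of fmt, io/ioutil, net/http, net/url, and the zdelta-go API already used elsewhere in the diff:

    // fetchAttachment requests an attachment, offering a locally cached copy
    // (identified by its digest) as a delta source. If the gateway answers
    // with Content-Encoding: zdelta, the body is a delta against that copy.
    func fetchAttachment(baseURL, docID, attName, cachedDigest string, cached []byte) ([]byte, error) {
    	resp, err := http.Get(fmt.Sprintf("%s/db/%s/%s?deltas=%s",
    		baseURL, docID, attName, url.QueryEscape(cachedDigest)))
    	if err != nil {
    		return nil, err
    	}
    	defer resp.Body.Close()
    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		return nil, err
    	}
    	if resp.Header.Get("Content-Encoding") == "zdelta" {
    		// X-Delta-Source names the digest the delta was computed from:
    		if src := resp.Header.Get("X-Delta-Source"); src != cachedDigest {
    			return nil, fmt.Errorf("unexpected delta source %q", src)
    		}
    		return zdelta.ApplyDelta(cached, body)
    	}
    	return body, nil // full (possibly gzip-encoded) attachment body
    }
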
@@ -90,8 +74,10 @@ func WriteRevisionAsPart(r db.RevResponse, isError bool, compress bool, writer *
 		// Write doc as multipart, including attachments:
 		docWriter, err := createNestedMultipart(writer, "related", partHeaders)
 		if err == nil {
-			WriteMultipartDocument(r, docWriter, compress)
-			err = docWriter.Close()
+			err = WriteMultipartDocument(r, docWriter, compress)
+			if err == nil {
+				err = docWriter.Close()
+			}
 		}
 		return err
 	} else {
@@ -194,7 +180,7 @@ func ReadMultipartDocument(reader *multipart.Reader) (db.Body, error) {
 
 	// Collect the attachments with a "follows" property, which will appear as MIME parts:
 	followingAttachments := map[string]map[string]interface{}{}
-	for name, value := range db.BodyAttachments(body) {
+	for name, value := range body.Attachments() {
 		if meta := value.(map[string]interface{}); meta["follows"] == true {
 			followingAttachments[name] = meta
 		}
@@ -281,7 +267,7 @@ func md5DigestKey(data []byte) string {
 
 // Does this Body contain any attachments with a "data" property?
 func hasInlineAttachments(body db.Body) bool {
-	for _, value := range db.BodyAttachments(body) {
+	for _, value := range body.Attachments() {
 		if meta, ok := value.(map[string]interface{}); ok && meta["data"] != nil {
 			return true
 		}

From 5bbf4d0144eec91c0493bc562eb7c381f685f9a0 Mon Sep 17 00:00:00 2001
From: Jens Alfke
Date: Mon, 2 Mar 2015 11:56:21 -0800
Subject: [PATCH 09/14] GZip-compress attachments that don't have delta sources

If no earlier version of an attachment is available (e.g. it's a new
attachment), it can't be delta-compressed; in that case use regular GZip
instead, assuming the metadata indicates it's compressible.

Fixed an ambiguity in the Attachment struct: the `deltaSource` field
indicated both a _potential_ delta source and the _actual_ delta source, so
there wasn't a way to tell whether the attachment data is a delta.
---
 .../couchbase/sync_gateway/db/attachment.go   | 35 +++++++++++--------
 .../couchbase/sync_gateway/rest/api_test.go   | 34 ++++++++++++------
 .../couchbase/sync_gateway/rest/multipart.go  | 20 +++++------
 3 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go
index 188e693640..19f0731724 100644
--- a/src/github.com/couchbase/sync_gateway/db/attachment.go
+++ b/src/github.com/couchbase/sync_gateway/db/attachment.go
@@ -26,11 +26,12 @@ type AttachmentKey string
 // Represents an attachment. Contains a references to the metadata map in the Body, and can
 // change it as needed.
 type Attachment struct {
-	Name          string                 // Filename (key in _attachments map)
-	followingData []byte                 // Data to appear in MIME part
-	deltaSource   AttachmentKey          // If data is a delta, this is the source attachment
-	meta          map[string]interface{} // Points at the map inside the Body's _attachments map
-	db            *Database              // Database to load the data from
+	Name                 string                 // Filename (key in _attachments map)
+	followingData        []byte                 // Data to appear in MIME part
+	possibleDeltaSources []AttachmentKey        // Possible attachments to use as delta source
+	deltaSource          AttachmentKey          // Delta source attachment ID
+	meta                 map[string]interface{} // Points at the map inside the Body's _attachments map
+	db                   *Database              // Database to load the data from
 }
 
 // The MIME content type of the attachment, or an empty string if not set
@@ -75,19 +76,21 @@ func (a *Attachment) Data() []byte {
 }
 
 // Loads the data of an attachment (inline).
-// If `deltaOK` is true, and a.deltaSource is set, may load a delta.
+// If `deltaOK` is true, and a.possibleDeltaSources is set, may load a delta. func (a *Attachment) LoadData(deltaOK bool) ([]byte, error) { data := a.Data() var err error if data == nil { var sourceKeys []AttachmentKey - if deltaOK && a.deltaSource != "" && a.Compressible() { - sourceKeys = []AttachmentKey{a.deltaSource} + if deltaOK && a.possibleDeltaSources != nil && a.Compressible() { + sourceKeys = a.possibleDeltaSources } + var deltaSource AttachmentKey data, deltaSource, err = a.db.GetAttachmentMaybeAsDelta(a.Key(), sourceKeys) if err == nil { a.meta["data"] = data + a.possibleDeltaSources = nil a.deltaSource = deltaSource if deltaSource != "" { a.meta["zdeltasrc"] = string(deltaSource) @@ -132,8 +135,8 @@ func init() { // Returns true if this attachment is worth trying to compress. func (a *Attachment) Compressible() bool { - if _, ok := a.meta["encoding"].(string); ok { - return false // leave encoded attachment alone + if _, ok := a.meta["encoding"].(string); ok || a.deltaSource != "" { + return false // leave encoded/delta'd attachment alone } else if kBadFilenames.MatchString(a.Name) { return false } else if contentType := a.ContentType(); contentType != "" { @@ -157,11 +160,15 @@ func (db *Database) findAttachments(body Body, minRevpos int, deltaSrcKeys map[s meta := value.(map[string]interface{}) revpos, ok := base.ToInt64(meta["revpos"]) if ok && revpos >= int64(minRevpos) { + var possibleDeltas []AttachmentKey + if src, ok := deltaSrcKeys[name]; ok { + possibleDeltas = []AttachmentKey{src} + } attachments = append(attachments, &Attachment{ - Name: name, - meta: meta, - db: db, - deltaSource: deltaSrcKeys[name], + Name: name, + meta: meta, + db: db, + possibleDeltaSources: possibleDeltas, }) } } diff --git a/src/github.com/couchbase/sync_gateway/rest/api_test.go b/src/github.com/couchbase/sync_gateway/rest/api_test.go index 778335f866..213d482a88 100644 --- a/src/github.com/couchbase/sync_gateway/rest/api_test.go +++ b/src/github.com/couchbase/sync_gateway/rest/api_test.go @@ -1698,6 +1698,12 @@ func TestGetAttachmentAsDelta(t *testing.T) { assertStatus(t, response, 200) return response.Body.Bytes() } + getDocAsMultipart := func(queries string) *multipart.Reader { + headers := map[string]string{"Accept": "multipart/*", "X-Accept-Part-Encoding": "gzip"} + response := rt.sendRequestWithHeaders("GET", "/db/doc1"+queries, "", headers) + assertStatus(t, response, 200) + return readMultipartResponse(response) + } getDocAttach1 := func(queries string) map[string]interface{} { response := getDoc(queries) var body db.Body @@ -1706,11 +1712,14 @@ func TestGetAttachmentAsDelta(t *testing.T) { return attachments["attach1"].(map[string]interface{}) } + oldMin := MinCompressiblePartSize oldMax := MaxInlineAttachmentSize oldMinDeltaSavings := db.MinDeltaSavings + MinCompressiblePartSize = 0 // Temporarily make all MIME parts gzippable MaxInlineAttachmentSize = 0 // Temporarily force all attachments to be MIME parts - db.MinDeltaSavings = 0 + db.MinDeltaSavings = 0 // Temporarily use deltas regardless of size savings defer func() { + MinCompressiblePartSize = oldMin MaxInlineAttachmentSize = oldMax db.MinDeltaSavings = oldMinDeltaSavings }() @@ -1722,6 +1731,17 @@ func TestGetAttachmentAsDelta(t *testing.T) { attach1 := getDocAttach1("") digest1 := attach1["digest"].(string) + // Get the doc in MIME multipart format and make sure the parts are gzipped: + mp := getDocAsMultipart("?attachments=true") + part, err := mp.NextPart() + assert.Equals(t, err, nil) + assert.Equals(t, 
part.Header.Get("Content-Type"), "application/json") + assert.Equals(t, part.Header.Get("Content-Encoding"), "gzip") + part, err = mp.NextPart() + assert.Equals(t, err, nil) + assert.Equals(t, part.FileName(), "attach1") + assert.Equals(t, part.Header.Get("Content-Encoding"), "gzip") + // Update doc attachment: attachmentBody2 := "This is test. This is only a test. The test ends." putDocAttach1("?rev="+revID1, attachmentBody2) @@ -1747,14 +1767,9 @@ func TestGetAttachmentAsDelta(t *testing.T) { */ // Get the doc with deltas enabled, in MIME multipart format: - headers := map[string]string{"Accept": "multipart/*"} - attach1 = getDocAttach1("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") - response := rt.sendRequestWithHeaders("GET", "/db/doc1?attachments=true&atts_since=[\""+revID1+"\"]&deltas=true", "", headers) - assertStatus(t, response, 200) - mp := readMultipartResponse(response) - + mp = getDocAsMultipart("?attachments=true&atts_since=[\"" + revID1 + "\"]&deltas=true") // Check the (delta-encoded) JSON part: - part, err := mp.NextPart() + part, err = mp.NextPart() assert.Equals(t, err, nil) assert.Equals(t, part.Header.Get("Content-Type"), "application/json") assert.Equals(t, part.Header.Get("Content-Encoding"), "zdelta") @@ -1780,7 +1795,6 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.DeepEquals(t, part.Header["Content-Type"], []string(nil)) assert.Equals(t, part.Header.Get("Content-Encoding"), "zdelta") assert.Equals(t, part.Header.Get("X-Delta-Source"), digest1) - assert.Equals(t, part.Header.Get("Content-Disposition"), "attachment; filename=\"attach1\"") delta, err = ioutil.ReadAll(part) assert.Equals(t, err, nil) // Decode the delta: @@ -1789,7 +1803,7 @@ func TestGetAttachmentAsDelta(t *testing.T) { assert.Equals(t, string(result), attachmentBody2) // Now get the attachment on its own, as a delta: - response = rt.sendRequest("GET", "/db/doc1/attach1?deltas="+digest1, "") + response := rt.sendRequest("GET", "/db/doc1/attach1?deltas="+digest1, "") assertStatus(t, response, 200) assert.Equals(t, response.HeaderMap.Get("Content-Encoding"), "zdelta") assert.Equals(t, response.HeaderMap.Get("X-Delta-Source"), digest1) diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index 7425fd6d9c..513c57fb15 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -23,8 +23,8 @@ import ( // MIME part. var MaxInlineAttachmentSize = 200 -// JSON bodies smaller than this won't be GZip-encoded. -var MinCompressedJSONSize = 300 +// MIME parts (JSON bodies or attachments) smaller than this won't be GZip-encoded. +var MinCompressiblePartSize = 300 //////// WRITING: @@ -47,10 +47,7 @@ func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress // Write the following attachments for _, att := range r.Attachments { if att.Follows() { - part, err := writer.CreatePart(att.Headers(false)) - if err == nil { - _, err = part.Write(att.Data()) - } + err := writePart(att.Data(), compress && att.Compressible(), att.Headers(false), writer) if err != nil { return err } @@ -93,7 +90,7 @@ func WriteRevisionAsPart(r db.RevResponse, isError bool, compress bool, writer * } // Writes the JSON body of a revision as a part to a multipart writer. 
-func writeJSONPart(r db.RevResponse, contentType string, gzipCompress bool, writer *multipart.Writer) (err error) { +func writeJSONPart(r db.RevResponse, contentType string, gzipCompress bool, writer *multipart.Writer) error { bytes, err := json.Marshal(r.Body) if err != nil { return err @@ -103,17 +100,20 @@ func writeJSONPart(r db.RevResponse, contentType string, gzipCompress bool, writ partHeaders.Set("Content-Type", contentType) if r.OldRevJSON != nil && len(bytes) > db.MinDeltaSavings { + gzipCompress = false delta, err := zdelta.CreateDelta(r.OldRevJSON, bytes) if err == nil && len(delta)+db.MinDeltaSavings < len(bytes) { bytes = delta - gzipCompress = false partHeaders.Set("Content-Encoding", "zdelta") partHeaders.Set("X-Delta-Source", r.OldRevID) } } + return writePart(bytes, gzipCompress, partHeaders, writer) +} +func writePart(bytes []byte, gzipCompress bool, partHeaders textproto.MIMEHeader, writer *multipart.Writer) error { if gzipCompress { - if len(bytes) < MinCompressedJSONSize { + if len(bytes) < MinCompressiblePartSize { gzipCompress = false } else { partHeaders.Set("Content-Encoding", "gzip") @@ -132,7 +132,7 @@ func writeJSONPart(r db.RevResponse, contentType string, gzipCompress bool, writ } else { _, err = part.Write(bytes) } - return + return err } //////// READING: From 6c0f5308d71bdc15d008ba65eb0989bbb3383858 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Mon, 2 Mar 2015 15:37:39 -0800 Subject: [PATCH 10/14] In multipart response make all delta'd attachments "follows" And in a JSON single-doc response, make sure to always load the data of attachments. --- .../couchbase/sync_gateway/db/attachment.go | 8 +++++-- .../couchbase/sync_gateway/rest/doc_api.go | 22 +++++++++++-------- .../couchbase/sync_gateway/rest/multipart.go | 2 +- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go index 19f0731724..e02d064752 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment.go @@ -50,7 +50,7 @@ func (a *Attachment) Key() AttachmentKey { // MIME body, else adds ones appropriate for a nested part. func (a *Attachment) Headers(full bool) textproto.MIMEHeader { h := textproto.MIMEHeader{} - if a.deltaSource != "" { + if a.IsDelta() { h.Set("Content-Encoding", "zdelta") h.Set("X-Delta-Source", string(a.deltaSource)) } else if encoding, _ := a.meta["encoding"].(string); encoding != "" { @@ -75,6 +75,10 @@ func (a *Attachment) Data() []byte { return data } +func (a *Attachment) IsDelta() bool { + return a.deltaSource != "" +} + // Loads the data of an attachment (inline). // If `deltaOK` is true, and a.possibleDeltaSources is set, may load a delta. func (a *Attachment) LoadData(deltaOK bool) ([]byte, error) { @@ -135,7 +139,7 @@ func init() { // Returns true if this attachment is worth trying to compress. 
func (a *Attachment) Compressible() bool { - if _, ok := a.meta["encoding"].(string); ok || a.deltaSource != "" { + if _, ok := a.meta["encoding"].(string); ok || a.IsDelta() { return false // leave encoded/delta'd attachment alone } else if kBadFilenames.MatchString(a.Name) { return false diff --git a/src/github.com/couchbase/sync_gateway/rest/doc_api.go b/src/github.com/couchbase/sync_gateway/rest/doc_api.go index 27dd69b58b..7730696bf3 100644 --- a/src/github.com/couchbase/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbase/sync_gateway/rest/doc_api.go @@ -54,24 +54,28 @@ func (h *handler) handleGetDoc() error { } h.setHeader("Etag", responseInfo.Body["_rev"].(string)) - hasBodies := (attachmentsSince != nil && responseInfo.Body["_attachments"] != nil) + hasBodies := (attachmentsSince != nil && len(responseInfo.Attachments) > 0) if h.requestAccepts("multipart/") && (hasBodies || !h.requestAccepts("application/json")) { + // Multipart response: canCompress := strings.Contains(h.rq.Header.Get("X-Accept-Part-Encoding"), "gzip") return h.writeMultipart("related", func(writer *multipart.Writer) error { return WriteMultipartDocument(responseInfo, writer, canCompress) }) - } else if responseInfo.OldRevJSON != nil && !hasBodies { - h.setHeader("Content-Type", "application/json") - h.setHeader("Content-Encoding", "zdelta") - h.setHeader("X-Delta-Source", responseInfo.OldRevID) - target, _ := json.Marshal(responseInfo.Body) - var cmp zdelta.Compressor - cmp.WriteDelta(responseInfo.OldRevJSON, target, h.response) } else { + // JSON response: if err := responseInfo.LoadAttachmentsInline(false); err != nil { return err } - h.writeJSON(responseInfo.Body) + if responseInfo.OldRevJSON != nil && !hasBodies { + h.setHeader("Content-Type", "application/json") + h.setHeader("Content-Encoding", "zdelta") + h.setHeader("X-Delta-Source", responseInfo.OldRevID) + target, _ := json.Marshal(responseInfo.Body) + var cmp zdelta.Compressor + cmp.WriteDelta(responseInfo.OldRevJSON, target, h.response) + } else { + h.writeJSON(responseInfo.Body) + } } } else { var revids []string diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index 513c57fb15..280e3a02ee 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -34,7 +34,7 @@ func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress for _, att := range r.Attachments { if data, err := att.LoadData(true); err != nil { return err - } else if len(data) > MaxInlineAttachmentSize { + } else if len(data) > MaxInlineAttachmentSize || att.IsDelta() { att.SetFollows() } } From 8dc823cf3fbf9fcb076be5cca59f4275b0c1402d Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Wed, 4 Mar 2015 11:24:40 -0800 Subject: [PATCH 11/14] Use new gzip.Writer cache in rest/multipart.go Had to move the cache code down to the base package. 
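
A gzip.Writer carries sizable deflate state, so recycling writers through a sync.Pool avoids a large allocation per part. Usage of the relocated helpers (shown in the diff below), as a sketch in which `payload` is hypothetical:

    var buf bytes.Buffer
    gz := base.GetGZipWriter(&buf) // reuses a pooled writer when available
    _, err := gz.Write(payload)
    base.ReturnGZipWriter(gz) // Close()s (flushing the stream) and re-pools it
    if err != nil {
    	return err
    }
    // buf now holds a complete gzip stream.
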
--- .../couchbase/sync_gateway/base/util.go | 23 +++++++++++++++- .../rest/encoded_response_writer.go | 26 ++----------------- .../couchbase/sync_gateway/rest/multipart.go | 4 +-- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/base/util.go b/src/github.com/couchbase/sync_gateway/base/util.go index 681370b0ba..7b774fe176 100644 --- a/src/github.com/couchbase/sync_gateway/base/util.go +++ b/src/github.com/couchbase/sync_gateway/base/util.go @@ -10,12 +10,13 @@ package base import ( - "strconv" + "compress/gzip" "crypto/rand" "encoding/json" "fmt" "io" "regexp" + "strconv" "strings" "sync" ) @@ -143,3 +144,23 @@ func (v *IntMax) SetIfMax(value int64) { v.i = value } } + +//////// GZIP WRITER CACHE: + +var zipperCache sync.Pool + +// Gets a gzip writer from the pool, or creates a new one if the pool is empty: +func GetGZipWriter(writer io.Writer) *gzip.Writer { + if gz, ok := zipperCache.Get().(*gzip.Writer); ok { + gz.Reset(writer) + return gz + } else { + return gzip.NewWriter(writer) + } +} + +// Closes a gzip writer and returns it to the pool: +func ReturnGZipWriter(gz *gzip.Writer) { + gz.Close() + zipperCache.Put(gz) +} diff --git a/src/github.com/couchbase/sync_gateway/rest/encoded_response_writer.go b/src/github.com/couchbase/sync_gateway/rest/encoded_response_writer.go index 2ff90e5fd6..f962d0c81b 100644 --- a/src/github.com/couchbase/sync_gateway/rest/encoded_response_writer.go +++ b/src/github.com/couchbase/sync_gateway/rest/encoded_response_writer.go @@ -2,10 +2,8 @@ package rest import ( "compress/gzip" - "io" "net/http" "strings" - "sync" "github.com/couchbase/sync_gateway/base" ) @@ -73,7 +71,7 @@ func (w *EncodedResponseWriter) sniff(bytes []byte) { w.Header().Set("Content-Encoding", "gzip") w.Header().Del("Content-Length") // length is unknown due to compression - w.gz = GetGZipWriter(w.ResponseWriter) + w.gz = base.GetGZipWriter(w.ResponseWriter) } // Flushes the GZip encoder buffer, and if possible flushes output to the network. @@ -90,27 +88,7 @@ func (w *EncodedResponseWriter) Flush() { // The writer should be closed when output is complete, to flush the GZip encoder buffer. 
func (w *EncodedResponseWriter) Close() { if w.gz != nil { - ReturnGZipWriter(w.gz) + base.ReturnGZipWriter(w.gz) w.gz = nil } } - -//////// GZIP WRITER CACHE: - -var zipperCache sync.Pool - -// Gets a gzip writer from the pool, or creates a new one if the pool is empty: -func GetGZipWriter(writer io.Writer) *gzip.Writer { - if gz, ok := zipperCache.Get().(*gzip.Writer); ok { - gz.Reset(writer) - return gz - } else { - return gzip.NewWriter(writer) - } -} - -// Closes a gzip writer and returns it to the pool: -func ReturnGZipWriter(gz *gzip.Writer) { - gz.Close() - zipperCache.Put(gz) -} diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index 280e3a02ee..d40ceb3b92 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -126,9 +126,9 @@ func writePart(bytes []byte, gzipCompress bool, partHeaders textproto.MIMEHeader } if gzipCompress { - gz := gzip.NewWriter(part) + gz := base.GetGZipWriter(part) _, err = gz.Write(bytes) - gz.Close() + base.ReturnGZipWriter(gz) } else { _, err = part.Write(bytes) } From 66c80fae91f0ffbb13742b691113e0aaa921107a Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Fri, 6 Mar 2015 14:50:58 -0800 Subject: [PATCH 12/14] Delta-compress encoded attachments Previously, attachments with "encoding":"gzip" weren't eligible for delta compression because their data is gzipped which obviously won't delta-compress well. But gzip-encoded attachments tend to be those that are very compressible inside the encoding (text and such), so it's worth delta-encoding them. We just have to decode the gzip first. --- .../couchbase/sync_gateway/db/attachment.go | 165 ++++++++++++------ .../sync_gateway/db/attachment_test.go | 58 +++++- .../couchbase/sync_gateway/db/deltaizer.go | 7 +- .../couchbase/sync_gateway/rest/doc_api.go | 8 +- .../couchbase/sync_gateway/rest/multipart.go | 7 +- src/github.com/snej/zdelta-go | 2 +- 6 files changed, 181 insertions(+), 66 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go index e02d064752..43c5489618 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment.go @@ -10,9 +10,12 @@ package db import ( + "bytes" + "compress/gzip" "crypto/sha1" "encoding/base64" "fmt" + "io/ioutil" "net/textproto" "regexp" @@ -20,8 +23,40 @@ import ( ) // Key for retrieving an attachment from Couchbase. -// In practice it's "sha1-" followed by a hex SHA-1 digest. -type AttachmentKey string +type AttachmentKey struct { + Digest string // "sha1-" followed by a hex SHA-1 digest. + Encoding string // empty or "gzip" +} + +func attachmentKeyFromMeta(meta map[string]interface{}) AttachmentKey { + digest, _ := meta["digest"].(string) + encoding, _ := meta["encoding"].(string) + return AttachmentKey{Digest: digest, Encoding: encoding} +} + +// Returns an AttachmentKey for an attachment body, based on its SHA-1 digest. 
+func SHA1DigestKey(data []byte) AttachmentKey { + digester := sha1.New() + digester.Write(data) + return AttachmentKey{Digest: "sha1-" + base64.StdEncoding.EncodeToString(digester.Sum(nil))} +} + +func (key *AttachmentKey) HasGZipEncoding() bool { + return key.Encoding == "gzip" +} + +func (key *AttachmentKey) EncodedString() string { + if key.HasGZipEncoding() { + return "Z" + key.Digest + } else { + return "-" + key.Digest + } +} + +// The Couchbase bucket key under which to store an attachment +func (key *AttachmentKey) bucketKey() string { + return "_sync:att:" + key.Digest +} // Represents an attachment. Contains a references to the metadata map in the Body, and can // change it as needed. @@ -29,8 +64,8 @@ type Attachment struct { Name string // Filename (key in _attachments map) followingData []byte // Data to appear in MIME part possibleDeltaSources []AttachmentKey // Possible attachments to use as delta source - deltaSource AttachmentKey // Delta source attachment ID - meta map[string]interface{} // Points at the map inside the Body's _attachments map + deltaSource *AttachmentKey // Delta source attachment ID + meta map[string]interface{} // Points inside the Body's _attachments map db *Database // Database to load the data from } @@ -40,10 +75,13 @@ func (a *Attachment) ContentType() string { return value } +func (a *Attachment) IsEncoded() bool { + return a.meta["encoding"] != nil +} + // The attachment digest as stored in the "digest" metadata property. func (a *Attachment) Key() AttachmentKey { - key, _ := a.meta["digest"].(string) - return AttachmentKey(key) + return attachmentKeyFromMeta(a.meta) } // The attachment's MIME headers. If `full` is true, adds headers appropriate for a top-level @@ -52,7 +90,7 @@ func (a *Attachment) Headers(full bool) textproto.MIMEHeader { h := textproto.MIMEHeader{} if a.IsDelta() { h.Set("Content-Encoding", "zdelta") - h.Set("X-Delta-Source", string(a.deltaSource)) + h.Set("X-Delta-Source", string(a.deltaSource.Digest)) } else if encoding, _ := a.meta["encoding"].(string); encoding != "" { h.Set("Content-Encoding", encoding) } @@ -67,7 +105,7 @@ func (a *Attachment) Headers(full bool) textproto.MIMEHeader { } // The raw data of the attachment, if already loaded. May be gzipped, may be a delta. -func (a *Attachment) Data() []byte { +func (a *Attachment) RawData() []byte { data := a.followingData if data == nil { data, _ = a.meta["data"].([]byte) @@ -76,13 +114,13 @@ func (a *Attachment) Data() []byte { } func (a *Attachment) IsDelta() bool { - return a.deltaSource != "" + return a.deltaSource != nil } // Loads the data of an attachment (inline). // If `deltaOK` is true, and a.possibleDeltaSources is set, may load a delta. 
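
The one-character prefix added by `EncodedString` keeps raw and gzip-encoded blobs from colliding as delta-cache identifiers, while `bucketKey` deliberately uses the digest alone. A quick sketch (digest value illustrative):

    raw := AttachmentKey{Digest: "sha1-abc="}
    gzipped := AttachmentKey{Digest: "sha1-abc=", Encoding: "gzip"}
    fmt.Println(raw.EncodedString())     // "-sha1-abc="
    fmt.Println(gzipped.EncodedString()) // "Zsha1-abc="
    // bucketKey() for either is "_sync:att:sha1-abc=".
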
func (a *Attachment) LoadData(deltaOK bool) ([]byte, error) { - data := a.Data() + data := a.RawData() var err error if data == nil { var sourceKeys []AttachmentKey @@ -90,16 +128,17 @@ func (a *Attachment) LoadData(deltaOK bool) ([]byte, error) { sourceKeys = a.possibleDeltaSources } - var deltaSource AttachmentKey + var deltaSource *AttachmentKey data, deltaSource, err = a.db.GetAttachmentMaybeAsDelta(a.Key(), sourceKeys) if err == nil { a.meta["data"] = data a.possibleDeltaSources = nil a.deltaSource = deltaSource - if deltaSource != "" { - a.meta["zdeltasrc"] = string(deltaSource) + if deltaSource != nil { + a.meta["zdeltasrc"] = deltaSource.Digest } delete(a.meta, "stub") + delete(a.meta, "encoded_length") } } return data, err @@ -114,7 +153,7 @@ func (a *Attachment) Follows() bool { func (a *Attachment) SetFollows() { data := a.meta["data"] if data != nil { - a.followingData, _ = decodeData(data) + a.followingData, _ = decodeIfBase64(data) delete(a.meta, "data") delete(a.meta, "zdeltasrc") a.meta["follows"] = true @@ -139,8 +178,8 @@ func init() { // Returns true if this attachment is worth trying to compress. func (a *Attachment) Compressible() bool { - if _, ok := a.meta["encoding"].(string); ok || a.IsDelta() { - return false // leave encoded/delta'd attachment alone + if a.IsDelta() { + return false // leave delta'd attachment alone } else if kBadFilenames.MatchString(a.Name) { return false } else if contentType := a.ContentType(); contentType != "" { @@ -179,50 +218,82 @@ func (db *Database) findAttachments(body Body, minRevpos int, deltaSrcKeys map[s return body, attachments } -// Retrieves an attachment's body given its key. +// Retrieves an attachment's body given its key. Does not decode GZip-encoded attachments. func (db *Database) GetAttachment(key AttachmentKey) ([]byte, error) { - return db.Bucket.GetRaw(attachmentKeyToDocKey(key)) + return db.Bucket.GetRaw(key.bucketKey()) +} + +func unzip(input []byte) (data []byte, err error) { + reader := bytes.NewReader(input) + var gz *gzip.Reader + if gz, err = gzip.NewReader(reader); err != nil { + return nil, err + } + return ioutil.ReadAll(gz) } // Retrieves an attachment's body, preferably as a delta from one of the versions specified // in `sourceKeys` -func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []AttachmentKey) (result []byte, sourceKey AttachmentKey, err error) { +func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []AttachmentKey) (result []byte, sourceKey *AttachmentKey, err error) { + base.TEMP("GetAttachmentMaybeAsDelta: key=%v, sourceKeys=%v", key, sourceKeys) // First, attempt to reuse a cached delta without even having to load the attachment: - for _, sourceKey = range sourceKeys { - if result = db.getCachedAttachmentZDelta(sourceKey, key); result != nil { + for _, possibleSourceKey := range sourceKeys { + if result = db.getCachedAttachmentZDelta(possibleSourceKey, key); result != nil { // Found a cached delta - if len(result) == 0 { + if len(result) > 0 { + sourceKey = &possibleSourceKey + } else { // ... but it's not worth using - sourceKey = "" result, err = db.GetAttachment(key) } return } } - // No cached deltas, so create one: + // No cached deltas. 
First get the current attachment: target, err := db.GetAttachment(key) if err != nil { return } - for _, sourceKey = range sourceKeys { - if src, _ := db.Bucket.GetRaw(attachmentKeyToDocKey(sourceKey)); src != nil { - // Found a previous revision; generate a delta: - result = db.generateAttachmentZDelta(src, target, sourceKey, key) - if result != nil { - if len(result) == 0 { - // ... but it's not worth using - break + if len(sourceKeys) > 0 { + // Going to find a source version to delta it with, but first decode it if needed: + decodedTarget := target + if key.HasGZipEncoding() { + if decodedTarget, _ = unzip(target); decodedTarget == nil { + base.Warn("GetAttachmentMaybeAsDelta: Couldn't decode gzip-encoded target attachment %s", + key.Digest) + return target, nil, nil // Won't decode; just give up & return raw + } + } + + for _, possibleSourceKey := range sourceKeys { + if src, _ := db.Bucket.GetRaw(possibleSourceKey.bucketKey()); src != nil { + // Found a previous revision; generate a delta: + if possibleSourceKey.HasGZipEncoding() { + if src, err = unzip(src); err != nil { + base.Warn("GetAttachmentMaybeAsDelta: Couldn't decode gzip-encoded source attachment %s", + possibleSourceKey.Digest) + continue + } + } + result = db.generateAttachmentZDelta(src, decodedTarget, possibleSourceKey, key) + base.TEMP("Generated delta: %x", result) + if result != nil { + if len(result) > 0 { + sourceKey = &possibleSourceKey + return + } else { + // ... but it's not worth using + break + } } - return } } } - // No previous attachments available so return entire body: + // No previous attachments available so return entire (maybe-encoded) body: result = target - sourceKey = "" return } @@ -245,7 +316,7 @@ func (db *Database) storeAttachments(doc *document, body Body, generation int, p data, exists := meta["data"] if exists { // Attachment contains data, so store it in the db: - attachment, err := decodeData(data) + attachment, err := decodeIfBase64(data) if err != nil { return err } @@ -256,7 +327,7 @@ func (db *Database) storeAttachments(doc *document, body Body, generation int, p newMeta := map[string]interface{}{ "stub": true, - "digest": string(key), + "digest": key.Digest, "revpos": generation, } if contentType, ok := meta["content_type"].(string); ok { @@ -302,7 +373,7 @@ func (db *Database) storeAttachments(doc *document, body Body, generation int, p // Stores a base64-encoded attachment and returns the key to get it by. func (db *Database) storeAttachment(attachment []byte) (AttachmentKey, error) { key := SHA1DigestKey(attachment) - _, err := db.Bucket.AddRaw(attachmentKeyToDocKey(key), 0, attachment) + _, err := db.Bucket.AddRaw(key.bucketKey(), 0, attachment) if err == nil { base.LogTo("Attach", "\tAdded attachment %q", key) } @@ -311,13 +382,6 @@ func (db *Database) storeAttachment(attachment []byte) (AttachmentKey, error) { //////// HELPERS: -// Returns an AttachmentKey for an attachment body, based on its SHA-1 digest. -func SHA1DigestKey(data []byte) AttachmentKey { - digester := sha1.New() - digester.Write(data) - return AttachmentKey("sha1-" + base64.StdEncoding.EncodeToString(digester.Sum(nil))) -} - // Returns the "_attachments" property as a map. 
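
End to end, the decode-then-delta path above behaves like this sketch, where gzippedV1 and gzippedV2 are hypothetical stored blobs with "encoding":"gzip" and `unzip` is the helper defined earlier in this diff:

    // Deltas are computed over the *decoded* bodies, so attachments stored
    // gzip-encoded still delta-compress well:
    srcDecoded, err := unzip(gzippedV1)
    if err != nil {
    	return err
    }
    dstDecoded, err := unzip(gzippedV2)
    if err != nil {
    	return err
    }
    delta, err := zdelta.CreateDelta(srcDecoded, dstDecoded)
    // The delta ships with "zdeltasrc" metadata; a client applies it to its
    // decoded copy of v1 to reconstruct the decoded v2.
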
func (body Body) Attachments() map[string]interface{} { atts, _ := body["_attachments"].(map[string]interface{}) @@ -328,21 +392,16 @@ func (body Body) Attachments() map[string]interface{} { func (body Body) AttachmentDigests() map[string]AttachmentKey { keys := map[string]AttachmentKey{} for name, value := range body.Attachments() { - meta := value.(map[string]interface{}) - if key := AttachmentKey(meta["digest"].(string)); key != "" { + meta, _ := value.(map[string]interface{}) + if key := attachmentKeyFromMeta(meta); key.Digest != "" { keys[name] = key } } return keys } -// The Couchbase bucket key under which to store an attachment -func attachmentKeyToDocKey(key AttachmentKey) string { - return "_sync:att:" + string(key) -} - // Base64-decodes attachment data if it's present as a string -func decodeData(data interface{}) ([]byte, error) { +func decodeIfBase64(data interface{}) ([]byte, error) { switch data := data.(type) { case []byte: return data, nil diff --git a/src/github.com/couchbase/sync_gateway/db/attachment_test.go b/src/github.com/couchbase/sync_gateway/db/attachment_test.go index b534d42b08..ef6548922c 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment_test.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment_test.go @@ -151,8 +151,60 @@ func TestAttachmentDeltas(t *testing.T) { assert.Equals(t, tojson(response.Body), rev2Boutput) // Verify contents of delta cache: - cached := db.getCachedAttachmentZDelta(AttachmentKey("sha1-l5fhr3wrVdXDCNkamTn8KypCswQ="), - AttachmentKey("sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8=")) + cached := db.getCachedAttachmentZDelta(AttachmentKey{Digest: "sha1-l5fhr3wrVdXDCNkamTn8KypCswQ="}, + AttachmentKey{Digest: "sha1-TQ2UKLk7BtEA2lUatosI4xl9xb8="}) + rawDelta, _ := base64.StdEncoding.DecodeString("ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT") + assert.DeepEquals(t, cached, rawDelta) +} + +// Just like the above except the attachment is stored gzip-encoded. +// We want to test that it can be sent as a delta even though it's encoded. 
+func TestEncodedAttachmentDeltas(t *testing.T) { + context, err := NewDatabaseContext("db", testBucket(), false, CacheOptions{}) + assertNoError(t, err, "Couldn't create context for database 'db'") + defer context.Close() + db, err := CreateDatabase(context) + assertNoError(t, err, "Couldn't create database 'db'") + + // For this test, ensure delta will be used even if it's not much smaller: + oldSavings := MinDeltaSavings + MinDeltaSavings = 0 + defer func() { MinDeltaSavings = oldSavings }() + + // Rev 1: + log.Printf("Create rev 1...") + rev1input := `{"_attachments": {"bye.txt": {"data":"H4sIAIJ491QAAwvJyCxWAKJEheKSosy8dIW0/CKF0uJUhcw8hZLU4hKQUEpqTkmiQnJ+bkFRanFxZn4eFwAY3suFNgAAAA==","encoding":"gzip","length":54}}}` + var body Body + json.Unmarshal([]byte(rev1input), &body) + revid, err := db.Put("doc1", unjson(rev1input)) + assertNoError(t, err, "Couldn't create document") + assert.Equals(t, revid, "1-07e61f284791d24abd54d0e1b500d95d") + + log.Printf("Create rev 2...") + rev2str := `{"_attachments": {"bye.txt": {"data": "H4sIAMN491QAAwvJyCxWAKJEhZLU4hI9hRAoPz8vpxImyAUAxIC6BiUAAAA=","encoding":"gzip","length":37}}}` + var body2 Body + json.Unmarshal([]byte(rev2str), &body2) + body2["_rev"] = revid + revid, err = db.Put("doc1", body2) + assertNoError(t, err, "Couldn't update document") + + log.Printf("Retrieve doc with delta-encoded attachment...") + rev2output := `{"_attachments":{"bye.txt":{"data":"ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT","digest":"sha1-gLw2yxdL4jSP/dAPx865sGY0O8E=","encoding":"gzip","length":37,"revpos":2,"zdeltasrc":"sha1-LFTCiZninzFf4GJoOfTmlmey9OE="}},"_id":"doc1","_rev":"2-f6ffdc7f35597c04eb62da3b2a42710b"}` + response, err := db.GetRevWithAttachments("doc1", "", false, []string{"1-07e61f284791d24abd54d0e1b500d95d", "1-foo", "993-bar"}, true) + assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) + assert.Equals(t, tojson(response.Body), rev2output) + + log.Printf("Retrieve doc without delta-encoded attachment...") + rev2Boutput := `{"_attachments":{"bye.txt":{"data":"H4sIAMN491QAAwvJyCxWAKJEhZLU4hI9hRAoPz8vpxImyAUAxIC6BiUAAAA=","digest":"sha1-gLw2yxdL4jSP/dAPx865sGY0O8E=","encoding":"gzip","length":37,"revpos":2}},"_id":"doc1","_rev":"2-f6ffdc7f35597c04eb62da3b2a42710b"}` + response, err = db.GetRevWithAttachments("doc1", "", false, []string{"1-c0c61706d3f3692aacc0ec0a91425a65", "1-foo", "993-bar"}, false) + assertNoError(t, err, "Couldn't get document") + response.LoadAttachmentsInline(true) + assert.Equals(t, tojson(response.Body), rev2Boutput) + + // Verify contents of delta cache: + cached := db.getCachedAttachmentZDelta(AttachmentKey{Digest: "sha1-LFTCiZninzFf4GJoOfTmlmey9OE=", Encoding: "gzip"}, + AttachmentKey{Digest: "sha1-gLw2yxdL4jSP/dAPx865sGY0O8E=", Encoding: "gzip"}) rawDelta, _ := base64.StdEncoding.DecodeString("ddOrAncoWekSVIHD9u3a9KRKQ4Hu8QxT") assert.DeepEquals(t, cached, rawDelta) } @@ -177,7 +229,7 @@ func TestMayCompress(t *testing.T) { meta["content_type"] = "application/json" meta["encoding"] = "gzip" - assert.False(t, mayCompressAttachment("foo", meta)) + assert.True(t, mayCompressAttachment("foo", meta)) } func mayCompressAttachment(name string, meta map[string]interface{}) bool { diff --git a/src/github.com/couchbase/sync_gateway/db/deltaizer.go b/src/github.com/couchbase/sync_gateway/db/deltaizer.go index aa17353307..9299534f6e 100644 --- a/src/github.com/couchbase/sync_gateway/db/deltaizer.go +++ b/src/github.com/couchbase/sync_gateway/db/deltaizer.go @@ -18,13 +18,13 @@ var MinDeltaSavings = 100 // If the 
delta is not worth using (not enough space savings), returns an empty array. // If no delta is cached, returns nil. func (db *Database) getCachedAttachmentZDelta(srcKey, dstKey AttachmentKey) []byte { - return db._getCachedZDelta("att", string(srcKey), string(dstKey)) + return db._getCachedZDelta("att", srcKey.EncodedString(), dstKey.EncodedString()) } -// Computes & caches the delta between two attachments. +// Computes & caches the delta between two attachments given their data. // If the delta is not worth using (not enough space savings), returns an empty array. func (db *Database) generateAttachmentZDelta(src, dst []byte, srcKey, dstKey AttachmentKey) []byte { - return db._generateZDelta(src, dst, "att", string(srcKey), string(dstKey)) + return db._generateZDelta(src, dst, "att", srcKey.EncodedString(), dstKey.EncodedString()) } // INTERNAL: @@ -44,6 +44,7 @@ func (db *Database) _generateZDelta(src, dst []byte, idType, srcID, dstID string if delta == nil { return nil } + base.TEMP("ZDELTA: Created %x", delta) base.LogTo("Delta", "Computed zdelta %s %s --> %s: saved %d bytes", idType, srcID, dstID, int64(len(dst))-int64(len(delta))) if len(delta)+MinDeltaSavings > len(dst) { diff --git a/src/github.com/couchbase/sync_gateway/rest/doc_api.go b/src/github.com/couchbase/sync_gateway/rest/doc_api.go index 7730696bf3..0b7d3543d5 100644 --- a/src/github.com/couchbase/sync_gateway/rest/doc_api.go +++ b/src/github.com/couchbase/sync_gateway/rest/doc_api.go @@ -158,11 +158,11 @@ func (h *handler) handleGetAttachment() error { deltaStrs := strings.Split(deltasQ, ",") deltaSourceKeys = make([]db.AttachmentKey, len(deltaStrs)) for i, d := range deltaStrs { - deltaSourceKeys[i] = db.AttachmentKey(d) + deltaSourceKeys[i] = db.AttachmentKey{Digest: d} } } - data, deltaSource, err := h.db.GetAttachmentMaybeAsDelta(db.AttachmentKey(digest), deltaSourceKeys) + data, deltaSource, err := h.db.GetAttachmentMaybeAsDelta(db.AttachmentKey{Digest: digest}, deltaSourceKeys) if err != nil { return err } @@ -171,9 +171,9 @@ func (h *handler) handleGetAttachment() error { h.setHeader("Content-Type", contentType) } - if deltaSource != "" { + if deltaSource != nil { h.setHeader("Content-Encoding", "zdelta") - h.setHeader("X-Delta-Source", string(deltaSource)) + h.setHeader("X-Delta-Source", deltaSource.Digest) } else if encoding, ok := meta["encoding"].(string); ok { h.setHeader("Content-Encoding", encoding) } diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go index d40ceb3b92..c54ca856bf 100644 --- a/src/github.com/couchbase/sync_gateway/rest/multipart.go +++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go @@ -47,7 +47,10 @@ func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress // Write the following attachments for _, att := range r.Attachments { if att.Follows() { - err := writePart(att.Data(), compress && att.Compressible(), att.Headers(false), writer) + err := writePart(att.RawData(), + compress && !att.IsEncoded() && att.Compressible(), + att.Headers(false), + writer) if err != nil { return err } @@ -218,7 +221,7 @@ func ReadMultipartDocument(reader *multipart.Reader) (db.Body, error) { } // Look up the attachment by its digest: - digest := string(db.SHA1DigestKey(data)) + digest := db.SHA1DigestKey(data).Digest name, meta := findFollowingAttachment(digest) if meta == nil { name, meta = findFollowingAttachment(md5DigestKey(data)) // CouchDB uses MD5 :-p diff --git a/src/github.com/snej/zdelta-go 
b/src/github.com/snej/zdelta-go index 5bc8b0aedf..373dae5ddf 160000 --- a/src/github.com/snej/zdelta-go +++ b/src/github.com/snej/zdelta-go @@ -1 +1 @@ -Subproject commit 5bc8b0aedfa256908b1b0fea83e5486df34051bf +Subproject commit 373dae5ddf03a974ac0081049cdb1fc6172c189d From 1da065edd04753d211efe74794ff8d32f29b3955 Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Fri, 6 Mar 2015 14:55:32 -0800 Subject: [PATCH 13/14] Oops, removed some leftover debugging logging calls --- src/github.com/couchbase/sync_gateway/db/attachment.go | 1 - src/github.com/couchbase/sync_gateway/db/deltaizer.go | 1 - src/github.com/couchbase/sync_gateway/rest/view_api.go | 1 - 3 files changed, 3 deletions(-) diff --git a/src/github.com/couchbase/sync_gateway/db/attachment.go b/src/github.com/couchbase/sync_gateway/db/attachment.go index 43c5489618..44be5f9999 100644 --- a/src/github.com/couchbase/sync_gateway/db/attachment.go +++ b/src/github.com/couchbase/sync_gateway/db/attachment.go @@ -278,7 +278,6 @@ func (db *Database) GetAttachmentMaybeAsDelta(key AttachmentKey, sourceKeys []At } } result = db.generateAttachmentZDelta(src, decodedTarget, possibleSourceKey, key) - base.TEMP("Generated delta: %x", result) if result != nil { if len(result) > 0 { sourceKey = &possibleSourceKey diff --git a/src/github.com/couchbase/sync_gateway/db/deltaizer.go b/src/github.com/couchbase/sync_gateway/db/deltaizer.go index 9299534f6e..30d1a27ec7 100644 --- a/src/github.com/couchbase/sync_gateway/db/deltaizer.go +++ b/src/github.com/couchbase/sync_gateway/db/deltaizer.go @@ -44,7 +44,6 @@ func (db *Database) _generateZDelta(src, dst []byte, idType, srcID, dstID string if delta == nil { return nil } - base.TEMP("ZDELTA: Created %x", delta) base.LogTo("Delta", "Computed zdelta %s %s --> %s: saved %d bytes", idType, srcID, dstID, int64(len(dst))-int64(len(delta))) if len(delta)+MinDeltaSavings > len(dst) { diff --git a/src/github.com/couchbase/sync_gateway/rest/view_api.go b/src/github.com/couchbase/sync_gateway/rest/view_api.go index dce39eb89b..f8a8642fe4 100644 --- a/src/github.com/couchbase/sync_gateway/rest/view_api.go +++ b/src/github.com/couchbase/sync_gateway/rest/view_api.go @@ -14,7 +14,6 @@ import ( // HTTP handler for GET _design/$ddoc func (h *handler) handleGetDesignDoc() error { ddocID := h.PathVar("ddoc") - base.TEMP("GetDesignDoc %q", ddocID) var result interface{} if ddocID == "sync_gateway" { // we serve this content here so that CouchDB 1.2 has something to From 540b1c86dab10db7877202949d28aac97089ef9e Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Mon, 30 Mar 2015 18:11:58 -0700 Subject: [PATCH 14/14] Fixed some bugs with _bulk_get responses 1. Don't panic if the request doesn't have a "docs" property 2. 
 .../couchbase/sync_gateway/rest/bulk_api.go   |  7 +++-
 .../couchbase/sync_gateway/rest/multipart.go  | 36 ++++++++++---------
 2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/src/github.com/couchbase/sync_gateway/rest/bulk_api.go b/src/github.com/couchbase/sync_gateway/rest/bulk_api.go
index 889dd07366..bf0c59342b 100644
--- a/src/github.com/couchbase/sync_gateway/rest/bulk_api.go
+++ b/src/github.com/couchbase/sync_gateway/rest/bulk_api.go
@@ -304,8 +304,13 @@ func (h *handler) handleBulkGet() error {
 		return err
 	}
 
+	docs, ok := body["docs"].([]interface{})
+	if !ok {
+		return base.HTTPErrorf(http.StatusBadRequest, "Missing 'docs' property in request body")
+	}
+
 	err = h.writeMultipart("mixed", func(writer *multipart.Writer) error {
-		for _, item := range body["docs"].([]interface{}) {
+		for _, item := range docs {
 			var body db.Body
 			var attsSince []string
 			var err error
diff --git a/src/github.com/couchbase/sync_gateway/rest/multipart.go b/src/github.com/couchbase/sync_gateway/rest/multipart.go
index c54ca856bf..d19b5a44f6 100644
--- a/src/github.com/couchbase/sync_gateway/rest/multipart.go
+++ b/src/github.com/couchbase/sync_gateway/rest/multipart.go
@@ -28,17 +28,29 @@ var MinCompressiblePartSize = 300
 
 //////// WRITING:
 
-// Writes a revision to a MIME multipart writer, encoding large attachments as separate parts.
-func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) error {
-	// First extract the attachments that should follow:
+// Loads the bodies of the Attachment objects in the response, and indicates whether the
+// response should be sent as multipart.
+func loadAttachments(r db.RevResponse) (useMultipart bool, err error) {
 	for _, att := range r.Attachments {
 		if data, err := att.LoadData(true); err != nil {
-			return err
+			return false, err
 		} else if len(data) > MaxInlineAttachmentSize || att.IsDelta() {
 			att.SetFollows()
+			useMultipart = true
 		}
 	}
+	return
+}
 
+// Writes a revision to a MIME multipart writer, encoding large attachments as separate parts.
+func WriteMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) error {
+	if _, err := loadAttachments(r); err != nil {
+		return err
+	}
+	return writeLoadedMultipartDocument(r, writer, compress)
+}
+
+func writeLoadedMultipartDocument(r db.RevResponse, writer *multipart.Writer, compress bool) error {
 	// Write the main JSON body:
 	if err := writeJSONPart(r, "application/json", compress, writer); err != nil {
 		return err
@@ -70,11 +82,13 @@ func WriteRevisionAsPart(r db.RevResponse, isError bool, compress bool, writer *
 		partHeaders.Set("X-Rev-ID", revID)
 	}
 
-	if hasInlineAttachments(r.Body) {
+	if useMultipart, err := loadAttachments(r); err != nil {
+		return err
+	} else if useMultipart {
 		// Write doc as multipart, including attachments:
 		docWriter, err := createNestedMultipart(writer, "related", partHeaders)
 		if err == nil {
-			err = WriteMultipartDocument(r, docWriter, compress)
+			err = writeLoadedMultipartDocument(r, docWriter, compress)
 			if err == nil {
 				err = docWriter.Close()
 			}
@@ -268,16 +282,6 @@ func md5DigestKey(data []byte) string {
 	return "md5-" + base64.StdEncoding.EncodeToString(digester.Sum(nil))
 }
 
-// Does this Body contain any attachments with a "data" property?
-func hasInlineAttachments(body db.Body) bool {
-	for _, value := range body.Attachments() {
-		if meta, ok := value.(map[string]interface{}); ok && meta["data"] != nil {
-			return true
-		}
-	}
-	return false
-}
-
 // Creates a multipart writer as a nested part in another writer.
 func createNestedMultipart(mpWriter *multipart.Writer, multipartSubType string, headers textproto.MIMEHeader) (*multipart.Writer, error) {
 	boundary := randomBoundary()