From 4598d68b127cd17b892bcfc3a487a5228da39e8e Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 7 Jun 2018 12:05:01 +0100 Subject: [PATCH 01/17] #3632 change default sgcollect output dir (#3636) * Revert separation of stdout/stderr pipes (sgcollect_info doesn't use these outputs well enough to distinguish errors) * If output_dir is not specified, find LogFilePath and write there, failing that, fall back to SG working dir, or Program Files dir on Windows * Use LogFilePath as a fallback if no output_dir is specified for sgcollect_info * Reduce PR scope to only #3632 --- rest/sgcollect.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rest/sgcollect.go b/rest/sgcollect.go index 06c3dc555e..cbbfa491d8 100644 --- a/rest/sgcollect.go +++ b/rest/sgcollect.go @@ -53,10 +53,16 @@ func (sg *sgCollect) Start(zipFilename string, params sgCollectOptions) error { } if params.OutputDirectory == "" { - // If no output directory specified, default to the directory sgcollect_info is in. - params.OutputDirectory = filepath.Dir(sgCollectPath) + // If no output directory specified, default to the configured LogFilePath + if config != nil && config.Logging != nil && config.Logging.LogFilePath != "" { + params.OutputDirectory = config.Logging.LogFilePath + base.Debugf(base.KeyAdmin, "sgcollect_info: no output directory specified, using LogFilePath: %v", params.OutputDirectory) + } else { + // If LogFilePath is not set, and DefaultLogFilePath is not set via a service script, error out. + return errors.New("no output directory or LogFilePath specified") + } - // Validate the path, just in case were not getting sgCollectPath correctly. + // Validate the path, just in case were not getting it correctly. if err := validateOutputDirectory(params.OutputDirectory); err != nil { return err } From 4bc25b9b68fb7f4b401741cb4a6854cf8c39d973 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 7 Jun 2018 17:28:27 +0100 Subject: [PATCH 02/17] Try to look for gozip in the tools directory, before falling back to looking in $PATH (#3638) --- tools/tasks.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/tasks.py b/tools/tasks.py index fc296b33ce..bd3095c2af 100644 --- a/tools/tasks.py +++ b/tools/tasks.py @@ -342,7 +342,13 @@ def close_all_files(self): def _zip_helper(self, prefix, filename, files): """Write all our logs to a zipfile""" - exe = exec_name("gozip") + + # Get absolute path to gozip relative to this file + exe = os.path.abspath(os.path.join(os.path.dirname(__file__), exec_name("gozip"))) + + # Don't have gozip relative to this file, try and see if there's one elsewhere in $PATH + if not os.path.isfile(exe): + exe = exec_name("gozip") fallback = False From 95b598b84d9dea3b65e17dbd0cb82f604caeabba Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Thu, 7 Jun 2018 15:18:29 -0700 Subject: [PATCH 03/17] Port #3637 to release/2.1.0 --- build/windows/sgw_windows_build.bat | 4 ++++ manifest/default.xml | 3 +++ 2 files changed, 7 insertions(+) diff --git a/build/windows/sgw_windows_build.bat b/build/windows/sgw_windows_build.bat index b8514a6494..232d21d027 100755 --- a/build/windows/sgw_windows_build.bat +++ b/build/windows/sgw_windows_build.bat @@ -159,6 +159,9 @@ if EXIST %SGW_DIR%\pkg rmdir /s/q %SGW_DIR%\pkg echo go install github.com\couchbase\sync_gateway\... go install github.com\couchbase\sync_gateway\... 
+echo go install github.com\couchbase\ns_server\deps\gocode\src\gozip +go install github.com\couchbase\ns_server\deps\gocode\src\gozip + if NOT EXIST %BIN_DIR%\%SGW_EXEC% ( echo "############################# Sync-Gateway FAIL! no such file: %BIN_DIR%\%SGW_EXEC%" @@ -276,6 +279,7 @@ echo ======== sync-gateway package ========================== echo ".................staging sgw files to %SGW_INSTALL_DIR%" copy %DEST_DIR%\%SGW_EXEC% %SGW_INSTALL_DIR%\sync_gateway.exe copy %COLLECTINFO_DIST% %SGW_INSTALL_DIR%\tools\ +copy %BIN_DIR%\gozip.exe %SGW_INSTALL_DIR%\tools\ copy %BLD_DIR%\README.txt %SGW_INSTALL_DIR%\README.txt echo %VERSION% > %SGW_INSTALL_DIR%\VERSION.txt copy %LIC_DIR%\LICENSE_%EDITION%.txt %SGW_INSTALL_DIR%\LICENSE.txt diff --git a/manifest/default.xml b/manifest/default.xml index 37297b8c22..7b8dfe4cf4 100644 --- a/manifest/default.xml +++ b/manifest/default.xml @@ -128,4 +128,7 @@ + + + From b215c94db0df6b8602904d9b8adeaff8151d1d26 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Mon, 25 Jun 2018 10:01:52 -0700 Subject: [PATCH 04/17] Fix TLS support for DCP when certpath not specified (#3660) --- base/bucket.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/base/bucket.go b/base/bucket.go index 8e27f96e56..6ce27ab7c9 100644 --- a/base/bucket.go +++ b/base/bucket.go @@ -213,14 +213,11 @@ func (b BucketSpec) TLSConnect(prot, dest string) (rv *memcached.Client, err err return nil, pkgerrors.Wrapf(err, "Error setting NoDelay on tcpConn during TLS Connect") } - tlsConfig := &tls.Config{} - if b.Certpath != "" && b.Keypath != "" { - var configErr error - tlsConfig, configErr = TLSConfigForX509(b.Certpath, b.Keypath, b.CACertPath) - if configErr != nil { - return nil, pkgerrors.Wrapf(configErr, "Error adding x509 to TLSConfig for DCP TLS connection") - } + tlsConfig, configErr := TLSConfigForX509(b.Certpath, b.Keypath, b.CACertPath) + if configErr != nil { + return nil, pkgerrors.Wrapf(configErr, "Error creating TLSConfig for DCP TLS connection") } + tlsConfig.ServerName = host tlsConn := tls.Client(tcpConn, tlsConfig) @@ -233,20 +230,19 @@ func (b BucketSpec) TLSConnect(prot, dest string) (rv *memcached.Client, err err } +// Returns a TLSConfig based on the specified certificate paths. If none are provided, returns tlsConfig with +// InsecureSkipVerify:true. 
func TLSConfigForX509(certpath, keypath, cacertpath string) (*tls.Config, error) { - cacertpaths := []string{cacertpath} - tlsConfig := &tls.Config{} - - if len(cacertpaths) > 0 { + if cacertpath != "" { + cacertpaths := []string{cacertpath} rootCerts := x509.NewCertPool() for _, path := range cacertpaths { cacert, err := ioutil.ReadFile(path) if err != nil { return nil, err } - ok := rootCerts.AppendCertsFromPEM(cacert) if !ok { return nil, fmt.Errorf("can't append certs from PEM") @@ -261,6 +257,7 @@ func TLSConfigForX509(certpath, keypath, cacertpath string) (*tls.Config, error) tlsConfig.InsecureSkipVerify = true } + // If client cert and key are provided, add to config as x509 key pair if certpath != "" && keypath != "" { cert, err := tls.LoadX509KeyPair(certpath, keypath) if err != nil { @@ -269,6 +266,7 @@ func TLSConfigForX509(certpath, keypath, cacertpath string) (*tls.Config, error) tlsConfig.Certificates = []tls.Certificate{cert} } + return tlsConfig, nil } From ddff918fe8757ce41b3da82075f3f34282e6132e Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Thu, 28 Jun 2018 10:53:47 -0700 Subject: [PATCH 05/17] Ensure checkpoint data is loaded regardless of backfill status (#3666) --- base/dcp_feed.go | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/base/dcp_feed.go b/base/dcp_feed.go index 1ec84d9c3e..9b937bea5f 100644 --- a/base/dcp_feed.go +++ b/base/dcp_feed.go @@ -385,7 +385,7 @@ func (r *DCPReceiver) initMetadata(maxVbNo uint16) { backfillSeqs, err := r.backfill.loadBackfillSequences(r.bucket) if err != nil { // Backfill sequences not present or invalid - will use metadata only - return + backfillSeqs = nil } // Load persisted metadata @@ -399,17 +399,19 @@ func (r *DCPReceiver) initMetadata(maxVbNo uint16) { r.meta[i] = metadata r.seqs[i] = snapStart // Check whether we persisted a sequence midway through a previous incomplete backfill - var partialBackfillSequence uint64 - if backfillSeqs != nil && backfillSeqs.Seqs[i] < backfillSeqs.SnapEnd[i] { - partialBackfillSequence = backfillSeqs.Seqs[i] - } - // If we have a backfill sequence later than the DCP checkpoint's snapStart, start from there - if partialBackfillSequence > snapStart { - Infof(KeyDCP, "Restarting vb %d using backfill sequence %d ([%d-%d])", i, partialBackfillSequence, backfillSeqs.SnapStart[i], backfillSeqs.SnapEnd[i]) - r.seqs[i] = partialBackfillSequence - r.meta[i] = makeVbucketMetadata(r.vbuuids[i], partialBackfillSequence, backfillSeqs.SnapStart[i], backfillSeqs.SnapEnd[i]) - } else { - Infof(KeyDCP, "Restarting vb %d using metadata sequence %d (backfill %d not in [%d-%d])", i, snapStart, partialBackfillSequence, snapStart, snapEnd) + if backfillSeqs != nil { + var partialBackfillSequence uint64 + if backfillSeqs.Seqs[i] < backfillSeqs.SnapEnd[i] { + partialBackfillSequence = backfillSeqs.Seqs[i] + } + // If we have a backfill sequence later than the DCP checkpoint's snapStart, start from there + if partialBackfillSequence > snapStart { + Infof(KeyDCP, "Restarting vb %d using backfill sequence %d ([%d-%d])", i, partialBackfillSequence, backfillSeqs.SnapStart[i], backfillSeqs.SnapEnd[i]) + r.seqs[i] = partialBackfillSequence + r.meta[i] = makeVbucketMetadata(r.vbuuids[i], partialBackfillSequence, backfillSeqs.SnapStart[i], backfillSeqs.SnapEnd[i]) + } else { + Infof(KeyDCP, "Restarting vb %d using metadata sequence %d (backfill %d not in [%d-%d])", i, snapStart, partialBackfillSequence, snapStart, snapEnd) + } } } } From a036bd817d35ff1c354c644804dc588fb7c41476 
Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Thu, 19 Jul 2018 13:20:19 -0700 Subject: [PATCH 06/17] Avoid hardcoding xattr refs in views (Fixes #3678) (#3679) (#3681) --- db/design_doc.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/design_doc.go b/db/design_doc.go index 3f3d75d2c3..7e7dbd56e6 100644 --- a/db/design_doc.go +++ b/db/design_doc.go @@ -377,9 +377,10 @@ func installViews(bucket base.Bucket) error { // Tombstones view - used for view tombstone compaction // Key is purge time; value is docid tombstones_map := `function (doc, meta) { - var sync = meta.xattrs._sync; + %s if (sync !== undefined && sync.tombstoned_at !== undefined) emit(sync.tombstoned_at, meta.id);}` + tombstones_map = fmt.Sprintf(tombstones_map, syncData) // All-principals view // Key is name; value is true for user, false for role From af5379b15ad00a1c31103c6834ccff13321235b0 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 20 Sep 2018 17:42:20 +0100 Subject: [PATCH 07/17] Backport Jenkinsfile to release/2.1.1 (#3754) --- Jenkinsfile | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 Jenkinsfile diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000000..027977ee3f --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,116 @@ +pipeline { + // Build on this uberjenkins node, as it has the Go environment set up in a known-state + // We could potentially change this to use a dockerfile agent instead so it can be portable. + agent { label 'sync-gateway-ami-builder' } + + environment { + GO_VERSION = 'go1.8.5' + GVM = "/root/.gvm/bin/gvm" + GO = "/root/.gvm/gos/${GO_VERSION}/bin" + GOPATH = "${WORKSPACE}/godeps" + BRANCH = "${BRANCH_NAME}" + COVERALLS_TOKEN = credentials('SG_COVERALLS_TOKEN') + } + + stages { + stage('Cloning') { + steps { + sh "git rev-parse HEAD > .git/commit-id" + script { + env.SG_COMMIT = readFile '.git/commit-id' + // Set BRANCH variable to target branch if this build is a PR + if (env.CHANGE_TARGET) { + env.BRANCH = env.CHANGE_TARGET + } + } + + // Make a hidden directory to move scm + // checkout into, we'll need a bit for later, + // but mostly rely on bootstrap.sh to get our code. + // + // TODO: We could probably change the implicit checkout + // to clone directly into a subdirectory instead? + sh 'mkdir .scm-checkout' + sh 'mv * .scm-checkout/' + } + } + stage('Setup Tools') { + steps { + echo 'Setting up Go tools..' + // Use gvm to install the required Go version, if not already + sh "${GVM} install $GO_VERSION" + withEnv(["PATH+=${GO}", "GOPATH=${GOPATH}"]) { + sh "go version" + sh 'go get -v -u github.com/AlekSi/gocoverutil' + sh 'go get -v -u golang.org/x/tools/cmd/cover' + sh 'go get -v -u github.com/mattn/goveralls' + // Jenkins coverage reporting tools + // sh 'go get -v -u github.com/axw/gocov/...' + // sh 'go get -v -u github.com/AlekSi/gocov-xml' + } + } + } + stage('Bootstrap') { + steps { + echo "Bootstrapping commit ${SG_COMMIT}" + sh 'cp .scm-checkout/bootstrap.sh .' + sh 'chmod +x bootstrap.sh' + sh "./bootstrap.sh -p sg-accel -c ${SG_COMMIT}" + } + } + stage('Build') { + steps { + echo 'Building..' + withEnv(["PATH+=${GO}"]) { + sh './build.sh -v' + } + } + } + stage('Test with coverage') { + steps { + echo 'Testing with coverage..' + withEnv(["PATH+=${GO}:${GOPATH}/bin"]) { + // gocoverutil is required until we upgrade to Go 1.10, and can use -coverprofile with ./... + sh 'gocoverutil -coverprofile=cover_sg.out test -covermode=atomic github.com/couchbase/sync_gateway/...' 
+ sh 'gocoverutil -coverprofile=cover_sga.out test -covermode=atomic github.com/couchbaselabs/sync-gateway-accel/...' + + sh 'gocoverutil -coverprofile=cover_merged.out merge cover_sg.out cover_sga.out' + + // Publish combined HTML coverage report + sh 'mkdir reports' + sh 'go tool cover -html=cover_merged.out -o reports/coverage.html' + publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, includes: 'coverage.html', keepAll: false, reportDir: 'reports', reportFiles: 'coverage.html', reportName: 'Code Coverage', reportTitles: '']) + } + + // Travis-related variables are required as coveralls only officially supports a certain set of CI tools. + withEnv(["PATH+=${GO}:${GOPATH}/bin", "TRAVIS_BRANCH=${env.BRANCH}", "TRAVIS_PULL_REQUEST=${env.CHANGE_ID}", "TRAVIS_JOB_ID=${env.BUILD_NUMBER}"]) { + // Replace count covermode values with set just for coveralls to reduce the variability in reports. + sh 'awk \'NR==1{print "mode: set";next} $NF>0{$NF=1} {print}\' cover_sg.out > cover_coveralls.out' + + // Send just the SG coverage report to coveralls.io - **NOT** accel! It will expose the codebase!!! + sh "goveralls -coverprofile=cover_coveralls.out -service=uberjenkins -repotoken=${COVERALLS_TOKEN}" + + // Generate Cobertura XML report that can be parsed by the Jenkins Cobertura Plugin + // TODO: Requires Cobertura Plugin to be installed on Jenkins first + // sh 'gocov convert cover_sg.out | gocov-xml > reports/coverage.xml' + // step([$class: 'CoberturaPublisher', coberturaReportFile: 'reports/coverage.xml']) + } + } + } + stage('Test Race') { + steps { + echo 'Testing with -race..' + withEnv(["PATH+=${GO}:${GOPATH}/bin"]) { + sh './test.sh -race' + } + } + } + } + + post { + always { + // TODO: Might be better to clean the workspace to before a job runs instead + step([$class: 'WsCleanup']) + } + } +} From 30a11bfa11d75e545c4bda4ada9e7babe08991af Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 25 Sep 2018 16:57:50 +0100 Subject: [PATCH 08/17] Feature/issue 3570 gsi wait (#3594) (#3758) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * WaitForIndexRollback working * Refactor CountQueryResults() + cleanup * wait for index to be empty in another case * Dump stack (overlaps w/ a PR) * Fix TestInitializeIndexes * Don’t wait for indexes to be empty if bucket is not flushed * Extend DefaultWaitForSequenceTesting timeout to try to make it easier to identify cause of test failure * Only call WaitForIndexEmpty if gocb bucket * Fixes TestViewQueryUserAccess sporadic fail Doesn’t seem to be fixed by WaitForIndexEmpty() https://github.com/couchbase/sync_gateway/issues/3539#issuecomment-387810263 * Fix view_undefined test failures https://github.com/couchbase/sync_gateway/issues/3570#issuecomment-390487982 * More retries http://uberjenkins.sc.couchbase.com/view/Build/job/sync-gateway-integration-master/693/console * Fix issue where it was printing the wrong error * Remove the one-off retry in TestViewQueryUserAccess() * Drop indexes if gsi indexes don’t empty * Was missing return * Fix loop * Fix attempt for “bucket already open” error * PR cleanup * Force numIndexReplicas to be 0 * Address PR feedback * PR feedback about when to close results * If the view is undefined, retry * Revert DbConfig{ UseViews: true } changes --- base/constants.go | 4 ++- base/util_testing.go | 8 +++-- db/database_test.go | 47 ++++++++++++++++++++----- db/util_testing.go | 73 +++++++++++++++++++++++++++++++++++++++ rest/changes_api_test.go | 2 ++ rest/utilities_testing.go | 
53 +++++++++++++++++++++++----- rest/view_api_test.go | 16 ++++++--- 7 files changed, 179 insertions(+), 24 deletions(-) create mode 100644 db/util_testing.go diff --git a/base/constants.go b/base/constants.go index 3302e0e25a..bbdca81799 100644 --- a/base/constants.go +++ b/base/constants.go @@ -66,7 +66,9 @@ const ( DefaultViewQueryPageSize = 5000 // This must be greater than 1, or the code won't work due to windowing method - DefaultWaitForSequenceTesting = time.Second * 2 + // Until the sporadic integration tests failures in SG #3570 are fixed, should be GTE n1ql query timeout + // to make it easier to identify root cause of test failures. + DefaultWaitForSequenceTesting = time.Second * 30 // Default the max number of idle connections per host to a relatively high number to avoid // excessive socket churn caused by opening short-lived connections and closing them after, which can cause diff --git a/base/util_testing.go b/base/util_testing.go index 0937c0a320..8b554199e1 100644 --- a/base/util_testing.go +++ b/base/util_testing.go @@ -30,6 +30,7 @@ func init() { type TestBucket struct { Bucket + BucketSpec BucketSpec } func (tb TestBucket) Close() { @@ -137,7 +138,10 @@ func GetBucketOrPanicCommon(bucketType CouchbaseBucketType) TestBucket { panic(fmt.Sprintf("Could not open bucket: %v", err)) } - return TestBucket{Bucket: bucket} + return TestBucket{ + Bucket: bucket, + BucketSpec: spec, + } } @@ -347,7 +351,7 @@ func (tbm *TestBucketManager) FlushBucket() error { // Ignore sporadic errors like: // Error trying to empty bucket. err: {"_":"Flush failed with unexpected error. Check server logs for details."} - log.Printf("Flushing bucket %s", tbm.Bucket.Name()) + Infof(KeyAll, "Flushing bucket %s", tbm.Bucket.Name()) workerFlush := func() (shouldRetry bool, err error, value interface{}) { err = tbm.Bucket.Flush() diff --git a/db/database_test.go b/db/database_test.go index 6145114dc2..5bdfdb29a8 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -89,17 +89,45 @@ func setupTestDBWithCacheOptions(t testing.TB, options CacheOptions) (*Database, func testBucket() base.TestBucket { - testBucket := base.GetTestBucketOrPanic() - err := installViews(testBucket.Bucket) - if err != nil { - log.Fatalf("Couldn't connect to bucket: %v", err) - } + // Retry loop in case the GSI indexes don't handle the flush and we need to drop them and retry + for i:= 0; i < 2; i++ { + + testBucket := base.GetTestBucketOrPanic() + err := installViews(testBucket.Bucket) + if err != nil { + log.Fatalf("Couldn't connect to bucket: %v", err) + // ^^ effectively panics + } + + err = InitializeIndexes(testBucket.Bucket, base.TestUseXattrs(), 0) + if err != nil { + log.Fatalf("Unable to initialize GSI indexes for test: %v", err) + // ^^ effectively panics + } + + // Since GetTestBucketOrPanic() always returns an _empty_ bucket, it's safe to wait for the indexes to be empty + gocbBucket, isGoCbBucket := base.AsGoCBBucket(testBucket.Bucket) + if isGoCbBucket { + waitForIndexRollbackErr := WaitForIndexEmpty(gocbBucket, testBucket.BucketSpec.UseXattrs) + if waitForIndexRollbackErr != nil { + base.Infof(base.KeyAll, "Error WaitForIndexEmpty: %v. 
Drop indexes and retry", waitForIndexRollbackErr) + if err := base.DropAllBucketIndexes(gocbBucket); err != nil { + log.Fatalf("Unable to drop GSI indexes for test: %v", err) + // ^^ effectively panics + } + testBucket.Close() // Close the bucket, it will get re-opened on next loop iteration + continue // Goes to top of outer for loop to retry + } + + } + + return testBucket - err = InitializeIndexes(testBucket.Bucket, base.TestUseXattrs(), 0) - if err != nil { - log.Fatalf("Unable to initialize GSI indexes for test:%v", err) } - return testBucket + + panic(fmt.Sprintf("Failed to create a testbucket after multiple attempts")) + + } func setupTestLeakyDBWithCacheOptions(t *testing.T, options CacheOptions, leakyOptions base.LeakyBucketConfig) *Database { @@ -1415,6 +1443,7 @@ func TestRecentSequenceHistory(t *testing.T) { } func TestChannelView(t *testing.T) { + db, testBucket := setupTestDBWithCacheOptions(t, CacheOptions{}) defer testBucket.Close() defer tearDownTestDB(t, db) diff --git a/db/util_testing.go b/db/util_testing.go new file mode 100644 index 0000000000..ed7520eaa7 --- /dev/null +++ b/db/util_testing.go @@ -0,0 +1,73 @@ +package db + +import ( + "math" + + "github.com/couchbase/gocb" + "github.com/couchbase/sync_gateway/base" + "fmt" +) + +// Workaround SG #3570 by doing a polling loop until the star channel query returns 0 results. +// Uses the star channel index as a proxy to indicate that _all_ indexes are empty (which might not be true) +func WaitForIndexEmpty(bucket *base.CouchbaseBucketGoCB, useXattrs bool) error { + + retryWorker := func() (shouldRetry bool, err error, value interface{}) { + + var results gocb.QueryResults + + // Create the star channel query + statement := fmt.Sprintf("%s LIMIT 1", QueryStarChannel.statement) // append LIMIT 1 since we only care if there are any results or not + starChannelQueryStatement := replaceSyncTokensQuery(statement,useXattrs) + params := map[string]interface{}{} + params[QueryParamStartSeq] = 0 + params[QueryParamEndSeq] = math.MaxInt64 + + // Execute the query + results, err = bucket.Query(starChannelQueryStatement, params, gocb.RequestPlus, true) + + // If there was an error, then retry. Assume it's an "index rollback" error which happens as + // the index processes the bucket flush operation + if err != nil { + base.Infof(base.KeyAll, "Error querying star channel: %v. Assuming it's a temp error, will retry", err) + return true, err, nil + } + + // If it's empty, we're done + var queryRow AllDocsIndexQueryRow + found := results.Next(&queryRow) + resultsCloseErr := results.Close() + if resultsCloseErr != nil { + return false, resultsCloseErr, nil + } + if !found { + base.Infof(base.KeyAll, "WaitForIndexEmpty found 0 results. GSI index appears to be empty.") + return false, nil, nil + } + + // Otherwise, retry + base.Infof(base.KeyAll, "WaitForIndexEmpty found non-zero results. 
Retrying until the GSI index is empty.") + return true, nil, nil + + } + + // Kick off the retry loop + err, _ := base.RetryLoop( + "Wait for index to be empty", + retryWorker, + base.CreateMaxDoublingSleeperFunc(30, 100, 2000), + ) + return err + +} + +// Count how many rows are in gocb.QueryResults +func ResultsEmpty(results gocb.QueryResults) (resultsEmpty bool) { + + var queryRow AllDocsIndexQueryRow + found := results.Next(&queryRow) + return !found + +} + + diff --git a/rest/changes_api_test.go b/rest/changes_api_test.go index 35499fe487..1cc17ad2eb 100644 --- a/rest/changes_api_test.go +++ b/rest/changes_api_test.go @@ -629,12 +629,14 @@ func TestChangesLoopingWhenLowSequence(t *testing.T) { maxNum := 50 skippedMaxWait := uint32(120000) + numIndexReplicas := uint(0) shortWaitConfig := &DbConfig{ CacheConfig: &CacheConfig{ CachePendingSeqMaxWait: &pendingMaxWait, CachePendingSeqMaxNum: &maxNum, CacheSkippedSeqMaxWait: &skippedMaxWait, }, + NumIndexReplicas:&numIndexReplicas, } rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`, DatabaseConfig: shortWaitConfig} defer rt.Close() diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index 0043acfdf9..279041482b 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -46,7 +46,13 @@ type RestTester struct { func (rt *RestTester) Bucket() base.Bucket { - if rt.RestTesterBucket == nil { + if rt.RestTesterBucket != nil { + return rt.RestTesterBucket + } + + // Put this in a loop in case certain operations fail, like waiting for GSI indexes to be empty. + // Limit number of attempts to 2. + for i := 0; i < 2; i++ { // Initialize the bucket. For couchbase-backed tests, triggers with creation/flushing of the bucket if !rt.NoFlush { @@ -82,7 +88,17 @@ func (rt *RestTester) Bucket() base.Bucket { useXattrs := base.TestUseXattrs() if rt.DatabaseConfig == nil { + + // If no db config was passed in, create one rt.DatabaseConfig = &DbConfig{} + + // By default, does NOT use views when running against couchbase server, since should use GSI + rt.DatabaseConfig.UseViews = base.TestUseViews() + + // numReplicas set to 0 for test buckets, since it should assume that there is only one node. + numReplicas := uint(0) + rt.DatabaseConfig.NumIndexReplicas = &numReplicas + } rt.DatabaseConfig.BucketConfig = BucketConfig{ @@ -99,24 +115,37 @@ func (rt *RestTester) Bucket() base.Bucket { rt.DatabaseConfig.AllowConflicts = &boolVal } - // Set to 0 for test buckets, since it should assume that there is only one node. - numReplicas := uint(0) - - rt.DatabaseConfig.UseViews = base.TestUseViews() - rt.DatabaseConfig.NumIndexReplicas = &numReplicas - _, err := rt.RestTesterServerContext.AddDatabaseFromConfig(rt.DatabaseConfig) if err != nil { panic(fmt.Sprintf("Error from AddDatabaseFromConfig: %v", err)) } rt.RestTesterBucket = rt.RestTesterServerContext.Database("db").Bucket + // As long as bucket flushing wasn't disabled, wait for index to be empty (if this is a gocb bucket) + if !rt.NoFlush { + asGoCbBucket, isGoCbBucket := base.AsGoCBBucket(rt.RestTesterBucket) + if isGoCbBucket { + if err := db.WaitForIndexEmpty(asGoCbBucket, spec.UseXattrs); err != nil { + base.Infof(base.KeyAll, "WaitForIndexEmpty returned an error: %v. 
Dropping indexes and retrying", err) + // if WaitForIndexEmpty returns error, drop the indexes and retry + if err := base.DropAllBucketIndexes(asGoCbBucket); err != nil { + panic(fmt.Sprintf("Failed to drop bucket indexes: %v", err)) + } + + continue // Go to the top of the for loop to retry + } + } + } + if !rt.noAdminParty { rt.SetAdminParty(true) } + return rt.RestTesterBucket } - return rt.RestTesterBucket + + panic(fmt.Sprintf("Failed to create a RestTesterBucket after multiple attempts")) + } func (rt *RestTester) BucketAllowEmptyPassword() base.Bucket { @@ -365,6 +394,14 @@ func (rt *RestTester) WaitForNViewResults(numResultsExpected int, viewUrlPath st } else { response = rt.SendAdminRequest("GET", viewUrlPath, ``) } + + // If the view is undefined, it might be a race condition where the view is still being created + // See https://github.com/couchbase/sync_gateway/issues/3570#issuecomment-390487982 + if strings.Contains(response.Body.String(), "view_undefined") { + base.Infof(base.KeyAll, "view_undefined error: %v. Retrying", response.Body.String()) + return true, nil, nil + } + if response.Code != 200 { return false, fmt.Errorf("Got response code: %d from view call. Expected 200.", response.Code), sgbucket.ViewResult{} } diff --git a/rest/view_api_test.go b/rest/view_api_test.go index 7a2a1eaf70..85f490fd77 100644 --- a/rest/view_api_test.go +++ b/rest/view_api_test.go @@ -95,9 +95,10 @@ func TestViewQuery(t *testing.T) { } -//Tests #1109, where design doc contains multiple views +//Tests #1109, wh ere design doc contains multiple views func TestViewQueryMultipleViews(t *testing.T) { - var rt RestTester + + rt := RestTester{} defer rt.Close() //Define three views @@ -126,6 +127,7 @@ func TestViewQueryMultipleViews(t *testing.T) { } func TestViewQueryUserAccess(t *testing.T) { + var rt RestTester defer rt.Close() @@ -140,12 +142,14 @@ func TestViewQueryUserAccess(t *testing.T) { assertStatus(t, response, 201) result, err := rt.WaitForNAdminViewResults(2, "/db/_design/foo/_view/bar?stale=false") - assertNoError(t, err, "Unexpected error") + assertNoError(t, err, "Unexpected error in WaitForNAdminViewResults") assert.Equals(t, len(result.Rows), 2) assert.DeepEquals(t, result.Rows[0], &sgbucket.ViewRow{ID: "doc1", Key: "state1", Value: "doc1"}) assert.DeepEquals(t, result.Rows[1], &sgbucket.ViewRow{ID: "doc2", Key: "state2", Value: "doc2"}) result, err = rt.WaitForNAdminViewResults(2, "/db/_design/foo/_view/bar?stale=false") + assertNoError(t, err, "Unexpected error in WaitForNAdminViewResults") + assert.Equals(t, len(result.Rows), 2) assert.DeepEquals(t, result.Rows[0], &sgbucket.ViewRow{ID: "doc1", Key: "state1", Value: "doc1"}) assert.DeepEquals(t, result.Rows[1], &sgbucket.ViewRow{ID: "doc2", Key: "state2", Value: "doc2"}) @@ -157,7 +161,8 @@ func TestViewQueryUserAccess(t *testing.T) { a.Save(testUser) result, err = rt.WaitForNUserViewResults(2, "/db/_design/foo/_view/bar?stale=false", testUser, password) - assertNoError(t, err, "Unexpected error") + assertNoError(t, err, "Unexpected error in WaitForNUserViewResults") + assert.Equals(t, len(result.Rows), 2) assert.DeepEquals(t, result.Rows[0], &sgbucket.ViewRow{ID: "doc1", Key: "state1", Value: "doc1"}) assert.DeepEquals(t, result.Rows[1], &sgbucket.ViewRow{ID: "doc2", Key: "state2", Value: "doc2"}) @@ -168,6 +173,7 @@ func TestViewQueryUserAccess(t *testing.T) { request.SetBasicAuth(testUser.Name(), password) userResponse := rt.Send(request) assertStatus(t, userResponse, 403) + } func TestViewQueryMultipleViewsInterfaceValues(t 
*testing.T) { @@ -210,6 +216,7 @@ func TestViewQueryMultipleViewsInterfaceValues(t *testing.T) { } func TestUserViewQuery(t *testing.T) { + rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`} defer rt.Close() @@ -525,6 +532,7 @@ func TestAdminGroupLevelReduceSumQuery(t *testing.T) { } func TestPostInstallCleanup(t *testing.T) { + rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`} defer rt.Close() From 0d6290650a90ad7e929d2dbc05351fbebacc1b70 Mon Sep 17 00:00:00 2001 From: Traun Leyden Date: Tue, 25 Sep 2018 18:52:26 -0700 Subject: [PATCH 09/17] Feature/issue 3738 norev (#3763) * Repro attempt for SG #3738 * /_flush_rev_cache endpoint for easy manual testing * TODOs on fix * Clean up test * First pass at implmenting norev * Update test helper to deal with norev messages * Translate error to status code * Avoid panic if there is a marshal err for the sequence * Revert "/_flush_rev_cache endpoint for easy manual testing" This reverts commit 9e54548815f5d40689e3f2dd0bc415f834b53a1a. * Update test comment * PR feedback --- rest/blip_api_test.go | 53 +++++++++++++++++++++++++++++++++++++++ rest/blip_sync.go | 48 +++++++++++++++++++++++++++++------ rest/utilities_testing.go | 8 ++++++ 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/rest/blip_api_test.go b/rest/blip_api_test.go index 7a7cdfda88..3d8cbcb7f5 100644 --- a/rest/blip_api_test.go +++ b/rest/blip_api_test.go @@ -1309,4 +1309,57 @@ func TestMultipleOustandingChangesSubscriptions(t *testing.T) { assert.True(t, errorCode == "") +} + + +// Reproduce issue SG #3738 +// +// - Add 5 docs to channel ABC +// - Purge one doc via _purge REST API +// - Flush rev cache +// - Send subChanges request +// - Reply to all changes saying all docs are wanted +// - Wait to receive rev messages for all 5 docs +// - Expected: receive all 5 docs (4 revs and 1 norev) +// - Actual: only recieve 4 docs (4 revs) +func TestMissingNoRev(t *testing.T) { + + rt := RestTester{} + btSpec := BlipTesterSpec{ + restTester: &rt, + } + bt, err := NewBlipTesterFromSpec(btSpec) + assertNoError(t, err, "Unexpected error creating BlipTester") + defer bt.Close() + + // Create 5 docs + for i := 0; i < 5; i++ { + docId := fmt.Sprintf("doc-%d", i) + docRev := fmt.Sprintf("1-abc%d", i) + sent, _, resp, err := bt.SendRev(docId, docRev, []byte(`{"key": "val", "channels": ["ABC"]}`), blip.Properties{}) + assert.True(t, sent) + log.Printf("resp: %v, err: %v", resp, err) + } + + + // Get a reference to the database + targetDbContext, err := rt.ServerContext().GetDatabase("db") + assertNoError(t, err, "failed") + targetDb, err := db.GetDatabase(targetDbContext, nil) + assertNoError(t, err, "failed") + + // Purge one doc + doc0Id := fmt.Sprintf("doc-%d", 0) + err = targetDb.Purge(doc0Id) + assertNoError(t, err, "failed") + + // Flush rev cache + targetDb.FlushRevisionCache() + + // Pull docs, expect to pull 4 since one was purged. 
(also expect to NOT get stuck) + docs := bt.WaitForNumDocsViaChanges(4) + assert.True(t, len(docs) == 4) + + + } \ No newline at end of file diff --git a/rest/blip_sync.go b/rest/blip_sync.go index 8cb236f010..494c73a7a2 100644 --- a/rest/blip_sync.go +++ b/rest/blip_sync.go @@ -449,7 +449,7 @@ func (bh *blipHandler) handleChangesResponse(sender *blip.Sender, response *blip return } } - bh.sendRevision(sender, seq, docID, revID, knownRevs, maxHistory) + bh.sendRevOrNorev(sender, seq, docID, revID, knownRevs, maxHistory) } } } @@ -538,14 +538,44 @@ func (bh *blipHandler) handleProposedChanges(rq *blip.Message) error { //////// DOCUMENTS: -// Pushes a revision body to the client -func (bh *blipHandler) sendRevision(sender *blip.Sender, seq db.SequenceID, docID string, revID string, knownRevs map[string]bool, maxHistory int) { - bh.Logf(base.LevelDebug, base.KeySync, "Sending rev %q %s based on %d known. User:%s", base.UD(docID), revID, len(knownRevs), base.UD(bh.effectiveUsername)) +func (bh *blipHandler) sendRevOrNorev(sender *blip.Sender, seq db.SequenceID, docID string, revID string, knownRevs map[string]bool, maxHistory int) { + body, err := bh.db.GetRev(docID, revID, true, nil) if err != nil { - base.Warnf(base.KeyAll, "[%s] blipHandler can't get doc %q/%s: %v", bh.blipContext.ID, base.UD(docID), revID, err) - return + bh.sendNoRev(err, sender, seq, docID, revID) + } else { + bh.sendRevision(body, sender, seq, docID, revID, knownRevs, maxHistory) } +} + +func (bh *blipHandler) sendNoRev(err error, sender *blip.Sender, seq db.SequenceID, docID string, revID string) { + + bh.Logf(base.LevelDebug, base.KeySync, "Sending norev %q %s due to error: %v. User:%s", base.UD(docID), revID, err, base.UD(bh.effectiveUsername)) + + outrq := blip.NewRequest() + outrq.SetProfile("norev") + outrq.Properties["id"] = docID + outrq.Properties["rev"] = revID + seqJSON, marshalErr := json.Marshal(seq) + if marshalErr == nil { + outrq.Properties["sequence"] = string(seqJSON) + } + + status, reason := base.ErrorAsHTTPStatus(err) + outrq.Properties["error"] = fmt.Sprintf("%s", status) + + // Add a "reason" field that gives more detailed explanation on the cause of the error. + outrq.Properties["reason"] = fmt.Sprintf("%s", reason) + + outrq.SetNoReply(true) + sender.Send(outrq) + +} + +// Pushes a revision body to the client +func (bh *blipHandler) sendRevision(body db.Body, sender *blip.Sender, seq db.SequenceID, docID string, revID string, knownRevs map[string]bool, maxHistory int) { + + bh.Logf(base.LevelDebug, base.KeySync, "Sending rev %q %s based on %d known. 
User:%s", base.UD(docID), revID, len(knownRevs), base.UD(bh.effectiveUsername)) // Get the revision's history as a descending array of ancestor revIDs: history := db.ParseRevisions(body)[1:] @@ -561,7 +591,7 @@ func (bh *blipHandler) sendRevision(sender *blip.Sender, seq db.SequenceID, docI outrq := blip.NewRequest() outrq.SetProfile("rev") - seqJSON, _ := json.Marshal(seq) + seqJSON, marshalErr := json.Marshal(seq) outrq.Properties["id"] = docID delete(body, "_id") outrq.Properties["rev"] = revID @@ -570,7 +600,9 @@ func (bh *blipHandler) sendRevision(sender *blip.Sender, seq db.SequenceID, docI outrq.Properties["deleted"] = "1" delete(body, "deleted") } - outrq.Properties["sequence"] = string(seqJSON) + if marshalErr == nil { + outrq.Properties["sequence"] = string(seqJSON) + } if len(history) > 0 { outrq.Properties["history"] = strings.Join(history, ",") } diff --git a/rest/utilities_testing.go b/rest/utilities_testing.go index 279041482b..ad6d7c4a75 100644 --- a/rest/utilities_testing.go +++ b/rest/utilities_testing.go @@ -1164,6 +1164,14 @@ func (bt *BlipTester) PullDocs() (docs map[string]RestDocument) { } + // -------- Norev handler callback -------- + bt.blipContext.HandlerForProfile["norev"] = func(request *blip.Message) { + // If a norev is received, then don't bother waiting for one of the expected revisions, since it will never come. + // The norev could be added to the returned docs map, but so far there is no need for that. The ability + // to assert on the number of actually received revisions (which norevs won't affect) meets current test requirements. + defer revsFinishedWg.Done() + } + // Send subChanges to subscribe to changes, which will cause the "changes" profile handler above to be called back changesFinishedWg.Add(1) subChangesRequest := blip.NewRequest() From f4d396f8ee114c76dd214baa0cd5897421a3e376 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Fri, 28 Sep 2018 02:45:30 -0700 Subject: [PATCH 10/17] Ensure removal of obsolete rev from revtree (#3694) (#3768) Previously failures persisting temporary revision backups were also blocking revision removal, resulting in malformed obsolete revision bodies remaining in the revtree in some scenarios. Fixes #3692 --- base/leaky_bucket.go | 9 +- db/crud.go | 14 +-- db/crud_test.go | 260 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 275 insertions(+), 8 deletions(-) diff --git a/base/leaky_bucket.go b/base/leaky_bucket.go index 220f09d341..3e52380ee6 100644 --- a/base/leaky_bucket.go +++ b/base/leaky_bucket.go @@ -5,7 +5,7 @@ import ( "fmt" "time" - sgbucket "github.com/couchbase/sg-bucket" + "github.com/couchbase/sg-bucket" ) // A wrapper around a Bucket to support forced errors. For testing use only. 
@@ -28,6 +28,8 @@ type LeakyBucketConfig struct { TapFeedVbuckets bool // Emulate vbucket numbers on feed TapFeedMissingDocs []string // Emulate entry not appearing on tap feed + ForceErrorSetRawKeys []string // Issuing a SetRaw call with a specified key will return an error + // Returns a partial error the first time ViewCustom is called FirstTimeViewCustomPartialError bool PostQueryCallback func(ddoc, viewName string, params map[string]interface{}) // Issues callback after issuing query when bucket.ViewQuery is called @@ -68,6 +70,11 @@ func (b *LeakyBucket) Set(k string, exp uint32, v interface{}) error { return b.bucket.Set(k, exp, v) } func (b *LeakyBucket) SetRaw(k string, exp uint32, v []byte) error { + for _, errorKey := range b.config.ForceErrorSetRawKeys { + if k == errorKey { + return fmt.Errorf("Leaky bucket forced SetRaw error for key %s", k) + } + } return b.bucket.SetRaw(k, exp, v) } func (b *LeakyBucket) Delete(k string) error { diff --git a/db/crud.go b/db/crud.go index 707e3944dd..2c98bde34b 100644 --- a/db/crud.go +++ b/db/crud.go @@ -494,32 +494,32 @@ func (db *Database) getAvailableRev(doc *document, revid string) (Body, error) { } // Moves a revision's ancestor's body out of the document object and into a separate db doc. -func (db *Database) backupAncestorRevs(doc *document, revid string) error { +func (db *Database) backupAncestorRevs(doc *document, revid string) { // Find an ancestor that still has JSON in the document: var json []byte for { if revid = doc.History.getParent(revid); revid == "" { - return nil // No ancestors with JSON found + return // No ancestors with JSON found } else if json = doc.getRevisionBodyJSON(revid, db.RevisionBodyLoader); json != nil { break } } // Store the JSON as a separate doc in the bucket: - if err := db.setOldRevisionJSON(doc.ID, revid, json); err != nil { + err := db.setOldRevisionJSON(doc.ID, revid, json) + if err != nil { // This isn't fatal since we haven't lost any information; just warn about it. 
base.Warnf(base.KeyAll, "backupAncestorRevs failed: doc=%q rev=%q err=%v", base.UD(doc.ID), revid, err) - return err + } else { + base.Debugf(base.KeyCRUD, "Backed up obsolete rev %q/%q", base.UD(doc.ID), revid) } - // Nil out the rev's body in the document struct: + // Nil out the ancestor rev's body in the document struct: if revid == doc.CurrentRev { doc.RemoveBody() } else { doc.removeRevisionBody(revid) } - base.Debugf(base.KeyCRUD, "Backed up obsolete rev %q/%q", base.UD(doc.ID), revid) - return nil } //////// UPDATING DOCUMENTS: diff --git a/db/crud_test.go b/db/crud_test.go index 168a64753e..7274d93a98 100644 --- a/db/crud_test.go +++ b/db/crud_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/channels" "github.com/couchbaselabs/go.assert" ) @@ -352,3 +353,262 @@ func TestRevisionStoragePruneTombstone(t *testing.T) { assertTrue(t, base.IsKeyNotFoundError(db.Bucket, err), "Revision should be not found") } + +// Checks for unwanted interaction between old revision body backups and revision cache +func TestOldRevisionStorage(t *testing.T) { + + db, _ := setupTestDBWithCacheOptions(t, CacheOptions{}) + defer tearDownTestDB(t, db) + + prop_1000_bytes := base.CreateProperty(1000) + + // Create rev 1-a + log.Printf("Create rev 1-a") + body := Body{"key1": "value1", "version": "1a", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", body, []string{"1-a"}, false), "add 1-a") + + // Create rev 2-a + // 1-a + // | + // 2-a + log.Printf("Create rev 2-a") + rev2a_body := Body{"key1": "value2", "version": "2a", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev2a_body, []string{"2-a", "1-a"}, false), "add 2-a") + + // Retrieve the document: + log.Printf("Retrieve doc 2-a...") + gotbody, err := db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev2a_body) + + // Create rev 3-a + + // 1-a + // | + // 2-a + // | + // 3-a + log.Printf("Create rev 3-a") + rev3a_body := Body{"key1": "value2", "version": "3a", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev3a_body, []string{"3-a", "2-a", "1-a"}, false), "add 3-a") + + // Retrieve the document: + log.Printf("Retrieve doc 3-a...") + gotbody, err = db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev3a_body) + + // Create rev 2-b + // 1-a + // / \ + // 2-a 2-b + // | + // 3-a + log.Printf("Create rev 2-b") + rev2b_body := Body{"key1": "value2", "version": "2b", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev2b_body, []string{"2-b", "1-a"}, false), "add 2-b") + + // Retrieve the document: + log.Printf("Retrieve doc, verify still rev 3-a") + gotbody, err = db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev3a_body) + + // Create rev that hops a few generations + // 1-a + // / \ + // 2-a 2-b + // | + // 3-a + // | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Create rev 6-a") + rev6a_body := Body{"key1": "value2", "version": "6a", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev6a_body, []string{"6-a", "5-a", "4-a", "3-a"}, false), "add 6-a") + + // Retrieve the document: + log.Printf("Retrieve doc 6-a...") + gotbody, err = db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev6a_body) + + // Add child to non-winning revision w/ inline body + // 1-a + // / \ + // 2-a 2-b + // 
| | + // 3-a 3-b + // | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Create rev 3-b") + rev3b_body := Body{"key1": "value2", "version": "3b", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev3b_body, []string{"3-b", "2-b", "1-a"}, false), "add 3-b") + + // Same again and again + // Add child to non-winning revision w/ inline body + // 1-a + // / \ + // 2-a 2-b + // | / | \ + // 3-a 3-b 3-c 3-d + // | + // 4-a + // | + // 5-a + // | + // 6-a + + log.Printf("Create rev 3-c") + rev3c_body := Body{"key1": "value2", "version": "3c", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev3c_body, []string{"3-c", "2-b", "1-a"}, false), "add 3-c") + + log.Printf("Create rev 3-d") + rev3d_body := Body{"key1": "value2", "version": "3d", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev3d_body, []string{"3-d", "2-b", "1-a"}, false), "add 3-d") + + // Create new winning revision on 'b' branch. Triggers movement of 6-a to inline storage. Force cas retry, check document contents + // 1-a + // / \ + // 2-a 2-b + // | / | \ + // 3-a 3-b 3-c 3-d + // | | + // 4-a 4-b + // | | + // 5-a 5-b + // | | + // 6-a 6-b + // | + // 7-b + log.Printf("Create rev 7-b") + rev7b_body := Body{"key1": "value2", "version": "7b", "large": prop_1000_bytes} + assertNoError(t, db.PutExistingRev("doc1", rev7b_body, []string{"7-b", "6-b", "5-b", "4-b", "3-b"}, false), "add 7-b") + +} + +// Ensure safe handling when hitting a bucket error during backup of old revision bodies. +// https://github.com/couchbase/sync_gateway/issues/3692 +func TestOldRevisionStorageError(t *testing.T) { + + // Use LeakyBucket to force a server error when persisting the old revision body for doc1, rev 2-b + forceErrorKey := oldRevisionKey("doc1", "2-b") + leakyConfig := base.LeakyBucketConfig{ + ForceErrorSetRawKeys: []string{forceErrorKey}, + } + db := setupTestLeakyDBWithCacheOptions(t, CacheOptions{}, leakyConfig) + defer tearDownTestDB(t, db) + + db.ChannelMapper = channels.NewChannelMapper(`function(doc, oldDoc) {channel(doc.channels);}`) + + // Create rev 1-a + log.Printf("Create rev 1-a") + body := Body{"key1": "value1", "v": "1a"} + assertNoError(t, db.PutExistingRev("doc1", body, []string{"1-a"}, false), "add 1-a") + + // Create rev 2-a + // 1-a + // | + // 2-a + log.Printf("Create rev 2-a") + rev2a_body := Body{"key1": "value2", "v": "2a"} + assertNoError(t, db.PutExistingRev("doc1", rev2a_body, []string{"2-a", "1-a"}, false), "add 2-a") + + // Retrieve the document: + log.Printf("Retrieve doc 2-a...") + gotbody, err := db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev2a_body) + + // Create rev 3-a, should re-attempt to write old revision body for 2-a + // 1-a + // | + // 2-a + // | + // 3-a + log.Printf("Create rev 3-a") + rev3a_body := Body{"key1": "value2", "v": "3a"} + assertNoError(t, db.PutExistingRev("doc1", rev3a_body, []string{"3-a", "2-a", "1-a"}, false), "add 3-a") + + // Create rev 2-b + // 1-a + // / \ + // 2-a 2-b + // | + // 3-a + log.Printf("Create rev 2-b") + rev2b_body := Body{"key1": "value2", "v": "2b"} + assertNoError(t, db.PutExistingRev("doc1", rev2b_body, []string{"2-b", "1-a"}, false), "add 2-b") + + // Retrieve the document: + log.Printf("Retrieve doc, verify still rev 3-a") + gotbody, err = db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev3a_body) + + // Create rev that hops a few generations + // 1-a + // / \ + // 2-a 2-b + // | + // 3-a + 
// | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Create rev 6-a") + rev6a_body := Body{"key1": "value2", "v": "6a"} + assertNoError(t, db.PutExistingRev("doc1", rev6a_body, []string{"6-a", "5-a", "4-a", "3-a"}, false), "add 6-a") + + // Retrieve the document: + log.Printf("Retrieve doc 6-a...") + gotbody, err = db.Get("doc1") + assertNoError(t, err, "Couldn't get document") + assert.DeepEquals(t, gotbody, rev6a_body) + + // Add child to non-winning revision w/ inline body + // Creation of 3-b will trigger leaky bucket handling when obsolete body of rev 2-b is persisted + // 1-a + // / \ + // 2-a 2-b + // | | + // 3-a 3-b + // | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Create rev 3-b") + rev3b_body := Body{"key1": "value2", "v": "3b"} + assertNoError(t, db.PutExistingRev("doc1", rev3b_body, []string{"3-b", "2-b", "1-a"}, false), "add 3-b") + + // Same again + // Add child to non-winning revision w/ inline body. + // Prior to fix for https://github.com/couchbase/sync_gateway/issues/3692, this fails due to malformed oldDoc + // 1-a + // / \ + // 2-a 2-b + // | / | + // 3-a 3-b 3-c + // | + // 4-a + // | + // 5-a + // | + // 6-a + + log.Printf("Create rev 3-c") + rev3c_body := Body{"key1": "value2", "v": "3c"} + assertNoError(t, db.PutExistingRev("doc1", rev3c_body, []string{"3-c", "2-b", "1-a"}, false), "add 3-c") + +} From 308786c15ad55dc8ecbbf099c10b4cc875385a5b Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 28 Sep 2018 17:19:44 +0100 Subject: [PATCH 11/17] 2.1.1 Backport #3724 Fix resync with xattrs warnings (#3730) (#3753) * Drop verbose 'Re-syncing document' log level from Info to Debug * Return couchbase.UpdateCancel when there's nothing to do in resync w/ xattrs * Remove couchbase.UpdateCancel and use new base.ErrUpdateCancel instead * Make SGError simpler by removing sgErrorCodes --- auth/auth.go | 13 ++++--- base/error.go | 88 ++++++++++++--------------------------------- db/crud.go | 4 +-- db/database.go | 19 +++++----- db/repair_bucket.go | 11 +++--- db/shadower.go | 6 ++-- 6 files changed, 46 insertions(+), 95 deletions(-) diff --git a/auth/auth.go b/auth/auth.go index e00ee94aa6..afe85f4c13 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -14,7 +14,6 @@ import ( "github.com/coreos/go-oidc/jose" "github.com/coreos/go-oidc/oidc" - "github.com/couchbase/go-couchbase" "github.com/couchbase/sync_gateway/base" ch "github.com/couchbase/sync_gateway/channels" pkgerrors "github.com/pkg/errors" @@ -108,7 +107,7 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) // Be careful: this block can be invoked multiple times if there are races! if currentValue == nil { princ = nil - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } princ = factory() @@ -143,11 +142,11 @@ func (auth *Authenticator) getPrincipal(docID string, factory func() Principal) return updatedBytes, nil, marshalErr } else { // Principal is valid, so stop the update - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } }) - if err != nil && err != couchbase.UpdateCancel { + if err != nil && err != base.ErrUpdateCancel { return nil, err } return princ, nil @@ -443,7 +442,7 @@ func (auth *Authenticator) updateVbucketSequences(docID string, factory func() P err := auth.bucket.Update(docID, 0, func(currentValue []byte) ([]byte, *uint32, error) { // Be careful: this block can be invoked multiple times if there are races! 
if currentValue == nil { - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } princ := factory() if err := json.Unmarshal(currentValue, princ); err != nil { @@ -491,11 +490,11 @@ func (auth *Authenticator) updateVbucketSequences(docID string, factory func() P return updatedBytes, nil, marshalErr } else { // No entries found requiring update, so cancel update. - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } }) - if err != nil && err != couchbase.UpdateCancel { + if err != nil && err != base.ErrUpdateCancel { return err } return nil diff --git a/base/error.go b/base/error.go index 2a0faff447..c9bdb24117 100644 --- a/base/error.go +++ b/base/error.go @@ -20,80 +20,36 @@ import ( pkgerrors "github.com/pkg/errors" ) -type sgErrorCode uint16 - -const ( - alreadyImported = sgErrorCode(0x00) - importCancelled = sgErrorCode(0x01) - importCasFailure = sgErrorCode(0x02) - viewTimeoutError = sgErrorCode(0x03) - revTreeAddRevFailure = sgErrorCode(0x04) - importCancelledFilter = sgErrorCode(0x05) - documentMigrated = sgErrorCode(0x06) - fatalBucketConnection = sgErrorCode(0x07) - emptyMetadata = sgErrorCode(0x08) - casFailureShouldRetry = sgErrorCode(0x09) - errPartialViewErrors = sgErrorCode(0x10) - indexerError = sgErrorCode(0x11) - indexExists = sgErrorCode(0x12) - notFound = sgErrorCode(0x13) -) - -type SGError struct { - code sgErrorCode +type sgError struct { + message string } var ( - ErrRevTreeAddRevFailure = &SGError{revTreeAddRevFailure} - ErrImportCancelled = &SGError{importCancelled} - ErrAlreadyImported = &SGError{alreadyImported} - ErrImportCasFailure = &SGError{importCasFailure} - ErrViewTimeoutError = &SGError{viewTimeoutError} - ErrImportCancelledFilter = &SGError{importCancelledFilter} - ErrDocumentMigrated = &SGError{documentMigrated} - ErrFatalBucketConnection = &SGError{fatalBucketConnection} - ErrEmptyMetadata = &SGError{emptyMetadata} - ErrCasFailureShouldRetry = &SGError{casFailureShouldRetry} - ErrIndexerError = &SGError{indexerError} - ErrIndexAlreadyExists = &SGError{indexExists} - ErrNotFound = &SGError{notFound} + ErrRevTreeAddRevFailure = &sgError{"Failure adding Rev to RevTree"} + ErrImportCancelled = &sgError{"Import cancelled"} + ErrAlreadyImported = &sgError{"Document already imported"} + ErrImportCasFailure = &sgError{"CAS failure during import"} + ErrViewTimeoutError = &sgError{"Timeout performing Query"} + ErrImportCancelledFilter = &sgError{"Import cancelled based on import filter"} + ErrDocumentMigrated = &sgError{"Document migrated"} + ErrFatalBucketConnection = &sgError{"Fatal error connecting to bucket"} + ErrEmptyMetadata = &sgError{"Empty Sync Gateway metadata"} + ErrCasFailureShouldRetry = &sgError{"CAS failure should retry"} + ErrIndexerError = &sgError{"Indexer error"} + ErrIndexAlreadyExists = &sgError{"Index already exists"} + ErrNotFound = &sgError{"Not Found"} + ErrUpdateCancel = &sgError{"Cancel update"} // ErrPartialViewErrors is returned if the view call contains any partial errors. // This is more of a warning, and inspecting ViewResult.Errors is required for detail. 
- ErrPartialViewErrors = &SGError{errPartialViewErrors} + ErrPartialViewErrors = &sgError{"Partial errors in view"} ) -func (e SGError) Error() string { - switch e.code { - case alreadyImported: - return "Document already imported" - case importCancelled: - return "Import cancelled" - case documentMigrated: - return "Document migrated" - case importCancelledFilter: - return "Import cancelled based on import filter" - case importCasFailure: - return "CAS failure during import" - case revTreeAddRevFailure: - return "Failure adding Rev to RevTree" - case viewTimeoutError: - return "Timeout performing Query" - case fatalBucketConnection: - return "Fatal error connecting to bucket" - case emptyMetadata: - return "Empty Sync Gateway metadata" - case errPartialViewErrors: - return "Partial errors in view" - case indexerError: - return "Indexer error" - case indexExists: - return "Index already exists" - case notFound: - return "Not Found" - default: - return "Unknown error" +func (e *sgError) Error() string { + if e.message != "" { + return e.message } + return "Unknown error" } // Simple error implementation wrapping an HTTP response status. @@ -156,7 +112,7 @@ func ErrorAsHTTPStatus(err error) (int, string) { } case sgbucket.MissingError: return http.StatusNotFound, "missing" - case *SGError: + case *sgError: switch unwrappedErr { case ErrNotFound: return http.StatusNotFound, "missing" diff --git a/db/crud.go b/db/crud.go index 2c98bde34b..e36e2ccfcc 100644 --- a/db/crud.go +++ b/db/crud.go @@ -684,7 +684,7 @@ func (db *Database) PutExistingRev(docid string, body Body, docHistory []string, if currentRevIndex == 0 { base.Debugf(base.KeyCRUD, "PutExistingRev(%q): No new revisions to add", base.UD(docid)) body["_rev"] = newRev // The _rev field is expected by some callers. If missing, may cause problems for callers. - return nil, nil, nil, couchbase.UpdateCancel // No new revisions to add + return nil, nil, nil, base.ErrUpdateCancel // No new revisions to add } // Conflict-free mode check @@ -1112,7 +1112,7 @@ func (db *Database) updateAndReturnDoc( } } - if err == couchbase.UpdateCancel { + if err == base.ErrUpdateCancel { return nil, "", nil } else if err == couchbase.ErrOverwritten { // ErrOverwritten is ok; if a later revision got persisted, that's fine too diff --git a/db/database.go b/db/database.go index d1b3339450..4c22b5039c 100644 --- a/db/database.go +++ b/db/database.go @@ -21,7 +21,6 @@ import ( "sync/atomic" "time" - "github.com/couchbase/go-couchbase" sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/auth" "github.com/couchbase/sync_gateway/base" @@ -825,11 +824,11 @@ func (context *DatabaseContext) UpdateSyncFun(syncFun string) (changed bool, err bytes, err := json.Marshal(syncData) return bytes, nil, err } else { - return nil, nil, couchbase.UpdateCancel // value unchanged, no need to save + return nil, nil, base.ErrUpdateCancel // value unchanged, no need to save } }) - if err == couchbase.UpdateCancel { + if err == base.ErrUpdateCancel { err = nil } return @@ -867,9 +866,9 @@ func (db *Database) UpdateAllDocChannels() (int, error) { imported := false if !doc.HasValidSyncData(db.writeSequences()) { // This is a document not known to the sync gateway. 
Ignore it: - return nil, false, nil, couchbase.UpdateCancel + return nil, false, nil, base.ErrUpdateCancel } else { - base.Infof(base.KeyCRUD, "\tRe-syncing document %q", base.UD(docid)) + base.Debugf(base.KeyCRUD, "\tRe-syncing document %q", base.UD(docid)) } // Run the sync fn over each current/leaf revision, in case there are conflicts: @@ -906,7 +905,7 @@ func (db *Database) UpdateAllDocChannels() (int, error) { // There's no scenario where a doc should from non-deleted to deleted during UpdateAllDocChannels processing, // so deleteDoc is always returned as false. if currentValue == nil || len(currentValue) == 0 { - return nil, nil, deleteDoc, nil, errors.New("Cancel update") + return nil, nil, deleteDoc, nil, base.ErrUpdateCancel } doc, err := unmarshalDocumentWithXattr(docid, currentValue, currentXattr, cas, DocUnmarshalAll) if err != nil { @@ -925,7 +924,7 @@ func (db *Database) UpdateAllDocChannels() (int, error) { raw, rawXattr, err = updatedDoc.MarshalWithXattr() return raw, rawXattr, deleteDoc, updatedExpiry, err } else { - return nil, nil, deleteDoc, nil, errors.New("Cancel update") + return nil, nil, deleteDoc, nil, base.ErrUpdateCancel } } _, err = db.Bucket.WriteUpdateWithXattr(key, KSyncXattrName, 0, nil, writeUpdateFunc) @@ -933,7 +932,7 @@ func (db *Database) UpdateAllDocChannels() (int, error) { err = db.Bucket.Update(key, 0, func(currentValue []byte) ([]byte, *uint32, error) { // Be careful: this block can be invoked multiple times if there are races! if currentValue == nil { - return nil, nil, couchbase.UpdateCancel // someone deleted it?! + return nil, nil, base.ErrUpdateCancel // someone deleted it?! } doc, err := unmarshalDocument(docid, currentValue) if err != nil { @@ -951,13 +950,13 @@ func (db *Database) UpdateAllDocChannels() (int, error) { updatedBytes, marshalErr := json.Marshal(updatedDoc) return updatedBytes, updatedExpiry, marshalErr } else { - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } }) } if err == nil { changeCount++ - } else if err != couchbase.UpdateCancel { + } else if err != base.ErrUpdateCancel { base.Warnf(base.KeyAll, "Error updating doc %q: %v", base.UD(docid), err) } } diff --git a/db/repair_bucket.go b/db/repair_bucket.go index 20d0904c23..5946bb1d6f 100644 --- a/db/repair_bucket.go +++ b/db/repair_bucket.go @@ -5,7 +5,6 @@ import ( "fmt" "time" - "github.com/couchbase/go-couchbase" "github.com/couchbase/sync_gateway/base" pkgerrors "github.com/pkg/errors" ) @@ -191,7 +190,7 @@ func (r RepairBucket) RepairBucket() (results []RepairBucketResult, err error) { err = r.Bucket.Update(key, 0, func(currentValue []byte) ([]byte, *uint32, error) { // Be careful: this block can be invoked multiple times if there are races! if currentValue == nil { - return nil, nil, couchbase.UpdateCancel // someone deleted it?! + return nil, nil, base.ErrUpdateCancel // someone deleted it?! } updatedDoc, shouldUpdate, repairJobs, err := r.TransformBucketDoc(key, currentValue) if err != nil { @@ -218,19 +217,19 @@ func (r RepairBucket) RepairBucket() (results []RepairBucketResult, err error) { results = append(results, result) if r.DryRun { - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } else { return updatedDoc, nil, nil } default: - return nil, nil, couchbase.UpdateCancel + return nil, nil, base.ErrUpdateCancel } }) if err != nil { - // Ignore couchbase.UpdateCancel (Cas.QUIT) errors. 
Any other errors should be returned to caller - if pkgerrors.Cause(err) != couchbase.UpdateCancel { + // Ignore base.ErrUpdateCancel (Cas.QUIT) errors. Any other errors should be returned to caller + if pkgerrors.Cause(err) != base.ErrUpdateCancel { return results, err } } diff --git a/db/shadower.go b/db/shadower.go index 9219eb936f..c33639ecee 100644 --- a/db/shadower.go +++ b/db/shadower.go @@ -7,8 +7,6 @@ import ( "strings" "sync/atomic" - "github.com/couchbase/go-couchbase" - sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/channels" @@ -112,7 +110,7 @@ func (s *Shadower) pullDocument(key string, value []byte, isDeletion bool, cas u _, err = db.updateDoc(key, false, expiry, func(doc *document) (resultBody Body, resultAttachmentData AttachmentData, updatedExpiry *uint32, resultErr error) { // (Be careful: this block can be invoked multiple times if there are races!) if doc.UpstreamCAS != nil && *doc.UpstreamCAS == cas { - return nil, nil, nil, couchbase.UpdateCancel // we already have this doc revision + return nil, nil, nil, base.ErrUpdateCancel // we already have this doc revision } base.Debugf(base.KeyShadow, "Pulling %q, CAS=%x ... have UpstreamRev=%q, UpstreamCAS=%x", base.UD(key), cas, doc.UpstreamRev, doc.UpstreamCAS) @@ -147,7 +145,7 @@ func (s *Shadower) pullDocument(key string, value []byte, isDeletion bool, cas u } return body, nil, nil, nil }) - if err == couchbase.UpdateCancel { + if err == base.ErrUpdateCancel { err = nil } return err From b180df76f81e324638bbf31ab5dad6aca5908a4a Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Fri, 28 Sep 2018 10:29:13 -0700 Subject: [PATCH 12/17] Avoid error when processing documents impacted by #3692 (#3769) * Avoid error when processing documents impacted by #3692 Fixes #3700 * Test tear-down cleanup --- db/crud_test.go | 75 ++++++++++++++++++++++++++++++++++++++++++++++++- db/revtree.go | 11 ++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/db/crud_test.go b/db/crud_test.go index 7274d93a98..899055d054 100644 --- a/db/crud_test.go +++ b/db/crud_test.go @@ -357,7 +357,8 @@ func TestRevisionStoragePruneTombstone(t *testing.T) { // Checks for unwanted interaction between old revision body backups and revision cache func TestOldRevisionStorage(t *testing.T) { - db, _ := setupTestDBWithCacheOptions(t, CacheOptions{}) + db, testBucket := setupTestDBWithCacheOptions(t, CacheOptions{}) + defer testBucket.Close() defer tearDownTestDB(t, db) prop_1000_bytes := base.CreateProperty(1000) @@ -612,3 +613,75 @@ func TestOldRevisionStorageError(t *testing.T) { assertNoError(t, db.PutExistingRev("doc1", rev3c_body, []string{"3-c", "2-b", "1-a"}, false), "add 3-c") } + +const rawDocMalformedRevisionStorage = ` + { + "_sync": + {"rev":"6-a", + "new_rev":"3-b", + "flags":28, + "sequence":6, + "recent_sequences":[1,2,3,4,5,6], + "history":{ + "revs":["5-a","6-a","3-b","2-b","2-a","1-a","3-a","4-a"], + "parents":[7,0,3,5,5,-1,4,6], + "bodymap":{ + "2":"{\"key1\":\"value2\",\"v\":\"3b\"}", + "3":"\u0001{\"key1\":\"value2\",\"v\":\"2b\""}, + "channels":[null,null,null,null,null,null,null,null]}, + "cas":"", + "value_crc32c":"", + "time_saved":"2018-09-27T13:47:44.719971735-07:00" + }, + "key1":"value2", + "v":"6a" + }` + +func TestMalformedRevisionStorageRecovery(t *testing.T) { + db, testBucket := setupTestDBWithCacheOptions(t, CacheOptions{}) + defer testBucket.Close() + defer tearDownTestDB(t, db) + + db.ChannelMapper = channels.NewChannelMapper(`function(doc, 
oldDoc) {channel(doc.channels);}`) + + // Create a document with a malformed revision body (due to https://github.com/couchbase/sync_gateway/issues/3692) in the bucket + // Document has the following rev tree, with a malformed body of revision 2-b remaining in the revision tree (same set of operations as + // TestOldRevisionStorageError) + // 1-a + // / \ + // 2-a 2-b + // | | + // 3-a 3-b + // | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Add doc1 w/ malformed body for rev 2-b included in revision tree") + ok, addErr := db.Bucket.AddRaw("doc1", 0, []byte(rawDocMalformedRevisionStorage)) + assert.True(t, ok) + assertNoError(t, addErr, "Error writing raw document") + + // Increment _sync:seq to match sequences allocated by raw doc + _, incrErr := db.Bucket.Incr("_sync:seq", 5, 0, 0) + assertNoError(t, incrErr, "Error incrementing sync:seq") + + // Add child to non-winning revision w/ malformed inline body. + // Prior to fix for https://github.com/couchbase/sync_gateway/issues/3700, this fails + // 1-a + // / \ + // 2-a 2-b + // | / | + // 3-a 3-b 3-c + // | + // 4-a + // | + // 5-a + // | + // 6-a + log.Printf("Attempt to create rev 3-c") + rev3c_body := Body{"key1": "value2", "v": "3c"} + assertNoError(t, db.PutExistingRev("doc1", rev3c_body, []string{"3-c", "2-b", "1-a"}, false), "add 3-c") + +} diff --git a/db/revtree.go b/db/revtree.go index 2b8eebe601..8766002384 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -374,6 +374,17 @@ func (tree RevTree) getRevisionBody(revid string, loader RevLoaderFunc) ([]byte, } } } + + // Handling for data affected by https://github.com/couchbase/sync_gateway/issues/3692 + // In that scenario, the revision history includes an entry that should have been moved to a transient + // backup and removed from the rev tree, and the revtree version was modified to include a leading non-JSON byte. + // We ignore these rev history entries, meaning that callers will see the same view of the data + // as if the transient backup had expired. We are not attempting to repair the rev tree, as reclaiming storage + // is a much lower priority than avoiding write errors, and want to avoid introducing additional conflict scenarios. + // The invalid rev tree bodies will eventually be pruned through normal revision tree pruning. 
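+ // For illustration: the rawDocMalformedRevisionStorage fixture used by TestMalformedRevisionStorageRecovery
+ // (added above in this patch) contains exactly this kind of entry - a bodymap value beginning with the escaped
+ // byte \u0001 ("\u0001{\"key1\":\"value2\",...}"). The check below treats such a body as unavailable.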
+ if len(info.Body) > 0 && info.Body[0] == nonJSONPrefix { + return nil, false + } return info.Body, true } From 45b46dcd4dcfce0901a168b13895afa1dde3471b Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 28 Sep 2018 23:41:20 +0100 Subject: [PATCH 13/17] #3765 Remove documents from channel caches on _purge or _compact (#3770) * Fix incorrect count from Compact when add+delete fails * Remove documents from channel caches on _purge or _compact * Improve _changes requqest in test to avoid races * Fix typo in test name * Check both channels in test * Add TestChannelCacheRemove for testing channel cache startTime --- db/change_cache.go | 18 ++++++++++++++++ db/change_index.go | 3 +++ db/channel_cache.go | 46 ++++++++++++++++++++++++++++++++++++++++ db/channel_cache_test.go | 41 +++++++++++++++++++++++++++++++++++ db/database.go | 22 ++++++++++++++----- db/kv_change_index.go | 5 ++++- rest/admin_api.go | 10 +++++++++ rest/admin_api_test.go | 27 +++++++++++++++++++++++ 8 files changed, 166 insertions(+), 6 deletions(-) diff --git a/db/change_cache.go b/db/change_cache.go index dcf10b43e9..85d04d0f58 100644 --- a/db/change_cache.go +++ b/db/change_cache.go @@ -528,6 +528,24 @@ func (c *changeCache) DocChangedSynchronous(event sgbucket.FeedEvent) { } +// Remove purges the given doc IDs from all channel caches and returns the number of items removed. +// count will be larger than the input slice if the same document is removed from multiple channel caches. +func (c *changeCache) Remove(docIDs []string, startTime time.Time) (count int) { + // Exit early if there's no work to do + if len(docIDs) == 0 { + return 0 + } + + c.lock.Lock() + defer c.lock.Unlock() + + for channelName := range c.channelCaches { + count += c._getChannelCache(channelName).Remove(docIDs, startTime) + } + + return count +} + func (c *changeCache) unmarshalPrincipal(docJSON []byte, isUser bool) (auth.Principal, error) { c.context.BucketLock.RLock() diff --git a/db/change_index.go b/db/change_index.go index c0f07e05fc..ef0091499d 100644 --- a/db/change_index.go +++ b/db/change_index.go @@ -53,6 +53,9 @@ type ChangeIndex interface { // stable=true returns cached value (if available) GetStableClock(stale bool) (clock base.SequenceClock, err error) + // Remove purges the given doc IDs and returns the number of items removed + Remove(docIDs []string, startTime time.Time) (count int) + // Utility functions for unit testing waitForSequenceID(sequence SequenceID, maxWaitTime time.Duration) diff --git a/db/channel_cache.go b/db/channel_cache.go index 55e88f4653..a745417799 100644 --- a/db/channel_cache.go +++ b/db/channel_cache.go @@ -173,6 +173,52 @@ func (c *channelCache) wouldBeImmediatelyPruned(change *LogEntry) bool { } +// Remove purges the given doc IDs from the channel cache and returns the number of items removed. 
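+// startTime is the time at which the purge/compact began; entries received after startTime are left in place,
+// so documents resurrected while the operation was in flight are not removed (see the TimeReceived check below).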
+func (c *channelCache) Remove(docIDs []string, startTime time.Time) (count int) { + // Exit early if there's no work to do + if len(docIDs) == 0 { + return 0 + } + + c.lock.Lock() + defer c.lock.Unlock() + + // Build subset of docIDs that we know are present in the cache + foundDocs := make(map[string]struct{}, 0) + for _, docID := range docIDs { + if _, found := c.cachedDocIDs[docID]; found { + foundDocs[docID] = struct{}{} + } + } + + // Do the removals in one sweep of the channel cache + end := len(c.logs) - 1 + for i := end; i >= 0; i-- { + if _, ok := foundDocs[c.logs[i].DocID]; ok { + docID := c.logs[i].DocID + + // Make sure the document we're about to remove is older than the start time of the purge + // This is to ensure that resurrected documents do not accidentally get removed. + if c.logs[i].TimeReceived.After(startTime) { + base.Debugf(base.KeyCache, "Skipping removal of doc %q from cache %q - received after purge", + base.UD(docID), base.UD(c.channelName)) + continue + } + + // Memory-leak safe delete from SliceTricks: + copy(c.logs[i:], c.logs[i+1:]) + c.logs[len(c.logs)-1] = nil + c.logs = c.logs[:len(c.logs)-1] + delete(c.cachedDocIDs, docID) + count++ + + base.Tracef(base.KeyCache, "Removed doc %q from cache %q", base.UD(docID), base.UD(c.channelName)) + } + } + + return count +} + // Internal helper that prunes a single channel's cache. Caller MUST be holding the lock. func (c *channelCache) _pruneCache() { pruned := 0 diff --git a/db/channel_cache_test.go b/db/channel_cache_test.go index a3042f207b..72fe9bced2 100644 --- a/db/channel_cache_test.go +++ b/db/channel_cache_test.go @@ -14,6 +14,7 @@ import ( "log" "math/rand" "testing" + "time" "github.com/couchbase/sync_gateway/base" "github.com/couchbaselabs/go.assert" @@ -390,6 +391,46 @@ func TestPrependChanges(t *testing.T) { } } +func TestChannelCacheRemove(t *testing.T) { + + base.EnableTestLogKey("Cache+") + + context := testBucketContext() + defer context.Close() + defer base.DecrNumOpenBuckets(context.Bucket.GetName()) + + cache := newChannelCache(context, "Test1", 0) + + // Add some entries to cache + cache.addToCache(e(1, "doc1", "1-a"), false) + cache.addToCache(e(2, "doc3", "3-a"), false) + cache.addToCache(e(3, "doc5", "5-a"), false) + + entries, err := cache.GetChanges(ChangesOptions{Since: SequenceID{Seq: 0}}) + assert.Equals(t, len(entries), 3) + assert.True(t, verifyChannelSequences(entries, []uint64{1, 2, 3})) + assert.True(t, verifyChannelDocIDs(entries, []string{"doc1", "doc3", "doc5"})) + assert.True(t, err == nil) + + // Now remove doc1 + cache.Remove([]string{"doc1"}, time.Now()) + entries, err = cache.GetChanges(ChangesOptions{Since: SequenceID{Seq: 0}}) + assert.Equals(t, len(entries), 2) + assert.True(t, verifyChannelSequences(entries, []uint64{2, 3})) + assert.True(t, verifyChannelDocIDs(entries, []string{"doc3", "doc5"})) + assert.True(t, err == nil) + + // Try to remove doc5 with a startTime before it was added to ensure it's not removed + // This will print a debug level log: + // [DBG] Cache+: Skipping removal of doc "doc5" from cache "Test1" - received after purge + cache.Remove([]string{"doc5"}, time.Now().Add(-time.Second*5)) + entries, err = cache.GetChanges(ChangesOptions{Since: SequenceID{Seq: 0}}) + assert.Equals(t, len(entries), 2) + assert.True(t, verifyChannelSequences(entries, []uint64{2, 3})) + assert.True(t, verifyChannelDocIDs(entries, []string{"doc3", "doc5"})) + assert.True(t, err == nil) +} + func verifyChannelSequences(entries []*LogEntry, sequences []uint64) bool { if 
len(entries) != len(sequences) { log.Printf("verifyChannelSequences: entries size (%v) not equals to sequences size (%v)", diff --git a/db/database.go b/db/database.go index 4c22b5039c..48d9c3a106 100644 --- a/db/database.go +++ b/db/database.go @@ -748,7 +748,8 @@ func (db *Database) Compact() (int, error) { // Trigger view compaction for all tombstoned documents older than the purge interval purgeIntervalDuration := time.Duration(-db.PurgeInterval) * time.Hour - purgeOlderThan := time.Now().Add(purgeIntervalDuration) + startTime := time.Now() + purgeOlderThan := startTime.Add(purgeIntervalDuration) results, err := db.QueryTombstones(purgeOlderThan) if err != nil { return 0, err @@ -756,7 +757,7 @@ func (db *Database) Compact() (int, error) { base.Infof(base.KeyAll, "Compacting purged tombstones for %s ...", base.UD(db.Name)) purgeBody := Body{"_purged": true} - count := 0 + purgedDocs := make([]string, 0) var tombstonesRow QueryIdRow for results.Next(&tombstonesRow) { @@ -764,7 +765,7 @@ func (db *Database) Compact() (int, error) { // First, attempt to purge. purgeErr := db.Purge(tombstonesRow.Id) if purgeErr == nil { - count++ + purgedDocs = append(purgedDocs, tombstonesRow.Id) } else if base.IsKeyNotFoundError(db.Bucket, purgeErr) { // If key no longer exists, need to add and remove to trigger removal from view _, addErr := db.Bucket.Add(tombstonesRow.Id, 0, purgeBody) @@ -772,14 +773,25 @@ func (db *Database) Compact() (int, error) { base.Warnf(base.KeyAll, "Error compacting key %s (add) - tombstone will not be compacted. %v", base.UD(tombstonesRow.Id), addErr) continue } + + // At this point, the doc is not in a usable state for mobile + // so mark it to be removed from cache, even if the subsequent delete fails + purgedDocs = append(purgedDocs, tombstonesRow.Id) + if delErr := db.Bucket.Delete(tombstonesRow.Id); delErr != nil { - base.Warnf(base.KeyAll, "Error compacting key %s (delete) - tombstone will not be compacted. %v", base.UD(tombstonesRow.Id), delErr) + base.Errorf(base.KeyAll, "Error compacting key %s (delete) - tombstone will not be compacted. %v", base.UD(tombstonesRow.Id), delErr) } - count++ } else { base.Warnf(base.KeyAll, "Error compacting key %s (purge) - tombstone will not be compacted. 
%v", base.UD(tombstonesRow.Id), purgeErr) } } + + // Now purge them from all channel caches + count := len(purgedDocs) + if count > 0 { + db.changeCache.Remove(purgedDocs, startTime) + } + return count, nil } diff --git a/db/kv_change_index.go b/db/kv_change_index.go index 45807340de..3c7ed4c280 100644 --- a/db/kv_change_index.go +++ b/db/kv_change_index.go @@ -19,10 +19,10 @@ import ( "reflect" "strings" "sync" + "time" sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/base" - "time" ) const ( @@ -189,6 +189,9 @@ func (k *kvChangeIndex) getOldestSkippedSequence() uint64 { func (k *kvChangeIndex) getChannelCache(channelName string) *channelCache { return nil } +func (k *kvChangeIndex) Remove(docIDs []string, startTime time.Time) int { + return 0 +} // TODO: refactor waitForSequence to accept either vbNo or clock func (k *kvChangeIndex) waitForSequenceID(sequence SequenceID, maxWaitTime time.Duration) { diff --git a/rest/admin_api.go b/rest/admin_api.go index 3a887f348c..1379376b78 100644 --- a/rest/admin_api.go +++ b/rest/admin_api.go @@ -672,6 +672,9 @@ func (h *handler) handlePurge() error { return base.HTTPErrorf(http.StatusBadRequest, "_purge document ID's must be passed as a JSON") } + startTime := time.Now() + docIDs := make([]string, 0) + h.setHeader("Content-Type", "application/json") h.setHeader("Cache-Control", "private, max-age=0, no-cache, no-store") h.response.Write([]byte("{\"purged\":{\r\n")) @@ -698,6 +701,8 @@ func (h *handler) handlePurge() error { err = h.db.Purge(key) if err == nil { + docIDs = append(docIDs, key) + if first { first = false } else { @@ -718,6 +723,11 @@ func (h *handler) handlePurge() error { } } + if len(docIDs) > 0 { + count := h.db.GetChangeIndex().Remove(docIDs, startTime) + base.Debugf(base.KeyCache, "Purged %d items from caches", count) + } + h.response.Write([]byte("}\n}\n")) h.logStatusWithDuration(http.StatusOK, message) diff --git a/rest/admin_api_test.go b/rest/admin_api_test.go index 9c598c07c3..ca120e72f0 100644 --- a/rest/admin_api_test.go +++ b/rest/admin_api_test.go @@ -1487,6 +1487,33 @@ func TestPurgeWithMultipleValidDocs(t *testing.T) { assertStatus(t, rt.SendRequest("PUT", "/db/doc2", `{"moo":"car"}`), 201) } +// TestPurgeWithChannelCache will make sure thant upon calling _purge, the channel caches are also cleaned +// This was fixed in #3765, previously channel caches were not cleaned up +func TestPurgeWithChannelCache(t *testing.T) { + var rt RestTester + defer rt.Close() + + assertStatus(t, rt.SendRequest("PUT", "/db/doc1", `{"foo":"bar", "channels": ["abc", "def"]}`), http.StatusCreated) + assertStatus(t, rt.SendRequest("PUT", "/db/doc2", `{"moo":"car", "channels": ["abc"]}`), http.StatusCreated) + + changes, err := rt.WaitForChanges(2, "/db/_changes?filter=sync_gateway/bychannel&channels=abc,def", "", true) + assertNoError(t, err, "Error waiting for changes") + assert.Equals(t, changes.Results[0].ID, "doc1") + assert.Equals(t, changes.Results[1].ID, "doc2") + + // Purge "doc1" + resp := rt.SendAdminRequest("POST", "/db/_purge", `{"doc1":["*"]}`) + assertStatus(t, resp, http.StatusOK) + var body map[string]interface{} + json.Unmarshal(resp.Body.Bytes(), &body) + assert.DeepEquals(t, body, map[string]interface{}{"purged": map[string]interface{}{"doc1": []interface{}{"*"}}}) + + changes, err = rt.WaitForChanges(1, "/db/_changes?filter=sync_gateway/bychannel&channels=abc,def", "", true) + assertNoError(t, err, "Error waiting for changes") + assert.Equals(t, changes.Results[0].ID, "doc2") + +} + func 
TestPurgeWithSomeInvalidDocs(t *testing.T) { var rt RestTester defer rt.Close() From ce0e8e7ce41969cd8fd06ce07129caa4394b7bee Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Mon, 1 Oct 2018 17:17:31 +0100 Subject: [PATCH 14/17] Seed DCP Feed from all nodes (Fixes #3756) (#3757) (#3772) * Seed DCP Feed from all nodes (Fixes #3756) * Add unit test for GetStatsVbSeqno * Add test for replica vbuckets * Split test for lagging and non-lagging replicas --- base/bucket.go | 7 +- base/bucket_test.go | 194 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 3 deletions(-) diff --git a/base/bucket.go b/base/bucket.go index 6ce27ab7c9..b29c9b6702 100644 --- a/base/bucket.go +++ b/base/bucket.go @@ -493,7 +493,10 @@ func GetStatsVbSeqno(stats map[string]map[string]string, maxVbno uint16, useAbsH } highSeqno, err := strconv.ParseUint(serverMap[highSeqnoKey], 10, 64) - if err == nil && highSeqno > 0 { + // Each node will return seqnos for its active and replica vBuckets. Iterating over all nodes will give us + // numReplicas*maxVbno results. Rather than filter by active/replica (which would require a separate STATS call) + // simply pick the highest. + if err == nil && highSeqno > highSeqnos[i] { highSeqnos[i] = highSeqno uuid, err := strconv.ParseUint(serverMap[uuidKey], 10, 64) if err == nil { @@ -501,8 +504,6 @@ func GetStatsVbSeqno(stats map[string]map[string]string, maxVbno uint16, useAbsH } } } - // We're only using a single server, so can break after the first entry in the map. - break } return diff --git a/base/bucket_test.go b/base/bucket_test.go index e58e074d5c..3bfadd19a3 100644 --- a/base/bucket_test.go +++ b/base/bucket_test.go @@ -40,3 +40,197 @@ func TestBucketSpec(t *testing.T) { assertNoError(t, err, "Error creating connection string for bucket spec") } + +func TestGetStatsVbSeqno(t *testing.T) { + + // We'll artificially lower this here to make for easier test data + const maxVbno = 4 + + tests := []struct { + name string + stats map[string]map[string]string + expectedUUIDs map[uint16]uint64 + expectedHighSeqnos map[uint16]uint64 + }{ + { + name: "1 node", + stats: map[string]map[string]string{ + "host1:11210": map[string]string{ + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + }, + }, + expectedUUIDs: map[uint16]uint64{ + 0: 90, + 1: 91, + 2: 92, + 3: 93, + }, + expectedHighSeqnos: map[uint16]uint64{ + 0: 990, + 1: 991, + 2: 992, + 3: 993, + }, + }, + { + name: "2 nodes", + stats: map[string]map[string]string{ + "host1:11210": map[string]string{ + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + }, + "host2:11210": map[string]string{ + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + }, + }, + expectedUUIDs: map[uint16]uint64{ + 0: 90, + 1: 91, + 2: 92, + 3: 93, + }, + expectedHighSeqnos: map[uint16]uint64{ + 0: 990, + 1: 991, + 2: 992, + 3: 993, + }, + }, + { + name: "4 nodes", + stats: map[string]map[string]string{ + "host1:11210": map[string]string{ + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + }, + "host2:11210": map[string]string{ + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + }, + "host3:11210": map[string]string{ + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + }, + "host4:11210": map[string]string{ + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + }, + }, + expectedUUIDs: map[uint16]uint64{ + 
0: 90, + 1: 91, + 2: 92, + 3: 93, + }, + expectedHighSeqnos: map[uint16]uint64{ + 0: 990, + 1: 991, + 2: 992, + 3: 993, + }, + }, + { + name: "2 nodes with replica vbuckets", + stats: map[string]map[string]string{ + "host1:11210": map[string]string{ + // active vbuckets + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + // replica vbuckets + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + }, + "host2:11210": map[string]string{ + // active vbuckets + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + // replica vbuckets + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + }, + }, + expectedUUIDs: map[uint16]uint64{ + 0: 90, + 1: 91, + 2: 92, + 3: 93, + }, + expectedHighSeqnos: map[uint16]uint64{ + 0: 990, + 1: 991, + 2: 992, + 3: 993, + }, + }, + { + name: "2 nodes with lagging replica vbuckets", + stats: map[string]map[string]string{ + "host1:11210": map[string]string{ + // active vbuckets + "vb_0:uuid": "90", + "vb_0:high_seqno": "990", + "vb_1:uuid": "91", + "vb_1:high_seqno": "991", + // slightly behind replicas + "vb_2:uuid": "92", + "vb_2:high_seqno": "982", + "vb_3:uuid": "93", + "vb_3:high_seqno": "983", + }, + "host2:11210": map[string]string{ + // active vbuckets + "vb_2:uuid": "92", + "vb_2:high_seqno": "992", + "vb_3:uuid": "93", + "vb_3:high_seqno": "993", + // slightly behind replicas + "vb_0:uuid": "90", + "vb_0:high_seqno": "980", + "vb_1:uuid": "91", + "vb_1:high_seqno": "981", + }, + }, + expectedUUIDs: map[uint16]uint64{ + 0: 90, + 1: 91, + 2: 92, + 3: 93, + }, + expectedHighSeqnos: map[uint16]uint64{ + 0: 990, + 1: 991, + 2: 992, + 3: 993, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(ts *testing.T) { + actualUUIDs, actualHighSeqnos, err := GetStatsVbSeqno(test.stats, maxVbno, false) + assert.Equals(ts, err, nil) + assert.DeepEquals(ts, actualUUIDs, test.expectedUUIDs) + assert.DeepEquals(ts, actualHighSeqnos, test.expectedHighSeqnos) + }) + } +} From c494187e089eea1725caa0aed91a649296ab7fd6 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Tue, 2 Oct 2018 09:33:55 -0700 Subject: [PATCH 15/17] Optional old revision body backups during import (#3775) Allow import to persist a temporary backup of the previous revision body, when it's available via the rev cache. Given the additional import/DCP work involved (an additional rev cache lookup and kv op per imported document), this is an opt-in feature, enabled using import_backup_old_rev config property. --- db/database.go | 1 + db/import.go | 43 ++++++++++- db/revision_cache.go | 14 ++++ rest/config.go | 1 + rest/import_test.go | 163 +++++++++++++++++++++++++++++++++++++++++ rest/server_context.go | 1 + 6 files changed, 221 insertions(+), 2 deletions(-) diff --git a/db/database.go b/db/database.go index 48d9c3a106..5b70e1b9b5 100644 --- a/db/database.go +++ b/db/database.go @@ -125,6 +125,7 @@ type UnsupportedOptions struct { // Options associated with the import of documents not written by Sync Gateway type ImportOptions struct { ImportFilter *ImportFilterFunction // Opt-in filter for document import + BackupOldRev bool // Create temporary backup of old revision body when available } // Represents a simulated CouchDB database. 
A new instance is created for each HTTP request, diff --git a/db/import.go b/db/import.go index 215e55e752..65d9b7ece1 100644 --- a/db/import.go +++ b/db/import.go @@ -1,15 +1,16 @@ package db import ( + "encoding/json" "errors" "expvar" + "fmt" "strconv" sgbucket "github.com/couchbase/sg-bucket" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/channels" "github.com/robertkrimen/otto" - "fmt" ) var importExpvars *expvar.Map @@ -194,7 +195,16 @@ func (db *Database) importDoc(docid string, body Body, isDelete bool, existingDo body["_rev"] = newRev doc.History.addRevision(docid, RevInfo{ID: newRev, Parent: parentRev, Deleted: isDelete}) - // During import, oldDoc (doc.Body) is nil (since it's no longer available) + // If the previous revision body is available in the rev cache, + // make a temporary copy in the bucket for other nodes/clusters + if db.DatabaseContext.Options.ImportOptions.BackupOldRev && doc.CurrentRev != "" { + backupErr := db.backupPreImportRevision(docid, doc.CurrentRev) + if backupErr != nil { + base.Infof(base.KeyImport, "Optimistic backup of previous revision failed due to %s", backupErr) + } + } + + // During import, oldDoc (doc.Body) is nil (since it's not guaranteed to be available) doc.RemoveBody() // Note - no attachments processing is done during ImportDoc. We don't (currently) support writing attachments through anything but SG. @@ -279,6 +289,35 @@ func (db *Database) migrateMetadata(docid string, body Body, existingDoc *sgbuck } +// backupPreImportRev attempts to make a temporary backup of a revision body if the +// revision is currently resident in the revision cache. This is the import parallel for +// the temporary revision bodies made during SG writes. Allows in-flight replications on +// other Sync Gateway nodes to serve the previous revision +// (https://github.com/couchbase/sync_gateway/issues/3740) +func (db *Database) backupPreImportRevision(docid, revid string) error { + + previousBody, _, _, err := db.revisionCache.GetCached(docid, revid) + if err != nil { + return fmt.Errorf("Cache error: %v", err) + } + + if previousBody == nil { + return nil + } + + bodyJson, marshalErr := json.Marshal(stripSpecialProperties(previousBody)) + if marshalErr != nil { + return fmt.Errorf("Marshal error: %v", marshalErr) + } + + setOldRevErr := db.setOldRevisionJSON(docid, revid, bodyJson) + if setOldRevErr != nil { + return fmt.Errorf("Persistence error: %v", setOldRevErr) + } + + return nil +} + //////// Import Filter Function // A compiled JavaScript event function. diff --git a/db/revision_cache.go b/db/revision_cache.go index bd76dec971..1cdd08cf89 100644 --- a/db/revision_cache.go +++ b/db/revision_cache.go @@ -62,6 +62,20 @@ func (rc *RevisionCache) Get(docid, revid string) (Body, Body, base.Set, error) return body, history, channels, err } +// Looks up a revision from the cache-only. Will not fall back to loader function if not +// present in the cache. +func (rc *RevisionCache) GetCached(docid, revid string) (Body, Body, base.Set, error) { + value := rc.getValue(docid, revid, false) + if value == nil { + return nil, nil, nil, nil + } + body, history, channels, err := value.load(rc.loaderFunc) + if err != nil { + rc.removeValue(value) // don't keep failed loads in the cache + } + return body, history, channels, err +} + // Attempts to retrieve the active revision for a document from the cache. 
Requires retrieval // of the document from the bucket to guarantee the current active revision, but does minimal unmarshalling // of the retrieved document to get the current rev from _sync metadata. If active rev is already in the diff --git a/rest/config.go b/rest/config.go index 5806436d89..655266ba6c 100644 --- a/rest/config.go +++ b/rest/config.go @@ -166,6 +166,7 @@ type DbConfig struct { RevsLimit *uint32 `json:"revs_limit,omitempty"` // Max depth a document's revision tree can grow to AutoImport interface{} `json:"import_docs,omitempty"` // Whether to automatically import Couchbase Server docs into SG. Xattrs must be enabled. true or "continuous" both enable this. ImportFilter *string `json:"import_filter,omitempty"` // Filter function (import) + ImportBackupOldRev bool `json:"import_backup_old_rev"` // Whether import should attempt to create a temporary backup of the previous revision body, when available. Shadow *ShadowConfig `json:"shadow,omitempty"` // This is where the ShadowConfig used to be. If found, it should throw an error EventHandlers interface{} `json:"event_handlers,omitempty"` // Event handlers (webhook) FeedType string `json:"feed_type,omitempty"` // Feed type - "DCP" or "TAP"; defaults based on Couchbase server version diff --git a/rest/import_test.go b/rest/import_test.go index 496f91cdb9..b1ea68df62 100644 --- a/rest/import_test.go +++ b/rest/import_test.go @@ -1227,6 +1227,169 @@ func TestImportBinaryDoc(t *testing.T) { assert.True(t, response.Code != 200) } +// Test creation of backup revision on import +func TestImportRevisionCopy(t *testing.T) { + + SkipImportTestsIfNotEnabled(t) + + rt := RestTester{ + SyncFn: `function(doc, oldDoc) { channel(doc.channels) }`, + DatabaseConfig: &DbConfig{ + ImportBackupOldRev: true, + }, + } + defer rt.Close() + + bucket := rt.Bucket() + rt.SendAdminRequest("PUT", "/_logging", `{"Import+":true}`) + + key := "TestImportRevisionCopy" + docBody := make(map[string]interface{}) + docBody["test"] = "TestImportRevisionCopy" + docBody["channels"] = "ABC" + + // 1. Create via SDK + _, err := bucket.Add(key, 0, docBody) + assertNoError(t, err, "Unable to insert doc TestImportDelete") + + // 2. Trigger import via SG retrieval + response := rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + var rawInsertResponse rawResponse + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + rev1id := rawInsertResponse.Sync.Rev + + // 3. Update via SDK + updatedBody := make(map[string]interface{}) + updatedBody["test"] = "TestImportRevisionCopyModified" + updatedBody["channels"] = "DEF" + err = bucket.Set(key, 0, updatedBody) + assertNoError(t, err, fmt.Sprintf("Unable to update doc %s", key)) + + // 4. Trigger import of update via SG retrieval + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + + // 5. Flush the rev cache (simulates attempted retrieval by a different SG node, since testing framework isn't great + // at simulating multiple SG instances) + rt.GetDatabase().FlushRevisionCache() + + // 6. 
Attempt to retrieve previous revision body + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/%s?rev=%s", key, rev1id), "") + assert.Equals(t, response.Code, 200) +} + +// Test creation of backup revision on import, when rev is no longer available in rev cache. +func TestImportRevisionCopyUnavailable(t *testing.T) { + + SkipImportTestsIfNotEnabled(t) + + rt := RestTester{ + SyncFn: `function(doc, oldDoc) { channel(doc.channels) }`, + DatabaseConfig: &DbConfig{ + ImportBackupOldRev: true, + }, + } + defer rt.Close() + + bucket := rt.Bucket() + rt.SendAdminRequest("PUT", "/_logging", `{"Import+":true}`) + + key := "TestImportRevisionCopy" + docBody := make(map[string]interface{}) + docBody["test"] = "TestImportRevisionCopy" + docBody["channels"] = "ABC" + + // 1. Create via SDK + _, err := bucket.Add(key, 0, docBody) + assertNoError(t, err, "Unable to insert doc TestImportDelete") + + // 2. Trigger import via SG retrieval + response := rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + var rawInsertResponse rawResponse + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + rev1id := rawInsertResponse.Sync.Rev + + // 3. Flush the rev cache (simulates attempted retrieval by a different SG node, since testing framework isn't great + // at simulating multiple SG instances) + rt.GetDatabase().FlushRevisionCache() + + // 4. Update via SDK + updatedBody := make(map[string]interface{}) + updatedBody["test"] = "TestImportRevisionCopyModified" + updatedBody["channels"] = "DEF" + err = bucket.Set(key, 0, updatedBody) + assertNoError(t, err, fmt.Sprintf("Unable to update doc %s", key)) + + // 5. Trigger import of update via SG retrieval + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + + // 6. Attempt to retrieve previous revision body. Should return missing, as rev wasn't in rev cache when import occurred. + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/%s?rev=%s", key, rev1id), "") + assert.Equals(t, response.Code, 404) +} + +// Verify config flag for import creation of backup revision on import +func TestImportRevisionCopyDisabled(t *testing.T) { + + SkipImportTestsIfNotEnabled(t) + + // ImportBackupOldRev not set in config, defaults to false + rt := RestTester{ + SyncFn: `function(doc, oldDoc) { channel(doc.channels) }`, + } + defer rt.Close() + + bucket := rt.Bucket() + rt.SendAdminRequest("PUT", "/_logging", `{"Import+":true}`) + + key := "TestImportRevisionCopy" + docBody := make(map[string]interface{}) + docBody["test"] = "TestImportRevisionCopy" + docBody["channels"] = "ABC" + + // 1. Create via SDK + _, err := bucket.Add(key, 0, docBody) + assertNoError(t, err, "Unable to insert doc TestImportDelete") + + // 2. Trigger import via SG retrieval + response := rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + var rawInsertResponse rawResponse + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + rev1id := rawInsertResponse.Sync.Rev + + // 3. 
Update via SDK + updatedBody := make(map[string]interface{}) + updatedBody["test"] = "TestImportRevisionCopyModified" + updatedBody["channels"] = "DEF" + err = bucket.Set(key, 0, updatedBody) + assertNoError(t, err, fmt.Sprintf("Unable to update doc %s", key)) + + // 4. Trigger import of update via SG retrieval + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/_raw/%s", key), "") + assert.Equals(t, response.Code, 200) + err = json.Unmarshal(response.Body.Bytes(), &rawInsertResponse) + assertNoError(t, err, "Unable to unmarshal raw response") + + // 5. Flush the rev cache (simulates attempted retrieval by a different SG node, since testing framework isn't great + // at simulating multiple SG instances) + rt.GetDatabase().FlushRevisionCache() + + // 6. Attempt to retrieve previous revision body. Should fail, as backup wasn't persisted + response = rt.SendAdminRequest("GET", fmt.Sprintf("/db/%s?rev=%s", key, rev1id), "") + assert.Equals(t, response.Code, 404) +} + // Test DCP backfill stats func TestDcpBackfill(t *testing.T) { diff --git a/rest/server_context.go b/rest/server_context.go index c9b645c8f4..4213bcfa90 100644 --- a/rest/server_context.go +++ b/rest/server_context.go @@ -378,6 +378,7 @@ func (sc *ServerContext) _getOrAddDatabaseFromConfig(config *DbConfig, useExisti if config.ImportFilter != nil { importOptions.ImportFilter = db.NewImportFilterFunction(*config.ImportFilter) } + importOptions.BackupOldRev = config.ImportBackupOldRev // Set cache properties, if present cacheOptions := db.CacheOptions{} From f998748431d497f4017a69d52b3a3be46f456476 Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Thu, 4 Oct 2018 08:39:11 -0700 Subject: [PATCH 16/17] Preserve low sequence across longpoll changes iterations (#3778) Ensure that low sequence is preserved across longpoll requests, to avoid missed changes when system low sequence changes between longpoll iterations. --- db/changes.go | 10 ++ rest/changes_api_test.go | 378 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 387 insertions(+), 1 deletion(-) diff --git a/db/changes.go b/db/changes.go index e88ee0577e..c63f54c182 100644 --- a/db/changes.go +++ b/db/changes.go @@ -406,6 +406,9 @@ func (db *Database) SimpleMultiChangesFeed(chans base.Set, options ChangesOption lateSequenceFeeds = make(map[string]*lateSequenceFeed) } + // Store incoming low sequence, for potential use by longpoll iterations + requestLowSeq := options.Since.LowSeq + // This loop is used to re-run the fetch after every database change, in Wait mode outer: for { @@ -632,6 +635,13 @@ func (db *Database) SimpleMultiChangesFeed(chans base.Set, options ChangesOption break } + // For longpoll requests that didn't send any results, reset low sequence to the original since value, + // as the system low sequence may change before the longpoll request wakes up, and longpoll feeds don't + // use lateSequenceFeeds. + if !options.Continuous { + options.Since.LowSeq = requestLowSeq + } + // If nothing found, and in wait mode: wait for the db to change, then run again. // First notify the reader that we're waiting by sending a nil. base.Debugf(base.KeyChanges, "MultiChangesFeed waiting... 
%s", base.UD(to)) diff --git a/rest/changes_api_test.go b/rest/changes_api_test.go index 1cc17ad2eb..ce50a25461 100644 --- a/rest/changes_api_test.go +++ b/rest/changes_api_test.go @@ -636,7 +636,7 @@ func TestChangesLoopingWhenLowSequence(t *testing.T) { CachePendingSeqMaxNum: &maxNum, CacheSkippedSeqMaxWait: &skippedMaxWait, }, - NumIndexReplicas:&numIndexReplicas, + NumIndexReplicas: &numIndexReplicas, } rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`, DatabaseConfig: shortWaitConfig} defer rt.Close() @@ -704,6 +704,382 @@ func TestChangesLoopingWhenLowSequence(t *testing.T) { } +// Test low sequence handling of late arriving sequences to a one-shot changes feed, ensuring that +// subsequent requests for the current low sequence value don't return results (avoids loops for +// longpoll as well as clients doing repeated one-off changes requests - see #1309) +func TestChangesLoopingWhenLowSequenceOneShotUser(t *testing.T) { + + if base.TestUseXattrs() { + t.Skip("This test cannot run in xattr mode until WriteDirect() is updated. See https://github.com/couchbase/sync_gateway/issues/2666#issuecomment-311183219") + } + + pendingMaxWait := uint32(5) + maxNum := 50 + skippedMaxWait := uint32(120000) + + numIndexReplicas := uint(0) + shortWaitConfig := &DbConfig{ + CacheConfig: &CacheConfig{ + CachePendingSeqMaxWait: &pendingMaxWait, + CachePendingSeqMaxNum: &maxNum, + CacheSkippedSeqMaxWait: &skippedMaxWait, + }, + NumIndexReplicas: &numIndexReplicas, + } + rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`, DatabaseConfig: shortWaitConfig} + defer rt.Close() + + testDb := rt.ServerContext().Database("db") + + response := rt.SendAdminRequest("PUT", "/_logging", `{"Changes":true, "Changes+":true, "Debug":true}`) + // Create user: + assertStatus(t, rt.SendAdminRequest("GET", "/db/_user/bernard", ""), 404) + response = rt.SendAdminRequest("PUT", "/db/_user/bernard", `{"email":"bernard@couchbase.com", "password":"letmein", "admin_channels":["PBS"]}`) + assertStatus(t, response, 201) + + // Simulate 4 non-skipped writes (seq 2,3,4,5) + WriteDirect(testDb, []string{"PBS"}, 2) + WriteDirect(testDb, []string{"PBS"}, 3) + WriteDirect(testDb, []string{"PBS"}, 4) + WriteDirect(testDb, []string{"PBS"}, 5) + testDb.WaitForSequenceWithMissing(5) + + // Check the _changes feed: + var changes struct { + Results []db.ChangeEntry + Last_Seq string + } + response = rt.Send(requestByUser("GET", "/db/_changes", "", "bernard")) + log.Printf("_changes 1 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 5) // Includes user doc + since := changes.Results[0].Seq + assert.Equals(t, since.Seq, uint64(1)) + assert.Equals(t, changes.Last_Seq, "5") + + // Skip sequence 6, write docs 7-10 + WriteDirect(testDb, []string{"PBS"}, 7) + WriteDirect(testDb, []string{"PBS"}, 8) + WriteDirect(testDb, []string{"PBS"}, 9) + WriteDirect(testDb, []string{"PBS"}, 10) + testDb.WaitForSequenceWithMissing(10) + + // Send another changes request with the last_seq received from the last changes ("5") + changesJSON := fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.Send(requestByUser("POST", "/db/_changes", changesJSON, "bernard")) + log.Printf("_changes 2 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 4) + assert.Equals(t, changes.Last_Seq, "5::10") + + // Write a few more docs + 
WriteDirect(testDb, []string{"PBS"}, 11) + WriteDirect(testDb, []string{"PBS"}, 12) + testDb.WaitForSequenceWithMissing(12) + + // Send another changes request with the last_seq received from the last changes ("5") + changesJSON = fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.Send(requestByUser("POST", "/db/_changes", changesJSON, "bernard")) + log.Printf("_changes 3 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 2) + assert.Equals(t, changes.Last_Seq, "5::12") + + // Write another doc, then the skipped doc - both should be sent, last_seq should move to 13 + WriteDirect(testDb, []string{"PBS"}, 13) + WriteDirect(testDb, []string{"PBS"}, 6) + testDb.WaitForSequence(13) + + changesJSON = fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.Send(requestByUser("POST", "/db/_changes", changesJSON, "bernard")) + log.Printf("_changes 4 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + + // Valid results here under race conditions should be: + // receive sequences 6-13, last seq 13 + // receive sequence 13, last seq 5::13 + // receive sequence 6-12, last seq 12 + // receive no sequences, last seq 5::12 + // Valid but unexpected results (no data loss): + // receive sequence 6, last sequence 6 + // receive sequence 6, last sequence 12 + // Invalid results (data loss): + // receive sequence 13, last seq 13 + // receives sequence 6, last seq 13 + switch len(changes.Results) { + case 0: + assert.Equals(t, changes.Last_Seq, "5::12") + case 1: + switch changes.Last_Seq { + case "5::13": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(13)) + case "6": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(6)) + log.Printf("Didn't expect last:6 w/ seq 6") + case "12": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(6)) + log.Printf("Didn't expect last:12 w/ seq 6") + default: + t.Errorf("invalid response. Last_Seq: %v changes: %v", changes.Last_Seq, changes.Results[0]) + } + case 7: + assert.Equals(t, changes.Last_Seq, "12") + case 8: + assert.Equals(t, changes.Last_Seq, "13") + default: + t.Errorf("Unexpected number of changes results. Last_Seq: %v len(changes): %v", changes.Last_Seq, len(changes.Results)) + } + + assert.Equals(t, len(changes.Results), 8) + assert.Equals(t, changes.Last_Seq, "13") +} + +// Test low sequence handling of late arriving sequences to a one-shot changes feed, ensuring that +// subsequent requests for the current low sequence value don't return results (avoids loops for +// longpoll as well as clients doing repeated one-off changes requests - see #1309) +func TestChangesLoopingWhenLowSequenceOneShotAdmin(t *testing.T) { + + if base.TestUseXattrs() { + t.Skip("This test cannot run in xattr mode until WriteDirect() is updated. 
See https://github.com/couchbase/sync_gateway/issues/2666#issuecomment-311183219") + } + + pendingMaxWait := uint32(5) + maxNum := 50 + skippedMaxWait := uint32(120000) + + numIndexReplicas := uint(0) + shortWaitConfig := &DbConfig{ + CacheConfig: &CacheConfig{ + CachePendingSeqMaxWait: &pendingMaxWait, + CachePendingSeqMaxNum: &maxNum, + CacheSkippedSeqMaxWait: &skippedMaxWait, + }, + NumIndexReplicas: &numIndexReplicas, + } + rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`, DatabaseConfig: shortWaitConfig} + defer rt.Close() + + testDb := rt.ServerContext().Database("db") + + response := rt.SendAdminRequest("PUT", "/_logging", `{"Changes":true, "Changes+":true, "Debug":true}`) + + // Simulate 5 non-skipped writes (seq 1,2,3,4,5) + WriteDirect(testDb, []string{"PBS"}, 1) + WriteDirect(testDb, []string{"PBS"}, 2) + WriteDirect(testDb, []string{"PBS"}, 3) + WriteDirect(testDb, []string{"PBS"}, 4) + WriteDirect(testDb, []string{"PBS"}, 5) + testDb.WaitForSequenceWithMissing(5) + + // Check the _changes feed: + var changes struct { + Results []db.ChangeEntry + Last_Seq string + } + response = rt.SendAdminRequest("GET", "/db/_changes", "") + log.Printf("_changes 1 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 5) + since := changes.Results[0].Seq + assert.Equals(t, since.Seq, uint64(1)) + assert.Equals(t, changes.Last_Seq, "5") + + // Skip sequence 6, write docs 7-10 + WriteDirect(testDb, []string{"PBS"}, 7) + WriteDirect(testDb, []string{"PBS"}, 8) + WriteDirect(testDb, []string{"PBS"}, 9) + WriteDirect(testDb, []string{"PBS"}, 10) + testDb.WaitForSequenceWithMissing(10) + + // Send another changes request with the last_seq received from the last changes ("5") + changesJSON := fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.SendAdminRequest("POST", "/db/_changes", changesJSON) + log.Printf("_changes 2 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 4) + assert.Equals(t, changes.Last_Seq, "5::10") + + // Write a few more docs + WriteDirect(testDb, []string{"PBS"}, 11) + WriteDirect(testDb, []string{"PBS"}, 12) + testDb.WaitForSequenceWithMissing(12) + + // Send another changes request with the last_seq received from the last changes ("5") + changesJSON = fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.SendAdminRequest("POST", "/db/_changes", changesJSON) + log.Printf("_changes 3 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 2) + assert.Equals(t, changes.Last_Seq, "5::12") + + // Write another doc, then the skipped doc - both should be sent, last_seq should move to 13 + WriteDirect(testDb, []string{"PBS"}, 13) + WriteDirect(testDb, []string{"PBS"}, 6) + testDb.WaitForSequence(13) + + changesJSON = fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.SendAdminRequest("POST", "/db/_changes", changesJSON) + log.Printf("_changes 4 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + + // Valid results here under race conditions should be: + // receive sequences 6-13, last seq 13 + // receive sequence 13, last seq 5::13 + // receive sequence 6-12, last seq 12 + // receive no sequences, last seq 
5::12 + // Valid but unexpected results (no data loss): + // receive sequence 6, last sequence 6 + // receive sequence 6, last sequence 12 + // Invalid results (data loss): + // receive sequence 13, last seq 13 + // receives sequence 6, last seq 13 + switch len(changes.Results) { + case 0: + assert.Equals(t, changes.Last_Seq, "5::12") + case 1: + switch changes.Last_Seq { + case "5::13": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(13)) + case "6": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(6)) + log.Printf("Didn't expect last:6 w/ seq 6") + case "12": + assert.Equals(t, changes.Results[0].Seq.Seq, uint64(6)) + log.Printf("Didn't expect last:12 w/ seq 6") + default: + t.Errorf("invalid response. Last_Seq: %v changes: %v", changes.Last_Seq, changes.Results[0]) + } + case 7: + assert.Equals(t, changes.Last_Seq, "12") + case 8: + assert.Equals(t, changes.Last_Seq, "13") + default: + t.Errorf("Unexpected number of changes results. Last_Seq: %v len(changes): %v", changes.Last_Seq, len(changes.Results)) + } + + assert.Equals(t, len(changes.Results), 8) + assert.Equals(t, changes.Last_Seq, "13") +} + +// Test low sequence handling of late arriving sequences to a longpoll changes feed, ensuring that +// subsequent requests for the current low sequence value don't return results (avoids loops for +// longpoll as well as clients doing repeated one-off changes requests - see #1309) +func TestChangesLoopingWhenLowSequenceLongpollUser(t *testing.T) { + + if base.TestUseXattrs() { + t.Skip("This test cannot run in xattr mode until WriteDirect() is updated. See https://github.com/couchbase/sync_gateway/issues/2666#issuecomment-311183219") + } + + pendingMaxWait := uint32(5) + maxNum := 50 + skippedMaxWait := uint32(120000) + + numIndexReplicas := uint(0) + shortWaitConfig := &DbConfig{ + CacheConfig: &CacheConfig{ + CachePendingSeqMaxWait: &pendingMaxWait, + CachePendingSeqMaxNum: &maxNum, + CacheSkippedSeqMaxWait: &skippedMaxWait, + }, + NumIndexReplicas: &numIndexReplicas, + } + rt := RestTester{SyncFn: `function(doc) {channel(doc.channel)}`, DatabaseConfig: shortWaitConfig} + defer rt.Close() + + testDb := rt.ServerContext().Database("db") + + response := rt.SendAdminRequest("PUT", "/_logging", `{"Changes":true, "Changes+":true, "Debug":true}`) + // Create user: + assertStatus(t, rt.SendAdminRequest("GET", "/db/_user/bernard", ""), 404) + response = rt.SendAdminRequest("PUT", "/db/_user/bernard", `{"email":"bernard@couchbase.com", "password":"letmein", "admin_channels":["PBS"]}`) + assertStatus(t, response, 201) + + // Simulate 4 non-skipped writes (seq 2,3,4,5) + WriteDirect(testDb, []string{"PBS"}, 2) + WriteDirect(testDb, []string{"PBS"}, 3) + WriteDirect(testDb, []string{"PBS"}, 4) + WriteDirect(testDb, []string{"PBS"}, 5) + testDb.WaitForSequenceWithMissing(5) + + // Check the _changes feed: + var changes struct { + Results []db.ChangeEntry + Last_Seq string + } + response = rt.Send(requestByUser("GET", "/db/_changes", "", "bernard")) + log.Printf("_changes 1 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 5) // Includes user doc + since := changes.Results[0].Seq + assert.Equals(t, since.Seq, uint64(1)) + assert.Equals(t, changes.Last_Seq, "5") + + // Skip sequence 6, write docs 7-10 + WriteDirect(testDb, []string{"PBS"}, 7) + WriteDirect(testDb, []string{"PBS"}, 8) + WriteDirect(testDb, []string{"PBS"}, 9) + WriteDirect(testDb, []string{"PBS"}, 10) + testDb.WaitForSequenceWithMissing(10) + + // 
Send another changes request with the last_seq received from the last changes ("5") + changesJSON := fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.Send(requestByUser("POST", "/db/_changes", changesJSON, "bernard")) + log.Printf("_changes 2 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 4) + assert.Equals(t, changes.Last_Seq, "5::10") + + // Write a few more docs + WriteDirect(testDb, []string{"PBS"}, 11) + WriteDirect(testDb, []string{"PBS"}, 12) + testDb.WaitForSequenceWithMissing(12) + + // Send another changes request with the last_seq received from the last changes ("5") + changesJSON = fmt.Sprintf(`{"since":"%s"}`, changes.Last_Seq) + log.Printf("sending changes JSON: %s", changesJSON) + response = rt.Send(requestByUser("POST", "/db/_changes", changesJSON, "bernard")) + log.Printf("_changes 3 looks like: %s", response.Body.Bytes()) + json.Unmarshal(response.Body.Bytes(), &changes) + assert.Equals(t, len(changes.Results), 2) + assert.Equals(t, changes.Last_Seq, "5::12") + + // Issue a longpoll changes request. Will block. + var longpollWg sync.WaitGroup + longpollWg.Add(1) + go func() { + defer longpollWg.Done() + longpollChangesJSON := fmt.Sprintf(`{"since":"%s", "feed":"longpoll"}`, changes.Last_Seq) + log.Printf("starting longpoll changes w/ JSON: %s", longpollChangesJSON) + longPollResponse := rt.Send(requestByUser("POST", "/db/_changes", longpollChangesJSON, "bernard")) + log.Printf("longpoll changes looks like: %s", longPollResponse.Body.Bytes()) + json.Unmarshal(longPollResponse.Body.Bytes(), &changes) + // Expect to get 6 through 12 + assert.Equals(t, len(changes.Results), 7) + assert.Equals(t, changes.Last_Seq, "12") + }() + + // Wait for longpoll to get into wait mode + // TODO: Would be preferable to add an expvar tracking the number of changes feeds in wait mode, and use that to + // detect when the longpoll changes goes into wait mode instead of a sleep. + // Such an expvar would be useful for diagnostic purposes as well. + // Deferring this change until post-2.1.1, due to potential performance implications. Using sleep until that enhancement is + // made. + time.Sleep(2 * time.Second) + + // Write the skipped doc, wait for longpoll to return + WriteDirect(testDb, []string{"PBS"}, 6) + //WriteDirect(testDb, []string{"PBS"}, 13) + longpollWg.Wait() + +} + func TestUnusedSequences(t *testing.T) { // Only do 10 iterations if running against walrus. If against a live couchbase server, From fea9947066eadc7a6cf0c9e12eed5701aaf11bfe Mon Sep 17 00:00:00 2001 From: Adam Fraser Date: Thu, 4 Oct 2018 11:26:43 -0700 Subject: [PATCH 17/17] Fixes issue #3558 Initialize cache after DCP start (#3607) (#3780) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add experimental dockerfile * Fix attempt for build err * Trying to figure out how to get source commit/branch * Revert "Trying to figure out how to get source commit/branch" This reverts commit bd5e502876256e301f61b282b29f500a64c3fffb. 
* Add artificial delay after getting last seq, but before starting dcp feed * Reload c.initialSequence in DocChanged() as long as c.initialSequenceLazyLoaded == false * Notes from call w/ Adam * Add c.getInitialSequence() + _getNextSequence() * Fixes * Add comments * Notes from call w/ Adam * Sketch out changes and test * Change the changeindex iface * Fix data race WARNING: DATA RACE Write at 0x00c4201fa2e0 by goroutine 9: github.com/couchbase/sync_gateway/db.(*changeCache)._addToCache() /drone/godeps/src/github.com/couchbase/sync_gateway/db/change_cache.go:689 +0x4ff github.com/couchbase/sync_gateway/db.(*changeCache).processEntry() /drone/godeps/src/github.com/couchbase/sync_gateway/db/change_cache.go:650 +0x1f6 github.com/couchbase/sync_gateway/db.(*changeCache).DocChangedSynchronous() /drone/godeps/src/github.com/couchbase/sync_gateway/db/change_cache.go:549 +0x20a4 Previous read at 0x00c4201fa2e0 by goroutine 42: github.com/couchbase/sync_gateway/db.(*changeCache).waitForSequence() /drone/godeps/src/github.com/couchbase/sync_gateway/db/util_testing.go:25 +0xbd github.com/couchbase/sync_gateway/db.TestDocDeletionFromChannelCoalesced() /drone/godeps/src/github.com/couchbase/sync_gateway/db/changes_test.go:233 +0x5fe testing.tRunner() /usr/local/go/src/testing/testing.go:746 +0x16c * Use getNextSequence() in a few more places * Fix another race https://gist.github.com/tleyden/dccaa3cf2be85160051a793333c24467 * Remove dockerfile, moved to a separate PR * Gofmt * PR cleanup * PR cleanup * Remove check that’s leftover from first attempt * Add comments * First round of PR feedback * Address PR feedback about avoiding external locking --- db/change_cache.go | 212 ++++++++++++++++++++-------------------- db/change_cache_test.go | 18 ++-- db/change_index.go | 7 +- db/database.go | 38 ++++--- db/kv_change_index.go | 8 +- db/util_testing.go | 58 ++++++++++- 6 files changed, 203 insertions(+), 138 deletions(-) diff --git a/db/change_cache.go b/db/change_cache.go index 85d04d0f58..017d0ac59f 100644 --- a/db/change_cache.go +++ b/db/change_cache.go @@ -35,15 +35,22 @@ func init() { } // Manages a cache of the recent change history of all channels. +// +// Core responsibilities: +// +// - Manage collection of channel caches +// - Receive DCP changes via callbacks +// - Perform sequence buffering to ensure documents are received in sequence order +// - Propagating DCP changes down to appropriate channel caches type changeCache struct { context *DatabaseContext logsDisabled bool // If true, ignore incoming tap changes - nextSequence uint64 // Next consecutive sequence number to add - initialSequence uint64 // DB's current sequence at startup time + nextSequence uint64 // Next consecutive sequence number to add. State variable for sequence buffering tracking. Should use getNextSequence() rather than accessing directly. + initialSequence uint64 // DB's current sequence at startup time. Should use getInitialSequence() rather than accessing directly. 
receivedSeqs map[uint64]struct{} // Set of all sequences received pendingLogs LogPriorityQueue // Out-of-sequence entries waiting to be cached channelCaches map[string]*channelCache // A cache of changes for each channel - onChange func(base.Set) // Client callback that notifies of channel changes + notifyChange func(base.Set) // Client callback that notifies of channel changes stopped bool // Set by the Stop method skippedSeqs SkippedSequenceQueue // Skipped sequences still pending on the TAP feed skippedSeqLock sync.RWMutex // Coordinates access to skippedSeqs queue @@ -83,12 +90,13 @@ type CacheOptions struct { // Initializes a new changeCache. // lastSequence is the last known database sequence assigned. -// onChange is an optional function that will be called to notify of channel changes. -func (c *changeCache) Init(context *DatabaseContext, lastSequence SequenceID, onChange func(base.Set), options *CacheOptions, indexOptions *ChannelIndexOptions) error { +// notifyChange is an optional function that will be called to notify of channel changes. +// After calling Init(), you must call .Start() to start useing the cache, otherwise it will be in a locked state +// and callers will block on trying to obtain the lock. +func (c *changeCache) Init(context *DatabaseContext, notifyChange func(base.Set), options *CacheOptions, indexOptions *ChannelIndexOptions) error { c.context = context - c.initialSequence = lastSequence.Seq - c.nextSequence = lastSequence.Seq + 1 - c.onChange = onChange + + c.notifyChange = notifyChange c.channelCaches = make(map[string]*channelCache, 10) c.receivedSeqs = make(map[uint64]struct{}) c.terminator = make(chan bool) @@ -118,7 +126,7 @@ func (c *changeCache) Init(context *DatabaseContext, lastSequence SequenceID, on base.Infof(base.KeyCache, "Initializing changes cache with options %+v", c.options) if context.UseGlobalSequence() { - base.Infof(base.KeyAll, "Initializing changes cache for database %s with sequence: %d", base.UD(context.Name), c.initialSequence) + base.Infof(base.KeyAll, "Initializing changes cache for database %s", base.UD(context.Name)) } heap.Init(&c.pendingLogs) @@ -147,6 +155,24 @@ func (c *changeCache) Init(context *DatabaseContext, lastSequence SequenceID, on } }() + // Lock the cache -- not usable until .Start() called. This fixes the DCP startup race condition documented in SG #3558. + c.lock.Lock() + + return nil +} + +func (c *changeCache) Start() error { + + // Unlock the cache after this function returns. + defer c.lock.Unlock() + + // Find the current global doc sequence and use that for the initial sequence for the change cache + lastSequence, err := c.context.LastSequence() + if err != nil { + return err + } + + c._setInitialSequence(lastSequence) return nil } @@ -169,14 +195,25 @@ func (c *changeCache) IsStopped() bool { return c.stopped } -// Forgets all cached changes for all channels. -func (c *changeCache) Clear() { +// Empty out all channel caches. +func (c *changeCache) Clear() error { c.lock.Lock() - c.initialSequence, _ = c.context.LastSequence() + defer c.lock.Unlock() + + // Reset initialSequence so that any new channel caches have their validFrom set to the current last sequence + // the point at which the change cache was initialized / re-initialized. + // No need to touch c.nextSequence here, because we don't want to touch the sequence buffering state. 
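	// For illustration only (not part of the patch): Init()/Start() above rely on a
	// "construct locked, release on Start" pattern. A minimal sketch of the idea, with
	// illustrative names (the real Start() loads the value from context.LastSequence()):
	//
	//	type cache struct {
	//		lock    sync.RWMutex
	//		nextSeq uint64
	//	}
	//
	//	func (c *cache) Init() {
	//		c.lock.Lock() // deliberately left locked; released by Start()
	//	}
	//
	//	func (c *cache) Start(lastSeq uint64) error {
	//		defer c.lock.Unlock()
	//		c.nextSeq = lastSeq + 1
	//		return nil
	//	}
	//
	// Every method that acquires c.lock (or its read lock) blocks until Start() has set the
	// initial sequence, so DCP events delivered between Init() and Start() cannot observe an
	// uninitialized sequence, which is the startup race described in SG #3558.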
+ var err error + c.initialSequence, err = c.context.LastSequence() + if err != nil { + return err + } + c.channelCaches = make(map[string]*channelCache, 10) c.pendingLogs = nil heap.Init(&c.pendingLogs) - c.lock.Unlock() + + return nil } // If set to false, DocChanged() becomes a no-op. @@ -193,14 +230,15 @@ func (c *changeCache) EnableChannelIndexing(enable bool) { func (c *changeCache) CleanUp() bool { c.lock.Lock() defer c.lock.Unlock() + if c.channelCaches == nil { return false } // If entries have been pending too long, add them to the cache: changedChannels := c._addPendingLogs() - if c.onChange != nil && len(changedChannels) > 0 { - c.onChange(changedChannels) + if c.notifyChange != nil && len(changedChannels) > 0 { + c.notifyChange(changedChannels) } // Remove old cache entries: @@ -275,8 +313,8 @@ func (c *changeCache) CleanSkippedSequenceQueue() bool { // Since the calls to processEntry() above may unblock pending sequences, if there were any changed channels we need // to notify any change listeners that are working changes feeds for these channels - if c.onChange != nil && len(changedChannelsCombined) > 0 { - c.onChange(changedChannelsCombined) + if c.notifyChange != nil && len(changedChannelsCombined) > 0 { + c.notifyChange(changedChannelsCombined) } // Purge pending deletes @@ -292,70 +330,12 @@ func (c *changeCache) CleanSkippedSequenceQueue() bool { return true } -// FOR TESTS ONLY: Blocks until the given sequence has been received. -func (c *changeCache) waitForSequenceID(sequence SequenceID, maxWaitTime time.Duration) { - c.waitForSequence(sequence.Seq, maxWaitTime) -} - -func (c *changeCache) waitForSequence(sequence uint64, maxWaitTime time.Duration) { - - startTime := time.Now() - - for { - - if time.Since(startTime) >= maxWaitTime { - panic(fmt.Sprintf("changeCache: Sequence %d did not show up after waiting %v", sequence, time.Since(startTime))) - } - - c.lock.RLock() - nextSequence := c.nextSequence - c.lock.RUnlock() - if nextSequence >= sequence+1 { - base.Infof(base.KeyAll, "waitForSequence(%d) took %v", sequence, time.Since(startTime)) - return - } - time.Sleep(100 * time.Millisecond) - } -} - -// FOR TESTS ONLY: Blocks until the given sequence has been received. -func (c *changeCache) waitForSequenceWithMissing(sequence uint64, maxWaitTime time.Duration) { - - startTime := time.Now() - - for { - - if time.Since(startTime) >= maxWaitTime { - panic(fmt.Sprintf("changeCache: Sequence %d did not show up after waiting %v", sequence, time.Since(startTime))) - } - - c.lock.RLock() - nextSequence := c.nextSequence - c.lock.RUnlock() - if nextSequence >= sequence+1 { - foundInMissing := false - c.skippedSeqLock.RLock() - for _, skippedSeq := range c.skippedSeqs { - if skippedSeq.seq == sequence { - foundInMissing = true - break - } - } - c.skippedSeqLock.RUnlock() - if !foundInMissing { - base.Infof(base.KeyAll, "waitForSequence(%d) took %v", sequence, time.Since(startTime)) - return - } - } - time.Sleep(100 * time.Millisecond) - } -} - //////// ADDING CHANGES: // Given a newly changed document (received from the tap feed), adds change entries to channels. // The JSON must be the raw document from the bucket, with the metadata and all. 
func (c *changeCache) DocChanged(event sgbucket.FeedEvent) { + if event.Synchronous { c.DocChangedSynchronous(event) } else { @@ -363,6 +343,7 @@ func (c *changeCache) DocChanged(event sgbucket.FeedEvent) { } } + // Note that DocChangedSynchronous may be executed concurrently for multiple events (in the DCP case, DCP events // originating from multiple vbuckets). Only processEntry is locking - all other functionality needs to support // concurrent processing. @@ -452,7 +433,7 @@ func (c *changeCache) DocChangedSynchronous(event sgbucket.FeedEvent) { } } - if syncData.Sequence <= c.initialSequence { + if syncData.Sequence <= c.getInitialSequence() { return // Tap is sending us an old value from before I started up; ignore it } @@ -483,9 +464,9 @@ func (c *changeCache) DocChangedSynchronous(event sgbucket.FeedEvent) { } if len(syncData.RecentSequences) > 0 { - nextSeq := c.getNextSequence() + for _, seq := range syncData.RecentSequences { - if seq >= nextSeq && seq < currentSequence { + if seq >= c.getNextSequence() && seq < currentSequence { base.Infof(base.KeyCache, "Received deduplicated #%d for (%q / %q)", seq, base.UD(docID), syncData.CurrentRev) change := &LogEntry{ Sequence: seq, @@ -522,8 +503,8 @@ func (c *changeCache) DocChangedSynchronous(event sgbucket.FeedEvent) { changedChannelsCombined = changedChannelsCombined.Union(changedChannels) // Notify change listeners for all of the changed channels - if c.onChange != nil && len(changedChannelsCombined) > 0 { - c.onChange(changedChannelsCombined) + if c.notifyChange != nil && len(changedChannelsCombined) > 0 { + c.notifyChange(changedChannelsCombined) } } @@ -574,8 +555,8 @@ func (c *changeCache) processUnusedSequence(docID string) { // Since processEntry may unblock pending sequences, if there were any changed channels we need // to notify any change listeners that are working changes feeds for these channels changedChannels := c.processEntry(change) - if c.onChange != nil && len(changedChannels) > 0 { - c.onChange(changedChannels) + if c.notifyChange != nil && len(changedChannels) > 0 { + c.notifyChange(changedChannels) } } @@ -590,10 +571,8 @@ func (c *changeCache) processPrincipalDoc(docID string, docJSON []byte, isUser b return } sequence := princ.Sequence() - c.lock.RLock() - initialSequence := c.initialSequence - c.lock.RUnlock() - if sequence <= initialSequence { + + if sequence <= c.getInitialSequence() { return // Tap is sending us an old value from before I started up; ignore it } @@ -611,8 +590,8 @@ func (c *changeCache) processPrincipalDoc(docID string, docJSON []byte, isUser b base.Infof(base.KeyCache, "Received #%d (%q)", change.Sequence, base.UD(change.DocID)) changedChannels := c.processEntry(change) - if c.onChange != nil && len(changedChannels) > 0 { - c.onChange(changedChannels) + if c.notifyChange != nil && len(changedChannels) > 0 { + c.notifyChange(changedChannels) } } @@ -625,7 +604,7 @@ func (c *changeCache) processEntry(change *LogEntry) base.Set { } sequence := change.Sequence - nextSequence := c.nextSequence + if _, found := c.receivedSeqs[sequence]; found { base.Debugf(base.KeyCache, " Ignoring duplicate of #%d", sequence) return nil @@ -634,17 +613,17 @@ func (c *changeCache) processEntry(change *LogEntry) base.Set { // FIX: c.receivedSeqs grows monotonically. Need a way to remove old sequences. 
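	// Annotation (not part of the patch): the branches below are the sequence buffering referred
	// to in the changeCache struct comment. In short:
	//   - sequence == c.nextSequence (or the cache has not yet seen any sequence): cache the entry
	//     immediately, then drain any now-contiguous entries from pendingLogs.
	//   - sequence >  c.nextSequence: there is a gap, so park the entry on the pendingLogs heap;
	//     once more than CachePendingSeqMaxNum entries are pending, the oldest is added anyway.
	//   - sequence <  c.nextSequence: a late arrival; if it was previously recorded in skippedSeqs
	//     it is flagged as Skipped, otherwise it is logged as an unexpected out-of-order change.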
var changedChannels base.Set - if sequence == nextSequence || nextSequence == 0 { + if sequence == c.nextSequence || c.nextSequence == 0 { // This is the expected next sequence so we can add it now: changedChannels = c._addToCache(change) // Also add any pending sequences that are now contiguous: changedChannels = changedChannels.Union(c._addPendingLogs()) - } else if sequence > nextSequence { + } else if sequence > c.nextSequence { // There's a missing sequence (or several), so put this one on ice until it arrives: heap.Push(&c.pendingLogs, change) numPending := len(c.pendingLogs) base.Infof(base.KeyCache, " Deferring #%d (%d now waiting for #%d...#%d)", - sequence, numPending, nextSequence, c.pendingLogs[0].Sequence-1) + sequence, numPending, c.nextSequence, c.pendingLogs[0].Sequence-1) changeCacheExpvars.Get("maxPending").(*base.IntMax).SetIfMax(int64(numPending)) if numPending > c.options.CachePendingSeqMaxNum { // Too many pending; add the oldest one: @@ -655,9 +634,9 @@ func (c *changeCache) processEntry(change *LogEntry) base.Set { // Remove from skipped sequence queue if !c.WasSkipped(sequence) { // Error removing from skipped sequences - base.Infof(base.KeyCache, " Received unexpected out-of-order change - not in skippedSeqs (seq %d, expecting %d) doc %q / %q", sequence, nextSequence, base.UD(change.DocID), change.RevID) + base.Infof(base.KeyCache, " Received unexpected out-of-order change - not in skippedSeqs (seq %d, expecting %d) doc %q / %q", sequence, c.nextSequence, base.UD(change.DocID), change.RevID) } else { - base.Infof(base.KeyCache, " Received previously skipped out-of-order change (seq %d, expecting %d) doc %q / %q ", sequence, nextSequence, base.UD(change.DocID), change.RevID) + base.Infof(base.KeyCache, " Received previously skipped out-of-order change (seq %d, expecting %d) doc %q / %q ", sequence, c.nextSequence, base.UD(change.DocID), change.RevID) change.Skipped = true } @@ -672,6 +651,7 @@ func (c *changeCache) processEntry(change *LogEntry) base.Set { // Adds an entry to the appropriate channels' caches, returning the affected channels. 
lateSequence // flag indicates whether it was a change arriving out of sequence func (c *changeCache) _addToCache(change *LogEntry) base.Set { + if change.Sequence >= c.nextSequence { c.nextSequence = change.Sequence + 1 } @@ -754,17 +734,9 @@ func (c *changeCache) getChannelCache(channelName string) *channelCache { return c._getChannelCache(channelName) } -func (c *changeCache) getNextSequence() uint64 { - c.lock.RLock() - defer c.lock.RUnlock() - return c.nextSequence -} - func (c *changeCache) GetStableSequence(docID string) SequenceID { - c.lock.RLock() - defer c.lock.RUnlock() // Stable sequence is independent of docID in changeCache - return SequenceID{Seq: c.nextSequence - 1} + return SequenceID{Seq: c.LastSequence()} } func (c *changeCache) GetStableClock(stale bool) (clock base.SequenceClock, err error) { @@ -774,7 +746,11 @@ func (c *changeCache) GetStableClock(stale bool) (clock base.SequenceClock, err func (c *changeCache) _getChannelCache(channelName string) *channelCache { cache := c.channelCaches[channelName] if cache == nil { - cache = newChannelCacheWithOptions(c.context, channelName, c.initialSequence+1, c.options) + + // expect to see everything _after_ the sequence at the time of cache init, but not the sequence itself since it not expected to appear on DCP + validFrom := c.initialSequence + 1 + + cache = newChannelCacheWithOptions(c.context, channelName, validFrom, c.options) c.channelCaches[channelName] = cache } return cache @@ -796,9 +772,9 @@ func (c *changeCache) GetCachedChanges(channelName string, options ChangesOption // Returns the sequence number the cache is up-to-date with. func (c *changeCache) LastSequence() uint64 { - c.lock.RLock() - defer c.lock.RUnlock() - return c.nextSequence - 1 + + lastSequence := c.getNextSequence() - 1 + return lastSequence } func (c *changeCache) _allChannels() base.Set { @@ -822,6 +798,26 @@ func (c *changeCache) getOldestSkippedSequence() uint64 { } } +// Set the initial sequence. Presumes that change chache is already locked. +func (c *changeCache) _setInitialSequence(initialSequence uint64) { + c.initialSequence = initialSequence + c.nextSequence = initialSequence + 1 +} + +// Concurrent-safe get value of nextSequence +func (c *changeCache) getNextSequence() uint64 { + c.lock.RLock() + defer c.lock.RUnlock() + return c.nextSequence +} + +// Concurrent-safe get value of initialSequence +func (c *changeCache) getInitialSequence() uint64 { + c.lock.RLock() + defer c.lock.RUnlock() + return c.initialSequence +} + //////// LOG PRIORITY QUEUE -- container/heap callbacks that should not be called directly. Use heap.Init/Push/etc() func (h LogPriorityQueue) Len() int { return len(h) } diff --git a/db/change_cache_test.go b/db/change_cache_test.go index 87019dbf79..3ad928a416 100644 --- a/db/change_cache_test.go +++ b/db/change_cache_test.go @@ -1199,10 +1199,10 @@ func readNextFromFeed(feed <-chan (*ChangeEntry), timeout time.Duration) (*Chang // // Create doc1 w/ unused sequences 1, actual sequence 3. // Create doc2 w/ sequence 2, channel ABC -// Send feed event for doc2. This won't trigger onChange, as buffering is waiting for seq 1 -// Send feed event for doc1. This should trigger caching for doc2, and trigger onChange for channel ABC. +// Send feed event for doc2. This won't trigger notifyChange, as buffering is waiting for seq 1 +// Send feed event for doc1. This should trigger caching for doc2, and trigger notifyChange for channel ABC. // -// Verify that onChange for channel ABC was sent. 
+// Verify that notifyChange for channel ABC was sent. func TestLateArrivingSequenceTriggersOnChange(t *testing.T) { // -------- Test setup ---------------- @@ -1227,15 +1227,15 @@ func TestLateArrivingSequenceTriggersOnChange(t *testing.T) { defer tearDownTestDB(t, db) defer testBucket.Close() - // -------- Setup onChange callback ---------------- + // -------- Setup notifyChange callback ---------------- // type assert this from ChangeIndex interface -> concrete changeCache implementation changeCacheImpl := db.changeCache.(*changeCache) - // Detect whether the 2nd was ignored using an onChange listener callback and make sure it was not added to the ABC channel + // Detect whether the 2nd was ignored using an notifyChange listener callback and make sure it was not added to the ABC channel waitForOnChangeCallback := sync.WaitGroup{} waitForOnChangeCallback.Add(1) - changeCacheImpl.onChange = func(channels base.Set) { + changeCacheImpl.notifyChange = func(channels base.Set) { // defer waitForOnChangeCallback.Done() log.Printf("channelsChanged: %v", channels) // assert.True(t, channels.Contains("ABC")) @@ -1278,7 +1278,7 @@ func TestLateArrivingSequenceTriggersOnChange(t *testing.T) { doc2Bytes, err := doc2.MarshalJSON() assertNoError(t, err, "Unexpected error") - // Send feed event for doc2. This won't trigger onChange, as buffering is waiting for seq 1 + // Send feed event for doc2. This won't trigger notifyChange, as buffering is waiting for seq 1 feedEventDoc2 := sgbucket.FeedEvent{ Synchronous: true, Key: []byte(doc2Id), @@ -1286,7 +1286,7 @@ func TestLateArrivingSequenceTriggersOnChange(t *testing.T) { } db.changeCache.DocChanged(feedEventDoc2) - // Send feed event for doc1. This should trigger caching for doc2, and trigger onChange for channel ABC. + // Send feed event for doc1. This should trigger caching for doc2, and trigger notifyChange for channel ABC. feedEventDoc1 := sgbucket.FeedEvent{ Synchronous: true, Key: []byte(doc1Id), @@ -1296,7 +1296,7 @@ func TestLateArrivingSequenceTriggersOnChange(t *testing.T) { // -------- Wait for waitgroup ---------------- - // Block until the onChange callback was invoked with the expected channels. + // Block until the notifyChange callback was invoked with the expected channels. // If the callback is never called back with expected, will block forever. 
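	// For illustration only (not part of the patch): Wait() blocks forever if the callback never
	// fires, as the comment above says. If a bounded wait is ever preferred here, the usual Go
	// idiom is to close a channel once the WaitGroup is done and select against a timeout; a
	// minimal sketch using the names from this test:
	//
	//	done := make(chan struct{})
	//	go func() {
	//		waitForOnChangeCallback.Wait()
	//		close(done)
	//	}()
	//	select {
	//	case <-done:
	//	case <-time.After(10 * time.Second):
	//		t.Fatal("timed out waiting for the notifyChange callback")
	//	}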
waitForOnChangeCallback.Wait() diff --git a/db/change_index.go b/db/change_index.go index ef0091499d..b53e6a3552 100644 --- a/db/change_index.go +++ b/db/change_index.go @@ -27,13 +27,16 @@ import ( type ChangeIndex interface { // Initialize the index - Init(context *DatabaseContext, lastSequence SequenceID, onChange func(base.Set), cacheOptions *CacheOptions, indexOptions *ChannelIndexOptions) error + Init(context *DatabaseContext, notifyChange func(base.Set), cacheOptions *CacheOptions, indexOptions *ChannelIndexOptions) error // Stop the index Stop() + // Start the index + Start() error + // Clear the index - Clear() + Clear() error // Enable/Disable indexing EnableChannelIndexing(enable bool) diff --git a/db/database.go b/db/database.go index 5b70e1b9b5..05f6ef124b 100644 --- a/db/database.go +++ b/db/database.go @@ -206,10 +206,6 @@ func NewDatabaseContext(dbName string, bucket base.Bucket, autoImport bool, opti if err != nil { return nil, err } - lastSeq, err := context.sequences.lastSequence() - if err != nil { - return nil, err - } if options.IndexOptions == nil { // In-memory channel cache @@ -226,20 +222,19 @@ func NewDatabaseContext(dbName string, bucket base.Bucket, autoImport bool, opti } // Callback that is invoked whenever a set of channels is changed in the ChangeCache - onChange := func(changedChannels base.Set) { + notifyChange := func(changedChannels base.Set) { context.mutationListener.Notify(changedChannels) } - // Initialize the ChangeCache + // Initialize the ChangeCache. Will be locked and unusable until .Start() is called (SG #3558) context.changeCache.Init( context, - SequenceID{Seq: lastSeq}, - onChange, + notifyChange, options.CacheOptions, options.IndexOptions, ) - // Set the DB Context onChange callback to call back the changecache DocChanged callback + // Set the DB Context notifyChange callback to call back the changecache DocChanged callback context.SetOnChangeCallback(context.changeCache.DocChanged) // Initialize the tap Listener for notify handling @@ -255,7 +250,7 @@ func NewDatabaseContext(dbName string, bucket base.Bucket, autoImport bool, opti if options.IndexOptions == nil || options.TrackDocs { base.Infof(base.KeyDCP, "Starting mutation feed on bucket %v due to either channel cache mode or doc tracking (auto-import/bucketshadow)", base.MD(bucket.GetName())) - if err = context.mutationListener.Start(bucket, options.TrackDocs, feedMode, func(bucket string, err error) { + err = context.mutationListener.Start(bucket, options.TrackDocs, feedMode, func(bucket string, err error) { msgFormat := "%v dropped Mutation Feed (TAP/DCP) due to error: %v, taking offline" base.Warnf(base.KeyAll, msgFormat, base.UD(bucket), err) @@ -303,9 +298,20 @@ func NewDatabaseContext(dbName string, bucket base.Bucket, autoImport bool, opti // TODO: invoke the same callback function from there as well, to pick up the auto-online handling - }); err != nil { + }) + + // Check if there is an error starting the DCP feed + if err != nil { + context.changeCache = nil + return nil, err + } + + // Unlock change cache + err := context.changeCache.Start() + if err != nil { return nil, err } + } // Load providers into provider map. Does basic validation on the provider definition, and identifies the default provider. @@ -480,9 +486,9 @@ func (context *DatabaseContext) RestartListener() error { } // Cache flush support. 
Currently test-only - added for unit test access from rest package -func (context *DatabaseContext) FlushChannelCache() { +func (context *DatabaseContext) FlushChannelCache() error { base.Infof(base.KeyCache, "Flushing channel cache") - context.changeCache.Clear() + return context.changeCache.Clear() } // Removes previous versions of Sync Gateway's design docs found on the server @@ -863,7 +869,11 @@ func (db *Database) UpdateAllDocChannels() (int, error) { // really confuse the changeCache, so turn it off until we're done: db.changeCache.EnableChannelIndexing(false) defer db.changeCache.EnableChannelIndexing(true) - db.changeCache.Clear() + + err = db.changeCache.Clear() + if err != nil { + return 0, err + } base.Infof(base.KeyAll, "Re-running sync function on all documents...") changeCount := 0 diff --git a/db/kv_change_index.go b/db/kv_change_index.go index 3c7ed4c280..c03a9665f7 100644 --- a/db/kv_change_index.go +++ b/db/kv_change_index.go @@ -47,7 +47,7 @@ func init() { IndexExpvars = expvar.NewMap("syncGateway_index") } -func (k *kvChangeIndex) Init(context *DatabaseContext, lastSequence SequenceID, onChange func(base.Set), options *CacheOptions, indexOptions *ChannelIndexOptions) (err error) { +func (k *kvChangeIndex) Init(context *DatabaseContext, onChange func(base.Set), options *CacheOptions, indexOptions *ChannelIndexOptions) (err error) { k.context = context k.reader = &kvChangeIndexReader{} @@ -63,8 +63,9 @@ func (k *kvChangeIndex) Prune() { // TODO: currently no pruning of channel indexes } -func (k *kvChangeIndex) Clear() { +func (k *kvChangeIndex) Clear() error { k.reader.Clear() + return nil } func (k *kvChangeIndex) Stop() { @@ -411,6 +412,9 @@ func (k *kvChangeIndex) generatePartitionStats() (PartitionStats, error) { return partitionStats, nil } +// The following are no-ops for kvChangeIndex. +func (k *kvChangeIndex) Start() (error) { return nil } + func IsNotFoundError(err error) bool { return strings.Contains(strings.ToLower(err.Error()), "not found") } diff --git a/db/util_testing.go b/db/util_testing.go index ed7520eaa7..d7c1933c8b 100644 --- a/db/util_testing.go +++ b/db/util_testing.go @@ -1,11 +1,12 @@ package db import ( + "fmt" "math" + "time" "github.com/couchbase/gocb" "github.com/couchbase/sync_gateway/base" - "fmt" ) // Workaround SG #3570 by doing a polling loop until the star channel query returns 0 results. @@ -17,8 +18,8 @@ func WaitForIndexEmpty(bucket *base.CouchbaseBucketGoCB, useXattrs bool) error { var results gocb.QueryResults // Create the star channel query - statement := fmt.Sprintf("%s LIMIT 1", QueryStarChannel.statement) // append LIMIT 1 since we only care if there are any results or not - starChannelQueryStatement := replaceSyncTokensQuery(statement,useXattrs) + statement := fmt.Sprintf("%s LIMIT 1", QueryStarChannel.statement) // append LIMIT 1 since we only care if there are any results or not + starChannelQueryStatement := replaceSyncTokensQuery(statement, useXattrs) params := map[string]interface{}{} params[QueryParamStartSeq] = 0 params[QueryParamEndSeq] = math.MaxInt64 @@ -70,4 +71,55 @@ func ResultsEmpty(results gocb.QueryResults) (resultsEmpty bool) { } +// FOR TESTS ONLY: Blocks until the given sequence has been received. 
+func (c *changeCache) waitForSequenceID(sequence SequenceID, maxWaitTime time.Duration) { + c.waitForSequence(sequence.Seq, maxWaitTime) +} + +func (c *changeCache) waitForSequence(sequence uint64, maxWaitTime time.Duration) { + + startTime := time.Now() + + for { + + if time.Since(startTime) >= maxWaitTime { + panic(fmt.Sprintf("changeCache: Sequence %d did not show up after waiting %v", sequence, time.Since(startTime))) + } + + if c.getNextSequence() >= sequence+1 { + base.Infof(base.KeyAll, "waitForSequence(%d) took %v", sequence, time.Since(startTime)) + return + } + time.Sleep(100 * time.Millisecond) + } +} + +// FOR TESTS ONLY: Blocks until the given sequence has been received. +func (c *changeCache) waitForSequenceWithMissing(sequence uint64, maxWaitTime time.Duration) { + + startTime := time.Now() + + for { + if time.Since(startTime) >= maxWaitTime { + panic(fmt.Sprintf("changeCache: Sequence %d did not show up after waiting %v", sequence, time.Since(startTime))) + } + + if c.getNextSequence() >= sequence+1 { + foundInMissing := false + c.skippedSeqLock.RLock() + for _, skippedSeq := range c.skippedSeqs { + if skippedSeq.seq == sequence { + foundInMissing = true + break + } + } + c.skippedSeqLock.RUnlock() + if !foundInMissing { + base.Infof(base.KeyAll, "waitForSequence(%d) took %v", sequence, time.Since(startTime)) + return + } + } + time.Sleep(100 * time.Millisecond) + } +}
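
For illustration only (not part of this patch series): the longpoll changes test earlier in this series still detects "wait mode" with a fixed two-second sleep, and its TODO suggests replacing that with an expvar tracking the number of changes feeds currently in wait mode. Below is a minimal sketch of such a counter plus a polling test helper; the expvar name, helper names, and placement are hypothetical assumptions, not anything defined by this patch series.

package main

import (
	"expvar"
	"fmt"
	"time"
)

// changesFeedsInWaitMode is a hypothetical counter: a changes feed would increment it when it
// enters wait mode and decrement it when it leaves.
var changesFeedsInWaitMode = expvar.NewInt("syncGateway_changes_feeds_in_wait_mode")

func enterWaitMode() { changesFeedsInWaitMode.Add(1) }
func leaveWaitMode() { changesFeedsInWaitMode.Add(-1) }

// waitForFeedsInWaitMode polls the counter until at least n feeds are waiting, or times out.
// A test could call this instead of time.Sleep before writing the skipped document.
func waitForFeedsInWaitMode(n int64, maxWait time.Duration) error {
	start := time.Now()
	for time.Since(start) < maxWait {
		if changesFeedsInWaitMode.Value() >= n {
			return nil
		}
		time.Sleep(50 * time.Millisecond)
	}
	return fmt.Errorf("timed out: %d changes feed(s) never reached wait mode", n)
}

func main() {
	// Simulate one changes feed entering wait mode shortly after startup.
	go func() {
		enterWaitMode()
		defer leaveWaitMode()
		time.Sleep(500 * time.Millisecond)
	}()

	if err := waitForFeedsInWaitMode(1, 2*time.Second); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("feed is in wait mode; safe to write the skipped doc")
}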