From 0694c9bd0e40cb56b4f0c37b5b5c4e9412cdfe68 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Thu, 16 Oct 2014 19:13:31 -0400 Subject: [PATCH 1/2] add references to change index to data keys By adding a reference to the change index id to data table keys, we can quickly revert the current table of the repository to a particular checkpoint. These changes only enable the storage of the change index keys. This is not a stable commit. A majority of tests now pass, but there are still significant issues. --- bin/blobs-put.js | 6 ++++-- bin/rows-put.js | 2 +- cli.js | 2 +- lib/commands.js | 16 ++++++++++------ lib/schema.js | 3 ++- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/bin/blobs-put.js b/bin/blobs-put.js index 358b470a..7008abd4 100644 --- a/bin/blobs-put.js +++ b/bin/blobs-put.js @@ -20,9 +20,11 @@ module.exports = function(dat, opts, cb) { row = existing } var blobKey = opts.name || path.basename(blob) - var ws = dat.createBlobWriteStream(blobKey, row, function(err, updated) { + var options = { filename:blobKey, version:version } + + var ws = dat.createBlobWriteStream(options, row, function(err, updated) { if (err) return cb(err) - console.log('Attached ' + blobKey + ' successfully to', updated.key) + console.log('Attached ' + options.filename + ' successfully to', updated.key) cb() }) diff --git a/bin/rows-put.js b/bin/rows-put.js index 0c44c587..5eaccc0b 100644 --- a/bin/rows-put.js +++ b/bin/rows-put.js @@ -30,4 +30,4 @@ module.exports = function (dat, opts, cb) { cb() }) } -} \ No newline at end of file +} diff --git a/cli.js b/cli.js index eb8b93d8..08e98aba 100755 --- a/cli.js +++ b/cli.js @@ -102,4 +102,4 @@ function close() { } else { dat.close() } -} \ No newline at end of file +} diff --git a/lib/commands.js b/lib/commands.js index 3ae34d2b..15f40789 100644 --- a/lib/commands.js +++ b/lib/commands.js @@ -428,12 +428,16 @@ dat.createBlobWriteStream = function(options, doc, cb) { if (!cb) cb = noop debug('createBlobWriteStream', 
options.filename) + var version = doc.version + if (!doc.version && options.version) { + version = options.version + } var blobWrite = this.blobs.createWriteStream(options, function(err, blob) { if (err) return cb(err) if (!doc.blobs) doc.blobs = {} doc.blobs[options.filename] = blob - self.put(doc, {version:doc.version}, cb) + self.put(doc, {version:options.version}, cb) }) return blobWrite @@ -783,10 +787,10 @@ dat.put = function(key, doc, opts, cb) { var ready = function(err, doc) { if (err) return cb(err) - - self.storage.put(key, isBuffer ? doc : self.schema.encode(doc), {version:version, force:opts.force}, function(err, value, version) { + + self.storage.put(key, isBuffer ? doc : self.schema.encode(doc), {version:version, force:opts.force}, function(err, value, version, change) { if (err) return cb(err) - cb(null, self.schema.decode(value, {key:key, version:version})) + cb(null, self.schema.decode(value, {key:key, version:version, change:change})) }) } @@ -805,9 +809,9 @@ dat.get = function(key, opts, cb) { var self = this - this.storage.get(key, opts, function(err, value, version) { + this.storage.get(key, opts, function(err, value, version, change) { if (err) return cb(err) - var val = self.schema.decode(value, {key:key, version:version, blobsOnly:opts.blobsOnly}) + var val = self.schema.decode(value, {key:key, version:version, change:change, blobsOnly:opts.blobsOnly}) if (self.afterGet) self.afterGet(val, cb) else cb(null, val) }) diff --git a/lib/schema.js b/lib/schema.js index 17ad76ba..0c1c5894 100644 --- a/lib/schema.js +++ b/lib/schema.js @@ -168,6 +168,7 @@ Schema.prototype.decode = function(buf, opts) { var msg = this.messages.Row.decode(buf) msg.key = opts.key msg.version = opts.version + msg.change = opts.change return msg } @@ -203,4 +204,4 @@ Schema.prototype._tag = function() { return 1 + this.fields.map(toTag).reduce(max, 0) } -module.exports = Schema \ No newline at end of file +module.exports = Schema From 
58d5358f081f110d52b981200b1548564adb66e3 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Fri, 17 Oct 2014 14:56:12 -0400 Subject: [PATCH 2/2] store change ids alongside data to enable fast rollback The addition of change log ids to the data keys (after the versions) will allow us to quickly extract the state of the data at a particular point in the change log. This can be accomplished via a linear scan of the keys in the data table, requiring that the change id of a particular object is <= the target point in the log. If we did not store the change ids alongside the data, we would be forced to complete a reconstruction of the dataset via the change log. This would complicate the process of rolling back particular subsets of the data to predetermined points in the history. Additionally, it wouldn't be possible to quickly determine the relative age of two objects, which has a number of possible applications in reproducibility and logging. The level-dat backend will support these change ids as of 4.5.0. No functionality based on the change ids is tested yet, but the next step should be to implement a commit/checkout or checkpoint/rollback model on top of it. With this update we now pass 616/616 tests. --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index e3e2dce4..edb9ddc5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "dat", - "version": "6.8.1", + "version": "6.9.0", "description": "real-time replication and versioning for data sets (tabular and blobs)", "preferGlobal": true, "keywords": [ @@ -52,7 +52,7 @@ "isnumber": "^1.0.0", "json-protobuf-encoding": "^1.0.1", "ldjson-stream": "~1.0.0", - "level-dat": "^4.4.1", + "level-dat": "^4.5.0", "level-events": "^1.0.2", "level-js": "^2.1.3", "level-manifest": "~1.2.0",