From 6cce367a8e207dedd4e061e0a7084326b82b6a9a Mon Sep 17 00:00:00 2001 From: "=^._.^=" Date: Tue, 26 May 2015 14:50:39 -0700 Subject: [PATCH 01/31] Update beta-cli-api.md --- beta-cli-api.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/beta-cli-api.md b/beta-cli-api.md index 79c9eca4..d168dafb 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -21,6 +21,20 @@ This is the proposed CLI API for our Beta release. Please leave feedback [in thi - [dat read](#dat-read) - [dat get](#dat-get) +## example repository folder structure + +``` +repo/ + - .dat/ + - dat.json + - dataset-a/ + - dataset.json + - readme.md + - dataset-b/ + - dataset.json + - readme.md +``` + ## repository commands ### dat From 5a464659b9e4bcba1fad089943bf5f8addb61c26 Mon Sep 17 00:00:00 2001 From: "=^._.^=" Date: Tue, 26 May 2015 14:57:33 -0700 Subject: [PATCH 02/31] dat versions -> dat log --- beta-cli-api.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index d168dafb..42a21abf 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -191,18 +191,18 @@ Average speed: 4.3 Mb/s. Replication completed successfully. 
``` -### dat versions +### dat log Stream versions out in historical order as json ```bash -dat versions +dat log ``` Example output: ``` -$ dat versions --limit=2 +$ dat log --limit=2 { "change": 1, "version": "6bdd624ae6f9ddb96069e04fc030c6e964e77ac7", links: [...], "puts": 12, "deletes": 3, "date": "2015..."} { "change": 2, "version": "7b13de1bd942a0cbfc2721d9e0b9a4fa5a076517", links: [...], "puts": 0, "deletes": 2, "date": "2015..."} ``` From 894cfb52bdc37f50dd476c2dbf860aaf3d8ec323 Mon Sep 17 00:00:00 2001 From: "=^._.^=" Date: Tue, 26 May 2015 14:57:43 -0700 Subject: [PATCH 03/31] Update beta-cli-api.md --- beta-cli-api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 42a21abf..d4809124 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -10,7 +10,7 @@ This is the proposed CLI API for our Beta release. Please leave feedback [in thi - [dat push](#dat-push) - [dat pull](#dat-pull) - [dat replicate](#dat-replicate) - - [dat versions](#dat-versions) + - [dat log](#dat-log) - [dat checkout](#dat-checkout) - [dat diff](#dat-diff) - [dat merge](#dat-merge) From 3e7d51c928f62e47b630ff2f6b977a9a4d7f7787 Mon Sep 17 00:00:00 2001 From: "=^._.^=" Date: Tue, 26 May 2015 15:39:33 -0700 Subject: [PATCH 04/31] Update beta-cli-api.md --- beta-cli-api.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index d4809124..999ec942 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -196,9 +196,13 @@ Replication completed successfully. Stream versions out in historical order as json ```bash -dat log +dat log ``` +By default (no arguments) it will print out a stream of json representing each version of the repository. + +If `` is specified as the first positional argument then the individual change data for that version will be streamed out. 
+ Example output: ``` @@ -336,6 +340,11 @@ Stream data from stdin: cat file.json | dat import - ``` +### Options + +- `key` - specify which field to use as the primary key +- `no-key` - generate a random unique key + Example output: ``` @@ -357,21 +366,18 @@ Stream data to a file: ```bash dat export > woah-my-data.json - ``` ### Options - `lt`, `lte`, `gt`, `gte` - specify start/end key range values using less than, less than equals, greater than, greater than equals - `limit` - default unlimited. specify how many results to receive -- `versions` - boolean, default `false`. if `true` it will include the `version` hash along with the key/value pair when exporting it - +- `format` - default `json`. you can also specify `csv`. Example output: ``` $ dat export -{"content":"row","key":"1","version":"9e4629196e4db21a244fad8c8a989847fa3827e5747d2ad392363e46223fa888","value":{"key":"1","name":"MAX"}} -{"content":"row","key":"1","version":"163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502","value":{"key":"1","name":"Max"}} +{"key": "maxogden", "firstname": "Max", "lastname": "Ogden"} ``` ### dat write From 491853f1d08b856d6dbe9760ba69c0941d2a4108 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 2015 14:46:21 -0700 Subject: [PATCH 05/31] Add --log=json output to dat write --- bin/write.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/write.js b/bin/write.js index fba727eb..bc6642e5 100644 --- a/bin/write.js +++ b/bin/write.js @@ -57,7 +57,14 @@ function handleWrite (args) { pump(inputStream, db.createFileWriteStream(key, opts), function done (err) { if (err) abort(err, 'dat: err in write') - console.error('Done writing binary data.') + + if (args.log === 'json') { + var output = { + version: db.head + } + console.log(JSON.stringify(output)) + } else console.error('Done writing binary data.') + }) } } From 6b4d65d09ad587ca416d449348f7e9b2068ca8c8 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 
2015 14:51:27 -0700 Subject: [PATCH 06/31] Make dataset required for dat export --- bin/export.js | 2 +- tests/export.js | 7 +++++++ usage/export.txt | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/export.js b/bin/export.js index 150bd287..42a0d656 100644 --- a/bin/export.js +++ b/bin/export.js @@ -26,7 +26,7 @@ module.exports = { function handleExport (args) { debug('handleExport', args) - if (args.help) { + if (args.help || !args.dataset) { usage() abort() } diff --git a/tests/export.js b/tests/export.js index 0c5c0143..3d75f99d 100644 --- a/tests/export.js +++ b/tests/export.js @@ -31,6 +31,13 @@ test('dat export to file', function (t) { st.end() }) +test('dat export without dataset errors', function (t) { + var st = spawn(t, dat + ' export', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') + st.end() +}) + test('dat export output matches original file', function (t) { t.plan(53) var sorter = sort(function (a, b) { diff --git a/usage/export.txt b/usage/export.txt index 329a708a..f12cfab1 100644 --- a/usage/export.txt +++ b/usage/export.txt @@ -1,3 +1,3 @@ dat export --dataset= [--limit=] [--format=json,csv] -Stream a range of rows out of a dataset. \ No newline at end of file +Stream a range of rows out of a dataset. Dataset name required. 
\ No newline at end of file From 9c0c991eebce59e138aa4f919efed5d43c06eaba Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 2015 15:12:32 -0700 Subject: [PATCH 07/31] Update write to make dataset a required property --- bin/write.js | 2 +- tests/write.js | 8 ++------ usage/write.txt | 7 +++++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/bin/write.js b/bin/write.js index bc6642e5..0de10dd5 100644 --- a/bin/write.js +++ b/bin/write.js @@ -25,7 +25,7 @@ module.exports = { function handleWrite (args) { debug('handleWrite', args) - if (args.help || args._.length === 0) { + if (args.help || !args.dataset || args._.length === 0) { usage() abort() } diff --git a/tests/write.js b/tests/write.js index 70abea6f..c0b6caca 100644 --- a/tests/write.js +++ b/tests/write.js @@ -11,17 +11,13 @@ var dat1 = path.join(tmp, 'dat-1') helpers.onedat(dat1) -test('dat write', function (t) { +test('dat write errors without dataset', function (t) { var st = spawn(t, "echo 'hello world' | " + dat + ' write test-file.txt -', {cwd: dat1}) st.stdout.empty() - st.stderr.match(/Done writing binary data/) + st.stderr.match(fs.readFileSync(path.join('usage', 'write.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat cat after write', function (t) { - datCatEquals(t, 'test-file.txt', /hello world/) -}) - test('dat write to dataset', function (t) { var st = spawn(t, "echo 'hello world' | " + dat + ' write -d my-dataset test-file.txt -', {cwd: dat1}) st.stdout.empty() diff --git a/usage/write.txt b/usage/write.txt index 9a0b8f3d..4e5a9233 100644 --- a/usage/write.txt +++ b/usage/write.txt @@ -2,8 +2,11 @@ Write binary data into dat. This differs from `import` in that it doesn't parse Write a file to dat: -dat write -n + dat write + -d # the name of the dataset to create. required + -n # the name of the file. 
if not supplied, uses the path Stream data from stdin: -cat photo.jpg | dat write photo.jpg - \ No newline at end of file + python generate_model_data.py | dat write model_data - + From 488bf0c8c93cad5e9081cbdc01d692639095ebc4 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 2015 15:43:56 -0700 Subject: [PATCH 08/31] dat get as per cli doc, add tests --- bin/get.js | 62 +++++++-------------------------------------------- tests/get.js | 51 +++++++++++++++++------------------------- usage/get.txt | 4 ++-- 3 files changed, 31 insertions(+), 86 deletions(-) diff --git a/bin/get.js b/bin/get.js index b82266e0..f36471f3 100644 --- a/bin/get.js +++ b/bin/get.js @@ -13,74 +13,28 @@ module.exports = { name: 'get', command: handleRows, options: [ - { - name: 'format', - boolean: false, - abbr: 'f' - }, { name: 'dataset', boolean: false, abbr: 'd' - }, - { - name: 'greater-than-equal', - boolean: false, - abbr: 'gte' - }, - { - name: 'greater-than', - boolean: false, - abbr: 'gt' - }, - { - name: 'less-than-equal', - boolean: false, - abbr: 'lte' - }, - { - name: 'less-than', - boolean: false, - abbr: 'lt' - }, - { - name: 'limit', - boolean: false, - abbr: 'l' } ] } function handleRows (args) { debug('handleRows', args) - if (args.help) return usage() + if (args.help || !args.dataset || args._.length === 0) { + usage() + abort() + } openDat(args, function ready (err, db) { if (err) abort(err) var key = args._[0] - if (!args.f) args.f = 'ndjson' - if (args.f === 'json') args.f = 'ndjson' - var parseReadStream - if (args.f === 'ndjson') { - parseReadStream = ndjson.serialize() - } else { - parseReadStream = through.obj(function (data, enc, next) { - var val = data.value - val.key = data.key - next(null, val) - }) - } - - if (!key) { - pump(db.createReadStream(args), parseReadStream, formatData(args.f), process.stdout, function done (err) { - if (err) abort(err, 'dat get error') - }) - } else { - db.get(key, args, function (err, value) { - if (err) abort(err, 
'dat get error') - process.stdout.write(JSON.stringify(value)) - }) - } + db.get(key, args, function (err, value) { + if (err) abort(err, 'dat get error') + process.stdout.write(JSON.stringify(value)) + }) }) } diff --git a/tests/get.js b/tests/get.js index 41a0e5e2..15158e1e 100644 --- a/tests/get.js +++ b/tests/get.js @@ -1,4 +1,5 @@ var os = require('os') +var fs = require('fs') var path = require('path') var test = require('tape') var spawn = require('tape-spawn') @@ -18,44 +19,34 @@ test('dat import dataset', function (t) { st.end() }) -test('dat get from dataset', function (t) { - var st = spawn(t, dat + ' get --dataset=get-test', {cwd: dat1}) +test('dat get a key from dataset', function (t) { + var st = spawn(t, dat + ' get ak11246293 --dataset=get-test', {cwd: dat1}) st.stderr.empty() st.stdout.match(function (output) { - var lines = output.split('\n') - if (lines.length === 10) { - var line = JSON.parse(lines[0]) - if (line.key === 'ak11246285') { - return line.value.latitude === '61.3482' - } - return true - } + output = JSON.parse(output) + if (output.key === 'ak11246293' && output.value.latitude === '60.0366') return true + return false }) st.end() }) -test('dat get from dataset with csv', function (t) { - var st = spawn(t, dat + ' get --dataset=get-test --format=csv', {cwd: dat1}) - st.stderr.empty() - st.stdout.match(function (output) { - var lines = output.split('\n') - if (lines.length === 11) { // 1 header row + 10 lines - var headers = lines[0].split(',') - t.equals(headers.length, 16) - return true - } - return false - }) +test('dat get without key errors', function (t) { + var st = spawn(t, dat + ' get --dataset=get-test', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat get a key from dataset', function (t) { - var st = spawn(t, dat + ' get ak11246293 --dataset=get-test', {cwd: dat1}) - st.stderr.empty() - st.stdout.match(function 
(output) { - output = JSON.parse(output) - if (output.key === 'ak11246293' && output.value.latitude === '60.0366') return true - return false - }) +test('dat get without dataset errors', function (t) { + var st = spawn(t, dat + ' get ak11246293', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') + st.end() +}) + +test('dat get without key and dataset errors', function (t) { + var st = spawn(t, dat + ' get', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') st.end() }) diff --git a/usage/get.txt b/usage/get.txt index 5b7392f6..19f37d75 100644 --- a/usage/get.txt +++ b/usage/get.txt @@ -1,3 +1,3 @@ -dat get [-gt=key -lt=key -gte=key -lte=key -d dataset] +dat get [-d dataset] -streams tabular data out of dat, defaults to newline delimited json format (ndjson) \ No newline at end of file +Prints a paricular row by key. \ No newline at end of file From a20a3b7c159b8d7f81d4ccd4b11252a3af113092 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 2015 16:59:03 -0700 Subject: [PATCH 09/31] standard style --- bin/export.js | 31 ++++++++++++++++++++++++++----- bin/get.js | 4 ---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/bin/export.js b/bin/export.js index 42a0d656..c24c5b09 100644 --- a/bin/export.js +++ b/bin/export.js @@ -19,6 +19,31 @@ module.exports = { name: 'format', boolean: false, abbr: 'f' + }, + { + name: 'greater-than-equal', + boolean: false, + abbr: 'gte' + }, + { + name: 'greater-than', + boolean: false, + abbr: 'gt' + }, + { + name: 'less-than-equal', + boolean: false, + abbr: 'lte' + }, + { + name: 'less-than', + boolean: false, + abbr: 'lt' + }, + { + name: 'limit', + boolean: false, + abbr: 'l' } ] } @@ -41,10 +66,6 @@ function handleExport (args) { }) function handleOuputStream (db) { - var opts = { - dataset: args.d - } - var parseOutput = 
through.obj(function (data, enc, next) { debug('exporting through data', data) if (data.content === 'row') { @@ -54,7 +75,7 @@ function handleExport (args) { } }) - pump(db.createReadStream(opts), parseOutput, formatData(args.f), process.stdout, function done (err) { + pump(db.createReadStream(args), parseOutput, formatData(args.f), process.stdout, function done (err) { if (err) abort(err, 'Error exporting data') }) } diff --git a/bin/get.js b/bin/get.js index f36471f3..d387c0bb 100644 --- a/bin/get.js +++ b/bin/get.js @@ -1,8 +1,4 @@ -var pump = require('pump') -var ndjson = require('ndjson') var debug = require('debug')('bin/get') -var formatData = require('format-data') -var through = require('through2') var abort = require('../lib/abort.js') var openDat = require('../lib/open-dat.js') From d0177a8e9586ee720593f3298da2e9b1f0f96037 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Wed, 27 May 2015 16:59:21 -0700 Subject: [PATCH 10/31] Add export tests --- tests/export.js | 95 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/tests/export.js b/tests/export.js index 3d75f99d..4920889c 100644 --- a/tests/export.js +++ b/tests/export.js @@ -18,26 +18,19 @@ var csvfile = path.resolve(__dirname + '/fixtures/all_hour.csv') var exportfile = path.join(dat1, 'out.csv') test('dat import csv', function (t) { - var st = spawn(t, dat + ' import ' + csvfile + ' -d test-ds --key=id', {cwd: dat1}) + var st = spawn(t, dat + ' import ' + csvfile + ' -d export-test --key=id', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done importing data/) st.end() }) test('dat export to file', function (t) { - var st = spawn(t, dat + ' export -d test-ds > ' + exportfile, {cwd: dat1}) + var st = spawn(t, dat + ' export -d export-test > ' + exportfile, {cwd: dat1}) st.stdout.empty() st.stderr.empty() st.end() }) -test('dat export without dataset errors', function (t) { - var st = spawn(t, dat + ' export', {cwd: dat1}) - 
st.stdout.empty() - st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') - st.end() -}) - test('dat export output matches original file', function (t) { t.plan(53) var sorter = sort(function (a, b) { @@ -62,9 +55,89 @@ test('dat export output matches original file', function (t) { }) }) } - loop() +}) + +test('dat export with limit', function (t) { + var st = spawn(t, dat + ' export --limit=5 --dataset=export-test', {cwd: dat1}) + st.stderr.empty() + st.stdout.match(function (output) { + var lines = output.split('\n') + if (lines.length > 6) return false + if (lines.length === 6) { + var line = JSON.parse(lines[4]) // 5th line is empty string due to splittage + if (line.key === 'ak11246293') { + return line.latitude === '60.0366' + } + return false + } + }) + st.end() +}) + +test('dat export with limit and csv', function (t) { + var st = spawn(t, dat + ' export --limit=5 --dataset=export-test --format=csv', {cwd: dat1}) + st.stderr.empty() + var ok = false + st.stdout.match(function (output) { + var lines = output.split('\n') + if (lines.length > 6) return ok + if (lines.length === 6) { + ok = lines[5] === '' // last is empty due to splittage + } + }) + st.end() +}) + +test('dat export with limit and csv without dataset errors', function (t) { + var st = spawn(t, dat + ' export --limit=5 --format=csv', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') + st.end() +}) +test('dat export with range options without dataset errors', function (t) { + var st = spawn(t, dat + ' export --lt=ak11246291', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') + st.end() +}) + +test('dat export with lt', function (t) { + var st = spawn(t, dat + ' export --dataset=export-test --lt=ak11246291', {cwd: dat1}) + st.stderr.empty() + st.stdout.match(function (output) { + 
var lines = output.split('\n') + if (lines.length === 4) { + return ( + (JSON.parse(lines[0]).id === 'ak11246285') && + (JSON.parse(lines[1]).id === 'ak11246287') && + (JSON.parse(lines[2]).id === 'ak11246289') + ) + } + return false + }) + st.end() +}) + +test('dat export with lt and limit options', function (t) { + var st = spawn(t, dat + ' export --dataset=export-test --lt=ak11246291 --limit=1', {cwd: dat1}) + st.stderr.empty() + st.stdout.match(function (output) { + var lines = output.split('\n') + if (lines.length === 2) { + return (JSON.parse(lines[0]).id === 'ak11246285') + } + return false + }) + st.end() +}) + +test('dat export without dataset errors', function (t) { + var st = spawn(t, dat + ' export', {cwd: dat1}) + st.stdout.empty() + st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') + st.end() }) var hashes, row @@ -143,7 +216,7 @@ test('dat write', function (t) { st.end() }) -test('dat export with checkout', function (t) { +test('dat export with checkout after write', function (t) { var st = spawn(t, dat + ' export -d max', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { From 03d35b7ba88f2d70f699d4e1d3867164e900f0b3 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Thu, 28 May 2015 16:00:51 -0700 Subject: [PATCH 11/31] update docs with new beta apis and output and implement dat status --- beta-cli-api.md | 104 +++++++++++++++++++++++++++---------- bin/{heads.js => forks.js} | 8 +-- bin/status.js | 19 ++++--- cli.js | 2 +- package.json | 1 + tests/checkout.js | 4 +- tests/export.js | 6 +-- usage/forks.txt | 3 ++ usage/heads.txt | 3 -- 9 files changed, 102 insertions(+), 48 deletions(-) rename bin/{heads.js => forks.js} (72%) create mode 100644 usage/forks.txt delete mode 100644 usage/heads.txt diff --git a/beta-cli-api.md b/beta-cli-api.md index 999ec942..ad44d760 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -14,6 +14,7 @@ This is the proposed CLI API for our Beta 
release. Please leave feedback [in thi - [dat checkout](#dat-checkout) - [dat diff](#dat-diff) - [dat merge](#dat-merge) + - [dat forks](#dat-forks) - [dataset commands](#dataset-commands) - [dat import](#dat-import) - [dat export](#dat-export) @@ -71,7 +72,7 @@ All commands have these options: - `path`/`p` - specify the path to the dat directory that the command should use. Default is current working directory - `help`/`h` (boolean) - pass this option to show the help for a command. -- `log` (default 'text') - set this to 'json' to change the response format logging for status/response messages to JSON for easy parsing. +- `json` - set this to true to change all output to JSON for easy parsing. - `checkout` - the version hash to use when retrieving data for a command. Example output: @@ -82,7 +83,7 @@ usage: dat [-flag] [--key value] commands: init initialize a new dat in a directory - checkout dat will operate at a particular head + checkout dat will operate at a particular fork add import a file into dat push push data to a remote dat ... etc @@ -119,7 +120,7 @@ Example output: ``` $ dat status Current version is now 8eaf3b0739d32849687a544efae8487b5b05df52 -438 keys, 32 files, 3 commits, 143 Mb total +438 keys, 32 files, 3 versions, 143 Mb total Last updated 3 seconds ago ``` @@ -173,7 +174,7 @@ Example output: ``` $ dat pull ssh://192.168.0.5:~/data Pulled 823 changes (93.88 Mb, 3.4 Mb/s). -Pull completed successfully. +Pull completed successfully, you now have 2 forks. 
Current version is now b04adb64fdf2203 ``` @@ -206,9 +207,16 @@ If `` is specified as the first positional argument then the indiv Example output: ``` -$ dat log --limit=2 -{ "change": 1, "version": "6bdd624ae6f9ddb96069e04fc030c6e964e77ac7", links: [...], "puts": 12, "deletes": 3, "date": "2015..."} -{ "change": 2, "version": "7b13de1bd942a0cbfc2721d9e0b9a4fa5a076517", links: [...], "puts": 0, "deletes": 2, "date": "2015..."} +$ dat log --limit=1 +Version: 6bdd624ae6f9ddb96069e04fc030c6e964e77ac7 [+12, -3] +Date: April 15th 2015, 7:30PM PST + + added cool csv +``` + +``` +$ dat log --limit=1 --json +{ "change": 1, "version": "6bdd624ae6f9ddb96069e04fc030c6e964e77ac7", links: [...], "puts": 12, "deletes": 3, "date": "2015...", "message": "added cool csv"} ``` `Links` is a list of older versions that are referenced from this current version (forms a directed acyclic graph if drawn). @@ -219,10 +227,10 @@ $ dat log --limit=2 Non-destructive rollback state to a hash in the past ```bash -dat checkout +dat checkout ``` -Check out latest commit on default branch +Check out latest version on default branch ```bash dat checkout latest @@ -248,9 +256,22 @@ If the same key is in both versions but the values differ, a diff object will be Example output: ``` -$ dat diff --pretty 163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502 64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 +$ dat diff 64843f272df +Diff between "Imported csv" and "Re-imported edited csv" + ? "first":"Max" -> "MAX" + - "hey": "deleted" + + "foo": "bar" +Diff between "Initial data import" and "Re-imported edited csv" + ? 
"first":"Bob" -> "BOB" + - "hey": "deleted" + + "foo": "bar" +``` + +``` +$ dat diff --pretty --json 64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 { "key": "1", + "forks": ["163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502", "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3" ] "versions": [ { "type": "put", @@ -260,8 +281,7 @@ $ dat diff --pretty 163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb05245186 "value": { "key": "1", "name": "Max" - }, - "checkout": "163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502" + } }, { "type": "put", @@ -271,51 +291,56 @@ $ dat diff --pretty 163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb05245186 "value": { "key": "1", "name": "MAX" - }, - "checkout": "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3" + } } ] } +<... etc for each key in the diff> ``` ### dat merge -Merge two checkouts of a dataset into a single checkout. Uses [knead](http://github.com/karissa/knead) as default merge tool for now. +Merges two forks. ``` -dat merge +dat merge ``` #### Options -`--merge-tool`: run the given merge tool to assist in resolving conflicts manually. -`-` for : receive resolved changes on stdin +- `-` for : receive resolved changes on stdin +- `left`: pick the left side as the winner +- `right`: pick the right side as the winner +- `yolo`: pick random side for each key - -#### Resolutions from file +Example output: A `dat merge` receives a stream of changes that will be applied to resolve conflicts between two versions. +$ dat merge + +Merging from a file: + ``` -$ cat resolutions.json | dat merge ab3234dfe5 bdc3ae23cef - +$ dat merge resolutions.json Changes resolved successfully. Current version is now b04adb64fdf2203 ``` -#### Merge tools +Merging as a stream using `dat diff`: ``` -$ dat merge ab3234dfe5 bdc3ae23cef --merge-tool="my-merge-tool.sh" +$ dat diff ab3234dfe5 bdc3ae23cef | | dat merge - Changes resolved successfully. 
-Current version is now b04adb64fdf2203 +Current version is now 98v8catb4bvcddf ``` -In this example, the `` decides which change to keep between the versions suppled in a `dat diff`, outputting the json for each kept change to stdout. +Merging two forks by picking one side: ``` -$ dat diff ab3234dfe5 bdc3ae23cef | | dat merge ab3234dfe5 bdc3ae23cef - +$ dat merge ab3234dfe5 bdc3ae23cef --left Changes resolved successfully. -Current version is now b04adb64fdf2203 +Current version is now b2bg304823h32h2 ``` ## dataset commands @@ -342,8 +367,9 @@ cat file.json | dat import - ### Options -- `key` - specify which field to use as the primary key +- `key`/`k` - specify which field to use as the primary key - `no-key` - generate a random unique key +- `message`/`m` - a short description of this import Example output: @@ -438,3 +464,25 @@ Example output: $ dat get uw60748112 {"key":"uw60748112","version":"5abd6625cd2e64a116628a9a306de2fbd73a05ea5905e26d5d4e58e077be2203","value":{"time":"2014-04-30T00:09:37.000Z","latitude":"46.7557","longitude":"-121.9855","place":"24km ESE of Eatonville, Washington","type":"earthquake"}} ``` + +### dat forks + +List the current forks + +``` +dat forks +``` + +Example output: + +``` +$ dat forks +64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 - Imported csv +163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502 - Updated names +``` + +``` +$ dat forks --json +{version: "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3", message: "Imported csv"} +{version: "163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502", message: "Updated names"} +``` diff --git a/bin/heads.js b/bin/forks.js similarity index 72% rename from bin/heads.js rename to bin/forks.js index 74117667..a70d65e6 100644 --- a/bin/heads.js +++ b/bin/forks.js @@ -1,13 +1,13 @@ -var usage = require('../lib/usage.js')('heads.txt') +var usage = require('../lib/usage.js')('forks.txt') var openDat = require('../lib/open-dat.js') 
var abort = require('../lib/abort.js') module.exports = { - name: 'heads', - command: handleHeads + name: 'forks', + command: handleForks } -function handleHeads (args) { +function handleForks (args) { if (args.help) return usage() openDat(args, function ready (err, db) { if (err) abort(err) diff --git a/bin/status.js b/bin/status.js index 4efffbd3..4dbb19f5 100644 --- a/bin/status.js +++ b/bin/status.js @@ -1,3 +1,5 @@ +var prettyBytes = require('pretty-bytes') +var relativeDate = require('relative-date') var abort = require('../lib/abort.js') var openDat = require('../lib/open-dat.js') var usage = require('../lib/usage.js')('checkout.txt') @@ -13,14 +15,17 @@ function handleStatus (args) { openDat(args, function ready (err, db) { if (err) abort(err) - db.open(function () { - if (args.l === 'json') { - var output = { - 'version': db.head - } - console.log(JSON.stringify(output)) + db.status(function (err, status) { + if (args.log === 'json') { + console.log(JSON.stringify(status)) } else { - console.error('Current version is', db.head) + var output = '' + output += 'Current version is ' + status.head + if (!status.checkout) output += ' (latest)\n' + else output += '\n' + output += status.rows + ' keys, ' + status.files + ' files, ' + status.versions + ' versions, ' + prettyBytes(status.size) + ' total\n' + output += 'Last updated ' + relativeDate(status.modified) + console.error(output) } }) }) diff --git a/cli.js b/cli.js index ba60f128..74da0880 100755 --- a/cli.js +++ b/cli.js @@ -17,7 +17,7 @@ var config = { require('./bin/diff.js'), require('./bin/write.js'), require('./bin/cat.js'), - require('./bin/heads.js'), + require('./bin/forks.js'), require('./bin/merge.js') ], defaults: require('./bin/defaults.js'), diff --git a/package.json b/package.json index 98d1a49d..0b5dd520 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "pump": "^1.0.0", "pumpify": "^1.3.3", "readable-stream": "^1.0.33", + "relative-date": "^1.1.2", "sorted-diff-stream": 
"^1.0.0", "subcommand": "^2.0.1", "through2": "^0.6.3", diff --git a/tests/checkout.js b/tests/checkout.js index bc0c985a..25a45746 100644 --- a/tests/checkout.js +++ b/tests/checkout.js @@ -31,8 +31,8 @@ test('dat1 heads', function (t) { }) test('dat1 gets proper export', function (t) { - var checkout = spawn(t, dat + ' checkout -d checkout-test-dataset ' + hashes[0], {cwd: dat1, end: false}) - checkout.stderr.match(new RegExp('Current version is now ' + hashes[0])) + var checkout = spawn(t, dat + ' checkout -d checkout-test-dataset ' + hashes[1], {cwd: dat1, end: false}) + checkout.stderr.match(new RegExp('Current version is now ' + hashes[1])) checkout.stdout.empty() checkout.end(function () { var exp = spawn(t, dat + ' export -d checkout-test-dataset', {cwd: dat1}) diff --git a/tests/export.js b/tests/export.js index 4920889c..9cdf7c9f 100644 --- a/tests/export.js +++ b/tests/export.js @@ -166,7 +166,7 @@ test('dat heads', function (t) { }) test('dat export with checkout', function (t) { - var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[0], {cwd: dat2}) + var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[1], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { @@ -180,7 +180,7 @@ test('dat export with checkout', function (t) { }) test('dat export with checkout hash 1', function (t) { - var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[1], {cwd: dat2}) + var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { @@ -194,7 +194,7 @@ test('dat export with checkout hash 1', function (t) { }) test('dat export with checkout hash 1 abbr', function (t) { - var st = spawn(t, dat + ' export -d max -c ' + hashes[1], {cwd: dat2}) + var st = spawn(t, dat + ' export -d max -c ' + hashes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { diff --git a/usage/forks.txt 
b/usage/forks.txt new file mode 100644 index 00000000..e6a7a42e --- /dev/null +++ b/usage/forks.txt @@ -0,0 +1,3 @@ +dat forks + +prints out all forks \ No newline at end of file diff --git a/usage/heads.txt b/usage/heads.txt deleted file mode 100644 index 270c873b..00000000 --- a/usage/heads.txt +++ /dev/null @@ -1,3 +0,0 @@ -dat heads - -prints out all heads \ No newline at end of file From 171d18088264a7d009daa2752dfe2ac3af8b5fee Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Thu, 28 May 2015 18:10:52 -0700 Subject: [PATCH 12/31] heads>forks and missing req --- package.json | 1 + tests/checkout.js | 6 +++--- tests/export.js | 4 ++-- tests/merge.js | 4 ++-- tests/status.js | 2 +- usage/root.txt | 6 +++--- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/package.json b/package.json index 0b5dd520..3f2079f7 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "format-data": "^2.1.1", "ndjson": "^1.3.0", "peek-stream": "^1.1.1", + "pretty-bytes": "^2.0.1", "pump": "^1.0.0", "pumpify": "^1.3.3", "readable-stream": "^1.0.33", diff --git a/tests/checkout.js b/tests/checkout.js index 25a45746..70928e3f 100644 --- a/tests/checkout.js +++ b/tests/checkout.js @@ -19,8 +19,8 @@ var dat2 = path.join(tmp, 'dat-2') helpers.twodats(dat1, dat2) helpers.conflict(dat1, dat2, 'checkout-test-dataset', csvs) -test('dat1 heads', function (t) { - var st = spawn(t, dat + ' heads', {cwd: dat1}) +test('dat1 forks', function (t) { + var st = spawn(t, dat + ' forks', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) @@ -36,7 +36,7 @@ test('dat1 gets proper export', function (t) { checkout.stdout.empty() checkout.end(function () { var exp = spawn(t, dat + ' export -d checkout-test-dataset', {cwd: dat1}) - exp.stdout.match(/Max/) + exp.stdout.match(/MAX/) exp.stderr.empty() exp.end() }) diff --git a/tests/export.js b/tests/export.js index 
9cdf7c9f..05b30524 100644 --- a/tests/export.js +++ b/tests/export.js @@ -154,8 +154,8 @@ var dat3 = path.join(tmp, 'dat-1') helpers.twodats(dat2, dat3) helpers.conflict(dat2, dat3, 'max', csvs) -test('dat heads', function (t) { - var st = spawn(t, dat + ' heads', {cwd: dat2}) +test('dat forks', function (t) { + var st = spawn(t, dat + ' forks', {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) diff --git a/tests/merge.js b/tests/merge.js index c3ec87de..1ed13938 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -23,8 +23,8 @@ var dat2 = path.join(tmp, 'dat-2') helpers.twodats(dat1, dat2) helpers.conflict(dat1, dat2, 'merge-test', csvs) -test('dat1 heads', function (t) { - var st = spawn(t, dat + ' heads', {cwd: dat1}) +test('dat1 forks', function (t) { + var st = spawn(t, dat + ' forks', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) diff --git a/tests/status.js b/tests/status.js index e45d3e31..8bf3a184 100644 --- a/tests/status.js +++ b/tests/status.js @@ -26,7 +26,7 @@ test('dat1 status', function (t) { helpers.conflict(dat1, dat2, 'status-test', csvs) -test('dat1 status with multiple heads', function (t) { +test('dat1 status with multiple forks', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Current version is/) diff --git a/usage/root.txt b/usage/root.txt index 28714967..cc66a020 100644 --- a/usage/root.txt +++ b/usage/root.txt @@ -10,8 +10,8 @@ commands: import import rows into a dataset write write a file into dat cat reads a file's contents from dat to stdout - heads list heads of the current dat - diff see differences between two heads - merge merge two heads into one + forks list forks of the current dat + diff see differences between two forks + merge merge two forks into one type 
`dat command --help` to view detailed help about a specific subcommand \ No newline at end of file From 4d03bd1e680382248d452e38a03e8783ac546e09 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Fri, 29 May 2015 02:24:06 -0700 Subject: [PATCH 13/31] Fix test --- tests/merge.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/merge.js b/tests/merge.js index 1ed13938..e0e58d81 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -76,7 +76,7 @@ test('verify merge version', function (t) { st.stdout.match(function match (output) { try { output = JSON.parse(output) - return output.value.name === 'Max' + return output.name === 'MAX' } catch (e) { return false } From 9d74072144cfa6160bfa939e6fe124875f9abdd1 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Fri, 29 May 2015 02:24:25 -0700 Subject: [PATCH 14/31] Status should output version not head --- bin/status.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/status.js b/bin/status.js index 4dbb19f5..2a707ab4 100644 --- a/bin/status.js +++ b/bin/status.js @@ -16,11 +16,14 @@ function handleStatus (args) { if (err) abort(err) db.status(function (err, status) { + if (err) abort(err) + status.version = status.head + delete status.head if (args.log === 'json') { console.log(JSON.stringify(status)) } else { var output = '' - output += 'Current version is ' + status.head + output += 'Current version is ' + status.version if (!status.checkout) output += ' (latest)\n' else output += '\n' output += status.rows + ' keys, ' + status.files + ' files, ' + status.versions + ' versions, ' + prettyBytes(status.size) + ' total\n' From 3bd43f16a9acb607cfa7c991e5a7ee03f8798eb0 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Fri, 29 May 2015 02:35:39 -0700 Subject: [PATCH 15/31] add whitepaper skeleton --- whitepaper.md | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 whitepaper.md diff --git a/whitepaper.md 
b/whitepaper.md new file mode 100644 index 00000000..0fefdfe4 --- /dev/null +++ b/whitepaper.md @@ -0,0 +1,74 @@ +# Dat +From a variety of contributors. + +http://dat-data.com + +http://github.com/maxogden/dat + + +## Abstract +Dat is a version-controlled, distributed database. Dat is designed to provide data scientists with a common data exchange mechanism to collaborate and share research data, both internally and externally. Here, we outline the core infrastructure for Dat, which has been informed by key use cases in the scientific community. We hope this serves as a living document to help developers and data scientists in all stages of interaction with Dat -- from extending features to understanding use-case trade-offs. + +## Introduction +We hope Dat will enormously simplify the process of duplicating and verifying novel research and associated discoveries. We've been working with scientists to arrive at key use cases that go unsolved across multiple scientific domains with varied technological expertise. + +From day one, we architected Dat as a variety of open source modules that build upon and integrate with each other, encouraging contribution from an existing community of data engineers. You can find the current list of modules in the `package.json` of the [main repository](http://github.com/maxogden/dat). + +## 2. Data in Dat +### 2.1 Datasets +A dataset in dat is a container for all of the versions of a given table. A dataset is created when data is added to dat, and can be given a name. If no name is supplied, dat adds the data to the global, default dataset. Dat accepts data in csv, tsv, or newline-delimited json formats. + +``` +dat add flights.json -d flights +ba5d123eadf6df2 +``` +
+Data is then streamed into the Dat database, one row at a time. That means that your computer does not have to hold the entire dataset in memory to add to Dat. When data is finished being added, a new table is created inside of the dataset. 
This table is given a unique identifier, that is, a `hash`. This `hash` can be used to reference or rollback to the table. Read about how this is implemented in Section 4. + +[img]() + +### 2.2 Updating data +When adding data to a dataset, Dat must determine whether the new data creates a new row or updates an existing one. + +Data in dat is immutable. That means that data is never deleted from dat. + + +### 2.3 Immutability +### 2.4 Streaming + +## 3. Ecosystem +### 3.1 Client libraries +### 3.2 Pipelines + +## 4. Architecture +For each of the following sections, we should describe how dat's core and its underlying modules support the features listed. +### Streaming + +## Ecosystem +### Client libraries +### Pipelines + +## Architecture +For each of the following sections, we should describe how dat's core and its underlying modules support the features listed. + +### It's a Graph +Supports merging +TODO: HOW + +Supports branches +TODO: how + +### It's a Log +Supports checkout +TODO: HOW + +Supports pull +TODO: how + +## Performance + +### Benchmarks +Here we should list some basic benchmarks (adding data locally, replication, exporting data). + +### Room for improvement +Here we should talk about where there might be room for improvement. 
From 9fc0143e6c022a7483b3437bb10f9d637e1ae2dd Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sat, 30 May 2015 14:56:32 -0700 Subject: [PATCH 16/31] convert .log=json to just --json, start making tests pass --- bin/defaults.js | 5 ++--- bin/export.js | 7 +++---- bin/import.js | 10 ++++++++-- bin/status.js | 4 ++-- bin/write.js | 2 +- roadmap.md | 10 ++++++++++ tests/checkout.js | 41 +++++++++++++++++++++++++++++--------- tests/import.js | 50 +++++++++++++++++++++++++---------------------- usage/import.txt | 4 ++-- 9 files changed, 87 insertions(+), 46 deletions(-) create mode 100644 roadmap.md diff --git a/bin/defaults.js b/bin/defaults.js index 9ee42cf4..765f53ae 100644 --- a/bin/defaults.js +++ b/bin/defaults.js @@ -16,8 +16,7 @@ module.exports = [ abbr: 'c' }, { - name: 'log', - boolean: false, - abbr: 'l' + name: 'json', + boolean: true } ] diff --git a/bin/export.js b/bin/export.js index c24c5b09..e32732be 100644 --- a/bin/export.js +++ b/bin/export.js @@ -56,9 +56,8 @@ function handleExport (args) { abort() } - if (!args.f || args.f === 'json') { - args.f = 'ndjson' - } + var format = 'ndjson' + if (args.format) format = args.format openDat(args, function ready (err, db) { if (err) abort(err) @@ -75,7 +74,7 @@ function handleExport (args) { } }) - pump(db.createReadStream(args), parseOutput, formatData(args.f), process.stdout, function done (err) { + pump(db.createReadStream(args), parseOutput, formatData(format), process.stdout, function done (err) { if (err) abort(err, 'Error exporting data') }) } diff --git a/bin/import.js b/bin/import.js index 160b6b9b..5d439486 100644 --- a/bin/import.js +++ b/bin/import.js @@ -38,6 +38,12 @@ function handleImport (args) { abort() } + if (!args.dataset) { + usage() + console.error('\nError: Must specify dataset (-d )') + abort() + } + openDat(args, function ready (err, db) { if (err) abort(err) handleInputStream(db) @@ -54,9 +60,9 @@ function handleImport (args) { next(null, {type: 'put', key: key, value: obj}) }) - 
pump(inputStream, parseInputStream(args), transform, db.createWriteStream({ dataset: args.d }), function done (err) { + pump(inputStream, parseInputStream(args), transform, db.createWriteStream({ dataset: args.dataset }), function done (err) { if (err) abort(err, 'Error importing data') - if (args.log === 'json') { + if (args.json) { var output = { version: db.head } diff --git a/bin/status.js b/bin/status.js index 2a707ab4..9520e174 100644 --- a/bin/status.js +++ b/bin/status.js @@ -19,7 +19,7 @@ function handleStatus (args) { if (err) abort(err) status.version = status.head delete status.head - if (args.log === 'json') { + if (args.json) { console.log(JSON.stringify(status)) } else { var output = '' @@ -28,7 +28,7 @@ function handleStatus (args) { else output += '\n' output += status.rows + ' keys, ' + status.files + ' files, ' + status.versions + ' versions, ' + prettyBytes(status.size) + ' total\n' output += 'Last updated ' + relativeDate(status.modified) - console.error(output) + console.log(output) } }) }) diff --git a/bin/write.js b/bin/write.js index 0de10dd5..d8e2c612 100644 --- a/bin/write.js +++ b/bin/write.js @@ -58,7 +58,7 @@ function handleWrite (args) { pump(inputStream, db.createFileWriteStream(key, opts), function done (err) { if (err) abort(err, 'dat: err in write') - if (args.log === 'json') { + if (args.json) { var output = { version: db.head } diff --git a/roadmap.md b/roadmap.md new file mode 100644 index 00000000..2e56caae --- /dev/null +++ b/roadmap.md @@ -0,0 +1,10 @@ +# dat roadmap + +- alpha (august 2014) +- beta (june 2015) +- 1.0 (sometime around the end of 2015) + +post-beta features (2015): + +- binary diffs using rabin fingerprinting (space saving optimization) +- p2p replication strategy (faster clone speed, more backups) diff --git a/tests/checkout.js b/tests/checkout.js index 70928e3f..1ded00af 100644 --- a/tests/checkout.js +++ b/tests/checkout.js @@ -5,7 +5,7 @@ var helpers = require('./helpers') var tmp = require('os').tmpdir() 
var dat = path.resolve(__dirname + '/../cli.js') -var hashes +var hashes, statusJson var csvs = { a: path.resolve(__dirname + '/fixtures/a.csv'), @@ -13,30 +13,53 @@ var csvs = { c: path.resolve(__dirname + '/fixtures/c.csv') } -var dat1 = path.join(tmp, 'dat-1') -var dat2 = path.join(tmp, 'dat-2') +var dat1 = path.join(tmp, 'dat-checkout-1') +var dat2 = path.join(tmp, 'dat-checkout-2') +var dataset = 'checkout-test-dataset' helpers.twodats(dat1, dat2) -helpers.conflict(dat1, dat2, 'checkout-test-dataset', csvs) +helpers.conflict(dat1, dat2, dataset, csvs) test('dat1 forks', function (t) { var st = spawn(t, dat + ' forks', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) - if (ok) hashes = output.split('\n') + if (ok) hashes = output.split('\n').slice(0, 2) return ok }) st.end() }) +test('dat1 status returns local version', function (t) { + var stat = spawn(t, dat + ' status --json', {cwd: dat1, end: false}) + stat.stderr.empty() + stat.stdout.match(function match (output) { + try { + statusJson = JSON.parse(output) + } catch (e) { + statusJson = false + } + if (statusJson && statusJson.version) return true + else return false + }) + stat.end(function () { + t.end() + }) +}) + test('dat1 gets proper export', function (t) { - var checkout = spawn(t, dat + ' checkout -d checkout-test-dataset ' + hashes[1], {cwd: dat1, end: false}) - checkout.stderr.match(new RegExp('Current version is now ' + hashes[1])) + // determine which has is ours and which came from dat2, then checkout to the remote one + var remoteHash + if (hashes[0] === statusJson.version) remoteHash = hashes[1] + else remoteHash = hashes[0] + + var checkout = spawn(t, dat + ' checkout ' + remoteHash, {cwd: dat1, end: false}) + checkout.stderr.match(new RegExp('Current version is now ' + remoteHash)) checkout.stdout.empty() checkout.end(function () { - var exp = spawn(t, dat + ' export -d 
checkout-test-dataset', {cwd: dat1}) - exp.stdout.match(/MAX/) + var exp = spawn(t, dat + ' export -d ' + dataset, {cwd: dat1}) + exp.stdout.match(/Max/) exp.stderr.empty() exp.end() }) diff --git a/tests/import.js b/tests/import.js index b3c1f30c..756b2888 100644 --- a/tests/import.js +++ b/tests/import.js @@ -12,27 +12,35 @@ var dat3 = path.join(tmp, 'dat-3') helpers.onedat(dat1) -test('dat import csv', function (t) { +test('dat import w/ no dataset arg', function (t) { var csv = path.resolve(__dirname + '/fixtures/all_hour.csv') var st = spawn(t, dat + ' import ' + csv + ' --key=id', {cwd: dat1}) st.stdout.empty() + st.stderr.match(/Must specify dataset/) + st.end() +}) + +test('dat import csv', function (t) { + var csv = path.resolve(__dirname + '/fixtures/all_hour.csv') + var st = spawn(t, dat + ' import ' + csv + ' --key=id --dataset=import-test1', {cwd: dat1}) + st.stdout.empty() st.stderr.match(/Done importing data/) st.end() }) -verify(dat1) +verify('import-test1', dat1) helpers.onedat(dat2) test('dat import json', function (t) { var json = path.resolve(__dirname + '/fixtures/all_hour.json') - var st = spawn(t, dat + ' import ' + json + ' --key=id', {cwd: dat2}) + var st = spawn(t, dat + ' import ' + json + ' --key=id --dataset=import-test2', {cwd: dat2}) st.stdout.empty() st.stderr.match(/Done importing data/) st.end() }) -verify(dat2) +verify('import-test2', dat2) helpers.onedat(dat3) @@ -56,17 +64,24 @@ test('dat import all_hour to separate dataset', function (t) { verify('import-test4', dat3) -function verify (dataset, datN) { - if (!datN) { - datN = dataset - dataset = '' - } - test('dat cat', function (t) { - var st = spawn(t, dat + ' export --dataset=' + dataset, {cwd: datN}) +test('dat import with json output', function (t) { + var json = path.resolve(__dirname + '/fixtures/all_hour.json') + var st = spawn(t, dat + ' import ' + json + ' --json --key=id --dataset=import-test5', {cwd: dat3}) + st.stdout.match(function (output) { + var json = 
JSON.parse(output) + return json.version.length === 64 // 32bit hash 2 in hex (64) + }) + st.stderr.empty() + st.end() +}) + +function verify (dataset, dir) { + test('dat export', function (t) { + var st = spawn(t, dat + ' export --dataset=' + dataset, {cwd: dir}) st.stderr.empty() st.stdout.match(function (output) { var lines = output.split('\n') - t.ok('less than 10 lines', lines.length <= 10) + t.ok(lines.length <= 10, 'less than 10 lines') if (lines.length === 10) { if (JSON.parse(lines[0]).key === 'ak11246285') return true return false @@ -75,14 +90,3 @@ function verify (dataset, datN) { st.end() }) } - -test('dat import with log to json', function (t) { - var json = path.resolve(__dirname + '/fixtures/all_hour.json') - var st = spawn(t, dat + ' import ' + json + ' --log=json --key=id --dataset=import-test5', {cwd: dat3}) - st.stdout.match(function (output) { - var json = JSON.parse(output) - return json.version.length === 64 // 32bit hash 2 in hex (64) - }) - st.stderr.empty() - st.end() -}) diff --git a/usage/import.txt b/usage/import.txt index 669db124..112e0471 100644 --- a/usage/import.txt +++ b/usage/import.txt @@ -1,4 +1,4 @@ -dat import - -d # the name of the dataset to create +dat import (required) + -d (required) # the name of the dataset to create -f # how to parse the file to add --help # show help \ No newline at end of file From a12b2f8bda0320139d08281bb5b6835e3f6a1217 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 14:24:05 -0700 Subject: [PATCH 17/31] prefix tests for easier debugging --- tests/checkout.js | 6 +++--- tests/cli.js | 6 +++--- tests/export.js | 34 +++++++++++++++++----------------- tests/get.js | 10 +++++----- tests/help.js | 6 +++--- tests/import.js | 14 +++++++------- tests/merge.js | 12 ++++++------ tests/status.js | 10 +++++----- tests/write.js | 26 +++++++++++++------------- 9 files changed, 62 insertions(+), 62 deletions(-) diff --git a/tests/checkout.js b/tests/checkout.js index 1ded00af..cbeb107f 100644 --- 
a/tests/checkout.js +++ b/tests/checkout.js @@ -20,7 +20,7 @@ var dataset = 'checkout-test-dataset' helpers.twodats(dat1, dat2) helpers.conflict(dat1, dat2, dataset, csvs) -test('dat1 forks', function (t) { +test('checkout: dat1 forks', function (t) { var st = spawn(t, dat + ' forks', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { @@ -31,7 +31,7 @@ test('dat1 forks', function (t) { st.end() }) -test('dat1 status returns local version', function (t) { +test('checkout: dat1 status returns local version', function (t) { var stat = spawn(t, dat + ' status --json', {cwd: dat1, end: false}) stat.stderr.empty() stat.stdout.match(function match (output) { @@ -48,7 +48,7 @@ test('dat1 status returns local version', function (t) { }) }) -test('dat1 gets proper export', function (t) { +test('checkout: dat1 gets proper export', function (t) { // determine which has is ours and which came from dat2, then checkout to the remote one var remoteHash if (hashes[0] === statusJson.version) remoteHash = hashes[1] diff --git a/tests/cli.js b/tests/cli.js index 22dd8c52..f793b025 100644 --- a/tests/cli.js +++ b/tests/cli.js @@ -3,7 +3,7 @@ var path = require('path') var test = require('tape') var spawn = require('tape-spawn') -test('dat -v (version)', function (t) { +test('cli: dat -v (version)', function (t) { var st = spawn(t, 'node cli.js -v') var pkg = require('../package.json') st.stdout.match(pkg.version + '\n') @@ -11,14 +11,14 @@ test('dat -v (version)', function (t) { st.end() }) -test('dat (usage)', function (t) { +test('cli: dat (usage)', function (t) { var st = spawn(t, 'node cli.js') st.stderr.match(fs.readFileSync(path.join('usage', 'root.txt')).toString() + '\n', 'usage matched') st.stdout.empty() st.end() }) -test('invalid command', function (t) { +test('cli: invalid command', function (t) { var st = spawn(t, 'node cli.js pizza') st.stderr.match('dat: pizza is not a valid command\n', 'usage matched') st.stdout.empty() diff --git a/tests/export.js 
b/tests/export.js index 05b30524..14a7b7a2 100644 --- a/tests/export.js +++ b/tests/export.js @@ -17,21 +17,21 @@ helpers.onedat(dat1) var csvfile = path.resolve(__dirname + '/fixtures/all_hour.csv') var exportfile = path.join(dat1, 'out.csv') -test('dat import csv', function (t) { +test('export: dat import csv', function (t) { var st = spawn(t, dat + ' import ' + csvfile + ' -d export-test --key=id', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done importing data/) st.end() }) -test('dat export to file', function (t) { +test('export: dat export to file', function (t) { var st = spawn(t, dat + ' export -d export-test > ' + exportfile, {cwd: dat1}) st.stdout.empty() st.stderr.empty() st.end() }) -test('dat export output matches original file', function (t) { +test('export: dat export output matches original file', function (t) { t.plan(53) var sorter = sort(function (a, b) { return parseFloat(a.latitude) < parseFloat(b.longitude) @@ -58,7 +58,7 @@ test('dat export output matches original file', function (t) { loop() }) -test('dat export with limit', function (t) { +test('export: dat export with limit', function (t) { var st = spawn(t, dat + ' export --limit=5 --dataset=export-test', {cwd: dat1}) st.stderr.empty() st.stdout.match(function (output) { @@ -75,7 +75,7 @@ test('dat export with limit', function (t) { st.end() }) -test('dat export with limit and csv', function (t) { +test('export: dat export with limit and csv', function (t) { var st = spawn(t, dat + ' export --limit=5 --dataset=export-test --format=csv', {cwd: dat1}) st.stderr.empty() var ok = false @@ -89,21 +89,21 @@ test('dat export with limit and csv', function (t) { st.end() }) -test('dat export with limit and csv without dataset errors', function (t) { +test('export: dat export with limit and csv without dataset errors', function (t) { var st = spawn(t, dat + ' export --limit=5 --format=csv', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 
'export.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat export with range options without dataset errors', function (t) { +test('export: dat export with range options without dataset errors', function (t) { var st = spawn(t, dat + ' export --lt=ak11246291', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat export with lt', function (t) { +test('export: dat export with lt', function (t) { var st = spawn(t, dat + ' export --dataset=export-test --lt=ak11246291', {cwd: dat1}) st.stderr.empty() st.stdout.match(function (output) { @@ -120,7 +120,7 @@ test('dat export with lt', function (t) { st.end() }) -test('dat export with lt and limit options', function (t) { +test('export: dat export with lt and limit options', function (t) { var st = spawn(t, dat + ' export --dataset=export-test --lt=ak11246291 --limit=1', {cwd: dat1}) st.stderr.empty() st.stdout.match(function (output) { @@ -133,14 +133,14 @@ test('dat export with lt and limit options', function (t) { st.end() }) -test('dat export without dataset errors', function (t) { +test('export: dat export without dataset errors', function (t) { var st = spawn(t, dat + ' export', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'export.txt')).toString() + '\n', 'usage matched') st.end() }) -var hashes, row +var hashes, row, statusJson var csvs = { a: path.resolve(__dirname + '/fixtures/a.csv'), @@ -154,7 +154,7 @@ var dat3 = path.join(tmp, 'dat-1') helpers.twodats(dat2, dat3) helpers.conflict(dat2, dat3, 'max', csvs) -test('dat forks', function (t) { +test('export: dat forks', function (t) { var st = spawn(t, dat + ' forks', {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { @@ -165,7 +165,7 @@ test('dat forks', function (t) { st.end() }) -test('dat export with checkout', function (t) { +test('export: dat export with checkout', function (t) { var st 
= spawn(t, dat + ' export --dataset=max --checkout=' + hashes[1], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { @@ -179,7 +179,7 @@ test('dat export with checkout', function (t) { st.end() }) -test('dat export with checkout hash 1', function (t) { +test('export: dat export with checkout hash 1', function (t) { var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { @@ -193,7 +193,7 @@ test('dat export with checkout hash 1', function (t) { st.end() }) -test('dat export with checkout hash 1 abbr', function (t) { +test('export: dat export with checkout hash 1 abbr', function (t) { var st = spawn(t, dat + ' export -d max -c ' + hashes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { @@ -209,14 +209,14 @@ test('dat export with checkout hash 1 abbr', function (t) { // export after write file -test('dat write', function (t) { +test('export: dat write', function (t) { var st = spawn(t, "echo 'hello world' | " + dat + ' write test-file.txt -d max -', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done writing binary data/) st.end() }) -test('dat export with checkout after write', function (t) { +test('export: dat export with checkout after write', function (t) { var st = spawn(t, dat + ' export -d max', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { diff --git a/tests/get.js b/tests/get.js index 15158e1e..d843831e 100644 --- a/tests/get.js +++ b/tests/get.js @@ -12,14 +12,14 @@ var dat1 = path.join(tmp, 'dat-1') helpers.onedat(dat1) var json = path.resolve(__dirname + '/fixtures/all_hour.json') -test('dat import dataset', function (t) { +test('get: dat import dataset', function (t) { var st = spawn(t, dat + ' import ' + json + ' --key=id --dataset=get-test', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done importing data/) st.end() }) -test('dat get a key from dataset', function (t) { +test('get: dat 
get a key from dataset', function (t) { var st = spawn(t, dat + ' get ak11246293 --dataset=get-test', {cwd: dat1}) st.stderr.empty() st.stdout.match(function (output) { @@ -30,21 +30,21 @@ test('dat get a key from dataset', function (t) { st.end() }) -test('dat get without key errors', function (t) { +test('get: dat get without key errors', function (t) { var st = spawn(t, dat + ' get --dataset=get-test', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat get without dataset errors', function (t) { +test('get: dat get without dataset errors', function (t) { var st = spawn(t, dat + ' get ak11246293', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat get without key and dataset errors', function (t) { +test('get: dat get without key and dataset errors', function (t) { var st = spawn(t, dat + ' get', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'get.txt')).toString() + '\n', 'usage matched') diff --git a/tests/help.js b/tests/help.js index 22f671e1..dfa57fee 100644 --- a/tests/help.js +++ b/tests/help.js @@ -3,21 +3,21 @@ var path = require('path') var test = require('tape') var spawn = require('tape-spawn') -test('dat import -h', function (t) { +test('help: dat import -h', function (t) { var st = spawn(t, 'node cli.js import -h') st.stderr.match(fs.readFileSync(path.join('usage', 'import.txt')).toString() + '\n', 'usage matched') st.stdout.empty() st.end() }) -test('dat init -h', function (t) { +test('help: dat init -h', function (t) { var st = spawn(t, 'node cli.js init -h') st.stderr.match(fs.readFileSync(path.join('usage', 'init.txt')).toString() + '\n', 'usage matched') st.stdout.empty() st.end() }) -test('dat cat -h', function (t) { +test('help: dat cat -h', function (t) { var st = spawn(t, 'node cli.js cat -h') 
st.stderr.match(fs.readFileSync(path.join('usage', 'cat.txt')).toString() + '\n', 'usage matched') st.stdout.empty() diff --git a/tests/import.js b/tests/import.js index 756b2888..60effd88 100644 --- a/tests/import.js +++ b/tests/import.js @@ -12,7 +12,7 @@ var dat3 = path.join(tmp, 'dat-3') helpers.onedat(dat1) -test('dat import w/ no dataset arg', function (t) { +test('import: dat import w/ no dataset arg', function (t) { var csv = path.resolve(__dirname + '/fixtures/all_hour.csv') var st = spawn(t, dat + ' import ' + csv + ' --key=id', {cwd: dat1}) st.stdout.empty() @@ -20,7 +20,7 @@ test('dat import w/ no dataset arg', function (t) { st.end() }) -test('dat import csv', function (t) { +test('import: dat import csv', function (t) { var csv = path.resolve(__dirname + '/fixtures/all_hour.csv') var st = spawn(t, dat + ' import ' + csv + ' --key=id --dataset=import-test1', {cwd: dat1}) st.stdout.empty() @@ -32,7 +32,7 @@ verify('import-test1', dat1) helpers.onedat(dat2) -test('dat import json', function (t) { +test('import: dat import json', function (t) { var json = path.resolve(__dirname + '/fixtures/all_hour.json') var st = spawn(t, dat + ' import ' + json + ' --key=id --dataset=import-test2', {cwd: dat2}) st.stdout.empty() @@ -44,7 +44,7 @@ verify('import-test2', dat2) helpers.onedat(dat3) -test('dat import all_hour to test3', function (t) { +test('import: dat import all_hour to test3', function (t) { var json = path.resolve(__dirname + '/fixtures/all_hour.json') var st = spawn(t, dat + ' import ' + json + ' --key=id --dataset=import-test3', {cwd: dat3}) st.stdout.empty() @@ -54,7 +54,7 @@ test('dat import all_hour to test3', function (t) { verify('import-test3', dat3) -test('dat import all_hour to separate dataset', function (t) { +test('import: dat import all_hour to separate dataset', function (t) { var json = path.resolve(__dirname + '/fixtures/all_hour.json') var st = spawn(t, dat + ' import ' + json + ' --key=id --dataset=import-test4', {cwd: dat3}) 
st.stdout.empty() @@ -64,7 +64,7 @@ test('dat import all_hour to separate dataset', function (t) { verify('import-test4', dat3) -test('dat import with json output', function (t) { +test('import: dat import with json output', function (t) { var json = path.resolve(__dirname + '/fixtures/all_hour.json') var st = spawn(t, dat + ' import ' + json + ' --json --key=id --dataset=import-test5', {cwd: dat3}) st.stdout.match(function (output) { @@ -76,7 +76,7 @@ test('dat import with json output', function (t) { }) function verify (dataset, dir) { - test('dat export', function (t) { + test('import: dat export', function (t) { var st = spawn(t, dat + ' export --dataset=' + dataset, {cwd: dir}) st.stderr.empty() st.stdout.match(function (output) { diff --git a/tests/merge.js b/tests/merge.js index e0e58d81..808e0b94 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -17,13 +17,13 @@ var csvs = { c: path.resolve(__dirname + '/fixtures/c.csv') } -var dat1 = path.join(tmp, 'dat-1') -var dat2 = path.join(tmp, 'dat-2') +var dat1 = path.join(tmp, 'dat-merge-1') +var dat2 = path.join(tmp, 'dat-merge-2') helpers.twodats(dat1, dat2) helpers.conflict(dat1, dat2, 'merge-test', csvs) -test('dat1 forks', function (t) { +test('merge: dat1 forks', function (t) { var st = spawn(t, dat + ' forks', {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { @@ -34,7 +34,7 @@ test('dat1 forks', function (t) { st.end() }) -test('dat1 diff', function (t) { +test('merge: dat1 diff', function (t) { var st = spawn(t, dat + ' diff ' + hashes.join(' '), {cwd: dat1}) st.stderr.empty() st.stdout.match(function match (output) { @@ -48,7 +48,7 @@ test('dat1 diff', function (t) { st.end() }) -test('dat1 merge', function (t) { +test('merge: dat1 merge', function (t) { var diff = spawn(t, dat + ' diff ' + hashes.join(' '), {cwd: dat1, end: false}) var merge = spawn(t, dat + ' merge ' + hashes.join(' ') + ' --stdin', {cwd: dat1, end: false}) @@ -69,7 +69,7 @@ test('dat1 merge', function (t) { }) 
}) -test('verify merge version', function (t) { +test('merge: verify merge version', function (t) { var st = spawn(t, dat + ' export -d merge-test', {cwd: dat1}) st.stderr.empty() diff --git a/tests/status.js b/tests/status.js index 8bf3a184..a6dfeadc 100644 --- a/tests/status.js +++ b/tests/status.js @@ -6,8 +6,8 @@ var tmp = require('os').tmpdir() var dat = path.resolve(__dirname + '/../cli.js') -var dat1 = path.join(tmp, 'dat-1') -var dat2 = path.join(tmp, 'dat-2') +var dat1 = path.join(tmp, 'dat-status-1') +var dat2 = path.join(tmp, 'dat-status-2') var csvs = { a: path.resolve(__dirname + '/fixtures/a.csv'), @@ -17,7 +17,7 @@ var csvs = { helpers.twodats(dat1, dat2) -test('dat1 status', function (t) { +test('status: dat1 status', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Current version is/) @@ -26,14 +26,14 @@ test('dat1 status', function (t) { helpers.conflict(dat1, dat2, 'status-test', csvs) -test('dat1 status with multiple forks', function (t) { +test('status: dat1 status with multiple forks', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Current version is/) st.end() }) -test('dat1 status as json', function (t) { +test('status: dat1 status as json', function (t) { var st = spawn(t, dat + ' status --log=json', {cwd: dat1}) st.stdout.match(function (output) { try { diff --git a/tests/write.js b/tests/write.js index c0b6caca..e4c622f2 100644 --- a/tests/write.js +++ b/tests/write.js @@ -11,43 +11,43 @@ var dat1 = path.join(tmp, 'dat-1') helpers.onedat(dat1) -test('dat write errors without dataset', function (t) { +test('write: dat write errors without dataset', function (t) { var st = spawn(t, "echo 'hello world' | " + dat + ' write test-file.txt -', {cwd: dat1}) st.stdout.empty() st.stderr.match(fs.readFileSync(path.join('usage', 'write.txt')).toString() + '\n', 'usage matched') st.end() }) -test('dat write to dataset', function (t) { +test('write: dat 
write to dataset', function (t) { var st = spawn(t, "echo 'hello world' | " + dat + ' write -d my-dataset test-file.txt -', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done writing binary data/) st.end() }) -test('dat cat after write to dataset', function (t) { +test('write: dat cat after write to dataset', function (t) { datCatEquals(t, 'test-file.txt', /hello world/, '-d my-dataset') }) -test('dat write to new dataset', function (t) { +test('write: dat write to new dataset', function (t) { var st = spawn(t, "echo 'goodbye world' | " + dat + ' write -d my-dataset-2 test-file.txt -', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done writing binary data/) st.end() }) -test('dat cat after write to dataset 2', function (t) { +test('write: dat cat after write to dataset 2', function (t) { datCatEquals(t, 'test-file.txt', /goodbye world/, '-d my-dataset-2') }) -test('dat overwrite to dataset 2', function (t) { +test('write: dat overwrite to dataset 2', function (t) { var st = spawn(t, "echo 'goodbye mars' | " + dat + ' write -d my-dataset-2 test-file.txt -', {cwd: dat1}) st.stdout.empty() st.stderr.match(/Done writing binary data/) st.end() }) -test('dat cat after overwrite to dataset 2', function (t) { +test('write: dat cat after overwrite to dataset 2', function (t) { datCatEquals(t, 'test-file.txt', /goodbye mars/, '-d my-dataset-2') }) @@ -55,29 +55,29 @@ test('dat cat after overwrite to dataset 2', function (t) { var blobPath = path.resolve(__dirname + '/fixtures/blob.txt') -test('dat write from file', function (t) { +test('write: dat write from file', function (t) { datWrite(t, blobPath, '-d my-dataset-2') }) -test('dat cat after write from file', function (t) { +test('write: dat cat after write from file', function (t) { var contents = fs.readFileSync(blobPath).toString() datCatEquals(t, blobPath, contents, '-d my-dataset-2') }) -test('dat write from file with new name', function (t) { +test('write: dat write from file with new name', function (t) { 
datWrite(t, blobPath, '-d my-dataset-2 --name=new-name.txt') }) -test('dat cat after write from file with new name', function (t) { +test('write: dat cat after write from file with new name', function (t) { var contents = fs.readFileSync(blobPath).toString() datCatEquals(t, 'new-name.txt', contents, '-d my-dataset-2') }) -test('dat write from file with new name with abbr', function (t) { +test('write: dat write from file with new name with abbr', function (t) { datWrite(t, blobPath, '-d my-dataset-2 -n new-name-abbr.txt') }) -test('dat cat after write from file with new name with abbr', function (t) { +test('write: dat cat after write from file with new name with abbr', function (t) { var contents = fs.readFileSync(blobPath).toString() datCatEquals(t, 'new-name-abbr.txt', contents, '-d my-dataset-2') }) From 6802d2476cd0bab9380c85ea8daac1efee62e030 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 14:55:59 -0700 Subject: [PATCH 18/31] refactor tests, still have 5 failing --- tests/checkout.js | 47 ++++-------------------------- tests/export.js | 31 +++++--------------- tests/helpers/index.js | 66 +++++++++++++++++++++++++++++++++++++----- tests/merge.js | 8 +---- tests/status.js | 8 +---- 5 files changed, 73 insertions(+), 87 deletions(-) diff --git a/tests/checkout.js b/tests/checkout.js index cbeb107f..badd1fdd 100644 --- a/tests/checkout.js +++ b/tests/checkout.js @@ -5,57 +5,20 @@ var helpers = require('./helpers') var tmp = require('os').tmpdir() var dat = path.resolve(__dirname + '/../cli.js') -var hashes, statusJson - -var csvs = { - a: path.resolve(__dirname + '/fixtures/a.csv'), - b: path.resolve(__dirname + '/fixtures/b.csv'), - c: path.resolve(__dirname + '/fixtures/c.csv') -} +var forks var dat1 = path.join(tmp, 'dat-checkout-1') var dat2 = path.join(tmp, 'dat-checkout-2') var dataset = 'checkout-test-dataset' helpers.twodats(dat1, dat2) -helpers.conflict(dat1, dat2, dataset, csvs) - -test('checkout: dat1 forks', function (t) { - var st = 
spawn(t, dat + ' forks', {cwd: dat1}) - st.stderr.empty() - st.stdout.match(function match (output) { - var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) - if (ok) hashes = output.split('\n').slice(0, 2) - return ok - }) - st.end() -}) - -test('checkout: dat1 status returns local version', function (t) { - var stat = spawn(t, dat + ' status --json', {cwd: dat1, end: false}) - stat.stderr.empty() - stat.stdout.match(function match (output) { - try { - statusJson = JSON.parse(output) - } catch (e) { - statusJson = false - } - if (statusJson && statusJson.version) return true - else return false - }) - stat.end(function () { - t.end() - }) +helpers.conflict(dat1, dat2, dataset, function (conflictForks) { + forks = conflictForks }) test('checkout: dat1 gets proper export', function (t) { - // determine which has is ours and which came from dat2, then checkout to the remote one - var remoteHash - if (hashes[0] === statusJson.version) remoteHash = hashes[1] - else remoteHash = hashes[0] - - var checkout = spawn(t, dat + ' checkout ' + remoteHash, {cwd: dat1, end: false}) - checkout.stderr.match(new RegExp('Current version is now ' + remoteHash)) + var checkout = spawn(t, dat + ' checkout ' + forks.remotes[0], {cwd: dat1, end: false}) + checkout.stderr.match(new RegExp('Current version is now ' + forks.remotes[0])) checkout.stdout.empty() checkout.end(function () { var exp = spawn(t, dat + ' export -d ' + dataset, {cwd: dat1}) diff --git a/tests/export.js b/tests/export.js index 14a7b7a2..62e9a9e0 100644 --- a/tests/export.js +++ b/tests/export.js @@ -140,33 +140,18 @@ test('export: dat export without dataset errors', function (t) { st.end() }) -var hashes, row, statusJson - -var csvs = { - a: path.resolve(__dirname + '/fixtures/a.csv'), - b: path.resolve(__dirname + '/fixtures/b.csv'), - c: path.resolve(__dirname + '/fixtures/c.csv') -} +var forks, row var dat2 = path.join(tmp, 'dat-2') var dat3 = path.join(tmp, 'dat-1') 
helpers.twodats(dat2, dat3) -helpers.conflict(dat2, dat3, 'max', csvs) - -test('export: dat forks', function (t) { - var st = spawn(t, dat + ' forks', {cwd: dat2}) - st.stderr.empty() - st.stdout.match(function match (output) { - var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) - if (ok) hashes = output.split('\n') - return ok - }) - st.end() +helpers.conflict(dat2, dat3, 'max', function (conflictForks) { + forks = conflictForks }) test('export: dat export with checkout', function (t) { - var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[1], {cwd: dat2}) + var st = spawn(t, dat + ' export --dataset=max --checkout=' + forks.mine, {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { @@ -179,8 +164,8 @@ test('export: dat export with checkout', function (t) { st.end() }) -test('export: dat export with checkout hash 1', function (t) { - var st = spawn(t, dat + ' export --dataset=max --checkout=' + hashes[0], {cwd: dat2}) +test('export: dat export with checkout remote fork', function (t) { + var st = spawn(t, dat + ' export --dataset=max --checkout=' + forks.remotes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { @@ -193,8 +178,8 @@ test('export: dat export with checkout hash 1', function (t) { st.end() }) -test('export: dat export with checkout hash 1 abbr', function (t) { - var st = spawn(t, dat + ' export -d max -c ' + hashes[0], {cwd: dat2}) +test('export: dat export with checkout remote fork abbr', function (t) { + var st = spawn(t, dat + ' export -d max -c ' + forks.remotes[0], {cwd: dat2}) st.stderr.empty() st.stdout.match(function match (output) { try { diff --git a/tests/helpers/index.js b/tests/helpers/index.js index 42e54feb..856da205 100644 --- a/tests/helpers/index.js +++ b/tests/helpers/index.js @@ -7,6 +7,12 @@ var mkdirp = require('mkdirp') var dat = path.resolve(__dirname + '/../../cli.js') +var csvs = { + a: path.resolve(__dirname + 
'/../fixtures/a.csv'), + b: path.resolve(__dirname + '/../fixtures/b.csv'), + c: path.resolve(__dirname + '/../fixtures/c.csv') +} + module.exports = { onedat: onedat, twodats: twodats, @@ -15,11 +21,11 @@ module.exports = { } function onedat (datPath) { - test('init a dat', function (t) { + test('helpers: init a dat', function (t) { rimraf.sync(datPath) mkdirp.sync(datPath) var st = spawn(t, dat + ' init', {cwd: datPath}) - st.stderr.match(/Initialized a new dat/) + st.stderr.match(/Initialized a new dat/, datPath) st.stdout.empty() st.end() }) @@ -30,41 +36,85 @@ function twodats (dat1, dat2) { onedat(dat2) } -function conflict (dat1, dat2, dataset, csvs) { - test('dat1 import', function (t) { +function conflict (dat1, dat2, dataset, cb) { + // creates conflict where: + // dat1 does max -> MAX + // dat2 does max -> Max + // dat1 pulls dat2, has 2 heads + // if cb is supplied will also retrieve heads + + test('helpers: dat1 import', function (t) { var st = spawn(t, dat + ' import -d ' + dataset + ' ' + csvs.a, {cwd: dat2}) st.stderr.match(/Done importing data/) st.stdout.empty() st.end() }) - test('dat2 pull dat1', function (t) { + test('helpers: dat2 pull dat1', function (t) { var st = spawn(t, dat + ' pull ' + dat1, {cwd: dat2}) st.stderr.empty() st.stdout.empty() st.end() }) - test('dat2 import b', function (t) { + test('helpers: dat2 import b', function (t) { var st = spawn(t, dat + ' import -d ' + dataset + ' ' + csvs.b, {cwd: dat2}) st.stderr.match(/Done importing data/) st.stdout.empty() st.end() }) - test('dat1 import c', function (t) { + test('helpers: dat1 import c', function (t) { var st = spawn(t, dat + ' import -d ' + dataset + ' ' + csvs.c, {cwd: dat1}) st.stderr.match(/Done importing data/) st.stdout.empty() st.end() }) - test('dat1 pull dat2', function (t) { + test('helpers: dat1 pull dat2', function (t) { var st = spawn(t, dat + ' pull ' + dat2, {cwd: dat1}) st.stderr.empty() st.stdout.empty() st.end() }) + + if (!cb) return + var hashes + + 
test('helpers: get forks', function (t) { + var st = spawn(t, dat + ' forks', {cwd: dat1}) + st.stderr.empty() + st.stdout.match(function match (output) { + var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) + if (ok) hashes = output.trim().split('\n') + return ok + }) + st.end() + }) + + test('helpers: get status', function (t) { + var statusJson + var stat = spawn(t, dat + ' status --json', {cwd: dat1, end: false}) + stat.stderr.empty() + stat.stdout.match(function match (output) { + try { + statusJson = JSON.parse(output) + } catch (e) { + statusJson = false + } + if (statusJson && statusJson.version) return true + else return false + }) + stat.end(function () { + var forks = {remotes: []} + hashes.forEach(function (hash) { + if (hash === statusJson.version) forks.mine = hash + else forks.remotes.push(hash) + }) + t.end() + cb(forks) + }) + }) } function randomTmpDir () { diff --git a/tests/merge.js b/tests/merge.js index 808e0b94..b28229a0 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -11,17 +11,11 @@ var tmp = os.tmpdir() var dat = path.resolve(__dirname + '/../cli.js') var hashes, diff -var csvs = { - a: path.resolve(__dirname + '/fixtures/a.csv'), - b: path.resolve(__dirname + '/fixtures/b.csv'), - c: path.resolve(__dirname + '/fixtures/c.csv') -} - var dat1 = path.join(tmp, 'dat-merge-1') var dat2 = path.join(tmp, 'dat-merge-2') helpers.twodats(dat1, dat2) -helpers.conflict(dat1, dat2, 'merge-test', csvs) +helpers.conflict(dat1, dat2, 'merge-test') test('merge: dat1 forks', function (t) { var st = spawn(t, dat + ' forks', {cwd: dat1}) diff --git a/tests/status.js b/tests/status.js index a6dfeadc..94ffd633 100644 --- a/tests/status.js +++ b/tests/status.js @@ -9,12 +9,6 @@ var dat = path.resolve(__dirname + '/../cli.js') var dat1 = path.join(tmp, 'dat-status-1') var dat2 = path.join(tmp, 'dat-status-2') -var csvs = { - a: path.resolve(__dirname + '/fixtures/a.csv'), - b: path.resolve(__dirname + '/fixtures/b.csv'), - 
c: path.resolve(__dirname + '/fixtures/c.csv') -} - helpers.twodats(dat1, dat2) test('status: dat1 status', function (t) { @@ -24,7 +18,7 @@ test('status: dat1 status', function (t) { st.end() }) -helpers.conflict(dat1, dat2, 'status-test', csvs) +helpers.conflict(dat1, dat2, 'status-test') test('status: dat1 status with multiple forks', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) From 9def99d8901862a91bda8a3e5dafb9e9ee97dee4 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 15:16:48 -0700 Subject: [PATCH 19/31] change abort to have a --verbose flag to show stack trace --- bin/cat.js | 4 ++-- bin/checkout.js | 4 ++-- bin/defaults.js | 5 +++++ bin/diff.js | 2 +- bin/export.js | 4 ++-- bin/forks.js | 6 ++++-- bin/get.js | 4 ++-- bin/import.js | 4 ++-- bin/init.js | 2 +- bin/merge.js | 4 ++-- bin/pull.js | 2 +- bin/push.js | 2 +- bin/replicate.js | 2 +- bin/status.js | 4 ++-- bin/versions.js | 4 ++-- bin/write.js | 6 +++--- lib/abort.js | 7 +++++-- tests/merge.js | 21 ++++++--------------- tests/status.js | 8 ++++---- 19 files changed, 48 insertions(+), 47 deletions(-) diff --git a/bin/cat.js b/bin/cat.js index 2f7643bb..303d4867 100644 --- a/bin/cat.js +++ b/bin/cat.js @@ -25,7 +25,7 @@ function handleCat (args) { } openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) handleReadStream(db) }) @@ -37,7 +37,7 @@ function handleCat (args) { } pump(db.createFileReadStream(key, opts), process.stdout, function done (err) { - if (err) abort(err, 'dat: err in cat') + if (err) abort(err, args, 'dat: err in cat') }) } } diff --git a/bin/checkout.js b/bin/checkout.js index 24c077dc..c653fe5b 100644 --- a/bin/checkout.js +++ b/bin/checkout.js @@ -11,7 +11,7 @@ function handleCheckout (args) { if (args.help || args._.length === 0) return usage() openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) var head = args._[0] var checkout = db.checkout(head === 'latest' ? 
null : head) @@ -24,7 +24,7 @@ function handleCheckout (args) { }) function done (err) { - if (err) return abort(err, 'Could not find checkout with hash ', head) + if (err) return abort(err, args, 'Could not find checkout with hash ', head) console.error('Current version is now', checkout.head) } }) diff --git a/bin/defaults.js b/bin/defaults.js index 765f53ae..8642ed9d 100644 --- a/bin/defaults.js +++ b/bin/defaults.js @@ -18,5 +18,10 @@ module.exports = [ { name: 'json', boolean: true + }, + { + name: 'verbose', + boolean: true, + default: false } ] diff --git a/bin/diff.js b/bin/diff.js index 44f4baef..59257674 100644 --- a/bin/diff.js +++ b/bin/diff.js @@ -15,7 +15,7 @@ function handleDiff (args) { if (args._.length < 2) return usage() openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) var headA = args._[0] var headB = args._[1] diff --git a/bin/export.js b/bin/export.js index e32732be..5392aff2 100644 --- a/bin/export.js +++ b/bin/export.js @@ -60,7 +60,7 @@ function handleExport (args) { if (args.format) format = args.format openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) handleOuputStream(db) }) @@ -75,7 +75,7 @@ function handleExport (args) { }) pump(db.createReadStream(args), parseOutput, formatData(format), process.stdout, function done (err) { - if (err) abort(err, 'Error exporting data') + if (err) abort(err, args, 'Error exporting data') }) } } diff --git a/bin/forks.js b/bin/forks.js index a70d65e6..3e4a2aa2 100644 --- a/bin/forks.js +++ b/bin/forks.js @@ -10,12 +10,14 @@ module.exports = { function handleForks (args) { if (args.help) return usage() openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) db.heads() .on('data', function head (obj) { console.log(obj) }) - .on('error', abort) + .on('error', function (err) { + abort(err, args) + }) }) } diff --git a/bin/get.js b/bin/get.js index d387c0bb..6acc6091 100644 --- a/bin/get.js 
+++ b/bin/get.js @@ -25,11 +25,11 @@ function handleRows (args) { } openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) var key = args._[0] db.get(key, args, function (err, value) { - if (err) abort(err, 'dat get error') + if (err) abort(err, args, 'dat get error') process.stdout.write(JSON.stringify(value)) }) }) diff --git a/bin/import.js b/bin/import.js index 5d439486..85dc95a3 100644 --- a/bin/import.js +++ b/bin/import.js @@ -45,7 +45,7 @@ function handleImport (args) { } openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) handleInputStream(db) }) @@ -61,7 +61,7 @@ function handleImport (args) { }) pump(inputStream, parseInputStream(args), transform, db.createWriteStream({ dataset: args.dataset }), function done (err) { - if (err) abort(err, 'Error importing data') + if (err) abort(err, args, 'Error importing data') if (args.json) { var output = { version: db.head diff --git a/bin/init.js b/bin/init.js index cbf2434d..b2da7412 100644 --- a/bin/init.js +++ b/bin/init.js @@ -29,7 +29,7 @@ function handleInit (args) { var db = dat(args.path, {createIfMissing: true}) db.on('error', function error (err) { - abort(err) + abort(err, args) }) db.on('ready', function ready () { diff --git a/bin/merge.js b/bin/merge.js index 14c6533f..3903e860 100644 --- a/bin/merge.js +++ b/bin/merge.js @@ -25,10 +25,10 @@ function handleMerge (args) { if (args._[2] === '-') args.stdin = true openDat(args, function ready (err, db) { - if (err) return abort(err) + if (err) return abort(err, args) var mergeStream = db.merge(headA, headB) pump(process.stdin, ndjson.parse(), mergeStream, function done (err) { - if (err) return abort(err) + if (err) return abort(err, args) console.error('Merged', headA, headB, 'into', db.head) }) }) diff --git a/bin/pull.js b/bin/pull.js index 0ad30fa9..64485b15 100644 --- a/bin/pull.js +++ b/bin/pull.js @@ -22,7 +22,7 @@ function handlePull (args) { }) openDat(args, function 
ready (err, db) { - if (err) return abort(err) + if (err) return abort(err, args) stream.pipe(db.pull()).pipe(stream) }) } diff --git a/bin/push.js b/bin/push.js index 36c5e187..ac550cd8 100644 --- a/bin/push.js +++ b/bin/push.js @@ -22,7 +22,7 @@ function handlePush (args) { }) openDat(args, function ready (err, db) { - if (err) return abort(err) + if (err) return abort(err, args) stream.pipe(db.push()).pipe(stream) }) } diff --git a/bin/replicate.js b/bin/replicate.js index 605dca54..490360b6 100644 --- a/bin/replicate.js +++ b/bin/replicate.js @@ -22,7 +22,7 @@ function handleReplicate (args) { }) openDat(args, function ready (err, db) { - if (err) return abort(err) + if (err) return abort(err, args) stream.pipe(db.replicate()).pipe(stream) }) } diff --git a/bin/status.js b/bin/status.js index 9520e174..6d497aba 100644 --- a/bin/status.js +++ b/bin/status.js @@ -13,10 +13,10 @@ function handleStatus (args) { if (args.help) return usage() openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) db.status(function (err, status) { - if (err) abort(err) + if (err) abort(err, args) status.version = status.head delete status.head if (args.json) { diff --git a/bin/versions.js b/bin/versions.js index a0758ca0..5993ab5f 100644 --- a/bin/versions.js +++ b/bin/versions.js @@ -26,7 +26,7 @@ function handleVersions (args) { } openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) handleReadStream(db) }) @@ -36,7 +36,7 @@ function handleVersions (args) { } pump(db.createChangesStream(opts), ndjson.serialize(), process.stdout, function done (err) { - if (err) abort(err, 'dat: err in versions') + if (err) abort(err, args, 'dat: err in versions') }) } } diff --git a/bin/write.js b/bin/write.js index d8e2c612..31920671 100644 --- a/bin/write.js +++ b/bin/write.js @@ -31,7 +31,7 @@ function handleWrite (args) { } openDat(args, function ready (err, db) { - if (err) abort(err) + if (err) abort(err, args) 
handleInputStream(db) }) @@ -46,7 +46,7 @@ function handleWrite (args) { } else { if (!fs.existsSync(path)) { usage() - abort(new Error('File at ' + path + ' does not exist')) + abort(new Error('File at ' + path + ' does not exist'), args) } inputStream = fs.createReadStream(path) } @@ -56,7 +56,7 @@ function handleWrite (args) { } pump(inputStream, db.createFileWriteStream(key, opts), function done (err) { - if (err) abort(err, 'dat: err in write') + if (err) abort(err, args, 'dat: err in write') if (args.json) { var output = { diff --git a/lib/abort.js b/lib/abort.js index 7157935a..13801bd1 100644 --- a/lib/abort.js +++ b/lib/abort.js @@ -1,7 +1,10 @@ module.exports = abort -function abort (err, message) { +function abort (err, args, message) { if (message) console.error(message) - if (err) throw err + else if (err.message) console.error(err.message) + else console.error(err) + + if (args && args.verbose) console.error(err.stack) process.exit(1) } diff --git a/tests/merge.js b/tests/merge.js index b28229a0..7a6a41bc 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -9,27 +9,18 @@ var helpers = require('./helpers') var tmp = os.tmpdir() var dat = path.resolve(__dirname + '/../cli.js') -var hashes, diff +var forks, diff var dat1 = path.join(tmp, 'dat-merge-1') var dat2 = path.join(tmp, 'dat-merge-2') helpers.twodats(dat1, dat2) -helpers.conflict(dat1, dat2, 'merge-test') - -test('merge: dat1 forks', function (t) { - var st = spawn(t, dat + ' forks', {cwd: dat1}) - st.stderr.empty() - st.stdout.match(function match (output) { - var ok = output.length === 130 // 32bit hash 2 in hex (64) x2 (128) + 2 newlines (130) - if (ok) hashes = output.split('\n') - return ok - }) - st.end() +helpers.conflict(dat1, dat2, 'merge-test', function (conflictForks) { + forks = conflictForks }) test('merge: dat1 diff', function (t) { - var st = spawn(t, dat + ' diff ' + hashes.join(' '), {cwd: dat1}) + var st = spawn(t, dat + ' diff ' + forks.remotes[0], {cwd: dat1}) 
st.stderr.empty() st.stdout.match(function match (output) { try { @@ -43,8 +34,8 @@ test('merge: dat1 diff', function (t) { }) test('merge: dat1 merge', function (t) { - var diff = spawn(t, dat + ' diff ' + hashes.join(' '), {cwd: dat1, end: false}) - var merge = spawn(t, dat + ' merge ' + hashes.join(' ') + ' --stdin', {cwd: dat1, end: false}) + var diff = spawn(t, dat + ' diff ' + forks.remotes[0], {cwd: dat1, end: false}) + var merge = spawn(t, dat + ' merge ' + forks.remotes[0] + ' --stdin', {cwd: dat1, end: false}) diff.stdout.stream .pipe(ndjson.parse()) diff --git a/tests/status.js b/tests/status.js index 94ffd633..501bd365 100644 --- a/tests/status.js +++ b/tests/status.js @@ -14,7 +14,7 @@ helpers.twodats(dat1, dat2) test('status: dat1 status', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) st.stdout.empty() - st.stderr.match(/Current version is/) + st.stderr.match(/This dat is empty/) st.end() }) @@ -22,13 +22,13 @@ helpers.conflict(dat1, dat2, 'status-test') test('status: dat1 status with multiple forks', function (t) { var st = spawn(t, dat + ' status', {cwd: dat1}) - st.stdout.empty() - st.stderr.match(/Current version is/) + st.stderr.empty() + st.stdout.match(/Current version is/) st.end() }) test('status: dat1 status as json', function (t) { - var st = spawn(t, dat + ' status --log=json', {cwd: dat1}) + var st = spawn(t, dat + ' status --json', {cwd: dat1}) st.stdout.match(function (output) { try { var json = JSON.parse(output) From 72d2bcdb789c0eb012cc05729e401005fcd800fc Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 15:34:50 -0700 Subject: [PATCH 20/31] update merge api docs --- beta-cli-api.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index ad44d760..2965c81e 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -300,15 +300,23 @@ $ dat diff --pretty --json 64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6 ### dat merge 
-Merges two forks. +Merges two forks ``` -dat merge +dat merge [ or ] (options) ``` +You can either merge from a file/STDIN or you can merge based on a fork and a built-in strategy. + +If using a file/STDIN your file should contain a resolution stream (TODO link to example) + +If merging a fork, `` should be the hash of the fork you want to merge into the fork you are currently on and you should specify a strategy option. + +Use `dat status` and `dat forks` to determine these values. + #### Options -- `-` for : receive resolved changes on stdin +- `-` as ``: receive resolved changes on stdin - `left`: pick the left side as the winner - `right`: pick the right side as the winner - `yolo`: pick random side for each key @@ -323,23 +331,23 @@ Merging from a file: ``` $ dat merge resolutions.json -Changes resolved successfully. +Changes merged successfully. Current version is now b04adb64fdf2203 ``` Merging as a stream using `dat diff`: ``` -$ dat diff ab3234dfe5 bdc3ae23cef | | dat merge - -Changes resolved successfully. +$ dat diff ab3234dfe5 | | dat merge - +Changes merged successfully. Current version is now 98v8catb4bvcddf ``` -Merging two forks by picking one side: +Merging by picking one side: ``` -$ dat merge ab3234dfe5 bdc3ae23cef --left -Changes resolved successfully. +$ dat merge bdc3ae23cef --left +Changes merged successfully. Current version is now b2bg304823h32h2 ``` From 7030c5804367aedc50bd0df998c19bc0951361ea Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 15:54:16 -0700 Subject: [PATCH 21/31] yolo -> random for now --- beta-cli-api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 2965c81e..f360306c 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -319,7 +319,7 @@ Use `dat status` and `dat forks` to determine these values. 
- `-` as ``: receive resolved changes on stdin - `left`: pick the left side as the winner - `right`: pick the right side as the winner -- `yolo`: pick random side for each key +- `random`: pick random side for each key Example output: From b8ccef3a8e08b11da34c99c0488163c788dfe76b Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Sun, 31 May 2015 19:24:41 -0700 Subject: [PATCH 22/31] make merge and diff implementations match docs, work on tests (1 failing still) --- beta-cli-api.md | 8 +++--- bin/diff.js | 65 ++++++++++++++++++++++++++----------------------- bin/merge.js | 30 ++++++++++++++--------- tests/merge.js | 8 +++--- usage/diff.txt | 6 +++-- 5 files changed, 67 insertions(+), 50 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index f360306c..8ddc64e4 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -248,10 +248,12 @@ Checked out state of dat to 7b13de1bd942a0cbfc2721d9e0b9a4fa5a076517 Generate a diff between two versions of the repository ``` -dat diff +dat diff [] ``` -If the same key is in both versions but the values differ, a diff object will be written to the output. You will get a diff object for each diff that is found. +If you specify one version, your current version will be used as the other version. Otherwise you can pass two versions. + +If the same key is in both versions but the values differ, a diff object will be written to the output. You will get a diff object for each diff that is found. Values that match are skipped. 
Example output: @@ -268,7 +270,7 @@ Diff between "Initial data import" and "Re-imported edited csv" ``` ``` -$ dat diff --pretty --json 64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 +$ dat diff --pretty --json 64843f272df { "key": "1", "forks": ["163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502", "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3" ] diff --git a/bin/diff.js b/bin/diff.js index 59257674..af0dc8e0 100644 --- a/bin/diff.js +++ b/bin/diff.js @@ -12,41 +12,46 @@ module.exports = { function handleDiff (args) { if (args.help) return usage() - if (args._.length < 2) return usage() + if (args._.length < 1) return usage() openDat(args, function ready (err, db) { if (err) abort(err, args) - - var headA = args._[0] - var headB = args._[1] - - var diffs = db.createDiffStream(headA, headB) - pump(diffs, datDiffFormatter(), ndjson.serialize(), process.stdout, function done (err) { - if (err) throw err + + if (args._.length === 2) return diff(args._[0], args._[1]) + + db.status(function (err, status) { + if (err) abort(err, args) + diff(status.head, args._[0]) }) - - function datDiffFormatter () { - return through.obj(function write (obj, enc, next) { - var a = obj[0] - var b = obj[1] - var diff = {} - if (a) diff.key = a.key - if (b) diff.key = b.key - diff.versions = [] - if (a) { - a.checkout = headA - diff.versions.push(a) - } else { - diff.versions.push(null) - } - if (b) { - b.checkout = headB - diff.versions.push(b) - } else { - diff.versions.push(null) - } - next(null, diff) + + function diff (headA, headB) { + var diffs = db.createDiffStream(headA, headB) + pump(diffs, datDiffFormatter(), ndjson.serialize(), process.stdout, function done (err) { + if (err) throw err }) + + function datDiffFormatter () { + return through.obj(function write (obj, enc, next) { + var a = obj[0] + var b = obj[1] + var diff = {} + if (a) diff.key = a.key + if (b) diff.key = b.key + diff.forks = [headA, headB] + diff.versions = 
[] + if (a) { + diff.versions.push(a) + } else { + diff.versions.push(null) + } + if (b) { + diff.versions.push(b) + } else { + diff.versions.push(null) + } + next(null, diff) + }) + } } }) } diff --git a/bin/merge.js b/bin/merge.js index 3903e860..969d1057 100644 --- a/bin/merge.js +++ b/bin/merge.js @@ -17,19 +17,27 @@ module.exports = { function handleMerge (args) { if (args._.length === 0) return usage() - - var headA = args._[0] - var headB = args._[1] - if (!headA || !headB) return usage() - - if (args._[2] === '-') args.stdin = true + if (args._[args._.length - 1] === '-') { + args.stdin = true + args._.pop() + } openDat(args, function ready (err, db) { - if (err) return abort(err, args) - var mergeStream = db.merge(headA, headB) - pump(process.stdin, ndjson.parse(), mergeStream, function done (err) { - if (err) return abort(err, args) - console.error('Merged', headA, headB, 'into', db.head) + if (err) abort(err, args) + + if (args._.length === 2) return merge(args._[0], args._[1]) + + db.status(function (err, status) { + if (err) abort(err, args) + merge(status.head, args._[0]) }) + + function merge (headA, headB) { + var mergeStream = db.merge(headA, headB) + pump(process.stdin, ndjson.parse(), mergeStream, function done (err) { + if (err) return abort(err, args) + console.error('Merged', headA, headB, 'into', db.head) + }) + } }) } diff --git a/tests/merge.js b/tests/merge.js index 7a6a41bc..748ca287 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -28,19 +28,19 @@ test('merge: dat1 diff', function (t) { } catch (e) { return false } - if (diff.versions[0].value.name === 'Max' && diff.versions[1].value.name === 'MAX') return true + if (diff.versions[0].value.name === 'MAX' && diff.versions[1].value.name === 'Max') return true }) st.end() }) -test('merge: dat1 merge', function (t) { +test('merge: dat1 diff | merge', function (t) { var diff = spawn(t, dat + ' diff ' + forks.remotes[0], {cwd: dat1, end: false}) - var merge = spawn(t, dat + ' merge ' + 
forks.remotes[0] + ' --stdin', {cwd: dat1, end: false}) + var merge = spawn(t, dat + ' merge -', {cwd: dat1, end: false}) diff.stdout.stream .pipe(ndjson.parse()) .pipe(through.obj(function (obj, enc, next) { - next(null, obj.versions[0]) + next(null, obj.versions[0]) // choose left })) .pipe(ndjson.serialize()) .pipe(merge.stdin) diff --git a/usage/diff.txt b/usage/diff.txt index ac007e98..9f579e4e 100644 --- a/usage/diff.txt +++ b/usage/diff.txt @@ -1,3 +1,5 @@ -dat diff +dat diff [versionB] -compares two heads and streams out any keys that have differing values \ No newline at end of file +Compares two heads and streams out any keys that have differing values + +If you specify one version, your current version will be used as the other version. Otherwise you can pass two versions. From 622be19b91213303efe4507febf8b3c1f470850c Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 10:35:30 -0700 Subject: [PATCH 23/31] update merge behavior --- beta-cli-api.md | 12 +++++++----- bin/diff.js | 6 +++--- bin/merge.js | 8 ++++---- tests/merge.js | 2 +- usage/merge.txt | 14 ++++++++++++-- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 8ddc64e4..002123c3 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -305,14 +305,16 @@ $ dat diff --pretty --json 64843f272df Merges two forks ``` -dat merge [ or ] (options) +dat merge [] ``` -You can either merge from a file/STDIN or you can merge based on a fork and a built-in strategy. +`` should be the hash of the fork you want to merge into the fork you are currently on + +You can either merge data from a file/STDIN or you can merge based on a built-in strategy. If using a file/STDIN your file should contain a resolution stream (TODO link to example) -If merging a fork, `` should be the hash of the fork you want to merge into the fork you are currently on and you should specify a strategy option. +If merging a fork, you should specify a strategy option. 
Use `dat status` and `dat forks` to determine these values. @@ -332,7 +334,7 @@ $ dat merge Merging from a file: ``` -$ dat merge resolutions.json +$ dat merge ab3234dfe5 resolutions.json Changes merged successfully. Current version is now b04adb64fdf2203 ``` @@ -340,7 +342,7 @@ Current version is now b04adb64fdf2203 Merging as a stream using `dat diff`: ``` -$ dat diff ab3234dfe5 | | dat merge - +$ dat diff ab3234dfe5 | | dat merge ab3234dfe5 - Changes merged successfully. Current version is now 98v8catb4bvcddf ``` diff --git a/bin/diff.js b/bin/diff.js index af0dc8e0..5e8bfb28 100644 --- a/bin/diff.js +++ b/bin/diff.js @@ -16,14 +16,14 @@ function handleDiff (args) { openDat(args, function ready (err, db) { if (err) abort(err, args) - + if (args._.length === 2) return diff(args._[0], args._[1]) - + db.status(function (err, status) { if (err) abort(err, args) diff(status.head, args._[0]) }) - + function diff (headA, headB) { var diffs = db.createDiffStream(headA, headB) pump(diffs, datDiffFormatter(), ndjson.serialize(), process.stdout, function done (err) { diff --git a/bin/merge.js b/bin/merge.js index 969d1057..e7099d0a 100644 --- a/bin/merge.js +++ b/bin/merge.js @@ -16,7 +16,7 @@ module.exports = { } function handleMerge (args) { - if (args._.length === 0) return usage() + if (args._.length < 1) return usage() if (args._[args._.length - 1] === '-') { args.stdin = true args._.pop() @@ -24,14 +24,14 @@ function handleMerge (args) { openDat(args, function ready (err, db) { if (err) abort(err, args) - + if (args._.length === 2) return merge(args._[0], args._[1]) - + db.status(function (err, status) { if (err) abort(err, args) merge(status.head, args._[0]) }) - + function merge (headA, headB) { var mergeStream = db.merge(headA, headB) pump(process.stdin, ndjson.parse(), mergeStream, function done (err) { diff --git a/tests/merge.js b/tests/merge.js index 748ca287..647622ba 100644 --- a/tests/merge.js +++ b/tests/merge.js @@ -35,7 +35,7 @@ test('merge: dat1 diff', 
function (t) { test('merge: dat1 diff | merge', function (t) { var diff = spawn(t, dat + ' diff ' + forks.remotes[0], {cwd: dat1, end: false}) - var merge = spawn(t, dat + ' merge -', {cwd: dat1, end: false}) + var merge = spawn(t, dat + ' merge ' + forks.remotes[0] + ' -', {cwd: dat1, end: false}) diff.stdout.stream .pipe(ndjson.parse()) diff --git a/usage/merge.txt b/usage/merge.txt index dc2fc648..1394150b 100644 --- a/usage/merge.txt +++ b/usage/merge.txt @@ -1,2 +1,12 @@ -dat merge - --live # wait for data piped to stdin for merge resolutions \ No newline at end of file +dat merge [] + +Merges two forks + + (required) the hash of the fork you want to merge + (optional) merge resolution JSON data from this file + --left pick the left side as the winner + --right pick the right side as the winner + --random pick random side for each key + +If you specify 1 fork, your current fork will be used as the 2nd. +You can also pass 2 specific forks in. \ No newline at end of file From 2430330692387888a080730253346e3bb3896f24 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 11:39:52 -0700 Subject: [PATCH 24/31] add example json format for merge --- beta-cli-api.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 002123c3..116b9b3e 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -312,7 +312,7 @@ dat merge [] You can either merge data from a file/STDIN or you can merge based on a built-in strategy. -If using a file/STDIN your file should contain a resolution stream (TODO link to example) +If using a file/STDIN your file should contain a JSON stream (see below). If merging a fork, you should specify a strategy option. @@ -355,6 +355,19 @@ Changes merged successfully. 
Current version is now b2bg304823h32h2 ``` +#### JSON format + +When writing data into a merge operation it should be in the same format as is contained in the individual versions supplied in the `versions` array of `dat diff` output. + +`dat merge` expects newline separated JSON objects (ndjson) as input. + +Example: + +``` +{"type":"put","version":"163c6089c3477ee","change":3,"key":"maxogden","value":{"key":"maxogden","name":"Max"}} +{"type":"put","version":"b04adb64fdf2203","change":6,"key":"mafintosh","value":{"key":"mafintosh","name":"Mathias"}} +``` + ## dataset commands These are meant to affect a specific dataset inside a repository. Each dataset is a folder inside the repository. From e4962140166889feeb6cc5ed72d9caf78116d61e Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 11:48:34 -0700 Subject: [PATCH 25/31] remove dataset subfolders for now --- beta-cli-api.md | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 116b9b3e..0c953cdc 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -22,20 +22,6 @@ This is the proposed CLI API for our Beta release. Please leave feedback [in thi - [dat read](#dat-read) - [dat get](#dat-get) -## example repository folder structure - -``` -repo/ - - .dat/ - - dat.json - - dataset-a/ - - dataset.json - - readme.md - - dataset-b/ - - dataset.json - - readme.md -``` - ## repository commands ### dat @@ -370,11 +356,9 @@ Example: ## dataset commands -These are meant to affect a specific dataset inside a repository. Each dataset is a folder inside the repository. - -You can either run these commands from inside the dataset folder, or by explicitly specifying it with the dataset option: +These are meant to affect a specific dataset inside a repository. -- `dataset`/`d` - specify the dataset to use. defauts to the dataset in the folder you are in. +- `dataset`/`d` - specify the dataset to use. 
### dat import From f87e4a0e0a0e89bf582ee596570ba29013a04a23 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 11:52:02 -0700 Subject: [PATCH 26/31] docs tweaks --- beta-cli-api.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index 0c953cdc..cdd93409 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -60,12 +60,13 @@ All commands have these options: - `help`/`h` (boolean) - pass this option to show the help for a command. - `json` - set this to true to change all output to JSON for easy parsing. - `checkout` - the version hash to use when retrieving data for a command. +- `verbose` - show the full stack trace/debug info on errors Example output: ``` $ dat -usage: dat [-flag] [--key value] +usage: dat [-flag] [--key=value] commands: init initialize a new dat in a directory @@ -207,7 +208,6 @@ $ dat log --limit=1 --json `Links` is a list of older versions that are referenced from this current version (forms a directed acyclic graph if drawn). 
- ### dat checkout Non-destructive rollback state to a hash in the past From 782d2f283d0aafb12576993aa98e9fe9332e71bd Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Mon, 1 Jun 2015 12:43:26 -0700 Subject: [PATCH 27/31] Fix import error handling --- bin/import.js | 3 +-- lib/abort.js | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bin/import.js b/bin/import.js index 85dc95a3..7d7431b4 100644 --- a/bin/import.js +++ b/bin/import.js @@ -40,8 +40,7 @@ function handleImport (args) { if (!args.dataset) { usage() - console.error('\nError: Must specify dataset (-d )') - abort() + abort(new Error('\nError: Must specify dataset (-d )')) } openDat(args, function ready (err, db) { diff --git a/lib/abort.js b/lib/abort.js index 13801bd1..122ec95c 100644 --- a/lib/abort.js +++ b/lib/abort.js @@ -2,8 +2,10 @@ module.exports = abort function abort (err, args, message) { if (message) console.error(message) - else if (err.message) console.error(err.message) - else console.error(err) + if (err) { + if (err.message) console.error(err.message) + else console.error(err) + } if (args && args.verbose) console.error(err.stack) process.exit(1) From 19b02f0cddc0e7ac53664eae5a11e89ebb298b75 Mon Sep 17 00:00:00 2001 From: Karissa McKelvey Date: Mon, 1 Jun 2015 12:46:53 -0700 Subject: [PATCH 28/31] BUG: fix import bug on using --json flag with csv data --- lib/parse-input-stream.js | 8 ++++---- tests/import.js | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/parse-input-stream.js b/lib/parse-input-stream.js index f9c64a3e..5df59a2e 100644 --- a/lib/parse-input-stream.js +++ b/lib/parse-input-stream.js @@ -11,10 +11,10 @@ module.exports = parseStream function parseStream (opts) { if (!opts) opts = {} - if (opts.csv || opts.f === 'csv') return parseCSV(opts.separator) - if (opts.tsv || opts.f === 'tsv') return parseCSV('\t') - if (opts.json || opts.f === 'json') return parseJSON(opts.jsonpath) - if (opts.objects || opts.f === 
'objects') return parseObjects() + if (opts.f === 'csv') return parseCSV(opts.separator) + if (opts.f === 'tsv') return parseCSV('\t') + if (opts.f === 'json') return parseJSON(opts.jsonpath) + if (opts.f === 'objects') return parseObjects() var detectMax = opts.detectMax || 8000 diff --git a/tests/import.js b/tests/import.js index 60effd88..2c758f6e 100644 --- a/tests/import.js +++ b/tests/import.js @@ -40,6 +40,14 @@ test('import: dat import json', function (t) { st.end() }) +test('import: dat import csv with json flag', function (t) { + var json = path.resolve(__dirname + '/fixtures/all_hour.csv') + var st = spawn(t, dat + ' import ' + json + ' --json --key=id --dataset=import-test2', {cwd: dat2}) + st.stdout.match(/version/) + st.stderr.empty() + st.end() +}) + verify('import-test2', dat2) helpers.onedat(dat3) From 4c456c0fbfc0dff75adc6e1345fb6f6b34744eee Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 16:13:14 -0700 Subject: [PATCH 29/31] make cli usage and docs order match --- beta-cli-api.md | 186 ++++++++++++++++++++++-------------------------- usage/root.txt | 28 ++++---- 2 files changed, 100 insertions(+), 114 deletions(-) diff --git a/beta-cli-api.md b/beta-cli-api.md index cdd93409..125c3060 100644 --- a/beta-cli-api.md +++ b/beta-cli-api.md @@ -6,21 +6,20 @@ This is the proposed CLI API for our Beta release. 
Please leave feedback [in thi - [dat](#dat) - [dat init](#dat-init) - [dat status](#dat-status) + - [dat log](#dat-log) - [dat clone](#dat-push) - [dat push](#dat-push) - [dat pull](#dat-pull) - - [dat replicate](#dat-replicate) - - [dat log](#dat-log) - [dat checkout](#dat-checkout) - [dat diff](#dat-diff) - [dat merge](#dat-merge) - [dat forks](#dat-forks) + - [dat replicate](#dat-replicate) - [dataset commands](#dataset-commands) - [dat import](#dat-import) - [dat export](#dat-export) - - [dat write](#dat-write) - [dat read](#dat-read) - - [dat get](#dat-get) + - [dat write](#dat-write) ## repository commands @@ -34,7 +33,7 @@ dat ### Options -Options have shorthand `-` and long form `--` variations: +Options usually have shorthand `-` and long form `--` variations: ``` dat -p /test @@ -75,7 +74,7 @@ commands: push push data to a remote dat ... etc -type `dat command --help` to view detailed help about a specific subcommand +type `dat --help` to view detailed help ``` ### dat init @@ -111,6 +110,36 @@ Current version is now 8eaf3b0739d32849687a544efae8487b5b05df52 Last updated 3 seconds ago ``` +### dat log + +Stream versions out in historical order as json + +```bash +dat log +``` + +By default (no arguments) it will print out a stream of json representing each version of the repository. + +If `` is specified as the first positional argument then the individual change data for that version will be streamed out. + +Example output: + +``` +$ dat log --limit=1 +Version: 6bdd624ae6f9ddb96069e04fc030c6e964e77ac7 [+12, -3] +Date: April 15th 2015, 7:30PM PST + + added cool csv +``` + +``` +$ dat log --limit=1 --json +{ "change": 1, "version": "6bdd624ae6f9ddb96069e04fc030c6e964e77ac7", links: [...], "puts": 12, "deletes": 3, "date": "2015...", "message": "added cool csv"} +``` + +`Links` is a list of older versions that are referenced from this current version (forms a directed acyclic graph if drawn). 
+ + ### dat clone Clone a new repository from a remote dat to create a new dat. @@ -165,49 +194,6 @@ Pull completed successfully, you now have 2 forks. Current version is now b04adb64fdf2203 ``` -### dat replicate - -Same as doing a `dat push` and `dat pull` at the same time. Use it when you are on the other end of a `dat pull` or a `dat push` (e.g. if you are hosting dat on a server). - -Example output: - -``` -$ dat pull ssh://192.168.0.5:~/data -Pushed 403 changes (13.88 Mb). -Pulled 823 changes (93.88 Mb). -Average speed: 4.3 Mb/s. -Replication completed successfully. -``` - -### dat log - -Stream versions out in historical order as json - -```bash -dat log -``` - -By default (no arguments) it will print out a stream of json representing each version of the repository. - -If `` is specified as the first positional argument then the individual change data for that version will be streamed out. - -Example output: - -``` -$ dat log --limit=1 -Version: 6bdd624ae6f9ddb96069e04fc030c6e964e77ac7 [+12, -3] -Date: April 15th 2015, 7:30PM PST - - added cool csv -``` - -``` -$ dat log --limit=1 --json -{ "change": 1, "version": "6bdd624ae6f9ddb96069e04fc030c6e964e77ac7", links: [...], "puts": 12, "deletes": 3, "date": "2015...", "message": "added cool csv"} -``` - -`Links` is a list of older versions that are referenced from this current version (forms a directed acyclic graph if drawn). 
- ### dat checkout Non-destructive rollback state to a hash in the past @@ -354,6 +340,42 @@ Example: {"type":"put","version":"b04adb64fdf2203","change":6,"key":"mafintosh","value":{"key":"mafintosh","name":"Mathias"}} ``` +### dat forks + +List the current forks + +``` +dat forks +``` + +Example output: + +``` +$ dat forks +64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 - Imported csv +163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502 - Updated names +``` + +``` +$ dat forks --json +{version: "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3", message: "Imported csv"} +{version: "163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502", message: "Updated names"} +``` + +### dat replicate + +Same as doing a `dat push` and `dat pull` at the same time. Use it when you are on the other end of a `dat pull` or a `dat push` (e.g. if you are hosting dat on a server). + +Example output: + +``` +$ dat pull ssh://192.168.0.5:~/data +Pushed 403 changes (13.88 Mb). +Pulled 823 changes (93.88 Mb). +Average speed: 4.3 Mb/s. +Replication completed successfully. +``` + ## dataset commands These are meant to affect a specific dataset inside a repository. @@ -415,6 +437,20 @@ $ dat export {"key": "maxogden", "firstname": "Max", "lastname": "Ogden"} ``` +### dat read + +Read binary data from a file stored in dat + +``` +dat read +``` + +Example: + +``` +$ dat read photo.jpg +``` + ### dat write Write binary data into dat. This differs from `import` in that it doesn't parse the file, it just stores it as a binary attachment. `import` is designed for key/value row-like, or tabular data. `write` is meant for large files, blobs, or attachments that you can't parse into rows. @@ -443,55 +479,3 @@ Storing photo.jpg (8.3 Mb, 38 Mb/s). Stored photo.jpg successfully. 
Current version is now b04adb64fdf2203 ``` - -### dat cat - -Read binary data from a file stored in dat - -``` -dat cat -``` - -Example output: - -``` -$ dat cat photo.jpg - -``` - -### dat get - -Get a single key + value out of a dataset - -``` -dat get -``` - -Example output: - -``` -$ dat get uw60748112 -{"key":"uw60748112","version":"5abd6625cd2e64a116628a9a306de2fbd73a05ea5905e26d5d4e58e077be2203","value":{"time":"2014-04-30T00:09:37.000Z","latitude":"46.7557","longitude":"-121.9855","place":"24km ESE of Eatonville, Washington","type":"earthquake"}} -``` - -### dat forks - -List the current forks - -``` -dat forks -``` - -Example output: - -``` -$ dat forks -64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3 - Imported csv -163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502 - Updated names -``` - -``` -$ dat forks --json -{version: "64843f272df9526fb04adb64fdf220330c9a29a8104c9ae4dead6b0aab5748e3", message: "Imported csv"} -{version: "163c6089c3477eecfa42420b4249f481b61c30b63071079e51cb052451862502", message: "Updated names"} -``` diff --git a/usage/root.txt b/usage/root.txt index cc66a020..a9bc7eb3 100644 --- a/usage/root.txt +++ b/usage/root.txt @@ -1,17 +1,19 @@ usage: dat [--flag] [--key=value] commands: - init initialize a new dat store in a directory - checkout dat will operate at a particular head - status show current status - push push data to a remote dat - pull pull data from a remote dat - export streams data from a dataset to stdout - import import rows into a dataset - write write a file into dat - cat reads a file's contents from dat to stdout - forks list forks of the current dat - diff see differences between two forks - merge merge two forks into one + init initialize a new dat store in a directory + status show current status + log view a list of recent changes + clone download and make a full copy of a remote dat + push push data to a remote dat + pull pull data from a remote dat + checkout dat will operate at a 
particular head
+  diff       see differences between two forks
+  merge      merge two forks into one
+  forks      list forks of the current dat
+  import     import a tabular file into a dataset
+  export     export tabular data from a dataset
+  read       read a binary file out of a dataset
+  write      write a binary file into a dataset
 
-type `dat command --help` to view detailed help about a specific subcommand
\ No newline at end of file
+type `dat <command> --help` to view specific command help
\ No newline at end of file

From f6a02616479a35ba2ea42c6177e6c1b7e27a7d38 Mon Sep 17 00:00:00 2001
From: Max Ogden
Date: Mon, 1 Jun 2015 16:20:40 -0700
Subject: [PATCH 30/31] update dat clone docs

---
 beta-cli-api.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/beta-cli-api.md b/beta-cli-api.md
index 125c3060..a57299bc 100644
--- a/beta-cli-api.md
+++ b/beta-cli-api.md
@@ -7,7 +7,7 @@ This is the proposed CLI API for our Beta release. Please leave feedback [in thi
 - [dat init](#dat-init)
 - [dat status](#dat-status)
 - [dat log](#dat-log)
-  - [dat clone](#dat-push)
+  - [dat clone](#dat-clone)
 - [dat push](#dat-push)
 - [dat pull](#dat-pull)
 - [dat checkout](#dat-checkout)
@@ -144,6 +144,12 @@ $ dat log --limit=1 --json
 
 Clone a new repository from a remote dat to create a new dat.
 
+```
+dat clone <repo-url> [output-dir]
+```
+
+Your `repo-url` can use any of the available transports. Default transports are `http`, `https` and `ssh`.
+ Example output: ``` From b14634a239ccdf4749f6d2a6c8381b9026ada3f3 Mon Sep 17 00:00:00 2001 From: Max Ogden Date: Mon, 1 Jun 2015 16:55:44 -0700 Subject: [PATCH 31/31] rename some files --- beta-js-api.md | 388 -------------------------- beta-cli-api.md => cli-docs.md | 46 +-- beta-conflicts-api.md => conflicts.md | 0 3 files changed, 26 insertions(+), 408 deletions(-) delete mode 100644 beta-js-api.md rename beta-cli-api.md => cli-docs.md (94%) rename beta-conflicts-api.md => conflicts.md (100%) diff --git a/beta-js-api.md b/beta-js-api.md deleted file mode 100644 index ef2fcdad..00000000 --- a/beta-js-api.md +++ /dev/null @@ -1,388 +0,0 @@ -# dat programmatic API - -`dat` is a node module that you can require with e.g. `var dat = require('dat')` - -## Create a dat instance - -```js -var db = dat([path], [options]) -``` - -Returns a new dat instance and either opens the existing underlying database or creates a new empty one. All arguments are optional. - -`path` is the path to the folder inside of which dat should put it's `.dat` folder. If a `.dat` folder already exists there dat will open the existing dat store, otherwise a new one will be created. If not specified it will use `process.cwd()` - -When the dat instance is ready it will emit events `.on('ready')` and `.on('open')` (these are equivalent). - -### Options - -* `path` (default `process.cwd()`) - if not specified as the first argument to the constructor it will check `options.path` instead -* `leveldown` (default `require('leveldown-prebuilt')`) - pass in a custom leveldown backend -* `db` - pass in a custom levelup instance. 
if specified the `leveldown` option will be ignored, and your tabular data will be stored entirely in the `db` instance you pass in -* `blobs` (default `require('lib/blobs.js')`) - pass in a custom blob store -* `skim` - TODO decide on semantics -* `feed` - pass in a custom [changes-feed](https://www.npmjs.com/package/changes-feed) instance -* `merge` - pass in a custom merge resolution function - -## db.head - -A string property available on the db instance containing the latest stable version hash. - -## db.status - -A string representing the current status of this Dat. - -Possible values are: - -- *"new"* - newly created, not opened or closed -- *"opening"* - waiting for the database to be opened -- *"open"* - successfully opened the database, available for use -- *"conflict"* - the database has entered conflict mode and conflicts must be resolved or aborted -- *"closing"* - waiting for the database to be closed -- *"closed"* - database has been successfully closed, should not be used - -## db.on - -An event emitter instance to hook into Dat status changes with. - -Each `db.status` will be emitted as an event, e.g. `db.on('conflict')`. - -Additionally there is an `error` event for listening for critical errors. - -**Note** `conflict` is a special, mandatory event. If you do not handle it (e.g. you do not have a `on('conflict')` event bound or you do not have a `merge` function registered with the db) then the db will emit an `error` event if it enters conflict mode. - -## db.createChangesStream - -```js -var changes = db.createChangesStream([opts]) -``` - -Returns a read stream that iterates over the dat store change log (a log of all CRUD in the history of the database). - -Changes are emitted as JS objects that look like `{change: 352, key: 'foo', version: 2}` - -- `change` - the *local* change number (auto-incrementing) that should only be used for local operations such as secondary indexing. 
This number does not get replicated and will vary in distributed use cases. -- `key` - the key of the row related to this change -- `version` - the version hash at the time of this change -- `links` - the previous changes in the change graph that this change points to - - -Example response -``` -{ "change": 13, "key": "foo", "hash": "b342df", "from": 0, "to": 1} -{ "change": 14, "key": "foo", "hash": "a3bc5f", "from": 1, "to": 2} -``` - -*mafintosh: there is no guarantee that the 'change', 'from', and 'to' local numbers are the same across multiple dats. the only guarantee is that all the dependencies for a specific entry in the change feed (previous nodes in the graph) have a lower change number* - -### Options - -* `values` (default `false`) - if true will `get` the row data at the change version and include it `change.value` -* `since` (default `0`) - local `change` number to start from -* `tail` (default `false`) - if true it will set `since` to the very last change so you only get new changes -* `limit` (default unlimited) - how many changes to return before stopping -* `live` (default `false`) - if true will emit new changes as they happen + never end (unless you manually end the stream) - -## db.createConflictStream - -```js -db.createConflictStream(opts) -``` - -Returns a new readable object stream that emits conflicts. If you are not in conflict mode the stream will immediately end. 
- -The object that gets emitted will be an objects with these properties: - -- `key` - the key for this conflict -- `dataset` - the dataset name for this conflict -- `versions` - an array of objects, each object is a different conflicted version in the same format as what is returned by `dataset.get` - -If specified, `opts` can have these properties: - -- `format` (default `objectMode`) - if set to `csv`, `json` or `protobuf` the stream will not be an object mode stream and will emit serialized data - -## db.merge - -```js -db.merge(versions, value, cb) -``` - -Resolve multiple versions of a conflicted row into a new single merged version. - -- `versions` (required) an array of version hash strings *or* objects to merge (see below) -- `value` (required) the new value to store -- `cb` (optional) called when done with `(err, updated)` where `updated` is the new version of the row that was stored (same format as what you get from `dataset.get`) - -If `versions` is an array of strings, it should be the hashes of the versions you want to merge. If it is an array of objects, the objects should be the same format as what you get back from `createConflictStream`. - -## db.createMergeStream - -```js -db.createMergeStream(opts) -``` - -Returns a writable object stream. Each object you write will be merged using the same semantics as `db.merge`. This makes it possible to implement streaming merge pipelines externally from Dat for automation purposes. - -The objects must have these properties: - -- `versions` (required) an array of version hash strings *or* objects to merge -- `value` (required) the new value to store - -## db.rollback - -```js -db.rollback(version, cb) -``` - -Performs a **destructive** (repeat: **destructive**) rollback to the state at `version` and calls `cb` when done with `(err)`. 
- - - -## dataset - -```js -var dataset = db.dataset(name, [opts]) -``` - -example - -```js -var dataset = db.dataset('salaries') -``` - -Creates a namespaced 'collection' for a set of data. Creating a dataset is required before you can put any data into dat. All data must be stored in a dataset. - -## dataset.get - -```js -dataset.get(key, [options], cb) -``` - -example: - -```js -dataset.get(whatever, function cb(err, obj) { - // if err exists it will be some type of DatError - // {type: 'file', dataset: 'salaries', 'key': 'photo', version: '324i2h3i4b2iu', value: {foo: 'bar'}} -}) -``` - -Gets a key, calls callback with `(error, value)`. `value` is a JS object that will have these keys: - -* `key` - a string -* `value` - a JS object with the data stored at this `key` -* `version` - the version hash of the row -* `type` - a string of the type, usually `row` or `file` - -### Options - -* `version` (defaults to latest) - gets row at specific checkout, e.g. `{version: '2bi42oujb3'}` -* `checkout` (defaults to latest) - gets row at specific checkout, e.g. `{checkout: '23b4u234u2'}` - -`version` is the version of the row (it must exist) and `checkout` is the version of the dataset (it gets closest older version of the row from the checkout point) - -## dataset.put - -```js -dataset.put(key, value, cb) -``` - -Puts value into the database by key. Key must be a string. - -`cb` will be called with `(error, newVersion)` where `newVersion` will be be a JS object (the same as what you get back from a `.get`). - -All versions of all rows are persisted and replicated. - -## dataset.delete - -```js -dataset.delete(key, cb) -``` - -Marks `key` as deleted. Note: does not destroy old versions. Calls `cb` with `(err, deletedRow)` - -`deletedRow` is a JS object, the same as what you get back from `.get`, except the `value` property will be `null`. The `version` property will be a new hash. 
- -Note: Deleting the row of a `type: 'file'` will also delete the file it references in the blob store. - -## dataset.createReadStream - -```js -var readStream = dataset.createReadStream([opts]) -``` - -Returns a readable stream over the most recent version of all rows in the dataset. - -Rows are returned in the same format as `.get`. - -### Options - -* `gt`, `gte`, `lt`, `lte` - greater than/less than sort strings for controlling the readstream start/end positions (inclusive and exclusive). -* `limit` (default unlimited) - how many rows to return before stopping -* `reverse` (default false) - if `true` returns in reverse-lexicographic sorting order -* `type` (default all) - specify a single type to return, e.g. `type: 'file'` to get only file metadata back. Note that this simply filters the results, a full table scan will still be performed. - -## createValueStream - -```js -var valueStream = db.createValueStream([opts]) -``` - -Returns a read stream over the most recent version of all rows in the dat store that returns only the values stored. - -By default the returned stream is a readable object stream that will emit 1 JS object per row (equivalent to the `.value` object returned by `createReadStream`). - -You can also pass in options to serialize the values as either CSV or line-delimited JSON (see below). - -### Options - -* `gt`, `gte`, `lt`, `lte` - greater than/less than sort strings for controlling the readstream start/end positions (inclusive and exclusive). 
-* `limit` (default unlimited) - how many rows to return before stopping -* `reverse` (default false) - if `true` returns in reverse-lexicographic sorting order -* `format` (default `objectMode`) - if set to `csv`, `json` or `protobuf` the stream will not be an object mode stream and will emit serialized data - -## dataset.createKeyStream - -```js -var keyStream = db.createKeyStream([opts]) -``` - -Returns a readable stream over the most recent version of all keys in the dat store that returns only the keys stored. This method does not decode values and exists for mostly performance reasons. - -By default the returned stream is a readable object stream that will emit 1 JS object per row in the form `{key: key, version: number}`. - -### Options -* `gt`, `gte`, `lt`, `lte` - greater than/less than sort strings for controlling the readstream start/end positions (inclusive and exclusive). -* `limit` (default unlimited) - how many rows to return before stopping -* `reverse` (default false) - if `true` returns in reverse-lexicographic sorting order - -## dataset.createWriteStream - -```js -var writeStream = db.createWriteStream([opts]) -``` - -Returns a new writable stream. You can write data to it. - -Supported types (if you set the `format` option correctly) are: - -- `object` - JS objects (default) e.g. `objectMode: true` in node streams -- `csv`- raw CSV (e.g. `fs.createReadStream('data.csv')`) -- `json` - raw [newline separated JSON objects](http://ndjson.org/) -- `protobuf` - protocol buffers encoded binary data - -### Options - -* `format` (defaults to `object`), set this equal to `json`, `csv`, or `protobuf` to tell the write stream how to parse the data you write to it -* `key` (default `key`) - the column or array of columns to use as the primary key -* `keyFormat` - a function that formats the key before it gets inserted. accepts `(val)` and must return a string to set as the key. -* `columns` - specify the column names to use when parsing CSV. 
CSV headers are automatically parsed but this can be used to override them -* `headerRow` (default `true`) - set to false if your csv doesn't have a header row. you'll also have to manually specify `columns` -* `separator` (default `,`) - passed to the csv parser -* `delimiter` (default `\n`) - passed to the csv parser - -## dataset.createVersionStream - -```js -var versions = dataset.createVersionStream(key) -``` - -Returns a read stream that emits all versions of a given key. - -TODO determine possible options - -## dataset.createFileWriteStream - -```js -var blobWriter = dataset.createFileWriteStream(key, [rowData], [cb]) -``` - -Returns a writable stream that you can stream a binary blob into. Calls optional `cb` with `(err, updated)` where `updated` is the new version of the row that the blob was attached to. - -`key` is the same as the `key` in `.put` - it must be a string. - -If specified `row` should be a JS object with any data you want to store, the same as data in `.put`. - -## dataset.createFileReadStream - -```js -var blobReader = dataset.createFileReadStream(key, [options]) -``` - -Returns a readable stream of file data. - -`key` is the key of the row where the blob is stored. `filename` is the name of the attached blob. both are required. - -### Options - -* `version` (default latest) - the version of the file to get - -# Replication - -## db.createPushStream - -```js -db.createPushStream([opts]) -``` - -Returns a duplex replication stream that you can pipe over a transport stream to a remote replication endpoint. Pushes local data into remote. - -### Options - -TODO decide on options - -## db.createPullStream - -```js -db.createPullStream([opts]) -``` - -Returns a duplex replication stream that you can pipe over a transport stream to a remote replication endpoint. Pulls data from remote and merges into local. 
- -### Options - -TODO decide on options - -## db.createSyncStream([opts]) - -Returns a duplex replication stream that you can pipe over a transport stream to a remote replication endpoint. Does both a push and a pull. - -### Options - -TODO decide on options - -## db.open - -```js -db.open(cb) -``` - -Makes sure the dat is ready and then calls the `cb` with `(err)` if there was an error. If there was no error the dat is ready to use. Will cause the ready/open events to be emitted. - -## db.close - -```js -db.close(cb) -``` - -Closes the dat and any underlying storage/network interfaces. Calls `cb` with `(err)` when done. - -## db.info - -```js -db.info(cb) -``` - -Gets information about the database and calls `cb` with `(err, info)`. - -`info` will be a JS object with these properties: - -- `datasets` - an object containing information about each dataset in this dat -- `datasets[datasetName]` - an object with these properties - - `rowCount` - the number of rows in this dataset - - `fileCount` - the number of files in this dataset - - `schema` - the `.proto` formatted protocol buffers string schema -- `changeCount` - the overall number of changes in the history -- `meta` - a JS object that has the data from `dat.json` - - diff --git a/beta-cli-api.md b/cli-docs.md similarity index 94% rename from beta-cli-api.md rename to cli-docs.md index a57299bc..fa48ea24 100644 --- a/beta-cli-api.md +++ b/cli-docs.md @@ -1,6 +1,6 @@ # dat command line API -This is the proposed CLI API for our Beta release. Please leave feedback [in this thread](https://github.com/maxogden/dat/issues/195). +This is the `dat` command line API as of the Beta release. - [repository commands](#repository-commands) - [dat](#dat) @@ -139,7 +139,6 @@ $ dat log --limit=1 --json `Links` is a list of older versions that are referenced from this current version (forms a directed acyclic graph if drawn). - ### dat clone Clone a new repository from a remote dat to create a new dat. 
@@ -386,6 +385,8 @@ Replication completed successfully. These are meant to affect a specific dataset inside a repository. +Currently you **must** specify the dataset when doing any dataset commands. + - `dataset`/`d` - specify the dataset to use. ### dat import @@ -396,19 +397,15 @@ Import key/value data into dat dat import ``` -Stream data from stdin: - -```bash -cat file.json | dat import - -``` - ### Options - `key`/`k` - specify which field to use as the primary key - `no-key` - generate a random unique key - `message`/`m` - a short description of this import -Example output: +Examples: + +Import a json file: ``` $ dat import flights.json @@ -417,6 +414,12 @@ Data added successfully. Current version is now b04adb64fdf2203 ``` +Stream data from stdin: + +```bash +cat file.json | dat import - +``` + ### dat export Stream a range of keys + values out of a dataset. @@ -438,6 +441,7 @@ dat export > woah-my-data.json - `format` - default `json`. you can also specify `csv`. Example output: + ``` $ dat export {"key": "maxogden", "firstname": "Max", "lastname": "Ogden"} @@ -464,24 +468,26 @@ Write binary data into dat. This differs from `import` in that it doesn't parse Write a file to dat: ``` -dat write -``` - -Stream data from stdin: - -```bash -cat photo.jpg | dat write photo.jpg - +dat write ``` #### Options -`name/n`: the name, or lookup key, for the binary file inside dat. If no name is supplied, dat will use the first argument (path-to-file) as the lookup key. +`name`/`n`: the name, or lookup key, for the binary file inside dat. If no name is supplied, dat will use the first argument (filename) as the lookup key. Example output: +Stream data from stdin, save as 'photo.jpg' (must specify name when using STDIN): + +```bash +cat photo.jpg | dat write - --name=photo.jpg +``` + +Write a file by filename (uses `cat.jpg` as the name automatically): + ``` -$ dat write /some/path/to/photo.jpg --name=photo.jpg -Storing photo.jpg (8.3 Mb, 38 Mb/s). 
-Stored photo.jpg successfully. +$ dat write images/cat.jpg +Storing cat.jpg (8.3 Mb, 38 Mb/s). +Stored cat.jpg successfully. Current version is now b04adb64fdf2203 ``` diff --git a/beta-conflicts-api.md b/conflicts.md similarity index 100% rename from beta-conflicts-api.md rename to conflicts.md