5 changes: 1 addition & 4 deletions bin/import.js
@@ -47,10 +47,7 @@ module.exports = {
function handleImport (args) {
debug('handleImport', args)

if (args.help || args._.length === 0) {
return usage()
}

if (args.help || args._.length === 0) return usage()
if (!args.dataset) abort(new Error('Error: Must specify dataset (-d)'), args)

openDat(args, function (err, db) {
2 changes: 1 addition & 1 deletion docs/cli-docs.md
@@ -480,7 +480,7 @@ dat import <filename> --dataset=<name>

### Options

- `key`/`k` - specify which field to use as the primary key (false for no key)
- `key`/`k` - specify which column to use as the primary key (defaults to auto-generated keys). Pass the flag multiple times to build a compound key; the key columns are sorted ascending by default.
- `message`/`m` - a short description of this import

Examples:
21 changes: 15 additions & 6 deletions docs/cookbook.md
@@ -2,9 +2,7 @@

## How do I set up my dat so other people can `dat clone`?

Dat is transport agnostic. Here, we will go over two ways to set up an endpoint for your dat -- ssh, and http.

### Using SSH to set up a dat host
### dat over SSH

SSH has a lot of benefits -- it doesn't require a running process, and you can easily use highly-secure authentication.

@@ -26,7 +24,7 @@ On OSX, it is easy to set up an ssh endpoint: [see this tutorial](https://suppor

However, we recommend that you set up your dat on a dedicated server so that people can clone from you reliably. If using Linux, you'll need to [make sure you have an ssh port open](http://www.cyberciti.biz/faq/linux-open-iptables-firewall-port-22-23/).

### Using HTTP to set up a dat host
### dat over HTTP

A dat can also be hosted over http. We include a command, `dat serve`, which starts an http listener that supports clone, pull, and push.

@@ -50,15 +48,26 @@ You then might want to use process monitoring so that if the process fails for s

We recommend SSH keys for access control. Here is a [good tutorial on setting up ssh authentication to allow ssh access only to certain individuals](https://www.digitalocean.com/community/tutorials/how-to-set-up-ssh-keys--2).


## How do I allow read-only access to the dat?

You can do this via https using the `--read-only` flag:
If you aren't using SSH, you can do this over http using the `--read-only` flag:

```
dat serve --read-only
```

## How do I use a compound key in dat?

A compound key might be something like 'city', 'state', and 'zip code': no one field on its own uniquely identifies a row, but together they form a unique key.

Here, we will create a compound key using these three:

```
dat import cities.csv -d cities -k city -k state -k zipcode
```

Dat sorts the key names and joins the corresponding values with a `+` delimiter, so a row with 'oakland', 'ca', '94607' gets the key 'oakland+ca+94607'.

## How do I connect a different backend to dat?

In your `package.json` file, under `dat`, add a `backend` entry. Example for `SQL` variants:
14 changes: 13 additions & 1 deletion lib/import.js
@@ -4,14 +4,26 @@ var through = require('through2')
var debug = require('debug')('lib/import')
var parseInputStream = require('../lib/util/parse-input-stream.js')

var COMPOUND_KEY_SEPARATOR = '+'

module.exports = function (db, opts) {
if (!opts) opts = {}
if (!opts.dataset) throw new Error('Error: Must specify dataset (-d)')
if (opts.dataset === 'files') throw new Error('Cannot import into the \'files\' dataset')

var transform = through.obj(function (obj, enc, next) {
debug('heres my obj!', obj)
var key = obj[opts.key] || obj.key || uuid()

var key
if (Array.isArray(opts.key) && opts.key.length > 1) {
// slice() avoids mutating opts.key; the inner param is named
// `field` so it doesn't shadow the outer `key` variable
key = opts.key.slice().sort().map(function (field) {
return obj[field] || ''
}).join(COMPOUND_KEY_SEPARATOR)
} else {
key = obj[opts.key] || obj.key
}

if (!key || key === COMPOUND_KEY_SEPARATOR) key = uuid()
var doc = {type: 'put', key: key, value: obj}
next(null, doc)
})
19 changes: 19 additions & 0 deletions tests/import.js
@@ -41,6 +41,25 @@ test('import: dat import json', function (t) {
st.end()
})

test('import: dat import json with compound key', function (t) {
var json = path.resolve(__dirname + '/fixtures/all_hour.json')
var st = spawn(t, dat + ' import ' + json + ' -k latitude -k longitude -d compound', {cwd: dat2})
st.stdout.empty()
st.stderr.match(/Done importing data/)
st.end()
})

test('import: dat keys get with compound key', function (t) {
var st = spawn(t, dat + ' keys -d compound', {cwd: dat2})
st.stdout.match(function (output) {
var keys = output.split('\n')
t.same(keys[0], '33.9233322+-117.9376678')
return true
})
st.stderr.empty()
st.end()
})

test('import: dat import json with integer id', function (t) {
var json = path.resolve(__dirname + '/fixtures/all_hour.json')
var st = spawn(t, dat + ' import ' + json + ' --key=int --dataset=int-id', {cwd: dat2})
1 change: 1 addition & 0 deletions usage/export.txt
@@ -2,4 +2,5 @@ dat export
-d <dataset-name> (required) # the name of the dataset
--limit=<number> # the number of rows to output. default: infinity
--format=[ndjson, csv, json] # how to parse the output. default: ndjson
--full # output full metadata with each row, including autogenerated keys

1 change: 1 addition & 0 deletions usage/import.txt
@@ -1,3 +1,4 @@
dat import <filename or - for stdin> (required)
-d <dataset-name> (required) # the name of the dataset to import
-f [ndjson, csv, json] # how to parse the file. guessed if not supplied.
--key <column name> # the primary key(s) to use. defaults to auto-generated keys