Skip to content

Commit

Permalink
Refactor some things out to the 'streaming' package, move main script…
Browse files Browse the repository at this point in the history
… to bin/
  • Loading branch information
chbrown committed Aug 18, 2013
1 parent c0653d6 commit 10f85a1
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 190 deletions.
110 changes: 110 additions & 0 deletions bin/sv.js
@@ -0,0 +1,110 @@
#!/usr/bin/env node
'use strict'; /*jslint node: true, es5: true, indent: 2 */
var fs = require('fs');
var async = require('async');
var sv = require('..');
var merge = require('../merge');

if (require.main === module) {
// Command-line definition: usage text plus type hints for the option parser.
var optimist = require('optimist')
  .usage([
    'Consolidate any tabular format.',
    '',
    'Usage: <sprints.txt sv [options] > sprints.csv',
    ' or: sv [options] ~/Desktop/**/*.csv > ~/all.csv',
    '',
    'Parser options:',
    ' --peek 10 infer columns from first ten lines of input',
    ' --in-delimiter field separator (inferred if unspecified)',
    ' --in-quotechar " ',
    // ' --escapechar \\ escape quotechars when quoted',
    '',
    'Stringifier options:',
    ' --out-delimiter , field separator',
    ' --out-quotechar " marks beginning and end of fields containing delimiter',
    ' --filter a,b keep only fields a and b in the results',
    ' --omit c,d leave out fields c and d from the results (processed before filter)',
    ' -j, --json write one JSON object per row',
    '',
    'Other options:',
    ' --describe only describe the data, using headers and a few examples',
    ' --width width of the terminal (used by --describe)',
    ' --merge merge multiple files supplied as command line args',
    ' --version print version and quit',
    ' -v, --verbose turn up the verbosity (still all on STDERR)',
    '',
    'STDIN, if supplied, will be coerced to utf8',
  ].join('\n'))
  // Every option that is read as text below must be declared here; otherwise
  // a numeric-looking value (e.g. `--omit 3`) is coerced to a Number and the
  // later `.split(/,/g)` on filter/omit fails. The three un-prefixed names are
  // kept for backward compatibility (the whole argv is handed to merge).
  .string([
    'delimiter', 'quotechar', 'escapechar',
    'in-delimiter', 'in-quotechar',
    'out-delimiter', 'out-quotechar',
    'filter', 'omit',
  ])
  .boolean(['json', 'describe', 'merge', 'verbose', 'version'])
  .alias({
    j: 'json',
    v: 'verbose',
  })
  .default({
    // fall back to 80 columns when stdout does not report a width
    width: process.stdout.columns || 80,
  });
var argv = optimist.argv;

// Options handed to the parser (input side).
var parser_opts = {
  peek: argv.peek,
  delimiter: argv['in-delimiter'],
  quotechar: argv['in-quotechar'],
};

// Options handed to the stringifier / describer (output side).
var stringifier_opts = {
  delimiter: argv['out-delimiter'],
  quotechar: argv['out-quotechar'],
  filter: argv.filter,
  omit: argv.omit,
  json: argv.json,
  width: argv.width,
};

// func: function (stream, filename, parser_opts, stringifier_opts, callback) { ... }
var func = argv.describe ? sv.describe : sv.transform;
// Final callback shared by every processing mode.
//   err: optional error object; rethrown unless its code is 'EPIPE'.
var exit = function(err) {
  // EPIPE just means that someone down the line cut us short
  // (with a `| head` or something), so it is not treated as a failure.
  if (err && err.code !== 'EPIPE') {
    throw err;
  }

  if (argv.verbose) {
    console.error('Done.');
  }
  // Deliberately no process.exit() here: wait for stdout to finish.
};

// Decide what to do, in priority order: help, version, explicit file
// arguments, piped STDIN, and finally a usage error.
if (argv.help) {
  optimist.showHelp();
  console.log('ARGV: ' + process.argv.join(' '));
  if (argv.verbose) {
    console.log(' argv: ' + JSON.stringify(argv, null, ' ').replace(/\n/g, '\n '));
  }
  console.log(' parser options: ' + JSON.stringify(parser_opts, null, ' ').replace(/\n/g, '\n '));
  console.log(' stringifier options: ' + JSON.stringify(stringifier_opts, null, ' ').replace(/\n/g, '\n '));
}
else if (argv.version) {
  console.log(require('../package').version);
}
else if (argv._.length) {
  // File arguments are checked before STDIN: stdin is also "not a TTY" under
  // cron, CI, or ssh without a terminal, and in those environments explicitly
  // named files must not be ignored in favour of an empty stdin.
  if (argv.merge) {
    console.error('Merging.');
    merge(argv._, argv, exit);
  }
  else {
    // process the files one at a time, in the order given
    async.eachSeries(argv._, function(filepath, callback) {
      var stream = fs.createReadStream(filepath);
      func(stream, filepath, parser_opts, stringifier_opts, callback);
      console.error(''); // newline
    }, exit);
  }
}
else if (!process.stdin.isTTY) {
  // process.stdin.setEncoding('utf8');
  func(process.stdin, null, parser_opts, stringifier_opts, exit);
}
else {
  optimist.showHelp();
  console.error('You must supply data via STDIN or as unflagged command line arguments.');
}
}
173 changes: 10 additions & 163 deletions index.js 100755 → 100644
@@ -1,60 +1,13 @@
#!/usr/bin/env node
'use strict'; /*jslint node: true, es5: true, indent: 2 */
var async = require('async');
var fs = require('fs');
var os = require('os');
var stream = require('stream');
var streaming = require('streaming');
var util = require('util');

var inference = require('./inference');
var merge = require('./merge');
var Parser = exports.Parser = require('./parser');
var Stringifier = exports.Stringifier = require('./stringifier');

/**
 * Transform stream that takes objects in and emits text out: each written
 * object becomes one line of JSON terminated by the platform line ending.
 *
 * @param {Object} [opts] accepted for signature compatibility; not used.
 */
var JSONStringifier = function(opts) {
  // Only the writable side is in object mode. This uses the documented
  // stream option instead of flipping the private `_readableState.objectMode`
  // flag after construction.
  stream.Transform.call(this, {writableObjectMode: true});
};
util.inherits(JSONStringifier, stream.Transform);
JSONStringifier.prototype._transform = function(chunk, encoding, callback) {
  // `encoding` belongs to the incoming chunk, which is an object here, so it
  // is not forwarded; the JSON text is pushed as a plain string.
  this.push(JSON.stringify(chunk) + os.EOL);
  callback();
};

/**
 * Object-mode transform (objects in, objects out) that projects every row
 * onto a fixed list of field names. Fields absent from a row are still
 * present in the output, with the value `undefined`.
 *
 * @param {string[]} fields names of the fields to keep, in output order.
 */
var ObjectFilter = function(fields) {
  stream.Transform.call(this, {objectMode: true});
  this._fields_length = fields.length;
  this._fields = fields;
};
util.inherits(ObjectFilter, stream.Transform);
ObjectFilter.prototype._transform = function(chunk, encoding, callback) {
  var names = this._fields;
  var projected = {};
  var index = 0;
  // copy the wanted fields, in the configured order
  while (index < this._fields_length) {
    var name = names[index];
    projected[name] = chunk[name];
    index += 1;
  }
  this.push(projected, encoding);
  callback();
};

/**
 * Object-mode transform (objects in, objects out) that removes a fixed set
 * of fields from every row. Rows are modified in place and passed along.
 *
 * @param {string[]} fields names of the fields to drop.
 */
var ObjectOmitter = function(fields) {
  stream.Transform.call(this, {objectMode: true});
  // keep the names as keys of a lookup object
  var lookup = {};
  fields.forEach(function(name) {
    lookup[name] = 1;
  });
  this.fields = lookup;
};
util.inherits(ObjectOmitter, stream.Transform);
ObjectOmitter.prototype._transform = function(chunk, encoding, callback) {
  Object.keys(this.fields).forEach(function(name) {
    delete chunk[name];
  });
  this.push(chunk, encoding);
  callback();
};

/**
 * Collect one property from every element of an array.
 *
 * @param {Array} xs elements to read from.
 * @param {string} prop name of the property to extract.
 * @returns {Array} the value of `prop` for each element, in the same order.
 */
function pluck(xs, prop) {
  function pick(x) {
    return x[prop];
  }
  return xs.map(pick);
}
Expand All @@ -68,13 +21,12 @@ function escapeWhitespace(s) {
return whitespace_literals[s];
}

function describe(input, filename, parser_opts, stringifier_opts, callback) {
var describe = exports.describe = function(input, filename, parser_opts, stringifier_opts, callback) {
if (filename) {
console.log(filename);
}

var rows = [];

var parser = input.pipe(new Parser(parser_opts));
parser.on('data', function(row) {
rows.push(row);
Expand All @@ -94,139 +46,34 @@ function describe(input, filename, parser_opts, stringifier_opts, callback) {
callback();
}
});
}
};

function read(input, filename, parser_opts, stringifier_opts, callback) {
var transform = exports.transform = function(input, filename, parser_opts, stringifier_opts, callback) {
if (filename) {
console.error('Reading ' + filename);
console.error('Transforming ' + filename);
}

var parser = input.pipe(new Parser(parser_opts));

if (stringifier_opts.omit) {
parser = parser.pipe(new ObjectOmitter(stringifier_opts.omit.split(/,/g)));
parser = parser.pipe(new streaming.Omitter(stringifier_opts.omit.split(/,/g)));
}

if (stringifier_opts.filter) {
parser = parser.pipe(new ObjectFilter(stringifier_opts.filter.split(/,/g)));
parser = parser.pipe(new streaming.Filter(stringifier_opts.filter.split(/,/g)));
}

var stringifier = stringifier_opts.json ? new JSONStringifier() : new Stringifier(stringifier_opts);
var stringifier = stringifier_opts.json ? new streaming.json.Stringifier() : new Stringifier(stringifier_opts);
parser.pipe(stringifier);

var output = stringifier.pipe(process.stdout);

output.on('finish', callback);
output.on('error', function(err) {
// panic! (let's us quit faster, actually)
// panic! (lets us quit faster, actually)
input.unpipe();
output.unpipe();

callback(err);
});
}

if (require.main === module) {
var optimist = require('optimist')
.usage([
'Consolidate any tabular format.',
'',
'Usage: <sprints.txt sv [options] > sprints.csv',
' or: sv [options] ~/Desktop/**/*.csv > ~/all.csv',
'',
'Parser options:',
' --peek 10 infer columns from first ten lines of input',
' --in-delimiter field separator (inferred if unspecified)',
' --in-quotechar " ',
// ' --escapechar \\ escape quotechars when quoted',
'',
'Stringifier options:',
' --out-delimiter , field separator',
' --out-quotechar " marks beginning and end of fields containing delimiter',
' --filter a,b keep only fields a and b in the results',
' --omit c,d leave out fields x and y from the results',
' omit is processed before filter',
' -j, --json write one JSON object per row',
'',
'Other options:',
' --describe only describe the data, using headers and a few examples',
' --width width of the terminal (used by --describe)',
' --merge merge multiple files supplied as command line args',
' --version print version and quit',
' -v --verbose turn up the verbosity (still all on STDERR)',
'',
'STDIN, if supplied, will be coerced to utf8',
].join('\n'))
.string(['delimiter', 'quotechar', 'escapechar'])
.boolean(['json', 'describe', 'merge', 'verbose', 'version'])
.alias({
j: 'json',
v: 'verbose',
})
.default({
width: process.stdout.columns || 80,
});
var argv = optimist.argv;
var parser_opts = {
peek: argv.peek,
delimiter: argv['in-delimiter'],
quotechar: argv['in-quotechar'],
};
var stringifier_opts = {
delimiter: argv['out-delimiter'],
quotechar: argv['out-quotechar'],
filter: argv.filter,
omit: argv.omit,
json: argv.json,
width: argv.width,
};

// func: function (stream, filename, parser_opts, stringifier_opts, callback) { ... }
var func = argv.describe ? describe : read;
var exit = function(err) {
if (err && err.code != 'EPIPE') {
throw err;
}
// if err.code == 'EPIPE' that just means that someone down
// the line cut us short with a | head or something

if (argv.verbose) {
console.error('Done.');
}
// process.exit(); // wait for stdout to finish, actually.
};

if (argv.help) {
optimist.showHelp();
console.log('ARGV: ' + process.argv.join(' '));
if (argv.verbose) {
console.log(' argv: ' + JSON.stringify(argv, null, ' ').replace(/\n/g, '\n '));
}
console.log(' parser options: ' + JSON.stringify(parser_opts, null, ' ').replace(/\n/g, '\n '));
console.log(' stringifier options: ' + JSON.stringify(stringifier_opts, null, ' ').replace(/\n/g, '\n '));
}
else if (argv.version) {
console.log(require('./package').version);
}
else if (!process.stdin.isTTY) {
// process.stdin.setEncoding('utf8');
func(process.stdin, null, parser_opts, stringifier_opts, exit);
}
else if (argv._.length) {
if (argv.merge) {
console.error('Merging.');
merge(argv._, argv, exit);
}
else {
async.eachSeries(argv._, function(filepath, callback) {
var stream = fs.createReadStream(filepath);
func(stream, filepath, parser_opts, stringifier_opts, callback);
console.error(''); // newline
}, exit);
}
}
else {
optimist.showHelp();
console.error('You must supply data via STDIN or as unflagged command line arguments.');
}
}
};
Empty file modified merge.js 100755 → 100644
Empty file.
7 changes: 4 additions & 3 deletions package.json
Expand Up @@ -12,7 +12,7 @@
"tables"
],
"bin": {
"sv": "index.js"
"sv": "bin/sv.js"
},
"homepage": "https://github.com/chbrown/sv",
"repository": {
Expand All @@ -23,10 +23,11 @@
"license": "MIT",
"dependencies": {
"async": "*",
"optimist": "0.5.2"
"optimist": "*",
"streaming": "*"
},
"devDependencies": {
"tap": "0.4.x"
"tap": "*"
},
"scripts": {
"test": "tap test"
Expand Down
8 changes: 4 additions & 4 deletions test/basic.js
@@ -1,15 +1,15 @@
'use strict'; /*jslint node: true, es5: true, indent: 2 */
var fs = require('fs');
var test = require('tap').test;
var tap = require('tap');

var sv = require('..');

test('import', function (t) {
tap.test('import', function (t) {
t.ok(sv !== undefined, 'sv should load from the current directory');
t.end();
});

test('parser', function (t) {
tap.test('parser', function (t) {
var input = [
'index name time',
'1 chris 1:18',
Expand All @@ -31,7 +31,7 @@ test('parser', function (t) {
});
});

test('stringify', function (t) {
tap.test('stringify', function (t) {
var expected = [
'index,name,time',
'1,chris,NA',
Expand Down

0 comments on commit 10f85a1

Please sign in to comment.