Skip to content

Commit

Permalink
[api] Back of the napkin implementation, 7am implementation of node-s…
Browse files Browse the repository at this point in the history
…chema-org
  • Loading branch information
indexzero committed Jun 8, 2011
1 parent e1ddf92 commit e4d56da
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 0 deletions.
77 changes: 77 additions & 0 deletions bin/read-schema-org
@@ -0,0 +1,77 @@
#!/usr/bin/env node

var fs = require('fs'),
exec = require('child_process').exec,
path = require('path'),
spawn = require('child_process').spawn,
neuron = require('neuron'),
winston = require('winston').cli();

// Directory for the generated schema files (one level above this script's
// directory) and the path of the `schema-list.json` index inside it.
var schemaDir = path.join(__dirname, '..', 'schemas'),
allSchemas = path.join(schemaDir, 'schema-list.json');

/**
 * Spawns `node` with the given arguments, relays the child's output to this
 * process and invokes `callback` once the child is done.
 *
 * The child's stdout is forwarded to stdout and its stderr to stderr, so that
 * diagnostics stay separable from regular output.
 *
 * @param {string[]} params   Arguments handed to `node` (script path first).
 * @param {function} callback Invoked exactly once as `callback(err, code)`:
 *                            `err` is set when the child could not be spawned,
 *                            otherwise `code` is the child's exit code.
 *                            Callers that take no arguments keep working.
 */
function streamChild (params, callback) {
  var label = 'node ' + params.join(' ').yellow,
      done = false,
      child;

  // A failed spawn may emit both 'error' and 'exit'; only report once.
  function finish (err, code) {
    if (done) {
      return;
    }

    done = true;
    callback(err, code);
  }

  winston.info('Spawning: ' + label);
  child = spawn('node', params);

  child.stdout.on('data', function (data) {
    process.stdout.write(data.toString());
  });

  child.stderr.on('data', function (data) {
    process.stderr.write(data.toString());
  });

  // Without this listener a failed spawn would surface as an unhandled
  // 'error' event and the callback would never run.
  child.on('error', function (err) {
    winston.error('Unable to run ' + label + ': ' + err.message);
    finish(err);
  });

  child.on('exit', function (code) {
    winston.info(label + ' has exited.');
    finish(null, code);
  });
}

//
// Entry point: remove the schema directory, regenerate the schema list by
// running list-schemas.js, then run read-schema.js once for every schema
// named in that list.
//

// Single-quote the directory for the shell so that a path containing spaces
// or shell metacharacters cannot change what `rm -rf` operates on.
var quotedSchemaDir = "'" + schemaDir.replace(/'/g, "'\\''") + "'";

winston.warn('Removing all schemas in ' + schemaDir.magenta);
exec('rm -rf ' + quotedSchemaDir, function (err) {
  if (err) {
    return winston.error('Error removing schemas: ' + err.message);
  }

  streamChild([path.join(__dirname, '..', 'list-schemas.js')], function () {
    fs.readFile(allSchemas, function (err, data) {
      var schemas,
          manager,
          completed = 0;

      if (err) {
        return winston.error('Error reading schemas: ' + err.message);
      }

      // The list file is produced by another process; do not trust it to be
      // well-formed JSON.
      try {
        schemas = JSON.parse(data.toString()).schemas;
      }
      catch (ex) {
        return winston.error('Error parsing schemas: ' + ex.message);
      }

      // With nothing to enqueue the 'finish' handler below would never fire,
      // so report the situation instead of ending silently.
      if (!Array.isArray(schemas) || schemas.length === 0) {
        return winston.warn('No schemas listed in ' + allSchemas.magenta);
      }

      // NOTE(review): `concurrency` presumably caps how many jobs neuron runs
      // at the same time -- confirm against the neuron documentation.
      manager = new neuron.JobManager({ concurrency: 75 });

      manager.addJob('readSchema', {
        work: function (schema) {
          var that = this,
              readOptions;

          readOptions = [
            path.join(__dirname, '..', 'read-schema.js'),
            '--type',
            schema
          ];

          // Flag the job as finished once the child process is done.
          streamChild(readOptions, function () {
            that.finished = true;
          });
        }
      });

      manager.on('finish', function (job, worker) {
        completed += 1;

        if (completed === schemas.length) {
          winston.info('');
          winston.info('Done parsing all schemas from ' + 'schema.org'.magenta);
          winston.info('They are located in: ' + schemaDir.magenta);
          winston.info('');
        }
      });

      schemas.forEach(function (schema) {
        manager.enqueue('readSchema', schema);
      });
    });
  });
});
52 changes: 52 additions & 0 deletions list-schemas.js
@@ -0,0 +1,52 @@
// Modules used by this script. All names belong to one `var` statement; a
// missing comma here would end the statement early and turn the remaining
// names into implicit globals.
var fs = require('fs'),
    path = require('path'),
    exec = require('child_process').exec,
    colors = require('colors'),
    jsdom = require('jsdom'),
    request = require('request'),
    winston = require('winston').cli();

//
// Downloads the schema.org overview page, collects the schema names from the
// links on it and saves them as `schemas/schema-list.json` next to this file.
//
var url = 'http://schema.org/docs/full.html';

winston.info('Contacting: ' + url.green);
request({ uri: url }, function (error, response, body) {
  // On a transport error `response` is undefined, so `error` has to be
  // checked first; any status other than 200 is treated as a failure as well.
  if (error || response.statusCode !== 200) {
    return winston.error('Error when contacting: ' + url + ' (' +
      (error ? error.message : 'HTTP ' + response.statusCode) + ')');
  }

  winston.info('Parsing: ' + url.green);
  jsdom.env({
    html: body,
    scripts: [
      'http://code.jquery.com/jquery-1.5.min.js'
    ]
  }, function (err, window) {
    // Nothing can be extracted without a window that has jQuery loaded.
    if (!window || !window.jQuery) {
      return winston.error('Error parsing: ' + url);
    }

    var $ = window.jQuery,
        schemaDir = path.join(__dirname, 'schemas'),
        listFile = path.join(schemaDir, 'schema-list.json'),
        // Quoted for the shell so paths with spaces or metacharacters are safe.
        quotedDir = "'" + schemaDir.replace(/'/g, "'\\''") + "'",
        // Link targets that are not treated as schema names.
        ignored = ['list-schemas.js', 'documents.html', 'schemas.html', '#'],
        schemas = [];

    winston.info('Reading: ' + url.green);
    $('a').each(function (i, el) {
      var schema = path.basename(el.href),
          match;

      if (ignored.indexOf(schema) !== -1) {
        return;
      }

      // NOTE(review): fragment-only links appear to resolve against this
      // script's own path, giving `list-schemas.js#Name`; keep just `Name`.
      match = schema.match(/list-schemas\.js#(\w+)/);
      schemas.push(match ? match[1] : schema);
    });

    var result = JSON.stringify({
      schemas: schemas
    }, null, 2);

    winston.info('Saving results to: ' + listFile.green);
    exec('mkdir -p ' + quotedDir, function (err) {
      if (err) {
        return winston.error('Error creating ' + schemaDir + ': ' + err.message);
      }

      fs.writeFile(listFile, result, function (err) {
        if (err) {
          return winston.error('Error writing ' + listFile + ': ' + err.message);
        }

        winston.info('Done creating ' + listFile.green + ' from ' + url.magenta);
      });
    });
  });
});
87 changes: 87 additions & 0 deletions read-schema.js
@@ -0,0 +1,87 @@
var fs = require('fs'),
path = require('path'),
exec = require('child_process').exec,
argv = require('optimist').argv,
colors = require('colors'),
request = require('request'),
jsdom = require('jsdom'),
winston = require('winston').cli();

// Schema type to fetch, taken from the `--type` command line option, and the
// schema.org URL derived from it.
var type = argv.type,
    url;

// A missing or valueless `--type` would produce a meaningless URL and make the
// later `type.toLowerCase()` call throw, so stop early with a clear message.
if (typeof type !== 'string' || type.length === 0) {
  console.error('Missing required option: --type <SchemaType>');
  process.exit(1);
}

url = 'http://schema.org/' + type;

//
// Downloads the schema.org page for `type`, extracts the property tables from
// it and saves the result as `schemas/<type in lower case>.json` next to this
// file. Properties of the requested type end up in `schema.properties`;
// properties from tables headed by another type end up in `schema.bases`.
//
winston.info('Contacting: ' + url.green);
request({ uri: url }, function (error, response, body) {
  // On a transport error `response` is undefined, so `error` has to be
  // checked first; any status other than 200 is treated as a failure as well.
  if (error || response.statusCode !== 200) {
    return winston.error('Error when contacting: ' + url + ' (' +
      (error ? error.message : 'HTTP ' + response.statusCode) + ')');
  }

  winston.info('Parsing: ' + url.green);
  jsdom.env({
    html: body,
    scripts: [
      'http://code.jquery.com/jquery-1.5.min.js'
    ]
  }, function (err, window) {
    // Nothing can be extracted without a window that has jQuery loaded.
    if (!window || !window.jQuery) {
      return winston.error('Error parsing: ' + url);
    }

    var $ = window.jQuery,
        schemaDir = path.join(__dirname, 'schemas'),
        schemaFile = path.join(schemaDir, type.toLowerCase() + '.json'),
        // Quoted for the shell so paths with spaces or metacharacters are safe.
        quotedDir = "'" + schemaDir.replace(/'/g, "'\\''") + "'",
        schema = { type: type };

    winston.info('Reading: ' + url.green);

    $('.definition-table tbody').each(function (i, tbody) {
      var jbody = $(tbody),
          head = jbody.prev(),
          heading = $(head.find('th a')[0]).html(),
          propList = [],
          currentType,
          html,
          theadIndex;

      // A table without a heading link cannot be attributed to a type; skip
      // it instead of crashing on a missing value.
      if (typeof heading !== 'string') {
        winston.warn('Skipping a definition table without a type heading');
        return;
      }

      currentType = heading.trim();
      winston.info('Parsing Type: ' + currentType.magenta);

      // If the body's markup contains a `<thead`, only the part before it is
      // used for this type.
      html = jbody.html();
      theadIndex = html.indexOf('<thead');

      if (theadIndex > 0) {
        jbody = $('<tbody>' + html.substr(0, theadIndex) + '</tbody>');
      }

      jbody.find('tr').each(function (j, tr) {
        var jtr = $(tr),
            ptype = $(jtr.find('td.prop-ect')[0]),
            atypes = $(ptype.find('a')),
            types = [],
            prop;

        prop = {
          name: $(jtr.find('th code')[0]).html(),
          description: $(jtr.find('td.prop-desc')[0]).html()
        };

        // Linked types are collected by link text (a single one is stored as
        // a plain value); otherwise the raw cell content is used.
        if (atypes.length > 0) {
          atypes.each(function (k, el) {
            types.push($(el).html());
          });
          prop.type = types.length === 1 ? types[0] : types;
        }
        else {
          prop.type = ptype.html();
        }

        propList.push(prop);
      });

      if (currentType === type) {
        schema.properties = propList;
        return;
      }

      schema.bases = schema.bases || {};
      schema.bases[currentType] = propList;
    });

    winston.info('Writing schema: ' + schemaFile.magenta);
    exec('mkdir -p ' + quotedDir, function (err) {
      if (err) {
        return winston.error('Error creating ' + schemaDir + ': ' + err.message);
      }

      fs.writeFile(schemaFile, JSON.stringify(schema, null, 2), function (err) {
        if (err) {
          return winston.error('Error writing ' + schemaFile + ': ' + err.message);
        }

        winston.info('Done parsing schema: ' + schemaFile.magenta);
      });
    });
  });
});

0 comments on commit e4d56da

Please sign in to comment.