Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

first commit

  • Loading branch information...
commit cc649e443a0113a522878e96459bccce8075a0fb 0 parents
Dale Harvey authored
Showing with 188 additions and 0 deletions.
  1. +42 −0 README.md
  2. +94 −0 couchpotato.js
  3. +52 −0 workers/cp-fetchpage.js
42 README.md
@@ -0,0 +1,42 @@
+## Couch Potato
+
+Couch Potato is a data adapter for CouchDB, a generic server that lets you easily read from various data sources and put that data into CouchDB.
+
+## Usage
+
+Start a couchpotato process with the URL of a database. Couchpotato will create this database if it doesn't exist, then listen to the database for scheduled jobs.
+
+ $ node couchpotato.js http://127.0.0.1:5984/couchpotato
+
+Once the process is started, you schedule a job by writing a document to the database. Each job takes a custom job description, which you can find documented below. This example reads the google.com homepage every hour and saves the results to your local database `myscreenscrapes`:
+
+ { "worker": "fetchpage"
+ , "ttl": 3600000
+ , "opts": {"uri":"http://google.com"}
+ , "destination": "myscreenscrapes"
+ }
+
+There are a few common global attributes you can add to jobs, they are all optional
+
+* `ttl` - (defaults to 5 minutes) This is the time in milliseconds between subsequent runs of the job.
+* `destination` - This is the database that the results get written to. You can specify a full URL (including auth details); if you only specify a database name, the results are saved to the same host that the `couchpotato` database is on.
+* `opaque` - This value is passed through unchanged and written into the result document under the same `opaque` key.
+
+## Screenscrape a webpage
+
+ { "worker": "fetchpage"
+ , "ttl": 3600000
+ , "opts": {"uri":"http://google.com"}
+ , "destination": "myscreenscrapes"
+ }
+
+
+This is currently the only worker written.
+
+TODO:
+ * RSS reader
+ * Twitter API reader (problem with oauth?)
+ * Github Issues
+ * POP / Email Reader
+ * Foursquare / Gowalla etc
+
94 couchpotato.js
@@ -0,0 +1,94 @@
+
+
+var follow = require('follow')
+ , request = require('request')
+ , cron = require("cron")
+ , plainUri = process.argv[2]
+ , uri = require("url").parse(plainUri);
+
+
+var jobMap = function(doc) {
+ if (!doc.lastSeen) {
+ emit(0, null);
+ } else {
+ var ttl = (typeof doc.ttl === "undefined") ? 5*60*1000 : doc.ttl;
+ emit(doc.lastSeen + ttl);
+ }
+}
+
+
+var designDoc =
+ { views:
+ { jobs:
+ { map: jobMap.toString() }
+ }
+ };
+
+
+function runJobs() {
+
+ console.log("Fetching new jobs: ");
+
+ request(
+ { method: 'GET'
+ , uri: plainUri + "/_design/couchpotato/_view/jobs?include_docs=true&endkey=" + new Date().getTime()
+ }, function(err, resp, body) {
+ body = JSON.parse(body);
+ for (var i = 0; i < body.rows.length; i++) {
+ var job = body.rows[i].doc;
+ if (job.worker) {
+ console.log("Processing job: " + job.worker + " from " + job._id);
+ require("./workers/cp-" + job.worker).process({uri:uri}, job);
+ }
+
+ markSeen(job)
+ }
+ }
+ );
+}
+
+
+function markSeen(doc) {
+
+ doc.lastSeen = new Date().getTime();
+
+ request(
+ { method: 'PUT'
+ , uri: plainUri + "/" + doc._id
+ , body: JSON.stringify(doc)
+ }, function(err, resp, body) {
+ if (resp.statusCode === 201) {
+ console.log("Marked " + doc._id + " as seen");
+ }
+ }
+ );
+
+}
+
+
+request(
+ { method: 'GET'
+ , uri: plainUri + "/_design/couchpotato"
+ }, function(err, resp, body) {
+
+ if (resp.statusCode === 200) {
+ designDoc._rev = JSON.parse(body)._rev;
+ }
+
+ if (resp.statusCode === 200 || resp.statusCode === 404) {
+ request(
+ { method: 'PUT'
+ , uri: plainUri + "/_design/couchpotato"
+ , body: JSON.stringify(designDoc)
+ }, function(err, resp, body) {
+ if (resp.statusCode === 201) {
+ new cron.CronJob('1 * * * * *', runJobs);
+ runJobs();
+ }
+ }
+ );
+ }
+ }
+);
+
+
52 workers/cp-fetchpage.js
@@ -0,0 +1,52 @@
+
+var request = require('request');
+
+function process(ctx, doc) {
+
+ if (doc.opts && doc.destination) {
+
+ request(doc.opts, function(err, resp, body) {
+
+ var result =
+ { time: (new Date().getTime())
+ , uri: doc.opts.uri
+ , statusCode: resp.statusCode
+ , headers:resp.headers
+ , body: body
+ };
+
+ if (doc.opaque) {
+ result.opaque = doc.opaque;
+ }
+
+ var destination = require("url").parse(doc.destination);
+ var uri = (!destination.protocol) ?
+ ctx.uri.protocol + "//" + ctx.uri.host + "/" + doc.destination :
+ doc.destination;
+
+ request(
+ { method: 'PUT'
+ , uri: uri
+ }, function(err, resp, body) {
+ if (resp.statusCode === 201 || resp.statusCode === 412) {
+ request(
+ { method: 'POST'
+ , headers: {'content-type':"application/json"}
+ , uri: uri
+ , body: JSON.stringify(result)
+ }, function(err, resp, body) {
+ if (resp.statusCode === 201) {
+ console.log("Saved webpage " + doc.opts.uri + " to " + uri);
+ }
+ }
+ );
+ }
+ }
+ );
+ });
+
+ }
+
+}
+
+exports.process = process;
Please sign in to comment.
Something went wrong with that request. Please try again.