Skip to content

Commit

Permalink
adjusting scraper
Browse files: browse the repository at this point in the history
  • Loading branch information
gustavodemari committed Jun 26, 2017
1 parent e630d27 commit 4d8d761
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,2 +1,3 @@
# Ignore output of scraper
node_modules/*
data.sqlite
3 changes: 2 additions & 1 deletion package.json
Expand Up @@ -3,11 +3,12 @@
"main": "scraper.js",
"dependencies": {
"cheerio": "latest",
"osmosis": "^1.1.4",
"request": "latest",
"sqlite3": "latest"
},
"keywords": [
"scraper",
"morph"
]
}
}
33 changes: 16 additions & 17 deletions scraper.js
@@ -1,29 +1,31 @@
// This is a template for a Node.js scraper on morph.io (https://morph.io)

var cheerio = require("cheerio");
const osmosis = require('osmosis');
var request = require("request");
var sqlite3 = require("sqlite3").verbose();

// Target forum URL, stored base64-encoded and decoded once at load time
// (decodes to "http://forum.lolesporte.com/").
// Buffer.from() replaces the deprecated `new Buffer(string, encoding)` constructor.
// Note: this module-level name shadows the global WHATWG `URL` class within this file.
const URL = Buffer.from('aHR0cDovL2ZvcnVtLmxvbGVzcG9ydGUuY29tLw==', 'base64').toString('ascii');

/**
 * Open the "data.sqlite" database and make sure the `data` table exists.
 *
 * @param {function(object): void} callback - Called with the database handle
 *   from inside the serialize() block, after the CREATE TABLE has been issued.
 */
function initDatabase(callback) {
  // Set up sqlite database.
  const db = new sqlite3.Database("data.sqlite");
  db.serialize(function () {
    // Create only the current schema. An earlier "data (name TEXT)" statement
    // placed before this one would win on a fresh database, because
    // IF NOT EXISTS turns the second CREATE into a no-op.
    db.run("CREATE TABLE IF NOT EXISTS data (value TEXT, time TIMESTAMP DEFAULT CURRENT_TIMESTAMP)");
    callback(db);
  });
}

/**
 * Insert one scraped value into the `data` table.
 *
 * The column is named explicitly so that any other column of the table
 * (such as a defaulted timestamp) is filled in by the database.
 *
 * @param {object} db - Database handle exposing prepare().
 * @param {string} value - Text to store in the `value` column.
 */
function updateRow(db, value) {
  // Prepare exactly one statement: a second, un-finalized prepare of the old
  // column-less INSERT would leak a statement handle.
  const statement = db.prepare("INSERT INTO data(value) VALUES (?)");
  statement.run(value);
  statement.finalize();
}

/**
 * Print every stored row as "<rowid>: <value>" to stdout.
 *
 * @param {object} db - Database handle exposing each(sql, rowCallback).
 */
function readRows(db) {
  // Read some data.
  db.each("SELECT rowid AS id, value FROM data", function (err, row) {
    if (err) {
      // Report the failure instead of dereferencing an undefined row.
      console.error(err);
      return;
    }
    console.log(row.id + ": " + row.value);
  });
}

Expand All @@ -40,20 +42,17 @@ function fetchPage(url, callback) {
}

/**
 * Scrape the forum front page with osmosis and store each result in the database.
 *
 * Each data object emitted by the pipeline is stored as a JSON string via
 * updateRow(); the database is closed when the pipeline reports completion.
 *
 * @param {object} db - Open database handle; closed by this function when done.
 */
function run(db) {
  // Only the osmosis pipeline remains. The earlier template scraper
  // (fetchPage on morph.io + cheerio + readRows) targeted a different site
  // and would have wrapped this pipeline in an unrelated HTTP request.
  osmosis
    .get(URL)
    .set({'forum_title': ['.forumtitle']})
    .set({'forum_stats': ['.stats-wrapper']})
    .data(function (data) {
      updateRow(db, JSON.stringify(data));
    })
    .done(function () {
      db.close();
    });
}

// Entry point: set up the database, then pass the handle to run() to start scraping.
initDatabase(run);

0 comments on commit 4d8d761

Please sign in to comment.