Skip to content

Commit

Permalink
adding webkit css stuff via http://css-infos.net/properties/webkit
Browse files Browse the repository at this point in the history
  • Loading branch information
rgarcia committed Nov 12, 2011
1 parent d0d6eba commit 45c3224
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 16 deletions.
25 changes: 12 additions & 13 deletions scraper/app.js
Expand Up @@ -3,11 +3,10 @@ define([
'scraper',
'underscore',
'../config',
'../models/cssprop',
'../models/mozdevcssprop',
'./w3schools',
'./mozdevcss'
], function (mongoose, scraper, _, config, CSSProp, MozDevCSSProp, w3schools, mozdevcss) {
'./mozdevcss',
'./cssinfos'
], function (mongoose, scraper, _, config, MozDevCSSProp, mozdevcss, cssinfos) {

var db = mongoose.connect(
'mongodb://' + config.db_user_prod + ':' + config.db_pass_prod +
Expand All @@ -20,17 +19,17 @@ define([
console.log('connected to db');

// have to set NODE_ENV to 'production' to actually hit the db
if ( config.environment === 'production' ) {
console.log('clearing out collections');
//CSSProp.collection.remove({});
MozDevCSSProp.collection.remove({});
}
// todo: just update db docs, don't reset like this
//if ( config.environment === 'production' ) {
//console.log('clearing out collections');
//MozDevCSSProp.collection.remove({});
//}

//console.log('scraping w3schools');
//w3schools.rootLevelScraper();
// console.log('scraping mozdev');
// mozdevcss.rootLevelScraper();

console.log('scraping mozdev');
mozdevcss.rootLevelScraper();
console.log('scraping cssinfos for webkit data');
cssinfos.rootLevelScraper();
}
});
});
63 changes: 63 additions & 0 deletions scraper/cssinfos.js
@@ -0,0 +1,63 @@
define([
  'scraper',
  'underscore',
  '../models/mozdevcssprop',
  '../config'
], function(scraper, _, MozDevCSSProp, config) {

  // Scrapes http://css-infos.net/properties/webkit for webkit data.
  // The index page is read for links to per-property detail pages; each
  // detail page is turned into one MozDevCSSProp document.

  var siteRoot = 'http://css-infos.net/';
  var rootURL = siteRoot + 'properties/webkit';

  // Map from a (lower-cased) section heading on a detail page to the
  // database field it is stored under. Headings that are not listed here
  // are stored under their own lower-cased name.
  var sectionTitles = {
    'description' : 'summary',
    'syntax' : 'syntax',
    'values' : 'values'
  };

  // Turns an href taken from the index page into an absolute URL.
  // Leading slashes are dropped so root-relative links do not produce
  // 'http://css-infos.net//...', and already-absolute links are kept as-is.
  function toAbsoluteURL(href) {
    var path = href.trim();
    if ( /^https?:\/\//i.test(path) )
      return path;
    return siteRoot + path.replace(/^\/+/, '');
  }

  // Resolves a section heading to the document field it should populate.
  // Uses an own-property check so headings such as 'constructor' are not
  // matched against Object.prototype.
  function fieldForSection(sectionName) {
    if ( Object.prototype.hasOwnProperty.call(sectionTitles, sectionName) )
      return sectionTitles[sectionName];
    return sectionName;
  }

  return {

    /**
     * Callback for a single detail page.
     *
     * Builds a new MozDevCSSProp, sets its title from 'article h1 code' and
     * copies the HTML of every <section> into the field chosen by
     * sectionTitles. The document is saved only when config.environment is
     * 'production'.
     *
     * @param {*} err - error reported by the scraper; rethrown when truthy.
     * @param {Function} $ - jQuery-style selector function for the page.
     */
    detailPageScraper: function(err, $) {
      if (err) throw err;

      // Declared locally: the previous implicit global leaked across pages.
      var doc = new MozDevCSSProp();

      doc['title'] = $('article h1 code').text().trim();
      console.log('-----------------');
      console.log('scraping ' + doc['title']);

      // Same guard as the mozdev scraper: a page without a title is not a
      // property page, so do not store anything for it.
      if ( !doc['title'] ) {
        console.log('ERROR: could not find title');
        return;
      }

      $('section').each(function() {
        var $section = $(this);
        var sectionName = $section.find('h1').text().trim().toLowerCase();

        // A section without a heading has no usable field name.
        if ( !sectionName )
          return;

        var docField = fieldForSection(sectionName);
        doc[docField] = $section.html();
        console.log('stored ' + docField);
      });

      // save to db!
      if ( config.environment === 'production' )
        doc.save();
    },

    /**
     * Scrapes the index page, collects the detail-page URLs found in
     * 'ul.webkit a' links and hands the list to the scraper with
     * detailPageScraper as the per-page callback.
     *
     * Throws the scraper's error if the index page could not be loaded.
     */
    rootLevelScraper: function() {
      var self = this;
      scraper(rootURL, function(err, $) {
        if (err) throw err;

        // Rebuilt on every run so repeated calls do not accumulate
        // duplicate URLs.
        var detailURLs = [];

        $('ul.webkit a').each(function() {
          var href = $(this).attr('href');
          // Anchors without an href cannot be followed.
          if ( !href )
            return;
          detailURLs.push(toAbsoluteURL(href));
        });

        console.log(detailURLs);
        console.log(detailURLs.length);
        scraper(detailURLs, self.detailPageScraper);
      });
    }

  };
});
10 changes: 7 additions & 3 deletions scraper/mozdevcss.js
Expand Up @@ -19,12 +19,16 @@ define([
mozdevdoc['title'] = $('#title').text();
console.log('-----------------')
console.log('scraping ' + mozdevdoc['title']);
console.log('-----------------')

if ( !mozdevdoc['title'] ) {
console.log('ERROR: could not find title');
return;
}

// function that looks for a section_* with a certain title,
// sets a document field to the html contents or null if it can't find any
var sectionScraper = function(title, dbfield) {
console.log('looking for ' + title);
//console.log('looking for ' + title);
var $section = $('[id^=section_]').filter(function() {
return $(this).children(0).attr('id').toLowerCase() === title.toLowerCase();
});
Expand All @@ -50,7 +54,7 @@ define([
// save to db!
if ( config.environment === 'production' )
mozdevdoc.save();
console.log(mozdevdoc);
//console.log(mozdevdoc);
},

rootLevelScraper: function() {
Expand Down

0 comments on commit 45c3224

Please sign in to comment.