diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3c3629e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+node_modules
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000..9ea5f3d
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,7 @@
+source :rubygems
+
+gem "mechanize", "~>2.0"
+gem "scrapi"
+gem "mongoid"
+gem "ruby-debug19", :require => "ruby-debug"
+gem "pry"
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 0000000..c3235ac
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,73 @@
+GEM
+  remote: http://rubygems.org/
+  specs:
+    activemodel (3.1.0)
+      activesupport (= 3.1.0)
+      bcrypt-ruby (~> 3.0.0)
+      builder (~> 3.0.0)
+      i18n (~> 0.6)
+    activesupport (3.1.0)
+      multi_json (~> 1.0)
+    archive-tar-minitar (0.5.2)
+    bcrypt-ruby (3.0.1)
+    bson (1.3.1)
+    builder (3.0.0)
+    coderay (0.9.8)
+    columnize (0.3.4)
+    ffi (1.0.9)
+    i18n (0.6.0)
+    linecache19 (0.5.12)
+      ruby_core_source (>= 0.1.4)
+    mechanize (2.0.1)
+      net-http-digest_auth (~> 1.1, >= 1.1.1)
+      net-http-persistent (~> 1.8)
+      nokogiri (~> 1.4)
+      webrobots (~> 0.0, >= 0.0.9)
+    method_source (0.6.5)
+      ruby_parser (>= 2.0.5)
+    mongo (1.3.1)
+      bson (>= 1.3.1)
+    mongoid (2.2.0)
+      activemodel (~> 3.0)
+      mongo (~> 1.3)
+      tzinfo (~> 0.3.22)
+    multi_json (1.0.3)
+    net-http-digest_auth (1.1.1)
+    net-http-persistent (1.9)
+    nokogiri (1.5.0)
+    pry (0.9.5)
+      coderay (>= 0.9.8)
+      method_source (>= 0.6.5)
+      ruby_parser (>= 2.0.5)
+      slop (~> 2.1.0)
+    ruby-debug-base19 (0.11.25)
+      columnize (>= 0.3.1)
+      linecache19 (>= 0.5.11)
+      ruby_core_source (>= 0.1.4)
+    ruby-debug19 (0.11.6)
+      columnize (>= 0.3.1)
+      linecache19 (>= 0.5.11)
+      ruby-debug-base19 (>= 0.11.19)
+    ruby_core_source (0.1.5)
+      archive-tar-minitar (>= 0.5.2)
+    ruby_parser (2.3.0)
+      sexp_processor (~> 3.0)
+    scrapi (2.0.0)
+      tidy_ffi (>= 0.1.2)
+    sexp_processor (3.0.6)
+    slop (2.1.0)
+    tidy_ffi (0.1.3)
+      ffi (>= 0.3.5)
+    tzinfo (0.3.29)
+    webrobots (0.0.11)
+      nokogiri (>= 1.4.4)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  mechanize (~> 2.0)
+  mongoid
+  pry
+  ruby-debug19
+  scrapi
diff --git a/jobs/autotrader_listing.coffee b/jobs/autotrader_listing.coffee
new file mode 100644
index 0000000..812b9bb
--- /dev/null
+++ b/jobs/autotrader_listing.coffee
@@ -0,0 +1,58 @@
+nodeio = require 'node.io'
+
+@class = class Listing extends nodeio.JobClass
+  input: ["http://www.autotrader.ca/a/Mercedes-Benz/C-Class/MILLGROVE/Ontario/19_4430793_/"]
+  run: (url) ->
+    @getHtml url, (err, $) =>
+      return @exit err if err?
+      listingProperties = {}
+
+      $('div[itemtype="http://schema.org/Product"] table td').each (e) =>
+        return unless e.fulltext.length
+
+        # Support rows which use a span
+        #
+        # Make:Mercedes-Benz
+        #
+        # and Support rows which use just a strong
+        #
+        # Year: 2011
+        #
+        try
+          itemPropSpan = $('span', e)
+          key = itemPropSpan.attribs.itemprop
+          value = itemPropSpan.fulltext
+        catch error
+          try
+            strongSpan = $('strong', e)
+            key = strongSpan.text.replace(/:|\s/g, '')
+            value = e.text
+          catch strongError
+            console.log e.innerHTML
+            return @exit strongError
+
+        key = key.toLowerCase()
+        switch key
+          when 'mileage'
+            value = value.replace(/[^0-9]/g, '')
+          when "style/trim", "style", "trim"
+            key = "trim"
+
+        listingProperties[key] = value
+
+
+      # Retrieve the price from
+      # $61,999
+      price = $('#ctl00_PageContentPlaceHolder_financing_lblPriceValue').fulltext.replace(/\$|,/g, '')
+      listingProperties.price = price
+
+      @assert(listingProperties.price).isNumeric()
+      @assert(listingProperties.mileage).isNumeric()
+      @assert(listingProperties.year).isNumeric()
+      @assert(listingProperties.brand).notEmpty()
+      @assert(listingProperties.model).notEmpty()
+      @emit listingProperties
+    null
+
+@job = new Listing()
+
diff --git a/jobs/autotrader_listings.coffee b/jobs/autotrader_listings.coffee
new file mode 100644
index 0000000..de56751
--- /dev/null
+++ b/jobs/autotrader_listings.coffee
@@ -0,0 +1,15 @@
+nodeio = require 'node.io'
+
+@class = class ListingUrls extends nodeio.JobClass
+  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]
+  run: (url) ->
+    @getHtml url, (err, $) =>
+      return @exit err if err?
+      results = []
+      $('.carlink').each (e) =>
+        results.push "http://www.autotrader.ca#{e.attribs.href}"
+      @emit results
+
+    null
+
+@job = new ListingUrls()
diff --git a/jobs/autotrader_search.coffee b/jobs/autotrader_search.coffee
new file mode 100644
index 0000000..659e636
--- /dev/null
+++ b/jobs/autotrader_search.coffee
@@ -0,0 +1,27 @@
+nodeio = require 'node.io'
+
+# Emits the input search URL plus one URL per `rcs` step of 25, up to the
+# `rcs` value found in the pager's last link.
+@class = class ListingSearch extends nodeio.JobClass
+  RCS_RE = /rcs=(\d+)/
+
+  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]
+  run: (url) ->
+    urls = [url]
+    @getHtml url, (err, $) =>
+      # Stop on a fetch error rather than ignoring it, as the sibling jobs do.
+      return @exit err if err?
+      lastLink = $('div.Pager a').last()
+      href = "http://www.autotrader.ca#{lastLink.attribs.href}"
+
+      if match = RCS_RE.exec(href)
+        lastRCS = parseInt(match[1], 10)
+
+        for rcs in [25..lastRCS] by 25
+          urls.push href.replace RCS_RE, "rcs=#{rcs}"
+
+      @emit urls
+
+    null
+
+@job = new ListingSearch()
diff --git a/jobs/autotrader_to_db.coffee b/jobs/autotrader_to_db.coffee
new file mode 100644
index 0000000..237f689
--- /dev/null
+++ b/jobs/autotrader_to_db.coffee
@@ -0,0 +1,18 @@
+nodeio = require 'node.io'
+coffee = require 'coffee-script'
+busters = require '../lib/busters'
+
+@class = class SaveToDB extends nodeio.JobClass
+  input: [{"year":"2011","brand":"Mercedes-Benz","model":"C63 AMG","trim":"Affalterbach Edition","drive":"RWD","status":"Used","mileage":"27","body":"Sedan","exteriorcolour":"Not Specified","price":"117000"}]
+  run: (attributes) ->
+    for k in ['year', 'mileage', 'price']
+      attributes[k] = parseInt(attributes[k], 10) if attributes[k]
+
+    listing = new busters.Listing(attributes)
+    listing.save (err) =>
+      if err
+        @fail err
+      else
+        @emit listing
+    null
+@job = new SaveToDB()
diff --git a/lib/busters.coffee b/lib/busters.coffee
new file mode 100644
index 0000000..2db4902
--- /dev/null
+++ b/lib/busters.coffee
@@ -0,0 +1,17 @@
+mongoose = require 'mongoose'
+
+mongoose.connect 'mongodb://localhost/busters'
+
+ListingSchema = new mongoose.Schema
+  year: Number
+  brand: String
+  model: String
+  trim: String
+  drive: String
+  status: String
+  mileage: Number
+  body: String
+  exteriorcolour: String
+  price: Number
+
+exports.Listing = mongoose.model 'Listing', ListingSchema
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..a5fadd7
--- /dev/null
+++ b/package.json
@@ -0,0 +1,18 @@
+{
+  "author": "",
+  "name": "busters",
+  "description": "cheap lease transfers",
+  "version": "0.0.0",
+  "repository": {
+    "url": ""
+  },
+  "engines": {
+    "node": "~v0.4.11"
+  },
+  "dependencies": {
+    "coffee-script": ">=0.0.0",
+    "node.io": ">=0.0.0",
+    "mongoose": ">=0.0.0"
+  },
+  "devDependencies": {}
+}
diff --git a/test/test.coffee b/test/test.coffee
new file mode 100644
index 0000000..e69de29