Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 8aa7c375ca016f2fedf31324625994f9c6ce80c0 @airhorns committed Sep 18, 2011
@@ -0,0 +1 @@
+node_modules
@@ -0,0 +1,7 @@
+source :rubygems
+
+gem "mechanize", "~>2.0"
+gem "scrapi"
+gem "mongoid"
+gem "ruby-debug19", :require => "ruby-debug"
+gem "pry"
@@ -0,0 +1,73 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ activemodel (3.1.0)
+ activesupport (= 3.1.0)
+ bcrypt-ruby (~> 3.0.0)
+ builder (~> 3.0.0)
+ i18n (~> 0.6)
+ activesupport (3.1.0)
+ multi_json (~> 1.0)
+ archive-tar-minitar (0.5.2)
+ bcrypt-ruby (3.0.1)
+ bson (1.3.1)
+ builder (3.0.0)
+ coderay (0.9.8)
+ columnize (0.3.4)
+ ffi (1.0.9)
+ i18n (0.6.0)
+ linecache19 (0.5.12)
+ ruby_core_source (>= 0.1.4)
+ mechanize (2.0.1)
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
+ net-http-persistent (~> 1.8)
+ nokogiri (~> 1.4)
+ webrobots (~> 0.0, >= 0.0.9)
+ method_source (0.6.5)
+ ruby_parser (>= 2.0.5)
+ mongo (1.3.1)
+ bson (>= 1.3.1)
+ mongoid (2.2.0)
+ activemodel (~> 3.0)
+ mongo (~> 1.3)
+ tzinfo (~> 0.3.22)
+ multi_json (1.0.3)
+ net-http-digest_auth (1.1.1)
+ net-http-persistent (1.9)
+ nokogiri (1.5.0)
+ pry (0.9.5)
+ coderay (>= 0.9.8)
+ method_source (>= 0.6.5)
+ ruby_parser (>= 2.0.5)
+ slop (~> 2.1.0)
+ ruby-debug-base19 (0.11.25)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby_core_source (>= 0.1.4)
+ ruby-debug19 (0.11.6)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby-debug-base19 (>= 0.11.19)
+ ruby_core_source (0.1.5)
+ archive-tar-minitar (>= 0.5.2)
+ ruby_parser (2.3.0)
+ sexp_processor (~> 3.0)
+ scrapi (2.0.0)
+ tidy_ffi (>= 0.1.2)
+ sexp_processor (3.0.6)
+ slop (2.1.0)
+ tidy_ffi (0.1.3)
+ ffi (>= 0.3.5)
+ tzinfo (0.3.29)
+ webrobots (0.0.11)
+ nokogiri (>= 1.4.4)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ mechanize (~> 2.0)
+ mongoid
+ pry
+ ruby-debug19
+ scrapi
@@ -0,0 +1,58 @@
+nodeio = require 'node.io'
+
+# node.io job that scrapes one autotrader.ca listing page and emits an object
+# holding the listing's properties (brand, model, year, mileage, price, ...).
+@class = class Listing extends nodeio.JobClass
+  input: ["http://www.autotrader.ca/a/Mercedes-Benz/C-Class/MILLGROVE/Ontario/19_4430793_/"]
+
+  # Fetches `url`, reads every non-empty cell of the product table into a
+  # key/value map, adds the price, validates the result and emits it.
+  # Exits the job with the error when the page cannot be fetched or a cell
+  # cannot be parsed. Always returns null.
+  run: (url) ->
+    @getHtml url, (err, $) =>
+      if err? then return @exit err
+      props = {}
+
+      $('div[itemtype="http://schema.org/Product"] table td').each (cell) =>
+        return unless cell.fulltext.length
+
+        # Two cell layouts are handled. Cells carrying an itemprop span:
+        #   <td style="width: 50%; ">
+        #     <strong>Make:</strong><span itemprop="brand">Mercedes-Benz</span>
+        #   </td>
+        # and cells that only have a strong label followed by bare text:
+        #   <td style="width: 50%; ">
+        #     <strong>Year: </strong>2011
+        #   </td>
+        # The span layout is tried first; if that lookup throws, the strong
+        # label is used as the key instead.
+        try
+          span = $('span', cell)
+          key = span.attribs.itemprop
+          value = span.fulltext
+        catch spanError
+          try
+            label = $('strong', cell)
+            key = label.text.replace(/:|\s/g, '')
+            value = cell.text
+          catch strongError
+            console.log cell.innerHTML
+            return @exit strongError
+
+        key = key.toLowerCase()
+        if key is 'mileage'
+          # Keep only the digits of the mileage.
+          value = value.replace(/[^0-9]/g, '')
+        else if key in ["style/trim", "style", "trim"]
+          # Collapse the trim label variants into a single key.
+          key = "trim"
+
+        props[key] = value
+
+      # The price lives outside the table, in an element such as
+      # <span id="ctl00_PageContentPlaceHolder_financing_lblPriceValue" class="loadfinancing_green_text">$61,999</span>
+      priceNode = $('#ctl00_PageContentPlaceHolder_financing_lblPriceValue')
+      props.price = priceNode.fulltext.replace(/\$|,/g, '')
+
+      @assert(props[field]).isNumeric() for field in ['price', 'mileage', 'year']
+      @assert(props[field]).notEmpty() for field in ['brand', 'model']
+      @emit props
+    null
+
+@job = new Listing
+
@@ -0,0 +1,15 @@
+nodeio = require 'node.io'
+
+# node.io job that collects the listing links found on one search-results page.
+@class = class ListingUrls extends nodeio.JobClass
+  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]
+
+  # Fetches `url` and emits an array of absolute URLs, one per `.carlink`
+  # element, built by prefixing each href with the site root.
+  # Exits the job with the error when the page cannot be fetched.
+  # Always returns null.
+  run: (url) ->
+    @getHtml url, (err, $) =>
+      if err? then return @exit err
+      listingUrls = []
+      $('.carlink').each (link) =>
+        listingUrls.push "http://www.autotrader.ca#{link.attribs.href}"
+      @emit listingUrls
+
+    null
+
+@job = new ListingUrls
@@ -0,0 +1,23 @@
+nodeio = require 'node.io'
+
+# node.io job that expands one search URL into the URLs of its result pages.
+@class = class ListingSearch extends nodeio.JobClass
+  # Captures the numeric value of the `rcs` query parameter
+  # (presumably the result offset of a page -- confirm against the site).
+  RCS_RE = /rcs=(\d+)/
+
+  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]
+
+  # Fetches `url`, reads the href of the last pager link and, when it carries
+  # an `rcs` value, generates one URL per multiple of 25 up to that value.
+  # Emits the original URL followed by the generated ones.
+  # Exits the job with the error when the page cannot be fetched.
+  # Always returns null.
+  run: (url) ->
+    urls = [url]
+    @getHtml url, (err, $) =>
+      # Bail out on a failed request, as Listing and ListingUrls do; without
+      # this guard `$` would be used even though the fetch did not succeed.
+      return @exit err if err?
+
+      lastLink = $('div.Pager a').last()
+      href = "http://www.autotrader.ca#{lastLink.attribs.href}"
+
+      if match = RCS_RE.exec(href)
+        lastRCS = parseInt(match[1], 10)
+
+        # Rewrite the last page's URL once per 25-step offset.
+        for rcs in [25..lastRCS] by 25
+          urls.push href.replace RCS_RE, "rcs=#{rcs}"
+
+      @emit urls
+
+    null
+
+@job = new ListingSearch()
@@ -0,0 +1,18 @@
+nodeio = require 'node.io'
+coffee = require 'coffee-script'
+busters = require '../lib/busters'
+
+# node.io job that persists one scraped listing through the busters.Listing model.
+@class = class SaveToDB extends nodeio.JobClass
+  input: [{"year":"2011","brand":"Mercedes-Benz","model":"C63 AMG","trim":"Affalterbach Edition","drive":"RWD","status":"Used","mileage":"27","body":"Sedan","exteriorcolour":"Not Specified","price":"117000"}]
+
+  # Converts the year, mileage and price attributes (when present) to base-10
+  # integers in place, saves a new Listing built from `attributes`, then emits
+  # the saved listing or fails the job with the save error.
+  # Always returns null.
+  run: (attributes) ->
+    for field in ['year', 'mileage', 'price'] when attributes[field]
+      attributes[field] = parseInt(attributes[field], 10)
+
+    listing = new busters.Listing(attributes)
+    listing.save (err) =>
+      return @fail err if err
+      @emit listing
+    null
+
+@job = new SaveToDB
@@ -0,0 +1,17 @@
+mongoose = require 'mongoose'
+
+# Open the default mongoose connection as soon as this module is loaded.
+mongoose.connect 'mongodb://localhost/busters'
+
+# Field name -> type for a stored listing.
+listingFields =
+  year: Number
+  brand: String
+  model: String
+  trim: String
+  drive: String
+  status: String
+  mileage: Number
+  body: String
+  exteriorcolour: String
+  price: Number
+
+# Export the model registered under the name 'Listing'.
+exports.Listing = mongoose.model 'Listing', new mongoose.Schema(listingFields)
@@ -0,0 +1,18 @@
+{
+ "author": "",
+ "name": "busters",
+ "description": "cheap lease transfers",
+ "version": "0.0.0",
+ "repository": {
+ "url": ""
+ },
+ "engines": {
+ "node": "~v0.4.11"
+ },
+ "dependencies": {
+ "coffee-script": ">=0.0.0",
+ "node.io": ">=0.0.0",
+ "mongoose": ">=0.0.0"
+ },
+ "devDependencies": {}
+}
No changes.

0 comments on commit 8aa7c37

Please sign in to comment.