Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8aa7c37
Showing
10 changed files
with
230 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1 @@ | |||
node_modules |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,7 @@ | |||
# Fetch gems over HTTPS. The `:rubygems` symbol shorthand is deprecated by
# Bundler because it resolves to the plain-HTTP rubygems.org endpoint.
source "https://rubygems.org"

gem "mechanize", "~>2.0"
gem "scrapi"
gem "mongoid"
gem "ruby-debug19", :require => "ruby-debug"
gem "pry"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,73 @@ | |||
GEM | |||
remote: http://rubygems.org/ | |||
specs: | |||
activemodel (3.1.0) | |||
activesupport (= 3.1.0) | |||
bcrypt-ruby (~> 3.0.0) | |||
builder (~> 3.0.0) | |||
i18n (~> 0.6) | |||
activesupport (3.1.0) | |||
multi_json (~> 1.0) | |||
archive-tar-minitar (0.5.2) | |||
bcrypt-ruby (3.0.1) | |||
bson (1.3.1) | |||
builder (3.0.0) | |||
coderay (0.9.8) | |||
columnize (0.3.4) | |||
ffi (1.0.9) | |||
i18n (0.6.0) | |||
linecache19 (0.5.12) | |||
ruby_core_source (>= 0.1.4) | |||
mechanize (2.0.1) | |||
net-http-digest_auth (~> 1.1, >= 1.1.1) | |||
net-http-persistent (~> 1.8) | |||
nokogiri (~> 1.4) | |||
webrobots (~> 0.0, >= 0.0.9) | |||
method_source (0.6.5) | |||
ruby_parser (>= 2.0.5) | |||
mongo (1.3.1) | |||
bson (>= 1.3.1) | |||
mongoid (2.2.0) | |||
activemodel (~> 3.0) | |||
mongo (~> 1.3) | |||
tzinfo (~> 0.3.22) | |||
multi_json (1.0.3) | |||
net-http-digest_auth (1.1.1) | |||
net-http-persistent (1.9) | |||
nokogiri (1.5.0) | |||
pry (0.9.5) | |||
coderay (>= 0.9.8) | |||
method_source (>= 0.6.5) | |||
ruby_parser (>= 2.0.5) | |||
slop (~> 2.1.0) | |||
ruby-debug-base19 (0.11.25) | |||
columnize (>= 0.3.1) | |||
linecache19 (>= 0.5.11) | |||
ruby_core_source (>= 0.1.4) | |||
ruby-debug19 (0.11.6) | |||
columnize (>= 0.3.1) | |||
linecache19 (>= 0.5.11) | |||
ruby-debug-base19 (>= 0.11.19) | |||
ruby_core_source (0.1.5) | |||
archive-tar-minitar (>= 0.5.2) | |||
ruby_parser (2.3.0) | |||
sexp_processor (~> 3.0) | |||
scrapi (2.0.0) | |||
tidy_ffi (>= 0.1.2) | |||
sexp_processor (3.0.6) | |||
slop (2.1.0) | |||
tidy_ffi (0.1.3) | |||
ffi (>= 0.3.5) | |||
tzinfo (0.3.29) | |||
webrobots (0.0.11) | |||
nokogiri (>= 1.4.4) | |||
|
|||
PLATFORMS | |||
ruby | |||
|
|||
DEPENDENCIES | |||
mechanize (~> 2.0) | |||
mongoid | |||
pry | |||
ruby-debug19 | |||
scrapi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,58 @@ | |||
nodeio = require 'node.io' | |||
|
|||
# node.io job that scrapes one autotrader.ca listing page and emits a single
# object of listing properties (brand, model, year, mileage, trim, price, ...).
@class = class Listing extends nodeio.JobClass
  input: ["http://www.autotrader.ca/a/Mercedes-Benz/C-Class/MILLGROVE/Ontario/19_4430793_/"]

  # url - address of the listing page to scrape.
  #
  # Emits the collected properties object, or exits the job with the first
  # error encountered while fetching or reading the page.
  run: (url) ->
    @getHtml url, (err, $) =>
      return @exit err if err?
      listingProperties = {}

      # First error hit while reading a cell. It is checked after the loop
      # because returning from the `each` callback only leaves that callback;
      # without this the job would still go on to assert and emit after @exit.
      cellError = null

      $('div[itemtype="http://schema.org/Product"] table td').each (e) =>
        return if cellError?
        return unless e.fulltext.length

        key = value = null

        # Support rows which use a span
        # <td style="width: 50%; ">
        #   <strong>Make:</strong><span itemprop="brand">Mercedes-Benz</span>
        # </td>
        # NOTE(review): relies on `$` throwing when the cell has no span
        # (or on `.attribs` being missing) -- confirm against node.io.
        try
          itemPropSpan = $('span', e)
          key = itemPropSpan.attribs.itemprop
          value = itemPropSpan.fulltext
        catch error
          key = null

        # and support rows which use just a strong
        # <td style="width: 50%; ">
        #   <strong>Year: </strong>2011
        # </td>
        # This path is also taken when the span carries no itemprop
        # attribute, which previously left `key` undefined and crashed on
        # `key.toLowerCase()` below.
        unless key
          try
            strongSpan = $('strong', e)
            key = strongSpan.text.replace(/:|\s/g, '')
            value = e.text
          catch strongError
            console.log e.innerHTML
            cellError = strongError
            return

        # Nothing usable as a property name in this cell; skip it.
        return unless key

        key = key.toLowerCase()
        switch key
          when 'mileage'
            # Keep digits only, e.g. "27,000 km" -> "27000".
            value = value.replace(/[^0-9]/g, '')
          when "style/trim", "style", "trim"
            # Normalise the different trim labels to one property name.
            key = "trim"

        listingProperties[key] = value

      return @exit cellError if cellError?

      # Retrieve the price from
      # <span id="ctl00_PageContentPlaceHolder_financing_lblPriceValue" class="loadfinancing_green_text">$61,999</span>
      price = $('#ctl00_PageContentPlaceHolder_financing_lblPriceValue').fulltext.replace(/\$|,/g, '')
      listingProperties.price = price

      @assert(listingProperties.price).isNumeric()
      @assert(listingProperties.mileage).isNumeric()
      @assert(listingProperties.year).isNumeric()
      @assert(listingProperties.brand).notEmpty()
      @assert(listingProperties.model).notEmpty()
      @emit listingProperties

    # Avoid implicitly returning the result of @getHtml.
    null

@job = new Listing()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,15 @@ | |||
nodeio = require 'node.io' | |||
|
|||
# node.io job: collects the absolute URL of every `.carlink` anchor on a
# search-results page and emits them together as one array.
@class = class ListingUrls extends nodeio.JobClass
  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]

  # url - address of the search-results page to scan.
  run: (url) ->
    @getHtml url, (err, $) =>
      if err?
        return @exit err

      listingUrls = []
      $('.carlink').each (link) =>
        # hrefs on the page are site-relative; prefix the host.
        path = link.attribs.href
        listingUrls.push "http://www.autotrader.ca#{path}"

      @emit listingUrls

    # Avoid implicitly returning the result of @getHtml.
    null

@job = new ListingUrls()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,23 @@ | |||
nodeio = require 'node.io' | |||
|
|||
# node.io job: given the first page of a search, emits the URLs of every
# results page by stepping the `rcs` offset found in the pager's last link.
@class = class ListingSearch extends nodeio.JobClass
  # Matches the `rcs` result-offset query parameter in pager links.
  RCS_RE = /rcs=(\d+)/

  # Step between consecutive `rcs` offsets.
  # NOTE(review): presumably the number of results per page -- confirm.
  PAGE_SIZE = 25

  input: ["http://www.autotrader.ca/a/pv/Used/Mercedes-Benz/C-Class/MERCEDESBENZ+CCLASS/?cat2=7%2c11%2c9%2c10&prv=Ontario"]

  # url - address of the first search-results page.
  #
  # Emits an array starting with `url`, followed by one URL per further page.
  run: (url) ->
    urls = [url]
    @getHtml url, (err, $) =>
      # Same guard as the sibling jobs: `$` is unusable when the fetch failed.
      return @exit err if err?

      # A search that fits on one page has no pager links.
      # NOTE(review): node.io's `$` appears to throw when nothing matches;
      # treat that as "single page" rather than crashing the job -- confirm.
      try
        lastLink = $('div.Pager a').last()
      catch pagerError
        lastLink = null

      pagerPath = lastLink?.attribs?.href
      if pagerPath?
        href = "http://www.autotrader.ca#{pagerPath}"

        if match = RCS_RE.exec(href)
          lastRCS = parseInt(match[1], 10)

          # The first page is already in `urls`; add each later offset.
          for rcs in [PAGE_SIZE..lastRCS] by PAGE_SIZE
            urls.push href.replace RCS_RE, "rcs=#{rcs}"

      @emit urls

    # Avoid implicitly returning the result of @getHtml.
    null

@job = new ListingSearch()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,18 @@ | |||
nodeio = require 'node.io' | |||
coffee = require 'coffee-script' | |||
busters = require '../lib/busters' | |||
|
|||
# node.io job: converts the numeric fields of a scraped listing to integers,
# saves it as a busters.Listing and emits the saved document.
@class = class SaveToDB extends nodeio.JobClass
  input: [{"year":"2011","brand":"Mercedes-Benz","model":"C63 AMG","trim":"Affalterbach Edition","drive":"RWD","status":"Used","mileage":"27","body":"Sedan","exteriorcolour":"Not Specified","price":"117000"}]

  # attributes - object of listing properties; year, mileage and price are
  #              parsed to base-10 integers in place when present.
  run: (attributes) ->
    for field in ['year', 'mileage', 'price'] when attributes[field]
      attributes[field] = parseInt(attributes[field], 10)

    listing = new busters.Listing(attributes)
    listing.save (err) =>
      # Fail the job on a save error, otherwise pass the listing on.
      return @fail err if err
      @emit listing

    # Avoid implicitly returning the result of listing.save.
    null

@job = new SaveToDB()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,17 @@ | |||
mongoose = require 'mongoose' | |||
|
|||
# Connect mongoose to the "busters" database on localhost.
mongoose.connect 'mongodb://localhost/busters'

# Fields stored for each listing and their types.
listingFields =
  year: Number
  brand: String
  model: String
  trim: String
  drive: String
  status: String
  mileage: Number
  body: String
  exteriorcolour: String
  price: Number

ListingSchema = new mongoose.Schema listingFields

# Exported model, registered with mongoose under the name 'Listing'.
exports.Listing = mongoose.model 'Listing', ListingSchema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,18 @@ | |||
{ | |||
"author": "", | |||
"name": "busters", | |||
"description": "cheap lease transfers", | |||
"version": "0.0.0", | |||
"repository": { | |||
"url": "" | |||
}, | |||
"engines": { | |||
"node": "~v0.4.11" | |||
}, | |||
"dependencies": { | |||
"coffee-script": ">=0.0.0", | |||
"node.io": ">=0.0.0", | |||
"mongoose": ">=0.0.0" | |||
}, | |||
"devDependencies": {} | |||
} |
Empty file.