Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

WWOZ one page scraper

  • Loading branch information...
commit 94868d701562fa847571f126005767deb24dbcb8 1 parent 40216b3
@daybreaker authored
View
6 Rakefile
@@ -1,6 +1,7 @@
require_relative 'main'
require 'sinatra/activerecord/rake'
require_relative 'places'
+require_relative 'wwoz'
namespace :scrape do
desc "Scrape Google Places"
@@ -9,5 +10,10 @@ namespace :scrape do
g.grid(5)
end
+ desc "Scrape WWOZ"
+ task :wwoz do
+ w = WWOZLivewire.new
+ w.scrape
+ end
end
View
23 config.rb
@@ -0,0 +1,23 @@
+require 'rubygems'
+require 'google_places'
+require 'pry'
+require 'active_record'
+require_relative 'db/models'
+require_relative 'api_keys'
+
+module CloseEnough
+ module Config
+ GooglePlaces = {
+ :api_keys => APIKeys::GoogleKeys,
+ :types => %w(art_gallery bar book_store bowling_alley cafe casino church library movie_theater restaurant place_of_worship night_club museum school shopping_mall stadium university zoo),
+ :main_types => %w(bar night_club stadium art_gallery cafe university),
+ :top => 30.034176,
+ :left => -90.146566,
+ :bottom => 29.908057,
+ :right => -90.017084
+ }
+
+ end
+end
+
+
View
11 db/migrate/20111210214707_add_fields_to_locations.rb
@@ -0,0 +1,11 @@
+class AddFieldsToLocations < ActiveRecord::Migration
+ def self.up
+ add_column :locations, :status, :string
+ add_column :locations, :from_wwoz, :integer
+ end
+
+ def self.down
+ remove_column :locations, :status
+ remove_column :locations, :from_wwoz
+ end
+end
View
1  places.rb
@@ -7,7 +7,6 @@ def initialize
end
def grid(steps)
- l = []
(@bottom..@top).step( (@top - @bottom) / steps ) do |lat|
(@left..@right).step( (@right - @left) / steps ) do |long|
places = @client.spots(lat, long, :types => @main_types )
View
57 wwoz.rb
@@ -1,19 +1,50 @@
require 'open-uri'
require 'nokogiri'
require 'pry'
+require_relative 'config'
-urls = []
-(0..15).each do |x|
- urls << "http://www.wwoz.org/new-orleans-community/music-venues?page=" + x.to_s
-end
-
-url = urls.first #for testing only
-#urls.each do |url|
- venues_html = Nokogiri::HTML(open(url))
- venues = venues_html.css('div.view-content div.item-list ul li.views-row span.field-content a')
+class WWOZLivewire
- venues.each do |venue|
- puts venue.text + "\n"
- vurl = 'http://www.wwoz.org' + venue.attributes['href'].value
- Nokogiri::HTML(open(url))
+  # Reads the configured Google Places API keys, picks one at random and
+  # builds the Places client with it.
+  def initialize
+    @api_keys = CloseEnough::Config::GooglePlaces[:api_keys]
+    chosen_key = @api_keys.sample(1).join
+    @client = GooglePlaces::Client.new(chosen_key)
end
+
+ def scrape
+ urls = []
+ (0..15).each do |x|
+ urls << "http://www.wwoz.org/new-orleans-community/music-venues?page=" + x.to_s
+ end
+
+ url = urls.first #for testing only
+ #urls.each do |url|
+ venues_html = Nokogiri::HTML(open(url))
+ venues = venues_html.css('div.view-content div.item-list ul li.views-row span.field-content a')
+
+ venues.each do |venue|
+ puts venue.text + "\n"
+ vurl = 'http://www.wwoz.org' + venue.attributes['href'].value
+ venue_details_html = Nokogiri::HTML(open(vurl)).css('div.node')
+ status = venue_details_html.css('.venue-status').text.downcase.include?("open") ? 'open' : 'closed'
+ address = venue_details_html.css('.street-address').text + " " + venue_details_html.css('.locality').text + ', ' + venue_details_html.css('.region').text
+ map_matches = /q=(.*?)\+(.*?)\+/.match (venue_details_html.css('.location.map-link a').select{|x| x.text == "Google Maps" }.first.attributes['href'].value)
+ lat, long = map_matches[1..2]
+
+ #places = @client.spots(lat, long, :name => venue.text )
+
+ l = Location.new({
+ :vicinity => address,
+ :lat => lat,
+ :lng => long,
+ :status => status,
+ :from_wwoz => 1,
+ :name => venue.text
+ })
+
+ l.save
+
+ end
+ #end
+ end
+
+end
Please sign in to comment.
Something went wrong with that request. Please try again.