Skip to content
Browse files

some scraper code

  • Loading branch information...
1 parent c7bee05 commit 9f23a7dfc01d93e92b5e8115031a29fc253f0ba9 @cjolly committed Sep 19, 2012
Showing with 40 additions and 4 deletions.
  1. +4 −0 TODO.md
  2. +36 −4 lib/pickem/scraper.rb
View
4 TODO.md
@@ -15,3 +15,7 @@
* http://www.nfl.com/more/gamedaypickem
* http://espn.go.com/nfl/picks
* http://www.nfl.com/news/story/09000d5d8227e56d/article/raiders-in-line-to-upset-weary-jets-in-week-3
+* http://www.pro-football-reference.com/years/2012/
+* http://www.advancednflstats.com/
+* http://www.footballoutsiders.com/stats/teameff
+* http://www.wolframalpha.com/resource/nfl.html
View
40 lib/pickem/scraper.rb
@@ -2,12 +2,44 @@
require 'nokogiri'
class Scraper
- NFL_BASE_URI = 'http://www.nfl.com/schedules/2012/REG'
+ PRO_FOOTBALL_REFERENCE_URL = 'http://www.pro-football-reference.com/years/2012/games.htm'
def self.go
- 1.upto(17).each do |week|
- doc = Nokogiri::HTML(open("#{NFL_BASE_URI}/#{week}"))
- p doc
+ doc = Nokogiri::HTML(open(PRO_FOOTBALL_REFERENCE_URL))
+
+ # Week,Day,Date,,Winner/tie,,Loser/tie,PtsW,PtsL,YdsW,TOW,YdsL,TOL
+ past_games_rows = doc.css('table#games tbody tr:not(.thead)')
+
+ games = []
+ # Week,Day,Date,VisTm,,HomeTm,Time
+ upcoming_grames_rows = doc.css('table#games_left tbody tr:not(.thead)').each do |row|
+ row = row.css('td')
+ game = Game.new
+ game.week = row[0].text
+ game.scheduled_at = DateTime.parse("#{row[2].text}, #{row[6].text} Eastern")
+ game.home_team = row[3].text
+ game.visiting_team = row[5].text
+ games << game
end
+ games
+ end
+
+
+ class Game
+ attr_accessor :week
+ attr_accessor :scheduled_at
+ attr_accessor :visiting_team
+ attr_accessor :visiting_team_score
+ attr_accessor :home_team
+ attr_accessor :home_team_score
+ end
+
+ class Season
+ attr_accessor :year
+ end
+
+ class Team
+ attr_accessor :year
+ attr_accessor :full_name
end
end

0 comments on commit 9f23a7d

Please sign in to comment.
Something went wrong with that request. Please try again.