In [1]:
import requests
import pandas as pd
from robobrowser import RoboBrowser

In [2]:
SNOOZLE_STUB = "http://www.snoozlesports.net/search/nfl/searchHandlerOdds"
START_DATE = '2016-08-31'
END_DATE = '2018-08-31'
SCHEDULE_STUB = "https://www.pro-football-reference.com/years/{}/games.htm"

In [3]:
ODDS_STUB = '''https://fantasydata.com/NFLTeamStats/Odds_Read?\
sort=&page=1&pageSize=50&group=&filter=&filters.position=&filters.team=\
&filters.season={}&filters.seasontype=1&filters.scope=&filters.subscope=\
&filters.redzonescope=&filters.scoringsystem=&filters.leaguetype=\
&filters.playerid=&filters.searchtext=&filters.week={}&filters.startweek=\
&filters.endweek=&filters.minimumsnaps=&filters.teamaspect=&filters.stattype=\
&filters.exportType=&filters.desktop='''

In [8]:
team_map = pd.read_csv('nfl_teams.csv')

In [9]:
team_map.head()

Unnamed: 0,ID,Name,Abbreviation,Conference,Division
0,1,Arizona Cardinals,ARI,NFC,West
1,2,Atlanta Falcons,ATL,NFC,South
2,3,Baltimore Ravens,BAL,AFC,North
3,4,Buffalo Bills,BUF,AFC,East
4,5,Carolina Panthers,CAR,NFC,South


In [67]:
def scrape_odds(year, week):
    url = ODDS_STUB.format(year, week)
    json_file = requests.get(url).json()
    df = pd.DataFrame(json_file['Data'])
    return df

In [70]:
raw_df = pd.DataFrame()
for year in [str(y) for y in range(2016, 2018)]:
    for week in [str(x) for x in range(1, 18)]:
        df = scrape_odds(year, week)
        df['Year'] = year
        df['Week'] = week
        raw_df = raw_df.append(df, ignore_index=True)
raw_df.to_csv('historical_odds_raw.csv', index=False)

In [78]:
def parse_fullbettingline(row):
    words = row.split(' ')
    if words[3] == 'vs.':
        away_team = words[0]
        away_spread = words[1]
        away_ml = words[2].strip('()')
        home_team = words[4]
        home_spread = words[5]
        home_ml = words[6].strip('()')
    elif words[3] == '@':
        away_team = words[4]
        away_spread = words[5]
        away_ml = words[6].strip('()')
        home_team = words[0]
        home_spread = words[1]
        home_ml = words[2].strip('()')
    else:
        raise Exception('wut')
    return away_team, away_spread, away_ml, home_team, home_spread, home_ml

In [79]:
df = raw_df[['Year', 'Week', 'OverUnder']]
df['game_id'] = range(len(df))
df['team_away'], df['spread_away'], df['ml_away'], df['team_home'], df['spread_home'], df['ml_home'] = zip(*raw_df.FullBettingLine.apply(lambda x: parse_fullbettingline(x)))
full_df = pd.wide_to_long(df, ['team', 'spread', 'ml'], i='game_id', j='home/away', sep='_', suffix='\D+')
full_df = full_df.sort_values('game_id').reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [80]:
full_df.head()

Unnamed: 0,game_id,home/away,Week,OverUnder,Year,team,spread,ml
0,0,away,1,41.0,2016,Broncos,3.0,145
1,0,home,1,41.0,2016,Panthers,-3.0,-165
2,1,away,1,42.5,2016,Texans,-6.0,-260
3,1,home,1,42.5,2016,Bears,6.0,220
4,2,home,1,47.0,2016,Buccaneers,3.0,115


In [81]:
team_map['team'] = team_map.Name.str.split(' ').str[-1]

In [90]:
team_map.head()

Unnamed: 0,ID,Name,Abbreviation,Conference,Division,team
0,1,Arizona Cardinals,ARI,NFC,West,Cardinals
1,2,Atlanta Falcons,ATL,NFC,South,Falcons
2,3,Baltimore Ravens,BAL,AFC,North,Ravens
3,4,Buffalo Bills,BUF,AFC,East,Bills
4,5,Carolina Panthers,CAR,NFC,South,Panthers


In [83]:
combined = pd.merge(full_df, team_map, on='team')

In [84]:
combined.to_csv('historical_odds.csv', index=False)

In [12]:
url = SNOOZLE_STUB + "?fileType=inline&statType=latestodds&startDate={}&endDate={}".format(START_DATE, END_DATE)
json_file = requests.get(url).json()
df = pd.DataFrame(json_file['spreadInfo'])
# df.to_csv('historical_odds.csv', index=False)

In [13]:
df.head()

Unnamed: 0,finalOdds,gameCode,gameDate,homeOdds,homeTeamName,ou,overOdds,time,underOdds,visOdds,visTeamName,vistorSpread
0,True,30002820160831,2016-08-31,0,Buccaneers,36.0,0,"Sep 1, 2016 9:00:22 AM",0,0,Redskins,3.0
1,False,30002820160901,2016-09-01,0,Buccaneers,38.5,0,"Aug 30, 2016 9:00:38 AM",0,0,Redskins,3.0
2,True,20002220160901,2016-09-01,0,Eagles,37.0,0,"Sep 2, 2016 9:01:17 AM",0,0,Jets,3.5
3,True,14000220160901,2016-09-01,0,Falcons,38.0,0,"Sep 2, 2016 9:01:17 AM",0,0,Jaguars,3.5
4,True,29001620160901,2016-09-01,0,Dolphins,37.5,0,"Sep 2, 2016 9:01:17 AM",0,0,Titans,2.5


In [9]:
br = RoboBrowser()
year = '2017'
sched = pd.read_csv('nfl_schedule_2017.csv')

In [11]:
sched.head()

Unnamed: 0,Week,Day,Date,Time,Winner/tie,Unnamed: 5,Loser/tie,Unnamed: 7,PtsW,PtsL,YdsW,TOW,YdsL,TOL
0,1,Thu,7-Sep,8:30PM,Kansas City Chiefs,@,New England Patriots,boxscore,42.0,27.0,537.0,1.0,371.0,0.0
1,1,Sun,10-Sep,1:00PM,Atlanta Falcons,@,Chicago Bears,boxscore,23.0,17.0,372.0,0.0,301.0,0.0
2,1,Sun,10-Sep,1:00PM,Buffalo Bills,,New York Jets,boxscore,21.0,12.0,408.0,1.0,214.0,2.0
3,1,Sun,10-Sep,1:00PM,Pittsburgh Steelers,@,Cleveland Browns,boxscore,21.0,18.0,290.0,1.0,237.0,1.0
4,1,Sun,10-Sep,1:00PM,Baltimore Ravens,@,Cincinnati Bengals,boxscore,20.0,0.0,268.0,1.0,221.0,5.0


In [7]:
table

In [14]:
df.head()

Unnamed: 0,finalOdds,gameCode,gameDate,homeOdds,homeTeamName,ou,overOdds,time,underOdds,visOdds,visTeamName,vistorSpread
0,True,30002820160831,2016-08-31,0,Buccaneers,36.0,0,"Sep 1, 2016 9:00:22 AM",0,0,Redskins,3.0
1,False,30002820160901,2016-09-01,0,Buccaneers,38.5,0,"Aug 30, 2016 9:00:38 AM",0,0,Redskins,3.0
2,True,20002220160901,2016-09-01,0,Eagles,37.0,0,"Sep 2, 2016 9:01:17 AM",0,0,Jets,3.5
3,True,14000220160901,2016-09-01,0,Falcons,38.0,0,"Sep 2, 2016 9:01:17 AM",0,0,Jaguars,3.5
4,True,29001620160901,2016-09-01,0,Dolphins,37.5,0,"Sep 2, 2016 9:01:17 AM",0,0,Titans,2.5


In [15]:
df.tail()

Unnamed: 0,finalOdds,gameCode,gameDate,homeOdds,homeTeamName,ou,overOdds,time,underOdds,visOdds,visTeamName,vistorSpread
612,True,19001720180114,2018-01-14,0,Vikings,46.5,0,"Jan 15, 2018 9:00:21 AM",0,0,Saints,5.5
613,True,14001820180121,2018-01-21,0,Patriots,46.0,0,"Jan 22, 2018 9:01:12 AM",0,0,Jaguars,7.5
614,True,17002220180121,2018-01-21,0,Eagles,39.0,0,"Jan 22, 2018 9:01:13 AM",0,0,Vikings,-3.0
615,True,17001820180204,2018-02-04,0,Patriots,0.0,0,"Feb 5, 2018 9:00:42 AM",0,0,Vikings,3.0
616,True,22001820180204,2018-02-04,0,Patriots,49.0,0,"Feb 5, 2018 9:00:42 AM",0,0,Eagles,4.5


In [16]:
df.shape

(617, 12)

In [17]:
df.finalOdds.value_counts()

True     616
False      1
Name: finalOdds, dtype: int64