In [52]:
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException
import os
from time import sleep
import pandas as pd

# Base URL of the projection pages; the position and query string are appended to it.
PROJECTIONS_STUB = "http://www.fantasypros.com/nfl/projections/"
# Working directory when this cell runs; used as the root of the projections/ tree.
CWD = os.getcwd()

# login.txt must contain exactly two lines: the username, then the password
# (any other line count makes the tuple unpacking below raise ValueError).
# The context manager closes the file as soon as it has been read, and the
# handle has its own name so it is not confused with the login() function.
with open('login.txt') as login_file:
    USERNAME, PASSWORD = login_file.read().splitlines()

In [53]:
def set_profile(year, week):
    """Build the Firefox profile and options used to download one week's files.

    The profile's download directory preference is set to
    ``CWD + '/projections/<year>/<week>'`` and the options carry the
    ``-headless`` argument.

    Args:
        year: Season, interpolated into the download directory.
        week: Week, interpolated into the download directory.

    Returns:
        A ``(profile, options)`` tuple: a ``webdriver.FirefoxProfile`` and a
        ``webdriver.FirefoxOptions``.
    """
    download_dir = CWD + '/projections/{}/{}'.format(year, week)
    preferences = [
        ('browser.download.folderList', 2),  # custom location
        ('browser.download.manager.showWhenStarting', False),
        ('browser.download.dir', download_dir),
        ('browser.helperApps.neverAsk.saveToDisk', 'application/ms-excel'),
    ]

    profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        profile.set_preference(pref_name, pref_value)

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    return profile, options

In [54]:
def login(username, password, profile, options):
    """Start Firefox, submit the FantasyPros login form, and return the browser.

    Args:
        username: Text typed into the ``id_username`` field.
        password: Text typed into the ``id_password`` field.
        profile: Firefox profile passed to ``webdriver.Firefox``.
        options: Firefox options passed to ``webdriver.Firefox``.

    Returns:
        The running browser. The caller is responsible for quitting it.

    Raises:
        Whatever the driver raises while loading or filling the form. In that
        case the browser is quit before the exception propagates, so a failed
        login does not leave a Firefox process behind.
    """
    browser = webdriver.Firefox(firefox_profile=profile, firefox_options=options)
    try:
        browser.get('https://secure.fantasypros.com/accounts/login/')
        browser.find_element_by_id("id_username").send_keys(username)
        password_field = browser.find_element_by_id("id_password")
        password_field.send_keys(password)
        password_field.submit()
        # Fixed pause after submitting the form before handing the browser back.
        sleep(5)
    except BaseException:
        # Includes KeyboardInterrupt during the pause: never leak the browser.
        browser.quit()
        raise

    return browser

In [55]:
def scrape_week(browser, year, week, positions=('dst',)):
    """Download one week's projection exports, then quit the browser.

    For each position the export page is loaded and its download control
    (class ``fa-fp-download``) is clicked. If that click is intercepted, the
    element with id ``cboxClose`` is clicked first and the download click is
    retried once.

    Args:
        browser: Logged-in selenium browser; it is always quit before this
            function returns or raises.
        year: Season, interpolated into the export URL.
        week: Week, interpolated into the export URL.
        positions: Position slugs to fetch. Defaults to ``('dst',)``, matching
            the previous hard-coded list; pass
            ``('qb', 'rb', 'wr', 'te', 'k', 'dst')`` to fetch every position.
    """
    try:
        for position in positions:
            browser.get(PROJECTIONS_STUB + position + ".php?export=xls&year={}&week={}".format(year, week))
            sleep(3)
            try:
                browser.find_element_by_class_name("fa-fp-download").click()
            # The except clause must name the exception *class*; naming an
            # instance makes Python raise TypeError instead of catching it.
            except ElementClickInterceptedException:
                browser.find_element_by_id("cboxClose").click()
                browser.find_element_by_class_name("fa-fp-download").click()
            sleep(1)
    finally:
        # Quit even when a page load or click fails, so no browser is leaked.
        browser.quit()

In [56]:
# Scrape weeks 1-17 of the 2016 and 2017 seasons. Every week gets its own
# profile (set_profile points the download directory at that year/week) and a
# freshly logged-in browser, which scrape_week() quits once it is done.
for year in map(str, range(2016, 2018)):
    for week in map(str, range(1, 18)):
        profile, options = set_profile(year, week)
        browser = login(USERNAME, PASSWORD, profile, options)
        scrape_week(browser, year, week)

In [57]:
FILENAME_STUB = "FantasyPros_Fantasy_Football_Projections_{}.csv"

# Per-position renames that turn the generic export headers (ATT/YDS/TDS and
# their pandas-deduplicated '.1' twins) into stat-specific column names, so the
# positions can be stacked into one frame. Positions without an entry (K, DST)
# keep their headers unchanged.
COLUMN_RENAMES = {
    'QB': {'ATT': 'ATT_PASS', 'YDS': 'YDS_PASS', 'TDS': 'TDS_PASS',
           'ATT.1': 'ATT_RUSH', 'YDS.1': 'YDS_RUSH', 'TDS.1': 'TDS_RUSH'},
    # The first ATT/YDS/TDS group of the RB export is rushing, so TDS maps to
    # TDS_RUSH (it was previously mislabelled TDS_PASS).
    'RB': {'ATT': 'ATT_RUSH', 'YDS': 'YDS_RUSH', 'TDS': 'TDS_RUSH',
           'YDS.1': 'YDS_REC', 'TDS.1': 'TDS_REC'},
    'WR': {'ATT': 'ATT_RUSH', 'YDS': 'YDS_REC', 'TDS': 'TDS_REC',
           'YDS.1': 'YDS_RUSH', 'TDS.1': 'TDS_RUSH'},
    'TE': {'YDS': 'YDS_REC', 'TDS': 'TDS_REC'},
}

# Collect the per-file frames and concatenate once; appending to a DataFrame
# inside the loop re-copies the accumulated data on every iteration.
frames = []
for year in [str(y) for y in range(2016, 2018)]:
    for week in [str(x) for x in range(1, 18)]:
        for position in ['QB', 'RB', 'WR', 'TE', 'K', 'DST']:
            df = pd.read_csv(CWD + '/projections/{}/{}/'.format(year, week) + FILENAME_STUB.format(position))
            df['Year'] = year
            df['Week'] = week
            df['Pos'] = position
            df = df.rename(columns=COLUMN_RENAMES.get(position, {}))
            frames.append(df)

# sort=True keeps the alphabetically sorted column order that the old
# append-based code produced and silences the pandas sorting FutureWarning.
full_df = pd.concat(frames, ignore_index=True, sort=True)
# Drop filler rows: missing Player, or a Player that is only a non-breaking space.
full_df = full_df[full_df.Player.notnull() & (full_df.Player != '\xa0')]
# Stats a position does not have are NaN after stacking; store them as 0.
full_df = full_df.fillna(0).reset_index(drop=True)
full_df.to_csv('historical_projections.csv', index=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


In [45]:
full_df.head()

Unnamed: 0,ATT_PASS,ATT_RUSH,CMP,FG,FGA,FL,FPTS,INTS,Player,REC,TDS_PASS,TDS_REC,TDS_RUSH,Team,XPT,YDS_PASS,YDS_REC,YDS_RUSH
0,30.1,6.3,19.9,0.0,0.0,0.2,20.6,0.6,Russell Wilson,0.0,1.9,0.0,0.2,SEA,0.0,245.0,0.0,34.9
1,36.9,3.7,23.3,0.0,0.0,0.2,20.4,0.5,Aaron Rodgers,0.0,2.1,0.0,0.1,GB,0.0,274.7,0.0,17.4
2,38.7,4.0,23.5,0.0,0.0,0.3,20.3,1.0,Andrew Luck,0.0,2.4,0.0,0.1,IND,0.0,275.9,0.0,17.8
3,40.4,1.3,27.0,0.0,0.0,0.2,19.7,0.9,Drew Brees,0.0,2.3,0.0,0.1,NO,0.0,307.2,0.0,2.2
4,37.1,2.1,23.4,0.0,0.0,0.3,18.5,0.7,Derek Carr,0.0,2.2,0.0,0.0,OAK,0.0,274.5,0.0,7.7


In [46]:
full_df.tail()

Unnamed: 0,ATT_PASS,ATT_RUSH,CMP,FG,FGA,FL,FPTS,INTS,Player,REC,TDS_PASS,TDS_REC,TDS_RUSH,Team,XPT,YDS_PASS,YDS_REC,YDS_RUSH
15660,0.0,0.0,0.0,1.1,1.3,0.0,4.8,0.0,Zane Gonzalez,0.0,0.0,0.0,0.0,CLE,1.6,0.0,0.0,0.0
15661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jason Myers,0.0,0.0,0.0,0.0,NYJ,0.0,0.0,0.0,0.0
15662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Jonathan Brown,0.0,0.0,0.0,0.0,CIN,0.0,0.0,0.0,0.0
15663,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Roberto Aguayo,0.0,0.0,0.0,0.0,LAC,0.0,0.0,0.0,0.0
15664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Marshall Koehn,0.0,0.0,0.0,0.0,NYG,0.0,0.0,0.0,0.0


In [47]:
full_df.loc[1, 'Player']

'Aaron Rodgers'

In [30]:
# Shape of full_df restricted to rows whose Player is non-null and is not a
# lone non-breaking space ('\xa0').
full_df[(full_df.Player.notnull()) & (full_df.Player != '\xa0')].shape

(15665, 18)

In [18]:
full_df.shape

(16141, 18)

In [34]:
# Number of rows in which every column is null.
full_df.isnull().all(axis=1).sum()

0

In [21]:
# Number of rows with a missing Player value.
full_df.Player.isnull().sum()

340

In [35]:
# Shape of the subset whose Player is exactly a non-breaking space ('\xa0').
full_df[full_df.Player == '\xa0'].shape

(0, 18)

In [28]:
full_df.loc[0, 'Player']

'\xa0'

In [13]:
# Load one raw export (2017, week 1, RB) on its own so its original column
# headers can be inspected.
FILENAME_STUB = "FantasyPros_Fantasy_Football_Projections_{}.csv"
year, week, position = "2017", "1", "RB"
week_dir = CWD + '/projections/{}/{}/'.format(year, week)
df = pd.read_csv(week_dir + FILENAME_STUB.format(position))
            

In [14]:
df.head()

Unnamed: 0,Player,Team,ATT,YDS,TDS,REC,YDS.1,TDS.1,FL,FPTS
0,,,,,,,,,,
1,Le'Veon Bell,PIT,20.7,94.5,0.8,4.7,41.7,0.2,0.2,18.7
2,David Johnson,ARI,18.6,80.3,0.8,5.2,48.6,0.2,0.2,18.5
3,Ezekiel Elliott,DAL,20.4,92.7,0.7,2.6,21.9,0.1,0.1,16.1
4,LeSean McCoy,BUF,18.7,85.3,0.7,3.3,25.8,0.1,0.1,15.6
