# Import requirements

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
%matplotlib inline

# Scrape historical data from footballdb.com (2010 - 2017)

In [2]:
def generate_url_string(position,year,week):
    '''
    Build the footballdb.com weekly fantasy-stats URL for one position, season and week.

    position -- one of 'QB', 'RB', 'WR', 'TE' or 'DST'
    year     -- season, converted with str() (e.g. 2013)
    week     -- week number, converted with str() (e.g. 5)

    Returns the URL string. For an unsupported position nothing is raised; an
    'ERROR: ...' message string is returned in place of a URL, so callers that
    may pass arbitrary positions should check the result before requesting it.
    '''
    valid_positions = ('QB', 'RB', 'WR', 'TE', 'DST')
    if position not in valid_positions:
        # The message lists the values this check actually accepts.
        return 'ERROR: position must be one of ' + ', '.join(valid_positions)
    # rules=2 is a fixed query parameter.
    # NOTE(review): presumably it selects the scoring rule set - confirm against the site.
    return ('http://www.footballdb.com/fantasy-football/index.html'
            '?pos=' + position + '&yr=' + str(year) + '&wk=' + str(week) + '&rules=2')

def get_year_week_df(url_string, timeout=30):
    '''
    Download one footballdb.com weekly stats page and return it as a DataFrame.

    url_string -- page URL, e.g. the output of generate_url_string()
    timeout    -- seconds passed to requests.get so a stalled connection cannot
                  hang the scrape indefinitely (default 30)

    Returns a DataFrame of text cells with one row per 'row0'/'row1' table row
    of the 'statistics' table, labelled with the page's header row. Returns None
    when the page has no header row, no statistics table, or fewer than two data
    rows; callers use None as the "no data" signal.
    '''
    page = requests.get(url_string, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    header_row = soup.find('tr', {'class': 'header right'})
    stats_table = soup.find('table', {'class': 'statistics'})
    if header_row is None or stats_table is None:
        # find() returns None when the element is absent; report "no data"
        # instead of failing with AttributeError on the lookups below.
        return None

    # Header text is newline-separated: drop the blank pieces and strip '*' markers.
    table_headers = header_row.get_text().split('\n')
    column_labels = [str(header).replace('*','') for header in table_headers if len(str(header))>0]

    row_stats = stats_table.find_all('tr', {'class': ['row0','row1']})
    # NOTE(review): a page with exactly one data row is also treated as empty - confirm this is intended.
    if len(row_stats) <= 1:
        return None

    total_data = []
    for row in row_stats:
        player_team = str(row.find('span', {'class':'hidden-xs'}).get_text())
        # Cells stay as text: calling .encode('utf-8') here would store bytes
        # objects in the frame on Python 3. Non-breaking spaces become plain spaces.
        game_data = [td.text.replace(u'\xa0',' ').strip() for td in row.find_all('td') if td.text]
        # The first cell is replaced by the text of the 'hidden-xs' span.
        data = [player_team] + game_data[1:]
        total_data.append(data)
    df = pd.DataFrame(total_data)
    df.columns = column_labels
    return df

In [3]:
# Download every season/week/position page and save each one as its own CSV file.
output_dir = './all_data/footballdb_data/'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for year in range(2010,2018,1):
    for week in range(1,18,1):
        for position in ['QB','RB','WR','TE','DST']:
            url_string = generate_url_string(position, year, week)
            df = get_year_week_df(url_string)
            if df is None:
                # No data came back for this position: the remaining positions
                # of the same week are skipped as well.
                break
            csv_filename = output_dir + position + '_' + str(year) + '_week' + str(week) + '.csv'
            # NOTE(review): the row index is written as an unnamed first column;
            # pass index=False if downstream readers do not rely on it.
            df.to_csv(csv_filename)

# Historical Fantasy Projections (2013 - 2017)

In [None]:
# Example fantasydata.com weekly-projection URLs for a few season/week
# combinations, stored as complete strings so the cell is valid Python.
# NOTE(review): judging only from these samples, `sn` looks like the number of
# seasons back from 2017 (2017 -> 0, 2014 -> 3, 2013 -> 4) and `w`/`ew` look like
# zero-based week numbers (week1 -> 0, week2 -> 1, week4 -> 3) - confirm before
# generating URLs from this pattern.
PROJECTION_URL_EXAMPLES = {
    'QB 2013 week1': (
        'https://fantasydata.com/nfl-stats/fantasy-football-weekly-projections.aspx?'
        'fs=3&stype=0&sn=4&scope=0&w=0&ew=0&s=&t=0&p=1&st=FantasyPointsDraftKings&d=1&ls=FantasyPointsDraftKings'
        '&live=false&pid=false&minsnaps=4'
    ),
    'QB 2014 week4': (
        'https://fantasydata.com/nfl-stats/fantasy-football-weekly-projections.aspx?'
        'fs=3&stype=0&sn=3&scope=1&w=3&ew=3&s=&t=0&p=1&st=FantasyPointsDraftKings&d=1&ls=FantasyPointsDraftKings'
        '&live=false&pid=false&minsnaps=4'
    ),
    'QB 2017 week4': (
        'https://fantasydata.com/nfl-stats/fantasy-football-weekly-projections.aspx?'
        'fs=3&stype=0&sn=0&scope=1&w=3&ew=3&s=&t=0&p=1&st=FantasyPointsDraftKings&d=1&ls=FantasyPointsDraftKings'
        '&live=false&pid=false&minsnaps=4'
    ),
    'QB 2013 week2': (
        'https://fantasydata.com/nfl-stats/fantasy-football-weekly-projections.aspx?'
        'fs=3&stype=0&sn=4&scope=0&w=1&ew=1&s=&t=0&p=1&st=FantasyPointsDraftKings&d=1&ls=FantasyPointsDraftKings'
        '&live=false&pid=false&minsnaps=4'
    ),
}

# Historical DraftKings Salaries

In [9]:
# Load one of the saved weekly files (QB, 2013, week 5) and display it.
qb_2013_week5_csv = './all_data/footballdb_data/QB_2013_week5.csv'
pd.read_csv(qb_2013_week5_csv)

Unnamed: 0.1,Unnamed: 0,Player,Opp,Pts,Att,Cmp,Yds,TD,Int,2Pt,Att.1,Yds.1,TD.1,2Pt.1,Rec,Yds.2,TD.2,2Pt.2,FL,TD.3
0,0,"Tony Romo, Dal",Den,50.0,36,25,506,5,1,1,1,7,0,0,0,0,0,0,0,0
1,1,"Peyton Manning, Den",@Dal,44.0,42,33,414,4,1,0,4,-8,1,0,0,0,0,0,0,0
2,2,"Russell Wilson, Sea",@Ind,27.0,31,15,210,2,1,0,13,102,0,0,0,0,0,0,1,0
3,3,"Geno Smith, NYJ",@Atl,27.0,20,16,199,3,0,0,3,21,0,0,0,0,0,0,0,0
4,4,"Jay Cutler, Chi",NO,27.0,33,24,358,2,0,0,4,27,0,0,0,0,0,0,1,0
5,5,"Sam Bradford, Stl",Jax,26.0,34,19,222,3,0,0,5,4,0,0,0,0,0,0,0,0
6,6,"Matt Ryan, Atl",NYJ,23.0,45,36,319,2,0,0,0,0,0,0,0,0,0,0,1,0
7,7,"Terrelle Pryor, Oak",SD,23.0,23,18,221,2,0,0,11,31,0,0,0,0,0,0,0,0
8,8,"Ryan Fitzpatrick, Ten",KC,23.0,41,21,247,1,2,0,6,50,1,0,1,0,0,0,0,0
9,9,"Drew Brees, NO",@Chi,23.0,35,29,288,2,0,0,1,-2,0,0,0,0,0,0,0,0
