In [1]:
import os
import time

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans

In [2]:
# URL prefix of the stats page; getData appends the query string (Season, GameWeek, PosID, LeagueID) to it.
LINK_PREF = 'http://fftoday.com/stats/playerstats.php?'

In [3]:
def getData(link,year,week,pos,scoring='PPR',timeout=30):
    '''
    Downloads one weekly stats page and returns its table as a list of rows (lists of strings).

    link: URL prefix ending in '?'; the query string is appended to it.
    year, week: season and game week, sent as the Season and GameWeek query parameters.
    pos: one of 'QB', 'RB', 'WR', 'TE', 'K', 'DST' (any other value raises KeyError).
    scoring: scoring format; only 'PPR' is mapped (any other value raises KeyError).
    timeout: seconds to wait for the server before giving up on the request.

    Returns the header row first, followed by one list of cell texts per table row.

    Raises requests.exceptions.RequestException if the request times out, cannot connect or
    comes back with an HTTP error status, and ValueError if the page does not contain the
    expected table.
    '''
    time.sleep(2) # throttle: pause before every request so the site is not hammered
    posId ={
        'QB':'10', #ids correspond to ids in link
        'RB':'20',
        'WR':'30',
        'TE':'40',
        'K':'80',
        'DST':'99'
    }
    scoringID = {
        'PPR':'107644'
    }
    full_link = link + 'Season=' + str(year) + '&GameWeek=' + str(week) + '&PosID=' + posId[pos] + '&LeagueID=' + scoringID[scoring]
    # Without a timeout a single stalled connection would block the whole scrape forever.
    response = requests.get(full_link, timeout=timeout)
    # Fail here on 4xx/5xx instead of trying to parse an error page further down.
    response.raise_for_status()
    # NOTE(review): the text is deliberately re-encoded to bytes exactly as before; the name
    # cleanup done later may depend on how BeautifulSoup decodes these bytes - confirm before changing.
    html = response.text.encode()
    soup = BeautifulSoup(html,'html.parser') # use beautifulsoup to parse through html and find specific html tags that correspond to the table
    body = soup.find('td',{'class':'bodycontent'})
    if body is None:
        raise ValueError('No td with class "bodycontent" found at ' + full_link)
    tables = body.find_all('table')
    if len(tables) < 6:
        raise ValueError('Expected at least 6 tables but found ' + str(len(tables)) + ' at ' + full_link)
    rows = tables[5].find_all('tr')[2:] # the stats table is the sixth table; its first two rows are skipped (hard-coded)
    table = [[y for y in x.text.split('\n') if y!=''] for x in rows] # to later turn into dataframe
    if not table:
        raise ValueError('Stats table has no rows at ' + full_link)
    if pos != 'DST':
        # Keep the first header cell and drop the next three.
        # NOTE(review): presumably those cells have no matching data column - confirm against the page.
        table[0] = [table[0][0]] + table[0][4:]
    return table
def parseFantasyData(link,positions=('QB','RB','WR','TE','K','DST'),years=range(2010,2018),weeks=range(1,18)):
    '''
    Wrapper function that iterates through all requested positions, years and weeks.

    link: URL prefix handed to getData.
    positions: position codes to download (default: all six positions).
    years: seasons to download (default: 2010 through 2017).
    weeks: game weeks to download (default: 1 through 17).

    Returns a dictionary mapping each position to a list of dataframes, one per year/week,
    each with the columns Player, POS, year, week and FPts.
    '''
    # Materialise the iterables once so one-shot iterators (e.g. generators) survive the nested loops.
    positions = list(positions)
    years = list(years)
    weeks = list(weeks)
    fantasyData = {}
    for pos in positions:
        frames = []
        for year in years:
            for week in weeks:
                rows = getData(link,year,week,pos)
                # First row is the header, the remaining rows are the table body.
                weekly = pd.DataFrame(columns=rows[0],data=rows[1:])
                weekly['POS'] = pos
                weekly['year'] = year
                weekly['week'] = week
                if pos == 'DST':
                    # DST tables name the entity column 'Team'; align it with the other positions.
                    weekly = weekly.rename(columns={'Team':'Player'})
                frames.append(weekly[['Player','POS','year','week','FPts']])
        fantasyData[pos] = frames
    return fantasyData

def fixNames(dataframe):
    '''
    Returns a copy of the dataframe whose Player column has its leading token (the weekly rank
    and its odd formatting) removed, so only the name is left.

    Each Player value is cut at its first space and everything after it is kept, so a value
    such as '1. Aaron Rodgers' becomes 'Aaron Rodgers' and suffixes like Jr. are preserved.
    A value without any space has no prefix to strip and is kept unchanged.

    The input dataframe is not modified. In the result the Player column is the last column.
    '''
    player = []
    for raw in dataframe['Player'].tolist():
        # Same text as re-joining the space-split tokens after the first one, but without
        # the trailing space that a single-token name would otherwise pick up.
        _, sep, name = raw.partition(' ')
        player.append(name if sep else raw)
    # drop + assign both return new frames, so the caller's dataframe stays untouched.
    return dataframe.drop('Player',axis=1).assign(Player=player)

def pivotData(dataframe):
    '''
    Cleans the long-format data and reshapes it to one row per Player/POS/year with one column per week.

    Player names are cleaned with fixNames and FPts is converted to float before pivoting.
    Weeks with no entry for a player are filled with 0. Rows that share the same
    Player/POS/year/week are combined by pivot_table's default aggregation (the mean).
    '''
    cleaned = fixNames(dataframe).assign(FPts=lambda frame: frame['FPts'].astype(float))
    byWeek = cleaned.pivot_table(
        values='FPts',
        index=['Player','POS','year'],
        columns=['week'],
        fill_value=0,
    )
    # Turn the Player/POS/year index levels back into ordinary columns.
    return byWeek.reset_index()

In [4]:
# Scrape everything. parseFantasyData requests every position/year/week combination
# (6 x 8 x 17 = 816 pages) and getData sleeps 2 seconds before each request,
# so this cell takes roughly half an hour to run.
data=parseFantasyData(LINK_PREF)

In [10]:
# Stack the weekly frames of each position and pivot them to one row per player and season.
dataMerged = {pos: pivotData(pd.concat(frames)) for pos, frames in data.items()}
# Every per-position table numbers its rows from 0, so renumber while stacking;
# otherwise the combined frame (and the exported CSV) carries duplicated row labels.
fullData = pd.concat(list(dataMerged.values()), ignore_index=True)

In [11]:
# Preview the first rows of the combined table: Player, POS, year and one points column per week.
fullData.head()

week,Player,POS,year,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,A.J. Feeley,QB,2011,1.1,0.0,0.0,0.0,0.0,0.0,9.7,13.3,0.0,0.0,0.0,0.0,7.8,0.0,0.0,0.0,0.0
1,AJ McCarron,QB,2015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.2,0.0,0.0,0.2,1.0,22.8,13.8,16.1,16.2
2,AJ McCarron,QB,2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4,1.0,0.0,0.0
3,Aaron Rodgers,QB,2010,18.3,28.8,27.8,23.1,21.7,27.1,24.2,9.0,30.6,0.0,33.2,32.3,30.8,4.8,0.0,38.8,17.6
4,Aaron Rodgers,QB,2011,27.7,24.7,27.2,52.0,27.7,29.3,29.6,0.0,33.6,30.6,29.8,23.3,37.7,22.1,25.0,36.0,0.0


In [12]:
# to_csv does not create missing folders, so make sure the output directory exists
# before writing (otherwise the export fails on a fresh checkout).
os.makedirs('HistoricalFantasyData', exist_ok=True)
fullData.to_csv('HistoricalFantasyData/fullWeekbyWeekData_PPR.csv')