* Parse fantasy data from FFToday
* Parse injury data
* Parse Google trends data
* Save to CSVs

From previous game:
* Position (WR, TE, RB)
* Rush yards
* Receive yards
* Touchdowns
* Fumbles
* Fantasy points gained
* Hype (google searches)
* injury status

From previous season:
* ESPN rank avg

Misc
* Age
* Division (AFC North, NFC East, etc)
* Team
* ADP (avg draft position)
* NFL Combine scores (?)
* Years of experience/# of games (?)


Goal:
* Rank,Name        ,Division ,Team,Position,Season,Week,Age,Rush,Receive,Touchdown,Fumble,Points,ADP ,Injury,Hype
* 1   ,Alvin Kamara,NFC South,NO  ,RB      ,2017  ,12  ,22 ,100 ,88     ,2        ,1     ,25.1  ,1.06,0     ,98

In [9]:
import os
import pandas as pd
import re
import requests
import time

from __future__ import print_function
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options

DELAY = 2  # seconds

#RB = 20
#WR = 30
#TE = 40

#fantasy_url = "http://games.espn.com/ffl/leaders"

'''
options = Options()
driver = webdriver.Firefox(firefox_options=options)
driver.get("fantasy_url")
'''


'\noptions = Options()\ndriver = webdriver.Firefox(firefox_options=options)\ndriver.get("fantasy_url")\n'

# FFToday
* Generate URLs to scrape
* Load pages
* Parse DF from pages
* Concat DF by position
* Save CSV for each DF

In [72]:
# Info about FFToday's website
FFTODAY = {
    'base_url' : "http://fftoday.com/stats/playerstats.php?Season={}&GameWeek={}&PosID={}",
    'seasons' : range(2001, 2018),
    'weeks' : range(1, 18),
    'position_index' : [20,30,40],  # RB, WR, TE
    'columns' : {
        20: ['name','team','g','rush_att','rush_yd','rush_td','rec_target','rec_rec','rec_yd','rec_td','fp','fpg'],
        30: ['name','team','g','rec_target','rec_rec','rec_yd','rec_td','rush_att','rush_yd','rush_td','fp','fpg'],
        40: ['name','team','g','rec_target','rec_rec','rec_yd','rec_td','fp','fpg'],
    }
}

In [117]:
def fftoday_urls(seasons=[2017], weeks=[10,11,12], positions=[20,30,40]):
    '''Return a list of FFToday URL tuples: (season, week, position, url).'''
    arr = []
    for p in positions:
        for s in seasons:
            for w in weeks:
                arr.append((s, w, p, 
                            str.format(FFTODAY['base_url'], s, w, p)))
    return arr

def scrape(url=str.format(FFTODAY['base_url'], 2017, 17, 20)):
    '''Return the raw text and soup for the URL.'''
    print("Reading", url)
    response = requests.get(url)
    return BeautifulSoup(response.text,"lxml")

def parse_page_to_dataframe(soup):
    '''Return dataframe.'''
    players = {}
    for n, link in enumerate(soup.find_all(href=re.compile('/stats/players'))):
        name = link.text
        if name != "players":
            row = link.findParent().findParent()
            # RB: Name,Team,G,Rush Att,Rush Yd,Rush TD,Rec Target,Rec Rec,Rec Yd,Rec TD,FPts,FPts/G 
            players[n] = [td.text for td in row.find_all('td')]
            # separate name and rank
            players[n][0] = name
    return pd.DataFrame(players).T

def save_to_csv(dataframe, key=''):
    # save to CSV
    file = str.format("data/fftoday_{}_{}.csv", dataframe.season[1], 'pos'+str(key))
    print("Saving to file", file)
    dataframe.to_csv(file)

fftoday_urls()

[(2017,
  10,
  20,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=10&PosID=20'),
 (2017,
  11,
  20,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=11&PosID=20'),
 (2017,
  12,
  20,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=12&PosID=20'),
 (2017,
  10,
  30,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=10&PosID=30'),
 (2017,
  11,
  30,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=11&PosID=30'),
 (2017,
  12,
  30,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=12&PosID=30'),
 (2017,
  10,
  40,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=10&PosID=40'),
 (2017,
  11,
  40,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=11&PosID=40'),
 (2017,
  12,
  40,
  'http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=12&PosID=40')]

In [19]:
text, soup = scrape('http://fftoday.com/stats/playerstats.php?Season=2017&GameWeek=12&PosID=40')
print(text)


<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'>
<html>

<head>
<title>Tight End Stats: 2017 Week 12 - FF Today</title> 
<META NAME='description' CONTENT='NFL and fantasy football stats from FF Today.'>
<META NAME='keywords' CONTENT='stats, statistics, nfl, fantasy football, quarterbacks, running backs, wide receivers, tight ends, defensive linemen, linebackers, defensive backs'>
<meta property='og:image' content=''>

<LINK rel='STYLESHEET' type='text/css' href='/fftoday2.css'>
<link rel='stylesheet' type='text/css' href='/css/slimbox2.css' media='screen' />
<link rel='stylesheet' href='/themes/fft_default/style.css' type='text/css' media='print, projection, screen' />
<script type='text/javascript' language='javascript' src='/rollOverColors.js'></script>
<script type='text/javascript' src='/js/jquery.js'></script>
<script type='text/javascript' src='/js/slimbox2.js'></script>
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'>

<script type='text/javascr

In [87]:

data = []
for tup in fftoday_urls():
    season, week, position, url = tup
    data.append((season, week, position, scrape(url)))
    time.sleep(DELAY)

In [88]:
print(data)

[(2017, 10, 20, <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>Running Back Stats: 2017 Week 10 - FF Today</title>
<meta content="NFL and fantasy football stats from FF Today." name="description"/>
<meta content="stats, statistics, nfl, fantasy football, quarterbacks, running backs, wide receivers, tight ends, defensive linemen, linebackers, defensive backs" name="keywords"/>
<meta content="" property="og:image"/>
<link href="/fftoday2.css" rel="STYLESHEET" type="text/css"/>
<link href="/css/slimbox2.css" media="screen" rel="stylesheet" type="text/css"/>
<link href="/themes/fft_default/style.css" media="print, projection, screen" rel="stylesheet" type="text/css"/>
<script language="javascript" src="/rollOverColors.js" type="text/javascript"></script>
<script src="/js/jquery.js" type="text/javascript"></script>
<script src="/js/slimbox2.js" type="text/javascript"></script>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<script s

In [102]:
from collections import defaultdict

def make_dataframes(data=data):
    dataframes = defaultdict(list)
    for tup in data:
        season, week, position, soup = tup        
        df = parse_page_to_dataframe(soup)
        df.columns = FFTODAY['columns'][position]
        df['season'] = season
        df['week'] = week
        dataframes[position].append(df)
    return dataframes
    
dataframes = make_dataframes()


In [145]:
def save_to_csv(dataframe, key=''):
    # save to CSV
    season_range = str(dataframe.season.min())
    max_season = str(dataframe.season.max())
    if max_season != season_range:
        season_range += "-" + max_season
    file = str.format("data/fftoday_{}_{}.csv", season_range, 'pos'+str(key))
    print("Saving to file", file)
    dataframe.to_csv(file)
    
for key, value in dataframes.items():
    total_len = sum([len(v) for v in value])
    df = pd.concat(value)
    assert len(df) == total_len
    save_to_csv(df, key)
    


Saving to file data/fftoday_2017_pos20.csv
Saving to file data/fftoday_2017_pos30.csv
Saving to file data/fftoday_2017_pos40.csv


In [101]:
print(dataframes[40])

                       name team  g rec_target rec_rec rec_yd rec_td    fp  \
1              Jimmy Graham  SEA  1          9       6     27      2  14.7   
2             Garrett Celek   SF  1          6       4     67      1  12.7   
3          Jermaine Gresham  ARI  1          7       5     64      1  12.4   
4             Austin Hooper  ATL  1          6       6     49      1  10.9   
5                Eric Ebron  DET  1          3       2     39      1   9.9   
6                Ed Dickson  CAR  1          5       3     33      1   9.3   
7               Evan Engram  NYG  1          9       6     31      1   9.1   
8              Vernon Davis  WAS  1         11       7     76      0   7.6   
9            Vance McDonald  PIT  1          2       2     16      1   7.6   
10           Rob Gronkowski   NE  1          7       4     74      0   7.4   
11             Dwayne Allen   NE  1          2       1     11      1   7.1   
12             Seth Devalve  CLE  1          6       4     70   

## Example
    <tr>
    <td align="LEFT" bgcolor="#ffffff" class="sort1"> 1. <a href="/stats/players/2480/Aaron_Rodgers?LeagueID=">Aaron Rodgers</a></td>
    <td align="center" bgcolor="#ffffff" class="sort1">GB</td>
    <td align="center" bgcolor="#ffffff" class="sort1">16</td>
    <td align="center" bgcolor="#ffffff" class="sort1">401</td>
    <td align="center" bgcolor="#ffffff" class="sort1">610</td>
    <td align="center" bgcolor="#ffffff" class="sort1">4,428</td>
    <td align="center" bgcolor="#ffffff" class="sort1">40</td>
    <td align="center" bgcolor="#ffffff" class="sort1">7</td>
    <td align="center" bgcolor="#ffffff" class="sort1">67</td>
    <td align="center" bgcolor="#ffffff" class="sort1">369</td>
    <td align="center" bgcolor="#ffffff" class="sort1">4</td>
    <td align="center" bgcolor="#e0e0e0" class="sort1">442.3</td>
    <td align="center" bgcolor="#ffffff" class="sort1">27.6</td>
    </tr>
    <tr>

In [20]:
soup = data[0][1]  # Running Back Stats: 2017 Week 10

# http://fftoday.com/stats/players/11882/Mark_Ingram
df = parse_page_to_dataframe(soup)

Unnamed: 0,name,team,g,rush_att,rush_yd,rush_td,rec_target,rec_rec,rec_yd,rec_td,fp,fpg
1,Mark Ingram,NO,1,21,131,3,0,0,0,0,31.1,31.1
2,DeMarco Murray,TEN,1,14,42,2,4,4,30,1,25.2,25.2
3,Austin Ekeler,LAC,1,10,42,0,5,5,77,2,23.9,23.9
4,Alvin Kamara,NO,1,12,106,1,5,5,32,0,19.8,19.8
5,Christian McCaffrey,CAR,1,5,23,1,7,3,27,1,17.0,17.0
6,Isaiah Crowell,CLE,1,16,90,1,2,1,5,0,15.5,15.5
7,Kenyan Drake,MIA,1,7,82,1,2,2,10,0,15.2,15.2
8,Tevin Coleman,ATL,1,20,83,1,1,1,5,0,14.8,14.8
9,Todd Gurley,LAR,1,11,68,0,7,6,68,0,13.6,13.6
10,Cameron Artis-Payne,CAR,1,7,68,1,0,0,0,0,12.8,12.8


# Injury

# Google Trends

In [14]:
players = {}

for link in soup.find_all(href=re.compile('/stats/players')):
    name = link.text
    if name != "players":
        row = link.findParent().findParent()
        players[name] = [td.text for td in row.find_all('td')]
print(players)

{'Aaron Rodgers': ['\xa01. Aaron Rodgers', 'GB', '16', '401', '610', '4,428', '40', '7', '67', '369', '4', '442.3', '27.6'], 'Drew Brees': ['\xa02. Drew Brees', 'NO', '16', '471', '673', '5,207', '37', '15', '23', '20', '2', '422.4', '26.4'], 'Matt Ryan': ['\xa03. Matt Ryan', 'ATL', '16', '373', '534', '4,944', '38', '7', '35', '117', '0', '410.9', '25.7'], 'Andrew Luck': ['\xa04. Andrew Luck', 'IND', '15', '346', '545', '4,240', '31', '13', '64', '341', '2', '382.1', '25.5'], 'Kirk Cousins': ['\xa05. Kirk Cousins', 'WAS', '16', '406', '606', '4,917', '25', '12', '34', '100', '4', '379.9', '23.7'], 'Philip Rivers': ['\xa06. Philip Rivers', 'SD', '16', '349', '578', '4,390', '33', '21', '14', '35', '0', '355.0', '22.2'], 'Matthew Stafford': ['\xa07. Matthew Stafford', 'DET', '16', '388', '594', '4,327', '24', '10', '37', '207', '2', '345.1', '21.6'], 'Blake Bortles': ['\xa08. Blake Bortles', 'JAC', '16', '368', '625', '3,905', '23', '16', '58', '359', '3', '341.2', '21.3'], 'Dak Prescot

In [9]:
players = {}

for link in soup.find_all(href=re.compile('/stats/players')):
    name = link.text
    if name != "players":
        row = link.findParent().findParent()
        players[name] = [td.text for td in row.find_all('td')]
print(players)

NameError: name 'soup' is not defined