In [1]:
#########################################################################
# UPDATE THE WEEK BEFORE DOING ANYTHING ELSE!!!!!!!!!!!!!!
#########################################################################

# Which week of the NFL season are we in?
# `week` is substituted into request URLs and into every SQL table name
# written by the cells below (e.g. 'swish_week_{week}'). Those tables are
# written with if_exists='replace', so a stale value here overwrites the
# tables of the wrong week.
week = 8




############################################################################
############################################################################
############################################################################

# UPDATE THE WEEK BEFORE DOING ANYTHING ELSE!!!!!!!!!!!!!!

In [2]:
import os

#####################################################################
# Where is the chromedriver on this machine?
####################################################################
# Machine-specific setting. Set the CHROMEDRIVER_PATH environment variable to
# override the default below instead of editing the notebook on each machine.
PATH_TO_CHROMEDRIVER = os.environ.get(
    'CHROMEDRIVER_PATH',
    r'C:\Users\JoGa\Documents\PyScripts\chromedriver',
)

#####################################################################
# Update the list of URLs as needed
#####################################################################
# Each *_URL is a bare host name (no scheme) and each *_EXTENSION is the
# request path + query string. '{pos}' / '{wk}' are str.format placeholders
# filled in by the scraping cells.

#swish analytics
SWISH_URL = 'swishanalytics.com'
SWISH_EXTENSION = '/optimus/nfl/daily-fantasy-projections'

#CBS Sports ({pos} = position code, {wk} = week number)
CBS_URL = 'www.cbssports.com'
CBS_EXTENSION = '/fantasy/football/stats/weeklyprojections/{pos}/{wk}/avg/standard?&print_rows=9999'

#Fantasy Pros - (So far includes STATS,cbssports,espn,fftoday and numberFire)
# Only {pos} is parameterised; the path carries no week number.
FP_URL = 'www.fantasypros.com'
FP_EXTENSION = '/nfl/projections/{pos}.php?scoring=PPR'

#Fantasy Sharks (a single JSON document covering all positions)
FS_URL = 'www.fantasysharks.com'
FS_EXTENSION = '/apps/Projections/WeeklyProjections.php?pos=ALL&format=json'



In [3]:
########### Python 3 #############
from bs4 import BeautifulSoup
import http.client 
import json
import pandas as pd
from pandas.io.json import json_normalize
from sqlalchemy import create_engine


# Position codes, in scrape order, for sites that publish one page per
# position; each code is substituted into that site's URL template.
POSITION_LIST = 'QB RB WR TE K DST'.split()

def GetPage(url, extension, return_string=False):
    """Fetch ``https://<url><extension>`` and parse the body as HTML.

    Parameters
    ----------
    url : str
        Host name only (no scheme), e.g. ``'www.cbssports.com'``.
    extension : str
        Request path plus query string.
    return_string : bool, optional
        When True, return ``str(soup)`` instead of the BeautifulSoup object.

    Returns
    -------
    bs4.BeautifulSoup, str or None
        The parsed page (or its string form). ``None`` is returned when the
        request, the UTF-8 decoding or the parsing fails; the error is
        printed rather than raised, so callers must check for ``None``.
    """
    conn = None
    try:
        conn = http.client.HTTPSConnection(url)
        conn.request("GET", extension)
        response = conn.getresponse()
        data = response.read().decode('utf-8')
        soup = BeautifulSoup(data, 'lxml')
        return str(soup) if return_string else soup
    except Exception as e:
        # Only OSError subclasses carry errno/strerror. Reading them blindly
        # raised AttributeError for every other failure (bad encoding,
        # HTTPException, ...), which hid the real error.
        errno = getattr(e, 'errno', None)
        strerror = getattr(e, 'strerror', None)
        if errno is not None or strerror is not None:
            print("[Errno {0}] {1}".format(errno, strerror))
        else:
            print("{0}: {1}".format(type(e).__name__, e))
        return None
    finally:
        # Release the socket on both the success and the failure path.
        if conn is not None:
            conn.close()
    
    
#connect to database
# SQLAlchemy engine for the SQLite file 'nfl_projections_db.sqlite'. The URL
# is relative, so the file is presumably resolved against the current working
# directory (confirm). The scraping cells pass this engine to DataFrame.to_sql.
engine = create_engine('sqlite:///nfl_projections_db.sqlite')        

In [4]:
# Get projections from Swish Analytics
# point the urls to swish
url = SWISH_URL
extension = SWISH_EXTENSION
table_name = 'swish_week_{}'.format(week)

# Fetch the page as one string (GetPage returns None when the request fails)
soup = GetPage(url, extension, return_string=True)

# Check to make sure that the page was retrieved.
# The message is passed directly: `assert cond, print(...)` would use the
# return value of print (None) as the assertion message.
assert soup is not None and len(soup) > 0, "The content at {} was not retrieved".format(url)

# The projections are embedded in the page as a JavaScript assignment:
#     this.players = [{...}, ..., {..."}];
start_marker = 'this.players = '
end_marker = '"}];'

# str.find returns -1 when the marker is missing; without this check the
# slice below would silently start at a meaningless offset.
marker_pos = soup.find(start_marker)
assert marker_pos != -1, "There appear to be no projections at {}".format(url)
start = marker_pos + len(start_marker)

# Look for the end of the array only *after* its start, and keep the closing
# '"}]' while dropping the trailing ';'.
end_pos = soup.find(end_marker, start)
assert end_pos != -1, "There appear to be no projections at {}".format(url)
end = end_pos + len('"}]')

# slice the string down to the projections
projections = soup[start:end]
# Check to make sure that there are projections on the page
assert len(projections) > 0, "There appear to be no projections at {}".format(url)

try:
    #Convert the string to a dataframe
    swish_projections = json_normalize(json.loads(projections))
except json.JSONDecodeError:
    # Re-raise after reporting: nothing below can run without the DataFrame.
    print("Error converting the projections from {} into json".format(url))
    raise

#rename columns
# NOTE(review): this is a positional rename - it assumes the page still
# delivers exactly these 38 fields in this order. A different count raises
# ValueError; a different order would mislabel columns silently.
swish_projections.columns = ['avg_yards', 'avg_tds', 'proj_yds', 'proj_tds', 'date', 'dk_avg',
       'dk_fpts', 'dk_fpts_act', 'dk_fpts_ingame', 'dk_salary', 'dk_value',
       'event_status_id', 'fd_avg', 'fd_fpts', 'fd_fpts_act', 'fd_fpts_ingame',
       'fd_salary', 'fd_value', 'home', 'name', 'nfl_avg', 'nfl_fpts',
       'nfl_fpts_act', 'nfl_fpts_ingame', 'nfl_salary', 'nfl_value',
       'nickname', 'opp_abbr', 'player_id', 'primary_pos_abbr',
       'season_fpts_full', 'season_fpts_remaining', 'ya_avg', 'ya_fpts',
       'ya_fpts_act', 'ya_fpts_ingame', 'ya_salary', 'ya_value']

# Change data types to numeric if possible, ignore if text
swish_projections = swish_projections.apply(lambda x: pd.to_numeric(x, errors='ignore'))
# Salaries are forced to numeric; unparseable values become NaN
swish_projections[['dk_salary','fd_salary']] = swish_projections[['dk_salary','fd_salary']].apply(lambda x: pd.to_numeric(x, errors='coerce'))

try:
    #load projections to a sql table
    swish_projections.to_sql(table_name, engine, if_exists='replace')
except Exception as e:
    print(e)
else:
    print("Projections for week {} have been successfully loaded into {}".format(week, table_name))

Projections for week 8 have been successfully loaded into swish_week_8


In [5]:
#################################################
# Get projections from CBS
#################################################

# point the urls to CBS
url = CBS_URL
extension = CBS_EXTENSION

#set up empty df to append the dfs for each position to
# NOTE(review): nothing in this cell appends to cbs_df; it is kept only in
# case another cell reads it.
cbs_df = pd.DataFrame()

# names of the SQL tables that were written successfully
table_list = []

# for each position scrape the different extension
for position in POSITION_LIST:
    page_path = extension.format(pos=position, wk=week)

    # GetPage returns None on failure, so test for that before calling len().
    # The message is passed directly: `assert cond, print(...)` would use
    # print's return value (None) as the assertion message.
    soup = GetPage(url, page_path)
    assert soup is not None and len(soup) > 0, "Problem retrieving {} {}".format(url, page_path)

    sql_table_name = 'cbs_{}_week_{}'.format(position.lower(), week)

    # soup.find returns None when the page has no <table>
    table = soup.find('table')
    assert table is not None and len(table) > 0, "Problem finding table on {} {}".format(url, page_path)

    if position in ['QB','RB','WR','TE']:
        #The header is in row 2 and the last row needs to be dropped
        header_row = 2
        drop_last_row = True
    else:
        header_row = 1
        drop_last_row = False

    df = pd.read_html(str(table),header=header_row)[0]
    assert len(df) > 0, "Problem reading {} html table".format(position)

    if drop_last_row:
        # DataFrame.ix was removed in pandas 1.0; on the default integer index
        # `.ix[:len(df)-2]` kept every row but the last, which is `.iloc[:-1]`.
        df = df.iloc[:-1]
    try:
        df.to_sql(sql_table_name, engine, if_exists='replace')
        table_list.append(sql_table_name)
    except Exception as e:
        # Best effort: report the failure and carry on with the next position.
        print(e)


print("CBS projections have been successfully saved to these tables: {}".format(table_list))

CBS projections have been successfully saved to these tables: ['cbs_qb_week_8', 'cbs_rb_week_8', 'cbs_wr_week_8', 'cbs_te_week_8', 'cbs_k_week_8', 'cbs_dst_week_8']


In [6]:
################################################
# Get projections from Fantasy Pros
################################################

def _split_player_team(label):
    """Split a 'Player Name TEAM' label at its last space into (name, team).

    A label without any space is returned as (label, ''); non-string values
    (e.g. NaN) are passed through unchanged with an empty team.
    """
    if not isinstance(label, str):
        return label, ''
    name, separator, team = label.rpartition(' ')
    return (name, team) if separator else (label, '')


# point the urls to Fantasy Pros
url = FP_URL
extension = FP_EXTENSION

# names of the SQL tables that were written successfully
table_list = []

# for each position scrape the different extension
# NOTE(review): the Fantasy Pros path has no week parameter, so the page
# presumably shows the current week, while the table name uses `week`.
for position in POSITION_LIST:
    # Build the path once so error messages show the path actually requested
    # (the position is lower-cased in the URL).
    page_path = extension.format(pos=position.lower())

    # GetPage returns None on failure; messages are passed directly because
    # `assert cond, print(...)` would use None as the assertion message.
    soup = GetPage(url, page_path)
    assert soup is not None and len(soup) > 0, "Problem retrieving {}{}".format(url, page_path)

    sql_table_name = 'fantasy_pros_{}_week_{}'.format(position.lower(), week)

    # soup.find returns None when the page has no <table>
    table = soup.find('table')
    assert table is not None and len(table) > 0, "Problem finding table on {}{}".format(url, page_path)

    df = pd.read_html(str(table))[0]
    assert len(df) > 0, "Problem reading {} html table".format(position)

    if position != 'DST':
        # The 'Player' cell holds the name followed by the team, separated by
        # a space: split it into two columns. Plain iteration replaces
        # Series.iteritems(), which was removed in pandas 2.0.
        name_team_pairs = [_split_player_team(label) for label in df['Player']]
        df['Player'] = [name for name, _ in name_team_pairs]
        df['Team'] = [team for _, team in name_team_pairs]

    # Columns ending in '.<one char>' (e.g. 'YDS.1') are renamed to end in 'z'
    # ('YDSz'). Presumably these are pandas' de-duplicated header names and the
    # '.' is unwanted in SQL column names - confirm.
    df.columns = [
        col[:-2] + 'z' if isinstance(col, str) and len(col) >= 2 and col[-2] == '.' else col
        for col in df.columns
    ]

    try:
        df.to_sql(sql_table_name, engine, if_exists='replace')
        table_list.append(sql_table_name)
    except Exception as e:
        # Best effort: report the failure and carry on with the next position.
        print(e)


print("Projections for Fantasy Pros have been added to these tables: {}".format(table_list))

Projections for Fantasy Pros have been added to these tables: ['fantasy_pros_qb_week_8', 'fantasy_pros_rb_week_8', 'fantasy_pros_wr_week_8', 'fantasy_pros_te_week_8', 'fantasy_pros_k_week_8', 'fantasy_pros_dst_week_8']


In [10]:
################################################
# Get projections from Fantasy Sharks
################################################

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import clipboard

# The Fantasy Sharks Website apparently doesn't like to be scraped. Need to use selenium webdriver

# point the urls to Fantasy Sharks
url = 'http://' + FS_URL + FS_EXTENSION
table_name = 'fantasy_sharks_week_{}'.format(week)

# Set modified User-Agent string so that website thinks we're not using a
# selenium browser
# NOTE(review): this is a PhantomJS settings key; it is unclear whether the
# Chrome driver honours it - confirm.
dcap = {}
dcap["phantomjs.page.settings.userAgent"] = (
     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
     "(KHTML, like Gecko) Chrome/15.0.87")

#use the selenium webdriver to connect to Chrome
browser = webdriver.Chrome(executable_path = PATH_TO_CHROMEDRIVER,
                          desired_capabilities = dcap)

try:
    #connect to fantasy sharks
    browser.get(url)

    # The page is just a JSON document. Select all of its text and copy it to
    # the system clipboard with keyboard commands.
    actions = ActionChains(browser)
    actions.key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform()
    actions.key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
finally:
    # Always shut the browser down, even when loading the page or sending the
    # key strokes fails; otherwise the Chrome process is left running.
    browser.quit()

# Get contents of clipboard
text = clipboard.paste()
# The message is passed directly: `assert cond, print(...)` would use print's
# return value (None) as the assertion message.
assert text is not None and len(text) > 0, "There was a problem with downloading from {}.".format(url)

# Convert text to DataFrame
fs_projections = json_normalize(json.loads(text))
assert len(fs_projections) > 0, "The text could not be converted to a DataFrame."

# Change data types to numeric if possible, ignore if text
fs_projections = fs_projections.apply(lambda x: pd.to_numeric(x, errors='ignore'))

try:
    # Use the table name computed above so the write and the success message
    # cannot drift apart.
    fs_projections.to_sql(table_name, engine, if_exists='replace')
except Exception as e:
    print(e)
else:
    print("Fantasy Sharks projections have been successfully added to {}".format(table_name))

Fantasy Sharks projections have been successfully added to fantasy_sharks_week_8


In [11]:
###########################################################
#  Get Defensive Stats from Fantasy Data
###########################################################

import os

headers = {
    # Request headers
    # The subscription key is read from the FANTASYDATA_API_KEY environment
    # variable when it is set, so the real key need not be saved in the
    # notebook; otherwise the placeholder below is used as before.
    'Ocp-Apim-Subscription-Key': os.environ.get('FANTASYDATA_API_KEY', '<your_api_key_here>'),
}

# `data` stays None unless a successful response body was downloaded, so a
# failed request can never fall through to a stale `data` from another cell.
data = None
conn = None
try:
    conn = http.client.HTTPSConnection('api.fantasydata.net')
    # NOTE(review): the literal "{body}" is sent as the request body of this
    # GET request; kept as in the original - confirm whether it is needed.
    conn.request("GET", "/v3/nfl/projections/JSON/FantasyDefenseProjectionsByGame/2016REG/{wk}".format(wk=week),\
                 "{body}", headers)
    response = conn.getresponse()
    body = response.read().decode('utf-8')
    if 200 <= response.status < 300:
        data = body
    else:
        # e.g. an invalid subscription key: do not store the error document
        print("api.fantasydata.net answered {} {}: {}".format(response.status, response.reason, body))
except Exception as e:
    # Not every exception has errno/strerror (only OSError does), so report
    # the exception type and message instead.
    print("Request to api.fantasydata.net failed: {}: {}".format(type(e).__name__, e))
finally:
    if conn is not None:
        conn.close()

####################################

table_name = 'fantasy_data_def_week_{}'.format(week)

if data is None:
    print("No Fantasy Data projections were downloaded; {} was not updated".format(table_name))
else:
    try:
        fd_def_stats = json_normalize(json.loads(data))
        fd_def_stats.to_sql(table_name, engine, if_exists='replace')
    except Exception as e:
        print(e)
    else:
        print("Fantasy Data projections have been successfully added to {}".format(table_name))


Fantasy Data projections have been successfully added to fantasy_data_def_week_8


In [12]:
###########################################################
#  Get Offensive Stats from Fantasy Data
###########################################################

# Reuses the `headers` dict (subscription key) defined in the defensive-stats
# cell, which must have been run first.

# `data` stays None unless a successful response body was downloaded, so a
# failed request can never fall through to the `data` of the previous cell.
data = None
conn = None
try:
    conn = http.client.HTTPSConnection('api.fantasydata.net')
    # NOTE(review): the literal "{body}" is sent as the request body of this
    # GET request; kept as in the original - confirm whether it is needed.
    conn.request("GET", "/v3/nfl/projections/JSON/PlayerGameProjectionStatsByWeek/2016REG/{wk}".format(wk=week),\
                 "{body}", headers)
    response = conn.getresponse()
    body = response.read().decode('utf-8')
    if 200 <= response.status < 300:
        data = body
    else:
        # e.g. an invalid subscription key: do not store the error document
        print("api.fantasydata.net answered {} {}: {}".format(response.status, response.reason, body))
except Exception as e:
    # Not every exception has errno/strerror (only OSError does), so report
    # the exception type and message instead.
    print("Request to api.fantasydata.net failed: {}: {}".format(type(e).__name__, e))
finally:
    if conn is not None:
        conn.close()

####################################

table_name = 'fantasy_data_off_week_{}'.format(week)

if data is None:
    print("No Fantasy Data projections were downloaded; {} was not updated".format(table_name))
else:
    try:
        fd_plyr_stats = json_normalize(json.loads(data))
        fd_plyr_stats.to_sql(table_name, engine, if_exists='replace')
    except Exception as e:
        print(e)
    else:
        print("Fantasy Data projections have been successfully added to {}".format(table_name))

Fantasy Data projections have been successfully added to fantasy_data_off_week_8
