In [24]:
# Setup
import urllib3
import certifi
from bs4 import BeautifulSoup
import pandas as pd

In [28]:
# Define score functions
def pitcher_score(df):
    return (df['W'] * 20) + (df['SV'] * 10) + df['K'] + df['IP'] + (df['ER'] * -1) + (df['L'] * -5)


def hitter_score(df):
    return (df['HR'] * 4) + (df['3B'] * 3) + (df['2B'] * 2) + df['H'] + df['R'] + df['RBI'] + df['BB'] + df['SB']

In [26]:
# Pull Sportsnet Player Lists
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())

base_url = "https://fantasy.sportsnet.ca/sportsnet/baseball18/player_stats"
group_param = "pick_stats_group"

scrape_pages = {
    "C/DH": 1,
    "1B/3B": 2,
    "2B/SS": 3,
    "OF": 4,
    "SP": 5,
    "RP": 6
}

frames = {}

for key, value in scrape_pages.items():

    response = http.request("GET", base_url + "?" + group_param + "=" + str(value))
    
    soup = BeautifulSoup(response.data, 'html.parser')
    table = soup.find('table', attrs={'id': 'playerstats'})
    header = [th.text for th in table.find('thead').select('th')]
    players = [[td.text.strip().split('\n')[0] for td in row.select('td')] for row in table.tbody.find_all('tr')]
    cols = zip(*players)
    tbl_d = {name: col for name, col in zip(header, cols)}
    df = pd.DataFrame(tbl_d, columns=header)
    name = df['Player'].str.split(', ')
    df["First"] = name.str[1]
    df['Last'] = name.str[0]
    df['Player'] = name.str[1] + " " + name.str[0]
    frames[key] = df.apply(pd.to_numeric, errors='ignore')


In [32]:
# Load Fantasy Pro projections
hitters = pd.read_csv("data/FantasyPros_2018_wk2_Projections_H.csv")
hitters['Score'] = hitter_score(hitters)

pitchers = pd.read_csv("data/FantasyPros_2018_wk2_Projections_P.csv")
pitchers['Score'] = pitcher_score(pitchers)

In [39]:
# Join Sportsnet data with projects
for key, value in frames.items():
    merge_frame = hitters
    if key == "SP" or key == "RP":
        merge_frame = pitchers
    result = pd.merge(value, merge_frame, on='Player', suffixes=['_actual', '_projected'])
    frames[key] = result

array(['PV', 'Player', 'Team_actual', 'GP', 'S', 'D', 'T', 'HR_actual',
       'R_actual', 'RBI_actual', 'SB_actual', 'BB_actual', 'PTS', 'First',
       'Last', 'VBR_actual', 'Team_projected', 'Positions_actual',
       'AB_actual', 'R_projected', 'HR_projected', 'RBI_projected',
       'SB_projected', 'AVG_actual', 'OBP_actual', 'H_actual',
       '2B_actual', '3B_actual', 'BB_projected', 'SO_actual',
       'SLG_actual', 'OPS_actual', 'Points_actual', 'Own_actual',
       'Score_actual', 'VBR_projected', 'Team_actual',
       'Positions_projected', 'AB_projected', 'R_actual', 'HR_actual',
       'RBI_actual', 'SB_actual', 'AVG_projected', 'OBP_projected',
       'H_projected', '2B_projected', '3B_projected', 'BB_actual',
       'SO_projected', 'SLG_projected', 'OPS_projected',
       'Points_projected', 'Own_projected', 'Score_projected',
       'VBR_actual', 'Team_projected', 'Positions_actual', 'AB_actual',
       'R_projected', 'HR_projected', 'RBI_projected', 'SB_projected',
   

In [40]:
# Find Max expected performers in all player values for all player categories
max_players = {}

for key, frame in frames.items():
    idx = frame.groupby(['PV'])['Score'].transform(max) == frame['Score']
    max_players[key] = frame[idx]

In [41]:
# Build all possible max player teams into a dataframe
columns = ["C/DH", "1B/3B", "2B/SS", "OF1", "OF2", "SP1", "SP2", "RP", "PV Total", "xScore"]
data = []

for cdh_index, cdh in max_players["C/DH"].iterrows():
    for fbtb_index, fbtb in max_players["1B/3B"].iterrows():
        for sbss_index, sbss in max_players["2B/SS"].iterrows():
            for of1_index, of1 in max_players["OF"].iterrows():
                for of2_index, of2 in max_players["OF"].iterrows():
                    if of1["Player"] == of2["Player"]:
                        continue
                    if of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 18:
                        continue
                    for sp1_index, sp1 in max_players["SP"].iterrows():
                        if sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 19:
                            continue
                        for sp2_index, sp2 in max_players["SP"].iterrows():
                            if sp1["Player"] == sp2["Player"]:
                                continue
                            if sp2['PV'] + sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 20:
                                continue
                            for rp_index, rp in max_players["RP"].iterrows():
                                if rp['PV'] + sp2['PV'] + sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] > 20:
                                    continue
                                data.append([cdh['Player'], fbtb['Player'], sbss['Player'], of1['Player'],
                                             of2['Player'], sp1['Player'], sp2['Player'], rp['Player'],
                                             rp['PV'] + sp1['PV'] + sp2['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] 
                                             + fbtb['PV'] + cdh['PV'],
                                             rp['Score'] + sp1['Score'] + sp2['Score'] + of2['Score'] + of1['Score']
                                             + sbss['Score'] + fbtb['Score'] + cdh['Score']])

cols = zip(*data)
tbl_d = {name: col for name, col in zip(columns, cols)}
teams = pd.DataFrame(tbl_d, columns=columns)

In [42]:
# Analyze Teams
teams.sort_values('xScore', ascending=False).head(25)

Unnamed: 0,C/DH,1B/3B,2B/SS,OF1,OF2,SP1,SP2,RP,PV Total,xScore
20841,Miguel Montero,Nolan Arenado,Scooter Gennett,Bryce Harper,Lewis Brinson,Masahiro Tanaka,Jacob deGrom,Brad Hand,20,228.3
20832,Miguel Montero,Nolan Arenado,Scooter Gennett,Bryce Harper,Lewis Brinson,Jacob deGrom,Masahiro Tanaka,Brad Hand,20,228.3
21000,Miguel Montero,Nolan Arenado,Scooter Gennett,Lewis Brinson,Bryce Harper,Jacob deGrom,Masahiro Tanaka,Brad Hand,20,228.3
21009,Miguel Montero,Nolan Arenado,Scooter Gennett,Lewis Brinson,Bryce Harper,Masahiro Tanaka,Jacob deGrom,Brad Hand,20,228.3
1885,Shin-Soo Choo,Jose Ramirez,Scooter Gennett,Lewis Brinson,Bryce Harper,Masahiro Tanaka,Jacob deGrom,Brad Hand,20,228.07
1708,Shin-Soo Choo,Jose Ramirez,Scooter Gennett,Bryce Harper,Lewis Brinson,Jacob deGrom,Masahiro Tanaka,Brad Hand,20,228.07
1717,Shin-Soo Choo,Jose Ramirez,Scooter Gennett,Bryce Harper,Lewis Brinson,Masahiro Tanaka,Jacob deGrom,Brad Hand,20,228.07
1876,Shin-Soo Choo,Jose Ramirez,Scooter Gennett,Lewis Brinson,Bryce Harper,Jacob deGrom,Masahiro Tanaka,Brad Hand,20,228.07
22605,Miguel Montero,Jose Ramirez,Scooter Gennett,Bryce Harper,Jesse Winker,Jacob deGrom,Masahiro Tanaka,Raisel Iglesias,20,227.34
22618,Miguel Montero,Jose Ramirez,Scooter Gennett,Bryce Harper,Jesse Winker,Masahiro Tanaka,Jacob deGrom,Raisel Iglesias,20,227.34


In [23]:
max_players["1B/3B"].columns.values

array(['PV', 'Player', 'Team_actual', 'GP', 'S', 'D', 'T', 'HR_actual',
       'R_actual', 'RBI_actual', 'SB_actual', 'BB_actual', 'PTS', 'First',
       'Last', 'VBR', 'Team_projected', 'Positions', 'AB', 'R_projected',
       'HR_projected', 'RBI_projected', 'SB_projected', 'AVG', 'OBP', 'H',
       '2B', '3B', 'BB_projected', 'SO', 'SLG', 'OPS', 'Points', 'Own'],
      dtype=object)