In [1]:
# Setup
import urllib3
import certifi
from bs4 import BeautifulSoup
import pandas as pd
import json
import time

http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())

In [2]:
# Define score functions
def pitcher_score(df):
    return (df['W'] * 20) + (df['SV'] * 10) + df['K'] + df['IP'] + (df['ER'] * -1) + (df['L'] * -5)


def hitter_score(df):
    return (df['HR'] * 4) + (df['3B'] * 3) + (df['2B'] * 2) + df['H'] + df['R'] + df['RBI'] + df['BB'] + df['SB']

In [3]:
# Pull Sportsnet Player Lists
base_url = "https://fantasy.sportsnet.ca/sportsnet/baseball20/player_stats"
group_param = "pick_stats_group"

scrape_pages = {
    "C/DH": 1,
    "1B/3B": 2,
    "2B/SS": 3,
    "OF": 4,
    "SP": 5,
    "RP": 6
}

player_frames = {}

for key, value in scrape_pages.items():

    response = http.request("GET", base_url + "?" + group_param + "=" + str(value))
    
    soup = BeautifulSoup(response.data, 'html.parser')
    table = soup.find('table', attrs={'id': 'playerstats'})
    header = [th.text for th in table.find('thead').select('th')]
    players = [[td.text.strip().split('\n')[0] for td in row.select('td')] for row in table.tbody.find_all('tr')]
    cols = zip(*players)
    tbl_d = {name: col for name, col in zip(header, cols)}
    df = pd.DataFrame(tbl_d, columns=header)
    name = df['Player'].str.split(', ')
    df["First"] = name.str[1]
    df['Last'] = name.str[0]
    df['Player'] = name.str[1] + " " + name.str[0]
    player_frames[key] = df.apply(pd.to_numeric, errors='ignore')


In [5]:
# Does not seem to work in 2020 - endpoint does not exist
# Load Injury list from MLB
# injury_url = "http://mlb.mlb.com/fantasylookup/json/named.wsfb_news_injury.bam"

# response = http.request("GET", injury_url, headers={'Accept-Encoding': 'UTF-8'})
# data = json.loads(response.data)
# players = data['wsfb_news_injury']['queryResults']['row']
# injuries = pd.DataFrame(players)
# injuries['Player'] = injuries['name_first'] + " " + injuries['name_last']

In [6]:
# Load Fantasy Pro projections
hitters = pd.read_csv("../data/Numberfile_2020_Jul_22_H.csv")
hitters['Score'] = hitter_score(hitters)

pitchers = pd.read_csv("../data/Numberfile_2020_Jul_22_P.csv")
pitchers['Score'] = pitcher_score(pitchers)


In [12]:
# Join Sportsnet data with projections and Injury report
frames = {}

for key, value in player_frames.items():
    print(key + ":")
    merge_frame = hitters
    if key == "SP" or key == "RP":
        merge_frame = pitchers
    print("Original Size: " + str(len(value.index)))
    result = pd.merge(value, merge_frame, on='Player', suffixes=['_actual', '_projected'])
    print("Projection Size: " + str(len(result.index)))
# Skip Inury merge
#     result = pd.merge(result, injuries, how='left', on='Player', suffixes=['', '_injuries'])
#     result = result[result['injury_status'].isnull()]
#     print("Injuries Size: " + str(len(result.index)))
    print("")
    frames[key] = result


C/DH:
Original Size: 95
Projection Size: 56

1B/3B:
Original Size: 113
Projection Size: 61

2B/SS:
Original Size: 103
Projection Size: 64

OF:
Original Size: 131
Projection Size: 94

SP:
Original Size: 183
Projection Size: 86

RP:
Original Size: 106
Projection Size: 0



In [8]:
# Find Max expected performers in all player values for all player categories
max_players = {}

for key, frame in frames.items():
    max_num = 1
    if key == "OF" or key == "SP":
        max_num = 2
    max_p = frame.sort_values('Score', ascending=False).groupby(['PV']).head(max_num).reset_index(drop=True)
    max_players[key] = max_p

In [21]:
#Build all possible max player teams into a dataframe
columns = ["C/DH", "1B/3B", "2B/SS", "OF1", "OF2", "SP1", "SP2", "RP", "PV Total", "xScore"]
data = []

start_time = time.time()

for cdh_index, cdh in max_players["C/DH"].iterrows():
    for fbtb_index, fbtb in max_players["1B/3B"].iterrows():
        for sbss_index, sbss in max_players["2B/SS"].iterrows():
            for of1_index, of1 in max_players["OF"].iterrows():
                for of2_index, of2 in max_players["OF"].iterrows():
                    if of1["Player"] == of2["Player"]:
                        continue
                    if of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 18:
                        continue
                    for sp1_index, sp1 in max_players["SP"].iterrows():
                        if sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 19:
                            continue
                        for sp2_index, sp2 in max_players["SP"].iterrows():
                            if sp1["Player"] == sp2["Player"]:
                                continue
                            if sp2['PV'] + sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] >= 20:
                                continue
                            for rp_index, rp in max_players["RP"].iterrows():
                                if rp['PV'] + sp2['PV'] + sp1['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] + fbtb['PV'] + cdh['PV'] > 20:
                                    continue
                                data.append([cdh['Player'], fbtb['Player'], sbss['Player'], of1['Player'],
                                             of2['Player'], sp1['Player'], sp2['Player'], rp['Player'],
                                             rp['PV'] + sp1['PV'] + sp2['PV'] + of2['PV'] + of1['PV'] + sbss['PV'] 
                                             + fbtb['PV'] + cdh['PV'],
                                             rp['Score'] + sp1['Score'] + sp2['Score'] + of2['Score'] + of1['Score']
                                             + sbss['Score'] + fbtb['Score'] + cdh['Score']])

cols = zip(*data)
tbl_d = {name: col for name, col in zip(columns, cols)}
teams = pd.DataFrame(tbl_d, columns=columns)

elapsed_time = time.time() - start_time
print("Elapsed Time: " + str(round(elapsed_time / 60)) + " minutes")


Elapsed Time: 1 minutes


In [23]:
# Analyze Teams
teams.sort_values('PV Total', ascending=False).head(25)

Unnamed: 0,C/DH,1B/3B,2B/SS,OF1,OF2,SP1,SP2,RP,PV Total,xScore
0,Nelson Cruz,Justin Turner,Marcus Semien,Cody Bellinger,Mike Yastrzemski,Alex Wood,Mike Fiers,19.0,138.8,
3,Nelson Cruz,Justin Turner,Marcus Semien,Cody Bellinger,Mike Yastrzemski,Mike Fiers,Alex Wood,19.0,138.8,
128,Nelson Cruz,Justin Turner,Marcus Semien,Mike Yastrzemski,Cody Bellinger,Alex Wood,Mike Fiers,19.0,138.8,
131,Nelson Cruz,Justin Turner,Marcus Semien,Mike Yastrzemski,Cody Bellinger,Mike Fiers,Alex Wood,19.0,138.8,
1,Nelson Cruz,Justin Turner,Marcus Semien,Cody Bellinger,Mike Yastrzemski,Alex Wood,Martin Perez,19.0,138.75,
5,Nelson Cruz,Justin Turner,Marcus Semien,Cody Bellinger,Mike Yastrzemski,Martin Perez,Alex Wood,19.0,138.75,
129,Nelson Cruz,Justin Turner,Marcus Semien,Mike Yastrzemski,Cody Bellinger,Alex Wood,Martin Perez,19.0,138.75,
133,Nelson Cruz,Justin Turner,Marcus Semien,Mike Yastrzemski,Cody Bellinger,Martin Perez,Alex Wood,19.0,138.75,
67,Nelson Cruz,Justin Turner,Marcus Semien,Mookie Betts,Mike Yastrzemski,Mike Fiers,Alex Wood,19.0,138.65,
138,Nelson Cruz,Justin Turner,Marcus Semien,Mike Yastrzemski,Mookie Betts,Alex Wood,Mike Fiers,19.0,138.65,
