In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

import numpy as np
import pandas as pd
pd.options.display.max_columns = None
from pybaseball import batting_stats, pitching_stats, cache, playerid_lookup, statcast_batter, statcast_pitcher

cache.enable()
cache.config.cache_type='csv'
cache.config.save()


In [36]:
num_teams = 12
num_dollars = 260
player_split = .65
pitcher_split = 1 - player_split
tot_dollars = num_teams * num_dollars

drafted_by_pos = {
    'C':12,
    '1B':12,
    '2B':12,
    '3B':12,
    'SS':12,
    'OF':5*12,
    'MI':12,
    'CI':12,
    'DH':12*2, 
    'P':9
}

def load_data():
    h = pd.read_csv('data/2022-fangraphs-proj-h.csv')
    h['sorter'] = h['HR']+h['R']+h['RBI']+h['H']+h['SB']
    
    p = pd.read_csv('data/2022-fangraphs-proj-p.csv')
    val_h = pd.read_csv('data/2022-fangraphs-auction-calculator-h.csv')
    val_h.rename(columns={'PlayerId':'playerid', 'POS':'Pos'},inplace=True)
    val_p = pd.read_csv('data/2022-fangraphs-auction-calculator-p.csv')
    val_p.rename(columns={'PlayerId':'playerid', 'POS':'Pos'},inplace=True)
    
    h = h.merge(val_h[['playerid', 'Pos', 'Dollars']])
    h.drop(columns=['wOBA', 'CS', 'Fld', 'BsR', 'ADP'],inplace=True)
    h['Pos'] = h['Pos'].apply(lambda x: ', '.join(x.split('/')))
    h.sort_values('sorter', ascending=False, inplace=True)
    h.reset_index(drop=True)
    
    p = p.merge(val_p[['playerid', 'Pos', 'Dollars']])
    p.drop(columns=['ADP'],inplace=True)
    p['Sv+Hld'] = p['SV']+p['HLD']
    p['Pos'] = p['Pos'].apply(lambda x: ', '.join(x.split('/')))
    p['sorter'] = p['SO']+(p['Sv+Hld']*4)+p['W']
    p.sort_values('sorter', ascending=False, inplace=True)
    p.reset_index(drop=True)
    return h, p

def calc_z(x, stat):
    z = (x - drafted[stat].mean()) / drafted[stat].std()
    return z

def find_primary_pos(p):
    pos_list = p.split(', ')
    pos_hierarchy = ['C', '2B', '1B', 'OF', '3B', 'SS', 'DH', 'SP', 'RP', 'P']
    for i in pos_hierarchy:
        if i in pos_list:
            return i

In [16]:
def process_top_hitters():
    pos_avg = {}
    pos_std = {}
    h['Used'] = False
    for position in ['C', '2B', '1B', 'OF', '3B', 'SS']:
        mask = (h['Pos'].str.contains(position)) & (h['Used']==False)
        pos_avg[position], pos_std[position] = {}, {}

        for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
            pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
            pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
            for j in h[mask].index[:drafted_by_pos[position]]:
                h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]

        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
        #print(position+':\n',h.loc[h[mask].index[:drafted_by_pos[position]]]['Name'].unique())
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True

    for position in ['MI', 'CI']:
        if position == 'MI':
            pos_avg[position], pos_std[position] = {}, {}
            mask = ((h['Pos'].str.contains('SS')) & (h['Used']==False)) | ((h['Pos'].str.contains('2B')) & (h['Used']==False))
            for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
                pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
                pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
                for j in h[mask].index[:drafted_by_pos[position]]:
                    h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]

            h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
            h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
            h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
            #print(position+':\n',h.loc[h[mask].index[:12]]['Name'].unique())
            h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True

        elif position == 'CI':
            pos_avg[position], pos_std[position] = {}, {}
            mask = ((h['Pos'].str.contains('1B')) & (h['Used']==False)) | ((h['Pos'].str.contains('3B')) & (h['Used']==False))
            for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
                pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
                pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
                for j in h[mask].index[:drafted_by_pos[position]]:
                    h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]

            h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
            h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
            h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
            #print(position+':\n',h.loc[h[mask].index[:12]]['Name'].unique())
            h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True

    pos_avg['DH'], pos_std['DH'] = {}, {}
    mask = (h['Used']==False)
    for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
        pos_avg['DH'][stat] = round(h.loc[h[mask].index[:24], stat].mean(),1)
        pos_std['DH'][stat] = round(h.loc[h[mask].index[:24], stat].std(),1)
        for j in h[mask].index[:24]:
                h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg['DH'][stat]) / pos_std['DH'][stat]

    h.loc[h[mask].index[:24], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
    h.loc[h[mask].index[:24], 'z'] += abs(h.loc[h[mask].index[:24]].sort_values('z')['z'].iloc[0])
    h.loc[h[mask].index[:24], 'Primary_Pos'] = 'DH'
    #print('DH:\n',h.loc[h[mask].index[:24]]['Name'].unique())
    #print('DH:\n',h.loc[h[mask].index[:24]].index)
    sub_mask = h.loc[h[mask].index[:24]].index
    h.loc[h[mask].index[:24], 'Used'] = True
    
    if len(h[h['Used']==True])!=14*num_teams:
        print('drafted list not right')
    return pos_avg, pos_std


In [17]:
def process_rem_hitters(pos_avg, pos_std):
    for position in ['C', '2B', '1B', 'OF', '3B', 'SS']:
        for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
            h.loc[(h['Used']==False) & (h['Primary_Pos']==position), stat+'_z'] = (h[stat] - pos_avg[position][stat]) / pos_std[position][stat]

    h.loc[h['Used']==False, 'z'] = h['H_z'] + h['HR_z'] + h['RBI_z'] + h['R_z'] + h['SB_z']
    return

In [37]:
h, p = load_data()
h['Primary_Pos'] = h['Pos'].apply(lambda x: find_primary_pos(x))
p['Primary_Pos'] = p['Pos'].apply(lambda x: find_primary_pos(x))
pos_avg, pos_std = process_top_hitters()
process_rem_hitters(pos_avg, pos_std)
tot_z = h[h['Used']==True]['z'].sum()
h['Value'] = (h['z'] / tot_z) * tot_dollars * player_split
h.sort_values('Value', ascending=False).head(10)

Unnamed: 0,Name,Team,G,PA,AB,H,2B,3B,HR,R,RBI,BB,SO,HBP,SB,AVG,OBP,SLG,OPS,WAR,playerid,sorter,Pos,Dollars,Primary_Pos,Used,PA_z,AB_z,H_z,HR_z,RBI_z,R_z,SB_z,z,Value
1,Fernando Tatis Jr.,SDP,151,651,564,158,30,2,44,112,103,72,161,7,25,0.281,0.366,0.575,0.941,6.7,19709,442,"SS, OF",40.784208,OF,True,1.085635,0.795666,1.368421,2.1,1.459854,2.669643,2.208333,14.713777,45.187806
3,Vladimir Guerrero Jr.,TOR,154,665,575,178,31,2,44,109,122,77,102,6,4,0.309,0.394,0.598,0.992,6.0,19611,457,"1B, DH",40.252202,1B,True,0.469136,0.066225,2.033333,1.854545,2.071429,2.022727,0.47619,13.677489,42.005241
77,Salvador Perez,KCR,149,601,560,146,26,1,36,79,99,26,147,10,1,0.26,0.302,0.504,0.806,2.7,7304,361,"C, DH",25.0558,C,True,1.846395,2.221122,2.406061,2.566667,2.485507,1.950495,-0.75,12.644014,38.831314
0,Juan Soto,WSN,154,665,519,161,29,2,37,112,106,135,96,5,12,0.31,0.453,0.586,1.039,7.2,20123,428,OF,37.932103,OF,True,1.472376,-0.597523,1.593985,1.225,1.678832,2.669643,0.402778,12.477763,38.320735
17,Bo Bichette,TOR,152,658,601,176,36,2,28,96,94,43,121,6,17,0.292,0.343,0.496,0.839,4.7,19612,411,"SS, DH",28.080286,SS,True,0.762542,1.044218,1.904,0.9,1.367089,1.632911,0.9,11.789313,36.206421
45,Shohei Ohtani,LAA,147,637,537,138,24,5,38,103,96,86,176,5,23,0.257,0.363,0.533,0.895,3.5,19755,398,"P, OF, DH",30.421044,OF,True,0.698895,-0.040248,-0.135338,1.35,0.948905,1.866071,1.930556,10.867719,33.376094
5,Aaron Judge,NYY,152,658,562,154,25,1,40,103,107,83,170,5,5,0.274,0.37,0.537,0.907,5.7,15640,409,"OF, DH",28.931068,OF,True,1.279006,0.733746,1.067669,1.6,1.751825,1.866071,-0.569444,10.623646,32.626516
15,Bryce Harper,PHI,154,665,540,145,32,1,35,102,101,113,150,7,12,0.269,0.398,0.531,0.929,4.8,11579,395,OF,27.485023,OF,True,1.472376,0.052632,0.390977,0.975,1.313869,1.776786,0.402778,9.766935,29.995451
14,Luis Robert,CHW,149,644,586,167,33,2,30,90,92,42,143,8,15,0.285,0.339,0.505,0.843,4.8,20043,394,OF,25.322911,OF,True,0.892265,1.47678,2.045113,0.35,0.656934,0.705357,0.819444,9.484374,29.127672
24,Kyle Tucker,HOU,147,637,567,158,33,4,32,88,97,58,110,4,15,0.278,0.348,0.522,0.87,4.4,18345,390,OF,25.381511,OF,True,0.698895,0.888545,1.368421,0.6,1.021898,0.526786,0.819444,9.244074,28.389682


#### Next Steps  
<li>BA: H - (AB * (lgH/lgAB))
<li>Do it again with pitchers


In [83]:
#(p['ER']*9) - (p['IP'] * ((lgERsum * 9)/(lgIPsum))) * -1
p['xER'] = ((p['ER']*9) - (p['IP'] * (p['ER'].sum()*9)/p['IP'].sum())) * -1

In [84]:
p_avg = p.iloc[:108][['W', 'SO', 'Sv+Hld', 'xER', 'H', 'BB']].mean()
p_std = p.iloc[:108][['W', 'SO', 'Sv+Hld', 'xER', 'H', 'BB']].std()

In [85]:
for i in ['W', 'SO', 'Sv+Hld', 'xER', 'H', 'BB']:
    p[i+'_z'] = p[i].apply(lambda x: (x - p_avg[i]) / p_std[i])

In [86]:
p['z'] = p['W_z'] + p['SO_z'] + p['Sv+Hld_z'] + p['xER']

In [89]:
p[p['z']>0]['z'].sum()

10231.292497176375

In [87]:
p['Value'] = (p['z'] / p[p['z']>0]['z'].sum()) * tot_dollars * (1-player_split)

In [88]:
p

Unnamed: 0,Name,Team,W,L,SV,HLD,ERA,GS,G,IP,H,ER,HR,SO,BB,WHIP,K/9,BB/9,FIP,WAR,playerid,Pos,Dollars,Sv+Hld,sorter,Primary_Pos,W_z,SO_z,Sv+Hld_z,ER_z,H_z,BB_z,z,Value,xER,xER_z
1,Gerrit Cole,NYY,15,8,0,0,3.29,32,32,201.0,159,74,27,266,53,1.05,11.92,2.35,3.16,5.4,13125,SP,33.074812,0,281,SP,1.672486,2.099658,-0.675082,0.607789,0.627789,0.446271,184.245434,19.664770,181.148371,1.910228
14,Robbie Ray,SEA,12,11,0,0,3.68,32,32,194.0,154,79,30,254,72,1.17,11.81,3.34,3.74,3.6,11486,SP,19.541221,0,266,SP,0.876766,1.879642,-0.675082,0.811770,0.532348,1.665643,108.727018,11.604585,106.645692,0.784013
4,Max Scherzer,NYM,13,8,0,0,3.13,31,31,189.0,147,66,24,251,46,1.02,11.97,2.20,3.03,5.1,3137,SP,33.608312,0,264,SP,1.142006,1.824638,-0.675082,0.281419,0.398731,-0.002971,204.863912,21.865409,202.572349,2.234082
6,Shane Bieber,CLE,12,10,0,0,3.36,31,31,193.0,164,72,24,232,54,1.13,10.84,2.52,3.31,4.5,19427,SP,23.898533,0,244,SP,0.876766,1.476280,-0.675082,0.526196,0.723230,0.510449,167.108987,17.835773,165.431023,1.672638
2,Corbin Burnes,MIL,13,8,0,0,2.99,29,29,174.0,136,58,17,230,50,1.07,11.90,2.61,2.81,5.2,19361,SP,30.502972,0,243,SP,1.142006,1.439611,-0.675082,-0.044951,0.188761,0.253739,213.258856,22.761413,211.352321,2.366804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,Alex Claudio,,2,2,0,0,4.18,0,40,40.0,42,19,4,31,15,1.43,6.90,3.41,4.34,0.0,12890,RP,-8.373443,0,33,RP,-1.775635,-2.208987,-0.675082,-1.636006,-1.605528,-1.992473,-7.072963,-0.754907,-2.413259,-0.864571
551,Robert Gsellman,,2,2,0,0,4.65,0,39,39.0,41,20,6,31,14,1.43,7.16,3.33,4.70,0.0,13696,RP,-8.959704,0,33,RP,-1.775635,-2.208987,-0.675082,-1.595210,-1.624616,-2.056650,-20.287632,-2.165327,-15.627928,-1.064329
521,Matt Peacock,ARI,2,2,0,1,4.33,0,36,36.0,40,17,4,26,13,1.45,6.49,3.15,4.27,0.0,20339,RP,-8.670840,1,32,RP,-1.775635,-2.300661,-0.600227,-1.717599,-1.643705,-2.120828,-5.948456,-0.634887,-1.271934,-0.847318
562,Adam Kolarek,OAK,2,2,0,1,4.35,0,38,38.0,41,18,4,24,15,1.47,5.77,3.52,4.66,0.0,10843,RP,-9.035577,1,30,RP,-1.775635,-2.337330,-0.600227,-1.676803,-1.624616,-1.992473,-6.555788,-0.699708,-1.842596,-0.855944
