In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

import numpy as np
import pandas as pd
pd.options.display.max_columns = None
from pybaseball import batting_stats, pitching_stats, cache, playerid_lookup, statcast_batter, statcast_pitcher

cache.enable()
cache.config.cache_type='csv'
cache.config.save()


In [2]:
num_teams = 12
num_dollars = 260
player_split = .65
pitcher_split = 1 - player_split
tot_dollars = num_teams * num_dollars

def load_data():
    h = pd.read_csv('data/2022-fangraphs-proj-h.csv')
    h['sorter'] = h['HR']+h['R']+h['RBI']+h['H']+h['SB']
    
    p = pd.read_csv('data/2022-fangraphs-proj-p.csv')
    val_h = pd.read_csv('data/2022-fangraphs-auction-calculator-h.csv')
    val_h.rename(columns={'PlayerId':'playerid', 'POS':'Pos'},inplace=True)
    val_p = pd.read_csv('data/2022-fangraphs-auction-calculator-p.csv')
    val_p.rename(columns={'PlayerId':'playerid', 'POS':'Pos'},inplace=True)
    
    h = h.merge(val_h[['playerid', 'Pos', 'Dollars']])
    h.drop(columns=['wOBA', 'CS', 'Fld', 'BsR', 'ADP'],inplace=True)
    h['Pos'] = h['Pos'].apply(lambda x: ', '.join(x.split('/')))
    h.sort_values('sorter', ascending=False, inplace=True)
    h.reset_index(drop=True)
    
    p = p.merge(val_p[['playerid', 'Pos', 'Dollars']])
    p.drop(columns=['ADP'],inplace=True)
    p['Pos'] = p['Pos'].apply(lambda x: ', '.join(x.split('/')))
    return h, p

def calc_z(x, stat):
    z = (x - drafted[stat].mean()) / drafted[stat].std()
    return z

def find_primary_pos(p):
    pos_list = p.split(', ')
    pos_hierarchy = ['C', '2B', '1B', 'OF', '3B', 'SS', 'DH']
    for i in pos_hierarchy:
        if i in pos_list:
            return i

In [3]:
h, p = load_data()
h['Primary_Pos'] = h['Pos'].apply(lambda x: find_primary_pos(x))
h.head()

Unnamed: 0,Name,Team,G,PA,AB,H,2B,3B,HR,R,RBI,BB,SO,HBP,SB,AVG,OBP,SLG,OPS,WAR,playerid,sorter,Pos,Dollars,Primary_Pos
3,Vladimir Guerrero Jr.,TOR,154,665,575,178,31,2,44,109,122,77,102,6,4,0.309,0.394,0.598,0.992,6.0,19611,457,"1B, DH",40.252202,1B
1,Fernando Tatis Jr.,SDP,151,651,564,158,30,2,44,112,103,72,161,7,25,0.281,0.366,0.575,0.941,6.7,19709,442,"SS, OF",40.784208,OF
0,Juan Soto,WSN,154,665,519,161,29,2,37,112,106,135,96,5,12,0.31,0.453,0.586,1.039,7.2,20123,428,OF,37.932103,OF
13,Rafael Devers,BOS,154,665,593,165,37,2,37,97,108,58,136,7,6,0.278,0.346,0.534,0.88,4.8,17350,413,3B,27.907182,3B
17,Bo Bichette,TOR,152,658,601,176,36,2,28,96,94,43,121,6,17,0.292,0.343,0.496,0.839,4.7,19612,411,"SS, DH",28.080286,SS


In [4]:
p.head()

Unnamed: 0,Name,Team,W,L,SV,HLD,ERA,GS,G,IP,H,ER,HR,SO,BB,WHIP,K/9,BB/9,FIP,WAR,playerid,Pos,Dollars
0,Jacob deGrom,NYM,12,6,0,0,2.33,26,26,152.0,103,39,14,228,33,0.9,13.49,1.96,2.08,6.1,10954,SP,41.513278
1,Gerrit Cole,NYY,15,8,0,0,3.29,32,32,201.0,159,74,27,266,53,1.05,11.92,2.35,3.16,5.4,13125,SP,33.074812
2,Corbin Burnes,MIL,13,8,0,0,2.99,29,29,174.0,136,58,17,230,50,1.07,11.9,2.61,2.81,5.2,19361,SP,30.502972
3,Zack Wheeler,PHI,14,10,0,0,3.33,32,32,206.0,183,76,23,223,50,1.13,9.76,2.19,3.31,5.1,10310,SP,25.95718
4,Max Scherzer,NYM,13,8,0,0,3.13,31,31,189.0,147,66,24,251,46,1.02,11.97,2.2,3.03,5.1,3137,SP,33.608312


In [5]:
drafted_by_pos = {
    'C':12,
    '1B':12,
    '2B':12,
    '3B':12,
    'SS':12,
    'OF':5*12,
    'MI':12,
    'CI':12,
    'DH':12*2
}
drafted_by_pos

{'C': 12,
 '1B': 12,
 '2B': 12,
 '3B': 12,
 'SS': 12,
 'OF': 60,
 'MI': 12,
 'CI': 12,
 'DH': 24}

In [6]:
pos_avg = {}
pos_std = {}
h['Used'] = False
for position in ['C', '2B', '1B', 'OF', '3B', 'SS']:
    mask = (h['Pos'].str.contains(position)) & (h['Used']==False)
    pos_avg[position], pos_std[position] = {}, {}
    
    for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
        pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
        pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
        for j in h[mask].index[:drafted_by_pos[position]]:
            h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]
    
    h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
    h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
    h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
    print(position+':\n',h.loc[h[mask].index[:drafted_by_pos[position]]]['Name'].unique())
    h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True

for position in ['MI', 'CI']:
    if position == 'MI':
        pos_avg[position], pos_std[position] = {}, {}
        mask = ((h['Pos'].str.contains('SS')) & (h['Used']==False)) | ((h['Pos'].str.contains('2B')) & (h['Used']==False))
        for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
            pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
            pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
            for j in h[mask].index[:drafted_by_pos[position]]:
                h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]
        
        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
        print(position+':\n',h.loc[h[mask].index[:12]]['Name'].unique())
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True
        
    elif position == 'CI':
        pos_avg[position], pos_std[position] = {}, {}
        mask = ((h['Pos'].str.contains('1B')) & (h['Used']==False)) | ((h['Pos'].str.contains('3B')) & (h['Used']==False))
        for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
            pos_avg[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].mean(),1)
            pos_std[position][stat] = round(h.loc[h[mask].index[:drafted_by_pos[position]], stat].std(),1)
            for j in h[mask].index[:drafted_by_pos[position]]:
                h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg[position][stat]) / pos_std[position][stat]
        
        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
        h.loc[h[mask].index[:drafted_by_pos[position]], 'z'] += abs(h.loc[h[mask].index[:drafted_by_pos[position]]].sort_values('z')['z'].iloc[0])
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Primary_Pos'] = position
        print(position+':\n',h.loc[h[mask].index[:12]]['Name'].unique())
        h.loc[h[mask].index[:drafted_by_pos[position]], 'Used'] = True

pos_avg['DH'], pos_std['DH'] = {}, {}
mask = (h['Used']==False)
for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
    pos_avg['DH'][stat] = round(h.loc[h[mask].index[:24], stat].mean(),1)
    pos_std['DH'][stat] = round(h.loc[h[mask].index[:24], stat].std(),1)
    for j in h[mask].index[:24]:
            h.loc[j, stat+'_z'] = (h.loc[j][stat] - pos_avg['DH'][stat]) / pos_std['DH'][stat]

h.loc[h[mask].index[:24], 'z'] = h['R_z'] + h['RBI_z'] + h['HR_z'] + h['H_z'] + h['SB_z']
h.loc[h[mask].index[:24], 'z'] += abs(h.loc[h[mask].index[:24]].sort_values('z')['z'].iloc[0])
h.loc[h[mask].index[:24], 'Primary_Pos'] = 'DH'
print('DH:\n',h.loc[h[mask].index[:24]]['Name'].unique())
print('DH:\n',h.loc[h[mask].index[:24]].index)
sub_mask = h.loc[h[mask].index[:24]].index
h.loc[h[mask].index[:24], 'Used'] = True

drafted = h[h['Used']==True]

print(pd.DataFrame(pos_avg).T)
print(pd.DataFrame(pos_std).T)

C:
 ['Salvador Perez' 'J.T. Realmuto' 'Yasmani Grandal' 'Will Smith'
 "Travis d'Arnaud" 'Willson Contreras' 'Daulton Varsho' 'Keibert Ruiz'
 'Sean Murphy' 'Tyler Stephenson' 'Adley Rutschman' 'Danny Jansen']
2B:
 ['Trea Turner' 'Ozzie Albies' 'Mookie Betts' 'Marcus Semien'
 'Gleyber Torres' 'Jose Altuve' 'Jorge Polanco' 'Ketel Marte' 'Ty France'
 'Whit Merrifield' 'Ian Happ' 'Brandon Lowe']
1B:
 ['Vladimir Guerrero Jr.' 'Pete Alonso' 'Freddie Freeman' 'Austin Riley'
 'Matt Olson' 'Paul Goldschmidt' 'Jose Abreu' 'Jared Walsh' 'Josh Bell'
 'Ryan Mountcastle' 'Joey Votto' 'Trey Mancini']
OF:
 ['Fernando Tatis Jr.' 'Juan Soto' 'Aaron Judge' 'Shohei Ohtani'
 'Yordan Alvarez' 'Bryce Harper' 'Luis Robert' 'Kyle Tucker'
 'Teoscar Hernandez' 'Mike Trout' 'J.D. Martinez' "Tyler O'Neill"
 'Franmil Reyes' 'George Springer' 'Eloy Jimenez' 'Giancarlo Stanton'
 'Nick Castellanos' 'Cedric Mullins II' 'Bryan Reynolds' 'Byron Buxton'
 'Randy Arozarena' 'Ronald Acuna Jr.' 'Joey Gallo' 'Christian Yelich'


In [25]:
drafted.shape

(168, 34)

In [23]:
tot_z = h[h['Used']==True]['z'].sum()
#h['Value'] = (h['z'] / tot_z) * tot_dollars * player_split

In [20]:
for position in ['C', '2B', '1B', 'OF', '3B', 'SS']:
    for stat in ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']:
        h.loc[(h['Used']==False) & (h['Primary_Pos']==position), stat+'_z'] = (h[stat] - pos_avg[position][stat]) / pos_std[position][stat]

h.loc[h['Used']==False, 'z'] = h['H_z'] + h['HR_z'] + h['RBI_z'] + h['R_z'] + h['SB_z']

In [24]:
h

Unnamed: 0,Name,Team,G,PA,AB,H,2B,3B,HR,R,RBI,BB,SO,HBP,SB,AVG,OBP,SLG,OPS,WAR,playerid,sorter,Pos,Dollars,Primary_Pos,Used,PA_z,AB_z,H_z,HR_z,RBI_z,R_z,SB_z,z,Value
3,Vladimir Guerrero Jr.,TOR,154,665,575,178,31,2,44,109,122,77,102,6,4,0.309,0.394,0.598,0.992,6.0,19611,457,"1B, DH",40.252202,1B,True,0.469136,0.066225,2.033333,1.854545,2.071429,2.022727,0.476190,13.677489,42.005241
1,Fernando Tatis Jr.,SDP,151,651,564,158,30,2,44,112,103,72,161,7,25,0.281,0.366,0.575,0.941,6.7,19709,442,"SS, OF",40.784208,OF,True,1.085635,0.795666,1.368421,2.100000,1.459854,2.669643,2.208333,14.713777,45.187806
0,Juan Soto,WSN,154,665,519,161,29,2,37,112,106,135,96,5,12,0.310,0.453,0.586,1.039,7.2,20123,428,OF,37.932103,OF,True,1.472376,-0.597523,1.593985,1.225000,1.678832,2.669643,0.402778,12.477763,38.320735
13,Rafael Devers,BOS,154,665,593,165,37,2,37,97,108,58,136,7,6,0.278,0.346,0.534,0.880,4.8,17350,413,3B,27.907182,3B,True,0.900344,1.043333,1.085366,1.260274,1.455882,1.447368,-0.278689,7.879671,24.199434
17,Bo Bichette,TOR,152,658,601,176,36,2,28,96,94,43,121,6,17,0.292,0.343,0.496,0.839,4.7,19612,411,"SS, DH",28.080286,SS,True,0.762542,1.044218,1.904000,0.900000,1.367089,1.632911,0.900000,11.789313,36.206421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585,Jacob Amaya,LAD,2,7,6,1,0,0,0,1,1,1,2,0,0,0.228,0.291,0.368,0.659,0.0,sa3005081,3,SS,-34.996832,SS,False,-21.010033,-19.193878,-12.096000,-6.100000,-10.405063,-10.392405,-1.528571,-40.522040,-124.448137
569,Andy Young,ARI,2,7,6,1,0,0,0,1,1,0,2,0,0,0.221,0.291,0.399,0.690,0.0,19521,3,2B,-35.270584,2B,False,-38.389222,-28.686869,-12.304000,-4.921569,-9.728395,-9.911111,-1.594937,-38.460012,-118.115397
564,Brett Sullivan,MIL,2,6,6,1,0,0,0,1,1,0,1,0,0,0.231,0.285,0.358,0.643,0.0,sa857232,3,C,-25.916788,C,False,-7.479624,-6.920792,-6.381818,-3.433333,-4.615942,-5.772277,-1.107143,-21.310514,-65.447192
563,Chadwick Tromp,ATL,2,6,6,1,0,0,0,1,1,0,2,0,0,0.225,0.275,0.367,0.641,0.0,16953,3,C,-25.916788,C,False,-7.479624,-6.920792,-6.381818,-3.433333,-4.615942,-5.772277,-1.107143,-21.310514,-65.447192


#### Next Steps  
<li>Do it again with pitchers
