In [5]:
import numpy as np
import pandas as pd
pd.options.display.max_columns = None
from pybaseball import batting_stats, pitching_stats, cache, playerid_lookup, statcast_batter, statcast_pitcher

# Turn on pybaseball's cache so repeated data requests can be served from
# saved results (presumably stored on local disk — confirm against pybaseball docs).
cache.enable()
# Ask the cache to store its entries in CSV format.
cache.config.cache_type='csv'
# Save the cache configuration (presumably so it persists across sessions — confirm).
cache.config.save()


In [34]:
# League-wide auction settings.
num_teams = 12  # number of teams in the league
num_dollars = 260  # presumably the auction budget per team — confirm
tot_dollars = num_teams * num_dollars  # product of the two settings above

def _load_with_auction_values(proj_path, auction_path, drop_cols):
    """Read one projection CSV and attach position and dollar value by player id."""
    projections = pd.read_csv(proj_path)
    # The auction-calculator export names its columns differently from the
    # projection export; align them before joining.
    values = pd.read_csv(auction_path).rename(
        columns={'PlayerId': 'playerid', 'POS': 'Pos'}
    )
    merged = (
        projections
        # Join explicitly on the player id instead of relying on whichever
        # column names the two frames happen to share. Inner join: players
        # without an auction value are dropped.
        .merge(values[['playerid', 'Pos', 'Dollars']], on='playerid')
        .drop(columns=drop_cols)
    )
    # 'SS/OF' -> 'SS, OF'. The vectorised form leaves a missing position as
    # NaN instead of raising on a non-string value.
    merged['Pos'] = merged['Pos'].str.replace('/', ', ', regex=False)
    return merged


def load_data():
    """Load the 2022 FanGraphs projections with auction values attached.

    Reads the hitter and pitcher projection CSVs and the matching
    auction-calculator CSVs from the ``data/`` directory (relative to the
    current working directory), joins each pair on ``playerid``, drops the
    columns that are not used, and rewrites multi-position strings from
    ``'SS/OF'`` to ``'SS, OF'``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(h, p)``: the hitter table and the pitcher table, each with ``Pos``
        and ``Dollars`` columns added.
    """
    h = _load_with_auction_values(
        'data/2022-fangraphs-proj-h.csv',
        'data/2022-fangraphs-auction-calculator-h.csv',
        drop_cols=['wOBA', 'CS', 'Fld', 'BsR', 'ADP'],
    )
    p = _load_with_auction_values(
        'data/2022-fangraphs-proj-p.csv',
        'data/2022-fangraphs-auction-calculator-p.csv',
        drop_cols=['ADP'],
    )
    return h, p

def calc_z(x, sample_mean, sample_std):
    """Return the z-score of ``x``: its offset from ``sample_mean`` divided by ``sample_std``."""
    offset_from_mean = x - sample_mean
    return offset_from_mean / sample_std

In [23]:
# Build the hitter (h) and pitcher (p) tables, then preview the first hitter rows.
h, p = load_data()
h.head()

Unnamed: 0,Name,Team,G,PA,AB,H,2B,3B,HR,R,RBI,BB,SO,HBP,SB,AVG,OBP,SLG,OPS,WAR,playerid,Pos,Dollars
0,Juan Soto,WSN,154,665,519,161,29,2,37,112,106,135,96,5,12,0.31,0.453,0.586,1.039,7.2,20123,OF,37.932103
1,Fernando Tatis Jr.,SDP,151,651,564,158,30,2,44,112,103,72,161,7,25,0.281,0.366,0.575,0.941,6.7,19709,"SS, OF",40.784208
2,Mike Trout,LAA,147,637,511,138,25,3,37,100,101,109,147,11,7,0.271,0.406,0.546,0.952,6.1,10155,OF,25.942635
3,Vladimir Guerrero Jr.,TOR,154,665,575,178,31,2,44,109,122,77,102,6,4,0.309,0.394,0.598,0.992,6.0,19611,"1B, DH",40.252202
4,Jose Ramirez,CLE,154,665,572,152,35,4,34,97,101,80,98,7,23,0.266,0.36,0.52,0.88,5.7,13510,"3B, DH",29.985799


In [24]:
# Preview the first rows of the pitcher table returned by load_data().
p.head()

Unnamed: 0,Name,Team,W,L,SV,HLD,ERA,GS,G,IP,H,ER,HR,SO,BB,WHIP,K/9,BB/9,FIP,WAR,playerid,Pos,Dollars
0,Jacob deGrom,NYM,12,6,0,0,2.33,26,26,152.0,103,39,14,228,33,0.9,13.49,1.96,2.08,6.1,10954,SP,41.513278
1,Gerrit Cole,NYY,15,8,0,0,3.29,32,32,201.0,159,74,27,266,53,1.05,11.92,2.35,3.16,5.4,13125,SP,33.074812
2,Corbin Burnes,MIL,13,8,0,0,2.99,29,29,174.0,136,58,17,230,50,1.07,11.9,2.61,2.81,5.2,19361,SP,30.502972
3,Zack Wheeler,PHI,14,10,0,0,3.33,32,32,206.0,183,76,23,223,50,1.13,9.76,2.19,3.31,5.1,10310,SP,25.95718
4,Max Scherzer,NYM,13,8,0,0,3.13,31,31,189.0,147,66,24,251,46,1.02,11.97,2.2,3.03,5.1,3137,SP,33.608312


In [56]:
# Total number of roster slots listed in drafted_by_pos, summed over all positions.
# NOTE: drafted_by_pos is defined in the cell that follows this one in the
# notebook, so on a fresh kernel that cell has to be run first.
sum(drafted_by_pos.values())

168

In [48]:
# Hitter roster slots each team fills at every position.
slots_per_team = {
    'C': 1,
    '1B': 1,
    '2B': 1,
    '3B': 1,
    'SS': 1,
    'OF': 5,
    'MI': 1,
    'CI': 1,
    'DH': 2,
}
# League-wide count of players drafted at each position. Derived from
# num_teams (set in the league-settings cell) so the two cannot drift apart.
drafted_by_pos = {pos: slots * num_teams for pos, slots in slots_per_team.items()}
drafted_by_pos

{'C': 12,
 '1B': 12,
 '2B': 12,
 '3B': 12,
 'SS': 12,
 'OF': 60,
 'MI': 12,
 'CI': 12,
 'DH': 24}

In [135]:
# Boolean mask of catcher-eligible hitters that have not been claimed yet.
# The 'Used' column is only created by the position-averaging cell further
# down the notebook; when it does not exist yet (fresh kernel, top-to-bottom
# run) treat every hitter as unused instead of failing with a KeyError.
if 'Used' in h.columns:
    already_used = h['Used'].astype(bool)
else:
    already_used = pd.Series(False, index=h.index)
mask = h['Pos'].str.contains('C') & ~already_used
mask

0      False
1      False
2      False
3      False
4      False
       ...  
623    False
624    False
625    False
626     True
627    False
Length: 628, dtype: bool

In [133]:
# Distinct names among the first 12 catcher-eligible hitters, in table order.
(
    h.loc[h['Pos'].str.contains('C'), 'Name']
    .head(12)
    .unique()
)

array(['Yasmani Grandal', 'J.T. Realmuto', 'Will Smith', 'Sean Murphy',
       'Salvador Perez', 'Adley Rutschman', 'Max Stassi',
       'Jacob Stallings', 'Keibert Ruiz', 'Danny Jansen', 'Mike Zunino',
       'Tyler Stephenson'], dtype=object)

In [136]:
# Average projected stat line of the first `top_n` available hitters at each
# position. Positions are processed in the order listed; a hitter claimed by
# an earlier position is flagged in 'Used' and excluded from later positions.
stat_cols = ['PA', 'AB', 'H', 'HR', 'RBI', 'R', 'SB']
# NOTE(review): every position uses 12 players, while drafted_by_pos lists
# 60 OF and 24 DH slots — confirm whether a per-position count is intended.
top_n = 12

pos_avg = {}
h['Used'] = False  # reset the flag so re-running this cell starts clean
for position in ['C', '2B', '1B', 'OF', '3B', 'SS', 'DH']:
    # Select the eligible, still-unclaimed hitters once per position rather
    # than re-filtering the table for every statistic.
    eligible = h[h['Pos'].str.contains(position) & ~h['Used']]
    top_idx = eligible.index[:top_n]  # first rows in table order
    print(position, '\n', eligible['Name'].head(top_n))

    pos_avg[position] = {
        'total_players': len(eligible),
        # Mean of each stat over the selected players, rounded to one decimal.
        **h.loc[top_idx, stat_cols].mean().round(1).to_dict(),
    }

    # Claim these players so later positions cannot count them again.
    h.loc[top_idx, 'Used'] = True

pd.DataFrame(pos_avg).T

C 
 21      Yasmani Grandal
28        J.T. Realmuto
37           Will Smith
60          Sean Murphy
77       Salvador Perez
78      Adley Rutschman
84           Max Stassi
87      Jacob Stallings
95         Keibert Ruiz
99         Danny Jansen
114         Mike Zunino
116    Tyler Stephenson
Name: Name, dtype: object
2B 
 6         Mookie Betts
11         Trea Turner
20       Marcus Semien
29         Jose Altuve
30        Brandon Lowe
31        Ozzie Albies
38         Ketel Marte
39      Gleyber Torres
42           Max Muncy
51         DJ LeMahieu
54      Jonathan India
57    Jake Cronenworth
Name: Name, dtype: object
1B 
 3     Vladimir Guerrero Jr.
16               Matt Olson
19          Freddie Freeman
33              Pete Alonso
36         Paul Goldschmidt
50             Austin Riley
71                Ty France
73               Yandy Diaz
74        Spencer Torkelson
76             Rhys Hoskins
79              Jared Walsh
82           Kyle Schwarber
Name: Name, dtype: object
OF 
 0  

Unnamed: 0,total_players,PA,AB,H,HR,RBI,R,SB
C,105.0,457.8,403.2,99.0,19.4,60.8,55.4,2.1
2B,150.0,642.2,563.2,151.9,24.6,78.2,91.2,10.7
1B,111.0,636.4,550.8,147.7,32.4,93.3,89.8,3.5
OF,291.0,632.9,542.2,150.7,34.3,95.6,96.6,12.0
3B,127.0,632.9,550.9,145.3,26.9,88.3,84.3,5.9
SS,107.0,624.8,558.2,150.7,24.1,80.8,82.8,11.5
DH,81.0,589.9,515.7,133.2,27.8,81.9,81.2,5.9


In [107]:
# Cross-check: mean HR / R / RBI / SB of the 12 highest-WAR 2B-eligible hitters.
(
    h.loc[h['Pos'].str.contains('2B')]
    .sort_values('WAR', ascending=False)
    .head(12)
    .loc[:, ['HR', 'R', 'RBI', 'SB']]
    .mean()
)

HR     24.583333
R      91.250000
RBI    78.250000
SB     10.666667
dtype: float64

In [137]:
# (rows, columns) of the hitters currently flagged as used.
h.loc[h['Used']].shape

(84, 24)

3120