In [1]:
from pybaseball import statcast
from pybaseball.lahman import *
import numpy as np
import pandas as pd

In [107]:
fielding = pd.read_csv('baseballdatabank-2019.2/core/fielding.csv')
fielding = fielding.loc[fielding['yearID'] >= 1970].copy()
print(fielding)

         playerID  yearID  stint teamID lgID POS    G     GS  InnOuts   PO  \
65851   aaronha01    1970      1    ATL   NL  1B   11   10.0    243.0   73   
65852   aaronha01    1970      1    ATL   NL  OF  125  125.0   3180.0  246   
65853   aaronto01    1970      1    ATL   NL  1B   16    3.0    153.0   40   
65854   aaronto01    1970      1    ATL   NL  OF   12    9.0    213.0   13   
65855   abernte02    1970      1    CHN   NL   P   11    0.0     27.0    0   
65856   abernte02    1970      2    SLN   NL   P   11    0.0     54.0    1   
65857   abernte02    1970      3    KCA   AL   P   36    0.0    168.0    3   
65858   acosted01    1970      1    PIT   NL   P    3    0.0      9.0    0   
65859   adairje01    1970      1    KCA   AL  2B    7    7.0    216.0   24   
65860    ageeto01    1970      1    NYN   NL  OF  150  146.0   3957.0  374   
65861   aguirha01    1970      1    CHN   NL   P   17    0.0     42.0    0   
65862    akerja01    1970      1    NYA   AL   P   41    0.0    

In [128]:
averageDSes = {}
minOuts = 100

def calculateDS(InnOuts, PO, A, E, DP):
    
    if (InnOuts.values[0] < minOuts):
        return 0
    
    outRatio = (PO + A + (0.5 * DP)) / InnOuts
    outRatioConstant = 200
    gamesPlayed = InnOuts / 27
    gamesPlayedConstant = 0.1
    errorConstant = 1.0
    
    DS = (outRatioConstant * outRatio) + (gamesPlayedConstant * gamesPlayed) - (errorConstant * E)
    return DS.values[0]

def getAverageDS(year, position):
    if (year, position) in averageDSes:
        return averageDSes[(year, position)]
    
    posData = fielding.loc[(fielding['yearID'] == year) & (fielding['POS'] == position) & (fielding['InnOuts'] > minOuts)].copy()
    posData = posData.groupby('playerID', as_index=True).agg({'G':'sum', 'GS':'sum', 'InnOuts':'sum', 'PO':'sum', 'A':'sum', 'E':'sum', 'DP':'sum'})
    posData['DS'] = calculateDS(posData['InnOuts'], posData['PO'], posData['A'], posData['E'], posData['DP'])
    
    avgDS = sum(posData['DS']) / len(posData['DS'])
    averageDSes[(year, position)] = avgDS
    return avgDS

def getPlayerWDS(playerID, year, position):
    assert year >= 1970,"Year must be >= 1970"
    assert year <= 2018,"Year must be <= 2018"
    
    validPos = ['1B', '2B', '3B', 'SS', 'OF']
    assert position in validPos, "Not a valid position!"
    
    playerData = fielding.loc[(fielding['playerID'] == playerID) & (fielding['yearID'] == year) & (fielding['POS'] == position)].copy()
    playerData = playerData.groupby('playerID', as_index=True).agg({'G':'sum', 'GS':'sum', 'InnOuts':'sum', 'PO':'sum', 'A':'sum', 'E':'sum', 'DP':'sum'})
    pDS = calculateDS(playerData['InnOuts'], playerData['PO'], playerData['A'], playerData['E'], playerData['DP'])
    avgDS = getAverageDS(year, position)
    
    return pDS - avgDS
    
    

In [126]:
getPlayerWDS('arenano01', 2018, '3B')
print(getAverageDS(2018, '3B'))

21.824765063571075


In [130]:
validPos = ['3B']
fielding2018 = fielding.loc[(fielding['yearID'] == 2018) & (fielding['POS'].isin(validPos))]
WDS = {}
for index, row in fielding2018.iterrows():
    WDS[(row['playerID'], row['POS'])] = getPlayerWDS(row['playerID'], row['yearID'], row['POS'])
print(WDS)

{('adriaeh01', '3B'): -3.907985046680551e-14, ('aguilje01', '3B'): -21.824765063571075, ('alberha01', '3B'): -21.824765063571075, ('alfarjo01', '3B'): -21.824765063571075, ('alvarpe01', '3B'): 0.47089266312113764, ('anderbr06', '3B'): -8.335192926754843, ('andujmi01', '3B'): -8.210735754900945, ('arenano01', '3B'): 0.9169283236681061, ('arroych01', '3B'): -9.032233187432642, ('astudwi01', '3B'): -6.669209508015523, ('asuajca01', '3B'): -21.824765063571075, ('baezja01', '3B'): 0.35855096273560605, ('bautijo02', '3B'): -4.0517365326478085, ('beckhgo01', '3B'): -21.824765063571075, ('beckhti01', '3B'): -3.888257127063138, ('beltrad01', '3B'): -2.0203168333988266, ('blandal01', '3B'): -5.049824165225921, ('boteda01', '3B'): 0.8307904919844802, ('bregmal01', '3B'): -2.4429280433642724, ('bryankr01', '3B'): -3.59530189253487, ('cabreas01', '3B'): -1.3247650635710748, ('camarjo01', '3B'): -4.195582091912115, ('candeje01', '3B'): 1.9939368066159453, ('canoro01', '3B'): -21.824765063571075, ('c