In [2]:
from pybaseball import statcast
from pybaseball.lahman import *
import numpy as np
import pandas as pd

In [4]:
# Load the fielding table from the local CSV and keep only rows whose
# yearID is 1970 or later; .copy() detaches the result from the raw frame.
fielding = (
    pd.read_csv('fielding.csv')
    .loc[lambda frame: frame['yearID'] >= 1970]
    .copy()
)
print(fielding)

         playerID  yearID  stint teamID lgID POS    G     GS  InnOuts   PO  \
65851   aaronha01    1970      1    ATL   NL  1B   11   10.0    243.0   73   
65852   aaronha01    1970      1    ATL   NL  OF  125  125.0   3180.0  246   
65853   aaronto01    1970      1    ATL   NL  1B   16    3.0    153.0   40   
65854   aaronto01    1970      1    ATL   NL  OF   12    9.0    213.0   13   
65855   abernte02    1970      1    CHN   NL   P   11    0.0     27.0    0   
65856   abernte02    1970      2    SLN   NL   P   11    0.0     54.0    1   
65857   abernte02    1970      3    KCA   AL   P   36    0.0    168.0    3   
65858   acosted01    1970      1    PIT   NL   P    3    0.0      9.0    0   
65859   adairje01    1970      1    KCA   AL  2B    7    7.0    216.0   24   
65860    ageeto01    1970      1    NYN   NL  OF  150  146.0   3957.0  374   
65861   aguirha01    1970      1    CHN   NL   P   17    0.0     42.0    0   
65862    akerja01    1970      1    NYA   AL   P   41    0.0    

In [111]:
# Cache of average defensive scores keyed by (year, position); getAverageDS
# fills it in the first time a given pair is requested.
averageDSes = dict()
# Minimum InnOuts (outs played) a player needs before a defensive score is computed.
minOuts = 500

def isValidPos(pos):
    """Return True when `pos` is a position code this analysis scores.

    Only '1B', '2B', '3B', 'SS' and 'OF' are accepted; every other value
    yields False.
    """
    return pos in ('1B', '2B', '3B', 'SS', 'OF')
    
def isValidEntry(year, pos):
    """Check that a (year, position) pair is one this notebook can score.

    Returns False for seasons before 1970 or after 2018; otherwise the
    answer is whatever isValidPos reports for `pos`.
    """
    outsideSeasonRange = year < 1970 or year > 2018
    if outsideSeasonRange:
        return False
    return isValidPos(pos)

def printSummary(playerID, year, position, pDS, avgDS, playerData):
    """Print a step-by-step breakdown of how a player's WDS was derived.

    Parameters:
        playerID: identifier shown in the header line.
        year: season shown in the header and average-score lines.
        position: position code shown in the header and average-score lines.
        pDS: the player's defensive score (rounded to 5 places for display).
        avgDS: the average defensive score it is compared against
            (rounded to 5 places for display).
        playerData: DataFrame whose first row supplies the 'InnOuts', 'PO',
            'A', 'DP' and 'E' totals.

    Returns nothing; everything is written to stdout.
    """
    outsPlayed = playerData['InnOuts'].values[0]
    # Outs are converted to games at 27 outs per game.
    gamesPlayed = round(outsPlayed / 27, 3)
    putOuts = playerData['PO'].values[0]
    assists = playerData['A'].values[0]
    doublePlays = playerData['DP'].values[0]
    errors = playerData['E'].values[0]

    pDS = round(pDS, 5)
    avgDS = round(avgDS, 5)

    print("WDS Summary for player", playerID, "in", year, "at", position, ":")
    print("----------------------------------------------------------------------")
    print("Games Played:", gamesPlayed)
    print("Outs Played:", outsPlayed)
    print("Put Outs:", putOuts)
    print("Assists:", assists)
    print("Double Plays:", doublePlays)
    print("Errors Commited:", errors)
    print("----------------------------------------------------------------------")
    outsParticipated = putOuts + assists + (0.5 * doublePlays)
    print("Outs participated in:", putOuts, "put outs +", assists, "assists + (0.5 *", doublePlays, "double plays) =  ", outsParticipated)
    outRatio = round(outsParticipated / outsPlayed, 3)
    print("Out ratio:", outsParticipated, "outs participated /", outsPlayed, "outs played =  ", outRatio)
    # The displayed formula must mirror calculateDS, which weights errors by
    # 0.5; the previous text showed the raw error count being subtracted,
    # which did not add up to the printed score.
    print("Defensive Score: ( 200 *", outRatio, "[out ratio] ) + ( 0.1 *", gamesPlayed, "games played ) - ( 0.5 *", errors, "errors ) =  ", pDS)
    print("Average Defensive Score at", position, "in", year, ":", avgDS)
    print("Weighted Defensive Score:", pDS, "[player DS] -", avgDS, "[average DS] =  ", round(pDS-avgDS, 5))

In [104]:
def calculateDS(InnOuts, PO, A, E, DP):
    """Compute a defensive score (DS) from a player's fielding totals.

    DS = 200 * (PO + A + 0.5 * DP) / InnOuts
         + 0.1 * (InnOuts / 27)
         - 0.5 * E

    Returns np.nan when InnOuts is below the module-level `minOuts`.
    """
    if InnOuts < minOuts:
        return np.nan

    # Weights applied to each component of the score.
    ratioWeight = 200
    gamesWeight = 0.1
    errorWeight = 0.5

    outsInvolved = PO + A + (0.5 * DP)
    ratioTerm = ratioWeight * (outsInvolved / InnOuts)
    gamesTerm = gamesWeight * (InnOuts / 27)
    errorTerm = errorWeight * E
    return ratioTerm + gamesTerm - errorTerm

def getAverageDS(year, position):
    """Return the mean defensive score of qualifying players at a position in a season.

    Each player's rows for (year, position) are summed, players with fewer
    than `minOuts` InnOuts are dropped, and calculateDS is averaged over the
    rest. Results are memoised in `averageDSes` under the key
    (year, position).

    Returns:
        The average DS; np.nan when no player qualifies; None (after printing
        a message) when isValidEntry rejects the arguments.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return

    cacheKey = (year, position)
    if cacheKey in averageDSes:
        return averageDSes[cacheKey]

    # Column order matches calculateDS(InnOuts, PO, A, E, DP) so each row can
    # be unpacked straight into the call.
    statCols = ['InnOuts', 'PO', 'A', 'E', 'DP']
    inScope = (fielding['yearID'] == year) & (fielding['POS'] == position)
    totals = fielding.loc[inScope].groupby('playerID')[statCols].sum()

    # calculateDS treats InnOuts == minOuts as qualified, so use >= here to
    # average over exactly the players that receive a score.
    qualified = totals.loc[totals['InnOuts'] >= minOuts]

    dses = [calculateDS(*stats) for stats in qualified.itertuples(index=False, name=None)]

    # With no qualifying players there is nothing to average; report NaN
    # instead of raising ZeroDivisionError.
    avgDS = sum(dses) / len(dses) if dses else np.nan
    averageDSes[cacheKey] = avgDS
    return avgDS

def getPlayerWDS(playerID, year, position, printData):
    """Return a player's weighted defensive score (WDS) for one season and position.

    WDS is the player's defensive score minus the average defensive score
    for the same year and position. The player's rows are summed first, so
    multiple rows for the same player, year and position count as one line.

    Returns:
        The WDS; 0 when the player's defensive score is NaN; None (after
        printing a message) when the entry is invalid or the player has no
        matching rows.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return

    rowMask = (
        (fielding['playerID'] == playerID)
        & (fielding['yearID'] == year)
        & (fielding['POS'] == position)
    )
    matchingRows = fielding.loc[rowMask].copy()
    if matchingRows.empty:
        print("PlayerID not found!")
        return

    summedCols = ['G', 'GS', 'InnOuts', 'PO', 'A', 'E', 'DP']
    totals = matchingRows.groupby('playerID', as_index=True).agg(dict.fromkeys(summedCols, 'sum'))

    # Pull the single aggregated row's values in calculateDS argument order.
    dsInputs = [totals[col].values[0] for col in ('InnOuts', 'PO', 'A', 'E', 'DP')]
    pDS = calculateDS(*dsInputs)
    avgDS = getAverageDS(year, position)
    weighted = pDS - avgDS

    if printData == True:
        printSummary(playerID, year, position, pDS, avgDS, totals)

    # A NaN score is reported as 0 rather than NaN.
    return 0 if np.isnan(pDS) else weighted
    
    

In [103]:
# Example: compute the WDS for one player-season and, because printData is
# True, print the full step-by-step summary as well.
getPlayerWDS('escobed01', 2018, '3B', True)
# print(getAverageDS(2018, '3B'))

             G     GS  InnOuts  PO    A    E  DP
playerID                                        
escobed01  131  124.0   3307.0  70  224  5.0  19
WDS Summary for player escobed01 in 2018 at 3B :
----------------------------------------------------------------------
Games Played: 122.481
Outs Played: 3307.0
Put Outs: 70
Assists: 224
Double Plays: 19
Errors Commited: 5.0
----------------------------------------------------------------------
Outs participated in: 70 put outs + 224 assists + (0.5 * 19 double plays) =   303.5
Out ratio: 303.5 outs participated / 3307.0 outs played =   0.092
Defensive Score: ( 200 * 0.092 [out ratio] ) + ( 0.1 * 122.481 games played ) -  5.0 errors =   28.10315
Average Defensive Score at 3B in 2018 : 22.83973
Weighted Defensive Score: 28.10315 [player DS] - 22.83973 [average DS] =   5.26342


5.263421304827357

In [107]:
def printTopPlayers(year, position, quantity, invert):
    """Print a WDS ranking of every player with fielding rows at a position in a season.

    Parameters:
        year: season to rank.
        position: position code to rank.
        quantity: number of rows of the sorted table to print.
        invert: passed to sort_values as `ascending`; False lists the highest
            WDS first, True lists the lowest first.

    Players whose defensive score is NaN are reported by getPlayerWDS as 0,
    so they appear in the table with a WDS of 0.

    Returns nothing; prints "Not a valid entry!" and stops when isValidEntry
    rejects the arguments.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return

    inScope = (fielding['yearID'] == year) & (fielding['POS'] == position)
    # unique() de-duplicates while keeping first-appearance order, replacing
    # the per-row "not in list" scan, which was quadratic in the row count.
    playerIDs = fielding.loc[inScope, 'playerID'].unique()

    playerWDSes = [[pID, getPlayerWDS(pID, year, position, False)] for pID in playerIDs]

    df = pd.DataFrame(playerWDSes, columns=['PlayerID', 'WDS'])
    df = df.sort_values(by='WDS', ascending=invert)
    print(df.head(quantity))

In [117]:
# invert=True sorts ascending, so this prints the 50 lowest WDS values at 2B in 2018.
printTopPlayers(2018, '2B', 50, True)

      PlayerID        WDS
94    lugoda01 -12.153960
165  utleych01 -10.835504
82   kendrho01  -9.465207
113  nunezed02  -9.009594
61   gonzama01  -6.406795
74   hernaen02  -6.257257
176  walkene01  -5.717052
119  perezhe01  -4.733470
62   goodrni01  -4.205007
77    holtbr01  -4.103468
109  murphda08  -3.986967
124  pireljo01  -3.886371
103  moncayo01  -3.776489
158  torregl01  -3.700592
152  solarya01  -3.553933
57   goinsry01  -2.770419
34   descada01  -2.430638
154  spangco01  -2.343832
4    altuvjo01  -2.274613
150   shawtr01  -2.257099
174  villajo01  -2.045949
92    lowebr01  -1.459509
54   garcigr01  -1.197504
51   fraziad01  -0.740478
108  munozya01  -0.450337
45   fletcda02  -0.360699
100  mcneije01  -0.033707
72   harrijo05  -0.002589
112  ngoepgi01   0.000000
110  negrokr01   0.000000
115    orfna01   0.000000
111  newmake01   0.000000
169  valerbr01   0.000000
0    adamewi01   0.000000
106  motteta01   0.000000
105  morofma01   0.000000
104  mondera02   0.000000
116  owingch