In [2]:
from pybaseball import statcast
from pybaseball.lahman import *
import numpy as np
import pandas as pd

In [18]:
# Load the fielding table from disk and keep only rows whose season (yearID)
# is 1970 or later; .copy() detaches the result from the full table.
fielding = (
    pd.read_csv('fielding.csv')
    .loc[lambda frame: frame['yearID'] >= 1970]
    .copy()
)

In [38]:
# Cache of already-computed average Defensive Scores, keyed by (year, position),
# so each average only has to be calculated once per kernel session.
averageDSes = dict()
# Playing-time cutoff: the minimum number of outs a player needs to be considered.
minOuts = 500

def isValidPos(pos):
    """Return True if `pos` is one of the supported position codes, else False."""
    supported = ('1B', '2B', '3B', 'SS', 'OF')
    return pos in supported
    
def isValidEntry(year, pos):
    """Check that a (year, position) query can be answered.

    Returns False when `year` is before 1970 or after 2018, or when `pos` is
    rejected by isValidPos; returns True otherwise.
    """
    # Season must fall inside the supported window.
    if year < 1970 or year > 2018:
        return False

    # Inside the window, validity is decided purely by the position code.
    return isValidPos(pos)

def printSummary(playerID, year, position, pDS, avgDS, playerData):
    """Print a step-by-step breakdown of a player's Weighted Defensive Score.

    Parameters:
        playerID, year, position: echoed in the report header.
        pDS: the player's Defensive Score (rounded to 5 places for display).
        avgDS: the average Defensive Score it is compared against (rounded
            to 5 places for display).
        playerData: frame with 'InnOuts', 'PO', 'A', 'DP' and 'E' columns;
            only the first row of each column is read.

    Returns nothing; the report is written to stdout.
    """
    divider = "----------------------------------------------------------------------"

    outsPlayed = playerData['InnOuts'].values[0]
    gamesPlayed = round(outsPlayed / 27, 3)  # outs are converted to games at 27 per game
    putOuts, assists, doublePlays, errors = (
        playerData[column].values[0] for column in ('PO', 'A', 'DP', 'E')
    )

    playerScore = round(pDS, 5)
    averageScore = round(avgDS, 5)

    # Header plus the raw counting stats.
    print("WDS Summary for player", playerID, "in", year, "at", position, ":")
    print(divider)
    rawStats = (
        ("Games Played:", gamesPlayed),
        ("Outs Played:", outsPlayed),
        ("Put Outs:", putOuts),
        ("Assists:", assists),
        ("Double Plays:", doublePlays),
        ("Errors Commited:", errors),
    )
    for label, value in rawStats:
        print(label, value)
    print(divider)

    # Walk through the formula one line at a time.
    involved = putOuts + assists + (0.5 * doublePlays)
    print("Outs participated in:", putOuts, "put outs +", assists, "assists + (0.5 *", doublePlays, "double plays) =  ", involved)
    ratio = round(involved / outsPlayed, 3)
    print("Out ratio:", involved, "outs participated /", outsPlayed, "outs played =  ", ratio)
    print("Defensive Score: ( 200 *", ratio, "[out ratio] ) + ( 0.1 *", gamesPlayed, "games played ) - ", "( 0.5 *", errors, "errors ) =  ", playerScore)
    print("Average Defensive Score at", position, "in", year, ":", averageScore)
    print("Weighted Defensive Score:", playerScore, "[player DS] -", averageScore, "[average DS] =  ", round(playerScore - averageScore, 5))
    print(divider)

In [5]:
def calculateDS(InnOuts, PO, A, E, DP):
    """Compute a Defensive Score (DS) from a player's fielding totals.

    DS = 200 * out ratio + 0.1 * games played - 0.5 * errors, where
    out ratio = (PO + A + 0.5 * DP) / InnOuts and games played = InnOuts / 27.

    Returns np.nan when InnOuts is below the module-level `minOuts` cutoff.
    """
    # Not enough playing time: no score.
    if InnOuts < minOuts:
        return np.nan

    RATIO_WEIGHT = 200
    GAMES_WEIGHT = 0.1
    ERROR_WEIGHT = 0.5

    involvement = (PO + A + (0.5 * DP)) / InnOuts
    games = InnOuts / 27

    ratioTerm = RATIO_WEIGHT * involvement
    gamesTerm = GAMES_WEIGHT * games
    errorTerm = ERROR_WEIGHT * E
    return ratioTerm + gamesTerm - errorTerm

def getAverageDS(year, position):
    """Return the average Defensive Score of all qualifying players at a position in a season.

    Rows for the same playerID are summed first so that a player with several
    rows in one season (e.g. traded mid-season) is counted once.
    Results are memoised in the module-level `averageDSes` dict under the key
    (year, position).

    Returns:
        The mean DS as a float; np.nan when no player reaches `minOuts`;
        None (after printing a message) when the entry is not valid.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return None

    # If the average has already been calculated, reuse it.
    key = (year, position)
    if key in averageDSes:
        return averageDSes[key]

    inScope = (fielding['yearID'] == year) & (fielding['POS'] == position)
    posData = fielding.loc[inScope].groupby('playerID', as_index=True).agg(
        {'G': 'sum', 'GS': 'sum', 'InnOuts': 'sum', 'PO': 'sum', 'A': 'sum', 'E': 'sum', 'DP': 'sum'}
    )

    # Use >= so the cutoff matches calculateDS, which only rejects
    # InnOuts < minOuts; a player with exactly minOuts outs receives a DS and
    # therefore belongs in the average as well.
    qualified = posData.loc[posData['InnOuts'] >= minOuts]

    dses = [
        calculateDS(row['InnOuts'], row['PO'], row['A'], row['E'], row['DP'])
        for _, row in qualified.iterrows()
    ]

    # With no qualifying players there is nothing to average; report NaN
    # instead of failing with ZeroDivisionError.
    avgDS = sum(dses) / len(dses) if dses else np.nan

    # Remember the result so it can be looked up next time.
    averageDSes[key] = avgDS
    return avgDS

def getPlayerWDS(playerID, year, position, printData):
    """Return a player's Weighted Defensive Score (WDS) for one season at one position.

    WDS is the player's Defensive Score minus the average Defensive Score for
    the same year and position. All of the player's rows for that season and
    position are summed before scoring.

    Parameters:
        playerID, year, position: select the rows of `fielding` to score.
        printData: when equal to True, a full breakdown is printed via printSummary.

    Returns:
        The WDS; 0 when the player's DS is NaN (below the minimum outs);
        None (after printing a message) when the entry is invalid or the
        player has no matching rows.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return

    isPlayer = fielding['playerID'] == playerID
    isSeason = fielding['yearID'] == year
    isPosition = fielding['POS'] == position
    stints = fielding.loc[isPlayer & isSeason & isPosition].copy()
    if stints.empty:
        print("PlayerID not found!")
        return

    # Collapse multiple rows for the same player into season totals.
    summedColumns = ('G', 'GS', 'InnOuts', 'PO', 'A', 'E', 'DP')
    playerData = stints.groupby('playerID', as_index=True).agg(
        {column: 'sum' for column in summedColumns}
    )

    totals = {
        column: playerData[column].values[0]
        for column in ('InnOuts', 'PO', 'A', 'E', 'DP')
    }
    pDS = calculateDS(totals['InnOuts'], totals['PO'], totals['A'], totals['E'], totals['DP'])
    avgDS = getAverageDS(year, position)

    # Weight the player's DS against the average DS.
    WDS = pDS - avgDS

    # Equality with True (not mere truthiness) is the original contract.
    if printData == True:
        printSummary(playerID, year, position, pDS, avgDS, playerData)

    # A player who did not meet the minimum outs gets a WDS of 0.
    if np.isnan(pDS):
        return 0
    return WDS
    
    

In [7]:
def printTopPlayers(year, position, quantity, invert):
    """Print a WDS ranking of the players at a position in a season.

    Parameters:
        year, position: select which rows of `fielding` are ranked.
        quantity: number of rows of the ranking to print.
        invert: passed to sort_values as `ascending`; False lists the highest
            WDS first, True lists the lowest first.

    Prints the ranking (or "Not a valid entry!") and returns nothing.
    """
    if not isValidEntry(year, position):
        print("Not a valid entry!")
        return

    inScope = (fielding['yearID'] == year) & (fielding['POS'] == position)
    seasonRows = fielding.loc[inScope].copy()

    # A player can appear in several rows; dict.fromkeys keeps one entry per
    # ID while preserving the order of first appearance.
    uniqueIDs = dict.fromkeys(seasonRows['playerID'])
    rankings = [[pID, getPlayerWDS(pID, year, position, False)] for pID in uniqueIDs]

    # Put the results in a dataframe for sorting and display.
    table = pd.DataFrame(rankings, columns=['PlayerID', 'WDS'])
    print(table.sort_values(by='WDS', ascending=invert).head(quantity))

In [36]:
# Usage: printTopPlayers(year, position, quantity, invert)
# Demonstrates input validation: 2021 is later than the last accepted season
# (2018) and 'F' is not an accepted position code, so this call only prints
# "Not a valid entry!".
printTopPlayers(2021, 'F', 50, False)

Not a valid entry!


In [39]:
# Usage: getPlayerWDS(playerID, year, position, printData)
# Passing True as printData prints the full score breakdown; the returned WDS
# is shown as the cell's result.
getPlayerWDS('arenano01', 2018, '3B', True)
# Optional check of the average used above:
# print(getAverageDS(2018, '3B'))

WDS Summary for player arenano01 in 2018 at 3B :
----------------------------------------------------------------------
Games Played: 147.593
Outs Played: 3985.0
Put Outs: 104
Assists: 312
Double Plays: 44
Errors Commited: 14.0
----------------------------------------------------------------------
Outs participated in: 104 put outs + 312 assists + (0.5 * 44 double plays) =   438.0
Out ratio: 438.0 outs participated / 3985.0 outs played =   0.11
Defensive Score: ( 200 * 0.11 [out ratio] ) + ( 0.1 * 147.593 games played ) -  ( 0.5 * 14.0 errors ) =   29.74169
Average Defensive Score at 3B in 2018 : 22.83973
Weighted Defensive Score: 29.74169 [player DS] - 22.83973 [average DS] =   6.90196
----------------------------------------------------------------------


6.9019620080853095