**CSCI 4831: Sabermetrics Final Project**  
Name: Adam Ten Hoeve

https://github.com/aaronaaeng/Sabermetrics-Final-Project

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read in all of the statcast data
# df2016 = pd.read_csv("2016/data2016.csv")

# april2017 = pd.read_csv("2017/dataApril2017.csv")
# may2017 = pd.read_csv("2017/dataMay2017.csv")
# june2017 = pd.read_csv("2017/dataJune2017.csv")
# july2017 = pd.read_csv("2017/dataJuly2017.csv")
# aug2017 = pd.read_csv("2017/dataAug2017.csv")
# sept2017 = pd.read_csv("2017/dataSept2017.csv")
# oct2017 = pd.read_csv("2017/dataOct2017.csv")

april2018 = pd.read_csv("2018/dataApril2018.csv")
# may2018 = pd.read_csv("2018/dataMay2018.csv")
# june2018 = pd.read_csv("2018/dataJune2018.csv")
# july2018 = pd.read_csv("2018/dataJuly2018.csv")
# aug2018 = pd.read_csv("2018/dataAug2018.csv")
# sept2018 = pd.read_csv("2018/dataSept2018.csv")
# oct2018 = pd.read_csv("2018/dataOct2018.csv")

# df2017 = pd.concat([april2017, may2017, june2017, july2017, aug2017, sept2017, oct2017])
# df2018 = pd.concat([april2018, may2018, june2018, july2018, aug2018, sept2018, oct2018])

# dfTotal = pd.concat([df2016, df2017, df2018])
dfTotal = april2018.copy()

In [None]:
# Clean the data. 
# Group by pitcher. Get rid of anyone who pitched less than 100 times in the three years.
dfTempPitch = dfTotal.groupby("player_name", as_index = False).count()
dfTempPitch = dfTempPitch.loc[dfTempPitch["index"] > 100]
# Then group by batter to get rid of any batters who batted less than 100 times.
dfTempBat = dfTotal.groupby("batter", as_index = False).count()
dfTempBat = dfTempBat.loc[dfTempBat["index"] > 100]

# Create a database of pitchers 
dfPitchers = pd.merge(dfTotal, dfTempPitch[["player_name"]], on = "player_name", how = "inner")
# Create a database of batters
dfBatters = pd.merge(dfTotal, dfTempBat[["batter"]], on = "batter", how = "inner")

# Create a final dataframe of the cleaned batter and pitcher data
dfFinal = pd.merge(dfPitchers, dfBatters[["index", "pitcher", "batter", "release_speed", "strikes", "balls"]], on = ["index", "pitcher", "batter", "release_speed", "strikes", "balls"], how = "inner")

In [None]:
# A list of events of the batter either striking out or hitting the ball into play.
events = ["strikeout", "strikeout_double_play", "single", "double", "triple", "home_run", "sac_bunt", "sac_fly", "field_out", "field_error", "double_play", "grounded_into_double_play", "sac_fly_double_play", "triple_play"]
# Get rid of all the rows of data that don't have one of the events listed.
Outcomes = dfFinal.loc[dfFinal["events"].isin(events)]

In [None]:
# Create dictionaries to convert player names and ids
IDtoName = {}
NametoID = {}
# Get all the unique players
temp = Outcomes[["player_name", "pitcher"]].groupby(by = "pitcher", as_index = False).first()

for i in range(len(temp)):
    IDtoName[temp["pitcher"].iloc[i]] = temp["player_name"].iloc[i]
    NametoID[temp["player_name"].iloc[i]] = temp["pitcher"].iloc[i]

In [None]:
# Create a dictionary of all batters and pitchers
batterElos = {}
pitcherElos = {}

ids = list(Outcomes["pitcher"].unique()) + list(Outcomes["batter"].unique())
for id in list(Outcomes["pitcher"].unique()):
    pitcherElos[id] = 1000
for id in list(Outcomes["batter"].unique()):
    batterElos[id] = 1000
    
# Create a list of pitcher-favored outcomes
pitcherFavored = ["strikeout", "strikeout_double_play"]

In [None]:
# Create a function to calculate the probability a pitcher wins the matchup
def ProbPitcherWins(pitcher, batter):
    # Get ranking of pitcher
    pitcherRank = pitcherElos[pitcher]
    # Get ranking of batter
    batterRank = batterElos[batter]
    # From elo rankings. Assumes a difference of 400 is the same as 1 player being 10 times as likely to win
    probPitcherWins = 1 / (1 + 10**((pitcherRank -  batterRank)/400))
    return probPitcherWins

In [None]:
# Create a function to update the scores of players based on the results of pitches
def UpdateElo(row):
    # Get the probability that the pitcher wins from the function above
    prob = ProbPitcherWins(row["pitcher"], row["batter"])
    # Update the ratings based on the outcome of the pitch
    # If pitcher "wins" the matchup
    if row["events"] in pitcherFavored:
        # Largest possible change in score is 32.
        pitcherElos[row["pitcher"]] = int(pitcherElos[row["pitcher"]] + 32 * (1 - prob))
        batterElos[row["batter"]] = int(batterElos[row["batter"]] + 32 * (prob - 1))
    # If the batter wins the matchup
    else:
        pitcherElos[row["pitcher"]] = int(pitcherElos[row["pitcher"]] + 32 * (prob - 1))
        batterElos[row["batter"]] = int(batterElos[row["batter"]] + 32 * (1 - prob))

In [None]:
Outcomes.head()

In [None]:
Outcomes.apply(UpdateElo, axis = 1);

In [None]:
print(list(batterElos.values()))

In [19]:
print(list(pitcherElos.values()))

[772, 798, 806, 762, 770, 714, 577, 644, 793, 743, 702, 891, 718, 666, 587, 596, 815, 852, 789, 811, 631, 758, 642, 1014, 734, 777, 844, 744, 777, 820, 739, 741, 939, 717, 939, 578, 596, 744, 952, 954, 879, 741, 786, 575, 802, 838, 823, 831, 671, 986, 596, 1353, 866, 716, 844, 762, 752, 773, 765, 553, 658, 826, 794, 829, 905, 952, 810, 647, 762, 699, 864, 644, 805, 498, 690, 808, 888, 1058, 819, 594, 615, 616, 842, 882, 711, 591, 853, 777, 624, 877, 793, 759, 822, 817, 852, 713, 627, 1046, 734, 798, 803, 854, 904, 641, 601, 834, 822, 737, 746, 831, 766, 991, 780, 885, 850, 658, 668, 606, 813, 741, 827, 883, 731, 697, 577, 854, 957, 957, 775, 685, 586, 844, 902, 786, 750, 593, 734, 854, 757, 900, 799, 585, 800, 791, 775, 886, 599, 777, 807, 674, 758, 657, 746, 935, 805, 877, 740, 755, 735, 656, 864, 854, 937, 850, 837, 641, 683, 979, 866, 772, 658, 719, 844, 916, 810, 641, 792, 681, 1016, 795, 765, 678, 682, 846, 700, 872, 822, 872, 597, 833, 818, 851, 763, 571, 665, 764, 834, 763, 731,

In [20]:
# Create a dictionary to convert the player ID's to names
def PlayerIDtoName(id):
    return(IDtoName[id])

# Create a function to convert player's name to ID
def PlayerNametoID(name):
    return(NametoID[name])