In [29]:
import pandas as pd
import pickle as pkl
import timeit
import warnings
from datetime import datetime
from baseball_scraper import statcast
import mlbstatsapi
import time
from IPython.display import clear_output
import matplotlib.pyplot as plt

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas warnings such as SettingWithCopyWarning, so real problems can go unnoticed.
warnings.simplefilter("ignore")


## Column lists and lookup tables used later

In [2]:
# Columns kept from the pitch data when the cleaned play-level frame is built.
relevant_batting_columns = [
    "game_date", "player_name", "batter", "pitcher", "events", "stand", "p_throws",
    "home_team", "away_team", "hit_location", "bb_type", "on_3b", "on_2b", "on_1b",
    "outs_when_up", "inning", "inning_topbot", "game_type", "game_pk",
    "estimated_ba_using_speedangle", "launch_speed_angle", "bat_score", "fld_score",
    "post_bat_score", "if_fielding_alignment", "of_fielding_alignment",
    "delta_home_win_exp",
]

# Raw `events` value -> grouped play label (several raw events share one label).
play_type_dict = {
    "field_out": "fly_out",
    "strikeout": "strikeout",
    "strikeout_double_play": "strikeout",
    "force_out": "out",
    "grounded_into_double_play": "double_play",
    "double_play": "double_play",
    "fielders_choice": "fielders_choice",
    "fielders_choice_out": "fielders_choice",
    "other_out": "out",
    "sac_fly": "sacrifice",
    "sac_bunt": "sacrifice",
    "single": "single",
    "double": "double",
    "triple": "triple",
    "home_run": "home_run",
    "walk": "walk",
    "hit_by_pitch": "walk",
    "intent_walk": "intent_walk",
    "field_error": "error",
}

# The event types that are kept are exactly the ones that have a label above (same order).
relevant_play_types = list(play_type_dict)

# Team abbreviation -> full club name, used to match `home_team` against the weather data.
weather_name_conversions = {
    "SF": "San Francisco Giants",
    "NYY": "New York Yankees",
    "DET": "Detroit Tigers",
    "TEX": "Texas Rangers",
    "STL": "St. Louis Cardinals",
    "WSH": "Washington Nationals",
    "MIL": "Milwaukee Brewers",
    "CLE": "Cleveland Guardians",
    "SD": "San Diego Padres",
    "COL": "Colorado Rockies",
    "BAL": "Baltimore Orioles",
    "HOU": "Houston Astros",
    "KC": "Kansas City Royals",
    "OAK": "Oakland Athletics",
    "BOS": "Boston Red Sox",
    "CWS": "Chicago White Sox",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "CIN": "Cincinnati Reds",
    "MIN": "Minnesota Twins",
    "MIA": "Miami Marlins",
    "LAD": "Los Angeles Dodgers",
    "TB": "Tampa Bay Rays",
    "PHI": "Philadelphia Phillies",
    "NYM": "New York Mets",
    "CHC": "Chicago Cubs",
    "TOR": "Toronto Blue Jays",
    "SEA": "Seattle Mariners",
    "LAA": "Los Angeles Angels",
    "PIT": "Pittsburgh Pirates",
}

### Import Raw Pitches

In [43]:
# Raw pitch-by-pitch data, stored as a feather file.
# NOTE(review): this is an absolute path on one machine; the notebook will not run elsewhere without editing it.
all_pitches_path = "/Users/jaredzirkes/Documents/GitHub/MLB-Simulation/Feathers Raw Pitch and Weather Collection/All Pitches Feather"
all_pitches = pd.read_feather(all_pitches_path)


##### The code below is no longer needed

In [44]:
# Relabel games whose recorded home/away clubs need to be swapped, e.g. the series that opened 2020
# in which the Blue Jays were recorded as the home team while playing IN Washington.
# NOTE(review): the other entries are presumably the same situation (recorded home club played in
# the opponent's park) — confirm against the schedule before relying on them.

# One entry per correction, applied in this order:
#   (season, game_date or None for the whole season, recorded home_team, recorded away_team)
# Every correction swaps the two labels for the matching rows.
HOME_AWAY_CORRECTIONS = [
    (2020, None, "TOR", "WSH"),
    (2013, "2013-07-23", "CIN", "SF"),
    (2015, "2015-05-01", "BAL", "TB"),
    (2015, "2015-05-02", "BAL", "TB"),
    (2015, "2015-05-03", "BAL", "TB"),
    (2017, "2017-09-16", "MIA", "MIL"),
    (2017, "2017-09-17", "MIA", "MIL"),
    (2017, "2017-09-15", "MIA", "MIL"),
    (2020, "2020-08-05", "NYY", "PHI"),
    (2020, "2020-08-05", "MIA", "BAL"),
    (2020, "2020-08-06", "MIA", "BAL"),
    (2020, "2020-08-07", "MIA", "BAL"),
    (2020, "2020-08-17", "STL", "CHC"),
    (2020, "2020-08-18", "STL", "CHC"),
    (2020, "2020-08-19", "STL", "CHC"),
    (2020, "2020-08-22", "MIA", "WSH"),
    (2020, "2020-08-25", "MIA", "NYM"),
    (2020, "2020-08-26", "NYY", "ATL"),
    (2020, "2020-08-27", "CIN", "MIL"),
    (2020, "2020-08-27", "SEA", "SD"),
    (2020, "2020-08-27", "LAD", "SF"),
    (2020, "2020-08-27", "PIT", "STL"),
    (2020, "2020-08-28", "NYM", "NYY"),
    (2020, "2020-08-29", "MIN", "DET"),
    (2020, "2020-08-29", "OAK", "HOU"),
    (2020, "2020-08-29", "CHC", "CIN"),
    (2020, "2020-08-30", "NYM", "NYY"),
    # The date was previously written "2020-09-4", and the away_team write used the index of the
    # 2020-08-30 NYM/NYY game, so this game was not fixed and the other one was mislabelled.
    (2020, "2020-09-04", "WSH", "ATL"),
    (2020, "2020-09-04", "NYY", "BAL"),
    (2020, "2020-09-04", "TOR", "BOS"),
    (2020, "2020-09-04", "DET", "MIN"),
    (2020, "2020-09-04", "CIN", "PIT"),
    (2020, "2020-09-05", "HOU", "LAA"),
    (2020, "2020-09-05", "STL", "CHC"),
    (2020, "2020-09-08", "HOU", "OAK"),
    (2020, "2020-09-08", "BOS", "PHI"),
    (2020, "2020-09-08", "MIN", "STL"),
    (2020, "2020-09-10", "DET", "STL"),
    (2020, "2020-09-11", "PHI", "MIA"),
    (2020, "2020-09-11", "BAL", "NYY"),
    (2020, "2020-09-12", "OAK", "TEX"),
    (2020, "2020-09-13", "PHI", "MIA"),
    # Previously both writes for this game used the index of the PHI/MIA game above,
    # relabelling that game as SD/SF and leaving this one untouched.
    (2020, "2020-09-13", "SF", "SD"),
    (2020, "2020-09-14", "PIT", "CIN"),
    (2020, "2020-09-14", "STL", "MIL"),
    (2020, "2020-09-14", "OAK", "SEA"),
    (2020, "2020-09-16", "STL", "MIL"),
    (2020, "2020-09-17", "TB", "BAL"),
    (2020, "2020-09-18", "WSH", "MIA"),
    (2020, "2020-09-18", "TOR", "PHI"),
    (2020, "2020-09-18", "STL", "PIT"),
    (2020, "2020-09-20", "WSH", "MIA"),
    (2020, "2020-09-22", "PHI", "WSH"),
    (2020, "2020-09-25", "COL", "ARI"),
    (2020, "2020-09-25", "SD", "SF"),
    (2020, "2020-09-25", "MIL", "STL"),
    (2020, "2020-09-26", "SEA", "OAK"),
    (2020, "2020-09-26", "NYM", "WSH"),
    (2020, "2020-09-16", "SEA", "SF"),
    (2020, "2020-09-17", "SEA", "SF"),
    (2020, "2020-09-18", "SEA", "SD"),
    (2020, "2020-09-19", "SEA", "SD"),
    (2020, "2020-09-20", "SEA", "SD"),
    (2021, "2021-08-10", "TOR", "LAA"),
    (2022, "2022-05-10", "OAK", "DET"),
]


def swap_home_away(pitches, recorded_home, recorded_away, game_date=None):
    """Swap the home_team / away_team labels of the matching rows, in place.

    Parameters
    ----------
    pitches : DataFrame with `home_team`, `away_team` and `game_date` columns.
    recorded_home, recorded_away : team abbreviations as currently stored.
    game_date : value compared with `game_date`; None matches every date.

    Returns
    -------
    int : number of rows that were relabelled.
    """
    mask = (pitches.home_team == recorded_home) & (pitches.away_team == recorded_away)
    if game_date is not None:
        mask &= (pitches.game_date == game_date)
    # The mask is computed before either write, so the first write cannot affect the second.
    pitches.loc[mask, "home_team"] = recorded_away
    pitches.loc[mask, "away_team"] = recorded_home
    return int(mask.sum())


def apply_home_away_corrections(pitches_by_season, corrections=None):
    """Apply every correction to the frame of its season; seasons without a frame are skipped.

    Parameters
    ----------
    pitches_by_season : dict mapping season (int) to that season's pitch DataFrame (or None).
    corrections : iterable of (season, game_date, recorded_home, recorded_away);
        defaults to HOME_AWAY_CORRECTIONS.

    Returns
    -------
    int : total number of rows relabelled.
    """
    if corrections is None:
        corrections = HOME_AWAY_CORRECTIONS
    relabelled = 0
    for season, game_date, recorded_home, recorded_away in corrections:
        season_pitches = pitches_by_season.get(season)
        if season_pitches is None:
            continue
        relabelled += swap_home_away(season_pitches, recorded_home, recorded_away, game_date)
    return relabelled


# The per-season frames (pitches_2013, ..., pitches_2022) are not created anywhere above this cell,
# so look them up by name: the cell then does nothing on a fresh kernel instead of raising NameError.
apply_home_away_corrections(
    {season: globals().get(f"pitches_{season}") for season in (2013, 2015, 2017, 2020, 2021, 2022)}
)

### Clean All Pitches DataFrame


In [45]:
# Filter all pitches to only those resulting in a relevant play, and keep only the columns we might use later on

# Filter down to only regular season games.
# .copy() gives an independent frame, so the game_date assignment below does not write into a
# slice of the loaded data (chained assignment).
all_pitches = all_pitches[all_pitches.game_type == "R"].copy()

# Convert game_date to a string formatted as YYYY-MM-DD (anything after the first space is dropped)
all_pitches["game_date"] = all_pitches.game_date.astype(str).str.split(" ").str[0]

# Filter all pitches to only those with an event
all_plays = all_pitches[all_pitches.events.notna()]

# Filter all pitches with an event to only those types we care about
relevant_plays = all_plays[all_plays.events.isin(relevant_play_types)]

# Keep the relevant columns and sort by date
final_plays = relevant_plays[relevant_batting_columns].sort_values(by="game_date").reset_index(drop=True)

# Finally, add a new column that groups all the event types into eventual Y labels.
# Every remaining event is one of relevant_play_types, so the mapping lookup is applied to known events only.
final_plays["play_type"] = final_plays.events.map(play_type_dict)

all_plays = final_plays

### Segment stats based on handedness and build raw stats throughout the year

In [46]:
# One empty placeholder per two-letter handedness combination (R/L paired with R/L).
all_plays_by_hand_combo = {combo: {} for combo in ("RR", "RL", "LR", "LL")}


In [47]:
# Split all plays on the combination of batter/pitcher handedness and place each subset into the dictionary.
# The first letter of the key is matched against `stand`, the second against `p_throws`.
# NOTE(review): the previous version compared both columns with the first letter (and read p_throws
# from an undefined `all_plays_dict[year]`); confirm that "first letter = batter side" is the intended key order.
for pitbat_combo in all_plays_by_hand_combo.keys():
    batter_side, pitcher_arm = pitbat_combo
    pitbat_df = all_plays[(all_plays.stand == batter_side) & (all_plays.p_throws == pitcher_arm)].copy().reset_index(drop=True)
    all_plays_by_hand_combo[pitbat_combo] = pitbat_df

### Attach the weather info to the play info --- THIS WILL GET UPDATED IN THE PRESEASON WITH NEW WEATHER SCRAPER

In [48]:
# Load the pickled weather data; the context manager closes the file handle once it has been read.
# NOTE: unpickling can execute arbitrary code, so only load a weather_data.pkl that this project produced.
with open("weather_data.pkl", "rb") as weather_file:
    weather = pkl.load(weather_file)


In [50]:
# Attach each play's weather description by looking it up in a dictionary built once, keyed on
# (game date, full home-team name), instead of re-filtering the whole weather table for every play.
weather_by_game = {}
for weather_date, weather_home, description in zip(weather.date, weather.home_team, weather.weather):
    # Dates are reduced to the text before the first space, the same way game_date is formatted.
    # NOTE(review): assumes weather.date renders as YYYY-MM-DD — confirm against the scraper output.
    # setdefault keeps the first row per game key, matching the earlier `.iloc[0]` choice.
    weather_by_game.setdefault((str(weather_date).split(" ")[0], weather_home), description)

for pitbat_combo in all_plays_by_hand_combo.keys():
    plays = all_plays_by_hand_combo[pitbat_combo]
    # A play whose team abbreviation or (date, home team) pair is unknown raises KeyError naming the missing key.
    plays["full_weather"] = [
        weather_by_game[(game_date, weather_name_conversions[home_team])]
        for game_date, home_team in zip(plays.game_date, plays.home_team)
    ]

In [51]:
def get_wind_direction(full_weather):
    """Classify the wind direction mentioned in a full weather description.

    Returns "xyz" when `full_weather` is None, "in" or "out" when the text
    (with every "Wind" removed) contains that word in lower or title case,
    the text after the last "from " (trailing periods stripped) when it
    mentions "Left" or "Right", and None when nothing matches.

    NOTE(review): these are plain substring checks, so any other word
    containing "in" or "out" (for example "Rain") is also classified as "in"/"out".
    """
    if full_weather is None:
        return "xyz"

    # Remove "Wind" first so that the "in" inside it does not count as a match.
    searchable = full_weather.replace("Wind", "")

    def mentions(*words):
        return any(word in searchable for word in words)

    if mentions("in", "In"):
        return "in"
    if mentions("out", "Out"):
        return "out"
    if mentions("Left", "Right"):
        return full_weather.split("from ")[-1].strip(".")
    return None

In [52]:
# Break up the full weather info into temperature, wind speed, and wind direction separately
for plays in all_plays_by_hand_combo.values():
    descriptions = plays.full_weather
    # Integer found between the first ": " and the degree sign (the column name keeps its existing spelling)
    plays["temprature"] = descriptions.apply(lambda text: int(text.split(": ")[1].split("°")[0]))
    # Integer found between "Wind " and "mph"; 0 when the description does not mention "Wind"
    plays["wind_speed"] = descriptions.apply(lambda text: 0 if "Wind" not in text else int(text.split("Wind ")[1].split("mph")[0]))
    # Classified direction, trimmed to the part before the first ", "; None is passed through unchanged
    plays["wind_direction"] = descriptions.apply(get_wind_direction).apply(
        lambda direction: direction if direction is None else direction.split(", ")[0]
    )

In [68]:
#pkl.dump(all_plays_by_hand_combo, open("Feathers Pitches and Batting Stats/all_plays_by_hand_combo.pkl", "wb"))
