Baseball Prediction: 3b - Augment DataFrame with Odds Data

    - In the previous notebook, we got historical odds data from oddsshark.com and saved it as a set of CSV files (one per team and season, following a fixed naming convention).

    - In this notebook we will load that data and augment our primary (game-level) data frame so that it includes this odds data - specifically, the implied probabilities and the over/under, for each game.

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lxml
import html5lib
%matplotlib inline
from urllib.request import urlopen
import time
import structureboost as stb
import ml_insights as mli
# Raise the pandas display limits so wide/long frames are shown
# (up to 1000 columns and 1000 rows) instead of being truncated early.
pd.set_option('display.max_columns',1000)
pd.set_option('display.max_rows',1000)

In [7]:
# Load the primary (game-level) dataframe produced by the earlier notebook.
# low_memory=False has pandas parse the file in one pass rather than in
# chunks, which avoids mixed-dtype inference on a file this wide.
main_frame_csv = 'df_bp1.csv'
df = pd.read_csv(main_frame_csv, low_memory=False)

Plan of attack

- Create a dictionary structure to enable us to easily go to a specific game for a particular team and season.
- Iterate through the rows of our main dataframe, look up the home and visiting team for that game to get their odds. (Note: need two different lookups per game)

In [8]:
# Same oddsshark-id -> team-code mapping as in the previous notebook
# (it would be more elegant to save it to a file and load it here).
# Keys are listed in ascending id order.
oddsshark_num_to_team_dict = {
    26995: 'PHI',
    26996: 'SDN',
    26997: 'SFN',
    26998: 'ANA',
    26999: 'DET',
    27000: 'CIN',
    27001: 'NYA',
    27002: 'TEX',
    27003: 'TBA',
    27004: 'COL',
    27005: 'MIN',
    27006: 'KCA',
    27007: 'ARI',
    27008: 'BAL',
    27009: 'ATL',
    27010: 'TOR',
    27011: 'SEA',
    27012: 'MIL',
    27013: 'PIT',
    27014: 'NYN',
    27015: 'LAN',
    27016: 'OAK',
    27017: 'WAS',
    27018: 'CHA',
    27019: 'SLN',
    27020: 'CHN',
    27021: 'BOS',
    27022: 'MIA',
    27023: 'HOU',
    27024: 'CLE',
}
 
# Use the saved files to get the odds information.
# We create a dict based on team and season for easy lookup:
#   df_odds_dict[team_name][season] -> that team's odds table for the season,
#   indexed by date_dblhead.
ODDS_DIR = '/Volumes/CharmedXi/beatingVegas/oddshark/'  # folder holding the per-team csv files
ODDS_SEASONS = range(2019, 2023)  # 2019 through 2022 inclusive
# NOTE(review): seasons outside ODDS_SEASONS get no odds table here, so any
# later lookup for such a season will miss -- confirm this is intended.

df_odds_dict = {}
# Iterate over the mapping itself rather than a hard-coded id range, so this
# loop cannot drift out of sync with oddsshark_num_to_team_dict.
for team_name in oddsshark_num_to_team_dict.values():
    print(team_name)
    df_odds_dict[team_name] = {}
    for season in ODDS_SEASONS:
        fname = 'oddsshark_'+team_name+'_'+str(season)+'.csv'
        df_temp = pd.read_csv(ODDS_DIR+fname)
        # Lookup key: the digits of date_numeric followed by the digits of
        # dblheader_num, converted back to an integer.
        date_dblhead = (df_temp.date_numeric.astype(str) + df_temp.dblheader_num.astype(str)).astype(int)
        df_odds_dict[team_name][season] = (
            df_temp.assign(date_dblhead=date_dblhead).set_index('date_dblhead')
        )

PHI
SDN
SFN
ANA
DET
CIN
NYA
TEX
TBA
COL
MIN
KCA
ARI
BAL
ATL
TOR
SEA
MIL
PIT
NYN
LAN
OAK
WAS
CHA
SLN
CHN
BOS
MIA
HOU
CLE


Augment our main dataframe

In [9]:
# Again, we iterate through the main dataframe, get the team, season and
# game key, and then pull the relevant info from the odds dictionary
# (df_odds_dict[team][season], indexed by date_dblhead).
#
# Slots that are never filled keep their initial value (0 for the numeric
# arrays, '' for ou_result).  Rows with season < 2019 are skipped entirely,
# and a failed lookup leaves the slot untouched and prints a message.

n_games = df.shape[0]
implied_prob_h = np.zeros(n_games)
implied_prob_v = np.zeros(n_games)
over_under = np.zeros(n_games)
ou_result = np.full(n_games, '', dtype=object)

# enumerate() supplies the row's position, which is what the numpy arrays
# need; the index label yielded by iterrows() only coincides with the
# position when df has a default 0..n-1 index.
for pos, (_, row) in enumerate(df.iterrows()):
    if (pos % 5000) == 0:
        print(pos)  # coarse progress indicator
    if row.season < 2019:
        continue

    season = row['season']
    home_team = row['team_h']
    visit_team = row['team_v']
    date_dblh = row['date_dblhead']

    # Home side: implied probability plus the over/under line and result.
    try:
        home_odds = df_odds_dict[home_team][season]
        implied_prob_h[pos] = home_odds.loc[date_dblh, 'prob_implied']
        over_under[pos] = home_odds.loc[date_dblh, 'Total']
        ou_result[pos] = home_odds.loc[date_dblh, 'OU']
    except KeyError:
        # Raised both when the game key is absent from the table and when no
        # table exists for this team/season.
        print(f'Game not found wrt home_team:{home_team} vs {visit_team} date_dbl {date_dblh}')

    # Visiting side: only the implied probability is taken.
    try:
        implied_prob_v[pos] = df_odds_dict[visit_team][season].loc[date_dblh, 'prob_implied']
    except KeyError:
        print(f'Game not found wrt visit_team:{visit_team} vs {home_team} date_dbl {date_dblh}')

0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
Game not found wrt home_team:CHA vs SEA date_dbl 202106260
Game not found wrt visit_team:SEA vs CHA date_dbl 202106260
Game not found wrt home_team:ATL vs SDN date_dbl 202107211
Game not found wrt visit_team:SDN vs ATL date_dbl 202107211
Game not found wrt home_team:ATL vs SDN date_dbl 202107212
Game not found wrt visit_team:SDN vs ATL date_dbl 202107212
Game not found wrt home_team:CHN vs MIL date_dbl 202303300
Game not found wrt visit_team:MIL vs CHN date_dbl 202303300
Game not found wrt home_team:CIN vs PIT date_dbl 202303300
Game not found wrt visit_team:PIT vs CIN date_dbl 202303300
Game not found wrt home_team:LAN vs ARI date_dbl 202303300
Game not found wrt visit_team:ARI vs LAN date_dbl 202303300
Game not found wrt home_team:MIA vs NYN date_dbl 202303300
Game not found wrt visit_team:NYN vs MIA date_dbl 202303300
Game not found wrt home_team:SDN vs COL date_dbl 202303300
Game not found wrt visit_team:COL vs SDN date_d

In [10]:
# Attach the per-game odds arrays to the main frame as new columns.
# implied_prob_h_mid averages the home implied probability with the
# complement of the visitor implied probability.
odds_columns = {
    'implied_prob_h': implied_prob_h,
    'implied_prob_v': implied_prob_v,
    'implied_prob_h_mid': (implied_prob_h + (1-implied_prob_v))/2,
    'over_under_line': over_under,
    'over_under_result': ou_result,
}
for col_name, col_values in odds_columns.items():
    df[col_name] = col_values

Drop games with no odds...

In [11]:
# Show the games from 2019 onward whose home implied probability is still 0.
no_home_odds = (df.season >= 2019) & (df.implied_prob_h == 0)
df.loc[no_home_odds]

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ER
R_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result
50624,20210626,0,Sat,SEA,AL,78,CHA,AL,76,3,2,54,D,"20210627,CHI12,0,0,15",,,CHI12,30017.0,197,000011001,000011000,34,7,0,0,3,3,0,0,0,3,0,9,0,0,1,0,7,7,2,2,0,0,27,9,0,0,3,0,29,6,0,0,0,2,0,1,1,3,0,11,1,0,3,0,5,3,3,3,0,0,27,7,1,0,1,0,cuzzp901,Phil Cuzzi,hallt901,Tom Hallion,rippm901,Mark Ripperger,blasc901,Cory Blaser,,(none),,(none),servs002,Scott Servais,larut101,Tony LaRussa,sewap001,Paul Sewald,hendl001,Liam Hendriks,gravk001,Kendall Graveman,tramt001,Taylor Trammell,gilbl002,Logan Gilbert,lynnl001,Lance Lynn,crawj002,J.P. Crawford,6,hanim001,Mitch Haniger,9,seagk001,Kyle Seager,5,frant002,Ty France,3,bauej001,Jake Bauers,7,longs001,Shed Long,4,torrl001,Luis Torrens,2,fralj001,Jake Fraley,10,tramt001,Taylor Trammell,8,andet001,Tim Anderson,6,goodb001,Brian Goodwin,8,moncy001,Yoan Moncada,5,abrej003,Jose Abreu,3,grany001,Yasmani Grandal,2,lambj001,Jake Lamb,7,mercy001,Yermin Mercedes,10,garcl004,Leury Garcia,4,gonzl005,Luis Gonzalez,9,"umpchange,3,ump1b,clemp901",Y,2021,-1,0,5,202106260,0.261093,0.220639,0.327324,0.292443,0.433627,0.372102,0.760951,0.664545,56.0,104.0,27.0,34.0,102.0,81.0,0.239378,0.243539,0.314019,0.301194,0.383420,0.410537,0.697438,0.711731,11.0,15.0,7.0,5.0,18.0,15.0,0.0,0.0,0.5,0.0,
50907,20210721,1,Wed,SDN,NL,98,ATL,NL,94,3,2,42,D,,,,ATL03,28621.0,156,0001200,0000020,26,7,2,0,1,3,0,1,0,4,0,6,0,1,0,0,7,3,1,1,0,0,21,10,1,1,0,0,25,5,1,0,0,1,1,1,0,0,0,3,0,0,0,0,4,3,3,3,2,0,21,8,0,0,0,0,timmt901,Tim Timmons,riggj901,Jeremy Riggs,mahrn901,Nick Mahrley,marqa901,Alfonso Marquez,,(none),,(none),tingj801,Jayce Tingler,snitb801,Brian Snitker,paddc001,Chris Paddack,mullk001,Kyle Muller,melam001,Mark Melancon,kim-h002,Ha-Seong Kim,paddc001,Chris Paddack,mullk001,Kyle Muller,phamt001,Tommy Pham,7,tatif002,Fernando Tatis,6,cronj001,Jake Cronenworth,3,machm001,Manny Machado,5,myerw001,Wil Myers,9,profj001,Jurickson Profar,8,kim-h002,Ha-Seong Kim,4,rivaw001,Webster Rivas,2,paddc001,Chris Paddack,1,pedej001,Joc Pederson,9,swand001,Dansby Swanson,6,freef001,Freddie Freeman,3,albio001,Ozzie Albies,4,rilea001,Austin Riley,5,hereg002,Guillermo Heredia,8,arcio002,Orlando Arcia,7,smitk002,Kevan Smith,2,mullk001,Kyle Muller,1,,Y,2021,-1,0,5,202107211,0.250874,0.248832,0.326777,0.323520,0.447673,0.429105,0.774450,0.752625,69.0,146.0,20.0,40.0,85.0,99.0,0.248227,0.277168,0.326067,0.352100,0.427558,0.499501,0.753625,0.851602,19.0,22.0,6.0,9.0,8.0,18.0,0.0,0.0,0.5,0.0,
50908,20210721,2,Wed,SDN,NL,99,ATL,NL,95,6,5,42,N,"20210924,SAN02,5,4,27",,,ATL03,-1.0,126,4100001,0400010,26,6,2,0,1,5,0,1,0,6,2,7,1,0,0,0,6,7,5,5,0,0,21,5,0,0,1,0,24,5,2,0,1,5,0,1,0,6,0,6,0,0,1,0,5,4,6,6,2,0,21,7,0,0,0,0,torrc901,Carlos Torres,mahrn901,Nick Mahrley,marqa901,Alfonso Marquez,riggj901,Jeremy Riggs,,(none),,(none),tingj801,Jayce Tingler,snitb801,Brian Snitker,hudsd001,Daniel Hudson,smitw002,Will Smith,melam001,Mark Melancon,tatif002,Fernando Tatis,knehr001,Reiss Knehr,wilsb003,Bryse Wilson,grist001,Trent Grisham,8,tatif002,Fernando Tatis,6,cronj001,Jake Cronenworth,4,hosme001,Eric Hosmer,3,myerw001,Wil Myers,9,profj001,Jurickson Profar,7,kim-h002,Ha-Seong Kim,5,carav001,Victor Caratini,2,knehr001,Reiss Knehr,1,pedej001,Joc Pederson,9,swand001,Dansby Swanson,6,freef001,Freddie Freeman,3,albio001,Ozzie Albies,4,rilea001,Austin Riley,5,vogts001,Stephen Vogt,2,hereg002,Guillermo Heredia,8,arcio002,Orlando Arcia,7,wilsb003,Bryse Wilson,1,"umpchange,5,umphome,ticht901,5,ump1b,littw901,...",Y,2021,-1,0,11,202107212,0.250414,0.249205,0.326105,0.324069,0.446511,0.430496,0.772616,0.754565,69.0,146.0,20.0,41.0,85.0,100.0,0.246926,0.280561,0.323827,0.357782,0.425205,0.504008,0.749032,0.861790,18.0,22.0,6.0,10.0,8.0,19.0,0.0,0.0,0.5,0.0,
54345,20230330,0,Thu,MIL,NL,1,CHN,NL,1,0,4,51,D,,,,CHI11,36054.0,141,000000000,00400000x,29,4,0,0,0,0,0,0,0,5,0,12,0,0,2,0,7,4,4,4,0,0,24,12,1,0,1,0,30,6,0,0,0,3,0,0,1,4,0,5,0,0,1,0,7,4,0,0,1,0,27,13,1,2,2,0,kulpr901,Ron Kulpa,blasc901,Cory Blaser,torrc901,Carlos Torres,viscj901,Jansen Visconti,,(none),,(none),counc001,Craig Counsell,rossd001,David Ross,strom001,Marcus Stroman,burnc002,Corbin Burnes,,(none),swand001,Dansby Swanson,burnc002,Corbin Burnes,strom001,Marcus Stroman,yelic001,Christian Yelich,7,winkj002,Jesse Winker,10,adamw002,Willy Adames,6,tellr001,Rowdy Tellez,3,contw002,William Contreras,2,urial001,Luis Urias,5,mitcg001,Garrett Mitchell,8,andeb006,Brian Anderson,9,turab002,Brice Turang,4,hoern001,Nico Hoerner,4,swand001,Dansby Swanson,6,happi001,Ian Happ,7,bellc002,Cody Bellinger,8,manct001,Trey Mancini,10,gomey001,Yan Gomes,2,hosme001,Eric Hosmer,3,wisdp001,Patrick Wisdom,5,mastm001,Miles Mastrobuoni,9,,Y,2023,4,1,4,202303300,0.238341,0.234632,0.303439,0.308308,0.386544,0.408529,0.689983,0.716837,111.0,96.0,37.0,30.0,96.0,91.0,0.227083,0.227733,0.296682,0.300642,0.369792,0.395749,0.666474,0.696391,24.0,20.0,5.0,6.0,16.0,17.0,0.0,0.0,0.5,0.0,
54346,20230330,0,Thu,PIT,NL,1,CIN,NL,1,5,4,54,D,,,,CIN09,44063.0,182,001300010,100120000,30,6,1,0,1,4,1,1,0,9,0,11,2,0,1,0,9,5,4,4,0,0,27,9,1,0,2,0,33,7,1,1,1,3,0,0,0,6,0,15,0,1,1,0,8,6,5,5,1,0,27,7,0,0,1,0,wegnm901,Mark Wegner,drecb901,Bruce Dreckman,sches901,Stu Scheurwater,moorm901,Malachi Moore,,(none),,(none),sheld801,Derek Shelton,belld002,David Bell,zastr001,Rob Zastryzny,farmb001,Buck Farmer,bednd001,David Bednar,cruzo001,Oneil Cruz,kellm003,Mitch Keller,greeh001,Hunter Greene,cruzo001,Oneil Cruz,6,reynb001,Bryan Reynolds,7,mccua001,Andrew McCutchen,10,santc002,Carlos Santana,3,smitc008,Canaan Smith-Njigba,9,hayek001,Ke'Bryan Hayes,5,suwij001,Jack Suwinski,8,bae-j001,Ji Hwan Bae,4,hedga001,Austin Hedges,2,indij001,Jonathan India,4,friet001,TJ Friedl,8,fralj001,Jake Fraley,10,stept001,Tyler Stephenson,2,voslj001,Jason Vosler,3,myerw001,Wil Myers,9,stees001,Spencer Steer,5,bensw001,Will Benson,7,garcj007,Jose Garcia,6,,Y,2023,-1,0,9,202303300,0.234944,0.222472,0.294239,0.286206,0.372305,0.363722,0.666544,0.649928,58.0,89.0,33.0,32.0,81.0,121.0,0.204312,0.235176,0.266098,0.306290,0.333676,0.381910,0.599774,0.688199,5.0,25.0,7.0,4.0,18.0,25.0,0.0,0.0,0.5,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56770,20231001,0,Sun,SDN,NL,162,CHA,AL,162,2,1,66,D,,,,CHI12,20588.0,187,00000000011,00000000010,36,7,1,0,0,2,0,1,0,7,0,8,2,1,2,0,11,6,0,0,0,0,33,13,1,0,1,0,40,7,0,0,0,1,0,0,1,3,1,16,1,0,1,0,12,6,0,0,0,0,33,15,0,1,2,0,macka901,Alex MacKay,ballb901,Brock Ballou,hudsm901,Marvin Hudson,tumpj901,John Tumpane,,(none),,(none),melvb001,Bob Melvin,cairm001,Miguel Cairo,hillr001,Rich Hill,crond001,Declan Cronin,,(none),profj001,Jurickson Profar,avilp001,Pedro Avila,urenj001,Jose Urena,bogax001,Xander Bogaerts,6,kim-h002,Ha-Seong Kim,4,sotoj001,Juan Soto,10,tatif002,Fernando Tatis,9,profj001,Jurickson Profar,7,coopg002,Garrett Cooper,3,grist001,Trent Grisham,8,rosae002,Eguy Rosario,5,sullb001,Brett Sullivan,2,andre001,Elvis Andrus,6,remiz001,Zach Remillard,4,vauga001,Andrew Vaughn,3,sheeg001,Gavin Sheets,9,grany001,Yasmani Grandal,10,sosal001,Lenyn Sosa,5,naqut001,Tyler Naquin,7,thomt002,Trayce Thompson,8,perec005,Carlos Perez,2,,Y,2023,-1,0,3,202310010,0.238173,0.243373,0.286834,0.324396,0.384825,0.413346,0.671659,0.737742,85.0,135.0,22.0,31.0,95.0,72.0,0.229209,0.273959,0.271333,0.347826,0.360041,0.441433,0.631373,0.789259,7.0,32.0,5.0,4.0,23.0,13.0,0.0,0.0,0.5,0.0,
56771,20231001,0,Sun,CLE,AL,162,DET,AL,162,2,5,51,D,,,,DET05,41425.0,148,000110000,11102000x,33,5,1,0,1,2,0,0,0,1,0,8,1,0,0,0,5,4,5,5,0,0,24,7,1,0,1,0,32,9,4,0,1,5,0,0,0,6,0,7,1,0,1,0,9,3,1,1,1,0,27,7,1,0,0,0,bacoj901,John Bacon,hoyej901,James Hoye,libkj901,John Libka,herna901,Angel Hernandez,,(none),,(none),frant001,Terry Francona,hinca001,A.J. Hinch,rodre004,Eduardo Rodriguez,gioll001,Lucas Giolito,langa001,Alex Lange,carpk001,Kerry Carpenter,gioll001,Lucas Giolito,rodre004,Eduardo Rodriguez,kwans001,Steven Kwan,7,ramij003,Jose Ramirez,10,naylj001,Josh Naylor,3,laurr001,Ramon Laureano,8,gonzo001,Oscar Gonzalez,9,roccb001,Brayan Rocchio,6,naylb001,Bo Naylor,2,freet001,Tyler Freeman,5,tenaj001,Jose Tena,4,meadp001,Parker Meadows,8,torks001,Spencer Torkelson,3,cabrm001,Miguel Cabrera,10,carpk001,Kerry Carpenter,9,vierm001,Matt Vierling,5,ibana001,Andy Ibanez,4,badda001,Akil Baddoo,7,baezj001,Javier Baez,6,rogej004,Jake Rogers,2,,Y,2023,3,1,7,202310010,0.235541,0.251767,0.299916,0.310851,0.380040,0.382998,0.679957,0.693849,85.0,150.0,23.0,36.0,100.0,82.0,0.243402,0.260234,0.316254,0.323529,0.389052,0.390838,0.705306,0.714368,24.0,41.0,5.0,11.0,12.0,19.0,0.0,0.0,0.5,0.0,
56772,20231001,0,Sun,NYA,AL,162,KCA,AL,162,2,5,51,D,,,,KAN06,20662.0,144,000002000,02021000x,32,7,3,0,0,2,0,0,0,3,0,7,0,0,2,0,6,3,5,5,0,0,24,12,1,0,0,0,32,11,0,1,3,5,1,1,0,1,0,8,2,1,0,0,6,4,2,2,0,0,27,14,0,0,2,0,vondc901,Clint Vondrak,torrc901,Carlos Torres,kulpr901,Ron Kulpa,riggj901,Jeremy Riggs,,(none),,(none),boona001,Aaron Boone,mathm001,Mike Matheny,greiz001,Zack Greinke,kingm002,Michael King,mcarj001,James McArthur,melem001,MJ Melendez,kingm002,Michael King,greiz001,Zack Greinke,lemad001,DJ LeMahieu,3,volpa001,Anthony Volpe,6,wella002,Austin Wells,2,higak001,Kyle Higashioka,10,kinei001,Isiah Kiner-Falefa,5,cabro002,Oswaldo Cabrera,9,perao002,Oswald Peraza,4,peree005,Everson Pereira,7,flore001,Estevan Florial,8,garcm003,Maikel Garcia,5,wittb002,Bobby Witt,6,peres002,Salvador Perez,2,melem001,MJ Melendez,9,olive001,Edward Olivares,7,massm001,Michael Massey,4,bland001,Dairon Blanco,8,pratn001,Nick Pratto,3,duffm002,Matt Duffy,10,,Y,2023,3,1,7,202310010,0.243139,0.226461,0.296404,0.299881,0.395653,0.397106,0.692057,0.696987,161.0,100.0,46.0,30.0,81.0,96.0,0.248248,0.220120,0.312271,0.300893,0.407407,0.383466,0.719678,0.684359,33.0,21.0,11.0,3.0,18.0,19.0,0.0,0.0,0.5,0.0,
56773,20231001,0,Sun,TEX,AL,162,SEA,AL,162,0,1,51,D,,,,SEA03,43997.0,121,000000000,00010000x,30,4,1,0,0,0,0,0,0,0,0,11,0,0,1,0,3,3,1,1,0,0,24,4,0,0,0,0,28,4,0,0,0,1,0,0,1,1,0,10,0,0,0,0,5,4,0,0,0,0,27,9,0,0,1,0,millb903,Brennan Miller,blakr901,Ryan Blakney,carlm901,Mark Carlson,gibsh902,Tripp Gibson,,(none),,(none),beast801,Tony Beasley,servs002,Scott Servais,kirbg001,George Kirby,dunnd001,Dane Dunning,campi001,Isaiah Campbell,canzd001,Dominic Canzone,dunnd001,Dane Dunning,kirbg001,George Kirby,semim001,Marcus Semien,4,seagc001,Corey Seager,6,garvm001,Mitch Garver,10,garca005,Adolis Garcia,9,lowen001,Nathaniel Lowe,3,jungj001,Josh Jung,5,heimj001,Jonah Heim,2,tavel001,Leody Taveras,8,carte001,Evan Carter,7,crawj002,J.P. Crawford,6,rodrj007,Julio Rodriguez,8,ralec001,Cal Raleigh,2,suare001,Eugenio Suarez,5,kelej001,Jarred Kelenic,9,frant002,Ty France,3,canzd001,Dominic Canzone,7,fordm002,Mike Ford,10,rojaj001,Josh Rojas,4,,Y,2023,1,1,1,202310010,0.242826,0.263891,0.311705,0.335377,0.414639,0.454708,0.726344,0.790085,118.0,79.0,30.0,20.0,74.0,57.0,0.229717,0.239719,0.296429,0.330980,0.389052,0.442327,0.685480,0.773307,17.0,12.0,3.0,1.0,10.0,6.0,0.0,0.0,0.5,0.0,


In [12]:
# Index labels of the games from 2019 onward that lack odds on EITHER side.
# The home and visitor probabilities are filled by separate lookups, so a
# game can have a home value but a visitor value still at its initial 0
# (which would also make implied_prob_h_mid wrong).  Checking only the home
# side would let such games through, so both sides are tested here.
missing_home = df.implied_prob_h == 0
missing_visit = df.implied_prob_v == 0
indicies_to_drop = df[(df.season >= 2019) & (missing_home | missing_visit)].index
indicies_to_drop

Int64Index([50624, 50907, 50908, 54345, 54346, 54347, 54348, 54349, 54350,
            54351,
            ...
            56765, 56766, 56767, 56768, 56769, 56770, 56771, 56772, 56773,
            56774],
           dtype='int64', length=2433)

In [13]:
# Shape of the main frame before the rows in indicies_to_drop are removed.
df.shape

(56775, 199)

In [14]:
# Remove the rows whose index labels were collected in indicies_to_drop,
# then show the resulting shape.
df = df.drop(index=indicies_to_drop)
df.shape

(54342, 199)

In [15]:
# Renumber the rows 0..n-1 (discarding the old labels) so the index has no
# gaps after the drop; the shape is unchanged.
df = df.reset_index(drop=True)
df.shape

(54342, 199)

In [16]:
# Save the augmented frame; index=False keeps the row index out of the file.
df.to_csv('df_bp3.csv', index=False)