Baseball Prediction: 5b - Adding Starting Pitching Features

    - Now that we have raw game-level data for each pitcher, we can derive features based on the starting pitchers to help our prediction model for individual games.


In [34]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

pd.set_option('display.max_columns',1000)
pd.set_option('display.max_rows',1000)

Overall Plan of Attack

    - For each starting pitcher we will load their raw data, create features for each game based on their previous performance, and then save the dataframe in a dictionary structure for easy lookup

    - Then we can iterate through our game-level dataframe, add in the features for each starting pitcher, and use those to improve our model

Once again, we will use C.C. Sabathia as our example candidate

In [35]:
df_cc = pd.read_csv('/Volumes/CharmedXi/beatVegas/SP_new/pitching_data_sabac001.csv')

In [36]:
df_cc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 31 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   at_vs        561 non-null    object 
 1   Opponent     561 non-null    object 
 2   League       561 non-null    object 
 3   GS           561 non-null    int64  
 4   CG           561 non-null    int64  
 5   SHO          561 non-null    int64  
 6   GF           561 non-null    int64  
 7   SV           561 non-null    int64  
 8   IP           561 non-null    float64
 9   H            561 non-null    int64  
 10  BFP          561 non-null    int64  
 11  HR           561 non-null    int64  
 12  R            561 non-null    int64  
 13  ER           561 non-null    int64  
 14  BB           561 non-null    int64  
 15  IB           561 non-null    int64  
 16  SO           561 non-null    int64  
 17  SH           561 non-null    int64  
 18  SF           561 non-null    int64  
 19  WP      

In [37]:
df_cc.head(10)

Unnamed: 0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,date,dblhead_num
0,VS,BAL,A,1,0,0,0,0,5.2,3,22,1,3,3,2,0,3,0,0,1,0,0,2,0,0,0,0,0,4.76,4- 8-2001,
1,AT,DET,A,1,0,0,0,0,5.0,5,21,0,4,4,2,0,2,0,0,0,0,0,1,0,1,0,1,0,5.91,4-13-2001,
2,AT,BAL,A,1,0,0,0,0,6.0,6,23,0,2,2,2,0,3,0,0,0,0,1,1,0,0,0,1,0,4.86,4-19-2001,
3,VS,ANA,A,1,0,0,0,0,5.0,5,24,0,3,2,4,0,1,0,0,0,0,0,1,1,0,1,0,1,4.57,4-25-2001,
4,AT,KC,A,1,0,0,0,0,5.0,4,19,1,1,1,1,0,2,0,0,0,0,0,0,0,1,0,1,0,4.05,5- 2-2001,
5,VS,KC,A,1,0,0,0,0,5.2,5,23,0,1,1,1,0,5,0,1,1,1,0,1,0,0,0,1,0,3.62,5- 9-2001,
6,AT,TEX,A,1,0,0,0,0,5.0,6,23,0,3,3,3,0,5,0,0,1,0,0,0,0,1,0,1,0,3.86,5-15-2001,
7,AT,ANA,A,1,0,0,0,0,4.1,9,24,1,6,6,3,0,4,0,0,1,0,0,3,0,0,0,0,0,4.75,5-20-2001,
8,VS,NY,A,1,0,0,0,0,4.2,7,25,2,5,5,4,1,3,0,0,0,0,1,2,0,0,0,0,1,5.24,5-26-2001,
9,AT,NY,A,1,0,0,0,0,4.0,4,20,1,4,3,5,0,2,0,0,0,0,0,0,0,1,0,1,0,5.36,6- 1-2001,


In [38]:
# Baseball statisticans report partial innings pitched as X.1 & X.2 where X is the integer number of innings pitched
# However, for calculation purposes, we need to tell the computer to recognize X.1 & X.2 as X & (one-third) and (two-thirds), respectively
# The following line of code does that for us.

df_cc['IP_real'] = df_cc['IP_real'] = (df_cc.IP - (df_cc.IP % 1)) + (df_cc.IP % 1) * (10/3)

In [39]:
df_cc.columns

Index(['at_vs', 'Opponent', 'League', 'GS', 'CG', 'SHO', 'GF', 'SV', 'IP', 'H',
       'BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B', 'GDP', 'ROE', 'W', 'L', 'ERA', 'date', 'dblhead_num',
       'IP_real'],
      dtype='object')

Generating Features for a Starting Pitcher

    - Want to creature features based on a lookback across games for each pitcher

    - Need to aggregate statistics like innings pitched (IP), runs or earned runs given up, hits and walks given up, strikeouts, and so on

    - We will also need to decide how to handle pitchers early in their career when they have relatively few games on which to base their performance.

In [40]:
def roll_column(df, col, winsize):
    # do the standard Pandas rolling calc
    t_col = df[col].rolling(winsize, closed='left').sum().to_numpy()
    
    # for the early columns, just do a rolling sum from the beginning
    t_col[:winsize] = np.concatenate(([0],df[col].iloc[:(winsize)].cumsum().to_numpy()[:-1]))

    return(t_col)

In [43]:
def load_and_process_pitch_df(p_id, filepath=''):
    fname = filepath+'pitching_data_'+p_id+'.csv'
    pitch_df = pd.read_csv(fname)
    
    # Convert date, fix dblhead_num to be 0,1,2
    pitch_df['date'] = (pd.to_datetime(pitch_df.date).astype(str).str.replace('-','')).astype(int)
    pitch_df.dblhead_num.fillna(0, inplace=True)
    pitch_df['dblhead_num'] = pitch_df['dblhead_num'].astype(int)
    
    # Convert IP to proper mathematical format
    pitch_df['IP_real'] = (pitch_df.IP - (pitch_df.IP % 1)) + (pitch_df.IP % 1) * (10/3)
    
    cols_to_agg = ['IP_real', 'H','BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B']
    winsizes = [3,14,30]
    for winsize in winsizes:
        for raw_col in cols_to_agg:
            new_colname = 'rollsum_'+raw_col+'_'+str(winsize)        
            pitch_df[new_colname] = roll_column(pitch_df, raw_col, winsize)

    
    er_per_ip_def = (5/9)
    h_bb_per_ip_def = 1.5
    h_bb_per_bf_def = .37
    so_per_bf_def = .2
    ip_per_game_def = 3
    bf_per_game_def = 12
    tb_bb_perc_def = .45
    fip_numer_per_ip_def = .124*13 + 1.5*3 - 2*.8
    fip_numer_per_bf_def = .03*13 + .37*3 - 2*.2
    for winsize in winsizes:
        hit_col = 'rollsum_H_'+str(winsize)
        bb_col = 'rollsum_BB_'+str(winsize)
        h_bb_col = 'H_BB_roll_'+str(winsize)
        double_col = 'rollsum_2B_'+str(winsize)
        triple_col = 'rollsum_3B_'+str(winsize)
        hr_col = 'rollsum_HR_'+str(winsize)
        xb_col = 'XB_roll_'+str(winsize)
        tb_col = 'TB_roll_'+str(winsize)
        so_col = 'rollsum_SO_'+str(winsize)
        so_mod_col = 'SO_mod_'+str(winsize)
        ip_col = 'rollsum_IP_real_'+str(winsize)
        ip_mod_col = 'IP_mod_'+str(winsize)
        er_col = 'rollsum_ER_'+str(winsize)
        er_mod_col = 'ER_mod_'+str(winsize)
        bf_col = 'rollsum_BFP_'+str(winsize)
        bf_mod_col = 'BF_mod_'+str(winsize)
        era_col = 'ERA_'+str(winsize)
        fip_col = 'FIP_'+str(winsize)
        fip_perc_col = 'FIP_perc_'+str(winsize)
        fip_numer_col = 'FIP_numer_'+str(winsize)
        fip_numer_mod_col = 'FIP_numer_mod_'+str(winsize)
        fip_numer_mod2_col = 'FIP_numer_mod2_'+str(winsize)
        whip_col = 'WHIP_'+str(winsize)
        so_perc_col = 'SO_perc_'+str(winsize)
        h_bb_perc_col = 'H_BB_perc_'+str(winsize)
        h_bb_mod_col = 'H_BB_mod_'+str(winsize)
        h_bb_mod2_col = 'H_BB_mod2_'+str(winsize)
        tb_bb_mod_col = 'TB_BB_mod_'+str(winsize)
        tb_bb_perc_col = 'TB_BB_perc_'+str(winsize)
        pitch_df[h_bb_col] = pitch_df[hit_col]+pitch_df[bb_col]
        pitch_df[xb_col] = pitch_df[double_col]+2*pitch_df[triple_col]+3*pitch_df[hr_col]
        pitch_df[tb_col] = pitch_df[hit_col]+pitch_df[xb_col]
        pitch_df[ip_mod_col] = np.maximum(pitch_df[ip_col], winsize*ip_per_game_def)
        pitch_df[bf_mod_col] = np.maximum(pitch_df[bf_col], winsize*bf_per_game_def)
        pitch_df[er_mod_col] = pitch_df[er_col] + er_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[fip_numer_col] = 13*pitch_df[hr_col] + 3*pitch_df[h_bb_col] -2*pitch_df[so_col]
        pitch_df[fip_numer_mod_col] = pitch_df[fip_numer_col] + fip_numer_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[fip_numer_mod2_col] = pitch_df[fip_numer_col] + fip_numer_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[h_bb_mod_col] = pitch_df[h_bb_col] + h_bb_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[h_bb_mod2_col] = pitch_df[h_bb_col] + h_bb_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[so_mod_col] = pitch_df[so_col] + so_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[tb_bb_mod_col] = (pitch_df[tb_col] + pitch_df[bb_col])+ tb_bb_perc_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[era_col] = (pitch_df[er_mod_col]/pitch_df[ip_mod_col])*9
        pitch_df[fip_col] = (pitch_df[fip_numer_mod_col]/pitch_df[ip_mod_col])
        pitch_df[fip_perc_col] = (pitch_df[fip_numer_mod_col]/pitch_df[bf_mod_col])
        pitch_df[whip_col] = pitch_df[h_bb_mod_col]/pitch_df[ip_mod_col]
        pitch_df[so_perc_col] = pitch_df[so_mod_col]/pitch_df[bf_mod_col]
        pitch_df[tb_bb_perc_col] = pitch_df[tb_bb_mod_col]/pitch_df[bf_mod_col]
        pitch_df[h_bb_perc_col] = pitch_df[h_bb_mod2_col]/pitch_df[bf_mod_col]
    pitch_df['date_dblhead'] = (pitch_df['date'].astype(str) + pitch_df['dblhead_num'].astype(str)).astype(int)
    pitch_df.set_index('date_dblhead', inplace=True)
    return(pitch_df)

In [44]:
df_cc = load_and_process_pitch_df('sabac001','SP_data/')

df_cc.head(15)

Unnamed: 0_level_0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,date,dblhead_num,IP_real,rollsum_IP_real_3,rollsum_H_3,rollsum_BFP_3,rollsum_HR_3,rollsum_R_3,rollsum_ER_3,rollsum_BB_3,rollsum_IB_3,rollsum_SO_3,rollsum_SH_3,rollsum_SF_3,rollsum_WP_3,rollsum_HBP_3,rollsum_BK_3,rollsum_2B_3,rollsum_3B_3,rollsum_IP_real_14,rollsum_H_14,rollsum_BFP_14,rollsum_HR_14,rollsum_R_14,rollsum_ER_14,rollsum_BB_14,rollsum_IB_14,rollsum_SO_14,rollsum_SH_14,rollsum_SF_14,rollsum_WP_14,rollsum_HBP_14,rollsum_BK_14,rollsum_2B_14,rollsum_3B_14,rollsum_IP_real_30,rollsum_H_30,rollsum_BFP_30,rollsum_HR_30,rollsum_R_30,rollsum_ER_30,rollsum_BB_30,rollsum_IB_30,rollsum_SO_30,rollsum_SH_30,rollsum_SF_30,rollsum_WP_30,rollsum_HBP_30,rollsum_BK_30,rollsum_2B_30,rollsum_3B_30,H_BB_roll_3,XB_roll_3,TB_roll_3,IP_mod_3,BF_mod_3,ER_mod_3,FIP_numer_3,FIP_numer_mod_3,FIP_numer_mod2_3,H_BB_mod_3,H_BB_mod2_3,SO_mod_3,TB_BB_mod_3,ERA_3,FIP_3,FIP_perc_3,WHIP_3,SO_perc_3,TB_BB_perc_3,H_BB_perc_3,H_BB_roll_14,XB_roll_14,TB_roll_14,IP_mod_14,BF_mod_14,ER_mod_14,FIP_numer_14,FIP_numer_mod_14,FIP_numer_mod2_14,H_BB_mod_14,H_BB_mod2_14,SO_mod_14,TB_BB_mod_14,ERA_14,FIP_14,FIP_perc_14,WHIP_14,SO_perc_14,TB_BB_perc_14,H_BB_perc_14,H_BB_roll_30,XB_roll_30,TB_roll_30,IP_mod_30,BF_mod_30,ER_mod_30,FIP_numer_30,FIP_numer_mod_30,FIP_numer_mod2_30,H_BB_mod_30,H_BB_mod2_30,SO_mod_30,TB_BB_mod_30,ERA_30,FIP_30,FIP_perc_30,WHIP_30,SO_perc_30,TB_BB_perc_30,H_BB_perc_30
date_dblhead,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1
200104080,VS,BAL,A,1,0,0,0,0,5.2,3,22,1,3,3,2,0,3,0,0,1,0,0,2,0,0,0,0,0,4.76,20010408,0,5.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,36.0,5.0,0.0,40.608,39.6,13.5,13.32,7.2,16.2,5.0,4.512,1.128,1.5,0.2,0.45,0.37,0.0,0.0,0.0,42.0,168.0,23.333333,0.0,189.504,184.8,63.0,62.16,33.6,75.6,5.0,4.512,1.128,1.5,0.2,0.45,0.37,0.0,0.0,0.0,90.0,360.0,50.0,0.0,406.08,396.0,135.0,133.2,72.0,162.0,5.0,4.512,1.128,1.5,0.2,0.45,0.37
200104130,AT,DET,A,1,0,0,0,0,5.0,5,21,0,4,4,2,0,2,0,0,0,0,0,1,0,1,0,1,0,5.91,20010413,0,5.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.0,5.0,8.0,9.0,36.0,4.851852,22.0,37.04,37.4,10.0,10.18,5.8,16.3,4.851852,4.115556,1.028889,1.111111,0.161111,0.452778,0.282778,5.0,5.0,8.0,42.0,168.0,23.185185,22.0,185.936,182.6,59.5,59.02,32.2,75.7,4.968254,4.427048,1.106762,1.416667,0.191667,0.450595,0.35131,5.0,5.0,8.0,90.0,360.0,49.851852,22.0,402.512,393.8,131.5,130.06,70.6,162.1,4.985185,4.472356,1.118089,1.461111,0.196111,0.450278,0.361278
200104190,AT,BAL,A,1,0,0,0,0,6.0,6,23,0,2,2,2,0,3,0,0,0,0,1,1,0,0,0,1,0,4.86,20010419,0,6.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,12.0,6.0,14.0,10.666667,43.0,7.0,39.0,39.0,39.0,12.0,12.0,5.0,18.0,5.90625,3.65625,0.906977,1.125,0.116279,0.418605,0.27907,12.0,6.0,14.0,42.0,168.0,24.407407,39.0,180.376,176.5,59.0,58.25,30.0,74.25,5.230159,4.294667,1.073667,1.404762,0.178571,0.441964,0.346726,12.0,6.0,14.0,90.0,360.0,51.074074,39.0,396.952,387.7,131.0,129.29,68.4,160.65,5.107407,4.410578,1.102644,1.455556,0.19,0.44625,0.359139
200104250,VS,ANA,A,1,0,0,0,0,5.0,5,24,0,3,2,4,0,1,0,0,0,0,0,1,1,0,1,0,1,4.57,20010425,0,5.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,20.0,7.0,21.0,16.666667,66.0,9.0,57.0,57.0,57.0,20.0,20.0,8.0,27.0,4.86,3.42,0.863636,1.2,0.121212,0.409091,0.30303,20.0,7.0,21.0,42.0,168.0,23.074074,57.0,171.304,169.2,58.0,57.74,28.4,72.9,4.944444,4.078667,1.019667,1.380952,0.169048,0.433929,0.34369,20.0,7.0,21.0,90.0,360.0,49.740741,57.0,387.88,380.4,130.0,128.78,66.8,159.3,4.974074,4.309778,1.077444,1.444444,0.185556,0.4425,0.357722
200105020,AT,KC,A,1,0,0,0,0,5.0,4,19,1,1,1,1,0,2,0,0,0,0,0,0,0,1,0,1,0,4.05,20010502,0,5.0,16.0,16.0,68.0,0.0,9.0,8.0,8.0,0.0,6.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,21.666667,19.0,90.0,1.0,12.0,11.0,10.0,0.0,9.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,21.666667,19.0,90.0,1.0,12.0,11.0,10.0,0.0,9.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,24.0,5.0,21.0,16.0,68.0,8.0,60.0,60.0,60.0,24.0,24.0,6.0,29.0,4.5,3.75,0.882353,1.5,0.088235,0.426471,0.352941,29.0,10.0,29.0,42.0,168.0,22.296296,82.0,173.744,167.8,59.5,57.86,24.6,74.1,4.777778,4.136762,1.03419,1.416667,0.146429,0.441071,0.344405,29.0,10.0,29.0,90.0,360.0,48.962963,82.0,390.32,379.0,131.5,128.9,63.0,160.5,4.896296,4.336889,1.084222,1.461111,0.175,0.445833,0.358056
200105090,VS,KC,A,1,0,0,0,0,5.2,5,23,0,1,1,1,0,5,0,1,1,1,0,1,0,0,0,1,0,3.62,20010509,0,5.666667,16.0,15.0,66.0,1.0,6.0,5.0,7.0,0.0,6.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,26.666667,23.0,109.0,2.0,13.0,12.0,11.0,0.0,11.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,26.666667,23.0,109.0,2.0,13.0,12.0,11.0,0.0,11.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,22.0,7.0,22.0,16.0,66.0,5.0,67.0,67.0,67.0,22.0,22.0,6.0,29.0,2.8125,4.1875,1.015152,1.375,0.090909,0.439394,0.333333,34.0,13.0,36.0,42.0,168.0,20.518519,106.0,175.184,170.9,57.0,55.83,22.8,73.55,4.396825,4.171048,1.042762,1.357143,0.135714,0.437798,0.332321,34.0,13.0,36.0,90.0,360.0,47.185185,106.0,391.76,382.1,129.0,126.87,61.2,159.95,4.718519,4.352889,1.088222,1.433333,0.17,0.444306,0.352417
200105150,AT,TEX,A,1,0,0,0,0,5.0,6,23,0,3,3,3,0,5,0,0,1,0,0,0,0,1,0,1,0,3.86,20010515,0,5.0,15.666667,14.0,66.0,1.0,5.0,4.0,6.0,0.0,8.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,32.333333,28.0,132.0,2.0,14.0,13.0,12.0,0.0,16.0,0.0,1.0,2.0,1.0,1.0,6.0,1.0,32.333333,28.0,132.0,2.0,14.0,13.0,12.0,0.0,16.0,0.0,1.0,2.0,1.0,1.0,6.0,1.0,20.0,7.0,21.0,15.666667,66.0,4.0,57.0,57.0,57.0,20.0,20.0,8.0,27.0,2.297872,3.638298,0.863636,1.276596,0.121212,0.409091,0.30303,40.0,14.0,42.0,42.0,168.0,18.37037,114.0,157.616,153.6,54.5,53.32,23.2,70.2,3.936508,3.752762,0.93819,1.297619,0.138095,0.417857,0.317381,40.0,14.0,42.0,90.0,360.0,45.037037,114.0,374.192,364.8,126.5,124.36,61.6,156.6,4.503704,4.157689,1.039422,1.405556,0.171111,0.435,0.345444
200105200,AT,ANA,A,1,0,0,0,0,4.1,9,24,1,6,6,3,0,4,0,0,1,0,0,3,0,0,0,0,0,4.75,20010520,0,4.333333,15.666667,15.0,65.0,1.0,5.0,5.0,5.0,0.0,12.0,0.0,1.0,2.0,1.0,0.0,1.0,0.0,37.333333,34.0,155.0,2.0,17.0,16.0,15.0,0.0,21.0,0.0,1.0,3.0,1.0,1.0,6.0,1.0,37.333333,34.0,155.0,2.0,17.0,16.0,15.0,0.0,21.0,0.0,1.0,3.0,1.0,1.0,6.0,1.0,20.0,4.0,19.0,15.666667,65.0,5.0,49.0,49.0,49.0,20.0,20.0,12.0,24.0,2.87234,3.12766,0.753846,1.276596,0.184615,0.369231,0.307692,49.0,14.0,48.0,42.0,168.0,18.592593,131.0,152.056,145.3,56.0,53.81,23.6,68.85,3.984127,3.620381,0.905095,1.333333,0.140476,0.409821,0.320298,49.0,14.0,48.0,90.0,360.0,45.259259,131.0,368.632,356.5,128.0,124.85,62.0,155.25,4.525926,4.095911,1.023978,1.422222,0.172222,0.43125,0.346806
200105260,VS,NY,A,1,0,0,0,0,4.2,7,25,2,5,5,4,1,3,0,0,0,0,1,2,0,0,0,0,1,5.24,20010526,0,4.666667,15.0,20.0,70.0,1.0,10.0,10.0,7.0,0.0,14.0,0.0,1.0,3.0,1.0,0.0,4.0,0.0,41.666667,43.0,179.0,3.0,23.0,22.0,18.0,0.0,25.0,0.0,1.0,4.0,1.0,1.0,9.0,1.0,41.666667,43.0,179.0,3.0,23.0,22.0,18.0,0.0,25.0,0.0,1.0,4.0,1.0,1.0,9.0,1.0,27.0,7.0,27.0,15.0,70.0,10.0,66.0,66.0,66.0,27.0,27.0,14.0,34.0,6.0,4.4,0.942857,1.8,0.2,0.485714,0.385714,61.0,20.0,63.0,42.0,179.0,22.185185,172.0,173.504,172.0,61.5,61.0,25.0,81.0,4.753968,4.131048,0.969296,1.464286,0.139665,0.452514,0.340782,61.0,20.0,63.0,90.0,360.0,48.851852,172.0,390.08,371.1,133.5,127.97,61.2,162.45,4.885185,4.334222,1.083556,1.483333,0.17,0.45125,0.355472
200106010,AT,NY,A,1,0,0,0,0,4.0,4,20,1,4,3,5,0,2,0,0,0,0,0,0,0,1,0,1,0,5.36,20010601,0,4.0,14.0,22.0,72.0,3.0,14.0,14.0,10.0,1.0,12.0,0.0,0.0,2.0,0.0,1.0,5.0,0.0,46.333333,50.0,204.0,5.0,28.0,27.0,22.0,1.0,28.0,0.0,1.0,4.0,1.0,2.0,11.0,1.0,46.333333,50.0,204.0,5.0,28.0,27.0,22.0,1.0,28.0,0.0,1.0,4.0,1.0,2.0,11.0,1.0,32.0,14.0,36.0,14.0,72.0,14.0,111.0,111.0,111.0,32.0,32.0,12.0,46.0,9.0,7.928571,1.541667,2.285714,0.166667,0.638889,0.444444,72.0,28.0,78.0,46.333333,204.0,27.0,225.0,225.0,225.0,72.0,72.0,28.0,100.0,5.244604,4.856115,1.102941,1.553957,0.137255,0.490196,0.352941,72.0,28.0,78.0,90.0,360.0,51.259259,225.0,422.024,396.6,137.5,129.72,59.2,170.2,5.125926,4.689156,1.172289,1.527778,0.164444,0.472778,0.360333


LOAD IN GAME LEVEL DATA

In [45]:
df = pd.read_csv('df_bp3.csv', low_memory=False)

In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56771 entries, 0 to 56770
Columns: 199 entries, date to over_under_result
dtypes: float64(34), int64(87), object(78)
memory usage: 86.2+ MB


In [47]:
start_pitchers_h = df.pitcher_start_id_h.unique()
start_pitchers_v = df.pitcher_start_id_v.unique()
start_pitchers_all = np.union1d(start_pitchers_h.astype(str), start_pitchers_v.astype(str))

In [48]:
pitcher_data_dict = {}
for i, p_id in enumerate(start_pitchers_all):
    if i % 250 == 0:
        print(i)
    try:  
        pitcher_data_dict[p_id] = load_and_process_pitch_df(p_id,'SP_new/')
    except:
       pass

0
250
500
750
1000
1250
1500
1750
2000


In [49]:
raw_cols_to_add = ['GS', 'IP',
       'H', 'BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP',
       'HBP', 'BK', '2B', '3B', 'IP_real', 'rollsum_IP_real_3', 'rollsum_H_3',
       'rollsum_BFP_3', 'rollsum_HR_3', 'rollsum_R_3', 'rollsum_ER_3',
       'rollsum_BB_3', 'rollsum_IB_3', 'rollsum_SO_3', 'rollsum_SH_3',
       'rollsum_SF_3', 'rollsum_WP_3', 'rollsum_HBP_3',
       'rollsum_BK_3', 'rollsum_2B_3', 'rollsum_3B_3',
       'rollsum_IP_real_14', 'rollsum_H_14', 'rollsum_BFP_14',
       'rollsum_HR_14', 'rollsum_R_14', 'rollsum_ER_14', 'rollsum_BB_14',
       'rollsum_IB_14', 'rollsum_SO_14', 'rollsum_SH_14', 'rollsum_SF_14',
       'rollsum_WP_14', 'rollsum_HBP_14', 'rollsum_BK_14',
       'rollsum_2B_14', 'rollsum_3B_14', 'rollsum_IP_real_30',
       'rollsum_H_30', 'rollsum_BFP_30', 'rollsum_HR_30', 'rollsum_R_30',
       'rollsum_ER_30', 'rollsum_BB_30', 'rollsum_IB_30', 'rollsum_SO_30',
       'rollsum_SH_30', 'rollsum_SF_30', 'rollsum_WP_30',
       'rollsum_HBP_30', 'rollsum_BK_30', 'rollsum_2B_30',
       'rollsum_3B_30', 'H_BB_roll_3', 'XB_roll_3', 'TB_roll_3',
       'IP_mod_3', 'BF_mod_3', 'ER_mod_3', 'FIP_numer_3',
       'FIP_numer_mod_3', 'FIP_numer_mod2_3', 'H_BB_mod_3',
       'H_BB_mod2_3', 'SO_mod_3', 'TB_BB_mod_3', 'ERA_3', 'FIP_3',
       'FIP_perc_3', 'WHIP_3', 'SO_perc_3', 'TB_BB_perc_3',
       'H_BB_perc_3', 'H_BB_roll_14', 'XB_roll_14', 'TB_roll_14',
       'IP_mod_14', 'BF_mod_14', 'ER_mod_14', 'FIP_numer_14',
       'FIP_numer_mod_14', 'FIP_numer_mod2_14', 'H_BB_mod_14',
       'H_BB_mod2_14', 'SO_mod_14', 'TB_BB_mod_14', 'ERA_14', 'FIP_14',
       'FIP_perc_14', 'WHIP_14', 'SO_perc_14', 'TB_BB_perc_14',
       'H_BB_perc_14', 'H_BB_roll_30', 'XB_roll_30', 'TB_roll_30',
       'IP_mod_30', 'BF_mod_30', 'ER_mod_30', 'FIP_numer_30',
       'FIP_numer_mod_30', 'FIP_numer_mod2_30', 'H_BB_mod_30',
       'H_BB_mod2_30', 'SO_mod_30', 'TB_BB_mod_30', 'ERA_30', 'FIP_30',
       'FIP_perc_30', 'WHIP_30', 'SO_perc_30', 'TB_BB_perc_30',
       'H_BB_perc_30']
cols_to_add = ['Strt_'+col+suff for col in raw_cols_to_add for suff in ['_h','_v']]

col_add_dict = {col:np.zeros(df.shape[0]) for col in cols_to_add}

In [50]:
for i in range(df.shape[0]):
    row = df.iloc[i,:]
    if i%1000==0:
        print(i)
    sp_id_v = row['pitcher_start_id_v']
    sp_id_h = row['pitcher_start_id_h']
    date_dblhead = row['date_dblhead']
    if sp_id_v in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_v]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict['Strt_'+col+'_v'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_v} date {date_dblhead}')
    if sp_id_h in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_h]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict['Strt_'+col+'_h'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_h} date {date_dblhead}')

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000


In [51]:
for col in cols_to_add:
    df[col] = col_add_dict[col]

In [52]:
df.sample(5)


Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_3_h,Strt_rollsum_IP_real_3_v,Strt_rollsum_H_3_h,Strt_rollsum_H_3_v,Strt_rollsum_BFP_3_h,Strt_rollsum_BFP_3_v,Strt_rollsum_HR_3_h,Strt_rollsum_HR_3_v,Strt_rollsum_R_3_h,Strt_rollsum_R_3_v,Strt_rollsum_ER_3_h,Strt_rollsum_ER_3_v,Strt_rollsum_BB_3_h,Strt_rollsum_BB_3_v,Strt_rollsum_IB_3_h,Strt_rollsum_IB_3_v,Strt_rollsum_SO_3_h,Strt_rollsum_SO_3_v,Strt_rollsum_SH_3_h,Strt_rollsum_SH_3_v,Strt_rollsum_SF_3_h,Strt_rollsum_SF_3_v,Strt_rollsum_WP_3_h,Strt_rollsum_WP_3_v,Strt_rollsum_HBP_3_h,Strt_rollsum_HBP_3_v,Strt_rollsum_BK_3_h,Strt_rollsum_BK_3_v,Strt_rollsum_2B_3_h,Strt_rollsum_2B_3_v,Strt_rollsum_3B_3_h,Strt_rollsum_3B_3_v,Strt_rollsum_IP_real_14_h,Strt_rollsum_IP_real_14_v,Strt_rollsum_H_14_h,Strt_rollsum_H_14_v,Strt_rollsum_BFP_14_h,Strt_rollsum_BFP_14_v,Strt_rollsum_HR_14_h,Strt_rollsum_HR_14_v,Strt_rollsum_R_14_h,Strt_rollsum_R_14_v,Strt_rollsum_ER_14_h,Strt_rollsum_ER_14_v,Strt_rollsum_BB_14_h,Strt_rollsum_BB_14_v,Strt_rollsum_IB_14_h,Strt_rollsum_IB_14_v,Strt_rollsum_SO_14_h,Strt_rollsum_SO_14_v,Strt_rollsum_SH_14_h,Strt_rollsum_SH_14_v,Strt_rollsum_SF_14_h,Strt_rollsum_SF_14_v,Strt_rollsum_WP_14_h,Strt_rollsum_WP_14_v,Strt_rollsum_HBP_14_h,Strt_rollsum_HBP_14_v,Strt_rollsum_BK_14_h,Strt_rollsum_BK_14_v,Strt_rollsum_2B_14_h,Strt_rollsum_2B_14_v,Strt_rollsum_3B_14_h,Strt_rollsum_3B_14_v,Strt_rollsum_IP_real_30_h,Strt_rollsum_IP_real_30_v,Strt_rollsum_H_30_h,Strt_rollsum_H_30_v,Strt_rollsum_BFP_30_h,Strt_rollsum_BFP_30_v,Strt_rollsum_HR_30_h,Strt_rollsum_HR_30_v,Strt_rollsum_R_30_h,Strt_rollsum_R_30_v,Strt_rollsum_ER_30_h,Strt_rollsum_ER_30_v,Strt_rollsum_BB_30_h,Strt_rollsum_BB_30_v,Strt_rollsum_IB_30_h,Strt_rollsum_IB_30_v,Strt_rollsum_SO_30_h,Strt_rollsum_SO_30_v,Strt_rollsum_SH_30_h,Strt_rollsum_SH_30_v,Strt_rollsum_SF_30_h,Strt_rollsum_SF_30_v,Strt_rollsum_WP_30_h,Strt_rollsum_WP_30_v,Strt_rollsum_HBP_30_h,Strt_rollsum_HBP_30_v,Strt_rollsum_BK_30_h,Strt_rollsum_BK_30_v,Strt_rollsum_2B_30_h,Strt_rollsum_2B_30_v,Strt_rollsum_3B_30_h,Strt_rollsum_3B_30_v,Strt_H_BB_roll_3_h,Strt_H_BB_roll_3_v,Strt_XB_roll_3_h,Strt_XB_roll_3_v,Strt_TB_roll_3_h,Strt_TB_roll_3_v,Strt_IP_mod_3_h,Strt_IP_mod_3_v,Strt_BF_mod_3_h,Strt_BF_mod_3_v,Strt_ER_mod_3_h,Strt_ER_mod_3_v,Strt_FIP_numer_3_h,Strt_FIP_numer_3_v,Strt_FIP_numer_mod_3_h,Strt_FIP_numer_mod_3_v,Strt_FIP_numer_mod2_3_h,Strt_FIP_numer_mod2_3_v,Strt_H_BB_mod_3_h,Strt_H_BB_mod_3_v,Strt_H_BB_mod2_3_h,Strt_H_BB_mod2_3_v,Strt_SO_mod_3_h,Strt_SO_mod_3_v,Strt_TB_BB_mod_3_h,Strt_TB_BB_mod_3_v,Strt_ERA_3_h,Strt_ERA_3_v,Strt_FIP_3_h,Strt_FIP_3_v,Strt_FIP_perc_3_h,Strt_FIP_perc_3_v,Strt_WHIP_3_h,Strt_WHIP_3_v,Strt_SO_perc_3_h,Strt_SO_perc_3_v,Strt_TB_BB_perc_3_h,Strt_TB_BB_perc_3_v,Strt_H_BB_perc_3_h,Strt_H_BB_perc_3_v,Strt_H_BB_roll_14_h,Strt_H_BB_roll_14_v,Strt_XB_roll_14_h,Strt_XB_roll_14_v,Strt_TB_roll_14_h,Strt_TB_roll_14_v,Strt_IP_mod_14_h,Strt_IP_mod_14_v,Strt_BF_mod_14_h,Strt_BF_mod_14_v,Strt_ER_mod_14_h,Strt_ER_mod_14_v,Strt_FIP_numer_14_h,Strt_FIP_numer_14_v,Strt_FIP_numer_mod_14_h,Strt_FIP_numer_mod_14_v,Strt_FIP_numer_mod2_14_h,Strt_FIP_numer_mod2_14_v,Strt_H_BB_mod_14_h,Strt_H_BB_mod_14_v,Strt_H_BB_mod2_14_h,Strt_H_BB_mod2_14_v,Strt_SO_mod_14_h,Strt_SO_mod_14_v,Strt_TB_BB_mod_14_h,Strt_TB_BB_mod_14_v,Strt_ERA_14_h,Strt_ERA_14_v,Strt_FIP_14_h,Strt_FIP_14_v,Strt_FIP_perc_14_h,Strt_FIP_perc_14_v,Strt_WHIP_14_h,Strt_WHIP_14_v,Strt_SO_perc_14_h,Strt_SO_perc_14_v,Strt_TB_BB_perc_14_h,Strt_TB_BB_perc_14_v,Strt_H_BB_perc_14_h,Strt_H_BB_perc_14_v,Strt_H_BB_roll_30_h,Strt_H_BB_roll_30_v,Strt_XB_roll_30_h,Strt_XB_roll_30_v,Strt_TB_roll_30_h,Strt_TB_roll_30_v,Strt_IP_mod_30_h,Strt_IP_mod_30_v,Strt_BF_mod_30_h,Strt_BF_mod_30_v,Strt_ER_mod_30_h,Strt_ER_mod_30_v,Strt_FIP_numer_30_h,Strt_FIP_numer_30_v,Strt_FIP_numer_mod_30_h,Strt_FIP_numer_mod_30_v,Strt_FIP_numer_mod2_30_h,Strt_FIP_numer_mod2_30_v,Strt_H_BB_mod_30_h,Strt_H_BB_mod_30_v,Strt_H_BB_mod2_30_h,Strt_H_BB_mod2_30_v,Strt_SO_mod_30_h,Strt_SO_mod_30_v,Strt_TB_BB_mod_30_h,Strt_TB_BB_mod_30_v,Strt_ERA_30_h,Strt_ERA_30_v,Strt_FIP_30_h,Strt_FIP_30_v,Strt_FIP_perc_30_h,Strt_FIP_perc_30_v,Strt_WHIP_30_h,Strt_WHIP_30_v,Strt_SO_perc_30_h,Strt_SO_perc_30_v,Strt_TB_BB_perc_30_h,Strt_TB_BB_perc_30_v,Strt_H_BB_perc_30_h,Strt_H_BB_perc_30_v
1543,20000730,0,Sun,COL,NL,103,MIL,NL,106,2,3,51,D,,,,MIL05,21071.0,141,100001,00001200x,34,7,1,0,1,2,0,0,0,3,1,2,0,0,0,0,8,2,3,3,0,0,24,8,0,0,0,0,29,5,0,0,3,3,0,0,0,2,0,7,1,0,0,0,4,2,2,2,0,0,27,8,0,0,0,0,fostm901,Marty Foster,kulpr901,Ron Kulpa,vanvm901,Mike Vanvleet,marsr901,Randy Marsh,,(none),,(none),bellb001,Buddy Bell,loped001,Davey Lopes,damij001,Jeff D'Amico,yoshm001,Masato Yoshii,leskc001,Curt Leskanic,jenkg001,Geoff Jenkins,yoshm001,Masato Yoshii,damij001,Jeff D'Amico,huntb002,Brian Hunter,8,peren001,Neifi Perez,6,walkl001,Larry Walker,9,hammj001,Jeffrey Hammonds,7,heltt001,Todd Helton,3,walkt002,Todd Walker,4,shumt001,Terry Shumpert,5,petrb001,Ben Petrick,2,yoshm001,Masato Yoshii,1,bellr002,Ronnie Belliard,4,grism001,Marquis Grissom,8,jenkg001,Geoff Jenkins,7,sexsr001,Richie Sexson,3,burnj001,Jeromy Burnitz,9,hernj001,Jose Hernandez,5,casar001,Raul Casanova,2,lopel002,Luis Lopez,6,damij001,Jeff D'Amico,1,,Y,2000,1,1,5,200007300,,,,,,,,,,,,,,,0.248099,0.275335,0.310375,0.338569,0.405894,0.41109,0.716268,0.749659,13.0,19.0,6.0,7.0,28.0,11.0,0.0,0.0,0.5,0.0,,1.0,1.0,8.0,6.0,7.0,5.0,33.0,24.0,1.0,3.0,2.0,3.0,2.0,3.0,2.0,1.0,0.0,0.0,2.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,8.0,6.0,25.0,18.0,13.0,22.0,93.0,77.0,1.0,5.0,1.0,13.0,1.0,12.0,7.0,5.0,0.0,1.0,11.0,5.0,2.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,7.0,0.0,0.0,89.333333,82.666667,73.0,107.0,365.0,363.0,9.0,16.0,28.0,55.0,22.0,54.0,28.0,22.0,3.0,5.0,50.0,45.0,8.0,3.0,1.0,2.0,4.0,1.0,5.0,2.0,0.0,1.0,14.0,26.0,1.0,4.0,187.666667,182.0,171.0,186.0,774.0,760.0,27.0,26.0,81.0,93.0,72.0,90.0,49.0,51.0,5.0,6.0,116.0,110.0,11.0,10.0,4.0,3.0,6.0,2.0,8.0,4.0,1.0,1.0,30.0,44.0,4.0,10.0,20.0,27.0,5.0,22.0,18.0,44.0,25.0,18.0,93.0,77.0,1.0,12.0,51.0,136.0,51.0,136.0,51.0,136.0,20.0,27.0,20.0,27.0,11.0,5.0,25.0,49.0,0.36,6.0,2.04,7.555556,0.548387,1.766234,0.8,1.5,0.11828,0.064935,0.268817,0.636364,0.215054,0.350649,101.0,129.0,43.0,82.0,116.0,189.0,89.333333,82.666667,365.0,363.0,22.0,54.0,320.0,505.0,320.0,505.0,320.0,505.0,101.0,129.0,101.0,129.0,50.0,45.0,144.0,211.0,2.216418,5.879032,3.58209,6.108871,0.876712,1.391185,1.130597,1.560484,0.136986,0.123967,0.394521,0.581267,0.276712,0.355372,220.0,237.0,119.0,142.0,290.0,328.0,187.666667,182.0,774.0,760.0,72.0,90.0,779.0,829.0,779.0,829.0,779.0,829.0,220.0,237.0,220.0,237.0,116.0,110.0,339.0,379.0,3.452931,4.450549,4.150977,4.554945,1.00646,1.090789,1.172291,1.302198,0.149871,0.144737,0.437984,0.498684,0.284238,0.311842
48913,20200818,0,Tue,CLE,AL,23,PIT,NL,19,6,3,60,N,,,,PIT08,,242,30000003,1000200000,36,6,2,0,1,6,0,1,1,4,0,11,1,0,1,0,7,7,3,3,1,0,30,8,0,0,0,0,38,9,2,0,0,3,1,0,1,3,0,15,0,0,0,0,11,7,5,5,1,1,30,10,3,0,1,0,fleta901,Andy Fletcher,gibsh902,Tripp Gibson,may-b901,Ben May,iassd901,Dan Iassogna,,(none),,(none),frant001,Terry Francona,sheld801,Derek Shelton,wittn001,Nick Wittgren,howas002,Sam Howard,handb001,Brad Hand,santc002,Carlos Santana,carrc003,Carlos Carrasco,brubj001,JT Brubaker,hernc005,Cesar Hernandez,4,ramij003,Jose Ramirez,5,lindf001,Francisco Lindor,6,santc002,Carlos Santana,3,reyef001,Franmil Reyes,10,naqut001,Tyler Naquin,9,santd002,Domingo Santana,7,perer003,Roberto Perez,2,deshd002,Delino DeShields,8,fraza001,Adam Frazier,4,newmk001,Kevin Newman,6,bellj005,Josh Bell,3,morac001,Colin Moran,10,reynb001,Bryan Reynolds,7,polag001,Gregory Polanco,9,gonze004,Erik Gonzalez,5,dysoj001,Jarrod Dyson,8,stalj001,Jacob Stallings,2,,Y,2020,-3,0,9,202008180,0.262266,0.2488,0.312952,0.320874,0.417226,0.433555,0.730178,0.754429,59.0,95.0,30.0,35.0,121.0,80.0,0.220703,0.203742,0.271898,0.292705,0.333008,0.347193,0.604906,0.639899,9.0,7.0,9.0,3.0,27.0,12.0,0.37037,0.655172,0.357599,9.0,P,1.0,1.0,3.0,4.1,3.0,5.0,16.0,21.0,0.0,0.0,3.0,3.0,3.0,3.0,3.0,3.0,0.0,0.0,1.0,5.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,3.0,4.333333,8.0,16.333333,8.0,11.0,35.0,69.0,1.0,4.0,3.0,6.0,3.0,6.0,3.0,10.0,0.0,0.0,9.0,20.0,0.0,0.0,0.0,0.0,1.0,4.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,8.0,36.333333,8.0,32.0,35.0,154.0,1.0,8.0,3.0,18.0,3.0,18.0,3.0,16.0,0.0,0.0,9.0,47.0,0.0,0.0,0.0,0.0,1.0,5.0,1.0,2.0,0.0,0.0,1.0,5.0,0.0,1.0,8.0,118.333333,8.0,119.0,35.0,503.0,1.0,23.0,3.0,61.0,3.0,60.0,3.0,34.0,0.0,1.0,9.0,151.0,0.0,3.0,0.0,2.0,1.0,8.0,1.0,3.0,0.0,0.0,1.0,27.0,0.0,2.0,11.0,21.0,4.0,13.0,12.0,24.0,9.0,16.333333,36.0,69.0,3.555556,6.0,28.0,75.0,32.512,75.0,29.1,75.0,12.5,21.0,11.37,21.0,9.2,20.0,15.45,34.0,3.555556,3.306122,3.612444,4.591837,0.903111,1.086957,1.388889,1.285714,0.255556,0.289855,0.429167,0.492754,0.315833,0.304348,11.0,48.0,4.0,31.0,12.0,63.0,42.0,42.0,168.0,168.0,21.888889,21.148148,28.0,154.0,181.408,179.568,174.3,169.4,62.0,56.5,60.21,53.18,35.6,49.8,74.85,85.3,4.690476,4.531746,4.319238,4.275429,1.07981,1.068857,1.47619,1.345238,0.211905,0.296429,0.445536,0.507738,0.358393,0.316548,11.0,153.0,4.0,100.0,12.0,219.0,90.0,118.333333,360.0,503.0,48.555556,60.0,28.0,456.0,397.984,456.0,385.5,456.0,134.0,153.0,131.25,153.0,74.0,151.0,161.25,253.0,4.855556,4.56338,4.422044,3.853521,1.105511,0.906561,1.488889,1.292958,0.205556,0.300199,0.447917,0.502982,0.364583,0.304175
55067,20230523,0,Tue,DET,AL,46,KCA,AL,50,1,4,51,N,,,,KAN06,13443.0,150,10000,00031000x,34,8,0,0,0,1,0,0,0,4,0,12,0,0,2,0,10,2,2,2,1,0,24,6,1,0,0,0,35,11,1,0,0,4,0,1,0,3,1,10,2,0,0,0,11,5,1,1,0,0,27,7,1,0,2,0,segac901,Chris Segal,walsb901,Brian Walsh,nelsj901,Jeff Nelson,buckc901,CB Bucknor,,(none),,(none),hinca001,A.J. Hinch,mathm001,Mike Matheny,cuasj001,Jose Cuas,rodre004,Eduardo Rodriguez,chapa001,Aroldis Chapman,massm001,Michael Massey,rodre004,Eduardo Rodriguez,mayem001,Mike Mayers,mckiz001,Zach McKinstry,5,greer003,Riley Greene,8,baezj001,Javier Baez,6,torks001,Spencer Torkelson,3,maton001,Nick Maton,4,vierm001,Matt Vierling,9,cabrm001,Miguel Cabrera,10,badda001,Akil Baddoo,7,rogej004,Jake Rogers,2,pratn001,Nick Pratto,7,peres002,Salvador Perez,2,pasqv001,Vinnie Pasquantino,3,wittb002,Bobby Witt,6,melem001,MJ Melendez,9,olive001,Edward Olivares,10,massm001,Michael Massey,4,garcm003,Maikel Garcia,5,bradj001,Jackie Bradley,8,,Y,2023,3,1,5,202305230,0.241073,0.232264,0.299641,0.285446,0.386124,0.354537,0.685765,0.639983,106.0,63.0,33.0,27.0,84.0,93.0,0.238332,0.232971,0.299543,0.301887,0.413108,0.359329,0.712652,0.661216,19.0,16.0,7.0,3.0,18.0,15.0,0.425532,0.6,0.412766,8.5,U,1.0,1.0,4.2,5.0,6.0,8.0,20.0,26.0,0.0,0.0,1.0,4.0,1.0,2.0,1.0,2.0,0.0,0.0,8.0,9.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,4.666667,5.0,6.0,20.0,6.0,12.0,29.0,76.0,3.0,1.0,5.0,4.0,4.0,4.0,5.0,5.0,0.0,0.0,6.0,22.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,42.666667,86.666667,45.0,68.0,189.0,343.0,14.0,8.0,30.0,26.0,27.0,26.0,17.0,21.0,0.0,0.0,42.0,74.0,0.0,2.0,1.0,1.0,4.0,1.0,1.0,2.0,0.0,0.0,4.0,10.0,0.0,1.0,59.0,163.0,56.0,140.0,257.0,670.0,16.0,17.0,37.0,66.0,34.0,58.0,25.0,53.0,0.0,0.0,51.0,144.0,0.0,3.0,2.0,2.0,4.0,1.0,2.0,2.0,0.0,0.0,7.0,25.0,1.0,1.0,11.0,17.0,9.0,5.0,15.0,17.0,9.0,20.0,36.0,76.0,5.666667,4.0,60.0,20.0,73.536,20.0,67.7,20.0,15.5,17.0,13.59,17.0,7.4,22.0,23.15,22.0,5.666667,1.8,8.170667,1.0,2.042667,0.263158,1.722222,0.85,0.205556,0.289474,0.643056,0.289474,0.3775,0.223684,62.0,89.0,46.0,36.0,91.0,104.0,42.666667,86.666667,189.0,343.0,27.0,26.0,284.0,223.0,284.0,223.0,284.0,223.0,62.0,89.0,62.0,89.0,42.0,74.0,108.0,125.0,5.695312,2.7,6.65625,2.573077,1.502646,0.650146,1.453125,1.026923,0.222222,0.215743,0.571429,0.364431,0.328042,0.259475,81.0,193.0,57.0,78.0,113.0,218.0,90.0,163.0,360.0,670.0,51.222222,58.0,349.0,512.0,488.872,512.0,462.3,512.0,127.5,193.0,119.11,193.0,71.6,144.0,184.35,271.0,5.122222,3.202454,5.431911,3.141104,1.357978,0.764179,1.416667,1.184049,0.198889,0.214925,0.512083,0.404478,0.330861,0.28806
10727,20040621,0,Mon,LAN,NL,67,SFN,NL,70,2,3,53,N,,,,SFO03,41453.0,173,10001,000011001,35,9,1,0,0,2,0,0,0,5,1,4,0,0,1,0,11,3,3,3,0,0,26,13,0,0,1,0,33,9,3,0,0,3,1,0,0,4,2,4,1,0,1,0,9,5,2,2,2,0,27,17,0,0,1,0,welkt901,Tim Welke,cedeg901,Gary Cederstrom,reynj901,Jim Reynolds,fleta901,Andy Fletcher,,(none),,(none),tracj101,Jim Tracy,alouf101,Felipe Alou,hergm001,Matt Herges,motag001,Guillermo Mota,,(none),ransc001,Cody Ransom,pereo001,Odalis Perez,ruetk001,Kirk Rueter,iztuc001,Cesar Izturis,6,wertj001,Jayson Werth,7,bradm001,Milton Bradley,8,grees001,Shawn Green,3,lodup001,Paul Lo Duca,2,belta001,Adrian Beltre,5,encaj001,Juan Encarnacion,9,hernj001,Jose Hernandez,4,pereo001,Odalis Perez,1,durhr001,Ray Durham,4,cruzd001,Deivi Cruz,6,grism001,Marquis Grissom,8,bondb001,Barry Bonds,7,alfoe001,Edgardo Alfonzo,5,felip001,Pedro Feliz,3,piera001,A.J. Pierzynski,2,mohrd001,Dustan Mohr,9,ruetk001,Kirk Rueter,1,,Y,2004,1,1,5,200406210,0.260036,0.256596,0.336985,0.30892,0.416423,0.396278,0.753408,0.705197,38.0,93.0,34.0,42.0,95.0,98.0,0.268316,0.283797,0.363411,0.336,0.433873,0.422819,0.797283,0.758819,6.0,23.0,6.0,9.0,21.0,14.0,0.0,0.0,0.5,0.0,,1.0,1.0,6.2,7.2,7.0,7.0,29.0,31.0,0.0,0.0,1.0,2.0,1.0,2.0,3.0,2.0,1.0,2.0,1.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,6.666667,7.666667,17.0,20.0,24.0,18.0,80.0,77.0,2.0,2.0,12.0,5.0,9.0,5.0,7.0,3.0,0.0,0.0,9.0,16.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,4.0,0.0,1.0,78.666667,93.666667,102.0,82.0,361.0,372.0,8.0,10.0,51.0,31.0,44.0,30.0,28.0,20.0,1.0,1.0,28.0,69.0,5.0,8.0,3.0,2.0,0.0,1.0,0.0,2.0,0.0,2.0,22.0,16.0,2.0,3.0,160.0,189.666667,201.0,178.0,715.0,770.0,20.0,25.0,100.0,82.0,91.0,78.0,54.0,46.0,2.0,4.0,51.0,138.0,9.0,10.0,4.0,3.0,0.0,2.0,0.0,3.0,0.0,3.0,37.0,30.0,3.0,5.0,31.0,21.0,12.0,12.0,36.0,30.0,17.0,20.0,80.0,77.0,9.0,5.0,101.0,57.0,101.0,57.0,101.0,57.0,31.0,21.0,31.0,21.0,9.0,16.0,43.0,33.0,4.764706,2.25,5.941176,2.85,1.2625,0.74026,1.823529,1.05,0.1125,0.207792,0.5375,0.428571,0.3875,0.272727,130.0,102.0,50.0,52.0,152.0,134.0,78.666667,93.666667,361.0,372.0,44.0,30.0,438.0,298.0,438.0,298.0,438.0,298.0,130.0,102.0,130.0,102.0,28.0,69.0,180.0,154.0,5.033898,2.882562,5.567797,3.181495,1.213296,0.801075,1.652542,1.088968,0.077562,0.185484,0.498615,0.413978,0.360111,0.274194,255.0,224.0,103.0,115.0,304.0,293.0,160.0,189.666667,715.0,770.0,91.0,78.0,923.0,721.0,923.0,721.0,923.0,721.0,255.0,224.0,255.0,224.0,51.0,138.0,358.0,339.0,5.11875,3.70123,5.76875,3.801406,1.290909,0.936364,1.59375,1.181019,0.071329,0.179221,0.500699,0.44026,0.356643,0.290909
39060,20160419,0,Tue,ANA,AL,14,CHA,AL,14,0,5,51,N,,,,CHI12,12093.0,166,0,01010003x,29,3,1,0,0,0,0,0,0,3,0,7,0,0,0,0,5,2,5,5,0,0,24,10,0,0,0,0,32,9,1,1,2,5,1,0,0,2,1,5,0,0,0,0,6,3,0,0,0,0,27,14,0,0,0,0,conrc901,Chris Conroy,morag901,Gabe Morales,nauep901,Paul Nauert,mealj901,Jerry Meals,,(none),,(none),sciom001,Mike Scioscia,ventr001,Robin Ventura,latom001,Mat Latos,shoem001,Matt Shoemaker,jonen001,Nate Jones,frazt001,Todd Frazier,shoem001,Matt Shoemaker,latom001,Mat Latos,escoy001,Yunel Escobar,5,orter001,Rafael Ortega,7,troum001,Mike Trout,8,pujoa001,Albert Pujols,10,calhk001,Kole Calhoun,9,simma001,Andrelton Simmons,6,cronc002,C.J. Cron,3,perec003,Carlos Perez,2,pennc001,Cliff Pennington,4,eatoa002,Adam Eaton,9,rollj001,Jimmy Rollins,6,abrej003,Jose Abreu,3,frazt001,Todd Frazier,5,cabrm002,Melky Cabrera,7,lawrb002,Brett Lawrie,4,jacka001,Austin Jackson,8,garca003,Avisail Garcia,10,navad001,Dioner Navarro,2,,Y,2016,5,1,5,201604190,0.247789,0.247237,0.298671,0.3038,0.375564,0.394068,0.674235,0.697867,66.0,51.0,41.0,32.0,101.0,92.0,0.216244,0.255632,0.270321,0.315315,0.323858,0.405485,0.594179,0.7208,8.0,13.0,9.0,4.0,17.0,20.0,0.0,0.0,0.5,0.0,,1.0,1.0,6.1,6.1,2.0,6.0,24.0,26.0,0.0,2.0,0.0,2.0,0.0,2.0,3.0,1.0,0.0,0.0,5.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.333333,6.333333,13.666667,12.666667,6.0,14.0,52.0,61.0,2.0,3.0,3.0,10.0,3.0,10.0,3.0,8.0,0.0,0.0,7.0,8.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,66.0,71.666667,56.0,70.0,265.0,305.0,7.0,10.0,29.0,31.0,27.0,31.0,12.0,25.0,1.0,1.0,50.0,57.0,4.0,3.0,3.0,1.0,6.0,2.0,2.0,2.0,1.0,0.0,12.0,13.0,1.0,0.0,152.333333,162.666667,157.0,161.0,644.0,682.0,17.0,26.0,81.0,81.0,75.0,78.0,39.0,44.0,2.0,2.0,126.0,132.0,8.0,4.0,5.0,5.0,10.0,4.0,2.0,4.0,1.0,0.0,34.0,33.0,3.0,0.0,9.0,22.0,8.0,11.0,14.0,25.0,13.666667,12.666667,52.0,61.0,3.0,10.0,39.0,89.0,39.0,89.0,39.0,89.0,9.0,22.0,9.0,22.0,7.0,8.0,17.0,33.0,1.97561,7.105263,2.853659,7.026316,0.75,1.459016,0.658537,1.736842,0.134615,0.131148,0.326923,0.540984,0.173077,0.360656,68.0,95.0,35.0,43.0,91.0,113.0,66.0,71.666667,265.0,305.0,27.0,31.0,195.0,301.0,195.0,301.0,195.0,301.0,68.0,95.0,68.0,95.0,50.0,57.0,103.0,138.0,3.681818,3.893023,2.954545,4.2,0.735849,0.986885,1.030303,1.325581,0.188679,0.186885,0.388679,0.452459,0.256604,0.311475,196.0,205.0,91.0,111.0,248.0,272.0,152.333333,162.666667,644.0,682.0,75.0,78.0,557.0,689.0,557.0,689.0,557.0,689.0,196.0,205.0,196.0,205.0,126.0,132.0,287.0,316.0,4.431072,4.315574,3.656455,4.235656,0.864907,1.010264,1.286652,1.260246,0.195652,0.193548,0.445652,0.463343,0.304348,0.300587


In [53]:
df.shape


(56771, 451)

In [54]:
(df.Strt_IP_mod_3_h==0).sum()


0

In [55]:
(df.Strt_IP_mod_3_v==0).sum()


0

In [None]:
df.loc[df.Strt_IP_mod_3_v==0]


In [56]:
df.loc[df.Strt_IP_mod_3_h==0]


Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_3_h,Strt_rollsum_IP_real_3_v,Strt_rollsum_H_3_h,Strt_rollsum_H_3_v,Strt_rollsum_BFP_3_h,Strt_rollsum_BFP_3_v,Strt_rollsum_HR_3_h,Strt_rollsum_HR_3_v,Strt_rollsum_R_3_h,Strt_rollsum_R_3_v,Strt_rollsum_ER_3_h,Strt_rollsum_ER_3_v,Strt_rollsum_BB_3_h,Strt_rollsum_BB_3_v,Strt_rollsum_IB_3_h,Strt_rollsum_IB_3_v,Strt_rollsum_SO_3_h,Strt_rollsum_SO_3_v,Strt_rollsum_SH_3_h,Strt_rollsum_SH_3_v,Strt_rollsum_SF_3_h,Strt_rollsum_SF_3_v,Strt_rollsum_WP_3_h,Strt_rollsum_WP_3_v,Strt_rollsum_HBP_3_h,Strt_rollsum_HBP_3_v,Strt_rollsum_BK_3_h,Strt_rollsum_BK_3_v,Strt_rollsum_2B_3_h,Strt_rollsum_2B_3_v,Strt_rollsum_3B_3_h,Strt_rollsum_3B_3_v,Strt_rollsum_IP_real_14_h,Strt_rollsum_IP_real_14_v,Strt_rollsum_H_14_h,Strt_rollsum_H_14_v,Strt_rollsum_BFP_14_h,Strt_rollsum_BFP_14_v,Strt_rollsum_HR_14_h,Strt_rollsum_HR_14_v,Strt_rollsum_R_14_h,Strt_rollsum_R_14_v,Strt_rollsum_ER_14_h,Strt_rollsum_ER_14_v,Strt_rollsum_BB_14_h,Strt_rollsum_BB_14_v,Strt_rollsum_IB_14_h,Strt_rollsum_IB_14_v,Strt_rollsum_SO_14_h,Strt_rollsum_SO_14_v,Strt_rollsum_SH_14_h,Strt_rollsum_SH_14_v,Strt_rollsum_SF_14_h,Strt_rollsum_SF_14_v,Strt_rollsum_WP_14_h,Strt_rollsum_WP_14_v,Strt_rollsum_HBP_14_h,Strt_rollsum_HBP_14_v,Strt_rollsum_BK_14_h,Strt_rollsum_BK_14_v,Strt_rollsum_2B_14_h,Strt_rollsum_2B_14_v,Strt_rollsum_3B_14_h,Strt_rollsum_3B_14_v,Strt_rollsum_IP_real_30_h,Strt_rollsum_IP_real_30_v,Strt_rollsum_H_30_h,Strt_rollsum_H_30_v,Strt_rollsum_BFP_30_h,Strt_rollsum_BFP_30_v,Strt_rollsum_HR_30_h,Strt_rollsum_HR_30_v,Strt_rollsum_R_30_h,Strt_rollsum_R_30_v,Strt_rollsum_ER_30_h,Strt_rollsum_ER_30_v,Strt_rollsum_BB_30_h,Strt_rollsum_BB_30_v,Strt_rollsum_IB_30_h,Strt_rollsum_IB_30_v,Strt_rollsum_SO_30_h,Strt_rollsum_SO_30_v,Strt_rollsum_SH_30_h,Strt_rollsum_SH_30_v,Strt_rollsum_SF_30_h,Strt_rollsum_SF_30_v,Strt_rollsum_WP_30_h,Strt_rollsum_WP_30_v,Strt_rollsum_HBP_30_h,Strt_rollsum_HBP_30_v,Strt_rollsum_BK_30_h,Strt_rollsum_BK_30_v,Strt_rollsum_2B_30_h,Strt_rollsum_2B_30_v,Strt_rollsum_3B_30_h,Strt_rollsum_3B_30_v,Strt_H_BB_roll_3_h,Strt_H_BB_roll_3_v,Strt_XB_roll_3_h,Strt_XB_roll_3_v,Strt_TB_roll_3_h,Strt_TB_roll_3_v,Strt_IP_mod_3_h,Strt_IP_mod_3_v,Strt_BF_mod_3_h,Strt_BF_mod_3_v,Strt_ER_mod_3_h,Strt_ER_mod_3_v,Strt_FIP_numer_3_h,Strt_FIP_numer_3_v,Strt_FIP_numer_mod_3_h,Strt_FIP_numer_mod_3_v,Strt_FIP_numer_mod2_3_h,Strt_FIP_numer_mod2_3_v,Strt_H_BB_mod_3_h,Strt_H_BB_mod_3_v,Strt_H_BB_mod2_3_h,Strt_H_BB_mod2_3_v,Strt_SO_mod_3_h,Strt_SO_mod_3_v,Strt_TB_BB_mod_3_h,Strt_TB_BB_mod_3_v,Strt_ERA_3_h,Strt_ERA_3_v,Strt_FIP_3_h,Strt_FIP_3_v,Strt_FIP_perc_3_h,Strt_FIP_perc_3_v,Strt_WHIP_3_h,Strt_WHIP_3_v,Strt_SO_perc_3_h,Strt_SO_perc_3_v,Strt_TB_BB_perc_3_h,Strt_TB_BB_perc_3_v,Strt_H_BB_perc_3_h,Strt_H_BB_perc_3_v,Strt_H_BB_roll_14_h,Strt_H_BB_roll_14_v,Strt_XB_roll_14_h,Strt_XB_roll_14_v,Strt_TB_roll_14_h,Strt_TB_roll_14_v,Strt_IP_mod_14_h,Strt_IP_mod_14_v,Strt_BF_mod_14_h,Strt_BF_mod_14_v,Strt_ER_mod_14_h,Strt_ER_mod_14_v,Strt_FIP_numer_14_h,Strt_FIP_numer_14_v,Strt_FIP_numer_mod_14_h,Strt_FIP_numer_mod_14_v,Strt_FIP_numer_mod2_14_h,Strt_FIP_numer_mod2_14_v,Strt_H_BB_mod_14_h,Strt_H_BB_mod_14_v,Strt_H_BB_mod2_14_h,Strt_H_BB_mod2_14_v,Strt_SO_mod_14_h,Strt_SO_mod_14_v,Strt_TB_BB_mod_14_h,Strt_TB_BB_mod_14_v,Strt_ERA_14_h,Strt_ERA_14_v,Strt_FIP_14_h,Strt_FIP_14_v,Strt_FIP_perc_14_h,Strt_FIP_perc_14_v,Strt_WHIP_14_h,Strt_WHIP_14_v,Strt_SO_perc_14_h,Strt_SO_perc_14_v,Strt_TB_BB_perc_14_h,Strt_TB_BB_perc_14_v,Strt_H_BB_perc_14_h,Strt_H_BB_perc_14_v,Strt_H_BB_roll_30_h,Strt_H_BB_roll_30_v,Strt_XB_roll_30_h,Strt_XB_roll_30_v,Strt_TB_roll_30_h,Strt_TB_roll_30_v,Strt_IP_mod_30_h,Strt_IP_mod_30_v,Strt_BF_mod_30_h,Strt_BF_mod_30_v,Strt_ER_mod_30_h,Strt_ER_mod_30_v,Strt_FIP_numer_30_h,Strt_FIP_numer_30_v,Strt_FIP_numer_mod_30_h,Strt_FIP_numer_mod_30_v,Strt_FIP_numer_mod2_30_h,Strt_FIP_numer_mod2_30_v,Strt_H_BB_mod_30_h,Strt_H_BB_mod_30_v,Strt_H_BB_mod2_30_h,Strt_H_BB_mod2_30_v,Strt_SO_mod_30_h,Strt_SO_mod_30_v,Strt_TB_BB_mod_30_h,Strt_TB_BB_mod_30_v,Strt_ERA_30_h,Strt_ERA_30_v,Strt_FIP_30_h,Strt_FIP_30_v,Strt_FIP_perc_30_h,Strt_FIP_perc_30_v,Strt_WHIP_30_h,Strt_WHIP_30_v,Strt_SO_perc_30_h,Strt_SO_perc_30_v,Strt_TB_BB_perc_30_h,Strt_TB_BB_perc_30_v,Strt_H_BB_perc_30_h,Strt_H_BB_perc_30_v


In [57]:
df.drop(df.index[df.Strt_IP_mod_3_v==0],inplace=True)
df.drop(df.index[df.Strt_IP_mod_3_h==0],inplace=True)
df.shape


(56771, 451)

In [58]:
df.reset_index(drop=True, inplace=True)

In [59]:
df.to_csv('df_bp5.csv', index=False)