Baseball Prediction: 5b - Adding Starting Pitching Features

    - Now that we have raw game-level data for each pitcher, we can derive features based on the starting pitchers to help our prediction model for individual games.


In [51]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

pd.set_option('display.max_columns',1000)
pd.set_option('display.max_rows',1000)

Overall Plan of Attack

    - For each starting pitcher we will load their raw data, create features for each game based on their previous performance, and then save the dataframe in a dictionary structure for easy lookup

    - Then we can iterate through our game-level dataframe, add in the features for each starting pitcher, and use those to improve our model

Once again, we will use C.C. Sabathia as our example candidate

In [52]:
df_cc = pd.read_csv('/Volumes/CharmedXi/beatVegas/SP_2000/pitching_data_sabac001.csv')

In [53]:
df_cc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 561 entries, 0 to 560
Data columns (total 31 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   at_vs        561 non-null    object 
 1   Opponent     561 non-null    object 
 2   League       561 non-null    object 
 3   GS           561 non-null    int64  
 4   CG           561 non-null    int64  
 5   SHO          561 non-null    int64  
 6   GF           561 non-null    int64  
 7   SV           561 non-null    int64  
 8   IP           561 non-null    float64
 9   H            561 non-null    int64  
 10  BFP          561 non-null    int64  
 11  HR           561 non-null    int64  
 12  R            561 non-null    int64  
 13  ER           561 non-null    int64  
 14  BB           561 non-null    int64  
 15  IB           561 non-null    int64  
 16  SO           561 non-null    int64  
 17  SH           561 non-null    int64  
 18  SF           561 non-null    int64  
 19  WP      

In [54]:
df_cc.head(10)

Unnamed: 0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,date,dblhead_num
0,VS,BAL,A,1,0,0,0,0,5.2,3,22,1,3,3,2,0,3,0,0,1,0,0,2,0,0,0,0,0,4.76,4- 8-2001,
1,AT,DET,A,1,0,0,0,0,5.0,5,21,0,4,4,2,0,2,0,0,0,0,0,1,0,1,0,1,0,5.91,4-13-2001,
2,AT,BAL,A,1,0,0,0,0,6.0,6,23,0,2,2,2,0,3,0,0,0,0,1,1,0,0,0,1,0,4.86,4-19-2001,
3,VS,ANA,A,1,0,0,0,0,5.0,5,24,0,3,2,4,0,1,0,0,0,0,0,1,1,0,1,0,1,4.57,4-25-2001,
4,AT,KC,A,1,0,0,0,0,5.0,4,19,1,1,1,1,0,2,0,0,0,0,0,0,0,1,0,1,0,4.05,5- 2-2001,
5,VS,KC,A,1,0,0,0,0,5.2,5,23,0,1,1,1,0,5,0,1,1,1,0,1,0,0,0,1,0,3.62,5- 9-2001,
6,AT,TEX,A,1,0,0,0,0,5.0,6,23,0,3,3,3,0,5,0,0,1,0,0,0,0,1,0,1,0,3.86,5-15-2001,
7,AT,ANA,A,1,0,0,0,0,4.1,9,24,1,6,6,3,0,4,0,0,1,0,0,3,0,0,0,0,0,4.75,5-20-2001,
8,VS,NY,A,1,0,0,0,0,4.2,7,25,2,5,5,4,1,3,0,0,0,0,1,2,0,0,0,0,1,5.24,5-26-2001,
9,AT,NY,A,1,0,0,0,0,4.0,4,20,1,4,3,5,0,2,0,0,0,0,0,0,0,1,0,1,0,5.36,6- 1-2001,


In [55]:
# Baseball statisticans report partial innings pitched as X.1 & X.2 where X is the integer number of innings pitched
# However, for calculation purposes, we need to tell the computer to recognize X.1 & X.2 as X & (one-third) and (two-thirds), respectively
# The following line of code does that for us.

df_cc['IP_real'] = df_cc['IP_real'] = (df_cc.IP - (df_cc.IP % 1)) + (df_cc.IP % 1) * (10/3)

In [56]:
df_cc.columns

Index(['at_vs', 'Opponent', 'League', 'GS', 'CG', 'SHO', 'GF', 'SV', 'IP', 'H',
       'BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B', 'GDP', 'ROE', 'W', 'L', 'ERA', 'date', 'dblhead_num',
       'IP_real'],
      dtype='object')

Generating Features for a Starting Pitcher

    - Want to creature features based on a lookback across games for each pitcher

    - Need to aggregate statistics like innings pitched (IP), runs or earned runs given up, hits and walks given up, strikeouts, and so on

    - We will also need to decide how to handle pitchers early in their career when they have relatively few games on which to base their performance.

In [57]:
def roll_column(df, col, winsize):
    # do the standard Pandas rolling calc
    t_col = df[col].rolling(winsize, closed='left').sum().to_numpy()
    
    # for the early columns, just do a rolling sum from the beginning
    t_col[:winsize] = np.concatenate(([0],df[col].iloc[:(winsize)].cumsum().to_numpy()[:-1]))

    return(t_col)

In [58]:
def load_and_process_pitch_df(p_id, filepath=''):
    fname = filepath+'pitching_data_'+p_id+'.csv'
    pitch_df = pd.read_csv(fname)
    
    # Convert date, fix dblhead_num to be 0,1,2
    pitch_df['date'] = (pd.to_datetime(pitch_df.date).astype(str).str.replace('-','')).astype(int)
    pitch_df.dblhead_num.fillna(0, inplace=True)
    pitch_df['dblhead_num'] = pitch_df['dblhead_num'].astype(int)
    
    # Convert IP to proper mathematical format
    pitch_df['IP_real'] = (pitch_df.IP - (pitch_df.IP % 1)) + (pitch_df.IP % 1) * (10/3)
    
    cols_to_agg = ['IP_real', 'H','BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B']
    winsizes = [6,34,100]
    for winsize in winsizes:
        for raw_col in cols_to_agg:
            new_colname = 'rollsum_'+raw_col+'_'+str(winsize)        
            pitch_df[new_colname] = roll_column(pitch_df, raw_col, winsize)

    
    er_per_ip_def = (5/9)
    h_bb_per_ip_def = 1.5
    h_bb_per_bf_def = .37
    so_per_bf_def = .2
    ip_per_game_def = 3
    bf_per_game_def = 12
    tb_bb_perc_def = .45
    fip_numer_per_ip_def = .124*13 + 1.5*3 - 2*.8
    fip_numer_per_bf_def = .03*13 + .37*3 - 2*.2
    for winsize in winsizes:
        hit_col = 'rollsum_H_'+str(winsize)
        bb_col = 'rollsum_BB_'+str(winsize)
        h_bb_col = 'H_BB_roll_'+str(winsize)
        double_col = 'rollsum_2B_'+str(winsize)
        triple_col = 'rollsum_3B_'+str(winsize)
        hr_col = 'rollsum_HR_'+str(winsize)
        xb_col = 'XB_roll_'+str(winsize)
        tb_col = 'TB_roll_'+str(winsize)
        so_col = 'rollsum_SO_'+str(winsize)
        so_mod_col = 'SO_mod_'+str(winsize)
        ip_col = 'rollsum_IP_real_'+str(winsize)
        ip_mod_col = 'IP_mod_'+str(winsize)
        er_col = 'rollsum_ER_'+str(winsize)
        er_mod_col = 'ER_mod_'+str(winsize)
        bf_col = 'rollsum_BFP_'+str(winsize)
        bf_mod_col = 'BF_mod_'+str(winsize)
        era_col = 'ERA_'+str(winsize)
        fip_col = 'FIP_'+str(winsize)
        fip_perc_col = 'FIP_perc_'+str(winsize)
        fip_numer_col = 'FIP_numer_'+str(winsize)
        fip_numer_mod_col = 'FIP_numer_mod_'+str(winsize)
        fip_numer_mod2_col = 'FIP_numer_mod2_'+str(winsize)
        whip_col = 'WHIP_'+str(winsize)
        so_perc_col = 'SO_perc_'+str(winsize)
        h_bb_perc_col = 'H_BB_perc_'+str(winsize)
        h_bb_mod_col = 'H_BB_mod_'+str(winsize)
        h_bb_mod2_col = 'H_BB_mod2_'+str(winsize)
        tb_bb_mod_col = 'TB_BB_mod_'+str(winsize)
        tb_bb_perc_col = 'TB_BB_perc_'+str(winsize)
        pitch_df[h_bb_col] = pitch_df[hit_col]+pitch_df[bb_col]
        pitch_df[xb_col] = pitch_df[double_col]+2*pitch_df[triple_col]+3*pitch_df[hr_col]
        pitch_df[tb_col] = pitch_df[hit_col]+pitch_df[xb_col]
        pitch_df[ip_mod_col] = np.maximum(pitch_df[ip_col], winsize*ip_per_game_def)
        pitch_df[bf_mod_col] = np.maximum(pitch_df[bf_col], winsize*bf_per_game_def)
        pitch_df[er_mod_col] = pitch_df[er_col] + er_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[fip_numer_col] = 13*pitch_df[hr_col] + 3*pitch_df[h_bb_col] -2*pitch_df[so_col]
        pitch_df[fip_numer_mod_col] = pitch_df[fip_numer_col] + fip_numer_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[fip_numer_mod2_col] = pitch_df[fip_numer_col] + fip_numer_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[h_bb_mod_col] = pitch_df[h_bb_col] + h_bb_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[h_bb_mod2_col] = pitch_df[h_bb_col] + h_bb_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[so_mod_col] = pitch_df[so_col] + so_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[tb_bb_mod_col] = (pitch_df[tb_col] + pitch_df[bb_col])+ tb_bb_perc_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[era_col] = (pitch_df[er_mod_col]/pitch_df[ip_mod_col])*9
        pitch_df[fip_col] = (pitch_df[fip_numer_mod_col]/pitch_df[ip_mod_col])
        pitch_df[fip_perc_col] = (pitch_df[fip_numer_mod_col]/pitch_df[bf_mod_col])
        pitch_df[whip_col] = pitch_df[h_bb_mod_col]/pitch_df[ip_mod_col]
        pitch_df[so_perc_col] = pitch_df[so_mod_col]/pitch_df[bf_mod_col]
        pitch_df[tb_bb_perc_col] = pitch_df[tb_bb_mod_col]/pitch_df[bf_mod_col]
        pitch_df[h_bb_perc_col] = pitch_df[h_bb_mod2_col]/pitch_df[bf_mod_col]
    pitch_df['date_dblhead'] = (pitch_df['date'].astype(str) + pitch_df['dblhead_num'].astype(str)).astype(int)
    pitch_df.set_index('date_dblhead', inplace=True)
    return(pitch_df)

In [59]:
df_cc = load_and_process_pitch_df('sabac001','SP_2000/')

df_cc.head(15)

Unnamed: 0_level_0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,date,dblhead_num,IP_real,rollsum_IP_real_6,rollsum_H_6,rollsum_BFP_6,rollsum_HR_6,rollsum_R_6,rollsum_ER_6,rollsum_BB_6,rollsum_IB_6,rollsum_SO_6,rollsum_SH_6,rollsum_SF_6,rollsum_WP_6,rollsum_HBP_6,rollsum_BK_6,rollsum_2B_6,rollsum_3B_6,rollsum_IP_real_34,rollsum_H_34,rollsum_BFP_34,rollsum_HR_34,rollsum_R_34,rollsum_ER_34,rollsum_BB_34,rollsum_IB_34,rollsum_SO_34,rollsum_SH_34,rollsum_SF_34,rollsum_WP_34,rollsum_HBP_34,rollsum_BK_34,rollsum_2B_34,rollsum_3B_34,rollsum_IP_real_100,rollsum_H_100,rollsum_BFP_100,rollsum_HR_100,rollsum_R_100,rollsum_ER_100,rollsum_BB_100,rollsum_IB_100,rollsum_SO_100,rollsum_SH_100,rollsum_SF_100,rollsum_WP_100,rollsum_HBP_100,rollsum_BK_100,rollsum_2B_100,rollsum_3B_100,H_BB_roll_6,XB_roll_6,TB_roll_6,IP_mod_6,BF_mod_6,ER_mod_6,FIP_numer_6,FIP_numer_mod_6,FIP_numer_mod2_6,H_BB_mod_6,H_BB_mod2_6,SO_mod_6,TB_BB_mod_6,ERA_6,FIP_6,FIP_perc_6,WHIP_6,SO_perc_6,TB_BB_perc_6,H_BB_perc_6,H_BB_roll_34,XB_roll_34,TB_roll_34,IP_mod_34,BF_mod_34,ER_mod_34,FIP_numer_34,FIP_numer_mod_34,FIP_numer_mod2_34,H_BB_mod_34,H_BB_mod2_34,SO_mod_34,TB_BB_mod_34,ERA_34,FIP_34,FIP_perc_34,WHIP_34,SO_perc_34,TB_BB_perc_34,H_BB_perc_34,H_BB_roll_100,XB_roll_100,TB_roll_100,IP_mod_100,BF_mod_100,ER_mod_100,FIP_numer_100,FIP_numer_mod_100,FIP_numer_mod2_100,H_BB_mod_100,H_BB_mod2_100,SO_mod_100,TB_BB_mod_100,ERA_100,FIP_100,FIP_perc_100,WHIP_100,SO_perc_100,TB_BB_perc_100,H_BB_perc_100
date_dblhead,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1
200104080,VS,BAL,A,1,0,0,0,0,5.2,3,22,1,3,3,2,0,3,0,0,1,0,0,2,0,0,0,0,0,4.76,20010408,0,5.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,72.0,10.0,0.0,81.216,79.2,27.0,26.64,14.4,32.4,5.0,4.512,1.128,1.5,0.2,0.45,0.37,0.0,0.0,0.0,102.0,408.0,56.666667,0.0,460.224,448.8,153.0,150.96,81.6,183.6,5.0,4.512,1.128,1.5,0.2,0.45,0.37,0.0,0.0,0.0,300.0,1200.0,166.666667,0.0,1353.6,1320.0,450.0,444.0,240.0,540.0,5.0,4.512,1.128,1.5,0.2,0.45,0.37
200104130,AT,DET,A,1,0,0,0,0,5.0,5,21,0,4,4,2,0,2,0,0,0,0,0,1,0,1,0,1,0,5.91,20010413,0,5.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.666667,3.0,22.0,1.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,5.0,5.0,8.0,18.0,72.0,9.851852,22.0,77.648,77.0,23.5,23.5,13.0,32.5,4.925926,4.313778,1.078444,1.305556,0.180556,0.451389,0.326389,5.0,5.0,8.0,102.0,408.0,56.518519,22.0,456.656,446.6,149.5,147.82,80.2,183.7,4.986928,4.47702,1.119255,1.465686,0.196569,0.450245,0.362304,5.0,5.0,8.0,300.0,1200.0,166.518519,22.0,1350.032,1317.8,446.5,440.86,238.6,540.1,4.995556,4.500107,1.125027,1.488333,0.198833,0.450083,0.367383
200104190,AT,BAL,A,1,0,0,0,0,6.0,6,23,0,2,2,2,0,3,0,0,0,0,1,1,0,0,0,1,0,4.86,20010419,0,6.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,10.666667,8.0,43.0,1.0,7.0,7.0,4.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,12.0,6.0,14.0,18.0,72.0,11.074074,39.0,72.088,70.9,23.0,22.73,10.8,31.05,5.537037,4.004889,1.001222,1.277778,0.15,0.43125,0.315694,12.0,6.0,14.0,102.0,408.0,57.740741,39.0,451.096,440.5,149.0,147.05,78.0,182.25,5.094771,4.42251,1.105627,1.460784,0.191176,0.446691,0.360417,12.0,6.0,14.0,300.0,1200.0,167.740741,39.0,1344.472,1311.7,446.0,440.09,236.4,538.65,5.032222,4.481573,1.120393,1.486667,0.197,0.448875,0.366742
200104250,VS,ANA,A,1,0,0,0,0,5.0,5,24,0,3,2,4,0,1,0,0,0,0,0,1,1,0,1,0,1,4.57,20010425,0,5.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,16.666667,14.0,66.0,1.0,9.0,9.0,6.0,0.0,8.0,0.0,0.0,1.0,0.0,1.0,4.0,0.0,20.0,7.0,21.0,18.0,72.0,9.740741,57.0,63.016,63.6,22.0,22.22,9.2,29.7,4.87037,3.500889,0.875222,1.222222,0.127778,0.4125,0.308611,20.0,7.0,21.0,102.0,408.0,56.407407,57.0,442.024,433.2,148.0,146.54,76.4,180.9,4.977124,4.333569,1.083392,1.45098,0.187255,0.443382,0.359167,20.0,7.0,21.0,300.0,1200.0,166.407407,57.0,1335.4,1304.4,445.0,439.58,234.8,537.3,4.992222,4.451333,1.112833,1.483333,0.195667,0.44775,0.366317
200105020,AT,KC,A,1,0,0,0,0,5.0,4,19,1,1,1,1,0,2,0,0,0,0,0,0,0,1,0,1,0,4.05,20010502,0,5.0,21.666667,19.0,90.0,1.0,12.0,11.0,10.0,0.0,9.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,21.666667,19.0,90.0,1.0,12.0,11.0,10.0,0.0,9.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,21.666667,19.0,90.0,1.0,12.0,11.0,10.0,0.0,9.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,29.0,10.0,29.0,21.666667,90.0,11.0,82.0,82.0,82.0,29.0,29.0,9.0,39.0,4.569231,3.784615,0.911111,1.338462,0.1,0.433333,0.322222,29.0,10.0,29.0,102.0,408.0,55.62963,82.0,444.464,431.8,149.5,146.66,72.6,182.1,4.908497,4.35749,1.089373,1.465686,0.177941,0.446324,0.359461,29.0,10.0,29.0,300.0,1200.0,165.62963,82.0,1337.84,1303.0,446.5,439.7,231.0,538.5,4.968889,4.459467,1.114867,1.488333,0.1925,0.44875,0.366417
200105090,VS,KC,A,1,0,0,0,0,5.2,5,23,0,1,1,1,0,5,0,1,1,1,0,1,0,0,0,1,0,3.62,20010509,0,5.666667,26.666667,23.0,109.0,2.0,13.0,12.0,11.0,0.0,11.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,26.666667,23.0,109.0,2.0,13.0,12.0,11.0,0.0,11.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,26.666667,23.0,109.0,2.0,13.0,12.0,11.0,0.0,11.0,0.0,0.0,1.0,0.0,1.0,5.0,1.0,34.0,13.0,36.0,26.666667,109.0,12.0,106.0,106.0,106.0,34.0,34.0,11.0,47.0,4.05,3.975,0.972477,1.275,0.100917,0.431193,0.311927,34.0,13.0,36.0,102.0,408.0,53.851852,106.0,445.904,434.9,147.0,144.63,70.8,181.55,4.751634,4.371608,1.092902,1.441176,0.173529,0.444975,0.354485,34.0,13.0,36.0,300.0,1200.0,163.851852,106.0,1339.28,1306.1,444.0,437.67,229.2,537.95,4.915556,4.464267,1.116067,1.48,0.191,0.448292,0.364725
200105150,AT,TEX,A,1,0,0,0,0,5.0,6,23,0,3,3,3,0,5,0,0,1,0,0,0,0,1,0,1,0,3.86,20010515,0,5.0,32.333333,28.0,132.0,2.0,14.0,13.0,12.0,0.0,16.0,0.0,1.0,2.0,1.0,1.0,6.0,1.0,32.333333,28.0,132.0,2.0,14.0,13.0,12.0,0.0,16.0,0.0,1.0,2.0,1.0,1.0,6.0,1.0,32.333333,28.0,132.0,2.0,14.0,13.0,12.0,0.0,16.0,0.0,1.0,2.0,1.0,1.0,6.0,1.0,40.0,14.0,42.0,32.333333,132.0,13.0,114.0,114.0,114.0,40.0,40.0,16.0,54.0,3.618557,3.525773,0.863636,1.237113,0.121212,0.409091,0.30303,40.0,14.0,42.0,102.0,408.0,51.703704,114.0,428.336,417.6,144.5,142.12,71.2,178.2,4.562092,4.199373,1.049843,1.416667,0.17451,0.436765,0.348333,40.0,14.0,42.0,300.0,1200.0,161.703704,114.0,1321.712,1288.8,441.5,435.16,229.6,534.6,4.851111,4.405707,1.101427,1.471667,0.191333,0.4455,0.362633
200105200,AT,ANA,A,1,0,0,0,0,4.1,9,24,1,6,6,3,0,4,0,0,1,0,0,3,0,0,0,0,0,4.75,20010520,0,4.333333,31.666667,31.0,133.0,1.0,14.0,13.0,13.0,0.0,18.0,0.0,1.0,2.0,1.0,1.0,4.0,1.0,37.333333,34.0,155.0,2.0,17.0,16.0,15.0,0.0,21.0,0.0,1.0,3.0,1.0,1.0,6.0,1.0,37.333333,34.0,155.0,2.0,17.0,16.0,15.0,0.0,21.0,0.0,1.0,3.0,1.0,1.0,6.0,1.0,44.0,9.0,40.0,31.666667,133.0,13.0,109.0,109.0,109.0,44.0,44.0,18.0,53.0,3.694737,3.442105,0.819549,1.389474,0.135338,0.398496,0.330827,49.0,14.0,48.0,102.0,408.0,51.925926,131.0,422.776,409.3,146.0,142.61,71.6,176.85,4.581699,4.144863,1.036216,1.431373,0.17549,0.433456,0.349534,49.0,14.0,48.0,300.0,1200.0,161.925926,131.0,1316.152,1280.5,443.0,435.65,230.0,533.25,4.857778,4.387173,1.096793,1.476667,0.191667,0.444375,0.363042
200105260,VS,NY,A,1,0,0,0,0,4.2,7,25,2,5,5,4,1,3,0,0,0,0,1,2,0,0,0,0,1,5.24,20010526,0,4.666667,31.0,35.0,136.0,2.0,16.0,15.0,14.0,0.0,20.0,0.0,1.0,3.0,1.0,1.0,6.0,1.0,41.666667,43.0,179.0,3.0,23.0,22.0,18.0,0.0,25.0,0.0,1.0,4.0,1.0,1.0,9.0,1.0,41.666667,43.0,179.0,3.0,23.0,22.0,18.0,0.0,25.0,0.0,1.0,4.0,1.0,1.0,9.0,1.0,49.0,14.0,49.0,31.0,136.0,15.0,133.0,133.0,133.0,49.0,49.0,20.0,63.0,4.354839,4.290323,0.977941,1.580645,0.147059,0.463235,0.360294,61.0,20.0,63.0,102.0,408.0,55.518519,172.0,444.224,423.9,151.5,145.73,70.8,184.05,4.898693,4.355137,1.088784,1.485294,0.173529,0.451103,0.357181,61.0,20.0,63.0,300.0,1200.0,165.518519,172.0,1337.6,1295.1,448.5,438.77,229.2,540.45,4.965556,4.458667,1.114667,1.495,0.191,0.450375,0.365642
200106010,AT,NY,A,1,0,0,0,0,4.0,4,20,1,4,3,5,0,2,0,0,0,0,0,0,0,1,0,1,0,5.36,20010601,0,4.0,29.666667,36.0,138.0,4.0,19.0,18.0,16.0,1.0,20.0,0.0,1.0,3.0,1.0,1.0,7.0,1.0,46.333333,50.0,204.0,5.0,28.0,27.0,22.0,1.0,28.0,0.0,1.0,4.0,1.0,2.0,11.0,1.0,46.333333,50.0,204.0,5.0,28.0,27.0,22.0,1.0,28.0,0.0,1.0,4.0,1.0,2.0,11.0,1.0,52.0,21.0,57.0,29.666667,138.0,18.0,168.0,168.0,168.0,52.0,52.0,20.0,73.0,5.460674,5.662921,1.217391,1.752809,0.144928,0.528986,0.376812,72.0,28.0,78.0,102.0,408.0,57.925926,225.0,476.168,449.4,155.5,147.48,68.8,191.8,5.111111,4.668314,1.167078,1.52451,0.168627,0.470098,0.361471,72.0,28.0,78.0,300.0,1200.0,167.925926,225.0,1369.544,1320.6,452.5,440.52,227.2,548.2,5.037778,4.565147,1.141287,1.508333,0.189333,0.456833,0.3671


LOAD IN GAME LEVEL DATA

In [60]:
df = pd.read_csv('df_bp3.csv', low_memory=False)

In [61]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56771 entries, 0 to 56770
Columns: 199 entries, date to over_under_result
dtypes: float64(34), int64(87), object(78)
memory usage: 86.2+ MB


In [62]:
start_pitchers_h = df.pitcher_start_id_h.unique()
start_pitchers_v = df.pitcher_start_id_v.unique()
start_pitchers_all = np.union1d(start_pitchers_h.astype(str), start_pitchers_v.astype(str))

In [63]:
pitcher_data_dict = {}
for i, p_id in enumerate(start_pitchers_all):
    if i % 250 == 0:
        print(i)
    try:  
        pitcher_data_dict[p_id] = load_and_process_pitch_df(p_id,'SP_2000/')
    except:
       pass

0
250
500
750
1000
1250
1500
1750
2000


In [64]:
raw_cols_to_add = ['GS', 'IP',
       'H', 'BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP',
       'HBP', 'BK', '2B', '3B', 'IP_real', 'rollsum_IP_real_6', 'rollsum_H_6',
       'rollsum_BFP_6', 'rollsum_HR_6', 'rollsum_R_6', 'rollsum_ER_6',
       'rollsum_BB_6', 'rollsum_IB_6', 'rollsum_SO_6', 'rollsum_SH_6',
       'rollsum_SF_6', 'rollsum_WP_6', 'rollsum_HBP_6',
       'rollsum_BK_6', 'rollsum_2B_6', 'rollsum_3B_6',
       'rollsum_IP_real_34', 'rollsum_H_34', 'rollsum_BFP_34',
       'rollsum_HR_34', 'rollsum_R_34', 'rollsum_ER_34', 'rollsum_BB_34',
       'rollsum_IB_34', 'rollsum_SO_34', 'rollsum_SH_34', 'rollsum_SF_34',
       'rollsum_WP_34', 'rollsum_HBP_34', 'rollsum_BK_34',
       'rollsum_2B_34', 'rollsum_3B_34', 'rollsum_IP_real_100',
       'rollsum_H_100', 'rollsum_BFP_100', 'rollsum_HR_100', 'rollsum_R_100',
       'rollsum_ER_100', 'rollsum_BB_100', 'rollsum_IB_100', 'rollsum_SO_100',
       'rollsum_SH_100', 'rollsum_SF_100', 'rollsum_WP_100',
       'rollsum_HBP_100', 'rollsum_BK_100', 'rollsum_2B_100',
       'rollsum_3B_100', 'H_BB_roll_6', 'XB_roll_6', 'TB_roll_6',
       'IP_mod_6', 'BF_mod_6', 'ER_mod_6', 'FIP_numer_6',
       'FIP_numer_mod_6', 'FIP_numer_mod2_6', 'H_BB_mod_6',
       'H_BB_mod2_6', 'SO_mod_6', 'TB_BB_mod_6', 'ERA_6', 'FIP_6',
       'FIP_perc_6', 'WHIP_6', 'SO_perc_6', 'TB_BB_perc_6',
       'H_BB_perc_6', 'H_BB_roll_34', 'XB_roll_34', 'TB_roll_34',
       'IP_mod_34', 'BF_mod_34', 'ER_mod_34', 'FIP_numer_34',
       'FIP_numer_mod_34', 'FIP_numer_mod2_34', 'H_BB_mod_34',
       'H_BB_mod2_34', 'SO_mod_34', 'TB_BB_mod_34', 'ERA_34', 'FIP_34',
       'FIP_perc_34', 'WHIP_34', 'SO_perc_34', 'TB_BB_perc_34',
       'H_BB_perc_34', 'H_BB_roll_100', 'XB_roll_100', 'TB_roll_100',
       'IP_mod_100', 'BF_mod_100', 'ER_mod_100', 'FIP_numer_100',
       'FIP_numer_mod_100', 'FIP_numer_mod2_100', 'H_BB_mod_100',
       'H_BB_mod2_100', 'SO_mod_100', 'TB_BB_mod_100', 'ERA_100', 'FIP_100',
       'FIP_perc_100', 'WHIP_100', 'SO_perc_100', 'TB_BB_perc_100',
       'H_BB_perc_100']
cols_to_add = ['Strt_'+col+suff for col in raw_cols_to_add for suff in ['_h','_v']]

col_add_dict = {col:np.zeros(df.shape[0]) for col in cols_to_add}

In [65]:
for i in range(df.shape[0]):
    row = df.iloc[i,:]
    if i % 1000 == 0:
        print(i)
    sp_id_v = row['pitcher_start_id_v']
    sp_id_h = row['pitcher_start_id_h']
    date_dblhead = row['date_dblhead']
    if sp_id_v in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_v]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict['Strt_'+col+'_v'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_v} date {date_dblhead}')
    if sp_id_h in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_h]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict['Strt_'+col+'_h'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_h} date {date_dblhead}')

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000


In [66]:
for col in cols_to_add:
    df[col] = col_add_dict[col]

In [67]:
df.sample(5)

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_6_h,Strt_rollsum_IP_real_6_v,Strt_rollsum_H_6_h,Strt_rollsum_H_6_v,Strt_rollsum_BFP_6_h,Strt_rollsum_BFP_6_v,Strt_rollsum_HR_6_h,Strt_rollsum_HR_6_v,Strt_rollsum_R_6_h,Strt_rollsum_R_6_v,Strt_rollsum_ER_6_h,Strt_rollsum_ER_6_v,Strt_rollsum_BB_6_h,Strt_rollsum_BB_6_v,Strt_rollsum_IB_6_h,Strt_rollsum_IB_6_v,Strt_rollsum_SO_6_h,Strt_rollsum_SO_6_v,Strt_rollsum_SH_6_h,Strt_rollsum_SH_6_v,Strt_rollsum_SF_6_h,Strt_rollsum_SF_6_v,Strt_rollsum_WP_6_h,Strt_rollsum_WP_6_v,Strt_rollsum_HBP_6_h,Strt_rollsum_HBP_6_v,Strt_rollsum_BK_6_h,Strt_rollsum_BK_6_v,Strt_rollsum_2B_6_h,Strt_rollsum_2B_6_v,Strt_rollsum_3B_6_h,Strt_rollsum_3B_6_v,Strt_rollsum_IP_real_34_h,Strt_rollsum_IP_real_34_v,Strt_rollsum_H_34_h,Strt_rollsum_H_34_v,Strt_rollsum_BFP_34_h,Strt_rollsum_BFP_34_v,Strt_rollsum_HR_34_h,Strt_rollsum_HR_34_v,Strt_rollsum_R_34_h,Strt_rollsum_R_34_v,Strt_rollsum_ER_34_h,Strt_rollsum_ER_34_v,Strt_rollsum_BB_34_h,Strt_rollsum_BB_34_v,Strt_rollsum_IB_34_h,Strt_rollsum_IB_34_v,Strt_rollsum_SO_34_h,Strt_rollsum_SO_34_v,Strt_rollsum_SH_34_h,Strt_rollsum_SH_34_v,Strt_rollsum_SF_34_h,Strt_rollsum_SF_34_v,Strt_rollsum_WP_34_h,Strt_rollsum_WP_34_v,Strt_rollsum_HBP_34_h,Strt_rollsum_HBP_34_v,Strt_rollsum_BK_34_h,Strt_rollsum_BK_34_v,Strt_rollsum_2B_34_h,Strt_rollsum_2B_34_v,Strt_rollsum_3B_34_h,Strt_rollsum_3B_34_v,Strt_rollsum_IP_real_100_h,Strt_rollsum_IP_real_100_v,Strt_rollsum_H_100_h,Strt_rollsum_H_100_v,Strt_rollsum_BFP_100_h,Strt_rollsum_BFP_100_v,Strt_rollsum_HR_100_h,Strt_rollsum_HR_100_v,Strt_rollsum_R_100_h,Strt_rollsum_R_100_v,Strt_rollsum_ER_100_h,Strt_rollsum_ER_100_v,Strt_rollsum_BB_100_h,Strt_rollsum_BB_100_v,Strt_rollsum_IB_100_h,Strt_rollsum_IB_100_v,Strt_rollsum_SO_100_h,Strt_rollsum_SO_100_v,Strt_rollsum_SH_100_h,Strt_rollsum_SH_100_v,Strt_rollsum_SF_100_h,Strt_rollsum_SF_100_v,Strt_rollsum_WP_100_h,Strt_rollsum_WP_100_v,Strt_rollsum_HBP_100_h,Strt_rollsum_HBP_100_v,Strt_rollsum_BK_100_h,Strt_rollsum_BK_100_v,Strt_rollsum_2B_100_h,Strt_rollsum_2B_100_v,Strt_rollsum_3B_100_h,Strt_rollsum_3B_100_v,Strt_H_BB_roll_6_h,Strt_H_BB_roll_6_v,Strt_XB_roll_6_h,Strt_XB_roll_6_v,Strt_TB_roll_6_h,Strt_TB_roll_6_v,Strt_IP_mod_6_h,Strt_IP_mod_6_v,Strt_BF_mod_6_h,Strt_BF_mod_6_v,Strt_ER_mod_6_h,Strt_ER_mod_6_v,Strt_FIP_numer_6_h,Strt_FIP_numer_6_v,Strt_FIP_numer_mod_6_h,Strt_FIP_numer_mod_6_v,Strt_FIP_numer_mod2_6_h,Strt_FIP_numer_mod2_6_v,Strt_H_BB_mod_6_h,Strt_H_BB_mod_6_v,Strt_H_BB_mod2_6_h,Strt_H_BB_mod2_6_v,Strt_SO_mod_6_h,Strt_SO_mod_6_v,Strt_TB_BB_mod_6_h,Strt_TB_BB_mod_6_v,Strt_ERA_6_h,Strt_ERA_6_v,Strt_FIP_6_h,Strt_FIP_6_v,Strt_FIP_perc_6_h,Strt_FIP_perc_6_v,Strt_WHIP_6_h,Strt_WHIP_6_v,Strt_SO_perc_6_h,Strt_SO_perc_6_v,Strt_TB_BB_perc_6_h,Strt_TB_BB_perc_6_v,Strt_H_BB_perc_6_h,Strt_H_BB_perc_6_v,Strt_H_BB_roll_34_h,Strt_H_BB_roll_34_v,Strt_XB_roll_34_h,Strt_XB_roll_34_v,Strt_TB_roll_34_h,Strt_TB_roll_34_v,Strt_IP_mod_34_h,Strt_IP_mod_34_v,Strt_BF_mod_34_h,Strt_BF_mod_34_v,Strt_ER_mod_34_h,Strt_ER_mod_34_v,Strt_FIP_numer_34_h,Strt_FIP_numer_34_v,Strt_FIP_numer_mod_34_h,Strt_FIP_numer_mod_34_v,Strt_FIP_numer_mod2_34_h,Strt_FIP_numer_mod2_34_v,Strt_H_BB_mod_34_h,Strt_H_BB_mod_34_v,Strt_H_BB_mod2_34_h,Strt_H_BB_mod2_34_v,Strt_SO_mod_34_h,Strt_SO_mod_34_v,Strt_TB_BB_mod_34_h,Strt_TB_BB_mod_34_v,Strt_ERA_34_h,Strt_ERA_34_v,Strt_FIP_34_h,Strt_FIP_34_v,Strt_FIP_perc_34_h,Strt_FIP_perc_34_v,Strt_WHIP_34_h,Strt_WHIP_34_v,Strt_SO_perc_34_h,Strt_SO_perc_34_v,Strt_TB_BB_perc_34_h,Strt_TB_BB_perc_34_v,Strt_H_BB_perc_34_h,Strt_H_BB_perc_34_v,Strt_H_BB_roll_100_h,Strt_H_BB_roll_100_v,Strt_XB_roll_100_h,Strt_XB_roll_100_v,Strt_TB_roll_100_h,Strt_TB_roll_100_v,Strt_IP_mod_100_h,Strt_IP_mod_100_v,Strt_BF_mod_100_h,Strt_BF_mod_100_v,Strt_ER_mod_100_h,Strt_ER_mod_100_v,Strt_FIP_numer_100_h,Strt_FIP_numer_100_v,Strt_FIP_numer_mod_100_h,Strt_FIP_numer_mod_100_v,Strt_FIP_numer_mod2_100_h,Strt_FIP_numer_mod2_100_v,Strt_H_BB_mod_100_h,Strt_H_BB_mod_100_v,Strt_H_BB_mod2_100_h,Strt_H_BB_mod2_100_v,Strt_SO_mod_100_h,Strt_SO_mod_100_v,Strt_TB_BB_mod_100_h,Strt_TB_BB_mod_100_v,Strt_ERA_100_h,Strt_ERA_100_v,Strt_FIP_100_h,Strt_FIP_100_v,Strt_FIP_perc_100_h,Strt_FIP_perc_100_v,Strt_WHIP_100_h,Strt_WHIP_100_v,Strt_SO_perc_100_h,Strt_SO_perc_100_v,Strt_TB_BB_perc_100_h,Strt_TB_BB_perc_100_v,Strt_H_BB_perc_100_h,Strt_H_BB_perc_100_v
53718,20220821,0,Sun,SLN,NL,120,ARI,NL,121,6,4,54,D,,,,PHO01,25064.0,215,300000210,031000000,36,12,1,0,1,5,0,0,1,6,0,9,1,1,2,0,10,5,2,2,0,0,27,12,2,0,2,0,33,8,0,0,0,3,0,0,0,5,0,2,0,0,2,1,8,6,6,6,1,0,27,9,0,0,2,0,buckc901,CB Bucknor,tosia901,Alex Tosi,sches901,Stu Scheurwater,nelsj901,Jeff Nelson,,(none),,(none),marmo801,Oliver Marmol,lovut001,Tony Lovullo,woodj003,Jake Woodford,mantj002,Joe Mantiply,gallg001,Giovanny Gallegos,arenn001,Nolan Arenado,quinj001,Jose Quintana,kellm002,Merrill Kelly,nootl001,Lars Nootbaar,9,donob001,Brendan Donovan,5,goldp001,Paul Goldschmidt,3,arenn001,Nolan Arenado,10,gormn001,Nolan Gorman,4,oneit001,Tyler O'Neill,8,dickc002,Corey Dickerson,7,dejop001,Paul DeJong,6,kniza001,Andrew Knizner,2,kellc002,Carson Kelly,2,rivee001,Emmanuel Rivera,5,martk001,Ketel Marte,10,walkc002,Christian Walker,3,garrs003,Stone Garrett,7,rojaj001,Josh Rojas,4,luplj001,Jordan Luplow,9,mccaj003,Jake McCarthy,8,perdg001,Gerardo Perdomo,6,,Y,2022,-2,0,10,202208210,0.231284,0.258507,0.301132,0.321447,0.387516,0.435785,0.688648,0.757231,73.0,101.0,25.0,24.0,103.0,62.0,0.245935,0.281773,0.312963,0.354867,0.398374,0.499507,0.711337,0.854375,27.0,14.0,7.0,5.0,13.0,10.0,0.487805,0.555556,0.466125,7.5,O,1.0,1.0,6.0,2.2,7.0,7.0,26.0,17.0,1.0,0.0,3.0,4.0,3.0,2.0,3.0,3.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,2.666667,40.0,34.666667,27.0,27.0,149.0,136.0,2.0,1.0,6.0,11.0,6.0,11.0,8.0,10.0,0.0,0.0,38.0,27.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,8.0,4.0,0.0,1.0,204.0,137.333333,181.0,132.0,830.0,576.0,15.0,12.0,75.0,60.0,72.0,56.0,60.0,43.0,1.0,2.0,163.0,122.0,3.0,0.0,2.0,1.0,4.0,3.0,4.0,2.0,1.0,1.0,46.0,32.0,2.0,1.0,517.0,430.666667,491.0,448.0,2147.0,1855.0,63.0,51.0,233.0,230.0,222.0,211.0,147.0,137.0,9.0,2.0,439.0,417.0,9.0,7.0,10.0,17.0,10.0,20.0,9.0,7.0,1.0,1.0,107.0,98.0,9.0,4.0,35.0,37.0,14.0,9.0,41.0,36.0,40.0,34.666667,149.0,136.0,6.0,11.0,55.0,70.0,55.0,70.0,55.0,70.0,35.0,37.0,35.0,37.0,38.0,27.0,49.0,46.0,1.35,2.855769,1.375,2.019231,0.369128,0.514706,0.875,1.067308,0.255034,0.198529,0.328859,0.338235,0.234899,0.272059,241.0,175.0,95.0,70.0,276.0,202.0,204.0,137.333333,830.0,576.0,72.0,56.0,592.0,437.0,592.0,437.0,592.0,437.0,241.0,175.0,241.0,175.0,163.0,122.0,336.0,245.0,3.176471,3.669903,2.901961,3.182039,0.713253,0.758681,1.181373,1.274272,0.196386,0.211806,0.404819,0.425347,0.290361,0.303819,638.0,585.0,314.0,259.0,805.0,707.0,517.0,430.666667,2147.0,1855.0,222.0,211.0,1855.0,1584.0,1855.0,1584.0,1855.0,1584.0,638.0,585.0,638.0,585.0,439.0,417.0,952.0,844.0,3.864603,4.409443,3.588008,3.678019,0.863996,0.853908,1.234043,1.358359,0.204471,0.224798,0.443409,0.454987,0.297159,0.315364
28970,20110916,0,Fri,CHA,AL,150,KCA,AL,152,6,7,52,N,,,,KAN06,24918.0,193,101010120,200301001,37,12,0,0,1,6,1,1,0,4,0,6,0,1,1,0,10,3,6,6,0,0,25,8,1,0,0,0,36,12,1,0,1,7,1,0,0,3,1,6,3,0,0,0,8,5,5,5,1,1,27,8,1,0,1,0,tscht901,Tim Tschida,gibsg901,Greg Gibson,herna901,Angel Hernandez,ticht901,Todd Tichenor,,(none),,(none),guilo001,Ozzie Guillen,yoste001,Ned Yost,hollg001,Greg Holland,thorm001,Matt Thornton,,(none),hosme001,Eric Hosmer,humbp001,Philip Humber,paulf001,Felipe Paulino,pierj002,Juan Pierre,7,ramia003,Alexei Ramirez,6,konep001,Paul Konerko,3,piera001,A.J. Pierzynski,2,riosa002,Alex Rios,8,dunna001,Adam Dunn,10,deaza001,Alejandro de Aza,9,moreb001,Brent Morel,5,beckg001,Gordon Beckham,4,gorda001,Alex Gordon,7,cabrm002,Melky Cabrera,8,butlb003,Billy Butler,10,hosme001,Eric Hosmer,3,franj004,Jeff Francoeur,9,mousm001,Mike Moustakas,5,giavj001,Johnny Giavotella,4,peres002,Salvador Perez,2,escoa003,Alcides Escobar,6,,Y,2011,1,1,13,201109160,0.269156,0.254271,0.32417,0.312845,0.408954,0.38695,0.733124,0.699796,153.0,94.0,57.0,50.0,93.0,85.0,0.289896,0.260618,0.328571,0.323322,0.454202,0.393822,0.782774,0.717144,21.0,20.0,11.0,8.0,17.0,17.0,0.0,0.0,0.5,0.0,,1.0,1.0,6.0,6.0,7.0,9.0,26.0,29.0,0.0,1.0,3.0,6.0,3.0,5.0,3.0,2.0,0.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,34.333333,31.333333,31.0,38.0,150.0,135.0,4.0,3.0,20.0,14.0,20.0,14.0,22.0,5.0,0.0,1.0,29.0,26.0,2.0,1.0,1.0,3.0,3.0,0.0,1.0,0.0,0.0,0.0,4.0,5.0,1.0,0.0,120.333333,167.0,125.0,155.0,517.0,690.0,12.0,12.0,66.0,67.0,61.0,65.0,47.0,40.0,2.0,4.0,112.0,115.0,4.0,2.0,6.0,6.0,7.0,10.0,8.0,6.0,0.0,1.0,19.0,27.0,4.0,1.0,330.666667,195.666667,370.0,191.0,1468.0,826.0,41.0,18.0,217.0,87.0,196.0,85.0,138.0,57.0,9.0,6.0,300.0,134.0,20.0,2.0,10.0,6.0,19.0,11.0,15.0,7.0,1.0,1.0,71.0,31.0,10.0,2.0,53.0,43.0,18.0,14.0,49.0,52.0,34.333333,31.333333,150.0,135.0,20.0,14.0,153.0,116.0,153.0,116.0,153.0,116.0,53.0,43.0,53.0,43.0,29.0,26.0,71.0,57.0,5.242718,4.021277,4.456311,3.702128,1.02,0.859259,1.543689,1.37234,0.193333,0.192593,0.473333,0.422222,0.353333,0.318519,172.0,195.0,63.0,65.0,188.0,220.0,120.333333,167.0,517.0,690.0,61.0,65.0,448.0,511.0,448.0,511.0,448.0,511.0,172.0,195.0,172.0,195.0,112.0,115.0,235.0,260.0,4.562327,3.502994,3.722992,3.05988,0.866538,0.74058,1.429363,1.167665,0.216634,0.166667,0.454545,0.376812,0.332689,0.282609,508.0,248.0,214.0,89.0,584.0,280.0,330.666667,300.0,1468.0,1200.0,196.0,142.962963,1457.0,710.0,1457.0,1180.752,1457.0,1121.4,508.0,404.5,508.0,386.38,300.0,208.8,722.0,505.3,5.334677,4.288889,4.40625,3.93584,0.992507,0.98396,1.53629,1.348333,0.20436,0.174,0.491826,0.421083,0.346049,0.321983
43552,20170919,0,Tue,CLE,AL,151,ANA,AL,150,6,3,54,N,,,,ANA01,36171.0,200,111003,000010011,38,14,1,1,1,6,0,0,0,3,1,5,1,0,2,0,8,4,3,3,0,0,27,13,1,0,2,0,30,7,0,0,1,2,0,0,0,2,0,8,0,2,2,1,3,7,6,6,0,0,27,10,0,0,2,0,cuzzp901,Phil Cuzzi,hallt901,Tom Hallion,rippm901,Mark Ripperger,carav901,Vic Carapazza,,(none),,(none),frant001,Terry Francona,sciom001,Mike Scioscia,clevm001,Mike Clevinger,skagt001,Tyler Skaggs,,(none),brucj001,Jay Bruce,clevm001,Mike Clevinger,skagt001,Tyler Skaggs,lindf001,Francisco Lindor,6,jacka001,Austin Jackson,7,ramij003,Jose Ramirez,4,encae001,Edwin Encarnacion,10,santc002,Carlos Santana,3,brucj001,Jay Bruce,9,diazy001,Yandy Diaz,5,kipnj001,Jason Kipnis,8,perer003,Roberto Perez,2,philb001,Brandon Phillips,4,troum001,Mike Trout,8,uptoj001,Justin Upton,7,pujoa001,Albert Pujols,10,calhk001,Kole Calhoun,9,simma001,Andrelton Simmons,6,valbl001,Luis Valbuena,5,cronc002,C.J. Cron,3,maldm001,Martin Maldonado,2,,Y,2017,-3,0,9,201709190,0.2441,0.261187,0.311387,0.335687,0.39694,0.444749,0.708327,0.780436,138.0,91.0,46.0,26.0,79.0,78.0,0.240796,0.280156,0.318141,0.354839,0.422886,0.498054,0.741027,0.852893,24.0,16.0,5.0,4.0,16.0,10.0,0.0,0.0,0.5,0.0,,1.0,1.0,5.1,6.0,8.0,3.0,23.0,20.0,1.0,0.0,2.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5.333333,6.0,30.333333,30.666667,31.0,23.0,133.0,130.0,6.0,3.0,21.0,9.0,18.0,7.0,10.0,15.0,0.0,0.0,28.0,37.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,1.0,2.0,0.0,9.0,5.0,0.0,0.0,187.0,138.666667,189.0,111.0,795.0,581.0,19.0,17.0,96.0,58.0,90.0,54.0,65.0,68.0,0.0,2.0,173.0,157.0,2.0,0.0,9.0,0.0,10.0,3.0,9.0,3.0,2.0,0.0,35.0,31.0,1.0,0.0,300.666667,165.0,296.0,136.0,1283.0,699.0,36.0,21.0,162.0,75.0,152.0,71.0,105.0,85.0,3.0,2.0,258.0,177.0,3.0,0.0,12.0,1.0,15.0,5.0,14.0,3.0,2.0,0.0,59.0,36.0,2.0,0.0,41.0,38.0,27.0,14.0,58.0,37.0,30.333333,30.666667,133.0,130.0,18.0,7.0,145.0,79.0,145.0,79.0,145.0,79.0,41.0,38.0,41.0,38.0,28.0,37.0,68.0,52.0,5.340659,2.054348,4.78022,2.576087,1.090226,0.607692,1.351648,1.23913,0.210526,0.284615,0.511278,0.4,0.308271,0.292308,254.0,179.0,94.0,82.0,283.0,193.0,187.0,138.666667,795.0,581.0,90.0,54.0,663.0,444.0,663.0,444.0,663.0,444.0,254.0,179.0,254.0,179.0,173.0,157.0,348.0,261.0,4.331551,3.504808,3.545455,3.201923,0.833962,0.7642,1.358289,1.290865,0.21761,0.270224,0.437736,0.449225,0.319497,0.30809,401.0,221.0,171.0,99.0,467.0,235.0,300.666667,300.0,1283.0,1200.0,152.0,146.0,1155.0,582.0,1155.0,1191.12,1155.0,1133.1,401.0,423.5,401.0,406.37,258.0,277.2,572.0,545.45,4.549889,4.38,3.841463,3.9704,0.900234,0.9926,1.333703,1.411667,0.201091,0.231,0.44583,0.454542,0.312549,0.338642
55948,20230801,0,Tue,CLE,AL,108,HOU,AL,108,0,2,51,N,,,,HOU03,33703.0,126,0,00200000x,26,0,0,0,0,0,0,0,0,1,0,7,0,0,1,0,0,4,2,2,1,0,24,8,0,0,1,0,29,6,1,0,0,2,0,0,0,4,0,10,0,1,0,0,7,1,0,0,0,0,27,15,0,0,1,0,wolcq901,Quinn Wolcott,valej901,Junior Valentine,clemp901,Paul Clemons,johna901,Adrian Johnson,,(none),,(none),frant001,Terry Francona,baked002,Dusty Baker,valdf001,Framber Valdez,willg004,Gavin Williams,,(none),tuckk001,Kyle Tucker,willg004,Gavin Williams,valdf001,Framber Valdez,kwans001,Steven Kwan,7,freet001,Tyler Freeman,4,ramij003,Jose Ramirez,5,gonzo001,Oscar Gonzalez,10,fry-d001,David Fry,3,brenw002,Will Brennan,9,ariag002,Gabriel Arias,6,stram002,Myles Straw,8,gallc001,Cam Gallagher,2,altuj001,Jose Altuve,4,penaj004,Jeremy Pena,6,tuckk001,Kyle Tucker,9,brega001,Alex Bregman,5,abrej003,Jose Abreu,3,mccoc001,Chas McCormick,7,diazy004,Yainer Diaz,10,meyej002,Jake Meyers,8,maldm001,Martin Maldonado,2,,Y,2023,2,1,2,202308010,0.253148,0.255205,0.316062,0.311889,0.417959,0.382807,0.734022,0.694695,108.0,139.0,25.0,33.0,73.0,84.0,0.260786,0.268756,0.326638,0.322183,0.450623,0.418803,0.777261,0.740987,22.0,30.0,5.0,3.0,17.0,14.0,0.649123,0.377358,0.635882,7.5,U,1.0,1.0,9.0,5.0,0.0,4.0,27.0,22.0,0.0,0.0,0.0,2.0,0.0,2.0,1.0,4.0,0.0,0.0,7.0,6.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,9.0,5.0,35.0,32.0,38.0,28.0,154.0,134.0,5.0,3.0,23.0,10.0,23.0,10.0,12.0,14.0,0.0,0.0,39.0,27.0,0.0,0.0,3.0,2.0,4.0,0.0,2.0,1.0,0.0,0.0,8.0,4.0,0.0,2.0,218.333333,37.666667,197.0,32.0,902.0,157.0,16.0,4.0,86.0,14.0,77.0,14.0,59.0,17.0,0.0,0.0,232.0,31.0,3.0,0.0,6.0,2.0,14.0,0.0,11.0,1.0,0.0,0.0,35.0,5.0,2.0,2.0,586.666667,37.666667,510.0,32.0,2460.0,157.0,47.0,4.0,249.0,14.0,222.0,14.0,208.0,17.0,1.0,0.0,582.0,31.0,6.0,0.0,14.0,2.0,36.0,0.0,36.0,1.0,5.0,0.0,82.0,5.0,6.0,2.0,50.0,42.0,23.0,17.0,61.0,45.0,35.0,32.0,154.0,134.0,23.0,10.0,137.0,111.0,137.0,111.0,137.0,111.0,50.0,42.0,50.0,42.0,39.0,27.0,73.0,59.0,5.914286,2.8125,3.914286,3.46875,0.88961,0.828358,1.428571,1.3125,0.253247,0.201493,0.474026,0.440299,0.324675,0.313433,256.0,49.0,87.0,21.0,284.0,53.0,218.333333,102.0,902.0,408.0,77.0,49.740741,512.0,137.0,512.0,427.272,512.0,413.1,256.0,145.5,256.0,141.87,232.0,81.2,343.0,182.95,3.174046,4.388889,2.345038,4.188941,0.567627,1.047235,1.172519,1.426471,0.257206,0.19902,0.380266,0.448407,0.283814,0.347721,718.0,49.0,235.0,21.0,745.0,53.0,586.666667,300.0,2460.0,1200.0,222.0,159.740741,1601.0,137.0,1601.0,1320.648,1601.0,1284.3,718.0,442.5,718.0,434.91,582.0,239.6,953.0,539.35,3.405682,4.792222,2.728977,4.40216,0.650813,1.10054,1.223864,1.475,0.236585,0.199667,0.387398,0.449458,0.29187,0.362425
1699,20000810,0,Thu,OAK,AL,113,NYA,AL,110,6,12,51,D,,,,NYC16,41011.0,175,2000103,16031100x,40,13,2,0,3,6,0,0,0,2,0,10,0,0,0,0,9,3,12,12,0,0,24,12,0,0,1,0,36,15,5,0,2,11,0,2,0,6,1,3,0,0,1,0,8,3,6,6,0,0,27,9,0,0,0,0,relic901,Charlie Reliford,carlm901,Mark Carlson,demud901,Dana DeMuth,eddid901,Doug Eddings,,(none),,(none),howea001,Art Howe,torrj101,Joe Torre,coned001,David Cone,muldm001,Mark Mulder,,(none),justd001,David Justice,muldm001,Mark Mulder,coned001,David Cone,longt002,Terrence Long,8,velar001,Randy Velarde,4,giamj001,Jason Giambi,3,grieb001,Ben Grieve,7,staim001,Matt Stairs,9,tejam001,Miguel Tejada,6,chave001,Eric Chavez,5,giamj002,Jeremy Giambi,10,hernr002,Ramon Hernandez,2,jeted001,Derek Jeter,6,sojol001,Luis Sojo,4,willb002,Bernie Williams,8,cansj001,Jose Canseco,10,justd001,David Justice,9,hillg001,Glenallen Hill,7,martt002,Tino Martinez,3,turnc001,Chris Turner,2,bross001,Scott Brosius,5,,Y,2000,6,1,18,200008100,,,,,,,,,,,,,,,0.277286,0.262597,0.358639,0.335951,0.45821,0.447674,0.816849,0.783626,14.0,3.0,10.0,2.0,12.0,25.0,0.0,0.0,0.5,0.0,,1.0,1.0,6.0,3.1,8.0,11.0,27.0,24.0,1.0,2.0,2.0,10.0,2.0,10.0,1.0,3.0,0.0,0.0,8.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,6.0,3.333333,32.0,38.333333,45.0,48.0,155.0,173.0,10.0,4.0,33.0,27.0,30.0,18.0,17.0,14.0,2.0,1.0,27.0,21.0,0.0,1.0,1.0,3.0,2.0,1.0,0.0,2.0,0.0,0.0,8.0,11.0,0.0,0.0,192.0,118.666667,210.0,142.0,874.0,537.0,35.0,17.0,135.0,78.0,122.0,65.0,96.0,50.0,3.0,3.0,172.0,67.0,7.0,3.0,7.0,6.0,13.0,4.0,7.0,4.0,1.0,0.0,45.0,30.0,2.0,1.0,632.0,118.666667,572.0,142.0,2694.0,537.0,73.0,17.0,304.0,78.0,280.0,65.0,255.0,50.0,6.0,3.0,605.0,67.0,14.0,3.0,16.0,6.0,31.0,4.0,31.0,4.0,2.0,0.0,123.0,30.0,8.0,1.0,62.0,62.0,38.0,23.0,83.0,71.0,32.0,38.333333,155.0,173.0,30.0,18.0,262.0,196.0,262.0,196.0,262.0,196.0,62.0,62.0,62.0,62.0,27.0,21.0,100.0,85.0,8.4375,4.226087,8.1875,5.113043,1.690323,1.132948,1.9375,1.617391,0.174194,0.121387,0.645161,0.491329,0.4,0.358382,306.0,192.0,154.0,83.0,364.0,225.0,192.0,118.666667,874.0,537.0,122.0,65.0,1029.0,663.0,1029.0,663.0,1029.0,663.0,306.0,192.0,306.0,192.0,172.0,67.0,460.0,275.0,5.71875,4.929775,5.359375,5.587079,1.177346,1.234637,1.59375,1.617978,0.196796,0.124767,0.526316,0.512104,0.350114,0.357542,827.0,192.0,358.0,83.0,930.0,225.0,632.0,300.0,2694.0,1200.0,280.0,165.740741,2220.0,663.0,2220.0,1481.176,2220.0,1392.3,827.0,464.0,827.0,437.31,605.0,199.6,1185.0,573.35,3.987342,4.972222,3.512658,4.937253,0.824053,1.234313,1.308544,1.546667,0.224573,0.166333,0.439866,0.477792,0.306978,0.364425


In [68]:
df.shape

(56771, 451)

In [69]:
(df.Strt_IP_mod_6_h==0).sum()

0

In [70]:
(df.Strt_IP_mod_6_v==0).sum()

0

In [71]:
df.loc[df.Strt_IP_mod_6_v==0]

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_6_h,Strt_rollsum_IP_real_6_v,Strt_rollsum_H_6_h,Strt_rollsum_H_6_v,Strt_rollsum_BFP_6_h,Strt_rollsum_BFP_6_v,Strt_rollsum_HR_6_h,Strt_rollsum_HR_6_v,Strt_rollsum_R_6_h,Strt_rollsum_R_6_v,Strt_rollsum_ER_6_h,Strt_rollsum_ER_6_v,Strt_rollsum_BB_6_h,Strt_rollsum_BB_6_v,Strt_rollsum_IB_6_h,Strt_rollsum_IB_6_v,Strt_rollsum_SO_6_h,Strt_rollsum_SO_6_v,Strt_rollsum_SH_6_h,Strt_rollsum_SH_6_v,Strt_rollsum_SF_6_h,Strt_rollsum_SF_6_v,Strt_rollsum_WP_6_h,Strt_rollsum_WP_6_v,Strt_rollsum_HBP_6_h,Strt_rollsum_HBP_6_v,Strt_rollsum_BK_6_h,Strt_rollsum_BK_6_v,Strt_rollsum_2B_6_h,Strt_rollsum_2B_6_v,Strt_rollsum_3B_6_h,Strt_rollsum_3B_6_v,Strt_rollsum_IP_real_34_h,Strt_rollsum_IP_real_34_v,Strt_rollsum_H_34_h,Strt_rollsum_H_34_v,Strt_rollsum_BFP_34_h,Strt_rollsum_BFP_34_v,Strt_rollsum_HR_34_h,Strt_rollsum_HR_34_v,Strt_rollsum_R_34_h,Strt_rollsum_R_34_v,Strt_rollsum_ER_34_h,Strt_rollsum_ER_34_v,Strt_rollsum_BB_34_h,Strt_rollsum_BB_34_v,Strt_rollsum_IB_34_h,Strt_rollsum_IB_34_v,Strt_rollsum_SO_34_h,Strt_rollsum_SO_34_v,Strt_rollsum_SH_34_h,Strt_rollsum_SH_34_v,Strt_rollsum_SF_34_h,Strt_rollsum_SF_34_v,Strt_rollsum_WP_34_h,Strt_rollsum_WP_34_v,Strt_rollsum_HBP_34_h,Strt_rollsum_HBP_34_v,Strt_rollsum_BK_34_h,Strt_rollsum_BK_34_v,Strt_rollsum_2B_34_h,Strt_rollsum_2B_34_v,Strt_rollsum_3B_34_h,Strt_rollsum_3B_34_v,Strt_rollsum_IP_real_100_h,Strt_rollsum_IP_real_100_v,Strt_rollsum_H_100_h,Strt_rollsum_H_100_v,Strt_rollsum_BFP_100_h,Strt_rollsum_BFP_100_v,Strt_rollsum_HR_100_h,Strt_rollsum_HR_100_v,Strt_rollsum_R_100_h,Strt_rollsum_R_100_v,Strt_rollsum_ER_100_h,Strt_rollsum_ER_100_v,Strt_rollsum_BB_100_h,Strt_rollsum_BB_100_v,Strt_rollsum_IB_100_h,Strt_rollsum_IB_100_v,Strt_rollsum_SO_100_h,Strt_rollsum_SO_100_v,Strt_rollsum_SH_100_h,Strt_rollsum_SH_100_v,Strt_rollsum_SF_100_h,Strt_rollsum_SF_100_v,Strt_rollsum_WP_100_h,Strt_rollsum_WP_100_v,Strt_rollsum_HBP_100_h,Strt_rollsum_HBP_100_v,Strt_rollsum_BK_100_h,Strt_rollsum_BK_100_v,Strt_rollsum_2B_100_h,Strt_rollsum_2B_100_v,Strt_rollsum_3B_100_h,Strt_rollsum_3B_100_v,Strt_H_BB_roll_6_h,Strt_H_BB_roll_6_v,Strt_XB_roll_6_h,Strt_XB_roll_6_v,Strt_TB_roll_6_h,Strt_TB_roll_6_v,Strt_IP_mod_6_h,Strt_IP_mod_6_v,Strt_BF_mod_6_h,Strt_BF_mod_6_v,Strt_ER_mod_6_h,Strt_ER_mod_6_v,Strt_FIP_numer_6_h,Strt_FIP_numer_6_v,Strt_FIP_numer_mod_6_h,Strt_FIP_numer_mod_6_v,Strt_FIP_numer_mod2_6_h,Strt_FIP_numer_mod2_6_v,Strt_H_BB_mod_6_h,Strt_H_BB_mod_6_v,Strt_H_BB_mod2_6_h,Strt_H_BB_mod2_6_v,Strt_SO_mod_6_h,Strt_SO_mod_6_v,Strt_TB_BB_mod_6_h,Strt_TB_BB_mod_6_v,Strt_ERA_6_h,Strt_ERA_6_v,Strt_FIP_6_h,Strt_FIP_6_v,Strt_FIP_perc_6_h,Strt_FIP_perc_6_v,Strt_WHIP_6_h,Strt_WHIP_6_v,Strt_SO_perc_6_h,Strt_SO_perc_6_v,Strt_TB_BB_perc_6_h,Strt_TB_BB_perc_6_v,Strt_H_BB_perc_6_h,Strt_H_BB_perc_6_v,Strt_H_BB_roll_34_h,Strt_H_BB_roll_34_v,Strt_XB_roll_34_h,Strt_XB_roll_34_v,Strt_TB_roll_34_h,Strt_TB_roll_34_v,Strt_IP_mod_34_h,Strt_IP_mod_34_v,Strt_BF_mod_34_h,Strt_BF_mod_34_v,Strt_ER_mod_34_h,Strt_ER_mod_34_v,Strt_FIP_numer_34_h,Strt_FIP_numer_34_v,Strt_FIP_numer_mod_34_h,Strt_FIP_numer_mod_34_v,Strt_FIP_numer_mod2_34_h,Strt_FIP_numer_mod2_34_v,Strt_H_BB_mod_34_h,Strt_H_BB_mod_34_v,Strt_H_BB_mod2_34_h,Strt_H_BB_mod2_34_v,Strt_SO_mod_34_h,Strt_SO_mod_34_v,Strt_TB_BB_mod_34_h,Strt_TB_BB_mod_34_v,Strt_ERA_34_h,Strt_ERA_34_v,Strt_FIP_34_h,Strt_FIP_34_v,Strt_FIP_perc_34_h,Strt_FIP_perc_34_v,Strt_WHIP_34_h,Strt_WHIP_34_v,Strt_SO_perc_34_h,Strt_SO_perc_34_v,Strt_TB_BB_perc_34_h,Strt_TB_BB_perc_34_v,Strt_H_BB_perc_34_h,Strt_H_BB_perc_34_v,Strt_H_BB_roll_100_h,Strt_H_BB_roll_100_v,Strt_XB_roll_100_h,Strt_XB_roll_100_v,Strt_TB_roll_100_h,Strt_TB_roll_100_v,Strt_IP_mod_100_h,Strt_IP_mod_100_v,Strt_BF_mod_100_h,Strt_BF_mod_100_v,Strt_ER_mod_100_h,Strt_ER_mod_100_v,Strt_FIP_numer_100_h,Strt_FIP_numer_100_v,Strt_FIP_numer_mod_100_h,Strt_FIP_numer_mod_100_v,Strt_FIP_numer_mod2_100_h,Strt_FIP_numer_mod2_100_v,Strt_H_BB_mod_100_h,Strt_H_BB_mod_100_v,Strt_H_BB_mod2_100_h,Strt_H_BB_mod2_100_v,Strt_SO_mod_100_h,Strt_SO_mod_100_v,Strt_TB_BB_mod_100_h,Strt_TB_BB_mod_100_v,Strt_ERA_100_h,Strt_ERA_100_v,Strt_FIP_100_h,Strt_FIP_100_v,Strt_FIP_perc_100_h,Strt_FIP_perc_100_v,Strt_WHIP_100_h,Strt_WHIP_100_v,Strt_SO_perc_100_h,Strt_SO_perc_100_v,Strt_TB_BB_perc_100_h,Strt_TB_BB_perc_100_v,Strt_H_BB_perc_100_h,Strt_H_BB_perc_100_v


In [72]:
df.loc[df.Strt_IP_mod_6_h==0]

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_6_h,Strt_rollsum_IP_real_6_v,Strt_rollsum_H_6_h,Strt_rollsum_H_6_v,Strt_rollsum_BFP_6_h,Strt_rollsum_BFP_6_v,Strt_rollsum_HR_6_h,Strt_rollsum_HR_6_v,Strt_rollsum_R_6_h,Strt_rollsum_R_6_v,Strt_rollsum_ER_6_h,Strt_rollsum_ER_6_v,Strt_rollsum_BB_6_h,Strt_rollsum_BB_6_v,Strt_rollsum_IB_6_h,Strt_rollsum_IB_6_v,Strt_rollsum_SO_6_h,Strt_rollsum_SO_6_v,Strt_rollsum_SH_6_h,Strt_rollsum_SH_6_v,Strt_rollsum_SF_6_h,Strt_rollsum_SF_6_v,Strt_rollsum_WP_6_h,Strt_rollsum_WP_6_v,Strt_rollsum_HBP_6_h,Strt_rollsum_HBP_6_v,Strt_rollsum_BK_6_h,Strt_rollsum_BK_6_v,Strt_rollsum_2B_6_h,Strt_rollsum_2B_6_v,Strt_rollsum_3B_6_h,Strt_rollsum_3B_6_v,Strt_rollsum_IP_real_34_h,Strt_rollsum_IP_real_34_v,Strt_rollsum_H_34_h,Strt_rollsum_H_34_v,Strt_rollsum_BFP_34_h,Strt_rollsum_BFP_34_v,Strt_rollsum_HR_34_h,Strt_rollsum_HR_34_v,Strt_rollsum_R_34_h,Strt_rollsum_R_34_v,Strt_rollsum_ER_34_h,Strt_rollsum_ER_34_v,Strt_rollsum_BB_34_h,Strt_rollsum_BB_34_v,Strt_rollsum_IB_34_h,Strt_rollsum_IB_34_v,Strt_rollsum_SO_34_h,Strt_rollsum_SO_34_v,Strt_rollsum_SH_34_h,Strt_rollsum_SH_34_v,Strt_rollsum_SF_34_h,Strt_rollsum_SF_34_v,Strt_rollsum_WP_34_h,Strt_rollsum_WP_34_v,Strt_rollsum_HBP_34_h,Strt_rollsum_HBP_34_v,Strt_rollsum_BK_34_h,Strt_rollsum_BK_34_v,Strt_rollsum_2B_34_h,Strt_rollsum_2B_34_v,Strt_rollsum_3B_34_h,Strt_rollsum_3B_34_v,Strt_rollsum_IP_real_100_h,Strt_rollsum_IP_real_100_v,Strt_rollsum_H_100_h,Strt_rollsum_H_100_v,Strt_rollsum_BFP_100_h,Strt_rollsum_BFP_100_v,Strt_rollsum_HR_100_h,Strt_rollsum_HR_100_v,Strt_rollsum_R_100_h,Strt_rollsum_R_100_v,Strt_rollsum_ER_100_h,Strt_rollsum_ER_100_v,Strt_rollsum_BB_100_h,Strt_rollsum_BB_100_v,Strt_rollsum_IB_100_h,Strt_rollsum_IB_100_v,Strt_rollsum_SO_100_h,Strt_rollsum_SO_100_v,Strt_rollsum_SH_100_h,Strt_rollsum_SH_100_v,Strt_rollsum_SF_100_h,Strt_rollsum_SF_100_v,Strt_rollsum_WP_100_h,Strt_rollsum_WP_100_v,Strt_rollsum_HBP_100_h,Strt_rollsum_HBP_100_v,Strt_rollsum_BK_100_h,Strt_rollsum_BK_100_v,Strt_rollsum_2B_100_h,Strt_rollsum_2B_100_v,Strt_rollsum_3B_100_h,Strt_rollsum_3B_100_v,Strt_H_BB_roll_6_h,Strt_H_BB_roll_6_v,Strt_XB_roll_6_h,Strt_XB_roll_6_v,Strt_TB_roll_6_h,Strt_TB_roll_6_v,Strt_IP_mod_6_h,Strt_IP_mod_6_v,Strt_BF_mod_6_h,Strt_BF_mod_6_v,Strt_ER_mod_6_h,Strt_ER_mod_6_v,Strt_FIP_numer_6_h,Strt_FIP_numer_6_v,Strt_FIP_numer_mod_6_h,Strt_FIP_numer_mod_6_v,Strt_FIP_numer_mod2_6_h,Strt_FIP_numer_mod2_6_v,Strt_H_BB_mod_6_h,Strt_H_BB_mod_6_v,Strt_H_BB_mod2_6_h,Strt_H_BB_mod2_6_v,Strt_SO_mod_6_h,Strt_SO_mod_6_v,Strt_TB_BB_mod_6_h,Strt_TB_BB_mod_6_v,Strt_ERA_6_h,Strt_ERA_6_v,Strt_FIP_6_h,Strt_FIP_6_v,Strt_FIP_perc_6_h,Strt_FIP_perc_6_v,Strt_WHIP_6_h,Strt_WHIP_6_v,Strt_SO_perc_6_h,Strt_SO_perc_6_v,Strt_TB_BB_perc_6_h,Strt_TB_BB_perc_6_v,Strt_H_BB_perc_6_h,Strt_H_BB_perc_6_v,Strt_H_BB_roll_34_h,Strt_H_BB_roll_34_v,Strt_XB_roll_34_h,Strt_XB_roll_34_v,Strt_TB_roll_34_h,Strt_TB_roll_34_v,Strt_IP_mod_34_h,Strt_IP_mod_34_v,Strt_BF_mod_34_h,Strt_BF_mod_34_v,Strt_ER_mod_34_h,Strt_ER_mod_34_v,Strt_FIP_numer_34_h,Strt_FIP_numer_34_v,Strt_FIP_numer_mod_34_h,Strt_FIP_numer_mod_34_v,Strt_FIP_numer_mod2_34_h,Strt_FIP_numer_mod2_34_v,Strt_H_BB_mod_34_h,Strt_H_BB_mod_34_v,Strt_H_BB_mod2_34_h,Strt_H_BB_mod2_34_v,Strt_SO_mod_34_h,Strt_SO_mod_34_v,Strt_TB_BB_mod_34_h,Strt_TB_BB_mod_34_v,Strt_ERA_34_h,Strt_ERA_34_v,Strt_FIP_34_h,Strt_FIP_34_v,Strt_FIP_perc_34_h,Strt_FIP_perc_34_v,Strt_WHIP_34_h,Strt_WHIP_34_v,Strt_SO_perc_34_h,Strt_SO_perc_34_v,Strt_TB_BB_perc_34_h,Strt_TB_BB_perc_34_v,Strt_H_BB_perc_34_h,Strt_H_BB_perc_34_v,Strt_H_BB_roll_100_h,Strt_H_BB_roll_100_v,Strt_XB_roll_100_h,Strt_XB_roll_100_v,Strt_TB_roll_100_h,Strt_TB_roll_100_v,Strt_IP_mod_100_h,Strt_IP_mod_100_v,Strt_BF_mod_100_h,Strt_BF_mod_100_v,Strt_ER_mod_100_h,Strt_ER_mod_100_v,Strt_FIP_numer_100_h,Strt_FIP_numer_100_v,Strt_FIP_numer_mod_100_h,Strt_FIP_numer_mod_100_v,Strt_FIP_numer_mod2_100_h,Strt_FIP_numer_mod2_100_v,Strt_H_BB_mod_100_h,Strt_H_BB_mod_100_v,Strt_H_BB_mod2_100_h,Strt_H_BB_mod2_100_v,Strt_SO_mod_100_h,Strt_SO_mod_100_v,Strt_TB_BB_mod_100_h,Strt_TB_BB_mod_100_v,Strt_ERA_100_h,Strt_ERA_100_v,Strt_FIP_100_h,Strt_FIP_100_v,Strt_FIP_perc_100_h,Strt_FIP_perc_100_v,Strt_WHIP_100_h,Strt_WHIP_100_v,Strt_SO_perc_100_h,Strt_SO_perc_100_v,Strt_TB_BB_perc_100_h,Strt_TB_BB_perc_100_v,Strt_H_BB_perc_100_h,Strt_H_BB_perc_100_v


In [73]:
df.drop(df.index[df.Strt_IP_mod_6_v==0],inplace=True)
df.drop(df.index[df.Strt_IP_mod_6_h==0],inplace=True)
df.shape

(56771, 451)

In [74]:
df.reset_index(drop=True, inplace=True)

In [75]:
df.to_csv('df_bp5.csv', index=False)