Get Raw Batting Data

    - Currently the model averages the recent team hitting performance
    - This does not account for the particular players in the starting lineup that day
    - e.g. if a key hitter is resting, injured, or has been traded, the team average no longer describes the lineup that actually plays
    - To begin to model this we first need to scrape the raw batter data (similar to how we got the pitching data)

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

pd.set_option('display.max_columns', 5000)
pd.set_option('display.max_rows', 5000)

import lxml
import html5lib
from urllib.request import urlopen
import time

from bs4 import BeautifulSoup
import requests

As a worked example, let's look at Rafael Furcal's Retrosheet player page: https://www.retrosheet.org/boxesetc/F/Pfurcr001.htm

In [2]:
def get_daily_season_links_batter(batter_id):
    """Return the URLs of a batter's per-season 'Daily' pages on Retrosheet.

    Fetches the player page ``<url_prefix><LETTER>/P<batter_id>.htm``, locates
    the first ``<pre>`` section whose text starts with 'Batting Record', and
    collects every link inside it whose anchor text is exactly 'Daily'.

    Parameters
    ----------
    batter_id : str
        Retrosheet player id, e.g. 'furcr001'. Its first letter (upper-cased)
        selects the directory of the player page.

    Returns
    -------
    list of str
        One absolute URL per 'Daily' link, in the order they appear on the page.

    Raises
    ------
    requests.RequestException
        If the request times out or the server answers with an error status.
    IndexError
        If the page contains no 'Batting Record' section.
    """
    letter = batter_id.upper()[0]
    url_prefix = 'https://www.retrosheet.org/boxesetc/'
    url = url_prefix + letter + '/P' + batter_id + '.htm'
    time.sleep(1)  # fixed one-second pause before every request
    page = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Search every <pre> in the document rather than indexing into fixed
    # child positions, which breaks as soon as the page layout shifts.
    batting_section = next(
        (pre for pre in soup.find_all('pre')
         if pre.get_text().strip().startswith('Batting Record')),
        None,
    )
    if batting_section is None:
        raise IndexError(f'No "Batting Record" section found at {url}')

    # The first three characters of each href are dropped before appending it
    # to url_prefix (presumably a leading '../' -- confirm against the page).
    daily_season_links = [
        url_prefix + anchor.attrs['href'][3:]
        for anchor in batting_section.find_all('a')
        if anchor.get_text() == 'Daily'
    ]
    return daily_season_links

In [3]:
# Example: list the per-season 'Daily' page URLs for Rafael Furcal (id 'furcr001').
get_daily_season_links_batter('furcr001')

['https://www.retrosheet.org/boxesetc/2000/Ifurcr0010012000.htm',
 'https://www.retrosheet.org/boxesetc/2001/Ifurcr0010022001.htm',
 'https://www.retrosheet.org/boxesetc/2002/Ifurcr0010032002.htm',
 'https://www.retrosheet.org/boxesetc/2003/Ifurcr0010042003.htm',
 'https://www.retrosheet.org/boxesetc/2004/Ifurcr0010052004.htm',
 'https://www.retrosheet.org/boxesetc/2005/Ifurcr0010062005.htm',
 'https://www.retrosheet.org/boxesetc/2006/Ifurcr0010072006.htm',
 'https://www.retrosheet.org/boxesetc/2007/Ifurcr0010082007.htm',
 'https://www.retrosheet.org/boxesetc/2008/Ifurcr0010092008.htm',
 'https://www.retrosheet.org/boxesetc/2009/Ifurcr0010102009.htm',
 'https://www.retrosheet.org/boxesetc/2010/Ifurcr0010112010.htm',
 'https://www.retrosheet.org/boxesetc/2011/Ifurcr0010122011.htm',
 'https://www.retrosheet.org/boxesetc/2011/Ifurcr0010132011.htm',
 'https://www.retrosheet.org/boxesetc/2012/Ifurcr0010142012.htm',
 'https://www.retrosheet.org/boxesetc/2014/Ifurcr0010152014.htm']

In [4]:
def get_season_batting_data(url):
    """Scrape one season's daily batting log into a DataFrame.

    Downloads ``url``, takes the first child of the page body whose markup
    contains 'Opponent', and reads its children as a leading header text
    followed by groups of four nodes. Within each group this parser uses the
    first node (a link whose text is the date), the second (text holding the
    doubleheader marker) and the fourth (text holding the whitespace-separated
    stats); the third node is not needed.

    Parameters
    ----------
    url : str
        Address of a season 'Daily' page.

    Returns
    -------
    pandas.DataFrame or None
        One row per game with the 27 stat columns listed in ``mod_header``
        plus 'date' and 'dblhead_num'. All values are strings. When a row has
        only 26 tokens the missing 'Pos' value is filled with ''. Returns
        ``None`` (after printing a message) when any row has fewer than 26
        tokens. A table with no game rows yields an empty frame with the same
        columns.

    Raises
    ------
    requests.RequestException
        If the request times out or the server answers with an error status.
    IndexError
        If no section containing 'Opponent' is found on the page.
    """
    time.sleep(1)  # fixed one-second pause before every request
    page = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    html = list(soup.children)[-1]
    body = list(html.children)[-1]
    sec_next = list(body.children)
    key_section = next((x for x in sec_next if "Opponent" in str(x)), None)
    if key_section is None:
        raise IndexError(f'No section containing "Opponent" found at {url}')
    working_part = list(key_section.children)

    mod_header = ['at_vs', 'Opponent', 'League', 'GS', 'AB', 'R', 'H', '2B', '3B', 'HR',
                  'RBI', 'BB', 'IBB', 'SO', 'HBP', 'SH', 'SF', 'XI', 'ROE', 'GDP',
                  'SB', 'CS', 'AVG', 'OBP', 'SLG', 'BP', 'Pos']
    n_cols = len(mod_header)
    n_nodes = len(working_part)

    # Index 0 is the header text; after it the nodes repeat with a stride of 4.
    date_list = [working_part[k].get_text().strip() for k in range(1, n_nodes, 4)]
    dblhead_num_list = [working_part[k].strip() for k in range(2, n_nodes, 4)]
    # Rows are truncated to 27 tokens, so a row can never be "too long" here.
    main_data_matrix = [working_part[k].strip().split()[:n_cols]
                        for k in range(4, n_nodes, 4)]

    if not main_data_matrix:
        # Header-only table: return an empty frame rather than crashing on max([]).
        return pd.DataFrame(columns=mod_header + ['date', 'dblhead_num'])

    row_sizes = {len(row) for row in main_data_matrix}
    if not row_sizes <= {n_cols - 1, n_cols}:
        print('finding rows with less than 26 or more than 27 entries - Returning None')
        return None

    # A 26-token row is assumed to be missing only the trailing position
    # field, so it is padded with an empty 'Pos'.
    padded_rows = [row + [''] * (n_cols - len(row)) for row in main_data_matrix]
    out_df = pd.DataFrame(padded_rows, columns=mod_header)
    out_df['date'] = date_list
    out_df['dblhead_num'] = dblhead_num_list
    return out_df

In [5]:
# Get all the data for a particular batter

def get_full_batting_data(batter_id):
    """Scrape and stack every available season of daily batting data for one batter.

    Parameters
    ----------
    batter_id : str
        Retrosheet player id, e.g. 'furcr001'.

    Returns
    -------
    pandas.DataFrame
        The season frames from ``get_season_batting_data`` concatenated in link
        order. Each season keeps its own 0-based index, so index labels repeat
        across seasons. Seasons for which the scraper returned ``None`` are
        skipped. An empty DataFrame is returned when no season produced data.
    """
    season_frames = []
    for url in get_daily_season_links_batter(batter_id):
        season_df = get_season_batting_data(url)
        # get_season_batting_data signals an unparseable season with None.
        if season_df is not None:
            season_frames.append(season_df)

    if not season_frames:
        return pd.DataFrame()
    # Concatenate once at the end instead of re-copying the frame every iteration.
    return pd.concat(season_frames)

In [6]:
# Scrape all of Rafael Furcal's seasons (this hits the network) and preview the first rows.
df_furcal = get_full_batting_data('furcr001')
df_furcal.head()

Unnamed: 0,at_vs,Opponent,League,GS,AB,R,H,2B,3B,HR,RBI,BB,IBB,SO,HBP,SH,SF,XI,ROE,GDP,SB,CS,AVG,OBP,SLG,BP,Pos,date,dblhead_num
0,VS,COL,N,1,4,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0.5,0.5,0.5,8,ss,4- 4-2000,
1,VS,COL,N,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.5,0.5,0.5,1,"pr,2b",4- 5-2000,
2,VS,SF,N,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.429,0.429,0.429,9,ss,4- 7-2000,
3,VS,SF,N,1,4,1,1,0,0,0,1,0,0,2,0,1,0,0,0,0,0,0,0.364,0.364,0.364,1,2b,4- 8-2000,
4,VS,SF,N,1,3,3,2,0,1,0,0,1,0,0,0,1,0,0,1,0,2,0,0.429,0.467,0.571,1,ss,4- 9-2000,


In [7]:
# Inspect the scraped frame: note that every column comes back with dtype `object` (strings).
df_furcal.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1614 entries, 0 to 8
Data columns (total 29 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   at_vs        1614 non-null   object
 1   Opponent     1614 non-null   object
 2   League       1614 non-null   object
 3   GS           1614 non-null   object
 4   AB           1614 non-null   object
 5   R            1614 non-null   object
 6   H            1614 non-null   object
 7   2B           1614 non-null   object
 8   3B           1614 non-null   object
 9   HR           1614 non-null   object
 10  RBI          1614 non-null   object
 11  BB           1614 non-null   object
 12  IBB          1614 non-null   object
 13  SO           1614 non-null   object
 14  HBP          1614 non-null   object
 15  SH           1614 non-null   object
 16  SF           1614 non-null   object
 17  XI           1614 non-null   object
 18  ROE          1614 non-null   object
 19  GDP          1614 non-null   o

In [8]:
# Load the game-level table; its batterN_* lineup columns supply the player ids to scrape.
df = pd.read_csv('df_bp7.csv', low_memory=False)
# A bare `df.shape` in the middle of a cell is never displayed, so print it explicitly.
print(df.shape)
df.head()

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ER
R_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_3_h,Strt_rollsum_IP_real_3_v,Strt_rollsum_H_3_h,Strt_rollsum_H_3_v,Strt_rollsum_BFP_3_h,Strt_rollsum_BFP_3_v,Strt_rollsum_HR_3_h,Strt_rollsum_HR_3_v,Strt_rollsum_R_3_h,Strt_rollsum_R_3_v,Strt_rollsum_ER_3_h,Strt_rollsum_ER_3_v,Strt_rollsum_BB_3_h,Strt_rollsum_BB_3_v,Strt_rollsum_IB_3_h,Strt_rollsum_IB_3_v,Strt_rollsum_SO_3_h,Strt_rollsum_SO_3_v,Strt_rollsum_SH_3_h,Strt_rollsum_SH_3_v,Strt_rollsum_SF_3_h,Strt_rollsum_SF_3_v,Strt_rollsum_WP_3_h,Strt_rollsum_WP_3_v,Strt_rollsum_HBP_3_h,Strt_rollsum_HBP_3_v,Strt_rollsum_BK_3_h,Strt_rollsum_BK_3_v,Strt_rollsum_2B_3_h,Strt_rollsum_2B_3_v,Strt_rollsum_3B_3_h,Strt_rollsum_3B_3_v,Strt_rollsum_IP_real_14_h,Strt_rollsum_IP_real_14_v,Strt_rollsum_H_14_h,Strt_rollsum_H_14_v,Strt_rollsum_BFP_14_h,Strt_rollsum_BFP_14_v,Strt_rollsum_HR_14_h,Strt_rollsum_HR_14_v,Strt_rollsum_R_14_h,Strt_rollsum_R_14_v,Strt_rollsum_ER_14_h,Strt_rollsum_ER_14_v,Strt_rollsum_BB_14_h,Strt_rollsum_BB_14_v,Strt_rollsum_IB_14_h,Strt_rollsum_IB_14_v,Strt_rollsum_SO_14_h,Strt_rollsum_SO_14_v,Strt_rollsum_SH_14_h,Strt_rollsum_SH_14_v,Strt_rollsum_SF_14_h,Strt_rollsum_SF_14_v,Strt_rollsum_WP_14_h,Strt_rollsum_WP_14_v,Strt_rollsum_HBP_14_h,Strt_rollsum_HBP_14_v,Strt_rollsum_BK_14_h,Strt_rollsum_BK_14_v,Strt_rollsum_2B_14_h,Strt_rollsum_2B_14_v,Strt_rollsum_3B_14_h,Strt_rollsum_3B_14_v,Strt_rollsum_IP_real_30_h,Strt_rollsum_IP_real_30_v,Strt_rollsum_H_30_h,Strt_rollsum_H_30_v,Strt_rollsum_BFP_30_h,Strt_rollsum_BFP_30_v,Strt_rollsum_HR_30_h,Strt_rollsum_HR_30_v,Strt_rollsum_R_30_h,Strt_rolls
um_R_30_v,Strt_rollsum_ER_30_h,Strt_rollsum_ER_30_v,Strt_rollsum_BB_30_h,Strt_rollsum_BB_30_v,Strt_rollsum_IB_30_h,Strt_rollsum_IB_30_v,Strt_rollsum_SO_30_h,Strt_rollsum_SO_30_v,Strt_rollsum_SH_30_h,Strt_rollsum_SH_30_v,Strt_rollsum_SF_30_h,Strt_rollsum_SF_30_v,Strt_rollsum_WP_30_h,Strt_rollsum_WP_30_v,Strt_rollsum_HBP_30_h,Strt_rollsum_HBP_30_v,Strt_rollsum_BK_30_h,Strt_rollsum_BK_30_v,Strt_rollsum_2B_30_h,Strt_rollsum_2B_30_v,Strt_rollsum_3B_30_h,Strt_rollsum_3B_30_v,Strt_H_BB_roll_3_h,Strt_H_BB_roll_3_v,Strt_XB_roll_3_h,Strt_XB_roll_3_v,Strt_TB_roll_3_h,Strt_TB_roll_3_v,Strt_IP_mod_3_h,Strt_IP_mod_3_v,Strt_BF_mod_3_h,Strt_BF_mod_3_v,Strt_ER_mod_3_h,Strt_ER_mod_3_v,Strt_FIP_numer_3_h,Strt_FIP_numer_3_v,Strt_FIP_numer_mod_3_h,Strt_FIP_numer_mod_3_v,Strt_FIP_numer_mod2_3_h,Strt_FIP_numer_mod2_3_v,Strt_H_BB_mod_3_h,Strt_H_BB_mod_3_v,Strt_H_BB_mod2_3_h,Strt_H_BB_mod2_3_v,Strt_SO_mod_3_h,Strt_SO_mod_3_v,Strt_TB_BB_mod_3_h,Strt_TB_BB_mod_3_v,Strt_ERA_3_h,Strt_ERA_3_v,Strt_FIP_3_h,Strt_FIP_3_v,Strt_FIP_perc_3_h,Strt_FIP_perc_3_v,Strt_WHIP_3_h,Strt_WHIP_3_v,Strt_SO_perc_3_h,Strt_SO_perc_3_v,Strt_TB_BB_perc_3_h,Strt_TB_BB_perc_3_v,Strt_H_BB_perc_3_h,Strt_H_BB_perc_3_v,Strt_H_BB_roll_14_h,Strt_H_BB_roll_14_v,Strt_XB_roll_14_h,Strt_XB_roll_14_v,Strt_TB_roll_14_h,Strt_TB_roll_14_v,Strt_IP_mod_14_h,Strt_IP_mod_14_v,Strt_BF_mod_14_h,Strt_BF_mod_14_v,Strt_ER_mod_14_h,Strt_ER_mod_14_v,Strt_FIP_numer_14_h,Strt_FIP_numer_14_v,Strt_FIP_numer_mod_14_h,Strt_FIP_numer_mod_14_v,Strt_FIP_numer_mod2_14_h,Strt_FIP_numer_mod2_14_v,Strt_H_BB_mod_14_h,Strt_H_BB_mod_14_v,Strt_H_BB_mod2_14_h,Strt_H_BB_mod2_14_v,Strt_SO_mod_14_h,Strt_SO_mod_14_v,Strt_TB_BB_mod_14_h,Strt_TB_BB_mod_14_v,Strt_ERA_14_h,Strt_ERA_14_v,Strt_FIP_14_h,Strt_FIP_14_v,Strt_FIP_perc_14_h,Strt_FIP_perc_14_v,Strt_WHIP_14_h,Strt_WHIP_14_v,Strt_SO_perc_14_h,Strt_SO_perc_14_v,Strt_TB_BB_perc_14_h,Strt_TB_BB_perc_14_v,Strt_H_BB_perc_14_h,Strt_H_BB_perc_14_v,Strt_H_BB_roll_30_h,Strt_H_BB_roll_30_v,Strt_XB_roll_30_h,Strt_XB_roll_30_
v,Strt_TB_roll_30_h,Strt_TB_roll_30_v,Strt_IP_mod_30_h,Strt_IP_mod_30_v,Strt_BF_mod_30_h,Strt_BF_mod_30_v,Strt_ER_mod_30_h,Strt_ER_mod_30_v,Strt_FIP_numer_30_h,Strt_FIP_numer_30_v,Strt_FIP_numer_mod_30_h,Strt_FIP_numer_mod_30_v,Strt_FIP_numer_mod2_30_h,Strt_FIP_numer_mod2_30_v,Strt_H_BB_mod_30_h,Strt_H_BB_mod_30_v,Strt_H_BB_mod2_30_h,Strt_H_BB_mod2_30_v,Strt_SO_mod_30_h,Strt_SO_mod_30_v,Strt_TB_BB_mod_30_h,Strt_TB_BB_mod_30_v,Strt_ERA_30_h,Strt_ERA_30_v,Strt_FIP_30_h,Strt_FIP_30_v,Strt_FIP_perc_30_h,Strt_FIP_perc_30_v,Strt_WHIP_30_h,Strt_WHIP_30_v,Strt_SO_perc_30_h,Strt_SO_perc_30_v,Strt_TB_BB_perc_30_h,Strt_TB_BB_perc_30_v,Strt_H_BB_perc_30_h,Strt_H_BB_perc_30_v,innings_pitched_h,innings_pitched_v,Bpen_IP_h,Bpen_IP_v,Bpen_BFP_h,Bpen_BFP_v,Bpen_R_h,Bpen_R_v,Bpen_H_h,Bpen_H_v,Bpen_HR_h,Bpen_HR_v,Bpen_2B_h,Bpen_2B_v,Bpen_3B_h,Bpen_3B_v,Bpen_BB_h,Bpen_BB_v,Bpen_HBP_h,Bpen_HBP_v,Bpen_SO_h,Bpen_SO_v,Bpen_rollsum_IP_3_h,Bpen_rollsum_IP_3_v,Bpen_rollsum_H_3_h,Bpen_rollsum_H_3_v,Bpen_rollsum_BFP_3_h,Bpen_rollsum_BFP_3_v,Bpen_rollsum_HR_3_h,Bpen_rollsum_HR_3_v,Bpen_rollsum_R_3_h,Bpen_rollsum_R_3_v,Bpen_rollsum_BB_3_h,Bpen_rollsum_BB_3_v,Bpen_rollsum_SO_3_h,Bpen_rollsum_SO_3_v,Bpen_rollsum_HBP_3_h,Bpen_rollsum_HBP_3_v,Bpen_rollsum_2B_3_h,Bpen_rollsum_2B_3_v,Bpen_rollsum_3B_3_h,Bpen_rollsum_3B_3_v,Bpen_H_BB_roll_3_h,Bpen_H_BB_roll_3_v,Bpen_XB_roll_3_h,Bpen_XB_roll_3_v,Bpen_TB_roll_3_h,Bpen_TB_roll_3_v,Bpen_IP_mod_3_h,Bpen_IP_mod_3_v,Bpen_BF_mod_3_h,Bpen_BF_mod_3_v,Bpen_H_BB_mod_3_h,Bpen_H_BB_mod_3_v,Bpen_Bpen_H_BB_mod2_3_h,Bpen_Bpen_H_BB_mod2_3_v,Bpen_SO_mod_3_h,Bpen_SO_mod_3_v,Bpen_TB_BB_mod_3_h,Bpen_TB_BB_mod_3_v,Bpen_WHIP_3_h,Bpen_WHIP_3_v,Bpen_SO_perc_3_h,Bpen_SO_perc_3_v,Bpen_TB_BB_perc_3_h,Bpen_TB_BB_perc_3_v,Bpen_H_BB_perc_3_h,Bpen_H_BB_perc_3_v,Bpen_rollsum_IP_14_h,Bpen_rollsum_IP_14_v,Bpen_rollsum_H_14_h,Bpen_rollsum_H_14_v,Bpen_rollsum_BFP_14_h,Bpen_rollsum_BFP_14_v,Bpen_rollsum_HR_14_h,Bpen_rollsum_HR_14_v,Bpen_rollsum_R_14_h,Bpen_rollsum_R_14_v,Bpen_rollsum_BB_14_h
,Bpen_rollsum_BB_14_v,Bpen_rollsum_SO_14_h,Bpen_rollsum_SO_14_v,Bpen_rollsum_HBP_14_h,Bpen_rollsum_HBP_14_v,Bpen_rollsum_2B_14_h,Bpen_rollsum_2B_14_v,Bpen_rollsum_3B_14_h,Bpen_rollsum_3B_14_v,Bpen_H_BB_roll_14_h,Bpen_H_BB_roll_14_v,Bpen_XB_roll_14_h,Bpen_XB_roll_14_v,Bpen_TB_roll_14_h,Bpen_TB_roll_14_v,Bpen_IP_mod_14_h,Bpen_IP_mod_14_v,Bpen_BF_mod_14_h,Bpen_BF_mod_14_v,Bpen_H_BB_mod_14_h,Bpen_H_BB_mod_14_v,Bpen_Bpen_H_BB_mod2_14_h,Bpen_Bpen_H_BB_mod2_14_v,Bpen_SO_mod_14_h,Bpen_SO_mod_14_v,Bpen_TB_BB_mod_14_h,Bpen_TB_BB_mod_14_v,Bpen_WHIP_14_h,Bpen_WHIP_14_v,Bpen_SO_perc_14_h,Bpen_SO_perc_14_v,Bpen_TB_BB_perc_14_h,Bpen_TB_BB_perc_14_v,Bpen_H_BB_perc_14_h,Bpen_H_BB_perc_14_v,Bpen_rollsum_IP_30_h,Bpen_rollsum_IP_30_v,Bpen_rollsum_H_30_h,Bpen_rollsum_H_30_v,Bpen_rollsum_BFP_30_h,Bpen_rollsum_BFP_30_v,Bpen_rollsum_HR_30_h,Bpen_rollsum_HR_30_v,Bpen_rollsum_R_30_h,Bpen_rollsum_R_30_v,Bpen_rollsum_BB_30_h,Bpen_rollsum_BB_30_v,Bpen_rollsum_SO_30_h,Bpen_rollsum_SO_30_v,Bpen_rollsum_HBP_30_h,Bpen_rollsum_HBP_30_v,Bpen_rollsum_2B_30_h,Bpen_rollsum_2B_30_v,Bpen_rollsum_3B_30_h,Bpen_rollsum_3B_30_v,Bpen_H_BB_roll_30_h,Bpen_H_BB_roll_30_v,Bpen_XB_roll_30_h,Bpen_XB_roll_30_v,Bpen_TB_roll_30_h,Bpen_TB_roll_30_v,Bpen_IP_mod_30_h,Bpen_IP_mod_30_v,Bpen_BF_mod_30_h,Bpen_BF_mod_30_v,Bpen_H_BB_mod_30_h,Bpen_H_BB_mod_30_v,Bpen_Bpen_H_BB_mod2_30_h,Bpen_Bpen_H_BB_mod2_30_v,Bpen_SO_mod_30_h,Bpen_SO_mod_30_v,Bpen_TB_BB_mod_30_h,Bpen_TB_BB_mod_30_v,Bpen_WHIP_30_h,Bpen_WHIP_30_v,Bpen_SO_perc_30_h,Bpen_SO_perc_30_v,Bpen_TB_BB_perc_30_h,Bpen_TB_BB_perc_30_v,Bpen_H_BB_perc_30_h,Bpen_H_BB_perc_30_v
0,20000329,0,Wed,CHN,NL,1,NYN,NL,1,5,3,54,N,,,,TOK01,55000.0,206,100010210,001000020,33,12,1,0,2,5,1,0,1,10,0,5,1,0,4,0,13,3,3,3,0,0,27,10,2,0,1,0,33,7,1,0,1,3,1,1,0,3,0,4,0,0,1,0,8,5,5,5,1,0,27,12,0,0,4,0,marsr901,Randy Marsh,herna901,Angel Hernandez,fostm901,Marty Foster,kulpr901,Ron Kulpa,,(none),,(none),bayld001,Don Baylor,valeb102,Bobby Valentine,liebj001,Jon Lieber,hampm001,Mike Hampton,aguir001,Rick Aguilera,andrs001,Shane Andrews,liebj001,Jon Lieber,hampm001,Mike Hampton,youne001,Eric Young,4,bufod001,Damon Buford,8,gracm001,Mark Grace,3,sosas001,Sammy Sosa,9,rodrh001,Henry Rodriguez,7,andrs001,Shane Andrews,5,nievj002,Jose Nieves,6,giraj001,Joe Girardi,2,liebj001,Jon Lieber,1,hendr001,Rickey Henderson,7,hamid001,Darryl Hamilton,8,alfoe001,Edgardo Alfonzo,4,piazm001,Mike Piazza,2,ventr001,Robin Ventura,5,belld001,Derek Bell,9,zeilt001,Todd Zeile,3,ordor001,Rey Ordonez,6,hampm001,Mike Hampton,1,,Y,2000,-2,0,8,200003290,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.5,0.0,,1.0,1.0,5.0,7.0,4.0,5.0,25.0,29.0,0.0,0.0,2.0,1.0,2.0,1.0,9.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,7.0,21.0,24.666667,15.0,16.0,88.0,88.0,0.0,4.0,3.0,9.0,3.0,9.0,10.0,0.0,0.0,0.0,21.0,27.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,95.666667,94.0,85.0,95.0,406.0,389.0,6.0,15.0,34.0,53.0,31.0,49.0,52.0,14.0,0.0,3.0,79.0,94.0,5.0,4.0,2.0,5.0,6.0,0.0,0.0,1.0,0.0,1.0,10.0,12.0,1.0,5.0,215.333333,195.333333,183.0,221.0,877.0,847.0,10.0,28.0,73.0,106.0,64.0,91.0,87.0,46.0,1.0,6.0,160.0,180.0,10.0,6.0,7.0,11.0,9.0,2.0,5.0,1.0,0.0,2.0,28.0,23.0,2.0,8.0,25.0,16.0,2.0,14.0,17.0,30.0,21.0,24.666667,88.0,88.0,3.0,9.0,33.0,46.0,33.0,46.0,33.0,46.0,25.0,16.0,25.0,16.0,21.0,27.0,27.0,30.0,1.285714,3.283784,1.571429,1.864865,0.375,0.522727,1.190476,0.648649,0.238636,0.306818,0.306818,0.340909,0.284091,0.181818,137.0,109.0,30.0,67.0,115.0,162.0,95.666667,94.0,406.0,389.0,31.0,49.0,331.0,334.0,331.0,334.0,331.0,334.0,137.0,109.0,137.0,109.0,79.0,94.0,167.0,176.0,
2.916376,4.691489,3.45993,3.553191,0.815271,0.858612,1.432056,1.159574,0.194581,0.241645,0.41133,0.452442,0.337438,0.280206,270.0,267.0,62.0,123.0,245.0,344.0,215.333333,195.333333,877.0,847.0,64.0,91.0,620.0,805.0,620.0,805.0,620.0,805.0,270.0,267.0,270.0,267.0,160.0,180.0,332.0,390.0,2.674923,4.192833,2.879257,4.12116,0.706956,0.950413,1.25387,1.366894,0.18244,0.212515,0.378563,0.460449,0.307868,0.31523,9.0,9.0,4.0,2.0,19.0,7.0,3.0,2.0,8.0,2.0,2.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,18.0,18.0,9.0,9.0,6.66,6.66,3.6,3.6,8.1,8.1,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,28.0,84.0,84.0,42.0,42.0,31.08,31.08,16.8,16.8,37.8,37.8,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,60.0,180.0,180.0,90.0,90.0,66.6,66.6,36.0,36.0,81.0,81.0,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37
1,20000330,0,Thu,NYN,NL,2,CHN,NL,2,5,1,66,N,,,,TOK01,55000.0,235,1000004,00001000000,37,6,2,0,1,5,1,1,1,8,0,5,1,0,0,0,10,5,0,0,0,0,33,14,2,0,2,0,36,5,0,0,0,0,2,0,0,6,1,9,0,0,2,0,10,7,5,5,0,0,33,14,0,0,0,0,herna901,Angel Hernandez,fostm901,Marty Foster,kulpr901,Ron Kulpa,marsr901,Randy Marsh,,(none),,(none),valeb102,Bobby Valentine,bayld001,Don Baylor,cookd001,Dennis Cook,yound002,Danny Young,,(none),agbab001,Benny Agbayani,reedr002,Rick Reed,farnk001,Kyle Farnsworth,hendr001,Rickey Henderson,7,hamid001,Darryl Hamilton,8,alfoe001,Edgardo Alfonzo,4,piazm001,Mike Piazza,2,ventr001,Robin Ventura,5,belld001,Derek Bell,9,zeilt001,Todd Zeile,3,ordor001,Rey Ordonez,6,reedr002,Rick Reed,1,youne001,Eric Young,4,bufod001,Damon Buford,8,sosas001,Sammy Sosa,9,gracm001,Mark Grace,3,rodrh001,Henry Rodriguez,7,andrs001,Shane Andrews,5,husoj001,Jeff Huson,6,giraj001,Joe Girardi,2,farnk001,Kyle Farnsworth,1,,Y,2000,-4,0,6,200003300,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.5,0.0,,1.0,1.0,5.2,8.0,3.0,4.0,25.0,30.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,2.0,0.0,0.0,4.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5.666667,8.0,19.333333,21.0,14.0,12.0,79.0,77.0,2.0,2.0,5.0,4.0,5.0,4.0,8.0,3.0,0.0,0.0,13.0,20.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,73.0,81.333333,61.0,85.0,304.0,342.0,12.0,13.0,29.0,37.0,25.0,36.0,26.0,23.0,0.0,2.0,40.0,55.0,2.0,1.0,1.0,1.0,3.0,1.0,1.0,0.0,1.0,0.0,6.0,17.0,0.0,4.0,130.0,174.333333,140.0,191.0,579.0,738.0,28.0,33.0,80.0,92.0,73.0,90.0,52.0,50.0,1.0,2.0,70.0,118.0,6.0,7.0,2.0,3.0,7.0,1.0,3.0,2.0,1.0,0.0,22.0,38.0,2.0,5.0,22.0,15.0,7.0,8.0,21.0,20.0,19.333333,21.0,79.0,77.0,5.0,4.0,66.0,31.0,66.0,31.0,66.0,31.0,22.0,15.0,22.0,15.0,13.0,20.0,29.0,23.0,2.327586,1.714286,3.413793,1.47619,0.835443,0.402597,1.137931,0.714286,0.164557,0.25974,0.367089,0.298701,0.278481,0.194805,87.0,108.0,42.0,64.0,103.0,149.0,73.0,81.333333,304.0,342.0,25.0,36.0,337.0,383.0,337.0,383.0,337.0,383.0,87.0,108.0,87.0,108.0,40.0,55.0,129.0,172.0,3.082192,3.983607,4.
616438,4.709016,1.108553,1.119883,1.191781,1.327869,0.131579,0.160819,0.424342,0.502924,0.286184,0.315789,192.0,241.0,110.0,147.0,250.0,338.0,130.0,174.333333,579.0,738.0,73.0,90.0,800.0,916.0,800.0,916.0,800.0,916.0,192.0,241.0,192.0,241.0,70.0,118.0,302.0,388.0,5.053846,4.646272,6.153846,5.254302,1.381693,1.241192,1.476923,1.382409,0.120898,0.159892,0.521589,0.525745,0.331606,0.326558,11.0,11.0,5.333333,3.0,21.0,12.0,4.0,0.0,3.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,1.0,5.0,2.0,4.0,2.0,8.0,7.0,19.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,9.0,3.0,7.0,5.0,15.0,6.0,6.0,18.0,19.0,9.0,12.0,7.07,9.0,4.2,4.0,10.95,16.0,1.5,2.0,0.233333,0.210526,0.608333,0.842105,0.392778,0.473684,2.0,4.0,2.0,8.0,7.0,19.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,9.0,3.0,7.0,5.0,15.0,28.0,28.0,84.0,84.0,42.0,45.0,31.49,33.05,17.4,17.0,40.65,45.25,1.5,1.607143,0.207143,0.202381,0.483929,0.53869,0.374881,0.393452,2.0,4.0,2.0,8.0,7.0,19.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,9.0,3.0,7.0,5.0,15.0,60.0,60.0,180.0,180.0,90.0,93.0,67.01,68.57,36.6,36.2,83.85,88.45,1.5,1.55,0.203333,0.201111,0.465833,0.491389,0.372278,0.380944
2,20000403,0,Mon,COL,NL,1,ATL,NL,1,0,2,51,D,,,,ATL02,42255.0,134,0,00000020x,31,6,2,0,0,0,1,0,0,2,2,7,0,0,1,0,7,3,2,2,1,0,24,10,0,0,1,0,30,7,0,0,2,2,0,0,1,1,0,6,1,0,1,0,6,2,0,0,0,0,27,12,0,0,1,0,hirsj901,John Hirschbeck,willc901,Charlie Williams,wegnm901,Mark Wegner,reynj901,Jim Reynolds,,(none),,(none),bellb001,Buddy Bell,cox-b103,Bobby Cox,maddg002,Greg Maddux,astap001,Pedro Astacio,remlm001,Mike Remlinger,galaa001,Andres Galarraga,astap001,Pedro Astacio,maddg002,Greg Maddux,goodt001,Tom Goodwin,8,lansm001,Mike Lansing,4,walkl001,Larry Walker,9,cirij001,Jeff Cirillo,5,heltt001,Todd Helton,3,hammj001,Jeffrey Hammonds,7,peren001,Neifi Perez,6,maynb001,Brent Mayne,2,astap001,Pedro Astacio,1,veraq001,Quilvio Veras,4,sandr002,Reggie Sanders,7,jonec004,Chipper Jones,5,jordb001,Brian Jordan,9,galaa001,Andres Galarraga,3,jonea002,Andruw Jones,8,peree002,Eddie Perez,2,weisw001,Walt Weiss,6,maddg002,Greg Maddux,1,,Y,2000,2,1,2,200004030,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.5,0.0,,1.0,1.0,7.2,7.1,5.0,6.0,29.0,29.0,0.0,2.0,0.0,2.0,0.0,2.0,2.0,1.0,2.0,0.0,6.0,5.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,7.666667,7.333333,19.0,24.0,26.0,25.0,88.0,100.0,2.0,3.0,15.0,9.0,9.0,9.0,2.0,8.0,1.0,1.0,16.0,24.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,4.0,8.0,0.0,1.0,92.333333,98.666667,113.0,99.0,400.0,416.0,5.0,17.0,47.0,52.0,39.0,50.0,16.0,28.0,4.0,4.0,62.0,94.0,6.0,1.0,3.0,4.0,0.0,2.0,1.0,4.0,0.0,0.0,15.0,20.0,1.0,3.0,199.333333,211.666667,235.0,229.0,853.0,911.0,14.0,35.0,96.0,120.0,80.0,111.0,34.0,66.0,8.0,6.0,127.0,194.0,14.0,5.0,5.0,10.0,1.0,3.0,4.0,9.0,0.0,0.0,34.0,44.0,4.0,5.0,28.0,33.0,10.0,19.0,36.0,44.0,19.0,24.0,88.0,100.0,9.0,9.0,78.0,90.0,78.0,90.0,78.0,90.0,28.0,33.0,28.0,33.0,16.0,24.0,38.0,52.0,4.263158,3.375,4.105263,3.75,0.886364,0.9,1.473684,1.375,0.181818,0.24,0.431818,0.52,0.318182,0.33,129.0,127.0,32.0,77.0,145.0,176.0,92.333333,98.666667,400.0,416.0,39.0,50.0,328.0,414.0,328.0,414.0,328.0,414.0,129.0,127.0,129.0,127.0,62.0,94.0,161.0,204.0,3.8014
44,4.560811,3.552347,4.195946,0.82,0.995192,1.397112,1.287162,0.155,0.225962,0.4025,0.490385,0.3225,0.305288,269.0,295.0,84.0,159.0,319.0,388.0,199.333333,211.666667,853.0,911.0,80.0,111.0,735.0,952.0,735.0,952.0,735.0,952.0,269.0,295.0,269.0,295.0,127.0,194.0,353.0,454.0,3.61204,4.719685,3.687291,4.497638,0.861665,1.045005,1.349498,1.393701,0.148886,0.212953,0.413834,0.498353,0.315358,0.32382,9.0,8.0,1.333333,0.666667,4.0,3.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,18.0,18.0,9.0,9.0,6.66,6.66,3.6,3.6,8.1,8.1,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,28.0,84.0,84.0,42.0,42.0,31.08,31.08,16.8,16.8,37.8,37.8,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,60.0,180.0,180.0,90.0,90.0,66.6,66.6,36.0,36.0,81.0,81.0,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37
3,20000403,0,Mon,MIL,NL,1,CIN,NL,1,3,3,31,D,,,,CIN08,55596.0,111,2100,21000x,22,7,1,0,0,2,0,0,0,5,0,1,1,0,0,0,8,1,3,3,0,0,15,5,0,0,0,0,19,5,1,0,1,3,0,0,0,1,0,4,0,0,0,0,2,2,2,2,0,0,16,8,2,0,0,0,marsr901,Randy Marsh,herna901,Angel Hernandez,fostm901,Marty Foster,kulpr901,Ron Kulpa,,(none),,(none),loped001,Davey Lopes,mckej801,Jack McKeon,,,,,,(none),,(none),woods001,Steve Woodard,harnp001,Pete Harnisch,grism001,Marquis Grissom,8,lorem001,Mark Loretta,6,burnj001,Jeromy Burnitz,9,jenkg001,Geoff Jenkins,7,hernj001,Jose Hernandez,5,barkk001,Kevin Barker,3,bellr002,Ronnie Belliard,4,blanh001,Henry Blanco,2,woods001,Steve Woodard,1,reesp001,Pokey Reese,4,larkb001,Barry Larkin,6,grifk002,Ken Griffey,8,bichd001,Dante Bichette,9,yound001,Dmitri Young,3,taube001,Ed Taubensee,2,boona001,Aaron Boone,5,tuckm001,Michael Tucker,7,harnp001,Pete Harnisch,1,,Y,2000,0,0,6,200004030,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.5,0.0,,1.0,1.0,4.0,5.0,7.0,5.0,21.0,20.0,0.0,1.0,3.0,3.0,2.0,3.0,3.0,1.0,0.0,0.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,5.0,16.666667,15.0,13.0,19.0,66.0,64.0,2.0,3.0,6.0,10.0,6.0,9.0,5.0,0.0,1.0,0.0,7.0,9.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,85.333333,81.0,78.0,92.0,356.0,349.0,10.0,12.0,40.0,48.0,37.0,42.0,28.0,13.0,2.0,3.0,55.0,65.0,4.0,5.0,3.0,3.0,1.0,2.0,1.0,3.0,0.0,0.0,12.0,23.0,4.0,1.0,177.0,178.0,172.0,213.0,750.0,773.0,22.0,22.0,79.0,97.0,75.0,89.0,55.0,34.0,2.0,7.0,108.0,116.0,8.0,9.0,5.0,4.0,3.0,4.0,5.0,6.0,0.0,1.0,36.0,50.0,5.0,3.0,18.0,19.0,7.0,11.0,20.0,30.0,16.666667,15.0,66.0,64.0,6.0,9.0,66.0,78.0,66.0,78.0,66.0,78.0,18.0,19.0,18.0,19.0,7.0,9.0,25.0,30.0,3.24,5.4,3.96,5.2,1.0,1.21875,1.08,1.266667,0.106061,0.140625,0.378788,0.46875,0.272727,0.296875,106.0,105.0,50.0,61.0,128.0,153.0,85.333333,81.0,356.0,349.0,37.0,42.0,338.0,341.0,338.0,341.0,338.0,341.0,106.0,105.0,106.0,105.0,55.0,65.0,156.0,166.0,3.902344,4.666667,3.960938,4.209877,0.949438,0.977077,1.242188,1.296296,0.154494,0.186246,0.438202,0.475645,
0.297753,0.30086,227.0,247.0,112.0,122.0,284.0,335.0,177.0,178.0,750.0,773.0,75.0,89.0,751.0,795.0,751.0,795.0,751.0,795.0,227.0,247.0,227.0,247.0,108.0,116.0,339.0,369.0,3.813559,4.5,4.242938,4.466292,1.001333,1.028461,1.282486,1.38764,0.144,0.150065,0.452,0.477361,0.302667,0.319534,5.333333,5.0,1.333333,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,18.0,18.0,9.0,9.0,6.66,6.66,3.6,3.6,8.1,8.1,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,28.0,84.0,84.0,42.0,42.0,31.08,31.08,16.8,16.8,37.8,37.8,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,60.0,180.0,180.0,90.0,90.0,66.6,66.6,36.0,36.0,81.0,81.0,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37
4,20000403,0,Mon,SFN,NL,1,FLO,NL,1,4,6,51,N,,,,MIA01,35101.0,166,2100001,20002101x,35,10,2,2,1,4,0,0,0,1,0,8,0,0,2,0,5,2,4,4,0,0,24,7,2,0,1,0,36,12,3,0,0,5,0,0,1,1,0,7,1,0,1,0,8,3,4,4,0,0,27,15,0,0,2,0,demud901,Dana DeMuth,relic901,Charlie Reliford,eddid901,Doug Eddings,carlm901,Mark Carlson,,(none),,(none),baked002,Dusty Baker,bolej801,John Boles,ferna001,Alex Fernandez,hernl003,Livan Hernandez,alfoa001,Antonio Alfonseca,,(none),hernl003,Livan Hernandez,ferna001,Alex Fernandez,benam001,Marvin Benard,8,muelb001,Bill Mueller,5,bondb001,Barry Bonds,7,kentj001,Jeff Kent,4,snowj001,J.T. Snow,3,burke001,Ellis Burks,9,aurir001,Rich Aurilia,6,estab001,Bobby Estalella,2,hernl003,Livan Hernandez,1,castl001,Luis Castillo,4,gonza002,Alex Gonzalez,6,floyc001,Cliff Floyd,7,wilsp002,Preston Wilson,8,lowem001,Mike Lowell,5,millk005,Kevin Millar,3,browb003,Brant Brown,9,redmm001,Mike Redmond,2,ferna001,Alex Fernandez,1,,Y,2000,2,1,10,200004030,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.5,0.0,,1.0,1.0,7.0,6.0,8.0,9.0,28.0,29.0,0.0,0.0,3.0,5.0,3.0,3.0,1.0,1.0,0.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,3.0,2.0,0.0,7.0,6.0,20.333333,14.333333,15.0,20.0,80.0,64.0,5.0,1.0,13.0,5.0,13.0,5.0,8.0,2.0,1.0,0.0,15.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,93.0,91.333333,86.0,100.0,386.0,395.0,9.0,11.0,46.0,48.0,40.0,45.0,27.0,28.0,1.0,2.0,55.0,67.0,3.0,4.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,2.0,15.0,15.0,7.0,3.0,180.0,199.666667,168.0,227.0,750.0,886.0,16.0,23.0,79.0,110.0,72.0,103.0,55.0,76.0,2.0,5.0,121.0,144.0,7.0,7.0,7.0,6.0,2.0,2.0,4.0,2.0,0.0,2.0,34.0,38.0,8.0,5.0,23.0,22.0,19.0,7.0,34.0,27.0,20.333333,14.333333,80.0,64.0,13.0,5.0,104.0,71.0,104.0,71.0,104.0,71.0,23.0,22.0,23.0,22.0,15.0,4.0,42.0,29.0,5.754098,3.139535,5.114754,4.953488,1.3,1.109375,1.131148,1.534884,0.1875,0.0625,0.525,0.453125,0.2875,0.34375,113.0,128.0,56.0,54.0,142.0,154.0,93.0,91.333333,386.0,395.0,40.0,45.0,346.0,393.0,346.0,393.0,346.0,393.0,113.0,128.0,113.0,128.0,55.0,67.0,169.0,182.0,3.
870968,4.434307,3.72043,4.30292,0.896373,0.994937,1.215054,1.40146,0.142487,0.16962,0.437824,0.460759,0.292746,0.324051,223.0,303.0,98.0,117.0,266.0,344.0,180.0,199.666667,750.0,886.0,72.0,103.0,635.0,920.0,635.0,920.0,635.0,920.0,223.0,303.0,223.0,303.0,121.0,144.0,321.0,420.0,3.6,4.642738,3.527778,4.607679,0.846667,1.038375,1.238889,1.517529,0.161333,0.162528,0.428,0.474041,0.297333,0.341986,9.0,8.0,2.0,2.0,8.0,9.0,1.0,1.0,2.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,18.0,18.0,9.0,9.0,6.66,6.66,3.6,3.6,8.1,8.1,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,28.0,84.0,84.0,42.0,42.0,31.08,31.08,16.8,16.8,37.8,37.8,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,60.0,180.0,180.0,90.0,90.0,66.6,66.6,36.0,36.0,81.0,81.0,1.5,1.5,0.2,0.2,0.45,0.45,0.37,0.37


In [9]:
# Collect every distinct player id that appears in any starting-lineup slot
# (slots 1-9, home and visitor).
# NOTE: the ids are deliberately read from the 'batterN_name' columns. In
# df_bp7.csv those columns hold the Retrosheet ids (e.g. 'youne001'), while the
# 'batterN_id' columns hold the display names (e.g. 'Eric Young').
lineup_cols = ['batter' + str(num) + '_name' + suffix
               for num in range(1, 10)
               for suffix in ['_h', '_v']]
# Stack all lineup columns once and de-duplicate in order of first appearance.
# Missing values are dropped so that only string ids reach the filename logic.
batter_ids = pd.unique(
    pd.concat([df[colname] for colname in lineup_cols], ignore_index=True).dropna()
)

In [10]:
# Sanity check: how many distinct ids were collected, and what the first ten look like.
len(batter_ids), batter_ids[:10]

(4392,
 array(['hendr001', 'youne001', 'veraq001', 'reesp001', 'castl001',
        'bergp001', 'vinaf001', 'erstd001', 'andeb001', 'walkt002'],
       dtype=object))

In [15]:
# Given to you in /batter_data
# *** run only if you don't mind waiting ***
# Every batter needs at least two requests with a one-second pause each, and the
# loop is resumable: batters whose CSV already exists are skipped.

# Specify the folder path where you want to save the files
# NOTE(review): this is an absolute, machine-specific path -- adjust before running elsewhere.
folder_path = '/Volumes/CharmedXi/beatVegas/batter_2000'
# Make sure the target folder exists up front instead of failing on the first write.
os.makedirs(folder_path, exist_ok=True)

for i, p_id in enumerate(batter_ids):
    print(i, p_id)
    # Construct the file path with the folder path and batter ID
    fname_out = os.path.join(folder_path, 'batting_data_' + p_id + '.csv')

    # Check if the file already exists
    if os.path.exists(fname_out):
        print(f'File exists for {p_id}. Skipping...')
        continue

    # Fetch batting data
    df_temp = get_full_batting_data(p_id)

    # Do not write an empty CSV: it would mark this batter as done and be
    # skipped on every later run even though no data was saved.
    if df_temp.empty:
        print(f'No batting data found for {p_id}. Not writing a file.')
        continue

    # Save the data to CSV
    df_temp.to_csv(fname_out, index=False)

0 hendr001
File exists for hendr001. Skipping...
1 youne001
File exists for youne001. Skipping...
2 veraq001
File exists for veraq001. Skipping...
3 reesp001
File exists for reesp001. Skipping...
4 castl001
File exists for castl001. Skipping...
5 bergp001
File exists for bergp001. Skipping...
6 vinaf001
File exists for vinaf001. Skipping...
7 erstd001
File exists for erstd001. Skipping...
8 andeb001
File exists for andeb001. Skipping...
9 walkt002
File exists for walkt002. Skipping...
10 beckr002
File exists for beckr002. Skipping...
11 clayr001
File exists for clayr001. Skipping...
12 stews002
File exists for stews002. Skipping...
13 womat001
File exists for womat001. Skipping...
14 kendj001
File exists for kendj001. Skipping...
15 chrir001
File exists for chrir001. Skipping...
16 camem001
File exists for camem001. Skipping...
17 hockd001
File exists for hockd001. Skipping...
18 mclem001
File exists for mclem001. Skipping...
19 biggc001
File exists for biggc001. Skipping...
20 nunnj00