# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [376]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [1121]:
year = '2004'

In [1122]:
url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'
url_app = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-appearances-fielding.shtml'

In [1123]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/2004-specialpos_c-fielding.shtml'

In [1124]:
session_requests = requests.session()

result = session_requests.get(url_bat, headers = dict(referer = url_bat))
result.status_code

200

In [1125]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [1126]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [1127]:
soup = BeautifulSoup(result.content, 'lxml')

In [1128]:
# print(soup.prettify())

In [1129]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [1130]:
bat = pullTable(url_bat, 'players_standard_batting')

In [1131]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [1132]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [1133]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [1134]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [1135]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [1136]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [1137]:
findTables(url_app)

['teams_appearances', 'players_players_appearances_fielding']

In [1138]:
app = pullTable(url_app, 'players_players_appearances_fielding')

In [1139]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
1304,1305.0,Gregg Zaun#,33.0,TOR,AL,107,392,338,46,91,24,0,6,36,0,2,47,61,0.269,0.367,0.393,0.761,96.0,133,7,6,0,1,3,2H/D
1305,1306.0,Todd Zeile,38.0,NYM,NL,137,396,348,30,81,16,0,9,35,0,0,44,83,0.233,0.319,0.356,0.675,76.0,124,13,1,1,2,1,35H/21
1306,1307.0,Alan Zinter#,36.0,ARI,NL,28,40,34,2,7,2,0,1,6,0,0,5,15,0.206,0.3,0.353,0.653,65.0,12,0,0,0,1,0,H/3D
1307,1308.0,Barry Zito*,26.0,OAK,AL,2,4,4,0,0,0,0,0,0,0,0,0,3,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,1
1308,,LgAvg per 600 PA,,,,195,600,533,74,141,28,3,17,70,8,3,52,101,0.265,0.334,0.425,0.759,,227,12,6,6,4,4,


In [1140]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [1141]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [1142]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    812
L    367
S    129
Name: Bats, dtype: int64

In [1143]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1303,1304,Victor Zambrano,28,NYM,NL,3,7,6,0,1,0,0,0,0,0,0,0,1,0.167,0.167,0.167,0.333,-12,1,0,0,1,0,0,/1,S
1304,1305,Gregg Zaun,33,TOR,AL,107,392,338,46,91,24,0,6,36,0,2,47,61,0.269,0.367,0.393,0.761,96,133,7,6,0,1,3,2H/D,S
1305,1306,Todd Zeile,38,NYM,NL,137,396,348,30,81,16,0,9,35,0,0,44,83,0.233,0.319,0.356,0.675,76,124,13,1,1,2,1,35H/21,R
1306,1307,Alan Zinter,36,ARI,NL,28,40,34,2,7,2,0,1,6,0,0,5,15,0.206,0.3,0.353,0.653,65,12,0,0,0,1,0,H/3D,S
1307,1308,Barry Zito,26,OAK,AL,2,4,4,0,0,0,0,0,0,0,0,0,3,0.0,0.0,0.0,0.0,-100,0,0,0,0,0,0,1,L


In [1144]:
bat.drop(columns=["Rk"], inplace=True)

In [1145]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1303,Victor Zambrano,28,NYM,NL,3,7,6,0,1,0,0,0,0,0,0,0,1,0.167,0.167,0.167,0.333,-12,1,0,0,1,0,0,/1,S
1304,Gregg Zaun,33,TOR,AL,107,392,338,46,91,24,0,6,36,0,2,47,61,0.269,0.367,0.393,0.761,96,133,7,6,0,1,3,2H/D,S
1305,Todd Zeile,38,NYM,NL,137,396,348,30,81,16,0,9,35,0,0,44,83,0.233,0.319,0.356,0.675,76,124,13,1,1,2,1,35H/21,R
1306,Alan Zinter,36,ARI,NL,28,40,34,2,7,2,0,1,6,0,0,5,15,0.206,0.3,0.353,0.653,65,12,0,0,0,1,0,H/3D,S
1307,Barry Zito,26,OAK,AL,2,4,4,0,0,0,0,0,0,0,0,0,3,0.0,0.0,0.0,0.0,-100,0,0,0,0,0,0,1,L


In [1146]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
724,725.0,Victor Zambrano,28.0,TBD,AL,9,7,0.563,4.43,23,22,0,0,0,0,128.0,107,68,63,13,96,2,109,16,0,5,588,102,5.29,1.586,7.5,0.9,6.8,7.7,1.14
725,726.0,Victor Zambrano,28.0,NYM,NL,2,0,1.0,3.86,3,3,0,0,0,0,14.0,12,9,6,0,6,0,14,0,0,1,62,114,2.33,1.286,7.7,0.0,3.9,9.0,2.33
726,727.0,Todd Zeile,38.0,NYM,NL,0,0,,45.0,1,0,1,0,0,0,1.0,4,5,5,0,2,0,0,0,0,0,9,13,9.05,6.0,36.0,0.0,18.0,0.0,0.0
727,728.0,Barry Zito*,26.0,OAK,AL,11,11,0.5,4.48,34,34,0,0,0,0,213.0,216,116,106,28,81,2,163,9,1,4,926,102,4.5,1.394,9.1,1.2,3.4,6.9,2.01
728,,LgAvg per 180 IP,,,,10,10,0.499,4.5,77,20,20,1,0,5,180.0,185,98,90,23,68,6,132,8,1,6,783,100,4.49,1.406,9.3,1.1,3.4,6.6,1.95


In [1147]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [1148]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [1149]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    538
L    190
Name: Throws, dtype: int64

In [1150]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
723,724,Victor Zambrano,28,TOT,MLB,11,7,0.611,4.37,26,25,0,0,0,0,142.0,119,77,69,13,102,2,123,16,0,6,650,103,5.0,1.556,7.5,0.8,6.5,7.8,1.21,R
724,725,Victor Zambrano,28,TBD,AL,9,7,0.563,4.43,23,22,0,0,0,0,128.0,107,68,63,13,96,2,109,16,0,5,588,102,5.29,1.586,7.5,0.9,6.8,7.7,1.14,R
725,726,Victor Zambrano,28,NYM,NL,2,0,1.0,3.86,3,3,0,0,0,0,14.0,12,9,6,0,6,0,14,0,0,1,62,114,2.33,1.286,7.7,0.0,3.9,9.0,2.33,R
726,727,Todd Zeile,38,NYM,NL,0,0,,45.0,1,0,1,0,0,0,1.0,4,5,5,0,2,0,0,0,0,0,9,13,9.05,6.0,36.0,0.0,18.0,0.0,0.0,R
727,728,Barry Zito,26,OAK,AL,11,11,0.5,4.48,34,34,0,0,0,0,213.0,216,116,106,28,81,2,163,9,1,4,926,102,4.5,1.394,9.1,1.2,3.4,6.9,2.01,L


In [1151]:
pit.drop(columns=["Rk"], inplace=True)

In [1152]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [1153]:
players.shape

(1441, 62)

In [1154]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary
1237,1238.0,Gregg Zaun,33.0,TOR,AL,97,91,82,789.0,601,547,46,8,5,0.987,-1.0,-2.0,6.0,9.0,2.0,6.76,6.11,C
1238,1239.0,Todd Zeile,38.0,NYM,NL,116,75,56,726.0,542,436,96,10,42,0.982,-6.0,-10.0,2.0,3.0,1.0,6.6,4.59,1B-3B-C-P
1239,1240.0,Alan Zinter,36.0,ARI,NL,8,3,3,39.0,45,41,3,1,1,0.978,1.0,15.0,-1.0,-37.0,0.0,10.15,5.5,1B
1240,1241.0,Barry Zito,26.0,OAK,AL,34,34,0,213.0,38,5,31,2,1,0.947,,,1.0,1.0,1.0,1.52,1.06,P
1241,,LgAvg,,,,214,151,116,1348.0,632,447,174,11,42,0.983,0.0,0.0,,,,4.15,2.91,


In [1155]:
fld = fld[fld["Name"] != "LgAvg"]

In [1156]:
fld.drop(columns=["Rk"], inplace=True)

In [1157]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [1158]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rctch,Rdrs,Rdrs/yr,Rgood,RsbC,RerC,RF/9,RF/G,PB,WP,SB,CS,CS%
124,125.0,Tom Wilson,33.0,LAD,NL,7,0,0,17.0,11,11,0,0,0,1.0,0,0,0,0,0,,0,0,5.82,1.57,0,1,0,1,100%
125,126.0,Vance Wilson,31.0,NYM,NL,69,38,34,384.0,282,257,23,2,2,0.993,1,3,1,1,3,0.0,1,-1,6.56,4.06,4,13,21,11,34%
126,127.0,Gregg Zaun,33.0,TOR,AL,97,91,82,789.0,601,547,46,8,5,0.987,-1,-2,-1,6,9,2.0,2,2,6.76,6.11,3,28,60,23,28%
127,128.0,Todd Zeile,38.0,NYM,NL,2,2,0,14.0,12,12,0,0,0,1.0,0,0,0,0,0,0.0,0,0,7.71,6.0,0,0,0,0,
128,,LgAvg,,,,19,17,14,148.0,121,113,8,1,1,0.993,0,0,0,0,0,0.0,0,0,7.3,6.36,1,5,9,4,30%


In [1159]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [1160]:
cat = cat[cat["Name"] != "LgAvg"]

In [1161]:
cat.drop(columns=["Rk"], inplace=True)

In [1162]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [1163]:
app.tail()

Unnamed: 0,Rk,Name,Age,Tm,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
1242,1243,Victor Zambrano,28,2TM,4,26,25,6,26,26,0,0,0,0,0,0,0,0,0,0,0,0
1243,1244,Gregg Zaun,33,TOR,10,107,95,107,97,0,97,0,0,0,0,0,0,0,0,6,12,0
1244,1245,Todd Zeile,38,NYM,16,137,75,137,102,1,2,67,0,46,0,0,0,0,0,0,43,1
1245,1246,Alan Zinter,36,ARI,2,28,4,28,8,0,0,8,0,0,0,0,0,0,0,2,19,1
1246,1247,Barry Zito,26,OAK,5,34,34,2,34,34,0,0,0,0,0,0,0,0,0,0,0,0


In [1164]:
players = pd.merge(players, app, how='left', on=["Name", "Age"], suffixes=('', '_app'))

In [1165]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [1166]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,0.56,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0


In [1167]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1168]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'Rdrs',
 'Rdrs/yr',
 'Rgood',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'Rdrs_cat',
 'Rdrs/yr_cat',
 'Rgood_cat',
 'RsbC',
 'RerC',
 'RF/9_cat',
 'RF/G_cat',
 'PB',
 '

In [1169]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [1170]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     719
OF    273
C     117
1B     92
2B     90
SS     77
3B     67
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [1171]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [1172]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    527
0.500000     13
0.333333     13
0.142857     11
0.250000     10
           ... 
0.305263      1
0.724138      1
0.775641      1
0.362963      1
0.592857      1
Name: rbi_per_g, Length: 527, dtype: int64

In [1173]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     1367
#      74
Name: clutch, dtype: int64

In [1174]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#
23,Moises Alou,37,CHC,NL,155.0,675.0,601.0,106.0,176.0,36.0,3.0,39.0,106.0,3.0,0.0,68.0,80.0,0.293,0.361,0.557,0.919,132.0,335.0,12.0,0.0,0.0,6.0,2.0,*7/DH,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,154.0,152.0,131.0,1338.1,255.0,240.0,7.0,8.0,2.0,0.969,-11.0,-10.0,-1.0,-1.0,-2.0,1.66,1.60,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,21,CHC,13,155,153,155,154,0,0,0,0,0,0,154,0,0,154,1,1,0,OF,0.683871,#
28,Garret Anderson,32,ANA,AL,112.0,475.0,442.0,57.0,133.0,20.0,1.0,14.0,75.0,2.0,1.0,29.0,75.0,0.301,0.343,0.446,0.789,107.0,197.0,3.0,1.0,0.0,3.0,6.0,8D/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ANA,AL,94.0,92.0,75.0,791.2,217.0,211.0,4.0,2.0,1.0,0.991,-7.0,-11.0,-5.0,-8.0,2.0,2.44,2.29,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,ANA,11,112,110,112,94,0,0,0,0,0,0,0,94,0,94,18,2,0,OF,0.669643,#
60,Tony Batista,30,MON,NL,157.0,650.0,606.0,76.0,146.0,30.0,2.0,32.0,110.0,14.0,6.0,26.0,78.0,0.241,0.272,0.455,0.728,81.0,276.0,14.0,4.0,4.0,10.0,4.0,*5/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MON,NL,155.0,149.0,144.0,1326.0,409.0,82.0,308.0,19.0,35.0,0.954,5.0,4.0,-1.0,-1.0,0.0,2.65,2.52,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,62,MON,9,157,149,157,155,0,0,0,0,155,0,0,0,0,0,0,5,0,3B,0.700637,#
69,Jason Bay,25,PIT,NL,120.0,472.0,411.0,61.0,116.0,24.0,4.0,26.0,82.0,4.0,6.0,41.0,129.0,0.282,0.358,0.550,0.907,132.0,226.0,9.0,10.0,5.0,5.0,2.0,*7/H8,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,119.0,110.0,105.0,984.0,216.0,211.0,3.0,2.0,0.0,0.991,-3.0,-4.0,0.0,0.0,1.0,1.96,1.80,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,67,PIT,2,120,110,120,119,0,0,0,0,0,0,117,5,0,119,0,6,0,OF,0.683333,#
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1164,Jim Thome,33,PHI,NL,143.0,618.0,508.0,97.0,139.0,28.0,1.0,42.0,105.0,0.0,2.0,104.0,144.0,0.274,0.396,0.581,0.977,144.0,295.0,10.0,2.0,0.0,4.0,26.0,*3/DH,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,134.0,134.0,123.0,1179.2,1182.0,1091.0,84.0,7.0,103.0,0.994,3.0,3.0,-2.0,-2.0,2.0,8.96,8.77,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,1109,PHI,14,143,140,143,134,0,0,134,0,0,0,0,0,0,0,6,3,0,1B,0.734266,#
1190,Chase Utley,25,PHI,NL,94.0,287.0,267.0,36.0,71.0,11.0,2.0,13.0,57.0,4.0,1.0,15.0,40.0,0.266,0.308,0.468,0.776,93.0,125.0,6.0,2.0,1.0,2.0,1.0,4H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,63.0,57.0,52.0,514.2,332.0,194.0,134.0,4.0,35.0,0.988,4.0,10.0,10.0,24.0,2.0,5.74,5.21,2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,1136,PHI,2,94,57,94,63,0,0,13,50,0,0,0,0,0,0,0,34,0,2B,0.606383,#
1226,Larry Walker,37,STL,NL,44.0,178.0,150.0,29.0,42.0,7.0,1.0,11.0,27.0,4.0,0.0,24.0,34.0,0.280,0.393,0.560,0.953,144.0,84.0,6.0,4.0,0.0,0.0,1.0,9/H8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,75.0,69.0,60.0,605.0,128.0,122.0,5.0,1.0,1.0,0.992,1.0,1.0,,,,1.89,1.69,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,1171,2TM,16,82,70,82,75,0,0,0,0,0,0,0,1,75,75,1,9,0,OF,0.613636,#
1229,Daryle Ward,29,PIT,NL,79.0,321.0,293.0,39.0,73.0,17.0,2.0,15.0,57.0,0.0,0.0,22.0,45.0,0.249,0.305,0.474,0.780,99.0,139.0,8.0,3.0,0.0,3.0,3.0,39/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,83.0,73.0,58.0,637.1,597.0,558.0,34.0,5.0,73.0,0.992,0.0,0.0,-4.0,-8.0,0.0,8.36,7.13,1B-OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,1174,PIT,7,79,73,79,74,0,0,71,0,0,0,0,0,12,12,0,5,0,1B,0.721519,#


### Letter Rating

In [1175]:
players["BA"].value_counts()

0.000    220
0.250     26
0.200     22
0.500     15
0.167     13
        ... 
0.207      1
0.327      1
0.129      1
0.069      1
0.181      1
Name: BA, Length: 238, dtype: int64

In [1176]:
players["BA"].isnull().sum()

343

In [1177]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [1178]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [1179]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      565
B      185
C+     155
B+     121
C      103
D+      64
D       57
A       56
E+      44
E       26
AAA     25
F       20
A+       8
G+       6
AA       6
Name: bat_letter, dtype: int64

In [1180]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
54,Josh Bard,26,CLE,AL,7.0,23.0,19.0,5.0,8.0,2.0,0.0,1.0,4.0,0.0,0.0,3.0,0.0,0.421,0.478,0.684,1.162,207.0,13.0,0.0,0.0,0.0,1.0,0.0,/2,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,7.0,6.0,5.0,53.0,51.0,47.0,4.0,0.0,1.0,1.0,0.0,0.0,-1.0,-23.0,-1.0,8.66,7.29,C,CLE,AL,7.0,6.0,5.0,53.0,51.0,47.0,4.0,0.0,1.0,1.0,0.0,0.0,0.0,-1.0,-23.0,-1.0,0.0,0.0,8.66,7.29,0.0,4.0,4.0,2.0,33%,56,CLE,3,7,6,7,7,0,7,0,0,0,0,0,0,0,0,0,0,0,C,0.571429,,AAA
83,Francis Beltran,24,MON,NL,11.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,1.0,157.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,0.0,0.0,,7.53,11.0,0.0,3.0,0.0,0.0,1.0,14.1,20.0,12.0,12.0,3.0,5.0,1.0,8.0,2.0,0.0,1.0,69.0,62.0,6.12,1.744,12.6,1.9,3.1,5.0,1.6,R,TOT,NL,45.0,0.0,0.0,49.1,6.0,0.0,5.0,1.0,1.0,0.833,,,,,,0.91,0.11,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,78,2TM,2,45,0,42,45,45,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
93,Chad Bentz,24,MON,NL,35.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,1.0,157.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,0.0,3.0,0.0,5.86,36.0,0.0,5.0,0.0,0.0,0.0,27.2,23.0,19.0,18.0,5.0,23.0,3.0,18.0,2.0,0.0,1.0,126.0,78.0,6.81,1.663,7.5,1.6,7.5,5.9,0.78,L,MON,NL,36.0,0.0,0.0,27.2,10.0,5.0,5.0,0.0,0.0,1.0,,,0.0,0.0,0.0,3.25,0.28,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,86,MON,1st,36,0,35,36,36,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
140,Jim Brower,31,SFG,NL,85.0,3.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.5,0.667,0.5,1.167,208.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,7.0,7.0,0.5,3.29,89.0,0.0,21.0,0.0,0.0,1.0,93.0,90.0,42.0,34.0,6.0,36.0,2.0,63.0,4.0,0.0,10.0,401.0,133.0,3.82,1.355,8.7,0.6,3.5,6.1,1.75,R,SFG,NL,89.0,0.0,0.0,93.0,29.0,6.0,21.0,2.0,0.0,0.931,,,1.0,2.0,0.0,2.61,0.3,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,133,SFG,6,89,0,85,89,89,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
246,Neal Cotts,24,CHW,AL,3.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0,650.0,2.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,4.0,4.0,0.5,5.65,56.0,1.0,12.0,0.0,0.0,0.0,65.1,61.0,45.0,41.0,13.0,30.0,2.0,58.0,3.0,0.0,8.0,281.0,83.0,5.38,1.393,8.4,1.8,4.1,8.0,1.93,L,CHW,AL,56.0,1.0,0.0,65.1,10.0,1.0,8.0,1.0,1.0,0.9,,,2.0,6.0,0.0,1.24,0.16,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,241,CHW,2,56,1,3,56,56,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
312,Ryan Drese,28,TEX,AL,2.0,4.0,4.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.5,0.5,0.75,1.25,210.0,3.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,14.0,10.0,0.583,4.2,34.0,33.0,1.0,2.0,0.0,0.0,207.2,233.0,104.0,97.0,16.0,58.0,6.0,98.0,11.0,0.0,1.0,897.0,119.0,4.1,1.401,10.1,0.7,2.5,4.2,1.69,R,TEX,AL,34.0,33.0,2.0,207.2,48.0,15.0,31.0,2.0,5.0,0.958,,,-1.0,-1.0,0.0,1.99,1.35,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,307,TEX,4,34,33,2,34,34,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
332,Joey Eischen,34,MON,NL,22.0,4.0,3.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.667,0.667,0.667,1.333,242.0,2.0,0.0,0.0,1.0,0.0,0.0,1/H,L,NL,0.0,1.0,0.0,3.93,21.0,0.0,3.0,0.0,0.0,0.0,18.1,16.0,10.0,8.0,2.0,8.0,2.0,17.0,1.0,0.0,0.0,80.0,117.0,4.09,1.309,7.9,1.0,3.9,8.3,2.13,L,MON,NL,21.0,0.0,0.0,18.1,2.0,0.0,1.0,1.0,0.0,0.5,,,-1.0,-11.0,0.0,0.49,0.05,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,328,MON,8,22,0,22,21,21,0,0,0,0,0,0,0,0,0,0,1,0,P,0.0,,AAA
337,Jason Ellison,26,SFG,NL,13.0,4.0,4.0,4.0,2.0,0.0,0.0,1.0,3.0,2.0,0.0,0.0,1.0,0.5,0.5,1.25,1.75,331.0,5.0,0.0,0.0,0.0,0.0,0.0,H/8,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,4.0,0.0,0.0,10.0,5.0,5.0,0.0,0.0,0.0,1.0,0.0,48.0,1.0,108.0,0.0,4.5,1.25,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,331,SFG,2,13,0,13,4,0,0,0,0,0,0,0,4,0,4,0,1,9,OF,0.230769,,AAA
444,Charles Gipson,31,TBD,AL,5.0,5.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.5,0.5,0.5,1.0,167.0,2.0,0.0,0.0,1.0,0.0,0.0,/86D7H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBD,AL,4.0,0.0,0.0,10.1,5.0,3.0,2.0,0.0,1.0,1.0,1.0,70.0,0.0,0.0,0.0,4.35,1.25,OF-SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,419,TBD,7,5,0,5,4,0,0,0,0,0,2,1,2,0,2,1,1,0,OF,0.0,,AAA
465,Mike Gonzalez,26,PIT,NL,44.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0,654.0,2.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,3.0,1.0,0.75,1.25,47.0,0.0,12.0,0.0,0.0,1.0,43.1,32.0,7.0,6.0,2.0,6.0,0.0,55.0,1.0,0.0,4.0,169.0,346.0,1.6,0.877,6.6,0.4,1.2,11.4,9.17,L,PIT,NL,47.0,0.0,0.0,43.1,7.0,1.0,5.0,1.0,0.0,0.857,,,-1.0,-5.0,0.0,1.25,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,440,PIT,2,47,0,44,47,47,0,0,0,0,0,0,0,0,0,0,0,0,P,0.045455,,AAA


In [1181]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.223

### HR Number

In [1182]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     860
4     100
3      90
2      79
6      75
5      51
7      47
1      41
8      33
9      24
12     12
10      8
11      7
36      7
18      3
14      3
15      1
Name: hr_rate, dtype: int64

In [1183]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      860
14    100
13     90
12     79
16     75
15     51
21     47
11     41
22     33
23     24
26     12
24      8
66      7
25      7
36      3
32      3
33      1
Name: hr_num_bat, dtype: int64

### Triple Number

In [1184]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     1164
1      158
2       82
3       15
4       10
6        4
12       3
9        2
5        2
7        1
Name: triple_rate, dtype: int64

In [1185]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     1164
4.0       45
3.0       40
6.0       37
5.0       31
7.0       31
2.0       23
9.0       22
8.0       21
10.0       8
12.0       5
1.0        4
11.0       3
13.0       3
14.0       2
15.0       1
18.0       1
Name: triple_val, dtype: int64

In [1186]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        1164
(14)      45
(13)      40
(16)      37
(15)      31
(21)      31
(12)      23
(23)      22
(22)      21
(24)       8
(26)       5
(11)       4
(25)       3
(31)       3
(32)       2
(36)       1
(33)       1
Name: triple_num, dtype: int64

### Speed Rating

In [1187]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [1188]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    1007
0.033      11
0.018      10
0.048      10
0.020       9
         ... 
0.108       1
0.159       1
0.035       1
0.158       1
0.126       1
Name: speed_score, Length: 166, dtype: int64

In [1189]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [1190]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        1277
**        85
*         35
***       25
****      19
Name: speed_rating, dtype: int64

In [1191]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
57,Jason Bartlett,24,MIN,AL,8.0,14.0,12.0,2.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,0.083,0.154,0.083,0.237,-36.0,1.0,0.0,0.0,1.0,0.0,0.0,/H64D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,6.0,2.0,2.0,23.0,19.0,5.0,12.0,2.0,3.0,0.895,-1.0,-63.0,1.0,63.0,0.0,6.65,2.83,SS-2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,59,MIN,1st,8,2,8,6,0,0,0,1,0,5,0,0,0,0,1,1,4,SS,0.125,,F,0,,0,0.0,,1.0,****
164,Homer Bush,31,NYY,AL,9.0,8.0,7.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.125,0.0,0.125,-63.0,0.0,1.0,1.0,0.0,0.0,0.0,/H4D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,4.0,2.0,2.0,23.0,11.0,7.0,4.0,0.0,3.0,1.0,0.0,-10.0,-1.0,-37.0,0.0,4.3,2.75,2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,156,NYY,7,9,2,9,4,0,0,0,4,0,0,0,0,0,0,2,0,6,2B,0.0,,G,0,,0,0.0,,1.0,****
248,Carl Crawford,22,TBD,AL,152.0,672.0,626.0,104.0,185.0,26.0,19.0,11.0,55.0,59.0,15.0,35.0,81.0,0.296,0.331,0.45,0.781,105.0,282.0,2.0,1.0,4.0,6.0,2.0,*78/DH,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBD,AL,145.0,140.0,130.0,1235.0,357.0,350.0,5.0,2.0,1.0,0.994,16.0,16.0,15.0,14.0,-2.0,2.59,2.45,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,244,TBD,3,152,145,152,145,0,0,0,0,0,0,122,30,0,145,5,4,1,OF,0.361842,,B+,2,12.0,4,6.0,(16),0.358,****
320,Jeff Duncan,25,NYM,NL,13.0,17.0,15.0,2.0,1.0,0.0,0.0,0.0,1.0,3.0,0.0,1.0,5.0,0.067,0.125,0.067,0.192,-48.0,1.0,0.0,0.0,1.0,0.0,0.0,H/87,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,4.0,2.0,2.0,22.0,7.0,7.0,0.0,0.0,0.0,1.0,1.0,71.0,2.0,104.0,0.0,2.86,1.75,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,315,NYM,2,13,2,13,4,0,0,0,0,0,0,2,2,0,4,0,8,3,OF,0.076923,,F,0,,0,0.0,,1.5,****
337,Jason Ellison,26,SFG,NL,13.0,4.0,4.0,4.0,2.0,0.0,0.0,1.0,3.0,2.0,0.0,0.0,1.0,0.5,0.5,1.25,1.75,331.0,5.0,0.0,0.0,0.0,0.0,0.0,H/8,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,4.0,0.0,0.0,10.0,5.0,5.0,0.0,0.0,0.0,1.0,0.0,48.0,1.0,108.0,0.0,4.5,1.25,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,331,SFG,2,13,0,13,4,0,0,0,0,0,0,0,4,0,4,0,1,9,OF,0.230769,,AAA,18,36.0,0,0.0,,2.0,****
427,Joey Gathright,23,TBD,AL,19.0,57.0,52.0,11.0,13.0,0.0,0.0,0.0,1.0,6.0,1.0,2.0,14.0,0.25,0.316,0.25,0.566,53.0,13.0,2.0,3.0,0.0,0.0,0.0,8/7HD9,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBD,AL,16.0,14.0,14.0,126.0,30.0,30.0,0.0,0.0,0.0,1.0,-7.0,-68.0,-5.0,-49.0,-2.0,2.14,1.88,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,402,TBD,1st,19,14,19,16,0,0,0,0,0,0,4,11,1,16,1,2,1,OF,0.052632,,C+,0,,0,0.0,,0.333,****
444,Charles Gipson,31,TBD,AL,5.0,5.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.5,0.5,0.5,1.0,167.0,2.0,0.0,0.0,1.0,0.0,0.0,/86D7H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBD,AL,4.0,0.0,0.0,10.1,5.0,3.0,2.0,0.0,1.0,1.0,1.0,70.0,0.0,0.0,0.0,4.35,1.25,OF-SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,419,TBD,7,5,0,5,4,0,0,0,0,0,2,1,2,0,2,1,1,0,OF,0.0,,AAA,0,,0,0.0,,0.5,****
857,Donnie Murphy,21,KCR,AL,7.0,27.0,27.0,1.0,5.0,3.0,0.0,0.0,3.0,1.0,0.0,0.0,7.0,0.185,0.185,0.296,0.481,22.0,8.0,1.0,0.0,0.0,0.0,0.0,/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,7.0,7.0,7.0,61.0,29.0,12.0,17.0,0.0,8.0,1.0,0.0,-4.0,2.0,45.0,0.0,4.28,4.14,2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,805,KCR,1st,7,7,7,7,0,0,0,7,0,0,0,0,0,0,0,0,0,2B,0.428571,,D+,0,,0,0.0,,0.5,****
889,Tomo Ohka,28,MON,NL,14.0,26.0,25.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,8.0,0.08,0.115,0.08,0.195,-49.0,2.0,0.0,0.0,0.0,0.0,0.0,1,S,NL,3.0,7.0,0.3,3.4,15.0,15.0,0.0,0.0,0.0,0.0,84.2,98.0,40.0,32.0,11.0,20.0,1.0,38.0,1.0,0.0,3.0,367.0,134.0,4.58,1.394,10.4,1.2,2.1,4.0,1.9,R,MON,NL,15.0,15.0,0.0,84.2,16.0,4.0,11.0,1.0,3.0,0.938,,,2.0,5.0,0.0,1.59,1.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,840,MON,6,15,15,14,15,15,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.333,****
938,Antonio Perez,24,LAD,NL,13.0,14.0,13.0,5.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,0.231,0.286,0.308,0.593,57.0,4.0,0.0,1.0,0.0,0.0,0.0,H/46,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAD,NL,3.0,0.0,0.0,12.0,7.0,3.0,3.0,1.0,2.0,0.857,-1.0,-60.0,0.0,0.0,0.0,4.5,2.0,2B-SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,882,LAD,2,13,0,13,2,0,0,0,2,0,1,0,0,0,0,0,8,3,2B,0.0,,C+,0,,0,0.0,,0.333,****


### Base on Balls Number

In [1192]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     671
2     231
3     204
4     125
1      90
5      64
6      23
7      16
12      4
9       4
36      3
18      3
14      1
10      1
8       1
Name: walk_rate, dtype: int64

In [1193]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     671
12    231
13    204
14    125
11     90
15     64
16     23
21     16
26      4
23      4
36      3
66      3
32      1
24      1
22      1
Name: bb_num, dtype: int64

### Batter K Number

In [1194]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     420
7     118
6     115
5     103
8      92
4      86
9      85
10     54
18     51
12     50
36     50
3      48
11     32
14     29
13     18
2      15
24     14
15     13
22     11
27      7
21      6
16      6
17      6
19      4
20      2
26      2
23      1
31      1
29      1
30      1
Name: k_rate, dtype: int64

In [1195]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     420
8.0     112
10.0     99
12.0     99
9.0      98
11.0     93
7.0      89
13.0     60
36.0     54
18.0     51
6.0      44
14.0     41
5.0      37
15.0     20
16.0     20
24.0     18
4.0      18
17.0     13
22.0     10
19.0      9
21.0      8
27.0      8
20.0      7
26.0      3
3.0       3
30.0      2
29.0      2
23.0      2
31.0      1
Name: k_val, dtype: int64

In [1196]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     420
22    112
24     99
26     99
23     98
25     93
21     89
31     60
66     54
36     51
16     44
32     41
15     37
33     20
34     20
46     18
14     18
35     13
44     10
41      9
53      8
43      8
42      7
52      3
13      3
55      2
45      2
56      2
61      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [1197]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0     1244
1      171
2       14
3        5
6        3
4        3
12       1
Name: hbp_rate, dtype: int64

In [1198]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [1199]:
players["hbp_val"].value_counts()

0.0     1244
10.0      26
13.0      24
9.0       23
11.0      22
14.0      18
8.0       17
12.0      16
7.0       13
6.0       11
15.0       6
5.0        5
17.0       5
16.0       4
3.0        1
24.0       1
19.0       1
30.0       1
18.0       1
4.0        1
21.0       1
Name: hbp_val, dtype: int64

In [1200]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       1244
/24      26
/31      24
/23      23
/25      22
/32      18
/22      17
/26      16
/21      13
/16      11
/33       6
/15       5
/35       5
/34       4
/41       1
/13       1
/56       1
/46       1
/36       1
/14       1
/43       1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [1201]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     563
9     186
8     186
7     130
6      87
10     76
5      56
4      50
3      31
11     16
2      16
18     13
12     11
1       7
13      4
14      3
15      3
36      3
Name: hit_rate, dtype: int64

In [1202]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    570
54    186
55    186
56    130
61     87
53     76
62     56
63     50
64     31
65     16
52     16
41     13
51     11
46      4
44      3
45      3
11      3
Name: PH_num_bat, dtype: int64

### Batter Rating

In [1203]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1204]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " (" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + ")")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24)
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24)
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26)
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,0.56,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42)


In [1205]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
1436,Randy Williams,28,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,5.79,6.0,0.0,1.0,0.0,0.0,0.0,4.2,3.0,3.0,3.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,22.0,83.0,5.19,1.929,5.8,0.0,11.6,7.7,0.67,L,SEA,AL,6.0,0.0,0.0,4.2,1.0,0.0,1.0,0.0,1.0,1.0,,,0.0,0.0,0.0,1.93,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1202,SEA,1st,6,0,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1437,Todd Williams,33,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,0.0,1.0,2.87,29.0,0.0,7.0,0.0,0.0,0.0,31.1,26.0,10.0,10.0,2.0,9.0,0.0,13.0,5.0,0.0,1.0,126.0,160.0,4.39,1.117,7.5,0.6,2.6,3.7,1.44,R,BAL,AL,29.0,0.0,0.0,31.1,6.0,2.0,4.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.72,0.21,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1203,BAL,5,29,0,0,29,29,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1438,Dan Wright,26,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,4.0,0.0,8.15,4.0,4.0,0.0,0.0,0.0,0.0,17.2,24.0,17.0,16.0,5.0,11.0,1.0,6.0,2.0,1.0,0.0,88.0,59.0,8.26,1.981,12.2,2.5,5.6,3.1,0.55,R,CHW,AL,4.0,4.0,0.0,17.2,4.0,1.0,2.0,1.0,0.0,0.75,,,0.0,0.0,0.0,1.53,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1227,CHW,4,4,4,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1439,Kelly Wunsch,31,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,0.0,3.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,8.0,,3.55,1.5,9.0,0.0,4.5,4.5,1.0,L,CHW,AL,3.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,4.5,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1232,CHW,5,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1440,Chris Young,25,TEX,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,3.0,2.0,0.6,4.71,7.0,7.0,0.0,0.0,0.0,0.0,36.1,36.0,21.0,19.0,7.0,10.0,0.0,27.0,2.0,0.0,1.0,158.0,107.0,5.06,1.266,8.9,1.7,2.5,6.7,2.7,R,TEX,AL,7.0,7.0,0.0,36.1,6.0,2.0,3.0,1.0,0.0,0.833,,,0.0,0.0,0.0,1.24,0.71,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1236,TEX,1st,7,7,0,7,7,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)


In [1206]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.000,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.50,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.250000,,D+,0,,0,0.0,,0.000,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24)
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.250000,,D+,0,,0,0.0,,0.000,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24)
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26)
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.60,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.000,,,-2.0,-3.0,0.0,1.26,0.56,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.000,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1436,Randy Williams,28,SEA,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,5.79,6.0,0.0,1.0,0.0,0.0,0.0,4.2,3.0,3.0,3.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,22.0,83.0,5.19,1.929,5.8,0.0,11.6,7.7,0.67,L,SEA,AL,6.0,0.0,0.0,4.2,1.0,0.0,1.0,0.0,1.0,1.000,,,0.0,0.0,0.0,1.93,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1202,SEA,1st,6,0,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1437,Todd Williams,33,BAL,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,2.0,0.0,1.000,2.87,29.0,0.0,7.0,0.0,0.0,0.0,31.1,26.0,10.0,10.0,2.0,9.0,0.0,13.0,5.0,0.0,1.0,126.0,160.0,4.39,1.117,7.5,0.6,2.6,3.7,1.44,R,BAL,AL,29.0,0.0,0.0,31.1,6.0,2.0,4.0,0.0,0.0,1.000,,,0.0,0.0,0.0,1.72,0.21,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1203,BAL,5,29,0,0,29,29,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1438,Dan Wright,26,CHW,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,4.0,0.000,8.15,4.0,4.0,0.0,0.0,0.0,0.0,17.2,24.0,17.0,16.0,5.0,11.0,1.0,6.0,2.0,1.0,0.0,88.0,59.0,8.26,1.981,12.2,2.5,5.6,3.1,0.55,R,CHW,AL,4.0,4.0,0.0,17.2,4.0,1.0,2.0,1.0,0.0,0.750,,,0.0,0.0,0.0,1.53,0.75,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1227,CHW,4,4,4,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)
1439,Kelly Wunsch,31,CHW,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,0.00,3.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,8.0,,3.55,1.500,9.0,0.0,4.5,4.5,1.00,L,CHW,AL,3.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,1.000,,,0.0,0.0,0.0,4.50,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1232,CHW,5,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)


In [1207]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [1208]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [1209]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_num', 'k_rate', 'k_val', 'k_num', 'hbp_rate', 'hbp_val', 'hbp_num',
       'hit_rate', 'PH_num_bat', 'batter_rating'],
      dtype='object', length=151)

### Pitcher Letter Rating

In [1210]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.2746428571428572

In [1211]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [1212]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     768
W     183
M     162
X     136
Y      69
L      53
K      27
Z+     25
J+     10
J       8
Name: pit_letter, dtype: int64

In [1213]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      713
W     183
M     162
X     136
Y      69
Z      55
L      53
K      27
Z+     25
J+     10
J       8
Name: pit_letter, dtype: int64

In [1214]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.169

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [1215]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

2.000000     9
14.333333    9
1.000000     8
9.000000     6
14.000000    6
32.666667    6
50.666667    6
27.666667    6
8.000000     5
1.666667     5
2.666667     5
30.666667    5
25.666667    5
6.000000     5
7.333333     5
3.666667     5
1.333333     5
3.333333     5
10.666667    5
19.000000    5
26.000000    5
15.666667    5
38.666667    4
32.000000    4
8.333333     4
Name: IP_real, dtype: int64

In [1216]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

1.0    351
6.0    112
2.0     78
5.0     58
4.0     48
3.0     45
7.0     24
0.0     12
Name: IE, dtype: int64

In [1217]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
84,Rigo Beltran,34,MON,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,13.5,2.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,51.0,3.05,1.5,13.5,0.0,0.0,0.0,,L,MON,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,79,MON,5,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,0.666667,0.0
165,Mike Bynum,26,SDP,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,1.0,0.0,54.0,2.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0,4.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,6.0,11.0,16.55,6.0,13.5,0.0,40.5,0.0,0.0,L,SDP,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-300.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,157,SDP,3,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,0.666667,0.0
326,Trent Durrington,28,MIL,NL,53.0,87.0,82.0,13.0,19.0,2.0,3.0,2.0,4.0,4.0,0.0,4.0,23.0,0.232,0.267,0.402,0.67,70.0,33.0,1.0,0.0,1.0,0.0,0.0,H5/4D1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.05,0.0,0.0,0.0,0.0,0.0,,R,MIL,NL,18.0,10.0,10.0,105.1,43.0,15.0,24.0,4.0,1.0,0.907,-4.0,-48.0,-1.0,-7.0,0.0,3.33,2.17,3B-2B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,322,MIL,4,53,11,53,17,1,0,0,6,11,0,0,0,0,0,1,30,9,3B,0.075472,,C+,4,14.0,6,10.0,(24),0.25,***,2,12,10,12.0,26,0,0.0,,8,55,C+14(24)*** (12-26),0.0,J+,0.333333,0.0
368,Jared Fernandez,32,HOU,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,54.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,6.0,6.0,6.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,14.0,11.0,18.05,11.0,54.0,0.0,45.0,0.0,0.0,R,HOU,NL,2.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,9.0,0.5,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,354,HOU,4,2,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.667,Z,1.0,0.0
808,Frank Menechino,33,TOT,AL,84.0,311.0,269.0,40.0,74.0,13.0,4.0,9.0,26.0,0.0,2.0,37.0,52.0,0.275,0.371,0.454,0.824,111.0,122.0,5.0,4.0,1.0,0.0,1.0,4D6H/51,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.05,6.0,54.0,0.0,0.0,0.0,,R,TOT,AL,64.0,56.0,46.0,490.2,243.0,100.0,141.0,2.0,26.0,0.992,2.0,6.0,,-9.0,,4.42,3.77,2B-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,762,2TM,6,85,71,84,62,1,0,0,42,7,14,0,0,0,0,19,9,1,2B,0.309524,,B,4,14.0,2,6.0,(16),0.0,,4,14,6,10.0,24,0,0.0,,9,54,B14(16) (14-24),0.667,Z,0.333333,0.0
810,Frank Menechino,33,TOR,AL,71.0,276.0,236.0,40.0,71.0,13.0,4.0,9.0,25.0,0.0,2.0,36.0,44.0,0.301,0.4,0.504,0.904,131.0,119.0,3.0,3.0,1.0,0.0,1.0,4D6/H51,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.05,6.0,54.0,0.0,0.0,0.0,,R,TOT,AL,64.0,56.0,46.0,490.2,243.0,100.0,141.0,2.0,26.0,0.992,2.0,6.0,,-9.0,,4.42,3.77,2B-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,762,2TM,6,85,71,84,62,1,0,0,42,7,14,0,0,0,0,19,9,1,2B,0.352113,,B+,5,15.0,2,7.0,(21),0.0,,5,15,6,11.0,25,0,0.0,,9,54,B+15(21) (15-25),0.667,Z,0.333333,0.0
882,Abraham Nunez,28,PIT,NL,112.0,195.0,182.0,17.0,43.0,9.0,0.0,2.0,13.0,1.0,3.0,10.0,36.0,0.236,0.275,0.319,0.593,54.0,58.0,8.0,0.0,2.0,1.0,0.0,H46/5D1,S,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.05,0.0,0.0,0.0,0.0,0.0,,R,PIT,NL,52.0,29.0,20.0,277.0,163.0,70.0,90.0,3.0,28.0,0.982,4.0,15.0,3.0,11.0,1.0,5.2,3.08,2B-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,834,PIT,8,112,29,112,49,1,0,0,32,6,13,0,0,0,0,1,68,1,2B,0.116071,,C+,2,12.0,0,0.0,,0.024,,2,12,7,9.0,23,0,0.0,,8,55,C+12 (12-23),0.0,J+,0.333333,0.0
974,Andy Pratt,24,CHC,NL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,1.0,0.0,21.6,4.0,0.0,1.0,0.0,0.0,0.0,1.2,0.0,4.0,4.0,0.0,7.0,1.0,1.0,1.0,0.0,0.0,13.0,25.0,16.25,4.2,0.0,0.0,37.8,5.4,0.14,L,CHC,NL,4.0,0.0,0.0,1.2,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,5.4,0.25,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,917,CHC,2,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.0,J+,1.666667,0.0
1223,Kevin Walker,27,SFG,NL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,16.2,5.0,0.0,0.0,0.0,0.0,0.0,1.2,3.0,3.0,3.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,10.0,33.0,15.05,3.0,16.2,5.4,10.8,5.4,0.5,L,SFG,NL,5.0,0.0,0.0,1.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,1170,SFG,5,5,0,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.429,Z,1.666667,0.0
1334,Frank Castillo,35,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,0.0,2.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,6.05,2.0,9.0,0.0,9.0,0.0,0.0,R,BOS,AL,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,186,BOS,12,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,1.0,0.0


In [1218]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

1.0    363
6.0    112
2.0     78
5.0     58
4.0     48
3.0     45
7.0     24
Name: IE, dtype: int64

In [1219]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
91,Kris Benson,29,PIT,NL,19.0,50.0,39.0,2.0,7.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,17.0,0.179,0.2,0.179,0.379,0.0,7.0,0.0,0.0,10.0,0.0,0.0,1,R,NL,8.0,8.0,0.5,4.22,20.0,20.0,0.0,0.0,0.0,0.0,132.1,137.0,69.0,62.0,7.0,44.0,5.0,83.0,6.0,0.0,2.0,564.0,102.0,3.62,1.368,9.3,0.5,3.0,5.6,1.89,R,TOT,NL,31.0,31.0,1.0,200.1,43.0,17.0,25.0,1.0,2.0,0.977,,,,,,1.89,1.35,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,85,2TM,5,31,31,30,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.157895,,D+,0,,0,0.0,,0.0,,1,11,12,13.0,31,0,0.0,,5,62,D+ (11-31),0.267,W,132.333333,7.0
149,Mark Buehrle,25,CHW,AL,2.0,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,2.0,0.0,0.0,1,L,AL,16.0,10.0,0.615,3.89,35.0,35.0,0.0,4.0,1.0,0.0,245.1,257.0,119.0,106.0,33.0,51.0,2.0,165.0,8.0,0.0,0.0,1016.0,121.0,4.17,1.255,9.4,1.2,1.9,6.1,3.24,L,CHW,AL,35.0,35.0,4.0,245.1,71.0,16.0,51.0,4.0,3.0,0.944,,,10.0,8.0,1.0,2.46,1.91,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,142,CHW,5,35,35,2,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,0,66,G (n-32),0.269,W,245.333333,7.0
417,Freddy Garcia,27,TOT,AL,2.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,13.0,11.0,0.542,3.81,31.0,31.0,0.0,1.0,0.0,0.0,210.0,192.0,92.0,89.0,22.0,64.0,3.0,184.0,7.0,0.0,8.0,878.0,121.0,3.67,1.219,8.2,0.9,2.7,7.9,2.88,R,TOT,AL,31.0,31.0,1.0,210.0,49.0,12.0,37.0,0.0,3.0,1.0,,,,,,2.1,1.58,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,396,2TM,6,31,31,2,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,27,27.0,53,0,0.0,,0,66,G (n-53),0.238,M,210.0,7.0
418,Freddy Garcia,27,SEA,AL,2.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,4.0,7.0,0.364,3.2,15.0,15.0,0.0,1.0,0.0,0.0,107.0,96.0,39.0,38.0,8.0,32.0,1.0,82.0,2.0,0.0,5.0,446.0,141.0,3.44,1.196,8.1,0.7,2.7,6.9,2.56,R,TOT,AL,31.0,31.0,1.0,210.0,49.0,12.0,37.0,0.0,3.0,1.0,,,,,,2.1,1.58,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,396,2TM,6,31,31,2,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,27,27.0,53,0,0.0,,0,66,G (n-53),0.233,M,107.0,7.0
563,Liván Hernández,29,MON,NL,34.0,97.0,81.0,2.0,20.0,7.0,0.0,1.0,10.0,0.0,0.0,1.0,8.0,0.247,0.256,0.37,0.626,57.0,30.0,1.0,0.0,15.0,0.0,0.0,1/H,R,NL,11.0,15.0,0.423,3.6,35.0,35.0,0.0,9.0,2.0,0.0,255.0,234.0,105.0,102.0,26.0,83.0,9.0,186.0,10.0,0.0,1.0,1053.0,126.0,4.01,1.243,8.3,0.9,2.9,6.6,2.24,R,MON,NL,35.0,35.0,9.0,255.0,84.0,21.0,61.0,2.0,10.0,0.976,,,9.0,7.0,1.0,2.89,2.34,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,530,MON,9,36,35,34,35,35,0,0,0,0,0,0,0,0,0,0,1,0,P,0.294118,,C+,2,12.0,0,0.0,,0.0,,0,n,3,3.0,13,0,0.0,,7,56,C+12 (n-13),0.244,M,255.0,7.0
595,Tim Hudson,28,OAK,AL,1.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,12.0,6.0,0.667,3.53,27.0,27.0,0.0,3.0,2.0,0.0,188.2,194.0,82.0,74.0,8.0,44.0,3.0,103.0,12.0,1.0,4.0,793.0,129.0,3.4,1.261,9.3,0.4,2.1,4.9,2.34,R,OAK,AL,27.0,27.0,3.0,188.2,49.0,22.0,26.0,1.0,3.0,0.98,,,-2.0,-2.0,0.0,2.29,1.78,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,558,OAK,6,27,27,1,27,27,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,12,12.0,26,0,0.0,,0,66,G (n-26),0.263,W,188.666667,7.0
625,Randy Johnson,40,ARI,NL,33.0,87.0,80.0,1.0,10.0,3.0,0.0,0.0,6.0,0.0,0.0,4.0,37.0,0.125,0.167,0.163,0.329,-16.0,13.0,1.0,0.0,3.0,0.0,0.0,1,R,NL,16.0,14.0,0.533,2.6,35.0,35.0,0.0,4.0,2.0,0.0,245.2,177.0,88.0,71.0,18.0,44.0,1.0,290.0,10.0,1.0,3.0,964.0,176.0,2.3,0.9,6.5,0.7,1.6,10.6,6.59,L,ARI,NL,35.0,35.0,4.0,245.2,25.0,5.0,20.0,0.0,0.0,1.0,,,-3.0,-2.0,0.0,0.92,0.71,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,586,ARI,17,35,35,33,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.181818,,E+,0,,0,0.0,,0.0,,2,12,15,17.0,35,0,0.0,,4,63,E+ (12-35),0.195,K,245.666667,7.0
707,Jon Lieber,34,NYY,AL,1.0,3.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333,0.333,0.333,0.667,77.0,1.0,1.0,0.0,0.0,0.0,0.0,1,L,AL,14.0,8.0,0.636,4.33,27.0,27.0,0.0,0.0,0.0,0.0,176.2,216.0,95.0,85.0,20.0,18.0,2.0,102.0,2.0,0.0,7.0,749.0,104.0,3.71,1.325,11.0,1.0,0.9,5.2,5.67,R,NYY,AL,27.0,27.0,0.0,176.2,33.0,10.0,19.0,4.0,1.0,0.879,,,-3.0,-3.0,0.0,1.48,1.07,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,665,NYY,10,27,27,1,27,27,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,A,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,12,51,A (n-n),0.296,X,176.666667,7.0
722,Esteban Loaiza,32,CHW,AL,2.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,1.0,0.0,0.0,0.0,0.0,1,R,AL,9.0,5.0,0.643,4.86,21.0,21.0,0.0,2.0,1.0,0.0,140.2,156.0,81.0,76.0,23.0,45.0,3.0,83.0,1.0,0.0,2.0,604.0,97.0,4.98,1.429,10.0,1.5,2.9,5.3,1.84,R,TOT,AL,31.0,27.0,2.0,183.0,42.0,13.0,29.0,0.0,4.0,1.0,,,,,,2.07,1.35,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,677,2TM,10,31,27,2,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,0,66,G (n-32),0.28,X,140.666667,7.0
758,Mike Maroth,26,DET,AL,2.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,1.0,0.0,0.0,1,L,AL,11.0,13.0,0.458,4.31,33.0,33.0,0.0,2.0,1.0,0.0,217.0,244.0,112.0,104.0,25.0,59.0,1.0,108.0,7.0,1.0,10.0,928.0,103.0,4.46,1.396,10.1,1.0,2.4,4.5,1.83,L,DET,AL,33.0,33.0,2.0,217.0,48.0,11.0,37.0,0.0,5.0,1.0,,,5.0,5.0,0.0,1.99,1.45,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,718,DET,3,33,33,2,33,33,0,0,0,0,0,0,0,0,0,0,0,0,P,0.5,,G,0,,0,0.0,,0.0,,0,n,22,22.0,44,0,0.0,,0,66,G (n-44),0.283,X,217.0,7.0


In [1220]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [1221]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     727
3     249
4     171
2     129
5      73
6      36
1      18
7      14
10      6
8       5
9       5
18      2
11      2
12      1
13      1
14      1
19      1
Name: bb_rate, dtype: int64

In [1222]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     249
4.0     171
2.0     129
5.0      73
6.0      36
1.0      18
7.0      14
0.0      14
10.0      6
8.0       5
9.0       5
18.0      2
11.0      2
14.0      1
13.0      1
19.0      1
12.0      1
Name: bb_rate, dtype: int64

In [1223]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    713
13     249
14     171
12     129
15      73
16      36
11      32
21      14
24       6
22       5
23       5
36       2
25       2
26       1
31       1
32       1
41       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [1224]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

5.0     155
6.0     154
7.0      98
4.0      89
8.0      62
3.0      42
9.0      42
0.0      24
10.0     19
2.0      15
12.0     10
11.0      8
1.0       4
13.0      2
14.0      2
15.0      1
18.0      1
Name: k_rate_pit, dtype: int64

In [1225]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
84,Rigo Beltran,34,MON,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,13.5,2.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,51.0,3.05,1.5,13.5,0.0,0.0,0.0,,L,MON,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,79,MON,5,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,0.666667,1,0.0,11,0.0
165,Mike Bynum,26,SDP,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,1.0,0.0,54.0,2.0,0.0,0.0,0.0,0.0,0.0,0.2,1.0,4.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,6.0,11.0,16.55,6.0,13.5,0.0,40.5,0.0,0.0,L,SDP,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-300.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,157,SDP,3,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,0.666667,1,18.0,36,0.0
326,Trent Durrington,28,MIL,NL,53.0,87.0,82.0,13.0,19.0,2.0,3.0,2.0,4.0,4.0,0.0,4.0,23.0,0.232,0.267,0.402,0.67,70.0,33.0,1.0,0.0,1.0,0.0,0.0,H5/4D1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.05,0.0,0.0,0.0,0.0,0.0,,R,MIL,NL,18.0,10.0,10.0,105.1,43.0,15.0,24.0,4.0,1.0,0.907,-4.0,-48.0,-1.0,-7.0,0.0,3.33,2.17,3B-2B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,322,MIL,4,53,11,53,17,1,0,0,6,11,0,0,0,0,0,1,30,9,3B,0.075472,,C+,4,14.0,6,10.0,(24),0.25,***,2,12,10,12.0,26,0,0.0,,8,55,C+14(24)*** (12-26),0.0,J+,0.333333,1,0.0,11,0.0
355,Leo Estrella,29,SFG,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,27.0,2.0,0.0,0.0,0.0,0.0,0.0,1.1,8.0,4.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,13.0,20.0,5.3,6.75,54.0,0.0,6.8,0.0,0.0,R,SFG,NL,2.0,0.0,0.0,1.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,345,SFG,3,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.667,Z,1.333333,1,3.0,13,0.0
368,Jared Fernandez,32,HOU,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,54.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,6.0,6.0,6.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,14.0,11.0,18.05,11.0,54.0,0.0,45.0,0.0,0.0,R,HOU,NL,2.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,9.0,0.5,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,354,HOU,4,2,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.667,Z,1.0,1,13.0,31,0.0
615,Kevin Jarvis,34,COL,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,27.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,6.0,6.0,6.0,1.0,4.0,2.0,0.0,0.0,0.0,0.0,15.0,21.0,15.55,5.0,27.0,4.5,18.0,0.0,0.0,R,TOT,ZZ,10.0,0.0,0.0,15.0,3.0,0.0,3.0,0.0,0.0,1.0,,,,,,1.8,0.3,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,576,2TM,10,10,0,1,10,10,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.545,Z,2.0,1,10.0,24,0.0
677,Tim Laker,34,CLE,AL,43.0,128.0,117.0,12.0,25.0,2.0,0.0,3.0,17.0,0.0,0.0,7.0,28.0,0.214,0.262,0.308,0.57,52.0,36.0,5.0,1.0,2.0,1.0,1.0,2/H1,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,6.05,2.0,9.0,0.0,9.0,0.0,0.0,R,CLE,AL,42.0,31.0,24.0,299.2,258.0,233.0,21.0,4.0,0.0,0.984,-1.0,-4.0,4.0,16.0,3.0,7.63,6.05,C-P,CLE,AL,41.0,31.0,24.0,298.2,258.0,233.0,21.0,4.0,0.0,0.984,-1.0,-4.0,-1.0,4.0,16.0,3.0,-1.0,2.0,7.65,6.2,5.0,4.0,22.0,7.0,24%,636,CLE,9,44,31,43,42,1,41,0,0,0,0,0,0,0,0,0,1,2,C,0.395349,,C,4,14.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,7,56,C14 (12-24),0.333,Y,1.0,1,9.0,23,0.0
799,Cody McKay,30,STL,NL,35.0,79.0,74.0,7.0,17.0,2.0,0.0,0.0,6.0,0.0,0.0,2.0,14.0,0.23,0.269,0.257,0.526,38.0,19.0,3.0,2.0,1.0,0.0,0.0,2H/531,L,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,,4.55,0.5,0.0,0.0,4.5,0.0,0.0,R,STL,NL,27.0,13.0,11.0,152.2,94.0,79.0,14.0,1.0,1.0,0.989,1.0,10.0,0.0,1.0,0.0,5.48,3.44,C-3B-1B-P,STL,NL,18.0,13.0,11.0,132.0,87.0,75.0,12.0,0.0,1.0,1.0,2.0,18.0,2.0,2.0,18.0,0.0,2.0,0.0,5.93,4.83,3.0,4.0,6.0,5.0,45%,751,STL,2,35,13,35,27,1,18,1,0,7,0,0,0,0,0,0,16,1,C,0.171429,,C+,0,,0,0.0,,0.0,,1,11,6,7.0,21,1,8.0,/22,8,55,C+ (11-21/22),0.0,J+,2.0,2,5.0,15,0.0
808,Frank Menechino,33,TOT,AL,84.0,311.0,269.0,40.0,74.0,13.0,4.0,9.0,26.0,0.0,2.0,37.0,52.0,0.275,0.371,0.454,0.824,111.0,122.0,5.0,4.0,1.0,0.0,1.0,4D6H/51,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.05,6.0,54.0,0.0,0.0,0.0,,R,TOT,AL,64.0,56.0,46.0,490.2,243.0,100.0,141.0,2.0,26.0,0.992,2.0,6.0,,-9.0,,4.42,3.77,2B-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,762,2TM,6,85,71,84,62,1,0,0,42,7,14,0,0,0,0,19,9,1,2B,0.309524,,B,4,14.0,2,6.0,(16),0.0,,4,14,6,10.0,24,0,0.0,,9,54,B14(16) (14-24),0.667,Z,0.333333,1,0.0,11,0.0
810,Frank Menechino,33,TOR,AL,71.0,276.0,236.0,40.0,71.0,13.0,4.0,9.0,25.0,0.0,2.0,36.0,44.0,0.301,0.4,0.504,0.904,131.0,119.0,3.0,3.0,1.0,0.0,1.0,4D6/H51,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.05,6.0,54.0,0.0,0.0,0.0,,R,TOT,AL,64.0,56.0,46.0,490.2,243.0,100.0,141.0,2.0,26.0,0.992,2.0,6.0,,-9.0,,4.42,3.77,2B-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,762,2TM,6,85,71,84,62,1,0,0,42,7,14,0,0,0,0,19,9,1,2B,0.352113,,B+,5,15.0,2,7.0,(21),0.0,,5,15,6,11.0,25,0,0.0,,9,54,B+15(21) (15-25),0.667,Z,0.333333,1,0.0,11,0.0


In [1226]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

8.0     128
9.0     108
10.0    104
7.0      82
11.0     68
12.0     66
6.0      44
13.0     37
0.0      24
15.0     18
14.0     18
5.0       8
16.0      6
17.0      4
4.0       4
18.0      3
19.0      2
22.0      2
2.0       1
21.0      1
Name: k_val_pit, dtype: int64

In [1227]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    713
22     128
23     108
24     104
21      82
25      68
26      66
16      44
31      37
n       24
32      18
33      18
15       8
34       6
14       4
35       4
36       3
44       2
41       2
12       1
43       1
Name: k_num_pit, dtype: int64

In [1228]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [1229]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    1229
1     193
2      14
3       3
4       2
Name: hbp_rate_pit, dtype: int64

In [1230]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [1231]:
players["hbp_val_pit"].value_counts()

0.0     1229
9.0       46
11.0      32
10.0      31
13.0      23
8.0       22
12.0      15
14.0      14
7.0       12
16.0       7
19.0       2
15.0       2
17.0       1
22.0       1
25.0       1
6.0        1
5.0        1
21.0       1
Name: hbp_val_pit, dtype: int64

In [1232]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       1229
/23      46
/25      32
/24      31
/31      23
/22      22
/26      15
/32      14
/21      12
/34       7
/33       2
/41       2
/16       1
/44       1
/51       1
/35       1
/43       1
/15       1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [1233]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1234]:
players["WP"].value_counts()

0.0     202
1.0     159
2.0     115
3.0      76
4.0      58
5.0      48
6.0      24
7.0      16
8.0       9
10.0      6
9.0       5
12.0      4
11.0      2
17.0      2
13.0      1
14.0      1
Name: WP, dtype: int64

In [1235]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit
59,Miguel Batista,33,TOR,AL,2.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,10.0,13.0,0.435,4.8,38.0,31.0,7.0,2.0,1.0,5.0,198.2,206.0,115.0,106.0,22.0,96.0,1.0,104.0,3.0,0.0,12.0,867.0,101.0,4.94,1.52,9.3,1.0,4.3,4.7,1.08,R,TOR,AL,38.0,31.0,2.0,198.2,49.0,18.0,30.0,1.0,5.0,0.98,,,2.0,2.0,0.0,2.17,...,,,,,,,,,,,,,,,,,,,,,,,,,,,61,TOR,10,38,31,2,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,22,22.0,44,0,0.0,,0,66,G (n-44),0.268,W,198.666667,5,4.0,14,4.0,8.0,22,0,0.0,
75,Rob Bell,27,TBD,AL,2.0,5.0,5.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.2,0.2,0.4,0.6,54.0,2.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,8.0,8.0,0.5,4.46,24.0,19.0,3.0,1.0,0.0,0.0,123.0,121.0,71.0,61.0,16.0,41.0,0.0,57.0,5.0,0.0,10.0,529.0,101.0,4.94,1.317,8.9,1.2,3.0,4.2,1.39,R,TBD,AL,24.0,19.0,1.0,123.0,29.0,17.0,11.0,1.0,0.0,0.966,,,-1.0,-2.0,0.0,2.05,...,,,,,,,,,,,,,,,,,,,,,,,,,,,74,TBD,5,24,19,2,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,C,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,7,56,C (n-32),0.251,W,123.0,5,3.0,13,4.0,7.0,21,0,0.0,
140,Jim Brower,31,SFG,NL,85.0,3.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.5,0.667,0.5,1.167,208.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,7.0,7.0,0.5,3.29,89.0,0.0,21.0,0.0,0.0,1.0,93.0,90.0,42.0,34.0,6.0,36.0,2.0,63.0,4.0,0.0,10.0,401.0,133.0,3.82,1.355,8.7,0.6,3.5,6.1,1.75,R,SFG,NL,89.0,0.0,0.0,93.0,29.0,6.0,21.0,2.0,0.0,0.931,,,1.0,2.0,0.0,2.61,...,,,,,,,,,,,,,,,,,,,,,,,,,,,133,SFG,6,89,0,85,89,89,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,12,26,12,24.0,46,0,0.0,,12,51,AAA (26-46),0.249,M,93.0,1,3.0,13,6.0,9.0,23,0,0.0,
169,Daniel Cabrera,23,BAL,AL,3.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,12.0,8.0,0.6,5.0,28.0,27.0,1.0,1.0,1.0,1.0,147.2,145.0,85.0,82.0,14.0,89.0,2.0,76.0,2.0,0.0,12.0,662.0,91.0,5.1,1.585,8.8,0.9,5.4,4.6,0.85,R,BAL,AL,28.0,27.0,1.0,147.2,16.0,9.0,6.0,1.0,0.0,0.938,,,-7.0,-9.0,-1.0,0.91,...,,,,,,,,,,,,,,,,,,,,,,,,,,,161,BAL,1st,28,27,3,28,28,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G (n-66),0.254,W,147.666667,5,5.0,15,4.0,9.0,23,0,0.0,
222,Matt Clement,29,CHC,NL,29.0,61.0,55.0,2.0,8.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,29.0,0.145,0.158,0.164,0.322,-17.0,9.0,0.0,0.0,4.0,1.0,0.0,1,R,NL,9.0,13.0,0.409,3.68,30.0,30.0,0.0,0.0,0.0,0.0,181.0,155.0,79.0,74.0,23.0,77.0,4.0,190.0,12.0,1.0,14.0,775.0,120.0,4.08,1.282,7.7,1.1,3.8,9.4,2.47,R,CHC,NL,30.0,30.0,0.0,181.0,40.0,16.0,21.0,3.0,3.0,0.925,,,-5.0,-6.0,-1.0,1.84,...,,,,,,,,,,,,,,,,,,,,,,,,,,,217,CHC,7,30,30,29,30,30,0,0,0,0,0,0,0,0,0,0,0,0,P,0.068966,,D,0,,0,0.0,,0.0,,1,11,17,18.0,36,0,0.0,,5,62,D (11-36),0.226,M,181.0,6,4.0,14,9.0,13.0,31,1,14.0,/32
232,José Contreras,32,TOT,AL,3.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,13.0,9.0,0.591,5.5,31.0,31.0,0.0,0.0,0.0,0.0,170.1,166.0,114.0,104.0,31.0,84.0,1.0,150.0,8.0,0.0,17.0,758.0,84.0,5.27,1.468,8.8,1.6,4.4,7.9,1.79,R,TOT,AL,31.0,31.0,0.0,170.1,21.0,7.0,12.0,2.0,0.0,0.905,,,,,,1.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,227,2TM,2,31,31,3,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,22,22.0,44,0,0.0,,0,66,G (n-44),0.249,M,170.333333,5,4.0,14,7.0,11.0,25,0,0.0,
233,José Contreras,32,NYY,AL,3.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,8.0,5.0,0.615,5.64,18.0,18.0,0.0,0.0,0.0,0.0,95.2,93.0,66.0,60.0,22.0,42.0,1.0,82.0,6.0,0.0,10.0,425.0,80.0,5.83,1.411,8.7,2.1,4.0,7.7,1.95,R,TOT,AL,31.0,31.0,0.0,170.1,21.0,7.0,12.0,2.0,0.0,0.905,,,,,,1.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,227,2TM,2,31,31,3,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,22,22.0,44,0,0.0,,0,66,G (n-44),0.247,M,95.666667,5,4.0,14,7.0,11.0,25,1,12.0,/26
483,Kevin Gregg,26,ANA,AL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,5.0,2.0,0.714,4.21,55.0,0.0,23.0,0.0,0.0,1.0,87.2,86.0,43.0,41.0,6.0,28.0,3.0,84.0,3.0,1.0,13.0,377.0,106.0,3.08,1.3,8.8,0.6,2.9,8.6,3.0,R,ANA,AL,55.0,0.0,0.0,87.2,7.0,2.0,5.0,0.0,1.0,1.0,,,-3.0,-7.0,0.0,0.72,...,,,,,,,,,,,,,,,,,,,,,,,,,,,459,ANA,2,55,0,5,55,55,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.249,M,87.666667,2,3.0,13,8.0,11.0,25,0,0.0,
661,Gary Knotts,27,DET,AL,2.0,5.0,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.333,0.5,0.333,0.833,130.0,1.0,0.0,0.0,1.0,0.0,0.0,1,R,AL,7.0,6.0,0.538,5.25,36.0,19.0,6.0,0.0,0.0,2.0,135.1,142.0,83.0,79.0,20.0,58.0,3.0,81.0,4.0,0.0,11.0,599.0,85.0,5.15,1.478,9.4,1.3,3.9,5.4,1.4,R,DET,AL,36.0,19.0,0.0,135.1,25.0,14.0,11.0,0.0,1.0,1.0,,,-2.0,-3.0,0.0,1.66,...,,,,,,,,,,,,,,,,,,,,,,,,,,,621,DET,4,36,19,2,36,36,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,A,0,,0,0.0,,0.0,,7,21,7,14.0,32,0,0.0,,7,56,A (21-32),0.264,W,135.333333,4,3.0,13,5.0,8.0,22,0,0.0,
675,John Lackey,25,ANA,AL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,14.0,13.0,0.519,4.67,33.0,32.0,0.0,1.0,1.0,0.0,198.1,215.0,108.0,103.0,22.0,60.0,4.0,144.0,8.0,1.0,11.0,855.0,95.0,4.07,1.387,9.8,1.0,2.7,6.5,2.4,R,ANA,AL,33.0,32.0,1.0,198.1,38.0,15.0,23.0,0.0,1.0,1.0,,,-3.0,-3.0,0.0,1.72,...,,,,,,,,,,,,,,,,,,,,,,,,,,,634,ANA,3,33,32,2,33,33,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G (n-36),0.273,W,198.333333,6,3.0,13,6.0,9.0,23,0,0.0,


In [1236]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        610
[WP]    118
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [1237]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    65
0.142857    22
0.125000    18
0.111111    17
0.166667    15
            ..
0.191176     1
0.130208     1
0.176744     1
0.041667     1
0.096774     1
Name: hr_rate_pit, Length: 354, dtype: int64

In [1238]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     885
+    462
-     94
Name: gopher_ball, dtype: int64

In [1239]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.50,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.00,...,,,,,,,,,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1,6.0,16,3.0,9.0,23,1,10.0,/24,,0.05,-
29,Jimmy Anderson,28,TOT,MLB,7.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,L,MLB,0.0,0.0,,5.17,12.0,0.0,4.0,0.0,0.0,1.0,15.2,19.0,9.0,9.0,0.0,6.0,0.0,6.0,2.0,0.0,2.0,70.0,92.0,3.81,1.596,10.9,0.0,3.4,3.4,1.00,L,TOT,ZZ,12.0,0.0,0.0,15.2,5.0,4.0,1.0,0.0,0.0,1.0,,,,,,2.87,...,,,,,,,,,,,,,,,,,,,,,,,,29,2TM,6,12,0,7,12,12,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.306,X,15.666667,1,3.0,13,3.0,6.0,16,1,7.0,/21,,0.00,-
30,Jimmy Anderson,28,CHC,NL,6.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,4.66,7.0,0.0,2.0,0.0,0.0,1.0,9.2,9.0,5.0,5.0,0.0,3.0,0.0,3.0,2.0,0.0,1.0,42.0,98.0,3.98,1.241,8.4,0.0,2.8,2.8,1.00,L,TOT,ZZ,12.0,0.0,0.0,15.2,5.0,4.0,1.0,0.0,0.0,1.0,,,,,,2.87,...,,,,,,,,,,,,,,,,,,,,,,,,29,2TM,6,12,0,7,12,12,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.243,M,9.666667,1,3.0,13,3.0,6.0,16,2,8.0,/22,,0.00,-
31,Jimmy Anderson,28,BOS,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,AL,0.0,0.0,,6.00,5.0,0.0,2.0,0.0,0.0,0.0,6.0,10.0,4.0,4.0,0.0,3.0,0.0,3.0,0.0,0.0,1.0,28.0,85.0,3.55,2.167,15.0,0.0,4.5,4.5,1.00,L,TOT,ZZ,12.0,0.0,0.0,15.2,5.0,4.0,1.0,0.0,0.0,1.0,,,,,,2.87,...,,,,,,,,,,,,,,,,,,,,,,,,29,2TM,6,12,0,7,12,12,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.400,Z,6.000000,1,4.0,14,4.0,8.0,22,0,0.0,,,0.00,-
38,Andy Ashby,36,SDP,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,0.00,2.0,0.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,7.0,,1.05,0.500,4.5,0.0,0.0,9.0,,R,SDP,NL,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.00,...,,,,,,,,,,,,,,,,,,,,,,,,37,SDP,14,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.143,J,2.000000,1,0.0,11,10.0,10.0,24,0,0.0,,,0.00,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1424,Jason Stanford,27,CLE,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,0.82,2.0,2.0,0.0,0.0,0.0,0.0,11.0,12.0,1.0,1.0,0.0,5.0,0.0,5.0,1.0,0.0,1.0,50.0,550.0,3.78,1.545,9.8,0.0,4.1,4.1,1.00,L,CLE,AL,2.0,2.0,0.0,11.0,4.0,2.0,2.0,0.0,1.0,1.0,,,-1.0,-18.0,0.0,3.27,...,,,,,,,,,,,,,,,,,,,,,,,,1073,CLE,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.273,W,11.000000,6,4.0,14,4.0,8.0,22,1,9.0,/23,,0.00,-
1428,Brad Thomas,26,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,16.88,3.0,0.0,0.0,0.0,0.0,0.0,2.2,7.0,5.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,16.0,31.0,4.17,3.000,23.6,0.0,3.4,0.0,0.00,L,MIN,AL,3.0,0.0,0.0,2.2,1.0,0.0,1.0,0.0,0.0,1.0,,,-1.0,-75.0,0.0,3.38,...,,,,,,,,,,,,,,,,,,,,,,,,1106,MIN,3,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.467,Z,2.666667,1,2.0,12,0.0,0.0,n,0,0.0,,,0.00,-
1429,Lino Urdaneta,24,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,6.0,6.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6.0,3.0,,,,,,,0.00,R,DET,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,...,,,,,,,,,,,,,,,,,,,,,,,,1134,DET,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),1.000,Z,0.000000,1,6.0,16,0.0,0.0,n,0,0.0,,,0.00,-
1436,Randy Williams,28,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,5.79,6.0,0.0,1.0,0.0,0.0,0.0,4.2,3.0,3.0,3.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,22.0,83.0,5.19,1.929,5.8,0.0,11.6,7.7,0.67,L,SEA,AL,6.0,0.0,0.0,4.2,1.0,0.0,1.0,0.0,1.0,1.0,,,0.0,0.0,0.0,1.93,...,,,,,,,,,,,,,,,,,,,,,,,,1202,SEA,1st,6,0,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.188,K,4.666667,1,10.0,24,7.0,17.0,35,0,0.0,,,0.00,-


### Pitcher Control Number

In [1240]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

12.0    169
13.0    148
11.0    141
14.0     70
15.0     44
10.0     37
16.0     30
9.0      20
17.0     13
19.0     10
18.0      9
8.0       9
24.0      6
22.0      5
0.0       3
20.0      3
5.0       2
21.0      2
26.0      1
25.0      1
28.0      1
6.0       1
4.0       1
7.0       1
36.0      1
Name: control_rate, dtype: int64

In [1241]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    713
46     169
45     148
51     141
44      70
43      44
52      37
42      30
53      20
41      13
35      10
36       9
54       9
26       6
32       5
65       3
34       3
33       2
61       2
55       1
62       1
25       1
22       1
11       1
24       1
56       1
Name: PCN, dtype: int64

### Probable Hit Number

In [1242]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     719
9     198
8     181
7      95
10     93
11     42
6      41
12     23
5      12
13     11
14      8
15      4
3       4
16      3
24      2
4       2
19      1
22      1
30      1
Name: hit_rate_pit, dtype: int64

In [1243]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [1244]:
players["hit_rate_pit"].value_counts()

9.0     198
8.0     181
7.0      95
10.0     93
11.0     42
6.0      41
12.0     23
5.0      12
13.0     11
14.0      8
0.0       6
15.0      4
3.0       4
16.0      3
24.0      2
4.0       2
19.0      1
22.0      1
30.0      1
Name: hit_rate_pit, dtype: int64

In [1245]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    713
54     198
55     181
56      95
53      93
52      42
61      41
51      23
62      12
46      11
45       8
66       6
64       4
44       4
43       3
31       2
63       2
21       1
33       1
36       1
Name: PPH, dtype: int64

### Pitcher Rating

In [1246]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1247]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

        713
+M1      52
+W1      48
+W6      37
+X1      36
       ... 
-J+2      1
+L4       1
L7        1
L6        1
Z5        1
Name: goph_lett_inn, Length: 118, dtype: int64

In [1248]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      713
(13-23)         39
(13-22)         38
(14-25)         27
(13-21)         26
              ... 
(22-33/34)       1
(11-23)          1
(23-33)          1
(12-15/16)       1
(13-32)          1
Name: bb_k_hbp, Length: 157, dtype: int64

In [1249]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               713
(13-23)         39
(13-22)         38
(14-25)         27
(13-21)         26
              ... 
(21-24)          1
(24-26/32)       1
(15-32/34)       1
(14-32/34)       1
(13-32)          1
Name: bb_k_hbp, Length: 157, dtype: int64

In [1250]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

+W1 (14-24)          7
+W6 (13-23)  [WP]    6
+L1 (14-26)          4
+X1 (13-22/23)       4
+W1 (13-23)          4
                    ..
+X5 (14-23/24)       1
-Z1 (15-26)          1
Z+2 (13-21)          1
+L1 (15-33)          1
J+1 (11-26)          1
Name: pitcher_rating, Length: 624, dtype: int64

In [1251]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [1252]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,...,,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1.0,6.0,16.0,3.0,9.0,23.0,1,10.0,/24,,0.05,-,19.0,35.0,12.0,51.0,-Z1,(16-23/24),-Z1 (16-23/24)
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.272,W,96.0,5.0,5.0,15.0,4.0,9.0,23.0,0,0.0,,[WP],0.207547,+,13.0,45.0,8.0,55.0,+W5,(15-23),+W5 (15-23) [WP]
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.289,X,49.0,5.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.245614,+,14.0,44.0,9.0,54.0,+X5,(15-22),+X5 (15-22)
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,...,,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,...,,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42),0.287,X,157.666667,4.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.159574,+,12.0,46.0,10.0,53.0,+X4,(12-22),+X4 (12-22)
5,Mike Adams,25,MIL,NL,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,2.0,3.0,0.4,3.4,46.0,0.0,13.0,0.0,0.0,0.0,53.0,50.0,21.0,20.0,5.0,14.0,2.0,39.0,2.0,0.0,2.0,225.0,129.0,3.71,1.208,8.5,0.8,2.4,6.6,2.79,R,MIL,NL,46.0,0.0,0.0,53.0,5.0,0.0,5.0,0.0,0.0,1.0,,,0.0,0.0,0.0,0.85,...,,,,,,,,,,,,,,,,,5,MIL,1st,46,0,43,46,46,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.239,M,53.0,1.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(12-22),+M1 (12-22)
6,Russ Adams,23,TOR,AL,22.0,78.0,72.0,10.0,22.0,2.0,1.0,4.0,10.0,1.0,0.0,5.0,5.0,0.306,0.359,0.528,0.887,124.0,38.0,3.0,1.0,0.0,0.0,0.0,6/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOR,AL,21.0,18.0,17.0,159.1,78.0,26.0,47.0,5.0,9.0,0.936,-1.0,-9.0,-2.0,-14.0,0.0,4.12,...,,,,,,,,,,,,,,,,,6,TOR,1st,22,18,22,21,0,0,0,0,0,21,0,0,0,0,0,4,0,SS,0.454545,,B+,7,21.0,2,9.0,(23),0.048,,2,12,2,4.0,14,0,0.0,,10,53,B+21(23) (12-14),,,,,,,,,,0,0.0,,,,,,,,,,,
7,Terry Adams,31,TOT,AL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,6.0,4.0,0.6,4.76,61.0,0.0,21.0,0.0,0.0,3.0,70.0,84.0,39.0,37.0,10.0,28.0,3.0,56.0,2.0,0.0,8.0,316.0,102.0,4.59,1.6,10.8,1.3,3.6,7.2,2.0,R,TOT,AL,61.0,0.0,0.0,70.0,15.0,9.0,4.0,2.0,1.0,0.867,,,,,,1.67,...,,,,,,,,,,,,,,,,,7,2TM,10,61,0,5,61,61,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.294,X,70.0,1.0,3.0,13.0,6.0,9.0,23.0,0,0.0,,[WP],0.119048,+,13.0,45.0,10.0,53.0,+X1,(13-23),+X1 (13-23) [WP]
8,Terry Adams,31,TOR,AL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,4.0,4.0,0.5,3.98,42.0,0.0,20.0,0.0,0.0,3.0,43.0,49.0,20.0,19.0,4.0,22.0,2.0,35.0,1.0,0.0,6.0,197.0,122.0,4.24,1.651,10.3,0.8,4.6,7.3,1.59,R,TOT,AL,61.0,0.0,0.0,70.0,15.0,9.0,4.0,2.0,1.0,0.867,,,,,,1.67,...,,,,,,,,,,,,,,,,,7,2TM,10,61,0,5,61,61,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.282,X,43.0,1.0,4.0,14.0,6.0,10.0,24.0,0,0.0,,[WP],0.081633,,13.0,45.0,9.0,54.0,X1,(14-24),X1 (14-24) [WP]
9,Jon Adkins,26,CHW,AL,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,3.0,0.4,4.65,50.0,0.0,19.0,0.0,0.0,0.0,62.0,75.0,35.0,32.0,13.0,20.0,3.0,44.0,1.0,0.0,1.0,271.0,101.0,5.37,1.532,10.9,1.9,2.9,6.4,2.2,R,CHW,AL,50.0,0.0,0.0,62.0,13.0,4.0,8.0,1.0,1.0,0.923,,,2.0,6.0,0.0,1.74,...,,,,,,,,,,,,,,,,,8,CHW,2,50,0,3,50,50,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.3,X,62.0,1.0,3.0,13.0,6.0,9.0,23.0,0,0.0,,,0.173333,+,13.0,45.0,10.0,53.0,+X1,(13-23),+X1 (13-23)


In [1253]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
1431,Eduardo Villacis,24,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,13.5,1.0,1.0,0.0,0.0,0.0,0.0,3.1,6.0,5.0,5.0,1.0,4.0,0.0,0.0,0.0,0.0,1.0,20.0,39.0,10.55,3.0,16.2,2.7,10.8,0.0,0.0,R,KCR,AL,1.0,1.0,0.0,3.1,2.0,0.0,1.0,1.0,0.0,0.5,,,0.0,0.0,0.0,2.7,...,,,,,,,,,,,,,,,,,1156,KCR,1st,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.375,Z,3.333333,3,7.0,21,0.0,0.0,n,0,0.0,,,0.166667,+,18.0,36,11.0,52,+Z3,(21-n),+Z3 (21-n)
1432,Doug Waechter,23,TBD,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,5.0,7.0,0.417,6.01,14.0,14.0,0.0,0.0,0.0,0.0,70.1,68.0,54.0,47.0,20.0,33.0,1.0,36.0,4.0,1.0,1.0,309.0,75.0,7.3,1.436,8.7,2.6,4.2,4.6,1.09,R,TBD,AL,14.0,14.0,0.0,70.1,11.0,5.0,5.0,1.0,0.0,0.909,,,2.0,6.0,0.0,1.28,...,,,,,,,,,,,,,,,,,1165,TBD,2,14,14,0,14,14,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.25,M,70.333333,5,4.0,14,4.0,8.0,22,0,0.0,,,0.294118,+,12.0,46,8.0,55,+M5,(14-22),+M5 (14-22)
1433,John Webb,25,TBD,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.0,4.0,0.0,1.0,0.0,0.0,0.0,9.0,12.0,7.0,7.0,2.0,7.0,0.0,9.0,1.0,0.0,1.0,45.0,67.0,6.6,2.111,12.0,2.0,7.0,9.0,1.29,R,TBD,AL,4.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,...,,,,,,,,,,,,,,,,,1182,TBD,1st,4,0,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.324,Y,9.0,2,6.0,16,7.0,13.0,31,1,14.0,/32,,0.166667,+,16.0,42,10.0,53,+Y2,(16-31/32),+Y2 (16-31/32)
1434,Ben Weber,34,ANA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,2.0,0.0,8.06,18.0,0.0,5.0,0.0,0.0,0.0,22.1,37.0,24.0,20.0,4.0,15.0,0.0,11.0,0.0,0.0,0.0,117.0,56.0,6.41,2.328,14.9,1.6,6.0,4.4,0.73,R,ANA,AL,18.0,0.0,0.0,22.1,0.0,0.0,0.0,0.0,0.0,,,,-2.0,-18.0,,0.0,...,,,,,,,,,,,,,,,,,1183,ANA,5,18,0,0,18,18,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.363,Z,22.333333,1,5.0,15,3.0,8.0,22,0,0.0,,,0.108108,+,16.0,42,11.0,52,+Z1,(15-22),+Z1 (15-22)
1435,Bob Wickman,35,CLE,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,2.0,0.0,4.25,30.0,0.0,21.0,0.0,0.0,13.0,29.2,33.0,14.0,14.0,4.0,10.0,0.0,26.0,2.0,0.0,0.0,129.0,104.0,4.26,1.449,10.0,1.2,3.0,7.9,2.6,R,CLE,AL,30.0,0.0,0.0,29.2,6.0,3.0,3.0,0.0,1.0,1.0,,,0.0,0.0,0.0,1.82,...,,,,,,,,,,,,,,,,,1195,CLE,12,30,0,0,30,30,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.282,X,29.666667,1,3.0,13,7.0,10.0,24,1,11.0,/25,,0.121212,+,13.0,45,9.0,54,+X1,(13-24/25),+X1 (13-24/25)
1436,Randy Williams,28,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,5.79,6.0,0.0,1.0,0.0,0.0,0.0,4.2,3.0,3.0,3.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,22.0,83.0,5.19,1.929,5.8,0.0,11.6,7.7,0.67,L,SEA,AL,6.0,0.0,0.0,4.2,1.0,0.0,1.0,0.0,1.0,1.0,,,0.0,0.0,0.0,1.93,...,,,,,,,,,,,,,,,,,1202,SEA,1st,6,0,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.188,K,4.666667,1,10.0,24,7.0,17.0,35,0,0.0,,,0.0,-,15.0,43,5.0,62,-K1,(24-35),-K1 (24-35)
1437,Todd Williams,33,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,0.0,1.0,2.87,29.0,0.0,7.0,0.0,0.0,0.0,31.1,26.0,10.0,10.0,2.0,9.0,0.0,13.0,5.0,0.0,1.0,126.0,160.0,4.39,1.117,7.5,0.6,2.6,3.7,1.44,R,BAL,AL,29.0,0.0,0.0,31.1,6.0,2.0,4.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.72,...,,,,,,,,,,,,,,,,,1203,BAL,5,29,0,0,29,29,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.232,M,31.333333,1,3.0,13,4.0,7.0,21,1,8.0,/22,,0.076923,,11.0,51,7.0,56,M1,(13-21/22),M1 (13-21/22)
1438,Dan Wright,26,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,4.0,0.0,8.15,4.0,4.0,0.0,0.0,0.0,0.0,17.2,24.0,17.0,16.0,5.0,11.0,1.0,6.0,2.0,1.0,0.0,88.0,59.0,8.26,1.981,12.2,2.5,5.6,3.1,0.55,R,CHW,AL,4.0,4.0,0.0,17.2,4.0,1.0,2.0,1.0,0.0,0.75,,,0.0,0.0,0.0,1.53,...,,,,,,,,,,,,,,,,,1227,CHW,4,4,4,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.32,Y,17.666667,4,4.0,14,2.0,6.0,16,1,7.0,/21,,0.208333,+,15.0,43,10.0,53,+Y4,(14-16/21),+Y4 (14-16/21)
1439,Kelly Wunsch,31,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,0.0,3.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,8.0,,3.55,1.5,9.0,0.0,4.5,4.5,1.0,L,CHW,AL,3.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,4.5,...,,,,,,,,,,,,,,,,,1232,CHW,5,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.286,X,2.0,1,4.0,14,4.0,8.0,22,0,0.0,,,0.0,-,14.0,44,9.0,54,-X1,(14-22),-X1 (14-22)
1440,Chris Young,25,TEX,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,3.0,2.0,0.6,4.71,7.0,7.0,0.0,0.0,0.0,0.0,36.1,36.0,21.0,19.0,7.0,10.0,0.0,27.0,2.0,0.0,1.0,158.0,107.0,5.06,1.266,8.9,1.7,2.5,6.7,2.7,R,TEX,AL,7.0,7.0,0.0,36.1,6.0,2.0,3.0,1.0,0.0,0.833,,,0.0,0.0,0.0,1.24,...,,,,,,,,,,,,,,,,,1236,TEX,1st,7,7,0,7,7,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.247,M,36.333333,5,2.0,12,6.0,8.0,22,0,0.0,,,0.194444,+,11.0,51,8.0,55,+M5,(12-22),+M5 (12-22)


In [1254]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [1255]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [1256]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1257]:
players["Primary_Pos_fld"].value_counts()

P     719
OF    273
C     117
1B     92
2B     90
SS     77
3B     67
Name: Primary_Pos_fld, dtype: int64

In [1258]:
players["Primary_Pos_fld"].isnull().sum()

6

In [1259]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.990587
2B    0.976800
3B    0.961939
C     0.992086
OF    0.982015
P     0.947038
SS    0.965961
Name: Fld%, dtype: float64

### Superior Rating

In [1260]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [1261]:
players["superior_rating"].value_counts()

S    744
     697
Name: superior_rating, dtype: int64

In [1262]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,...,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1.0,6.0,16.0,3.0,9.0,23.0,1,10.0,/24,,0.05,-,19.0,35.0,12.0,51.0,-Z1,(16-23/24),-Z1 (16-23/24),
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.272,W,96.0,5.0,5.0,15.0,4.0,9.0,23.0,0,0.0,,[WP],0.207547,+,13.0,45.0,8.0,55.0,+W5,(15-23),+W5 (15-23) [WP],
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.289,X,49.0,5.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.245614,+,14.0,44.0,9.0,54.0,+X5,(15-22),+X5 (15-22),
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,...,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,,
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,...,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42),0.287,X,157.666667,4.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.159574,+,12.0,46.0,10.0,53.0,+X4,(12-22),+X4 (12-22),S
5,Mike Adams,25,MIL,NL,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,2.0,3.0,0.4,3.4,46.0,0.0,13.0,0.0,0.0,0.0,53.0,50.0,21.0,20.0,5.0,14.0,2.0,39.0,2.0,0.0,2.0,225.0,129.0,3.71,1.208,8.5,0.8,2.4,6.6,2.79,R,MIL,NL,46.0,0.0,0.0,53.0,5.0,0.0,5.0,0.0,0.0,1.0,,,0.0,0.0,0.0,0.85,...,,,,,,,,,,,,,,,,5,MIL,1st,46,0,43,46,46,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.239,M,53.0,1.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(12-22),+M1 (12-22),S
6,Russ Adams,23,TOR,AL,22.0,78.0,72.0,10.0,22.0,2.0,1.0,4.0,10.0,1.0,0.0,5.0,5.0,0.306,0.359,0.528,0.887,124.0,38.0,3.0,1.0,0.0,0.0,0.0,6/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOR,AL,21.0,18.0,17.0,159.1,78.0,26.0,47.0,5.0,9.0,0.936,-1.0,-9.0,-2.0,-14.0,0.0,4.12,...,,,,,,,,,,,,,,,,6,TOR,1st,22,18,22,21,0,0,0,0,0,21,0,0,0,0,0,4,0,SS,0.454545,,B+,7,21.0,2,9.0,(23),0.048,,2,12,2,4.0,14,0,0.0,,10,53,B+21(23) (12-14),,,,,,,,,,0,0.0,,,,,,,,,,,,
7,Terry Adams,31,TOT,AL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,6.0,4.0,0.6,4.76,61.0,0.0,21.0,0.0,0.0,3.0,70.0,84.0,39.0,37.0,10.0,28.0,3.0,56.0,2.0,0.0,8.0,316.0,102.0,4.59,1.6,10.8,1.3,3.6,7.2,2.0,R,TOT,AL,61.0,0.0,0.0,70.0,15.0,9.0,4.0,2.0,1.0,0.867,,,,,,1.67,...,,,,,,,,,,,,,,,,7,2TM,10,61,0,5,61,61,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.294,X,70.0,1.0,3.0,13.0,6.0,9.0,23.0,0,0.0,,[WP],0.119048,+,13.0,45.0,10.0,53.0,+X1,(13-23),+X1 (13-23) [WP],
8,Terry Adams,31,TOR,AL,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,4.0,4.0,0.5,3.98,42.0,0.0,20.0,0.0,0.0,3.0,43.0,49.0,20.0,19.0,4.0,22.0,2.0,35.0,1.0,0.0,6.0,197.0,122.0,4.24,1.651,10.3,0.8,4.6,7.3,1.59,R,TOT,AL,61.0,0.0,0.0,70.0,15.0,9.0,4.0,2.0,1.0,0.867,,,,,,1.67,...,,,,,,,,,,,,,,,,7,2TM,10,61,0,5,61,61,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.282,X,43.0,1.0,4.0,14.0,6.0,10.0,24.0,0,0.0,,[WP],0.081633,,13.0,45.0,9.0,54.0,X1,(14-24),X1 (14-24) [WP],
9,Jon Adkins,26,CHW,AL,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,3.0,0.4,4.65,50.0,0.0,19.0,0.0,0.0,0.0,62.0,75.0,35.0,32.0,13.0,20.0,3.0,44.0,1.0,0.0,1.0,271.0,101.0,5.37,1.532,10.9,1.9,2.9,6.4,2.2,R,CHW,AL,50.0,0.0,0.0,62.0,13.0,4.0,8.0,1.0,1.0,0.923,,,2.0,6.0,0.0,1.74,...,,,,,,,,,,,,,,,,8,CHW,2,50,0,3,50,50,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.3,X,62.0,1.0,3.0,13.0,6.0,9.0,23.0,0,0.0,,,0.173333,+,13.0,45.0,10.0,53.0,+X1,(13-23),+X1 (13-23),


### Arm Rating

In [1263]:
players["G"].value_counts()

4.0      41
34.0     38
31.0     33
3.0      32
2.0      29
         ..
96.0      1
110.0     1
58.0      1
88.0      1
106.0     1
Name: G, Length: 164, dtype: int64

In [1264]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.5390366881673258

In [1265]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [1266]:
players["arm_rating"].value_counts()

8    1094
9     347
Name: arm_rating, dtype: int64

### Range Rating

In [1267]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.2434084447528901

In [1268]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    4.635973
2B    1.319364
3B    0.904045
C     4.776824
OF    1.465741
P     0.175621
SS    1.209878
Name: po_gp, dtype: float64

In [1269]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [1270]:
players["range_rating"].value_counts()

4    1174
5     267
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [1271]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'RsbC', 'RerC', 'RF/9_cat', 'RF/G_cat',
       'PB', 'WP_cat', 'SB_c

In [1272]:
players["CS%"].value_counts()

25%    9
33%    8
0%     7
28%    7
50%    6
27%    6
31%    6
29%    5
32%    5
43%    4
20%    4
38%    4
23%    3
30%    3
36%    3
19%    3
34%    3
21%    3
35%    3
24%    2
45%    2
22%    2
26%    2
49%    2
41%    1
44%    1
64%    1
13%    1
67%    1
47%    1
40%    1
39%    1
15%    1
6%     1
11%    1
42%    1
14%    1
Name: CS%, dtype: int64

In [1273]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.2968236522879854

In [1274]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [1275]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    1328
-1      44
-2      31
        20
-3      18
Name: cs_num, dtype: int64

In [1276]:
players["G_cat"].isnull().sum()

1310

In [1277]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      1346
-1      44
-2      31
-3      18
-4       2
Name: cs_num, dtype: int64

In [1278]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num
576,AJ Hinch,30,PHI,NL,4.0,11.0,11.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.182,0.182,0.273,0.455,13.0,3.0,0.0,0.0,0.0,0.0,0.0,/2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,4.0,1.0,1.0,25.2,19.0,14.0,5.0,0.0,0.0,1.0,1.0,47.0,1.0,47.0,0.0,6.66,...,1.0,0.0,6.66,4.75,0.0,0.0,1.0,2.0,67%,542,PHI,7,4,1,4,4,0,4,0,0,0,0,0,0,0,0,0,0,0,C,0.0,,D+,0,,0,0.0,,0.0,,0,n,13,13.0,31,0,0.0,,7,56,D+ (n-31),,,,,,,,,,0,0.0,,,,,,,,,,,,S,1.25,9,3.5,4,0.666667,-4
746,Robert Machado,31,BAL,AL,37.0,77.0,73.0,5.0,11.0,3.0,0.0,1.0,3.0,0.0,0.0,4.0,18.0,0.151,0.195,0.233,0.428,12.0,17.0,2.0,0.0,0.0,0.0,0.0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,35.0,19.0,14.0,188.2,172.0,157.0,14.0,1.0,6.0,0.994,2.0,13.0,3.0,19.0,0.0,8.16,...,3.0,0.0,8.16,4.89,2.0,9.0,4.0,7.0,64%,704,BAL,9,37,19,37,35,0,35,0,0,0,0,0,0,0,0,0,4,0,C,0.081081,,D,3,13.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,5,62,D13 (12-24),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.378378,9,4.243243,4,0.636364,-4


### Fielder Rating

In [1279]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

S84       506
84        433
85         88
94         78
S85        66
S94        63
S95        57
95         55
S94 -1     28
94 -2      19
94 -1      16
S94 -2     12
S94 -3     10
94 -3       7
S94 -4      2
85 -3       1
Name: fielder_rating, dtype: int64

In [1280]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [1281]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [1282]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [1283]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,...,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1.0,6.0,16.0,3.0,9.0,23.0,1,10.0,/24,,0.05,-,19.0,35.0,12.0,51.0,-Z1,(16-23/24),-Z1 (16-23/24),,0.0,8,0.0,4,,,84
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.272,W,96.0,5.0,5.0,15.0,4.0,9.0,23.0,0,0.0,,[WP],0.207547,+,13.0,45.0,8.0,55.0,+W5,(15-23),+W5 (15-23) [WP],,0.45,8,0.3,5,,,85
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,...,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.289,X,49.0,5.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.245614,+,14.0,44.0,9.0,54.0,+X5,(15-22),+X5 (15-22),,0.45,8,0.3,5,,,85
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,...,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.081761,9,1.955975,4,,,94
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,...,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42),0.287,X,157.666667,4.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.159574,+,12.0,46.0,10.0,53.0,+X4,(12-22),+X4 (12-22),S,0.461538,8,0.102564,4,,,S84


In [1284]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_k_hbp', 'pitcher_rating', 'superior_rating', 'a_gp', 'arm_rating',
       'po_gp', 'range_rating', 'cs_rate', 'cs_num', 'fielder_rating'],
      dtype='object', length=181)

In [1285]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,0.0,P,,,,,,...,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1.0,6.0,16.0,3.0,9.0,23.0,1,10.0,/24,,0.05,-,19.0,35.0,12.0,51.0,-Z1,(16-23/24),-Z1 (16-23/24),,0.0,8,0.0,4,,,84
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.272,W,96.0,5.0,5.0,15.0,4.0,9.0,23.0,0,0.0,,[WP],0.207547,+,13.0,45.0,8.0,55.0,+W5,(15-23),+W5 (15-23) [WP],,0.45,8,0.3,5,,,85
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.289,X,49.0,5.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.245614,+,14.0,44.0,9.0,54.0,+X5,(15-22),+X5 (15-22),,0.45,8,0.3,5,,,85
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,...,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.081761,9,1.955975,4,,,94
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,0.56,P,,,,,,...,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42),0.287,X,157.666667,4.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.159574,+,12.0,46.0,10.0,53.0,+X4,(12-22),+X4 (12-22),S,0.461538,8,0.102564,4,,,S84


In [1286]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,David Aardsma,22,SFG,NL,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,6.75,11.0,0.0,5.0,0.0,0.0,0.0,10.2,20.0,8.0,8.0,1.0,10.0,0.0,5.0,2.0,0.0,0.0,61.0,67.0,6.71,2.813,16.9,0.8,8.4,4.2,0.5,R,SFG,NL,11.0,0.0,0.0,10.2,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-19.0,0.0,0.0,0.0,P,,,,,,...,,,,,,,,,,,,,,,,1,SFG,1st,11,0,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.408,Z,10.666667,1.0,6.0,16.0,3.0,9.0,23.0,1,10.0,/24,,0.05,-,19.0,35.0,12.0,51.0,-Z1,(16-23/24),-Z1 (16-23/24),,0.0,8,0.0,4,,,84
1,Paul Abbott,36,TOT,MLB,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,3.0,11.0,0.214,6.47,20.0,19.0,0.0,0.0,0.0,0.0,96.0,106.0,76.0,69.0,22.0,58.0,1.0,46.0,4.0,0.0,6.0,451.0,70.0,7.01,1.708,9.9,2.1,5.4,4.3,0.79,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.272,W,96.0,5.0,5.0,15.0,4.0,9.0,23.0,0,0.0,,[WP],0.207547,+,13.0,45.0,8.0,55.0,+W5,(15-23),+W5 (15-23) [WP],,0.45,8,0.3,5,,,85
2,Paul Abbott,36,PHI,NL,8.0,14.0,11.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0.182,0.182,0.182,0.364,-7.0,2.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,1.0,6.0,0.143,6.24,10.0,10.0,0.0,0.0,0.0,0.0,49.0,57.0,37.0,34.0,14.0,31.0,1.0,21.0,1.0,0.0,3.0,229.0,72.0,7.87,1.796,10.5,2.6,5.7,3.9,0.68,R,TOT,ZZ,20.0,19.0,0.0,96.0,17.0,6.0,9.0,2.0,0.0,0.882,,,,,,1.41,0.75,P,,,,,,...,,,,,,,,,,,,,,,,2,2TM,11,20,19,8,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D+,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,D+ (n-24),0.289,X,49.0,5.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.245614,+,14.0,44.0,9.0,54.0,+X5,(15-22),+X5 (15-22),,0.45,8,0.3,5,,,85
3,Bobby Abreu,30,PHI,NL,159.0,713.0,574.0,118.0,173.0,47.0,1.0,30.0,105.0,40.0,5.0,127.0,116.0,0.301,0.428,0.544,0.971,145.0,312.0,5.0,5.0,0.0,7.0,10.0,*9/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,158.0,157.0,149.0,1394.2,330.0,311.0,13.0,6.0,4.0,0.982,-16.0,-14.0,1.0,1.0,3.0,2.09,2.05,OF,,,,,,...,,,,,,,,,,,,,,,,3,PHI,9,159,157,159,158,0,0,0,0,0,0,0,0,158,158,0,2,0,OF,0.660377,#,B+,6,16.0,0,0.0,,0.176,**,6,16,6,12.0,26,0,0.0,,9,54,#B+16** (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.081761,9,1.955975,4,,,94
4,Jose Acevedo,26,CIN,NL,38.0,52.0,43.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,26.0,0.047,0.106,0.047,0.153,-58.0,2.0,1.0,0.0,5.0,1.0,0.0,1,R,NL,5.0,12.0,0.294,5.94,39.0,27.0,3.0,0.0,0.0,0.0,157.2,188.0,108.0,104.0,30.0,45.0,8.0,117.0,5.0,1.0,3.0,704.0,71.0,4.99,1.478,10.7,1.7,2.6,6.7,2.6,R,CIN,NL,39.0,27.0,0.0,157.2,22.0,4.0,18.0,0.0,0.0,1.0,,,-2.0,-3.0,0.0,1.26,0.56,P,,,,,,...,,,,,,,,,,,,,,,,4,CIN,4,39,27,38,39,39,0,0,0,0,0,0,0,0,0,0,0,0,P,0.026316,,G+,0,,0,0.0,,0.0,,2,12,18,20.0,42,0,0.0,,1,66,G+ (12-42),0.287,X,157.666667,4.0,2.0,12.0,6.0,8.0,22.0,0,0.0,,,0.159574,+,12.0,46.0,10.0,53.0,+X4,(12-22),+X4 (12-22),S,0.461538,8,0.102564,4,,,S84


In [1287]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

7.0      34
3.0      33
32.0     32
4.0      31
2.0      31
         ..
105.0     1
132.0     1
85.0      1
124.0     1
133.0     1
Name: Games_Played, Length: 160, dtype: int64

In [1288]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "pitcher_rating", "PCN", "PPH", 
                                "Bats", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,David Aardsma,22,SFG,11.0,0.0,5.0,P,84,G (n-n),66,-Z1 (16-23/24),35.0,51.0,R,R,P,11,0,0,0,0,0,0,0,0,0,0,0,0
1,Paul Abbott,36,TOT,20.0,19.0,0.0,P,85,D+ (n-24),62,+W5 (15-23) [WP],45.0,55.0,R,R,P,20,0,0,0,0,0,0,0,0,0,0,0,0
2,Paul Abbott,36,PHI,10.0,10.0,0.0,P,85,D+ (n-24),62,+X5 (15-22),44.0,54.0,R,R,P,20,0,0,0,0,0,0,0,0,0,0,0,0
3,Bobby Abreu,30,PHI,159.0,,,OF,94,#B+16** (16-26),54,,,,L,,OF,0,0,0,0,0,0,0,0,158,158,0,2,0
4,Jose Acevedo,26,CIN,39.0,27.0,3.0,P,S84,G+ (12-42),66,+X4 (12-22),46.0,53.0,R,R,P,39,0,0,0,0,0,0,0,0,0,0,0,0


In [1289]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [1290]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           719
OF          273
C           117
1B           92
2B           90
SS           77
3B           67
DH_PH_PR      6
              0
Name: Primary, dtype: int64

In [1291]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,David Aardsma,22,SFG,11.0,0.0,5.0,P,84,G (n-n),66,-Z1 (16-23/24),35.0,51.0,R,R,P,11,0,0,0,0,0,0,0,0,0,0,0,0
1,Paul Abbott,36,TOT,20.0,19.0,0.0,P,85,D+ (n-24),62,+W5 (15-23) [WP],45.0,55.0,R,R,P,20,0,0,0,0,0,0,0,0,0,0,0,0
2,Paul Abbott,36,PHI,10.0,10.0,0.0,P,85,D+ (n-24),62,+X5 (15-22),44.0,54.0,R,R,P,20,0,0,0,0,0,0,0,0,0,0,0,0
3,Bobby Abreu,30,PHI,159.0,,,OF,94,#B+16** (16-26),54,,,,L,,OF,0,0,0,0,0,0,0,0,158,158,0,2,0
4,Jose Acevedo,26,CIN,39.0,27.0,3.0,P,S84,G+ (12-42),66,+X4 (12-22),46.0,53.0,R,R,P,39,0,0,0,0,0,0,0,0,0,0,0,0


In [1292]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [1293]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [1294]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [1295]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [1296]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
834,Bengie Molina,29,ANA,97.0,,,C,S94 -1,B14 (12-15),54,,,,R,,C,0,89,0,0,0,0,0,0,0,0,5,6,0
835,Jose Molina,29,ANA,73.0,,,C-1B,S94 -3,B12(13)* (12-25),54,,,,R,,C,0,70,2,0,0,0,0,0,0,0,1,0,2
925,Josh Paul,29,ANA,46.0,,,C-OF,S94 -1,C+14** (13-25),55,,,,R,,C,0,37,0,0,0,0,4,0,0,4,2,7,8
346,Darin Erstad,30,ANA,125.0,,,1B,S84,B+12** (12-21),53,,,,L,,1B,0,0,124,0,0,0,0,0,0,0,0,0,1
668,Casey Kotchman,21,ANA,38.0,,,1B,84,C+* (12-15/16),56,,,,L,,1B,0,0,34,0,0,0,0,0,0,0,1,5,0
414,Andres Galarraga,43,ANA,7.0,,,1B,S84,B+26 (n-24/31),53,,,,R,,1B,0,0,1,0,0,0,0,0,0,0,4,5,0
643,Adam Kennedy,28,ANA,144.0,,,2B,84,B13(14)** (13-23/24),54,,,,L,,2B,0,0,0,144,0,0,0,0,0,0,0,1,1
374,Chone Figgins,26,ANA,148.0,,,3B-OF-2B-SS,85,B+11(15)** (13-22),53,,,,S,,3B,0,0,0,20,92,13,1,54,2,57,1,1,2
447,Troy Glaus,27,ANA,58.0,,,3B,84,#B26(31) (15-31),55,,,,R,,3B,0,0,0,0,19,0,0,0,0,0,39,0,0
986,Robb Quinlan,27,ANA,56.0,,,3B-1B-OF,S85,A+13 (13-22),52,,,,R,,3B,0,0,13,0,32,0,6,0,3,9,4,5,2


In [1297]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'ANA':                      Name  Age   Tm      G  GS  GF    Positions     DEF  \
 834         Bengie Molina   29  ANA   97.0                    C  S94 -1   
 835           Jose Molina   29  ANA   73.0                 C-1B  S94 -3   
 925             Josh Paul   29  ANA   46.0                 C-OF  S94 -1   
 346          Darin Erstad   30  ANA  125.0                   1B    S84    
 668        Casey Kotchman   21  ANA   38.0                   1B     84    
 414      Andres Galarraga   43  ANA    7.0                   1B    S84    
 643          Adam Kennedy   28  ANA  144.0                   2B     84    
 374         Chone Figgins   26  ANA  148.0          3B-OF-2B-SS     85    
 447            Troy Glaus   27  ANA   58.0                   3B     84    
 986          Robb Quinlan   27  ANA   56.0             3B-1B-OF    S85    
 518          Shane Halter   34  ANA   46.0          3B-2B-SS-1B     85    
 803      Dallas McPherson   23  ANA   16.0                   3B    S84    
 330 

In [1298]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [1299]:
create_excel(my_dict)

31

## Format Excel file

In [1306]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [1301]:
# wb.sheetnames

In [1305]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBD": "Tampa Bay Devil Rays",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators"
}

In [1303]:
# team_dict["ATL"]

In [1307]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 8.43
width_2 = 15.0
width_3 = 23.0
width_4 = 6.33
width_5 = 12.83

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_1    
    sheet.column_dimensions['B'].width = width_3
    sheet.column_dimensions['C'].width = width_1
    sheet.column_dimensions['D'].width = width_1
    sheet.column_dimensions['E'].width = width_1
    sheet.column_dimensions['F'].width = width_1
    sheet.column_dimensions['G'].width = width_1
    sheet.column_dimensions['H'].width = width_2
    sheet.column_dimensions['I'].width = width_1
    sheet.column_dimensions['J'].width = width_3
    sheet.column_dimensions['K'].width = width_1
    sheet.column_dimensions['L'].width = width_3
    sheet.column_dimensions['M'].width = width_4
    sheet.column_dimensions['N'].width = width_4
    sheet.column_dimensions['O'].width = width_4
    sheet.column_dimensions['P'].width = width_4
    sheet.column_dimensions['Q'].width = width_5
    sheet.column_dimensions['R'].width = width_4
    sheet.column_dimensions['S'].width = width_4
    sheet.column_dimensions['T'].width = width_4
    sheet.column_dimensions['U'].width = width_4
    sheet.column_dimensions['V'].width = width_4
    sheet.column_dimensions['W'].width = width_4
    sheet.column_dimensions['X'].width = width_4
    sheet.column_dimensions['Y'].width = width_4
    sheet.column_dimensions['Z'].width = width_4
    sheet.column_dimensions['AA'].width = width_4
    sheet.column_dimensions['AB'].width = width_4
    sheet.column_dimensions['AC'].width = width_4
    sheet.column_dimensions['AD'].width = width_4
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment
            cell.font = Font(size = 14)
    
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
            

wb.save('../data/' + year + ' rosters ' + ' formatted.xlsx')

# Clean up

## Remove unwanted files

In [1308]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')