# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [558]:
year = '2020'

In [559]:
url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'
url_app = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-appearances-fielding.shtml'

In [560]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/2020-specialpos_c-fielding.shtml'

In [561]:
session_requests = requests.session()

result = session_requests.get(url_bat, headers = dict(referer = url_bat))
result.status_code

200

In [562]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [563]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [564]:
soup = BeautifulSoup(result.content, 'lxml')

In [565]:
# print(soup.prettify())

In [566]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [567]:
bat = pullTable(url_bat, 'players_standard_batting')

In [568]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [569]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [570]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [571]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [572]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [573]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [574]:
findTables(url_app)

['teams_appearances', 'players_players_appearances_fielding']

In [575]:
app = pullTable(url_app, 'players_players_appearances_fielding')

In [576]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
692,693.0,Christian Yelich*,28.0,MIL,NL,58,247,200,39,41,7,1,12,22,4,2,46,76,0.205,0.356,0.43,0.786,111.0,86,4,1,0,0,2,*7/D
693,694.0,Andy Young,26.0,ARI,NL,12,34,26,3,5,2,0,1,4,0,0,5,10,0.192,0.382,0.385,0.767,107.0,10,0,3,0,0,0,/4D5H7
694,695.0,Bradley Zimmer*,27.0,CLE,AL,20,50,37,3,6,0,0,1,3,2,1,7,14,0.162,0.36,0.243,0.603,69.0,9,2,5,0,1,0,/789H
695,696.0,Mike Zunino,29.0,TBR,AL,28,84,75,8,11,4,0,4,10,0,0,6,37,0.147,0.238,0.36,0.598,65.0,27,0,3,0,0,0,2
696,,LgAvg per 600 PA,,,,171,600,533,75,130,25,2,21,71,8,3,55,140,0.244,0.321,0.415,0.736,,221,11,7,1,4,2,


In [577]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [578]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [579]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    395
L    218
S     83
Name: Bats, dtype: int64

In [580]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
691,692,Mike Yastrzemski,29,SFG,NL,54,225,192,39,57,14,4,10,35,2,1,30,55,0.297,0.4,0.568,0.968,165,109,2,3,0,0,2,*98/7H,L
692,693,Christian Yelich,28,MIL,NL,58,247,200,39,41,7,1,12,22,4,2,46,76,0.205,0.356,0.43,0.786,111,86,4,1,0,0,2,*7/D,L
693,694,Andy Young,26,ARI,NL,12,34,26,3,5,2,0,1,4,0,0,5,10,0.192,0.382,0.385,0.767,107,10,0,3,0,0,0,/4D5H7,R
694,695,Bradley Zimmer,27,CLE,AL,20,50,37,3,6,0,0,1,3,2,1,7,14,0.162,0.36,0.243,0.603,69,9,2,5,0,1,0,/789H,L
695,696,Mike Zunino,29,TBR,AL,28,84,75,8,11,4,0,4,10,0,0,6,37,0.147,0.238,0.36,0.598,65,27,0,3,0,0,0,2,R


In [581]:
bat.drop(columns=["Rk"], inplace=True)

In [582]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
691,Mike Yastrzemski,29,SFG,NL,54,225,192,39,57,14,4,10,35,2,1,30,55,0.297,0.4,0.568,0.968,165,109,2,3,0,0,2,*98/7H,L
692,Christian Yelich,28,MIL,NL,58,247,200,39,41,7,1,12,22,4,2,46,76,0.205,0.356,0.43,0.786,111,86,4,1,0,0,2,*7/D,L
693,Andy Young,26,ARI,NL,12,34,26,3,5,2,0,1,4,0,0,5,10,0.192,0.382,0.385,0.767,107,10,0,3,0,0,0,/4D5H7,R
694,Bradley Zimmer,27,CLE,AL,20,50,37,3,6,0,0,1,3,2,1,7,14,0.162,0.36,0.243,0.603,69,9,2,5,0,1,0,/789H,L
695,Mike Zunino,29,TBR,AL,28,84,75,8,11,4,0,4,10,0,0,6,37,0.147,0.238,0.36,0.598,65,27,0,3,0,0,0,2,R


In [583]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
801,802.0,Kyle Zimmer,28.0,KCR,AL,1,0,1.0,1.57,16,1,4,0,0,0,23.0,14,4,4,0,10,0,26,1,0,2,91,295,2.36,1.043,5.5,0.0,3.9,10.2,2.6
802,803.0,Bruce Zimmermann*,25.0,BAL,AL,0,0,,7.71,2,1,1,0,0,0,7.0,6,6,6,2,2,0,7,2,0,2,31,61,6.62,1.143,7.7,2.6,2.6,9.0,3.5
803,804.0,Jordan Zimmermann,34.0,DET,AL,0,0,,7.94,3,2,0,0,0,0,5.2,11,6,5,0,2,0,6,0,0,0,28,62,2.13,2.294,17.5,0.0,3.2,9.5,3.0
804,805.0,Tyler Zuber,25.0,KCR,AL,1,2,0.333,4.09,23,0,8,0,0,0,22.0,15,11,10,4,20,1,30,1,0,1,99,113,5.69,1.591,6.1,1.6,8.2,12.3,1.5
805,,LgAvg per 180 IP,,,,10,10,0.498,4.48,93,21,21,0,0,5,180.0,169,98,90,27,71,2,182,10,1,8,775,101,4.48,1.332,8.4,1.4,3.6,9.1,2.56


In [584]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [585]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [586]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    585
L    220
Name: Throws, dtype: int64

In [587]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
800,801,T.J. Zeuch,24,TOR,AL,1,0,1.0,1.59,3,1,1,0,0,0,11.1,9,2,2,1,4,0,3,0,0,0,47,284,4.87,1.147,7.1,0.8,3.2,2.4,0.75,R
801,802,Kyle Zimmer,28,KCR,AL,1,0,1.0,1.57,16,1,4,0,0,0,23.0,14,4,4,0,10,0,26,1,0,2,91,295,2.36,1.043,5.5,0.0,3.9,10.2,2.6,R
802,803,Bruce Zimmermann,25,BAL,AL,0,0,,7.71,2,1,1,0,0,0,7.0,6,6,6,2,2,0,7,2,0,2,31,61,6.62,1.143,7.7,2.6,2.6,9.0,3.5,L
803,804,Jordan Zimmermann,34,DET,AL,0,0,,7.94,3,2,0,0,0,0,5.2,11,6,5,0,2,0,6,0,0,0,28,62,2.13,2.294,17.5,0.0,3.2,9.5,3.0,R
804,805,Tyler Zuber,25,KCR,AL,1,2,0.333,4.09,23,0,8,0,0,0,22.0,15,11,10,4,20,1,30,1,0,1,99,113,5.69,1.591,6.1,1.6,8.2,12.3,1.5,R


In [588]:
pit.drop(columns=["Rk"], inplace=True)

In [589]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [590]:
players.shape

(1431, 62)

In [591]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary
1275,1276.0,Bruce Zimmermann,25.0,BAL,AL,2,1,0,7.0,0,0,0,0,0,,,,-1.0,-29.0,,0.0,0.0,P
1276,1277.0,Jordan Zimmermann,34.0,DET,AL,3,2,0,5.2,1,0,1,0,0,1.0,,,0.0,0.0,0.0,1.59,0.33,P
1277,1278.0,Tyler Zuber,25.0,KCR,AL,23,0,0,22.0,2,0,2,0,0,1.0,,,-1.0,-9.0,0.0,0.82,0.09,P
1278,1279.0,Mike Zunino,29.0,TBR,AL,28,25,16,215.2,260,244,15,1,2,0.996,-2.0,-14.0,-2.0,-11.0,-2.0,10.81,9.25,C
1279,,LgAvg,,,,86,57,42,492.0,222,163,55,4,14,0.983,0.0,0.0,,,,3.99,2.53,


In [592]:
fld = fld[fld["Name"] != "LgAvg"]

In [593]:
fld.drop(columns=["Rk"], inplace=True)

In [594]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [595]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rctch,Rdrs,Rdrs/yr,Rgood,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9,RF/G,PB,WP,SB,CS,CS%
107,108.0,Chad Wallach,28.0,MIA,NL,15,13,11,112.1,114,110,4,0,1,1.0,2,16,2,0,0,1,0,0,0,0,0,-1,9.13,7.6,0,2,8,4,33%
108,109.0,Matt Wieters,34.0,STL,NL,18,12,11,97.2,121,119,2,0,0,1.0,-1,-7,-1,-1,-12,0,0,0,0,-1,0,0,11.15,6.72,2,5,1,0,0%
109,110.0,Tony Wolters,28.0,COL,NL,39,35,15,283.2,244,230,13,1,2,0.996,-3,-13,-3,-2,-8,-1,0,-1,0,0,-1,1,7.71,6.23,4,17,17,3,15%
110,111.0,Mike Zunino,29.0,TBR,AL,28,25,16,215.2,260,244,15,1,2,0.996,-2,-14,-2,-2,-11,-2,0,0,1,-1,1,-1,10.81,9.25,5,16,11,4,27%
111,,LgAvg,,,,8,7,6,57.0,61,58,3,0,0,0.993,0,0,0,0,0,0,0,0,0,0,0,0,9.57,7.87,1,3,3,1,25%


In [596]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [597]:
cat = cat[cat["Name"] != "LgAvg"]

In [598]:
cat.drop(columns=["Rk"], inplace=True)

In [599]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [600]:
app.tail()

Unnamed: 0,Rk,Name,Age,Tm,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
1284,1285,Kyle Zimmer,28,KCR,2,16,1,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0
1285,1286,Bruce Zimmermann,25,BAL,1st,2,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0
1286,1287,Jordan Zimmermann,34,DET,12,3,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0
1287,1288,Tyler Zuber,25,KCR,1st,23,0,0,23,23,0,0,0,0,0,0,0,0,0,0,0,0
1288,1289,Mike Zunino,29,TBR,8,28,25,28,28,0,28,0,0,0,0,0,0,0,0,0,0,0


In [601]:
players = pd.merge(players, app, how='left', on=["Name", "Age"], suffixes=('', '_app'))

In [602]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [603]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,2.22,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,3.47,SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,7.5,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0


In [604]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [605]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'Rdrs',
 'Rdrs/yr',
 'Rgood',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'Rdrs_cat',
 'Rdrs/yr_cat',
 'Rgood_cat',
 'Rair',
 'Rrange',
 'Rthrow',
 'RszC',
 'RsbC',
 'Re

In [606]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [607]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     779
OF    245
C     109
2B     86
1B     84
3B     65
SS     53
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [608]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [609]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    126
0.333333     21
0.500000     16
0.250000     13
0.400000     10
           ... 
0.043478      1
0.488372      1
0.695652      1
0.306122      1
0.387097      1
Name: rbi_per_g, Length: 314, dtype: int64

In [610]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     1344
#      87
Name: clutch, dtype: int64

In [611]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.370,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.000000,#
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.250,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.000,9.0,30.0,,,,2.44,2.22,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#
7,Jesus Aguilar,30,MIA,NL,51.0,216.0,188.0,31.0,52.0,10.0,0.0,8.0,34.0,0.0,1.0,23.0,40.0,0.277,0.352,0.457,0.809,120.0,86.0,5.0,1.0,0.0,4.0,0.0,3D/5H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,32.0,31.0,29.0,262.0,266.0,243.0,20.0,3.0,36.0,0.989,-5.0,-24.0,0.0,0.0,0.0,9.03,8.22,1B-3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,MIA,7,51,50,51,31,0,0,31,0,1,0,0,0,0,0,20,1,0,1B,0.666667,#
11,Ozzie Albies,23,ATL,NL,29.0,124.0,118.0,21.0,32.0,5.0,0.0,6.0,19.0,3.0,1.0,5.0,30.0,0.271,0.306,0.466,0.773,99.0,55.0,0.0,1.0,0.0,0.0,0.0,4/H,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,29.0,28.0,28.0,252.2,110.0,43.0,65.0,2.0,13.0,0.982,6.0,30.0,-1.0,-5.0,-1.0,3.85,3.72,2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17,ATL,4,29,28,29,29,0,0,0,29,0,0,0,0,0,0,0,1,0,2B,0.655172,#
16,Anthony Alford,25,PIT,NL,5.0,13.0,12.0,2.0,3.0,0.0,1.0,1.0,4.0,0.0,0.0,1.0,1.0,0.250,0.308,0.667,0.974,156.0,8.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,13.0,10.0,4.0,73.0,8.0,8.0,0.0,0.0,0.0,1.000,-3.0,-52.0,,,,0.99,0.62,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,2TM,4,18,10,18,13,0,0,0,0,0,0,6,6,1,13,2,0,6,OF,0.800000,#
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,Daniel Vogelbach,27,MIL,NL,19.0,67.0,58.0,13.0,19.0,2.0,0.0,4.0,12.0,0.0,0.0,8.0,18.0,0.328,0.418,0.569,0.987,162.0,33.0,2.0,1.0,0.0,0.0,0.0,D/3H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIL,NL,2.0,2.0,1.0,15.0,17.0,15.0,2.0,0.0,1.0,1.000,0.0,-11.0,0.0,0.0,0.0,10.20,8.50,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1210,3TM,5,39,36,39,2,0,0,2,0,0,0,0,0,0,0,34,3,0,1B,0.631579,#
664,Luke Voit,29,NYY,AL,56.0,234.0,213.0,41.0,59.0,5.0,0.0,22.0,52.0,0.0,0.0,17.0,54.0,0.277,0.338,0.610,0.948,156.0,130.0,4.0,3.0,0.0,1.0,0.0,*3/DH,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,48.0,47.0,35.0,377.2,331.0,308.0,20.0,3.0,23.0,0.991,-3.0,-9.0,-3.0,-10.0,0.0,7.82,6.83,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1212,NYY,4,56,55,56,48,0,0,48,0,0,0,0,0,0,0,8,1,0,1B,0.928571,#
672,Jared Walsh,26,LAA,AL,32.0,108.0,99.0,19.0,29.0,4.0,2.0,9.0,26.0,0.0,0.0,5.0,15.0,0.293,0.324,0.646,0.971,158.0,64.0,0.0,1.0,0.0,3.0,0.0,3/H9D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAA,AL,31.0,24.0,21.0,219.0,172.0,160.0,11.0,1.0,10.0,0.994,-1.0,-4.0,-2.0,-11.0,0.0,7.03,5.52,1B-OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1228,LAA,2,32,24,32,30,0,0,29,0,0,0,0,0,2,2,1,2,5,1B,0.812500,#
673,Donovan Walton,26,SEA,AL,5.0,14.0,13.0,0.0,2.0,1.0,0.0,0.0,3.0,0.0,1.0,1.0,5.0,0.154,0.214,0.231,0.445,26.0,3.0,1.0,0.0,0.0,0.0,0.0,/64,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SEA,AL,5.0,4.0,4.0,35.0,12.0,5.0,7.0,0.0,3.0,1.000,0.0,3.0,0.0,0.0,0.0,3.09,2.40,SS-2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1229,SEA,2,5,4,5,5,0,0,0,1,0,4,0,0,0,0,0,0,0,SS,0.600000,#


### Letter Rating

In [612]:
players["BA"].value_counts()

0.000    30
0.250    26
0.200    14
0.333    12
0.167    11
         ..
0.340     1
0.429     1
0.295     1
0.165     1
0.360     1
Name: BA, Length: 212, dtype: int64

In [613]:
players["BA"].isnull().sum()

780

In [614]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [615]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [616]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      810
C+     131
B       99
C       86
D+      70
D       61
B+      60
A       37
E+      20
E       14
A+      13
F       12
AAA      8
AA       7
G+       3
Name: bat_letter, dtype: int64

In [617]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
124,William Contreras,22,ATL,NL,4.0,10.0,10.0,0.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,0.4,0.4,0.5,0.9,135.0,5.0,0.0,0.0,0.0,0.0,0.0,/2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,4.0,2.0,2.0,21.0,22.0,21.0,1.0,0.0,0.0,1.0,-1.0,-35.0,0.0,0.0,0.0,9.43,5.5,C,ATL,NL,4.0,2.0,2.0,21.0,22.0,21.0,1.0,0.0,0.0,1.0,-1.0,-35.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.43,5.5,0.0,2.0,2.0,0.0,0%,238,ATL,1st,4,2,4,4,0,4,0,0,0,0,0,0,0,0,0,0,0,C,0.25,,AAA
285,Oscar Hernandez,26,KCR,AL,4.0,4.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.5,1.0,175.0,2.0,1.0,0.0,0.0,0.0,0.0,/2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,3.0,1.0,0.0,10.0,9.0,7.0,2.0,0.0,0.0,1.0,0.0,42.0,0.0,0.0,0.0,8.1,3.0,C,KCR,AL,3.0,1.0,0.0,10.0,9.0,7.0,2.0,0.0,0.0,1.0,0.0,42.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,8.1,3.0,0.0,1.0,0.0,0.0,,537,KCR,3,4,1,4,3,0,3,0,0,0,0,0,0,0,0,0,1,0,C,0.0,,AAA
318,Jahmai Jones,22,LAA,AL,3.0,7.0,7.0,2.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.429,0.429,0.429,0.857,138.0,3.0,0.0,0.0,0.0,0.0,0.0,/4H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAA,AL,2.0,2.0,2.0,16.0,4.0,1.0,3.0,0.0,1.0,1.0,0.0,25.0,0.0,0.0,0.0,2.25,2.0,2B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,597,LAA,1st,3,2,3,2,0,0,0,2,0,0,0,0,0,0,0,0,1,2B,0.333333,,AAA
371,Rafael Marchan,21,PHI,NL,3.0,9.0,8.0,3.0,4.0,0.0,0.0,1.0,3.0,0.0,0.0,1.0,2.0,0.5,0.556,0.875,1.431,277.0,7.0,0.0,0.0,0.0,0.0,0.0,/2,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,3.0,3.0,2.0,23.0,27.0,25.0,1.0,1.0,0.0,0.963,0.0,-6.0,0.0,0.0,0.0,10.17,8.67,C,PHI,NL,3.0,3.0,2.0,23.0,27.0,25.0,1.0,1.0,0.0,0.963,0.0,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.17,8.67,0.0,0.0,1.0,0.0,0%,713,PHI,1st,3,3,3,3,0,3,0,0,0,0,0,0,0,0,0,0,0,C,1.0,#,AAA
399,Billy McKinney,25,TOR,AL,2.0,3.0,3.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.667,0.667,0.667,1.333,270.0,2.0,0.0,0.0,0.0,0.0,0.0,/7H9,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOR,AL,1.0,1.0,0.0,7.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,15.0,,,,1.29,1.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,757,TOR,3,2,1,2,1,0,0,0,0,0,0,1,0,1,1,0,0,1,OF,0.0,,AAA
438,Brian Navarreto,25,MIA,NL,2.0,5.0,5.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.4,0.4,0.4,0.8,121.0,2.0,0.0,0.0,0.0,0.0,0.0,/2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,2.0,2.0,2.0,15.0,14.0,13.0,1.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,8.4,7.0,C,MIA,NL,2.0,2.0,2.0,15.0,14.0,13.0,1.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.4,7.0,0.0,0.0,1.0,1.0,50%,824,MIA,1st,2,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,C,0.0,,AAA
453,Eduardo Nunez,33,NYM,NL,2.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.5,0.5,0.5,1.0,180.0,1.0,0.0,0.0,0.0,0.0,0.0,/DH9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,1.0,0.0,0.0,1.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,168.0,,,,18.0,2.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,846,NYM,11,2,0,2,1,0,0,0,0,0,0,0,0,1,1,1,0,1,OF,0.0,,AAA
455,Brian O'Grady,28,TBR,AL,2.0,5.0,5.0,2.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.4,0.4,0.6,1.0,175.0,3.0,0.0,0.0,0.0,0.0,0.0,/387,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,2.0,1.0,0.0,10.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,-49.0,0.0,0.0,0.0,0.9,0.5,OF-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,849,TBR,2,2,1,2,2,0,0,1,0,0,0,1,1,0,1,0,0,0,OF,0.0,,AAA


In [618]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.223

### HR Number

In [619]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     960
6      59
4      57
5      52
7      39
9      39
3      38
8      37
2      36
10     26
12     20
11     17
1      15
14     10
18      8
13      7
15      5
16      3
36      3
Name: hr_rate, dtype: int64

In [620]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      960
16     59
14     57
15     52
21     39
23     39
13     38
22     37
12     36
24     26
26     20
25     17
11     15
32     10
36      8
31      7
33      5
66      3
34      3
Name: hr_num_bat, dtype: int64

### Triple Number

In [621]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     1257
1       92
2       45
3       14
4       11
12       3
7        3
6        2
5        2
18       1
9        1
Name: triple_rate, dtype: int64

In [622]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     1257
7.0       24
4.0       24
6.0       22
8.0       20
5.0       17
9.0       15
10.0      11
3.0        9
11.0       8
2.0        5
13.0       4
12.0       4
24.0       3
15.0       2
18.0       2
1.0        2
21.0       1
14.0       1
Name: triple_val, dtype: int64

In [623]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        1257
(14)      24
(21)      24
(16)      22
(22)      20
(15)      17
(23)      15
(24)      11
(13)       9
(25)       8
(12)       5
(31)       4
(26)       4
(46)       3
(11)       2
(33)       2
(36)       2
(32)       1
(43)       1
Name: triple_num, dtype: int64

### Speed Rating

In [624]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [625]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    1122
0.333      12
0.018      10
0.019       8
0.077       8
         ... 
0.174       1
0.049       1
0.179       1
0.092       1
0.061       1
Name: speed_score, Length: 133, dtype: int64

In [626]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [627]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        1264
**        70
****      40
*         34
***       23
Name: speed_rating, dtype: int64

In [628]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
14,Anthony Alford,25,TOT,MLB,18.0,29.0,28.0,5.0,6.0,0.0,1.0,2.0,7.0,3.0,0.0,1.0,8.0,0.214,0.241,0.5,0.741,96.0,14.0,0.0,0.0,0.0,0.0,0.0,/87HD9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,13.0,10.0,4.0,73.0,8.0,8.0,0.0,0.0,0.0,1.0,-3.0,-52.0,,,,0.99,0.62,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,2TM,4,18,10,18,13,0,0,0,0,0,0,6,6,1,13,2,0,6,OF,0.388889,,C,12,26.0,6,18.0,(36),0.75,****
15,Anthony Alford,25,TOR,AL,13.0,16.0,16.0,3.0,3.0,0.0,0.0,1.0,3.0,3.0,0.0,0.0,7.0,0.188,0.188,0.375,0.563,49.0,6.0,0.0,0.0,0.0,0.0,0.0,/7H8D9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,13.0,10.0,4.0,73.0,8.0,8.0,0.0,0.0,0.0,1.0,-3.0,-52.0,,,,0.99,0.62,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,2TM,4,18,10,18,13,0,0,0,0,0,0,6,6,1,13,2,0,6,OF,0.230769,,D+,12,26.0,0,0.0,,1.5,****
20,Greg Allen,27,SDP,NL,1.0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,0.75,0.0,0.75,136.0,0.0,0.0,1.0,0.0,0.0,0.0,/79,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,15.0,7.0,5.0,85.0,20.0,20.0,0.0,0.0,0.0,1.0,1.0,7.0,,,,2.12,1.33,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,2TM,4,16,7,16,15,0,0,0,0,0,0,12,4,1,15,0,0,3,OF,0.0,,G,0,,0,0.0,,0.333,****
21,Abraham Almonte,31,SDP,NL,7.0,13.0,11.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,4.0,0.091,0.231,0.091,0.322,-5.0,1.0,0.0,0.0,0.0,0.0,0.0,/D7H8,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SDP,NL,3.0,2.0,1.0,16.0,6.0,6.0,0.0,0.0,0.0,1.0,1.0,71.0,,,,3.38,2.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,29,SDP,8,7,4,7,3,0,0,0,0,0,0,2,1,0,3,3,1,1,OF,0.0,,E,0,,0,0.0,,0.333,****
49,Franklin Barreto,24,TOT,AL,21.0,28.0,27.0,5.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,15.0,0.074,0.107,0.074,0.181,-48.0,2.0,0.0,1.0,0.0,0.0,0.0,H/465D7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,13.0,6.0,3.0,55.0,25.0,11.0,13.0,1.0,2.0,0.96,1.0,23.0,,0.0,,3.93,1.85,2B-SS-3B-OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,82,2TM,4,21,6,21,13,0,0,0,6,2,4,1,0,0,1,2,4,8,2B,0.095238,,F,0,,0,0.0,,0.333,****
51,Franklin Barreto,24,LAA,AL,6.0,18.0,17.0,0.0,2.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,8.0,0.118,0.167,0.118,0.284,-19.0,2.0,0.0,1.0,0.0,0.0,0.0,/457H6,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,13.0,6.0,3.0,55.0,25.0,11.0,13.0,1.0,2.0,0.96,1.0,23.0,,0.0,,3.93,1.85,2B-SS-3B-OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,82,2TM,4,21,6,21,13,0,0,0,6,2,4,1,0,0,1,2,4,8,2B,0.333333,,E+,0,,0,0.0,,0.333,****
53,Luis Alexander Basabe,23,SFG,NL,9.0,18.0,14.0,5.0,2.0,0.0,0.0,0.0,1.0,2.0,0.0,4.0,5.0,0.143,0.333,0.143,0.476,40.0,2.0,0.0,0.0,0.0,0.0,0.0,/97H,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,8.0,4.0,3.0,44.0,12.0,12.0,0.0,0.0,0.0,1.0,1.0,23.0,,,,2.45,1.5,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,86,SFG,1st,9,4,9,8,0,0,0,0,0,0,3,0,5,8,0,0,2,OF,0.111111,,D,0,,0,0.0,,0.333,****
122,Christian Colon,31,CIN,NL,11.0,24.0,23.0,3.0,3.0,1.0,0.0,0.0,2.0,1.0,0.0,1.0,3.0,0.13,0.167,0.174,0.341,-10.0,4.0,1.0,0.0,0.0,0.0,0.0,/4DH3,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,8.0,4.0,4.0,40.0,16.0,6.0,10.0,0.0,3.0,1.0,0.0,-3.0,-1.0,-30.0,0.0,3.6,2.0,2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,236,CIN,6,11,6,11,8,0,0,1,7,0,0,0,0,0,0,3,2,0,2B,0.181818,,E+,0,,0,0.0,,0.333,****
127,Ryan Cordell,28,NYM,NL,5.0,8.0,8.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,0.125,0.125,0.125,0.25,-30.0,1.0,0.0,0.0,0.0,0.0,0.0,/8H9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,5.0,2.0,1.0,19.0,5.0,5.0,0.0,0.0,0.0,1.0,0.0,-8.0,,,,2.37,1.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,243,NYM,3,5,2,5,5,0,0,0,0,0,0,0,4,1,5,0,0,1,OF,0.0,,E+,0,,0,0.0,,1.0,****
173,Jarrod Dyson,35,TOT,MLB,32.0,66.0,61.0,9.0,11.0,0.0,0.0,0.0,5.0,6.0,0.0,4.0,11.0,0.18,0.231,0.18,0.411,15.0,11.0,0.0,0.0,1.0,0.0,0.0,8/H7D9,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,29.0,18.0,17.0,175.0,44.0,42.0,1.0,1.0,0.0,0.977,-2.0,-17.0,,,,2.21,1.48,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,327,2TM,11,32,18,32,29,0,0,0,0,0,0,6,23,1,29,2,1,8,OF,0.15625,,D+,0,,0,0.0,,0.4,****


### Base on Balls Number

In [629]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     849
3     168
2     129
4     109
5      58
1      57
6      40
7       9
9       4
8       4
18      3
15      1
Name: walk_rate, dtype: int64

In [630]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     849
13    168
12    129
14    109
15     58
11     57
16     40
21      9
23      4
22      4
36      3
33      1
Name: bb_num, dtype: int64

### Batter K Number

In [631]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     792
10     87
7      84
9      81
8      80
11     61
6      48
12     34
5      33
13     26
4      24
14     18
15     12
16     11
3      11
18      9
2       4
17      3
21      3
24      3
36      3
27      1
19      1
23      1
25      1
Name: k_rate, dtype: int64

In [632]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     792
13.0     79
12.0     72
9.0      63
10.0     62
14.0     62
11.0     59
15.0     37
16.0     35
8.0      35
18.0     24
7.0      24
17.0     22
6.0      16
19.0     11
5.0       7
21.0      6
27.0      4
20.0      4
36.0      3
4.0       3
24.0      3
22.0      2
25.0      2
3.0       2
26.0      1
23.0      1
Name: k_val, dtype: int64

In [633]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     792
31     79
26     72
23     63
32     62
24     62
25     59
33     37
34     35
22     35
21     24
36     24
35     22
16     16
41     11
15      7
43      6
42      4
53      4
46      3
66      3
14      3
44      2
13      2
51      2
52      1
45      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [634]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0     1232
1      160
2       27
9        3
4        3
3        3
36       1
18       1
6        1
Name: hbp_rate, dtype: int64

In [635]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [636]:
players["hbp_val"].value_counts()

0.0     1232
13.0      27
14.0      27
15.0      22
11.0      18
12.0      15
16.0      14
19.0      12
10.0      11
9.0       11
17.0      11
18.0       6
20.0       5
8.0        5
6.0        3
36.0       3
7.0        3
30.0       1
24.0       1
21.0       1
4.0        1
27.0       1
22.0       1
Name: hbp_val, dtype: int64

In [637]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       1232
/31      27
/32      27
/33      22
/25      18
/26      15
/34      14
/41      12
/23      11
/24      11
/35      11
/36       6
/22       5
/42       5
/21       3
/66       3
/16       3
/44       1
/53       1
/46       1
/43       1
/56       1
/14       1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [638]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     810
8     116
7     116
9      99
6      85
5      62
10     54
4      29
3      16
11     12
12     10
2       8
13      4
14      3
18      2
1       2
15      1
16      1
24      1
Name: hit_rate, dtype: int64

In [639]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    812
55    116
56    116
54     99
61     85
62     62
53     54
63     29
64     16
52     12
51     10
65      8
46      4
45      3
41      2
43      1
44      1
31      1
Name: PH_num_bat, dtype: int64

### Batter Rating

In [640]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [641]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " [" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + "]")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24]
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,2.22,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41]
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,3.47,SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35]
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66]
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,7.5,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32]


In [642]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
1426,T.J. Zeuch,24,TOR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.0,1.59,3.0,1.0,1.0,0.0,0.0,0.0,11.1,9.0,2.0,2.0,1.0,4.0,0.0,3.0,0.0,0.0,0.0,47.0,284.0,4.87,1.147,7.1,0.8,3.2,2.4,0.75,R,TOR,AL,3.0,1.0,0.0,11.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1283,TOR,2,3,1,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1427,Kyle Zimmer,28,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.0,1.57,16.0,1.0,4.0,0.0,0.0,0.0,23.0,14.0,4.0,4.0,0.0,10.0,0.0,26.0,1.0,0.0,2.0,91.0,295.0,2.36,1.043,5.5,0.0,3.9,10.2,2.6,R,KCR,AL,16.0,1.0,0.0,23.0,6.0,2.0,4.0,0.0,1.0,1.0,,,1.0,9.0,0.0,2.35,0.38,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1285,KCR,2,16,1,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1428,Bruce Zimmermann,25,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.71,2.0,1.0,1.0,0.0,0.0,0.0,7.0,6.0,6.0,6.0,2.0,2.0,0.0,7.0,2.0,0.0,2.0,31.0,61.0,6.62,1.143,7.7,2.6,2.6,9.0,3.5,L,BAL,AL,2.0,1.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-29.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1286,BAL,1st,2,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1429,Jordan Zimmermann,34,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.94,3.0,2.0,0.0,0.0,0.0,0.0,5.2,11.0,6.0,5.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,28.0,62.0,2.13,2.294,17.5,0.0,3.2,9.5,3.0,R,DET,AL,3.0,2.0,0.0,5.2,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.59,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1287,DET,12,3,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1430,Tyler Zuber,25,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,2.0,0.333,4.09,23.0,0.0,8.0,0.0,0.0,0.0,22.0,15.0,11.0,10.0,4.0,20.0,1.0,30.0,1.0,0.0,1.0,99.0,113.0,5.69,1.591,6.1,1.6,8.2,12.3,1.5,R,KCR,AL,23.0,0.0,0.0,22.0,2.0,0.0,2.0,0.0,0.0,1.0,,,-1.0,-9.0,0.0,0.82,0.09,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1288,KCR,1st,23,0,0,23,23,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [643]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.370,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.000000,#,A,9,23,0,0.0,,0.000,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24]
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.250,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.000,9.0,30.0,,,,2.44,2.22,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31,0,0.0,,0.140,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41]
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,3.47,SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35]
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000,0.000,0.000,0.000,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.50,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.250,6.8,2.3,4.5,15.8,3.50,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.000,,,0.0,0.0,0.0,2.25,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66]
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.000,0.0,32.0,0.0,0.0,0.0,7.94,7.50,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.562500,,D+,8,22,0,0.0,,0.000,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1426,T.J. Zeuch,24,TOR,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,1.0,0.0,1.000,1.59,3.0,1.0,1.0,0.0,0.0,0.0,11.1,9.0,2.0,2.0,1.0,4.0,0.0,3.0,0.0,0.0,0.0,47.0,284.0,4.87,1.147,7.1,0.8,3.2,2.4,0.75,R,TOR,AL,3.0,1.0,0.0,11.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1283,TOR,2,3,1,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1427,Kyle Zimmer,28,KCR,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,1.0,0.0,1.000,1.57,16.0,1.0,4.0,0.0,0.0,0.0,23.0,14.0,4.0,4.0,0.0,10.0,0.0,26.0,1.0,0.0,2.0,91.0,295.0,2.36,1.043,5.5,0.0,3.9,10.2,2.60,R,KCR,AL,16.0,1.0,0.0,23.0,6.0,2.0,4.0,0.0,1.0,1.000,,,1.0,9.0,0.0,2.35,0.38,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1285,KCR,2,16,1,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1428,Bruce Zimmermann,25,BAL,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,7.71,2.0,1.0,1.0,0.0,0.0,0.0,7.0,6.0,6.0,6.0,2.0,2.0,0.0,7.0,2.0,0.0,2.0,31.0,61.0,6.62,1.143,7.7,2.6,2.6,9.0,3.50,L,BAL,AL,2.0,1.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-29.0,,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1286,BAL,1st,2,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1429,Jordan Zimmermann,34,DET,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,7.94,3.0,2.0,0.0,0.0,0.0,0.0,5.2,11.0,6.0,5.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,28.0,62.0,2.13,2.294,17.5,0.0,3.2,9.5,3.00,R,DET,AL,3.0,2.0,0.0,5.2,1.0,0.0,1.0,0.0,0.0,1.000,,,0.0,0.0,0.0,1.59,0.33,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1287,DET,12,3,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [644]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [645]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [646]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_num', 'k_rate', 'k_val', 'k_num', 'hbp_rate', 'hbp_val', 'hbp_num',
       'hit_rate', 'PH_num_bat', 'batter_rating'],
      dtype='object', length=155)

### Pitcher Letter Rating

In [647]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.2583229813664595

In [648]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [649]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     721
M     130
W     121
L     115
X      82
K      75
Y      64
J+     56
J      48
Z+     19
Name: pit_letter, dtype: int64

In [650]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      626
M     130
W     121
L     115
Z      95
X      82
K      75
Y      64
J+     56
J      48
Z+     19
Name: pit_letter, dtype: int64

In [651]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.168

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [652]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

1.000000     30
3.333333     15
4.000000     13
6.000000     13
1.666667     13
3.666667     13
3.000000     13
20.666667    12
2.333333     12
21.666667    12
15.666667    11
4.333333     11
8.333333     11
10.000000    11
2.000000     11
20.000000    11
25.666667    11
7.666667     10
22.333333    10
17.333333     9
23.333333     9
19.666667     9
0.666667      9
0.333333      9
7.333333      8
Name: IP_real, dtype: int64

In [653]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

1.0    449
2.0    112
5.0     78
4.0     57
3.0     52
6.0     41
0.0     12
7.0      4
Name: IE, dtype: int64

In [654]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
81,Mike Brosseau,26,TBR,AL,36.0,98.0,86.0,12.0,26.0,5.0,1.0,5.0,12.0,2.0,0.0,8.0,31.0,0.302,0.378,0.558,0.936,158.0,48.0,1.0,3.0,0.0,1.0,0.0,35/4H7D19,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,-2.81,0.0,0.0,0.0,0.0,27.0,,R,TBR,AL,36.0,23.0,14.0,207.1,127.0,77.0,48.0,2.0,18.0,0.984,4.0,21.0,5.0,29.0,0.0,5.43,3.47,3B-1B-2B-OF-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,150,TBR,2,37,23,36,34,1,0,12,9,11,0,2,0,1,3,1,4,3,3B,0.333333,,B+,7,21.0,1,8.0,(22),0.077,*,3,13,11,14.0,32,1,15.0,/33,10,53,B+21(22)* [13-32/33],0.0,J+,0.333333,0.0
137,Charlie Culberson,31,ATL,NL,9.0,7.0,7.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,0.143,0.143,0.286,0.429,9.0,2.0,0.0,0.0,0.0,0.0,0.0,/H34D1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.19,0.0,0.0,0.0,0.0,0.0,,R,ATL,NL,6.0,1.0,0.0,17.1,11.0,11.0,0.0,0.0,3.0,1.0,0.0,-23.0,0.0,0.0,0.0,5.71,1.83,1B-2B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,264,ATL,8,10,1,9,6,1,0,4,1,0,0,0,0,0,0,1,2,4,1B,0.111111,,D,0,,0,0.0,,0.0,,0,n,21,21.0,43,0,0.0,,5,62,D [n-43],0.0,J+,0.333333,0.0
297,Bryan Holaday,32,BAL,AL,20.0,33.0,31.0,5.0,5.0,1.0,0.0,0.0,4.0,0.0,0.0,2.0,9.0,0.161,0.212,0.194,0.406,13.0,6.0,1.0,0.0,0.0,0.0,0.0,2/H3D1,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,,3.19,6.0,54.0,0.0,0.0,0.0,,R,BAL,AL,17.0,7.0,3.0,73.1,67.0,62.0,5.0,0.0,2.0,1.0,1.0,20.0,-1.0,-16.0,0.0,8.22,3.94,C-1B-P,BAL,AL,10.0,6.0,3.0,59.0,53.0,51.0,2.0,0.0,0.0,1.0,1.0,13.0,1.0,-1.0,-20.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,8.08,5.3,0.0,2.0,1.0,1.0,50%,556,BAL,9,21,8,20,15,1,10,6,0,0,0,0,0,0,0,4,5,3,C,0.2,,D,0,,0,0.0,,0.0,,2,12,10,12.0,26,0,0.0,,5,62,D [12-26],0.667,Z,0.333333,0.0
480,Jose Peraza,26,BOS,AL,34.0,120.0,111.0,13.0,25.0,8.0,1.0,1.0,8.0,1.0,1.0,5.0,18.0,0.225,0.275,0.342,0.617,65.0,38.0,3.0,3.0,0.0,1.0,0.0,4/7H65D1,R,AL,0.0,0.0,,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.1,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,34.0,3.19,6.0,54.0,0.0,0.0,0.0,,R,BOS,AL,37.0,30.0,24.0,261.2,120.0,50.0,65.0,5.0,18.0,0.958,-4.0,-21.0,-1.0,-5.0,2.0,3.96,3.11,2B-OF-SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,890,BOS,6,34,30,34,32,1,0,0,27,1,3,5,0,0,5,1,2,1,2B,0.235294,,C+,1,11.0,1,2.0,(12),0.043,,2,12,5,7.0,21,1,8.0,/22,8,55,C+11(12) [12-21/22],0.667,Z,0.333333,0.0
742,Jeremy Beasley,24,ARI,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,,-2.81,6.0,54.0,0.0,0.0,27.0,,R,ARI,NL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,91,ARI,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.667,Z,0.333333,0.0
833,Edwar Colina,23,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,81.0,1.0,0.0,0.0,0.0,0.0,0.0,0.1,4.0,3.0,3.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,7.0,11.0,60.19,18.0,108.0,27.0,54.0,0.0,0.0,R,MIN,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,233,MIN,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.8,Z,0.333333,0.0
954,Matt Grace,31,ARI,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,1.0,0.0,54.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,5.0,6.0,6.0,1.0,2.0,0.0,2.0,0.0,0.0,0.0,10.0,11.0,18.19,7.0,45.0,9.0,18.0,18.0,1.0,L,ARI,NL,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-200.0,0.0,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,455,ARI,6,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.625,Z,1.0,0.0
1010,Jordan Holloway,24,MIA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.1,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,12.19,9.0,54.0,0.0,27.0,0.0,0.0,R,MIA,NL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,560,MIA,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.667,Z,0.333333,0.0
1102,Dillon Maples,28,CHC,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,0.0,,18.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,9.0,33.0,13.19,5.0,9.0,0.0,36.0,9.0,0.25,R,CHC,NL,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,712,CHC,4,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,1.0,0.0
1123,David McKay,25,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,54.0,1.0,0.0,0.0,0.0,0.0,0.0,0.1,1.0,2.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,17.0,51.19,6.0,27.0,27.0,27.0,0.0,0.0,R,DET,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,755,DET,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.5,Z,0.333333,0.0


In [655]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

1.0    461
2.0    112
5.0     78
4.0     57
3.0     52
6.0     41
7.0      4
Name: IE, dtype: int64

In [656]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
278,Kyle Hendricks,30,CHC,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,R,NL,6.0,5.0,0.545,2.88,12.0,12.0,0.0,1.0,1.0,0.0,81.1,73.0,26.0,26.0,10.0,8.0,1.0,64.0,1.0,0.0,1.0,315.0,155.0,3.55,0.996,8.1,1.1,0.9,7.1,8.0,R,CHC,NL,12.0,12.0,1.0,81.1,20.0,7.0,13.0,0.0,0.0,1.0,,,3.0,7.0,0.0,2.21,1.67,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,526,CHC,7,12,12,1,12,12,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.239,M,81.333333,7.0
741,Trevor Bauer,29,CIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,5.0,4.0,0.556,1.73,11.0,11.0,0.0,2.0,2.0,0.0,73.0,41.0,17.0,14.0,9.0,17.0,1.0,100.0,3.0,0.0,3.0,278.0,276.0,2.88,0.795,5.1,1.1,2.1,12.3,5.88,R,CIN,NL,11.0,11.0,2.0,73.0,11.0,5.0,6.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.36,1.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,90,CIN,9,11,11,0,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.159,J,73.0,7.0
1210,Zach Plesac,25,CLE,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,4.0,2.0,0.667,2.28,8.0,8.0,0.0,0.0,0.0,0.0,55.1,38.0,14.0,14.0,8.0,6.0,0.0,57.0,1.0,0.0,0.0,206.0,201.0,3.39,0.795,6.2,1.3,1.0,9.3,9.5,R,CLE,AL,8.0,8.0,0.0,55.1,13.0,5.0,8.0,0.0,0.0,1.0,,,2.0,7.0,0.0,2.11,1.63,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,917,CLE,2,8,8,0,8,8,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.191,K,55.333333,7.0
1385,Adam Wainwright,38,STL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,5.0,3.0,0.625,3.15,10.0,10.0,0.0,2.0,0.0,0.0,65.2,54.0,25.0,23.0,9.0,15.0,0.0,54.0,2.0,0.0,0.0,262.0,137.0,4.1,1.051,7.4,1.2,2.1,7.4,3.6,R,STL,NL,10.0,10.0,2.0,65.2,10.0,2.0,8.0,0.0,0.0,1.0,,,1.0,3.0,0.0,1.37,1.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1222,STL,15,10,10,0,10,10,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.22,L,65.666667,7.0


In [657]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [658]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     677
3     198
4     150
2     129
5     101
6      46
1      43
7      30
9      15
8      14
10      7
11      6
12      5
14      3
13      2
15      2
18      1
16      1
27      1
Name: bb_rate, dtype: int64

In [659]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     198
4.0     150
2.0     129
5.0     101
0.0      51
6.0      46
1.0      43
7.0      30
9.0      15
8.0      14
10.0      7
11.0      6
12.0      5
14.0      3
15.0      2
13.0      2
18.0      1
16.0      1
27.0      1
Name: bb_rate, dtype: int64

In [660]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    626
13     198
14     150
12     129
15     101
11      94
16      46
21      30
23      15
22      14
24       7
25       6
26       5
32       3
33       2
31       2
34       1
53       1
36       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [661]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

7.0     119
8.0     116
9.0      97
10.0     80
6.0      77
11.0     62
5.0      52
12.0     43
0.0      38
4.0      29
13.0     23
3.0      22
14.0     15
2.0      12
15.0      7
18.0      4
1.0       3
19.0      2
16.0      2
36.0      1
17.0      1
Name: k_rate_pit, dtype: int64

In [662]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
6,Ehire Adrianza,30,MIN,AL,44.0,101.0,89.0,10.0,17.0,7.0,0.0,0.0,3.0,1.0,0.0,11.0,23.0,0.191,0.287,0.27,0.557,56.0,24.0,3.0,1.0,0.0,0.0,0.0,5H/64D1,S,AL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,65.0,19.19,1.0,9.0,9.0,0.0,0.0,,R,MIN,AL,38.0,24.0,22.0,239.1,81.0,29.0,51.0,1.0,9.0,0.988,1.0,6.0,2.0,10.0,0.0,3.01,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11,MIN,8,44,24,44,37,1,0,0,5,23,9,0,0,0,0,1,6,5,3B,0.068182,,D+,0,,0,0.0,,0.045,,4,14,8,12.0,26,0,0.0,,6,61,D+ [14-26],0.25,M,1.0,1,0.0,11,0.0
36,Orlando Arcia,25,MIL,NL,59.0,189.0,173.0,22.0,45.0,10.0,1.0,5.0,20.0,2.0,0.0,14.0,32.0,0.26,0.317,0.416,0.734,96.0,72.0,10.0,1.0,0.0,1.0,0.0,*6/H18,R,NL,0.0,0.0,,18.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,4.0,4.0,4.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,29.0,11.19,2.5,18.0,4.5,4.5,0.0,0.0,R,MIL,NL,60.0,52.0,46.0,445.1,181.0,58.0,120.0,3.0,26.0,0.983,-2.0,-5.0,-5.0,-13.0,1.0,3.6,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,56,MIL,5,59,52,59,58,2,0,0,0,0,57,0,1,0,1,0,2,2,SS,0.338983,,B,4,14.0,1,5.0,(15),0.045,,3,13,6,9.0,23,0,0.0,,9,54,B14(15) [13-23],0.4,Z,2.0,1,3.0,13,0.0
59,Anthony Bemboom,30,LAA,AL,21.0,60.0,48.0,9.0,10.0,1.0,0.0,3.0,5.0,0.0,1.0,7.0,13.0,0.208,0.328,0.417,0.744,103.0,20.0,0.0,2.0,2.0,1.0,0.0,2/1H,L,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,6.19,1.0,0.0,0.0,9.0,0.0,0.0,R,LAA,AL,21.0,16.0,14.0,145.0,151.0,143.0,8.0,0.0,2.0,1.0,2.0,13.0,2.0,17.0,0.0,9.37,...,LAA,AL,20.0,16.0,14.0,144.0,151.0,143.0,8.0,0.0,2.0,1.0,2.0,13.0,2.0,2.0,17.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,9.44,7.55,0.0,9.0,10.0,7.0,41%,99,LAA,2,21,16,21,20,1,20,0,0,0,0,0,0,0,0,0,0,1,C,0.238095,,C,11,25.0,0,0.0,,0.0,,4,14,8,12.0,26,1,13.0,/31,6,61,C25 [14-26/31],0.0,J+,1.0,1,9.0,23,0.0
137,Charlie Culberson,31,ATL,NL,9.0,7.0,7.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,0.143,0.143,0.286,0.429,9.0,2.0,0.0,0.0,0.0,0.0,0.0,/H34D1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.19,0.0,0.0,0.0,0.0,0.0,,R,ATL,NL,6.0,1.0,0.0,17.1,11.0,11.0,0.0,0.0,3.0,1.0,0.0,-23.0,0.0,0.0,0.0,5.71,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,264,ATL,8,10,1,9,6,1,0,4,1,0,0,0,0,0,0,1,2,4,1B,0.111111,,D,0,,0,0.0,,0.0,,0,n,21,21.0,43,0,0.0,,5,62,D [n-43],0.0,J+,0.333333,1,0.0,11,0.0
153,Travis Demeritte,25,DET,AL,18.0,33.0,29.0,5.0,5.0,1.0,0.0,0.0,4.0,0.0,0.0,3.0,14.0,0.172,0.273,0.207,0.48,34.0,6.0,0.0,1.0,0.0,0.0,0.0,9/HD71,R,AL,0.0,0.0,,36.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,4.0,4.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,17.0,29.19,4.0,36.0,18.0,0.0,0.0,,R,DET,AL,15.0,8.0,6.0,72.0,25.0,24.0,0.0,1.0,0.0,0.96,2.0,30.0,0.0,0.0,,3.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,289,DET,2,18,8,18,15,1,0,0,0,0,0,2,0,12,14,3,4,5,OF,0.222222,,D+,0,,0,0.0,,0.0,,3,13,15,18.0,36,1,19.0,/41,5,62,D+ [13-36/41],0.571,Z,1.0,1,0.0,11,0.0
185,Santiago Espinal,25,TOR,AL,26.0,66.0,60.0,10.0,16.0,4.0,0.0,0.0,6.0,1.0,0.0,4.0,16.0,0.267,0.308,0.333,0.641,77.0,20.0,1.0,0.0,1.0,1.0,0.0,6/H51,R,AL,0.0,0.0,,9.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,3.0,2.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,11.0,57.0,12.69,2.0,13.5,4.5,4.5,0.0,0.0,R,TOR,AL,25.0,19.0,13.0,165.0,84.0,24.0,58.0,2.0,12.0,0.976,2.0,18.0,1.0,7.0,0.0,4.47,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,341,TOR,1st,27,19,26,24,2,0,0,0,2,21,0,0,0,0,0,0,4,SS,0.230769,,B,0,,0,0.0,,0.062,,2,12,9,11.0,25,0,0.0,,9,54,B [12-25],0.333,Y,2.0,1,3.0,13,0.0
195,Logan Forsythe,33,MIA,NL,12.0,38.0,34.0,2.0,4.0,1.0,0.0,1.0,2.0,0.0,0.0,4.0,12.0,0.118,0.211,0.235,0.446,22.0,8.0,0.0,0.0,0.0,0.0,0.0,/43D1,R,NL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,6.0,66.0,6.19,3.0,18.0,0.0,9.0,0.0,0.0,R,MIA,NL,11.0,9.0,7.0,76.1,43.0,29.0,14.0,0.0,5.0,1.0,1.0,12.0,0.0,0.0,0.0,5.07,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,372,MIA,10,13,11,12,11,1,0,4,6,0,0,0,0,0,0,2,0,0,2B,0.166667,,E+,9,23.0,0,0.0,,0.0,,4,14,11,15.0,33,0,0.0,,4,63,E+23 [14-33],0.4,Z,1.0,1,6.0,16,0.0
249,Luis Guillorme,25,NYM,NL,29.0,68.0,57.0,6.0,19.0,6.0,0.0,0.0,9.0,2.0,0.0,10.0,17.0,0.333,0.426,0.439,0.865,141.0,25.0,3.0,0.0,0.0,1.0,0.0,4/H561,L,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.19,0.0,0.0,0.0,0.0,0.0,,R,NYM,NL,25.0,15.0,13.0,142.0,68.0,30.0,38.0,0.0,10.0,1.0,2.0,16.0,0.0,0.0,0.0,4.31,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,480,NYM,3,30,15,29,23,1,0,0,17,4,3,0,0,0,0,0,7,1,2B,0.310345,,A,0,,0,0.0,,0.087,*,5,15,9,14.0,32,0,0.0,,10,53,A* [15-32],0.0,J+,1.0,1,0.0,11,0.0
254,Jedd Gyorko,31,MIL,NL,42.0,135.0,117.0,19.0,29.0,3.0,0.0,9.0,17.0,0.0,0.0,15.0,38.0,0.248,0.333,0.504,0.838,121.0,59.0,4.0,1.0,0.0,2.0,1.0,35/HD1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,,3.19,1.0,9.0,0.0,0.0,0.0,,R,MIL,NL,42.0,34.0,21.0,270.1,191.0,165.0,25.0,1.0,11.0,0.995,2.0,8.0,1.0,4.0,1.0,6.33,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,486,MIL,8,43,35,42,40,1,0,30,0,11,0,0,0,0,0,1,7,0,1B,0.404762,,C+,11,25.0,0,0.0,,0.0,,4,14,10,14.0,32,0,0.0,,8,55,C+25 [14-32],0.25,M,1.0,1,0.0,11,0.0
277,Tyler Heineman,29,SFG,NL,15.0,50.0,42.0,3.0,8.0,1.0,0.0,0.0,1.0,1.0,0.0,4.0,6.0,0.19,0.292,0.214,0.506,44.0,9.0,0.0,2.0,2.0,0.0,0.0,2/1,S,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.19,1.0,9.0,0.0,0.0,0.0,,R,SFG,NL,16.0,13.0,11.0,119.0,122.0,110.0,9.0,3.0,0.0,0.975,1.0,6.0,1.0,10.0,1.0,9.0,...,SFG,NL,15.0,13.0,11.0,118.0,122.0,110.0,9.0,3.0,0.0,0.975,1.0,6.0,1.0,1.0,10.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,9.08,7.93,0.0,4.0,8.0,5.0,38%,522,SFG,2,15,13,15,15,1,15,0,0,0,0,0,0,0,0,0,0,0,C,0.066667,,D+,0,,0,0.0,,0.077,*,3,13,4,7.0,21,1,8.0,/22,6,61,D+* [13-21/22],0.333,Y,1.0,1,0.0,11,0.0


In [663]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

11.0    99
12.0    87
14.0    83
10.0    82
9.0     72
13.0    69
15.0    55
8.0     48
0.0     38
16.0    37
18.0    28
7.0     21
17.0    18
6.0     17
5.0     10
21.0     9
19.0     7
4.0      6
20.0     6
24.0     4
3.0      3
22.0     2
23.0     1
25.0     1
36.0     1
26.0     1
Name: k_val_pit, dtype: int64

In [664]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    626
25      99
26      87
32      83
24      82
23      72
31      69
33      55
22      48
n       38
34      37
36      28
21      21
35      18
16      17
15      10
43       9
41       7
14       6
42       6
46       4
13       3
44       2
45       1
52       1
51       1
66       1
Name: k_num_pit, dtype: int64

In [665]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [666]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    1148
1     216
2      44
3      12
4       5
5       3
7       2
8       1
Name: hbp_rate_pit, dtype: int64

In [667]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [668]:
players["hbp_val_pit"].value_counts()

0.0     1148
12.0      43
13.0      40
11.0      32
10.0      26
14.0      24
15.0      24
17.0      20
16.0      19
9.0       11
18.0       8
7.0        7
8.0        6
22.0       5
20.0       5
19.0       3
5.0        3
4.0        2
6.0        2
24.0       1
3.0        1
25.0       1
Name: hbp_val_pit, dtype: int64

In [669]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       1148
/26      43
/31      40
/25      32
/24      26
/32      24
/33      24
/35      20
/34      19
/23      11
/36       8
/21       7
/22       6
/42       5
/44       5
/15       3
/41       3
/14       2
/16       2
/51       1
/13       1
/46       1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [670]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [671]:
players["WP"].value_counts()

0.0    428
1.0    191
2.0    107
3.0     39
4.0     20
5.0      9
6.0      8
7.0      3
Name: WP, dtype: int64

In [672]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit


In [673]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        785
[WP]     20
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [674]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    135
0.200000     31
0.250000     27
0.166667     26
0.142857     25
           ... 
0.291667      1
0.368421      1
0.114286      1
0.157143      1
0.205882      1
Name: hr_rate_pit, Length: 181, dtype: int64

In [675]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     740
+    539
-    152
Name: gopher_ball, dtype: int64

In [676]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
55,Cam Bedrosian,28,LAA,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,0.0,0.0,,2.45,11.0,0.0,3.0,0.0,0.0,0.0,14.2,10.0,4.0,4.0,0.0,6.0,0.0,11.0,0.0,0.0,1.0,58.0,186.0,2.92,1.091,6.1,0.0,3.7,6.8,1.83,R,LAA,AL,11.0,0.0,0.0,14.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.00,...,,,,,,,,,,,,,,,,,,,,,,,,94,LAA,7,11,0,1,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.192,K,14.666667,1,4.0,14,7.0,11.0,25,0,0.0,,,0.0,-
85,Drew Butera,36,COL,NL,28.0,43.0,39.0,4.0,6.0,2.0,0.0,0.0,4.0,0.0,0.0,2.0,11.0,0.154,0.190,0.205,0.396,1.0,8.0,3.0,0.0,1.0,1.0,0.0,2/31,R,NL,0.0,0.0,,5.40,1.0,0.0,1.0,0.0,0.0,0.0,1.2,3.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,8.0,114.0,1.99,1.800,16.2,0.0,0.0,5.4,,R,COL,NL,31.0,12.0,7.0,125.1,99.0,94.0,5.0,0.0,2.0,1.000,0.0,-2.0,1.0,10.0,1.0,7.11,...,3.0,0.0,1.0,1.000,1.0,6.0,1.0,1.0,10.0,1.0,0.0,0.0,0.0,-1.0,0.0,1.0,7.25,3.76,1.0,3.0,5.0,2.0,29%,166,COL,11,29,12,28,29,1,25,5,0,0,0,0,0,0,0,0,0,0,C,0.142857,,D,0,,0,0.0,,0.000,,2,12,9,11.0,25,0,0.0,,5,62,D [12-25],0.375,Z,1.666667,2,0.0,11,4.0,4.0,14,0,0.0,,,0.0,-
195,Logan Forsythe,33,MIA,NL,12.0,38.0,34.0,2.0,4.0,1.0,0.0,1.0,2.0,0.0,0.0,4.0,12.0,0.118,0.211,0.235,0.446,22.0,8.0,0.0,0.0,0.0,0.0,0.0,/43D1,R,NL,0.0,0.0,,9.00,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,6.0,66.0,6.19,3.000,18.0,0.0,9.0,0.0,0.00,R,MIA,NL,11.0,9.0,7.0,76.1,43.0,29.0,14.0,0.0,5.0,1.000,1.0,12.0,0.0,0.0,0.0,5.07,...,,,,,,,,,,,,,,,,,,,,,,,,372,MIA,10,13,11,12,11,1,0,4,6,0,0,0,0,0,0,2,0,0,2B,0.166667,,E+,9,23.0,0,0.0,,0.000,,4,14,11,15.0,33,0,0.0,,4,63,E+23 [14-33],0.400,Z,1.000000,1,6.0,16,0.0,0.0,n,0,0.0,,,0.0,-
254,Jedd Gyorko,31,MIL,NL,42.0,135.0,117.0,19.0,29.0,3.0,0.0,9.0,17.0,0.0,0.0,15.0,38.0,0.248,0.333,0.504,0.838,121.0,59.0,4.0,1.0,0.0,2.0,1.0,35/HD1,R,NL,0.0,0.0,,0.00,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,,3.19,1.000,9.0,0.0,0.0,0.0,,R,MIL,NL,42.0,34.0,21.0,270.1,191.0,165.0,25.0,1.0,11.0,0.995,2.0,8.0,1.0,4.0,1.0,6.33,...,,,,,,,,,,,,,,,,,,,,,,,,486,MIL,8,43,35,42,40,1,0,30,0,11,0,0,0,0,0,1,7,0,1B,0.404762,,C+,11,25.0,0,0.0,,0.000,,4,14,10,14.0,32,0,0.0,,8,55,C+25 [14-32],0.250,M,1.000000,1,0.0,11,0.0,0.0,n,0,0.0,,,0.0,-
277,Tyler Heineman,29,SFG,NL,15.0,50.0,42.0,3.0,8.0,1.0,0.0,0.0,1.0,1.0,0.0,4.0,6.0,0.190,0.292,0.214,0.506,44.0,9.0,0.0,2.0,2.0,0.0,0.0,2/1,S,NL,0.0,0.0,,0.00,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,3.19,1.000,9.0,0.0,0.0,0.0,,R,SFG,NL,16.0,13.0,11.0,119.0,122.0,110.0,9.0,3.0,0.0,0.975,1.0,6.0,1.0,10.0,1.0,9.00,...,9.0,3.0,0.0,0.975,1.0,6.0,1.0,1.0,10.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,9.08,7.93,0.0,4.0,8.0,5.0,38%,522,SFG,2,15,13,15,15,1,15,0,0,0,0,0,0,0,0,0,0,0,C,0.066667,,D+,0,,0,0.0,,0.077,*,3,13,4,7.0,21,1,8.0,/22,6,61,D+* [13-21/22],0.333,Y,1.000000,1,0.0,11,0.0,0.0,n,0,0.0,,,0.0,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1404,Taylor Williams,28,SDP,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,NL,0.0,0.0,,9.00,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,64.0,1.19,2.000,18.0,0.0,0.0,9.0,,R,TOT,ZZ,15.0,0.0,0.0,14.2,1.0,0.0,1.0,0.0,0.0,1.000,,,,,,0.61,...,,,,,,,,,,,,,,,,,,,,,,,,1254,2TM,4,15,0,1,15,15,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.400,Z,1.000000,1,0.0,11,7.0,7.0,21,0,0.0,,,0.0,-
1414,Brandon Workman,31,BOS,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,4.05,7.0,0.0,6.0,0.0,0.0,4.0,6.2,8.0,3.0,3.0,0.0,4.0,0.0,8.0,0.0,0.0,0.0,31.0,120.0,2.59,1.800,10.8,0.0,5.4,10.8,2.00,R,TOT,ZZ,21.0,0.0,0.0,19.2,3.0,0.0,3.0,0.0,0.0,1.000,,,,,,1.37,...,,,,,,,,,,,,,,,,,,,,,,,,1269,2TM,6,21,0,0,21,21,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.296,X,6.666667,1,5.0,15,9.0,14.0,32,0,0.0,,,0.0,-
1417,Jimmy Yacabonis,28,SEA,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,1.0,0.0,3.86,2.0,1.0,1.0,0.0,0.0,0.0,2.1,2.0,1.0,1.0,0.0,3.0,0.0,1.0,1.0,0.0,0.0,13.0,126.0,7.48,2.143,7.7,0.0,11.6,3.9,0.33,R,SEA,AL,2.0,1.0,0.0,2.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.00,...,,,,,,,,,,,,,,,,,,,,,,,,1271,SEA,4,2,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.222,L,2.333333,1,8.0,22,3.0,11.0,25,3,14.0,/32,,0.0,-
1427,Kyle Zimmer,28,KCR,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,1.0,0.0,1.0,1.57,16.0,1.0,4.0,0.0,0.0,0.0,23.0,14.0,4.0,4.0,0.0,10.0,0.0,26.0,1.0,0.0,2.0,91.0,295.0,2.36,1.043,5.5,0.0,3.9,10.2,2.60,R,KCR,AL,16.0,1.0,0.0,23.0,6.0,2.0,4.0,0.0,1.0,1.000,,,1.0,9.0,0.0,2.35,...,,,,,,,,,,,,,,,,,,,,,,,,1285,KCR,2,16,1,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.175,K,23.000000,1,4.0,14,10.0,14.0,32,0,0.0,,,0.0,-


### Pitcher Control Number

In [677]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

11.0    130
12.0    119
13.0     99
10.0     90
14.0     82
9.0      53
15.0     42
8.0      31
16.0     27
18.0     22
17.0     21
7.0      14
19.0     12
21.0     11
0.0       9
20.0      9
24.0      6
6.0       5
25.0      4
22.0      4
4.0       3
5.0       3
26.0      3
27.0      2
23.0      2
31.0      1
32.0      1
Name: control_rate, dtype: int64

In [678]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    626
51     130
46     119
45      99
52      90
44      82
53      53
43      42
54      31
42      27
36      22
41      21
55      14
35      12
33      11
34       9
65       9
26       6
56       5
32       4
25       4
24       3
62       3
61       3
23       2
31       2
15       1
14       1
Name: PCN, dtype: int64

### Probable Hit Number

In [679]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     643
8     135
9     128
7     125
6     104
5      59
10     58
11     43
12     30
4      24
14     19
3      14
13     12
15      9
18      9
16      6
2       5
24      3
21      3
22      1
27      1
Name: hit_rate_pit, dtype: int64

In [680]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [681]:
players["hit_rate_pit"].value_counts()

8.0     135
9.0     128
7.0     125
6.0     104
5.0      59
10.0     58
11.0     43
12.0     30
4.0      24
14.0     19
0.0      17
3.0      14
13.0     12
15.0      9
18.0      9
16.0      6
2.0       5
24.0      3
21.0      3
22.0      1
27.0      1
Name: hit_rate_pit, dtype: int64

In [682]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    626
55     135
54     128
56     125
61     104
62      59
53      58
52      43
51      30
63      24
45      19
66      17
64      14
46      12
41       9
44       9
43       6
65       5
34       3
31       3
24       1
33       1
Name: PPH, dtype: int64

### Pitcher Rating

In [683]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [684]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

       626
+M1     42
+Z1     37
+K1     36
+L1     35
      ... 
M3       1
+L7      1
-L4      1
-W6      1
K4       1
Name: goph_lett_inn, Length: 117, dtype: int64

In [685]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      626
(13-25)         19
(13-24)         17
(13-23)         17
(13-25/26)      16
              ... 
(15-33/36)       1
(21-26/31)       1
(26-33)          1
(15-35)          1
(16-24)          1
Name: bb_k_hbp, Length: 248, dtype: int64

In [686]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               626
(13-25)         19
(13-24)         17
(13-23)         17
(14-25)         16
              ... 
(21-31/32)       1
(24-n)           1
(11-34)          1
(12-32/34)       1
(11-15/22)       1
Name: bb_k_hbp, Length: 248, dtype: int64

In [687]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

J+1 (11-n)           5
+X1 (14-31)          4
+Z1 (11-n)           4
-Z1 (11-n)           4
+Z1 (14-22)          3
                    ..
+W3 (15-32/34)       1
-J+1 (25-41/44)      1
+J+1 (15-36)         1
-Z1 (23-32/34)       1
+J1 (14-34/35)       1
Name: pitcher_rating, Length: 723, dtype: int64

In [688]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [689]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,...,,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,...,,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41],,,,,,,,,,0,0.0,,,,,,,,,,,
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,...,,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35],,,,,,,,,,0,0.0,,,,,,,,,,,
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,...,,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.2,L,4.0,1.0,4.0,14.0,15.0,19.0,41,0,0.0,,,0.333333,+,11.0,51.0,6.0,61.0,+L1,(14-41),+L1 (14-41)
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,...,,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32],,,,,,,,,,0,0.0,,,,,,,,,,,
5,Jo Adell,21,LAA,AL,38.0,132.0,124.0,9.0,20.0,4.0,0.0,3.0,7.0,0.0,1.0,7.0,55.0,0.161,0.212,0.266,0.478,31.0,33.0,3.0,1.0,0.0,0.0,0.0,9/8H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAA,AL,38.0,33.0,33.0,295.0,77.0,72.0,2.0,3.0,0.0,0.961,-6.0,-25.0,,,,2.26,...,,,,,,,,,,,,,,,,,10,LAA,1st,38,33,38,38,0,0,0,0,0,0,0,4,34,38,0,0,1,OF,0.184211,,D,5,15.0,0,0.0,,0.0,,2,12,15,17.0,35,0,0.0,,5,62,D15 [12-35],,,,,,,,,,0,0.0,,,,,,,,,,,
6,Ehire Adrianza,30,MIN,AL,44.0,101.0,89.0,10.0,17.0,7.0,0.0,0.0,3.0,1.0,0.0,11.0,23.0,0.191,0.287,0.27,0.557,56.0,24.0,3.0,1.0,0.0,0.0,0.0,5H/64D1,S,AL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,65.0,19.19,1.0,9.0,9.0,0.0,0.0,,R,MIN,AL,38.0,24.0,22.0,239.1,81.0,29.0,51.0,1.0,9.0,0.988,1.0,6.0,2.0,10.0,0.0,3.01,...,,,,,,,,,,,,,,,,,11,MIN,8,44,24,44,37,1,0,0,5,23,9,0,0,0,0,1,6,5,3B,0.068182,,D+,0,,0,0.0,,0.045,,4,14,8,12.0,26,0,0.0,,6,61,D+ [14-26],0.25,M,1.0,1.0,0.0,11.0,0.0,0.0,n,7,7.0,/21,,1.0,+,14.0,44.0,7.0,56.0,+M1,(11-n/21),+M1 (11-n/21)
7,Jesus Aguilar,30,MIA,NL,51.0,216.0,188.0,31.0,52.0,10.0,0.0,8.0,34.0,0.0,1.0,23.0,40.0,0.277,0.352,0.457,0.809,120.0,86.0,5.0,1.0,0.0,4.0,0.0,3D/5H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,32.0,31.0,29.0,262.0,266.0,243.0,20.0,3.0,36.0,0.989,-5.0,-24.0,0.0,0.0,0.0,9.03,...,,,,,,,,,,,,,,,,,12,MIA,7,51,50,51,31,0,0,31,0,1,0,0,0,0,0,20,1,0,1B,0.666667,#,B,6,16.0,0,0.0,,0.0,,4,14,7,11.0,25,0,0.0,,9,54,#B16 [14-25],,,,,,,,,,0,0.0,,,,,,,,,,,
8,Nick Ahmed,30,ARI,NL,57.0,217.0,199.0,29.0,53.0,10.0,1.0,5.0,29.0,4.0,0.0,18.0,46.0,0.266,0.327,0.402,0.729,94.0,80.0,3.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,NL,57.0,56.0,56.0,484.1,189.0,61.0,121.0,7.0,33.0,0.963,4.0,11.0,4.0,10.0,0.0,3.38,...,,,,,,,,,,,,,,,,,13,ARI,7,57,56,57,57,0,0,0,0,0,57,0,0,0,0,0,1,0,SS,0.508772,,B,3,13.0,1,4.0,(14),0.073,,3,13,8,11.0,25,0,0.0,,9,54,B13(14) [13-25],,,,,,,,,,0,0.0,,,,,,,,,,,
9,Shogo Akiyama,32,CIN,NL,54.0,183.0,155.0,16.0,38.0,6.0,1.0,0.0,9.0,7.0,3.0,25.0,34.0,0.245,0.357,0.297,0.654,76.0,46.0,1.0,2.0,0.0,0.0,0.0,*78/HD,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,52.0,47.0,34.0,381.0,89.0,87.0,2.0,0.0,0.0,1.0,-1.0,-2.0,,,,2.1,...,,,,,,,,,,,,,,,,,15,CIN,1st,54,47,54,52,0,0,0,0,0,0,36,21,0,52,1,5,0,OF,0.166667,,C+,0,,1,1.0,(11),0.121,**,5,15,7,12.0,26,0,0.0,,7,56,C+(11)** [15-26],,,,,,,,,,0,0.0,,,,,,,,,,,


In [690]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
1421,Ryan Yarbrough,28,TBR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,4.0,0.2,3.56,11.0,9.0,0.0,0.0,0.0,0.0,55.2,54.0,22.0,22.0,5.0,12.0,1.0,44.0,7.0,0.0,2.0,234.0,119.0,3.8,1.186,8.7,0.8,1.9,7.1,3.67,L,TBR,AL,11.0,9.0,0.0,55.2,10.0,2.0,8.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.62,...,,,,,,,,,,,,,,,,,1275,TBR,3,11,9,0,11,11,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.251,W,55.666667,5,2.0,12,7.0,9.0,23,1,10.0,/24,,0.092593,,11.0,51,8.0,55,W5,(12-23/24),W5 (12-23/24)
1422,Eric Yardley,29,MIL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,2.0,0.0,1.0,1.54,24.0,0.0,4.0,0.0,0.0,0.0,23.1,19.0,6.0,4.0,2.0,10.0,0.0,19.0,1.0,0.0,0.0,97.0,298.0,4.09,1.243,7.3,0.8,3.9,7.3,1.9,R,MIL,NL,24.0,0.0,0.0,23.1,3.0,0.0,3.0,0.0,0.0,1.0,,,1.0,9.0,0.0,1.16,...,,,,,,,,,,,,,,,,,1276,MIL,2,24,0,0,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.221,L,23.333333,1,4.0,14,7.0,11.0,25,0,0.0,,,0.105263,+,11.0,51,7.0,56,+L1,(14-25),+L1 (14-25)
1423,Kirby Yates,33,SDP,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,1.0,0.0,12.46,6.0,0.0,3.0,0.0,0.0,2.0,4.1,7.0,6.0,6.0,1.0,4.0,0.0,8.0,0.0,0.0,2.0,25.0,37.0,5.27,2.538,14.5,2.1,8.3,16.6,2.0,R,SDP,NL,6.0,0.0,0.0,4.1,3.0,1.0,1.0,1.0,0.0,0.667,,,0.0,0.0,0.0,4.15,...,,,,,,,,,,,,,,,,,1278,SDP,7,6,0,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.333,Y,4.333333,1,6.0,16,12.0,18.0,36,0,0.0,,,0.142857,+,16.0,42,10.0,53,+Y1,(16-36),+Y1 (16-36)
1424,Huascar Ynoa,22,ATL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,0.0,0.0,,5.82,9.0,5.0,1.0,0.0,0.0,0.0,21.2,23.0,14.0,14.0,2.0,13.0,1.0,17.0,2.0,1.0,2.0,100.0,83.0,4.9,1.662,9.6,0.8,5.4,7.1,1.31,R,ATL,NL,9.0,5.0,0.0,21.2,8.0,4.0,4.0,0.0,0.0,1.0,,,0.0,0.0,0.0,3.32,...,,,,,,,,,,,,,,,,,1280,ATL,2,9,5,0,9,9,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.271,W,21.666667,2,5.0,15,6.0,11.0,25,1,12.0,/26,,0.086957,,14.0,44,8.0,55,W2,(15-25/26),W2 (15-25/26)
1425,Alex Young,26,ARI,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,NL,2.0,4.0,0.333,5.44,15.0,7.0,0.0,0.0,0.0,0.0,46.1,51.0,30.0,28.0,11.0,14.0,0.0,39.0,1.0,0.0,1.0,204.0,85.0,5.57,1.403,9.9,2.1,2.7,7.6,2.79,L,ARI,NL,15.0,7.0,0.0,46.1,6.0,2.0,4.0,0.0,0.0,1.0,,,1.0,4.0,0.0,1.17,...,,,,,,,,,,,,,,,,,1281,ARI,2,15,7,0,15,15,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.27,W,46.333333,3,2.0,12,7.0,9.0,23,0,0.0,,,0.215686,+,12.0,46,9.0,54,+W3,(12-23),+W3 (12-23)
1426,T.J. Zeuch,24,TOR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.0,1.59,3.0,1.0,1.0,0.0,0.0,0.0,11.1,9.0,2.0,2.0,1.0,4.0,0.0,3.0,0.0,0.0,0.0,47.0,284.0,4.87,1.147,7.1,0.8,3.2,2.4,0.75,R,TOR,AL,3.0,1.0,0.0,11.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,...,,,,,,,,,,,,,,,,,1283,TOR,2,3,1,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.209,L,11.333333,4,3.0,13,2.0,5.0,15,0,0.0,,,0.111111,+,10.0,52,7.0,56,+L4,(13-15),+L4 (13-15)
1427,Kyle Zimmer,28,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.0,1.57,16.0,1.0,4.0,0.0,0.0,0.0,23.0,14.0,4.0,4.0,0.0,10.0,0.0,26.0,1.0,0.0,2.0,91.0,295.0,2.36,1.043,5.5,0.0,3.9,10.2,2.6,R,KCR,AL,16.0,1.0,0.0,23.0,6.0,2.0,4.0,0.0,1.0,1.0,,,1.0,9.0,0.0,2.35,...,,,,,,,,,,,,,,,,,1285,KCR,2,16,1,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.175,K,23.0,1,4.0,14,10.0,14.0,32,0,0.0,,,0.0,-,10.0,52,6.0,61,-K1,(14-32),-K1 (14-32)
1428,Bruce Zimmermann,25,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.71,2.0,1.0,1.0,0.0,0.0,0.0,7.0,6.0,6.0,6.0,2.0,2.0,0.0,7.0,2.0,0.0,2.0,31.0,61.0,6.62,1.143,7.7,2.6,2.6,9.0,3.5,L,BAL,AL,2.0,1.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-29.0,,0.0,...,,,,,,,,,,,,,,,,,1286,BAL,1st,2,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.222,L,7.0,4,2.0,12,8.0,10.0,24,2,12.0,/26,,0.333333,+,12.0,46,7.0,56,+L4,(12-24/26),+L4 (12-24/26)
1429,Jordan Zimmermann,34,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.94,3.0,2.0,0.0,0.0,0.0,0.0,5.2,11.0,6.0,5.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,28.0,62.0,2.13,2.294,17.5,0.0,3.2,9.5,3.0,R,DET,AL,3.0,2.0,0.0,5.2,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.59,...,,,,,,,,,,,,,,,,,1287,DET,12,3,2,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.423,Z,5.666667,2,3.0,13,8.0,11.0,25,0,0.0,,,0.0,-,17.0,41,14.0,45,-Z2,(13-25),-Z2 (13-25)
1430,Tyler Zuber,25,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,2.0,0.333,4.09,23.0,0.0,8.0,0.0,0.0,0.0,22.0,15.0,11.0,10.0,4.0,20.0,1.0,30.0,1.0,0.0,1.0,99.0,113.0,5.69,1.591,6.1,1.6,8.2,12.3,1.5,R,KCR,AL,23.0,0.0,0.0,22.0,2.0,0.0,2.0,0.0,0.0,1.0,,,-1.0,-9.0,0.0,0.82,...,,,,,,,,,,,,,,,,,1288,KCR,1st,23,0,0,23,23,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.192,K,22.0,1,7.0,21,11.0,18.0,36,0,0.0,,,0.266667,+,13.0,45,5.0,62,+K1,(21-36),+K1 (21-36)


In [691]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [692]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [693]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [694]:
players["Primary_Pos_fld"].value_counts()

P     779
OF    245
C     109
2B     86
1B     84
3B     65
SS     53
Name: Primary_Pos_fld, dtype: int64

In [695]:
players["Primary_Pos_fld"].isnull().sum()

10

In [696]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.992512
2B    0.978698
3B    0.961908
C     0.992771
OF    0.984515
P     0.933492
SS    0.975736
Name: Fld%, dtype: float64

### Superior Rating

In [697]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [698]:
players["superior_rating"].value_counts()

S    855
     576
Name: superior_rating, dtype: int64

In [699]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,...,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,,
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,...,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41],,,,,,,,,,0,0.0,,,,,,,,,,,,S
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,...,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35],,,,,,,,,,0,0.0,,,,,,,,,,,,
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,...,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.2,L,4.0,1.0,4.0,14.0,15.0,19.0,41,0,0.0,,,0.333333,+,11.0,51.0,6.0,61.0,+L1,(14-41),+L1 (14-41),S
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,...,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32],,,,,,,,,,0,0.0,,,,,,,,,,,,S
5,Jo Adell,21,LAA,AL,38.0,132.0,124.0,9.0,20.0,4.0,0.0,3.0,7.0,0.0,1.0,7.0,55.0,0.161,0.212,0.266,0.478,31.0,33.0,3.0,1.0,0.0,0.0,0.0,9/8H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAA,AL,38.0,33.0,33.0,295.0,77.0,72.0,2.0,3.0,0.0,0.961,-6.0,-25.0,,,,2.26,...,,,,,,,,,,,,,,,,10,LAA,1st,38,33,38,38,0,0,0,0,0,0,0,4,34,38,0,0,1,OF,0.184211,,D,5,15.0,0,0.0,,0.0,,2,12,15,17.0,35,0,0.0,,5,62,D15 [12-35],,,,,,,,,,0,0.0,,,,,,,,,,,,
6,Ehire Adrianza,30,MIN,AL,44.0,101.0,89.0,10.0,17.0,7.0,0.0,0.0,3.0,1.0,0.0,11.0,23.0,0.191,0.287,0.27,0.557,56.0,24.0,3.0,1.0,0.0,0.0,0.0,5H/64D1,S,AL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,65.0,19.19,1.0,9.0,9.0,0.0,0.0,,R,MIN,AL,38.0,24.0,22.0,239.1,81.0,29.0,51.0,1.0,9.0,0.988,1.0,6.0,2.0,10.0,0.0,3.01,...,,,,,,,,,,,,,,,,11,MIN,8,44,24,44,37,1,0,0,5,23,9,0,0,0,0,1,6,5,3B,0.068182,,D+,0,,0,0.0,,0.045,,4,14,8,12.0,26,0,0.0,,6,61,D+ [14-26],0.25,M,1.0,1.0,0.0,11.0,0.0,0.0,n,7,7.0,/21,,1.0,+,14.0,44.0,7.0,56.0,+M1,(11-n/21),+M1 (11-n/21),S
7,Jesus Aguilar,30,MIA,NL,51.0,216.0,188.0,31.0,52.0,10.0,0.0,8.0,34.0,0.0,1.0,23.0,40.0,0.277,0.352,0.457,0.809,120.0,86.0,5.0,1.0,0.0,4.0,0.0,3D/5H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIA,NL,32.0,31.0,29.0,262.0,266.0,243.0,20.0,3.0,36.0,0.989,-5.0,-24.0,0.0,0.0,0.0,9.03,...,,,,,,,,,,,,,,,,12,MIA,7,51,50,51,31,0,0,31,0,1,0,0,0,0,0,20,1,0,1B,0.666667,#,B,6,16.0,0,0.0,,0.0,,4,14,7,11.0,25,0,0.0,,9,54,#B16 [14-25],,,,,,,,,,0,0.0,,,,,,,,,,,,
8,Nick Ahmed,30,ARI,NL,57.0,217.0,199.0,29.0,53.0,10.0,1.0,5.0,29.0,4.0,0.0,18.0,46.0,0.266,0.327,0.402,0.729,94.0,80.0,3.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,NL,57.0,56.0,56.0,484.1,189.0,61.0,121.0,7.0,33.0,0.963,4.0,11.0,4.0,10.0,0.0,3.38,...,,,,,,,,,,,,,,,,13,ARI,7,57,56,57,57,0,0,0,0,0,57,0,0,0,0,0,1,0,SS,0.508772,,B,3,13.0,1,4.0,(14),0.073,,3,13,8,11.0,25,0,0.0,,9,54,B13(14) [13-25],,,,,,,,,,0,0.0,,,,,,,,,,,,
9,Shogo Akiyama,32,CIN,NL,54.0,183.0,155.0,16.0,38.0,6.0,1.0,0.0,9.0,7.0,3.0,25.0,34.0,0.245,0.357,0.297,0.654,76.0,46.0,1.0,2.0,0.0,0.0,0.0,*78/HD,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,52.0,47.0,34.0,381.0,89.0,87.0,2.0,0.0,0.0,1.0,-1.0,-2.0,,,,2.1,...,,,,,,,,,,,,,,,,15,CIN,1st,54,47,54,52,0,0,0,0,0,0,36,21,0,52,1,5,0,OF,0.166667,,C+,0,,1,1.0,(11),0.121,**,5,15,7,12.0,26,0,0.0,,7,56,C+(11)** [15-26],,,,,,,,,,0,0.0,,,,,,,,,,,,S


### Arm Rating

In [700]:
players["G"].value_counts()

12.0    76
11.0    63
5.0     62
2.0     60
1.0     59
        ..
73.0     1
66.0     1
63.0     1
65.0     1
67.0     1
Name: G, Length: 66, dtype: int64

In [701]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.38939308963729563

In [702]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [703]:
players["arm_rating"].value_counts()

8    1225
9     206
Name: arm_rating, dtype: int64

### Range Rating

In [704]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.1997463415278735

In [705]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    4.009094
2B    1.252797
3B    0.946688
C     6.520698
OF    1.383624
P     0.119291
SS    1.059025
Name: po_gp, dtype: float64

In [706]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [707]:
players["range_rating"].value_counts()

4    1273
5     158
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [708]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [709]:
players["CS%"].value_counts()

0%      19
27%      9
50%      8
14%      6
33%      6
18%      5
40%      4
17%      4
25%      3
20%      3
23%      2
29%      2
31%      2
15%      2
21%      2
13%      2
30%      2
38%      2
36%      2
32%      2
60%      1
9%       1
71%      1
46%      1
22%      1
11%      1
56%      1
19%      1
6%       1
35%      1
41%      1
10%      1
12%      1
45%      1
100%     1
26%      1
24%      1
16%      1
67%      1
Name: CS%, dtype: int64

In [710]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.24405909750303553

In [711]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [712]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    1330
        49
-1      24
-2      17
-3      11
Name: cs_num, dtype: int64

In [713]:
players["G_cat"].isnull().sum()

1320

In [714]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      1375
-1      24
-2      17
-3      11
-4       4
Name: cs_num, dtype: int64

In [715]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num
275,Jonah Heim,25,OAK,AL,13.0,41.0,38.0,5.0,8.0,0.0,0.0,0.0,5.0,0.0,0.0,3.0,3.0,0.211,0.268,0.211,0.479,39.0,8.0,1.0,0.0,0.0,0.0,0.0,2/DH,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,12.0,12.0,12.0,95.0,111.0,105.0,6.0,0.0,0.0,1.0,2.0,27.0,2.0,25.0,0.0,10.52,...,0.0,1.0,10.52,9.25,0.0,3.0,2.0,3.0,60%,520,OAK,1st,13,12,13,12,0,12,0,0,0,0,0,0,0,0,1,1,0,C,0.384615,,C,0,,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,7,56,C [13-16],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.461538,9,8.076923,4,0.6,-4
337,Andrew Knizner,25,STL,NL,8.0,17.0,16.0,1.0,4.0,1.0,0.0,0.0,4.0,0.0,0.0,0.0,5.0,0.25,0.235,0.313,0.548,48.0,5.0,2.0,0.0,0.0,1.0,0.0,/2HD,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STL,NL,7.0,4.0,4.0,38.0,47.0,40.0,6.0,1.0,0.0,0.979,1.0,22.0,0.0,0.0,0.0,10.89,...,0.0,0.0,10.89,6.57,1.0,2.0,1.0,2.0,67%,645,STL,2,8,4,8,7,0,7,0,0,0,0,0,0,0,0,1,2,1,C,0.5,,C+,0,,0,0.0,,0.0,,0,n,11,11.0,25,0,0.0,,8,55,C+ [n-25],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.75,9,5.0,4,0.666667,-4
483,Roberto Perez,31,CLE,AL,32.0,110.0,97.0,6.0,16.0,2.0,0.0,1.0,5.0,0.0,0.0,11.0,38.0,0.165,0.264,0.216,0.48,33.0,21.0,2.0,2.0,0.0,0.0,0.0,2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,32.0,30.0,25.0,256.0,312.0,291.0,21.0,0.0,2.0,1.0,7.0,35.0,6.0,28.0,1.0,10.97,...,3.0,0.0,10.97,9.75,0.0,7.0,4.0,10.0,71%,899,CLE,7,32,30,32,32,0,32,0,0,0,0,0,0,0,0,0,0,0,C,0.15625,,D,2,12.0,0,0.0,,0.0,,4,14,12,16.0,34,1,17.0,/35,5,62,D12 [14-34/35],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.65625,9,9.09375,4,0.714286,-4
496,Manny Pina,33,MIL,NL,15.0,45.0,39.0,4.0,9.0,1.0,0.0,2.0,5.0,0.0,0.0,3.0,11.0,0.231,0.333,0.41,0.744,99.0,16.0,0.0,3.0,0.0,0.0,0.0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIL,NL,13.0,12.0,8.0,98.1,124.0,117.0,7.0,0.0,2.0,1.0,3.0,31.0,3.0,37.0,1.0,11.35,...,2.0,0.0,11.35,9.54,0.0,2.0,4.0,5.0,56%,911,MIL,7,15,12,15,13,0,13,0,0,0,0,0,0,0,0,0,3,0,C,0.333333,,C+,8,22.0,0,0.0,,0.0,,2,12,9,11.0,25,2,13.0,/31,7,56,C+22 [12-25/31],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.466667,9,7.8,4,0.555556,-4


### Fielder Rating

In [716]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

S84       625
84        468
S85        96
S94        77
94         47
85         36
S95        19
S94 -1     16
S94 -2     12
94 -1       8
S94 -3      7
95          7
94 -2       5
94 -3       4
S94 -4      3
94 -4       1
Name: fielder_rating, dtype: int64

In [717]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [718]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [719]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [720]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,...,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.45,8,7.166667,4,,,84
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,...,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,2.217391,5,,,S85
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,...,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35],,,,,,,,,,0,0.0,,,,,,,,,,,,,2.425926,8,0.981481,4,,,84
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,...,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.2,L,4.0,1.0,4.0,14.0,15.0,19.0,41.0,0,0.0,,,0.333333,+,11.0,51.0,6.0,61.0,+L1,(14-41),+L1 (14-41),S,0.333333,8,0.0,4,,,S84
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,...,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,0.9375,4,,,S84


In [721]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_k_hbp', 'pitcher_rating', 'superior_rating', 'a_gp', 'arm_rating',
       'po_gp', 'range_rating', 'cs_rate', 'cs_num', 'fielder_rating'],
      dtype='object', length=185)

In [722]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,...,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.45,8,7.166667,4,,,84
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,2.22,OF,,,,,,...,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,2.217391,5,,,S85
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,3.47,SS,,,,,,...,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35],,,,,,,,,,0,0.0,,,,,,,,,,,,,2.425926,8,0.981481,4,,,84
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,0.33,P,,,,,,...,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.2,L,4.0,1.0,4.0,14.0,15.0,19.0,41.0,0,0.0,,,0.333333,+,11.0,51.0,6.0,61.0,+L1,(14-41),+L1 (14-41),S,0.333333,8,0.0,4,,,S84
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,7.5,1B,,,,,,...,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,0.9375,4,,,S84


In [723]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,José Abreu,33,CHW,AL,60.0,262.0,240.0,43.0,76.0,15.0,0.0,19.0,60.0,0.0,0.0,18.0,59.0,0.317,0.37,0.617,0.987,166.0,148.0,10.0,3.0,0.0,1.0,1.0,*3/D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,54.0,54.0,53.0,470.0,462.0,430.0,27.0,5.0,39.0,0.989,3.0,7.0,5.0,13.0,-1.0,8.75,8.46,1B,,,,,,...,,,,,,,,,,,,,,,,3,CHW,7,60,60,60,54,0,0,54,0,0,0,0,0,0,0,6,0,0,1B,1.0,#,A,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,10,53,#A23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.45,8,7.166667,4,,,84
1,Ronald Acuna Jr.,22,ATL,NL,46.0,202.0,160.0,46.0,40.0,11.0,0.0,14.0,29.0,8.0,1.0,38.0,60.0,0.25,0.406,0.581,0.987,155.0,93.0,3.0,4.0,0.0,0.0,2.0,*89,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,46.0,46.0,26.0,376.2,102.0,102.0,0.0,0.0,0.0,1.0,9.0,30.0,,,,2.44,2.22,OF,,,,,,...,,,,,,,,,,,,,,,,4,ATL,3,46,46,46,46,0,0,0,0,0,0,0,34,28,46,0,0,0,OF,0.630435,#,C+,13,31.0,0,0.0,,0.14,**,7,21,11,18.0,36,1,19.0,/41,7,56,#C+31** [21-36/41],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,2.217391,5,,,S85
2,Willy Adames,24,TBR,AL,54.0,205.0,185.0,29.0,48.0,15.0,1.0,8.0,23.0,2.0,1.0,20.0,74.0,0.259,0.332,0.481,0.813,124.0,89.0,4.0,0.0,0.0,0.0,0.0,*6/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TBR,AL,53.0,51.0,50.0,450.0,193.0,53.0,131.0,9.0,27.0,0.953,-4.0,-11.0,2.0,5.0,0.0,3.68,3.47,SS,,,,,,...,,,,,,,,,,,,,,,,6,TBR,3,54,51,54,53,0,0,0,0,0,53,0,0,0,0,0,1,1,SS,0.425926,,B,6,16.0,1,7.0,(21),0.045,,4,14,13,17.0,35,0,0.0,,8,55,B16(21) [14-35],,,,,,,,,,0,0.0,,,,,,,,,,,,,2.425926,8,0.981481,4,,,84
3,Austin Adams,29,SDP,NL,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.5,3.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,2.0,0.0,7.0,0.0,0.0,1.0,17.0,103.0,4.44,1.25,6.8,2.3,4.5,15.8,3.5,R,SDP,NL,3.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,2.25,0.33,P,,,,,,...,,,,,,,,,,,,,,,,7,SDP,4,3,0,1,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.2,L,4.0,1.0,4.0,14.0,15.0,19.0,41.0,0,0.0,,,0.333333,+,11.0,51.0,6.0,61.0,+L1,(14-41),+L1 (14-41),S,0.333333,8,0.0,4,,,S84
4,Matt Adams,31,ATL,NL,16.0,51.0,49.0,4.0,9.0,2.0,0.0,2.0,9.0,0.0,0.0,2.0,18.0,0.184,0.216,0.347,0.563,44.0,17.0,3.0,0.0,0.0,0.0,0.0,D/H3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,2.0,2.0,2.0,17.0,15.0,15.0,0.0,0.0,2.0,1.0,0.0,32.0,0.0,0.0,0.0,7.94,7.5,1B,,,,,,...,,,,,,,,,,,,,,,,9,ATL,9,16,13,16,2,0,0,2,0,0,0,0,0,0,0,11,3,0,1B,0.5625,,D+,8,22.0,0,0.0,,0.0,,1,11,13,14.0,32,0,0.0,,6,61,D+22 [11-32],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.0,8,0.9375,4,,,S84


In [724]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

12.0    74
3.0     65
4.0     63
11.0    61
2.0     60
        ..
38.0     7
46.0     7
45.0     6
41.0     6
61.0     1
Name: Games_Played, Length: 61, dtype: int64

In [725]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "Bats", "pitcher_rating", "PCN", 
                                "PPH", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "batter_rating": "Batter Rating",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "pitcher_rating": "Pitcher Rating",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,José Abreu,33,CHW,60.0,,,1B,84,#A23 [12-24],53,R,,,,,1B,0,0,54,0,0,0,0,0,0,0,6,0,0
1,Ronald Acuna Jr.,22,ATL,46.0,,,OF,S85,#C+31** [21-36/41],56,R,,,,,OF,0,0,0,0,0,0,0,34,28,46,0,0,0
2,Willy Adames,24,TBR,54.0,,,SS,84,B16(21) [14-35],55,R,,,,,SS,0,0,0,0,0,53,0,0,0,0,0,1,1
3,Austin Adams,29,SDP,3.0,0.0,1.0,P,S84,G [n-66],66,R,+L1 (14-41),51.0,61.0,R,P,3,0,0,0,0,0,0,0,0,0,0,0,0
4,Matt Adams,31,ATL,16.0,,,1B,S84,D+22 [11-32],61,L,,,,,1B,0,0,2,0,0,0,0,0,0,0,11,3,0


In [726]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [727]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           779
OF          245
C           109
2B           86
1B           84
3B           65
SS           53
DH_PH_PR     10
              0
Name: Primary, dtype: int64

In [728]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,José Abreu,33,CHW,60.0,,,1B,84,#A23 [12-24],53,R,,,,,1B,0,0,54,0,0,0,0,0,0,0,6,0,0
1,Ronald Acuna Jr.,22,ATL,46.0,,,OF,S85,#C+31** [21-36/41],56,R,,,,,OF,0,0,0,0,0,0,0,34,28,46,0,0,0
2,Willy Adames,24,TBR,54.0,,,SS,84,B16(21) [14-35],55,R,,,,,SS,0,0,0,0,0,53,0,0,0,0,0,1,1
3,Austin Adams,29,SDP,3.0,0.0,1.0,P,S84,G [n-66],66,R,+L1 (14-41),51.0,61.0,R,P,3,0,0,0,0,0,0,0,0,0,0,0,0
4,Matt Adams,31,ATL,16.0,,,1B,S84,D+22 [11-32],61,L,,,,,1B,0,0,2,0,0,0,0,0,0,0,11,3,0


In [729]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [730]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [731]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [732]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [733]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
324,Carson Kelly,25,ARI,39.0,0.0,1.0,C-P,S94,C21 [12-24],55,R,-M1 (11-n),53.0,54.0,R,C,1,38,0,0,0,0,0,0,0,0,1,2,0
663,Stephen Vogt,35,ARI,26.0,,,C-1B,S94 -1,D13 [14-26],62,L,,,,,C,0,23,1,0,0,0,0,0,0,0,1,3,0
669,Christian Walker,29,ARI,57.0,,,1B,84,B14(15) [13-24],54,R,,,,,1B,0,0,43,0,0,0,0,0,0,0,14,1,0
579,Pavin Smith,24,ARI,12.0,,,1B-OF,S84,B14(22)* [14-25],55,L,,,,,1B,0,0,5,0,0,0,3,0,2,5,2,0,0
134,Kevin Cron,27,ARI,8.0,,,1B,S84,G [12-33/41],66,R,,,,,1B,0,0,1,0,0,0,0,0,0,0,6,3,0
376,Ketel Marte,26,ARI,45.0,,,2B-OF-SS,S84,B+11(12) [11-15/16],53,S,,,,,2B,0,0,0,41,0,2,0,3,0,3,1,0,0
534,Josh Rojas,26,ARI,17.0,,,2B-SS-OF,S84,D+ [14-26],61,L,,,,,2B,0,0,0,8,0,2,1,0,0,1,5,1,0
644,Josh VanMeter,25,ARI,12.0,,,2B-3B-1B,84,D+15 [14-25/26],61,L,,,,,2B,0,0,3,17,2,0,0,0,0,0,5,4,1
647,Ildemaro Vargas,28,ARI,8.0,,,2B-1B-3B,84,D [12-25],62,S,,,,,2B,0,0,5,16,3,0,0,0,0,0,0,6,1
184,Eduardo Escobar,31,ARI,54.0,,,3B-2B,84,C13(16) [12-23],56,S,,,,,3B,0,0,0,3,47,0,0,0,0,0,4,0,0


In [734]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'ARI':                    Name  Age   Tm     G  GS GF Positions     DEF  \
 324        Carson Kelly   25  ARI  39.0   0  1       C-P    S94    
 663        Stephen Vogt   35  ARI  26.0             C-1B  S94 -1   
 669    Christian Walker   29  ARI  57.0               1B     84    
 579         Pavin Smith   24  ARI  12.0            1B-OF    S84    
 134          Kevin Cron   27  ARI   8.0               1B    S84    
 376         Ketel Marte   26  ARI  45.0         2B-OF-SS    S84    
 534          Josh Rojas   26  ARI  17.0         2B-SS-OF    S84    
 644       Josh VanMeter   25  ARI  12.0         2B-3B-1B     84    
 647     Ildemaro Vargas   28  ARI   8.0         2B-1B-3B     84    
 184     Eduardo Escobar   31  ARI  54.0            3B-2B     84    
 346           Jake Lamb   29  ARI  18.0            3B-1B    S85    
 693          Andy Young   26  ARI  12.0         3B-2B-OF     84    
 388      Wyatt Mathisen   26  ARI   9.0               3B     84    
 8            Nick Ahmed   

In [735]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [736]:
create_excel(my_dict)

31

## Format Excel file

In [737]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [738]:
# wb.sheetnames

In [739]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "BRO": "Brooklyn Dodgers",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCA": "Kansas City Athletics",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MLN": "Milwaukee Braves",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYG": "New York Giants",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBD": "Tampa Bay Devil Rays",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators",
    "WSH": "Washington Senators"
}

In [740]:
# team_dict["ATL"]

In [741]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 3
width_2 = 4
width_3 = 5
width_4 = 6
width_5 = 8
width_6 = 12
width_7 = 14
width_8 = 22
width_9 = 24

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_4    
    sheet.column_dimensions['B'].width = width_8
    sheet.column_dimensions['C'].width = width_2
    sheet.column_dimensions['D'].width = width_3
    sheet.column_dimensions['E'].width = width_3
    sheet.column_dimensions['F'].width = width_2
    sheet.column_dimensions['G'].width = width_2
    sheet.column_dimensions['H'].width = width_7
    sheet.column_dimensions['I'].width = width_5
    sheet.column_dimensions['J'].width = width_9
    sheet.column_dimensions['K'].width = width_3
    sheet.column_dimensions['L'].width = width_1
    sheet.column_dimensions['M'].width = width_9
    sheet.column_dimensions['N'].width = width_3
    sheet.column_dimensions['O'].width = width_3
    sheet.column_dimensions['P'].width = width_1
    sheet.column_dimensions['Q'].width = width_6
    sheet.column_dimensions['R'].width = width_3
    sheet.column_dimensions['S'].width = width_3
    sheet.column_dimensions['T'].width = width_3
    sheet.column_dimensions['U'].width = width_3
    sheet.column_dimensions['V'].width = width_3
    sheet.column_dimensions['W'].width = width_3
    sheet.column_dimensions['X'].width = width_3
    sheet.column_dimensions['Y'].width = width_3
    sheet.column_dimensions['Z'].width = width_3
    sheet.column_dimensions['AA'].width = width_3
    sheet.column_dimensions['AB'].width = width_3
    sheet.column_dimensions['AC'].width = width_3
    sheet.column_dimensions['AD'].width = width_3
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment
            cell.font = Font(size = 14)
    
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
    sheet['A2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['B2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['C2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['D2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['E2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['F2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['G2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['H2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['I2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['J2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['K2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['L2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['M2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['N2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['O2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['P2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Q2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['R2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['S2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['T2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['U2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['V2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['W2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['X2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Y2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Z2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AA2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AB2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AC2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AD2'].font = Font(size = 14, bold = True, color='00FFFFFF')
            

wb.save('../data/' + year + ' rosters ' + ' formatted.xlsx')

# Clean up

## Remove unwanted files

In [742]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')