# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [731]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [732]:
year = '1969'

In [733]:
url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'
url_app = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-appearances-fielding.shtml'

In [734]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/1969-specialpos_c-fielding.shtml'

In [735]:
session_requests = requests.session()

result = session_requests.get(url_bat, headers = dict(referer = url_bat))
result.status_code

200

In [736]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [737]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [738]:
soup = BeautifulSoup(result.content, 'lxml')

In [739]:
# print(soup.prettify())

In [740]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [741]:
bat = pullTable(url_bat, 'players_standard_batting')

In [742]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [743]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [744]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [745]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [746]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [747]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [748]:
findTables(url_app)

['teams_appearances', 'players_players_appearances_fielding']

In [749]:
app = pullTable(url_app, 'players_players_appearances_fielding')

In [750]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
1008,1009.0,Carl Yastrzemski*,29.0,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.87,136.0,306,14,1,0,2,9,*73/8
1009,1010.0,Don Young,23.0,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91.0,101,4,5,8,0,5,8/H97
1010,1011.0,Chris Zachary*,25.0,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.5,0.5,0.5,1.0,181.0,1,0,0,0,0,0,/1
1011,1012.0,Bill Zepp,22.0,MIN,AL,4,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1
1012,,LgAvg per 600 PA,,,,207,600,532,64,131,19,3,12,59,8,5,54,91,0.247,0.319,0.366,0.685,,195,12,4,7,4,6,


In [751]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [752]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [753]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    680
L    267
S     65
Name: Bats, dtype: int64

In [754]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1007,1008,Billy Wynne,25,CHW,AL,20,50,41,1,5,0,1,0,2,0,0,2,6,0.122,0.163,0.171,0.334,-9,7,1,0,7,0,0,1,L
1008,1009,Carl Yastrzemski,29,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.87,136,306,14,1,0,2,9,*73/8,L
1009,1010,Don Young,23,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91,101,4,5,8,0,5,8/H97,R
1010,1011,Chris Zachary,25,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.5,0.5,0.5,1.0,181,1,0,0,0,0,0,/1,L
1011,1012,Bill Zepp,22,MIN,AL,4,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100,0,0,0,0,0,0,/1,R


In [755]:
bat.drop(columns=["Rk"], inplace=True)

In [756]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1007,Billy Wynne,25,CHW,AL,20,50,41,1,5,0,1,0,2,0,0,2,6,0.122,0.163,0.171,0.334,-9,7,1,0,7,0,0,1,L
1008,Carl Yastrzemski,29,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.87,136,306,14,1,0,2,9,*73/8,L
1009,Don Young,23,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91,101,4,5,8,0,5,8/H97,R
1010,Chris Zachary,25,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.5,0.5,0.5,1.0,181,1,0,0,0,0,0,/1,L
1011,Bill Zepp,22,MIN,AL,4,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100,0,0,0,0,0,0,/1,R


In [757]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
442,443.0,John Wyatt,34.0,OAK,AL,0,1,0.0,5.4,4,0,1,0,0,0,8.1,8,5,5,0,6,1,5,2,0,2,40,67,4.19,1.68,8.6,0.0,6.5,5.4,0.83
443,444.0,Billy Wynne,25.0,CHW,AL,7,7,0.5,4.06,20,20,0,6,1,0,128.2,143,63,58,14,50,8,67,3,1,3,564,96,4.12,1.5,10.0,1.0,3.5,4.7,1.34
444,445.0,Chris Zachary,25.0,KCR,AL,0,1,0.0,7.85,8,2,2,0,0,0,18.1,27,17,16,4,7,0,6,0,0,4,87,48,5.84,1.855,13.3,2.0,3.4,2.9,0.86
445,446.0,Bill Zepp,22.0,MIN,AL,0,0,,6.75,4,0,1,0,0,0,5.1,6,7,4,1,4,1,2,0,0,0,27,59,6.45,1.875,10.1,1.7,6.8,3.4,0.5
446,,LgAvg per 180 IP,,,,10,10,0.491,3.66,54,20,16,5,1,4,180.0,169,83,73,16,70,8,115,5,1,7,766,99,3.65,1.327,8.5,0.8,3.5,5.8,1.66


In [758]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [759]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [760]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    324
L    122
Name: Throws, dtype: int64

In [761]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
441,442,Clyde Wright,28,CAL,AL,1,8,0.111,4.1,37,5,9,0,0,0,63.2,66,33,29,4,30,5,31,1,0,3,272,85,3.81,1.508,9.3,0.6,4.2,4.4,1.03,L
442,443,John Wyatt,34,OAK,AL,0,1,0.0,5.4,4,0,1,0,0,0,8.1,8,5,5,0,6,1,5,2,0,2,40,67,4.19,1.68,8.6,0.0,6.5,5.4,0.83,R
443,444,Billy Wynne,25,CHW,AL,7,7,0.5,4.06,20,20,0,6,1,0,128.2,143,63,58,14,50,8,67,3,1,3,564,96,4.12,1.5,10.0,1.0,3.5,4.7,1.34,R
444,445,Chris Zachary,25,KCR,AL,0,1,0.0,7.85,8,2,2,0,0,0,18.1,27,17,16,4,7,0,6,0,0,4,87,48,5.84,1.855,13.3,2.0,3.4,2.9,0.86,R
445,446,Bill Zepp,22,MIN,AL,0,0,,6.75,4,0,1,0,0,0,5.1,6,7,4,1,4,1,2,0,0,0,27,59,6.45,1.875,10.1,1.7,6.8,3.4,0.5,R


In [762]:
pit.drop(columns=["Rk"], inplace=True)

In [763]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [764]:
players.shape

(1018, 62)

In [765]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos Summary
829,830.0,Carl Yastrzemski,29.0,BOS,AL,165,162,129,1391.1,471,427,38,6,31,0.987,13.0,11.0,3.01,2.82,OF-1B
830,831.0,Don Young,23.0,CHC,NL,100,88,65,769.2,200,191,4,5,0,0.975,0.0,0.0,2.28,1.95,OF
831,832.0,Chris Zachary,25.0,KCR,AL,8,2,0,18.1,2,0,2,0,0,1.0,,,0.98,0.25,P
832,833.0,Bill Zepp,22.0,MIN,AL,4,0,0,5.1,1,1,0,0,0,1.0,,,1.69,0.25,P
833,,LgAvg,,,,194,151,119,1354.0,650,454,181,15,41,0.977,0.0,0.0,4.22,3.28,


In [766]:
fld = fld[fld["Name"] != "LgAvg"]

In [767]:
fld.drop(columns=["Rk"], inplace=True)

In [768]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [769]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rctch,RF/9,RF/G,PB,WP,SB,CS,CS%
101,102.0,Joe Torre,28.0,STL,NL,17,16,15,145.2,99,90,8,1,0,0.99,1,8,1,6.05,5.76,2,7,3,6,67%
102,103.0,Freddie Velazquez,31.0,SEP,AL,5,4,4,37.0,27,27,0,0,0,1.0,-1,-32,-1,6.57,5.4,1,2,5,0,0%
103,104.0,Dave Watkins,25.0,PHI,NL,54,33,30,336.2,264,241,18,5,2,0.981,-5,-18,-5,6.92,4.8,13,21,20,13,39%
104,105.0,Bob Watson,23.0,HOU,NL,1,0,0,2.0,5,5,0,0,0,1.0,0,0,0,22.5,5.0,1,0,0,0,
105,,LgAvg,,,,19,17,14,151.0,116,106,9,1,1,0.988,0,0,0,6.86,5.95,2,5,8,5,38%


In [770]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [771]:
cat = cat[cat["Name"] != "LgAvg"]

In [772]:
cat.drop(columns=["Rk"], inplace=True)

In [773]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [774]:
app.tail()

Unnamed: 0,Rk,Name,Age,Tm,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,PH,PR
844,845,Billy Wynne,25,CHW,3,20,20,20,20,20,0,0,0,0,0,0,0,0,0,0,0
845,846,Carl Yastrzemski,29,BOS,9,162,162,162,162,0,0,22,0,0,0,140,4,0,143,0,0
846,847,Don Young,23,CHC,2,101,88,101,100,0,0,0,0,0,0,3,94,8,100,2,6
847,848,Chris Zachary,25,KCR,6,8,2,8,8,8,0,0,0,0,0,0,0,0,0,0,0
848,849,Bill Zepp,22,MIN,1st,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0


In [775]:
players = pd.merge(players, app, how='left', on=["Name", "Age"], suffixes=('', '_app'))

In [776]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [777]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,,,,,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,,,,,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0


In [778]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [779]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'RF/9_cat',
 'RF/G_cat',
 'PB',
 'WP_cat',
 'SB_cat',
 'CS_cat',
 'CS%',
 'Rk',
 'Tm_app',
 'Yrs',
 'G_app',
 'GS_app',
 'Batt

In [780]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [781]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     450
OF    224
C      97
2B     64
1B     60
SS     59
3B     48
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [782]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [783]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    340
0.166667     13
0.500000      9
0.125000      9
0.100000      8
           ... 
0.317647      1
0.186916      1
0.032787      1
0.408451      1
0.017857      1
Name: rbi_per_g, Length: 460, dtype: int64

In [784]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     982
#     36
Name: clutch, dtype: int64

In [785]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#
13,Dick Allen,27,PHI,NL,118,506,438,79,126,23,3,32,89,9,3,64,144,0.288,0.375,0.573,0.949,165.0,251,10,0,0,4,10,*3/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,117.0,117.0,111.0,1021.1,1094.0,1024.0,54.0,16.0,100.0,0.985,-13.0,-15.0,9.5,9.21,1B,,,,,,,,,,,,,,,,,,,,,,,12,PHI,7,118,117,118,117,0,0,117,0,0,0,0,0,0,0,1,0,1B,0.754237,#
44,Sal Bando,25,OAK,AL,162,734,609,106,171,25,3,31,113,1,4,111,82,0.281,0.4,0.484,0.885,153.0,295,12,11,2,1,5,*5,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,162.0,162.0,162.0,1480.2,523.0,178.0,321.0,24.0,36.0,0.954,2.0,2.0,3.03,3.08,3B,,,,,,,,,,,,,,,,,,,,,,,38,OAK,4,162,162,162,162,0,0,0,0,162,0,0,0,0,0,0,0,3B,0.697531,#
46,Ernie Banks,38,CHC,NL,155,629,565,60,143,19,2,23,106,0,0,42,101,0.253,0.309,0.416,0.725,92.0,235,15,7,8,7,7,*3/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,153.0,147.0,139.0,1301.2,1510.0,1419.0,87.0,4.0,116.0,0.997,-7.0,-6.0,10.41,9.84,1B,,,,,,,,,,,,,,,,,,,,,,,40,CHC,17,155,147,155,153,0,0,153,0,0,0,0,0,0,0,6,0,1B,0.683871,#
60,Johnny Bench,21,CIN,NL,148,592,532,83,156,23,1,26,90,6,6,49,86,0.293,0.353,0.487,0.84,129.0,259,7,4,0,7,7,*2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,147.0,135.0,131.0,1249.0,876.0,793.0,76.0,7.0,10.0,0.992,4.0,4.0,6.26,5.91,C,CIN,NL,147.0,135.0,131.0,1249.0,876.0,793.0,76.0,7.0,10.0,0.992,4.0,4.0,4.0,6.26,5.91,14.0,63.0,30.0,40.0,57%,52,CIN,3,148,135,148,147,0,147,0,0,0,0,0,0,0,0,11,0,C,0.608108,#
171,Roberto Clemente,34,PIT,NL,138,570,507,87,175,20,12,19,91,4,1,56,73,0.345,0.411,0.544,0.955,168.0,276,19,3,0,4,16,*9/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,135.0,128.0,117.0,1124.0,245.0,226.0,14.0,5.0,1.0,0.98,11.0,12.0,1.92,1.78,OF,,,,,,,,,,,,,,,,,,,,,,,149,PIT,15,138,128,138,135,0,0,0,0,0,0,0,0,135,135,6,0,OF,0.65942,#
191,Chip Coulter,24,STL,NL,6,21,19,3,6,1,1,0,4,0,1,2,6,0.316,0.381,0.474,0.855,139.0,9,0,0,0,0,0,/4,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STL,NL,6.0,6.0,4.0,50.0,25.0,8.0,16.0,1.0,5.0,0.96,0.0,5.0,4.32,4.0,2B,,,,,,,,,,,,,,,,,,,,,,,165,STL,1st,6,6,6,6,0,0,0,6,0,0,0,0,0,0,0,0,2B,0.666667,#
218,Tommy Davis,30,TOT,MLB,147,582,533,54,142,32,1,7,89,20,5,38,55,0.266,0.318,0.37,0.688,95.0,197,19,5,1,5,5,*7H/38,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,134.0,129.0,91.0,1109.0,221.0,210.0,4.0,7.0,0.0,0.968,-21.0,-23.0,1.74,1.6,OF-1B,,,,,,,,,,,,,,,,,,,,,,,186,2TM,11,147,129,147,134,0,0,1,0,0,0,132,1,0,133,18,0,OF,0.605442,#
219,Tommy Davis,30,SEP,AL,123,494,454,52,123,29,1,6,80,19,4,30,46,0.271,0.318,0.379,0.697,97.0,172,17,4,1,5,5,*7H/3,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,134.0,129.0,91.0,1109.0,221.0,210.0,4.0,7.0,0.0,0.968,-21.0,-23.0,1.74,1.6,OF-1B,,,,,,,,,,,,,,,,,,,,,,,186,2TM,11,147,129,147,134,0,0,1,0,0,0,132,1,0,133,18,0,OF,0.650407,#
258,Mike Epstein,26,WSA,AL,131,500,403,73,112,18,1,30,85,2,5,85,99,0.278,0.414,0.551,0.965,176.0,222,8,10,0,2,10,*3H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,WSA,AL,118.0,115.0,86.0,984.0,1115.0,1035.0,69.0,11.0,99.0,0.99,-6.0,-8.0,10.1,9.36,1B,,,,,,,,,,,,,,,,,,,,,,,220,WSA,4,131,115,131,118,0,0,118,0,0,0,0,0,0,0,15,0,1B,0.648855,#


### Letter Rating

In [786]:
players["BA"].value_counts()

0.000    162
0.250     24
0.200     16
0.167     14
0.333     14
        ... 
0.444      1
0.198      1
0.357      1
0.417      1
0.322      1
Name: BA, Length: 251, dtype: int64

In [787]:
players["BA"].isnull().sum()

57

In [788]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [789]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [790]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      219
C+     159
B      127
C       95
B+      75
D       69
D+      61
E       51
E+      48
A       34
F       31
AAA     25
G+      12
A+      10
AA       2
Name: bat_letter, dtype: int64

In [791]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
7,Hank Aguirre,38,CHC,NL,41,5,5,2,2,0,0,0,0,0,0,0,1,0.4,0.4,0.4,0.8,114.0,2,0,0,0,0,0,1,R,NL,1.0,0.0,1.0,2.6,41.0,0.0,9.0,0.0,0.0,1.0,45.0,45.0,13.0,13.0,2.0,12.0,1.0,19.0,2.0,0.0,1.0,186.0,156.0,3.18,1.267,9.0,0.4,2.4,3.8,1.58,L,CHC,NL,41.0,0.0,0.0,45.0,14.0,1.0,13.0,0.0,0.0,1.0,,,2.8,0.34,P,,,,,,,,,,,,,,,,,,,,,,,8,CHC,15,41,0,41,41,41,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
15,Lloyd Allen,19,CAL,AL,4,2,2,0,1,1,0,0,0,0,0,0,0,0.5,0.5,1.0,1.5,323.0,2,0,0,0,0,0,/1,R,AL,0.0,1.0,0.0,5.4,4.0,1.0,0.0,0.0,0.0,0.0,10.0,5.0,7.0,6.0,1.0,10.0,0.0,5.0,0.0,0.0,0.0,44.0,67.0,5.81,1.5,4.5,0.9,9.0,4.5,0.5,R,CAL,AL,4.0,1.0,0.0,10.0,5.0,1.0,4.0,0.0,0.0,1.0,,,4.5,1.25,P,,,,,,,,,,,,,,,,,,,,,,,14,CAL,1st,4,1,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
62,Frank Bertaina,25,TOT,AL,17,14,12,2,5,1,0,1,3,0,0,1,2,0.417,0.462,0.75,1.212,243.0,9,2,0,1,0,0,1,L,AL,1.0,3.0,0.25,5.62,17.0,5.0,3.0,0.0,0.0,0.0,41.2,44.0,30.0,26.0,8.0,26.0,0.0,30.0,0.0,1.0,2.0,193.0,63.0,5.44,1.68,9.5,1.7,5.6,6.5,1.15,L,TOT,AL,17.0,5.0,0.0,41.2,12.0,3.0,7.0,2.0,0.0,0.833,,,2.16,0.59,P,,,,,,,,,,,,,,,,,,,,,,,54,2TM,6,17,5,17,17,17,0,0,0,0,0,0,0,0,0,0,0,P,0.176471,,AAA
64,Frank Bertaina,25,BAL,AL,3,2,1,0,1,0,0,0,0,0,0,1,0,1.0,1.0,1.0,2.0,460.0,1,0,0,0,0,0,/1,L,AL,0.0,0.0,,0.0,3.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,20.0,,2.34,0.667,1.5,0.0,4.5,7.5,1.67,L,TOT,AL,17.0,5.0,0.0,41.2,12.0,3.0,7.0,2.0,0.0,0.833,,,2.16,0.59,P,,,,,,,,,,,,,,,,,,,,,,,54,2TM,6,17,5,17,17,17,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
72,Ron Blomberg,20,NYY,AL,4,7,6,0,3,0,0,0,0,0,0,1,0,0.5,0.571,0.5,1.071,208.0,3,0,0,0,0,0,/7H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,2.0,2.0,0.0,13.0,2.0,2.0,0.0,0.0,0.0,1.0,0.0,9.0,1.38,1.0,OF,,,,,,,,,,,,,,,,,,,,,,,62,NYY,1st,4,2,4,2,0,0,0,0,0,0,2,0,0,2,2,0,OF,0.0,,AAA
166,Rickey Clark,23,CAL,AL,6,2,2,0,1,0,0,0,0,0,0,0,0,0.5,0.5,0.5,1.0,189.0,1,1,0,0,0,0,/1,R,AL,0.0,0.0,,5.59,6.0,1.0,2.0,0.0,0.0,0.0,9.2,12.0,6.0,6.0,2.0,7.0,1.0,6.0,0.0,0.0,0.0,48.0,65.0,6.13,1.966,11.2,1.9,6.5,5.6,0.86,R,CAL,AL,6.0,1.0,0.0,9.2,1.0,0.0,0.0,1.0,0.0,0.0,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,146,CAL,3,6,1,6,6,6,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
199,Jerry Crider,27,MIN,AL,21,9,9,3,4,2,0,0,0,0,0,0,2,0.444,0.444,0.667,1.111,203.0,6,0,0,0,0,0,1,R,AL,1.0,0.0,1.0,4.71,21.0,1.0,5.0,0.0,0.0,1.0,28.2,31.0,15.0,15.0,3.0,15.0,6.0,16.0,2.0,0.0,1.0,130.0,80.0,4.53,1.605,9.7,0.9,4.7,5.0,1.07,R,MIN,AL,21.0,1.0,0.0,28.2,6.0,2.0,4.0,0.0,1.0,1.0,,,1.88,0.29,P,,,,,,,,,,,,,,,,,,,,,,,171,MIN,1st,21,1,21,21,21,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
227,Rick Dempsey,19,MIN,AL,5,7,6,1,3,1,0,0,0,0,0,1,0,0.5,0.571,0.667,1.238,241.0,4,0,0,0,0,0,/2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,3.0,0.0,0.0,10.2,6.0,5.0,0.0,1.0,0.0,0.833,0.0,0.0,4.22,1.67,C,MIN,AL,3.0,0.0,0.0,10.2,6.0,5.0,0.0,1.0,0.0,0.833,0.0,0.0,0.0,4.22,1.67,0.0,0.0,1.0,0.0,0%,193,MIN,1st,5,0,5,3,0,3,0,0,0,0,0,0,0,0,2,0,C,0.0,,AAA
263,Roy Face,41,MON,NL,44,3,2,0,1,0,0,0,0,0,0,0,0,0.5,0.5,0.5,1.0,182.0,1,0,0,1,0,0,1,R,NL,4.0,2.0,0.667,3.94,44.0,0.0,27.0,0.0,0.0,5.0,59.1,62.0,29.0,26.0,11.0,15.0,3.0,34.0,0.0,1.0,0.0,253.0,94.0,4.53,1.298,9.4,1.7,2.3,5.2,2.27,R,MON,NL,44.0,0.0,0.0,59.1,10.0,3.0,7.0,0.0,0.0,1.0,,,1.52,0.23,P,,,,,,,,,,,,,,,,,,,,,,,225,MON,16,44,0,44,44,44,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
282,George Foster,20,SFG,NL,9,5,5,1,2,0,0,0,1,0,0,0,1,0.4,0.4,0.4,0.8,126.0,2,0,0,0,0,0,/7H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,8.0,0.0,0.0,17.1,3.0,3.0,0.0,0.0,0.0,1.0,0.0,-21.0,1.56,0.38,OF,,,,,,,,,,,,,,,,,,,,,,,242,SFG,1st,9,0,9,8,0,0,0,0,0,0,8,0,0,8,1,3,OF,0.111111,,AAA


In [792]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.223

### HR Number

In [793]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     585
2      78
1      73
4      63
3      60
6      46
5      37
7      24
8      15
9      14
12     10
10      8
11      3
27      1
18      1
Name: hr_rate, dtype: int64

In [794]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      585
12     78
11     73
14     63
13     60
16     46
15     37
21     24
22     15
23     14
26     10
24      8
25      3
36      1
53      1
Name: hr_num_bat, dtype: int64

### Triple Number

In [795]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     726
1     166
2      72
3      22
4      14
6       6
9       5
7       4
12      2
5       1
Name: triple_rate, dtype: int64

In [796]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     726
2.0      42
5.0      42
3.0      41
4.0      41
6.0      35
7.0      28
1.0      18
8.0      17
9.0      14
12.0      4
10.0      4
13.0      2
11.0      2
14.0      1
18.0      1
Name: triple_val, dtype: int64

In [797]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        726
(12)     42
(15)     42
(13)     41
(14)     41
(16)     35
(21)     28
(11)     18
(22)     17
(23)     14
(24)      4
(26)      4
(25)      2
(31)      2
(32)      1
(36)      1
Name: triple_num, dtype: int64

### Speed Rating

In [798]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [799]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    677
0.010     10
0.013      8
0.042      8
0.022      8
        ... 
0.363      1
0.133      1
0.100      1
0.166      1
0.160      1
Name: speed_score, Length: 138, dtype: int64

In [800]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [801]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        916
**       45
*        32
****     13
***      12
Name: speed_rating, dtype: int64

In [802]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
138,Bert Campaneris,27,OAK,AL,135,592,547,71,142,15,2,2,25,62,8,30,62,0.26,0.302,0.305,0.608,75.0,167,5,4,10,1,2,*6H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,125.0,125.0,120.0,1117.1,632.0,220.0,391.0,21.0,72.0,0.967,11.0,11.0,4.92,4.89,SS,,,,,,,,,,,,,,,,,,,,,,,120,OAK,6,135,125,135,125,0,0,0,0,0,125,0,0,0,0,5,5,SS,0.185185,,B,1,11.0,1,2.0,(12),0.395,****
346,Dick Hall,38,BAL,AL,39,10,7,1,2,0,0,0,2,1,0,1,1,0.286,0.375,0.286,0.661,87.0,2,0,0,2,0,0,1,R,AL,5.0,2.0,0.714,1.92,39.0,0.0,17.0,0.0,0.0,6.0,65.2,49.0,14.0,14.0,3.0,9.0,6.0,31.0,1.0,0.0,0.0,246.0,186.0,2.62,0.883,6.7,0.4,1.2,4.2,3.44,R,BAL,AL,39.0,0.0,0.0,65.2,10.0,4.0,6.0,0.0,2.0,1.0,,,1.37,0.26,P,,,,,,,,,,,,,,,,,,,,,,,294,BAL,17,39,0,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.051282,,B+,0,,0,0.0,,0.333,****
349,Jimmie Hall,31,CLE,AL,4,12,10,1,0,0,0,0,0,1,0,2,3,0.0,0.167,0.0,0.167,-50.0,0,1,0,0,0,1,/78H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,65.0,62.0,51.0,528.2,155.0,144.0,6.0,5.0,4.0,0.968,-8.0,-17.0,2.55,2.31,OF-1B,,,,,,,,,,,,,,,,,,,,,,,295,3TM,7,95,62,95,65,0,0,7,0,0,0,12,24,22,58,33,0,OF,0.0,,G,0,,0,0.0,,0.5,****
367,Tommy Harper,28,SEP,AL,148,640,537,78,126,10,2,9,41,73,18,95,90,0.235,0.349,0.311,0.66,89.0,167,8,1,4,3,2,458/H79,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SEP,AL,144.0,137.0,125.0,1203.0,522.0,232.0,268.0,22.0,42.0,0.958,-22.0,-22.0,3.74,3.47,3B-2B-OF,,,,,,,,,,,,,,,,,,,,,,,307,SEP,8,148,137,148,141,0,0,0,59,59,0,4,22,1,27,8,1,3B,0.277027,,C+,3,13.0,1,4.0,(14),0.363,****
405,Herman Hill,23,MIN,AL,16,2,2,4,0,0,0,0,0,1,2,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,H/8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,2.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,,0.0,-72.0,0.0,0.0,OF,,,,,,,,,,,,,,,,,,,,,,,341,MIN,1st,16,0,16,2,0,0,0,0,0,0,0,2,0,2,1,13,OF,0.0,,G,0,,0,0.0,,inf,****
447,Bart Johnson,19,CHW,AL,4,8,6,1,1,0,0,0,1,1,0,2,1,0.167,0.375,0.167,0.542,53.0,1,0,0,0,0,0,/1,R,AL,1.0,3.0,0.25,3.22,4.0,3.0,1.0,0.0,0.0,0.0,22.1,22.0,11.0,8.0,2.0,6.0,0.0,18.0,0.0,1.0,2.0,91.0,123.0,2.87,1.254,8.9,0.8,2.4,7.3,3.0,R,CHW,AL,4.0,3.0,0.0,22.1,5.0,2.0,2.0,1.0,0.0,0.8,,,1.61,1.0,P,,,,,,,,,,,,,,,,,,,,,,,379,CHW,1st,4,3,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,D,0,,0,0.0,,0.333,****
471,Von Joshua,21,LAD,NL,14,8,8,2,2,0,0,0,0,1,0,0,2,0.25,0.25,0.25,0.5,45.0,2,0,0,0,0,0,H/798,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAD,NL,8.0,2.0,1.0,26.2,5.0,4.0,0.0,1.0,0.0,0.8,0.0,-9.0,1.35,0.5,OF,,,,,,,,,,,,,,,,,,,,,,,395,LAD,1st,14,2,14,8,0,0,0,0,0,0,6,1,2,8,1,9,OF,0.0,,C+,0,,0,0.0,,0.5,****
477,Pat Kelly,24,KCR,AL,112,474,417,61,110,20,4,8,32,40,13,49,70,0.264,0.348,0.388,0.737,106.0,162,5,5,2,0,3,98/H7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,107.0,106.0,92.0,938.0,254.0,237.0,12.0,5.0,3.0,0.98,1.0,2.0,2.39,2.33,OF,,,,,,,,,,,,,,,,,,,,,,,401,KCR,3,112,106,112,107,0,0,0,0,0,0,2,47,61,107,4,2,OF,0.285714,,B,3,13.0,1,4.0,(14),0.303,****
519,Bill Landis,26,BOS,AL,46,14,11,0,0,0,0,0,0,1,0,3,4,0.0,0.214,0.0,0.214,-37.0,0,0,0,0,0,0,1/H,L,AL,5.0,5.0,0.5,5.25,45.0,5.0,9.0,0.0,0.0,1.0,82.1,82.0,53.0,48.0,7.0,49.0,3.0,50.0,3.0,1.0,5.0,369.0,74.0,4.29,1.591,9.0,0.8,5.4,5.5,1.02,L,BOS,AL,45.0,5.0,0.0,82.1,20.0,2.0,16.0,2.0,0.0,0.9,,,1.97,0.4,P,,,,,,,,,,,,,,,,,,,,,,,435,BOS,4,46,5,46,45,45,0,0,0,0,0,0,0,0,0,0,1,P,0.0,,G,0,,0,0.0,,0.333,****
660,Scott Northey,22,KCR,AL,20,68,61,11,16,2,2,1,7,6,3,7,19,0.262,0.338,0.41,0.748,109.0,25,0,0,0,0,0,8/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,18.0,16.0,12.0,134.0,37.0,35.0,1.0,1.0,0.0,0.973,0.0,1.0,2.42,2.0,OF,,,,,,,,,,,,,,,,,,,,,,,562,KCR,1st,20,16,20,18,0,0,0,0,0,0,0,18,0,18,0,3,OF,0.35,,B,2,12.0,4,6.0,(16),0.333,****


### Base on Balls Number

In [803]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     286
3     204
2     165
4     124
1      96
5      62
6      28
7      19
9      11
8       7
18      6
24      3
10      3
12      2
36      1
14      1
Name: walk_rate, dtype: int64

In [804]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     286
13    204
12    165
14    124
11     96
15     62
16     28
21     19
23     11
22      7
36      6
46      3
24      3
26      2
32      1
66      1
Name: bb_num, dtype: int64

### Batter K Number

In [805]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     107
4      97
5      92
3      84
6      82
7      72
18     54
10     50
9      47
12     42
8      38
11     37
2      34
14     31
15     21
13     19
36     17
24     16
16     15
22     11
20      9
21      9
19      8
17      7
23      4
27      3
29      3
1       3
26      2
28      2
30      1
25      1
Name: k_rate, dtype: int64

In [806]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     107
8.0      89
9.0      86
7.0      84
10.0     78
6.0      63
12.0     54
18.0     53
11.0     51
13.0     45
14.0     41
5.0      39
15.0     34
16.0     28
24.0     22
36.0     20
4.0      20
20.0     16
21.0     14
17.0     12
19.0     11
27.0     10
22.0     10
23.0      6
29.0      5
3.0       5
25.0      5
26.0      4
28.0      3
2.0       2
30.0      1
Name: k_val, dtype: int64

In [807]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     107
22     89
23     86
21     84
24     78
16     63
26     54
36     53
25     51
31     45
32     41
15     39
33     34
34     28
46     22
14     20
66     20
42     16
43     14
35     12
41     11
53     10
44     10
45      6
55      5
51      5
13      5
52      4
54      3
12      2
56      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [808]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0     937
1      68
2       8
18      2
3       2
9       1
Name: hbp_rate, dtype: int64

In [809]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [810]:
players["hbp_val"].value_counts()

0.0     937
8.0      11
10.0     10
12.0      8
14.0      7
11.0      6
15.0      6
9.0       6
7.0       4
16.0      4
13.0      4
5.0       3
18.0      3
6.0       2
23.0      2
27.0      1
24.0      1
19.0      1
36.0      1
17.0      1
Name: hbp_val, dtype: int64

In [811]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       937
/22     11
/24     10
/26      8
/32      7
/25      6
/23      6
/33      6
/31      4
/21      4
/34      4
/36      3
/15      3
/45      2
/16      2
/46      1
/41      1
/53      1
/66      1
/35      1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [812]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     219
8     164
7     135
9     113
6      86
4      74
5      57
3      48
10     38
2      30
11     13
12     11
18     10
14      6
1       5
13      3
36      3
15      2
16      1
Name: hit_rate, dtype: int64

In [813]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    224
55    164
56    135
54    113
61     86
63     74
62     57
64     48
53     38
65     30
52     13
51     11
41     10
45      6
11      3
46      3
44      2
43      1
Name: PH_num_bat, dtype: int64

### Batter Rating

In [814]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [815]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " (" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + ")")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22)
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,,,,,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16)
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,,,,,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23)
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15)
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23)


In [816]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
1013,Billy Wynne,25,CHW,AL,20,50,41,1,5,0,1,0,2,0,0,2,6,0.122,0.163,0.171,0.334,-9.0,7,1,0,7,0,0,1,L,AL,7.0,7.0,0.5,4.06,20.0,20.0,0.0,6.0,1.0,0.0,128.2,143.0,63.0,58.0,14.0,50.0,8.0,67.0,3.0,1.0,3.0,564.0,96.0,4.12,1.5,10.0,1.0,3.5,4.7,1.34,R,CHW,AL,20.0,20.0,6.0,128.2,30.0,11.0,19.0,0.0,2.0,1.0,,,2.1,1.5,P,,,,,,,,,,,,,,,,,,,,,,,845,CHW,3,20,20,20,20,20,0,0,0,0,0,0,0,0,0,0,0,P,0.1,,E+,0,,7,7.0,(21),0.0,,1,11,4,5.0,15,0,0.0,,4,63,E+(21) (11-15)
1014,Carl Yastrzemski,29,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.87,136.0,306,14,1,0,2,9,*73/8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,165.0,162.0,129.0,1391.1,471.0,427.0,38.0,6.0,31.0,0.987,13.0,11.0,3.01,2.82,OF-1B,,,,,,,,,,,,,,,,,,,,,,,846,BOS,9,162,162,162,162,0,0,22,0,0,0,140,4,0,143,0,0,OF,0.685185,#,B,9,23.0,0,0.0,,0.081,*,5,15,5,10.0,24,0,0.0,,8,55,#B23* (15-24)
1015,Don Young,23,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91.0,101,4,5,8,0,5,8/H97,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,100.0,88.0,65.0,769.2,200.0,191.0,4.0,5.0,0.0,0.975,0.0,0.0,2.28,1.95,OF,,,,,,,,,,,,,,,,,,,,,,,847,CHC,2,101,88,101,100,0,0,0,0,0,0,3,94,8,100,2,6,OF,0.267327,,C+,3,13.0,2,5.0,(15),0.011,,4,14,8,12.0,26,1,13.0,/31,7,56,C+13(15) (14-26/31)
1016,Chris Zachary,25,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.5,0.5,0.5,1.0,181.0,1,0,0,0,0,0,/1,L,AL,0.0,1.0,0.0,7.85,8.0,2.0,2.0,0.0,0.0,0.0,18.1,27.0,17.0,16.0,4.0,7.0,0.0,6.0,0.0,0.0,4.0,87.0,48.0,5.84,1.855,13.3,2.0,3.4,2.9,0.86,R,KCR,AL,8.0,2.0,0.0,18.1,2.0,0.0,2.0,0.0,0.0,1.0,,,0.98,0.25,P,,,,,,,,,,,,,,,,,,,,,,,848,KCR,6,8,2,8,8,8,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,18,41,AAA (n-36)
1017,Bill Zepp,22,MIN,AL,4,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,AL,0.0,0.0,,6.75,4.0,0.0,1.0,0.0,0.0,0.0,5.1,6.0,7.0,4.0,1.0,4.0,1.0,2.0,0.0,0.0,0.0,27.0,59.0,6.45,1.875,10.1,1.7,6.8,3.4,0.5,R,MIN,AL,4.0,0.0,0.0,5.1,1.0,1.0,0.0,0.0,0.0,1.0,,,1.69,0.25,P,,,,,,,,,,,,,,,,,,,,,,,849,MIN,1st,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n)


In [817]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.300,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.20,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22)
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.250,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.000,0.0,-1.0,5.42,2.79,OF-1B,,,,,,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12,0,0.0,,0.000,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16)
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.250,0.250,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,,,,,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.000,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23)
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.250,0.285,0.310,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15)
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.000,0.0,0.0,3.81,2.75,C,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125000,,C,0,,0,0.0,,0.000,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1013,Billy Wynne,25,CHW,AL,20,50,41,1,5,0,1,0,2,0,0,2,6,0.122,0.163,0.171,0.334,-9.0,7,1,0,7,0,0,1,L,AL,7.0,7.0,0.500,4.06,20.0,20.0,0.0,6.0,1.0,0.0,128.2,143.0,63.0,58.0,14.0,50.0,8.0,67.0,3.0,1.0,3.0,564.0,96.0,4.12,1.500,10.0,1.0,3.5,4.7,1.34,R,CHW,AL,20.0,20.0,6.0,128.2,30.0,11.0,19.0,0.0,2.0,1.000,,,2.10,1.50,P,,,,,,,,,,,,,,,,,,,,,,,845,CHW,3,20,20,20,20,20,0,0,0,0,0,0,0,0,0,0,0,P,0.100000,,E+,0,,7,7.0,(21),0.000,,1,11,4,5.0,15,0,0.0,,4,63,E+(21) (11-15)
1014,Carl Yastrzemski,29,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.870,136.0,306,14,1,0,2,9,*73/8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,165.0,162.0,129.0,1391.1,471.0,427.0,38.0,6.0,31.0,0.987,13.0,11.0,3.01,2.82,OF-1B,,,,,,,,,,,,,,,,,,,,,,,846,BOS,9,162,162,162,162,0,0,22,0,0,0,140,4,0,143,0,0,OF,0.685185,#,B,9,23,0,0.0,,0.081,*,5,15,5,10.0,24,0,0.0,,8,55,#B23* (15-24)
1015,Don Young,23,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91.0,101,4,5,8,0,5,8/H97,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,100.0,88.0,65.0,769.2,200.0,191.0,4.0,5.0,0.0,0.975,0.0,0.0,2.28,1.95,OF,,,,,,,,,,,,,,,,,,,,,,,847,CHC,2,101,88,101,100,0,0,0,0,0,0,3,94,8,100,2,6,OF,0.267327,,C+,3,13,2,5.0,(15),0.011,,4,14,8,12.0,26,1,13.0,/31,7,56,C+13(15) (14-26/31)
1016,Chris Zachary,25,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.500,0.500,0.500,1.000,181.0,1,0,0,0,0,0,/1,L,AL,0.0,1.0,0.000,7.85,8.0,2.0,2.0,0.0,0.0,0.0,18.1,27.0,17.0,16.0,4.0,7.0,0.0,6.0,0.0,0.0,4.0,87.0,48.0,5.84,1.855,13.3,2.0,3.4,2.9,0.86,R,KCR,AL,8.0,2.0,0.0,18.1,2.0,0.0,2.0,0.0,0.0,1.000,,,0.98,0.25,P,,,,,,,,,,,,,,,,,,,,,,,848,KCR,6,8,2,8,8,8,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,AAA,0,,0,0.0,,0.000,,0,n,18,18.0,36,0,0.0,,18,41,AAA (n-36)


In [818]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [819]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [820]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

### Pitcher Letter Rating

In [821]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.2587898230088493

In [822]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [823]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     588
M     138
W     107
X      55
L      53
Y      28
K      21
Z+     11
J+     10
J       7
Name: pit_letter, dtype: int64

In [824]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      566
M     138
W     107
X      55
L      53
Y      28
Z      22
K      21
Z+     11
J+     10
J       7
Name: pit_letter, dtype: int64

In [825]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.171

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [826]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

2.000000      8
8.000000      5
65.666667     5
5.333333      5
4.000000      5
12.333333     5
11.000000     5
5.000000      4
86.333333     4
6.000000      4
2.333333      4
9.666667      4
7.333333      4
22.333333     4
100.333333    3
16.333333     3
4.666667      3
1.666667      3
87.666667     3
15.666667     3
17.666667     3
42.000000     3
51.333333     3
3.000000      3
22.000000     3
Name: IP_real, dtype: int64

In [827]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

2.0    153
1.0     93
3.0     51
6.0     40
5.0     40
4.0     32
7.0     29
8.0     11
0.0      2
9.0      1
Name: IE, dtype: int64

In [828]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
211,Vic Davalillo,29,STL,NL,63,105,98,15,26,3,0,2,10,1,1,7,8,0.265,0.314,0.357,0.671,88.0,35,3,0,0,0,0,H98/71,L,NL,0.0,0.0,,inf,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,17.0,,,,,,,0.0,L,TOT,MLB,50.0,26.0,20.0,284.0,74.0,73.0,1.0,0.0,1.0,1.0,4.0,15.0,2.35,1.48,OF-1B-P,,,,,,,,,,,,,,,,,,,,,,,181,2TM,7,96,26,96,50,2,0,3,0,0,0,3,12,31,46,50,13,OF,0.15873,,B,3,13.0,0,0.0,,0.036,,2,12,3,5.0,15,0,0.0,,9,54,B13 (12-15),1.0,Z,0.0,0.0
509,Frank Kreutzer,30,WSA,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,R,AL,0.0,0.0,,4.5,4.0,0.0,1.0,0.0,0.0,0.0,2.0,3.0,1.0,1.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,11.0,94.0,3.51,2.5,13.5,0.0,9.0,9.0,1.0,L,WSA,AL,4.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,,,4.5,0.25,P,,,,,,,,,,,,,,,,,,,,,,,425,WSA,6,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,2.0,0.0


In [829]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

2.0    153
1.0     95
3.0     51
6.0     40
5.0     40
4.0     32
7.0     29
8.0     11
9.0      1
Name: IE, dtype: int64

In [830]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
83,Dave Boswell,24,MIN,AL,40,103,94,7,16,2,1,2,9,0,0,2,29,0.17,0.196,0.277,0.472,29.0,26,3,1,6,0,0,1/H,R,AL,20.0,12.0,0.625,3.23,39.0,38.0,0.0,10.0,0.0,0.0,256.1,215.0,105.0,92.0,18.0,99.0,2.0,190.0,8.0,0.0,10.0,1070.0,115.0,3.19,1.225,7.5,0.6,3.5,6.7,1.92,R,MIN,AL,39.0,38.0,10.0,256.1,42.0,9.0,31.0,2.0,2.0,0.952,,,1.4,1.03,P,,,,,,,,,,,,,,,,,,,,,,,73,MIN,6,40,38,40,39,39,0,0,0,0,0,0,0,0,0,0,1,P,0.225,,D+,4,14.0,2,6.0,(16),0.0,,1,11,10,11.0,25,0,0.0,,6,61,D+14(16) (11-25),0.223,M,256.333333,7.0
151,Steve Carlton,24,STL,NL,32,87,80,5,17,4,1,1,7,0,0,3,27,0.213,0.241,0.325,0.566,58.0,26,3,0,4,0,0,1/H,L,NL,17.0,11.0,0.607,2.17,31.0,31.0,0.0,12.0,2.0,0.0,236.1,185.0,66.0,57.0,15.0,93.0,6.0,210.0,4.0,0.0,7.0,968.0,164.0,2.79,1.176,7.0,0.6,3.5,8.0,2.26,L,STL,NL,31.0,31.0,12.0,236.1,38.0,1.0,34.0,3.0,1.0,0.921,,,1.33,1.13,P,,,,,,,,,,,,,,,,,,,,,,,131,STL,5,32,31,32,31,31,0,0,0,0,0,0,0,0,0,0,1,P,0.21875,,C,2,12.0,2,4.0,(14),0.0,,1,11,11,12.0,26,0,0.0,,7,56,C12(14) (11-26),0.212,L,236.333333,8.0
201,Mike Cuellar,32,BAL,AL,39,113,103,6,12,1,2,0,5,0,0,0,43,0.117,0.114,0.165,0.279,-23.0,17,0,0,8,2,0,1,L,AL,23.0,11.0,0.676,2.38,39.0,39.0,0.0,18.0,5.0,0.0,290.2,213.0,94.0,77.0,18.0,79.0,7.0,182.0,1.0,2.0,3.0,1137.0,149.0,2.89,1.005,6.6,0.6,2.4,5.6,2.3,L,BAL,AL,39.0,39.0,18.0,290.2,56.0,9.0,45.0,2.0,7.0,0.964,,,1.67,1.38,P,,,,,,,,,,,,,,,,,,,,,,,173,BAL,7,39,39,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.128205,,E+,0,,6,6.0,(16),0.0,,0,n,14,14.0,32,0,0.0,,4,63,E+(16) (n-32),0.202,L,290.666667,7.0
203,Ray Culp,27,BOS,AL,32,89,79,9,12,4,0,1,5,0,1,4,33,0.152,0.193,0.241,0.433,18.0,19,3,0,6,0,0,1,R,AL,17.0,8.0,0.68,3.81,32.0,32.0,0.0,9.0,2.0,0.0,227.0,195.0,103.0,96.0,25.0,79.0,6.0,172.0,6.0,0.0,8.0,944.0,101.0,3.55,1.207,7.7,1.0,3.1,6.8,2.18,R,BOS,AL,32.0,32.0,9.0,227.0,59.0,20.0,31.0,8.0,1.0,0.864,,,2.02,1.59,P,,,,,,,,,,,,,,,,,,,,,,,175,BOS,7,32,32,32,32,32,0,0,0,0,0,0,0,0,0,0,0,P,0.15625,,D,3,13.0,0,0.0,,0.0,,2,12,13,15.0,33,0,0.0,,5,62,D13 (12-33),0.227,M,227.0,7.0
229,Larry Dierker,22,HOU,NL,39,127,118,7,17,2,0,1,7,0,0,2,38,0.144,0.158,0.186,0.345,-3.0,22,3,0,7,0,0,1,R,NL,20.0,13.0,0.606,2.33,39.0,37.0,0.0,20.0,4.0,0.0,305.1,240.0,97.0,79.0,18.0,72.0,6.0,232.0,1.0,0.0,9.0,1207.0,152.0,2.47,1.022,7.1,0.5,2.1,6.8,3.22,R,HOU,NL,39.0,37.0,20.0,305.1,59.0,14.0,42.0,3.0,5.0,0.949,,,1.65,1.44,P,,,,,,,,,,,,,,,,,,,,,,,195,HOU,6,39,37,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.179487,,D,2,12.0,0,0.0,,0.0,,1,11,11,12.0,26,0,0.0,,5,62,D12 (11-26),0.212,L,305.333333,8.0
233,Chuck Dobson,25,OAK,AL,35,92,79,6,8,1,0,0,9,0,0,2,12,0.101,0.12,0.114,0.234,-32.0,9,1,0,9,2,0,1,R,AL,15.0,13.0,0.536,3.86,35.0,35.0,0.0,11.0,1.0,0.0,235.1,244.0,111.0,101.0,16.0,80.0,2.0,137.0,1.0,1.0,9.0,998.0,88.0,3.26,1.377,9.3,0.6,3.1,5.2,1.71,R,OAK,AL,35.0,35.0,11.0,235.1,38.0,7.0,29.0,2.0,4.0,0.947,,,1.38,1.03,P,,,,,,,,,,,,,,,,,,,,,,,199,OAK,4,35,35,35,35,35,0,0,0,0,0,0,0,0,0,0,0,P,0.257143,,E,0,,0,0.0,,0.0,,1,11,5,6.0,16,0,0.0,,3,64,E (11-16),0.266,W,235.333333,7.0
310,Gary Gentry,22,NYM,NL,35,84,74,2,6,1,0,0,1,0,0,1,52,0.081,0.104,0.095,0.198,-44.0,7,0,1,7,1,0,1,R,NL,13.0,12.0,0.52,3.43,35.0,35.0,0.0,6.0,3.0,0.0,233.2,192.0,94.0,89.0,24.0,81.0,5.0,154.0,5.0,0.0,9.0,962.0,106.0,3.63,1.168,7.4,0.9,3.1,5.9,1.9,R,NYM,NL,35.0,35.0,6.0,233.2,54.0,13.0,41.0,0.0,4.0,1.0,,,2.08,1.54,P,,,,,,,,,,,,,,,,,,,,,,,268,NYM,1st,35,35,35,35,35,0,0,0,0,0,0,0,0,0,0,0,P,0.028571,,F,0,,0,0.0,,0.0,,0,n,22,22.0,44,0,0.0,,3,64,F (n-44),0.219,L,233.666667,7.0
316,Bob Gibson,33,STL,NL,37,127,118,11,29,6,0,1,8,5,2,3,36,0.246,0.262,0.322,0.584,63.0,38,1,0,5,1,0,1/H,R,NL,20.0,13.0,0.606,2.18,35.0,35.0,0.0,28.0,4.0,0.0,314.0,251.0,84.0,76.0,12.0,95.0,7.0,269.0,10.0,2.0,4.0,1270.0,164.0,2.3,1.102,7.2,0.3,2.7,7.7,2.83,R,STL,NL,35.0,35.0,28.0,314.0,56.0,21.0,32.0,3.0,5.0,0.946,,,1.52,1.51,P,,,,,,,,,,,,,,,,,,,,,,,272,STL,11,37,35,37,35,35,0,0,0,0,0,0,0,0,0,1,1,P,0.216216,,C+,1,11.0,0,0.0,,0.2,***,1,11,10,11.0,25,0,0.0,,8,55,C+11*** (11-25),0.215,L,314.0,9.0
342,Santiago Guzman,19,STL,NL,1,3,3,0,1,0,0,0,0,0,0,0,1,0.333,0.333,0.333,0.667,88.0,1,0,0,0,0,0,/1,R,NL,0.0,1.0,0.0,4.91,1.0,1.0,0.0,0.0,0.0,0.0,7.1,9.0,4.0,4.0,2.0,3.0,1.0,7.0,0.0,0.0,0.0,31.0,77.0,5.37,1.636,11.0,2.5,3.7,8.6,2.33,R,STL,NL,1.0,1.0,0.0,7.1,1.0,1.0,0.0,0.0,0.0,1.0,,,1.23,1.0,P,,,,,,,,,,,,,,,,,,,,,,,290,STL,1st,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,A,0,,0,0.0,,0.0,,0,n,12,12.0,26,0,0.0,,12,51,A (n-26),0.321,Y,7.333333,7.0
358,Bill Hands,29,CHC,NL,41,115,98,5,9,0,0,0,3,0,0,4,45,0.092,0.136,0.092,0.228,-38.0,9,0,1,11,0,0,1,R,NL,20.0,14.0,0.588,2.49,41.0,41.0,0.0,18.0,3.0,0.0,300.0,268.0,102.0,83.0,21.0,73.0,8.0,181.0,6.0,0.0,5.0,1220.0,162.0,3.0,1.137,8.0,0.6,2.2,5.4,2.48,R,CHC,NL,41.0,41.0,18.0,300.0,75.0,16.0,56.0,3.0,2.0,0.96,,,2.16,1.76,P,,,,,,,,,,,,,,,,,,,,,,,300,CHC,5,41,41,41,41,41,0,0,0,0,0,0,0,0,0,0,0,P,0.073171,,E,0,,0,0.0,,0.0,,1,11,14,15.0,33,0,0.0,,3,64,E (11-33),0.235,M,300.0,7.0


In [831]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [832]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     576
3     144
4     131
2      70
5      49
6      17
7      13
9       5
8       5
1       4
10      2
18      1
15      1
Name: bb_rate, dtype: int64

In [833]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     144
4.0     131
2.0      70
5.0      49
6.0      17
7.0      13
0.0      10
9.0       5
8.0       5
1.0       4
10.0      2
15.0      1
18.0      1
Name: bb_rate, dtype: int64

In [834]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    566
13     144
14     131
12      70
15      49
16      17
11      14
21      13
23       5
22       5
24       2
36       1
33       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [835]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

5.0     120
4.0      89
6.0      84
3.0      43
7.0      41
8.0      26
9.0      16
2.0      16
0.0      10
1.0       2
11.0      2
18.0      1
12.0      1
10.0      1
Name: k_rate_pit, dtype: int64

In [836]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
54,Fred Beene,26,BAL,AL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,S,AL,0.0,0.0,,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.2,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,,3.63,1.125,6.8,0.0,3.4,0.0,0.0,R,BAL,AL,2.0,0.0,0.0,2.2,1.0,1.0,0.0,0.0,0.0,1.0,,,3.38,0.5,P,,,,,,,,,,,,,,,,,,,,,,,48,BAL,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.2,L,2.666667,1,3.0,13,0.0
205,John Cumberland,22,NYY,AL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,R,AL,0.0,0.0,,4.5,2.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,2.0,2.0,0.0,4.0,1.0,0.0,0.0,0.0,0.0,18.0,86.0,5.51,1.75,6.8,0.0,9.0,0.0,0.0,L,NYY,AL,2.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.0,1.0,,,2.25,0.5,P,,,,,,,,,,,,,,,,,,,,,,,177,NYY,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.214,L,4.0,2,8.0,22,0.0
208,Bobby Darwin,26,LAD,NL,6,0,0,1,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1H,R,NL,0.0,0.0,,9.82,3.0,0.0,1.0,0.0,0.0,0.0,3.2,4.0,4.0,4.0,0.0,5.0,0.0,0.0,2.0,0.0,2.0,21.0,38.0,8.24,2.455,9.8,0.0,12.3,0.0,0.0,R,LAD,NL,3.0,0.0,0.0,3.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,180,LAD,2,6,0,6,3,3,0,0,0,0,0,0,0,0,0,0,3,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.286,X,3.666667,1,9.0,23,0.0
211,Vic Davalillo,29,STL,NL,63,105,98,15,26,3,0,2,10,1,1,7,8,0.265,0.314,0.357,0.671,88.0,35,3,0,0,0,0,H98/71,L,NL,0.0,0.0,,inf,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,17.0,,,,,,,0.0,L,TOT,MLB,50.0,26.0,20.0,284.0,74.0,73.0,1.0,0.0,1.0,1.0,4.0,15.0,2.35,1.48,OF-1B-P,,,,,,,,,,,,,,,,,,,,,,,181,2TM,7,96,26,96,50,2,0,3,0,0,0,3,12,31,46,50,13,OF,0.15873,,B,3,13.0,0,0.0,,0.036,,2,12,3,5.0,15,0,0.0,,9,54,B13 (12-15),1.0,Z,0.0,1,18.0,36,0.0
252,Jim Ellis,24,STL,NL,2,1,0,0,0,0,0,0,1,0,0,0,0,0.0,,,,,0,0,0,1,0,0,/1,R,NL,0.0,0.0,,1.69,2.0,1.0,1.0,0.0,0.0,0.0,5.1,7.0,1.0,1.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,25.0,228.0,4.2,1.875,11.8,0.0,5.1,0.0,0.0,L,STL,NL,2.0,1.0,0.0,5.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,216,STL,2,2,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.5,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.318,Y,5.333333,3,4.0,14,0.0
529,Dave Lemonds,20,CHC,NL,2,1,1,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,L,NL,0.0,1.0,0.0,3.86,2.0,1.0,0.0,0.0,0.0,0.0,4.2,5.0,2.0,2.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,21.0,113.0,5.72,2.143,9.6,0.0,9.6,0.0,0.0,L,CHC,NL,2.0,1.0,0.0,4.2,1.0,0.0,1.0,0.0,0.0,1.0,,,1.93,0.5,P,,,,,,,,,,,,,,,,,,,,,,,445,CHC,1st,2,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G (n-66),0.312,Y,4.666667,2,9.0,23,0.0
563,Marty Martinez,27,HOU,NL,78,213,198,14,61,5,4,0,15,0,0,10,21,0.308,0.34,0.374,0.713,102.0,74,5,0,4,1,1,H765/241,S,NL,0.0,0.0,,13.5,1.0,0.0,1.0,0.0,0.0,0.0,0.2,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,43.0,22.01,1.5,13.5,13.5,0.0,0.0,,R,HOU,NL,62.0,49.0,27.0,397.0,154.0,77.0,66.0,11.0,7.0,0.929,-6.0,-19.0,3.24,2.31,OF-SS-3B-C-2B-P,HOU,NL,7.0,4.0,1.0,35.0,30.0,24.0,3.0,3.0,0.0,0.9,-1.0,-34.0,-1.0,6.94,3.86,1.0,4.0,1.0,3.0,75%,475,HOU,4,78,49,78,59,1,7,0,1,15,17,21,0,0,21,21,2,OF,0.192308,,A,0,,2,2.0,(12),0.0,,2,12,4,6.0,16,0,0.0,,10,53,A(12) (12-16),0.333,Y,0.666667,1,0.0,11,0.0
780,Dennis Ribant,27,STL,NL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1H,R,NL,0.0,0.0,,13.5,1.0,0.0,0.0,0.0,0.0,0.0,1.1,4.0,2.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0,35.0,14.51,3.75,27.0,6.8,6.8,0.0,0.0,R,TOT,NL,8.0,0.0,0.0,9.2,2.0,1.0,0.0,1.0,0.0,0.5,,,0.93,0.13,P,,,,,,,,,,,,,,,,,,,,,,,652,2TM,6,9,0,9,8,8,0,0,0,0,0,0,0,0,0,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.5,Z,1.333333,1,4.0,14,0.0
800,Les Rohr,23,NYM,NL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,0.0,,20.25,1.0,0.0,0.0,0.0,0.0,0.0,1.1,5.0,4.0,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,10.0,24.0,4.76,4.5,33.8,0.0,6.8,0.0,0.0,L,NYM,NL,1.0,0.0,0.0,1.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,669,NYM,3,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.556,Z,1.333333,1,4.0,14,0.0
854,Bill Short,31,CIN,NL,4,1,1,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,L,NL,0.0,0.0,,15.43,4.0,0.0,1.0,0.0,0.0,0.0,2.1,4.0,4.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,11.0,29.0,3.79,2.143,15.4,0.0,3.9,0.0,0.0,L,CIN,NL,4.0,0.0,0.0,2.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,713,CIN,6,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G (n-66),0.4,Z,2.333333,1,3.0,13,0.0


In [837]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

8.0     90
9.0     76
7.0     71
10.0    59
11.0    39
6.0     33
12.0    25
5.0     16
13.0    10
0.0     10
15.0     5
16.0     4
14.0     4
4.0      3
20.0     2
3.0      2
17.0     1
18.0     1
22.0     1
Name: k_val_pit, dtype: int64

In [838]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    566
22      90
23      76
21      71
24      59
25      39
16      33
26      25
15      16
n       10
31      10
33       5
32       4
34       4
14       3
42       2
13       2
44       1
35       1
36       1
Name: k_num_pit, dtype: int64

In [839]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [840]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    964
1     47
2      4
3      3
Name: hbp_rate_pit, dtype: int64

In [841]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [842]:
players["hbp_val_pit"].value_counts()

0.0     964
11.0     13
9.0      11
10.0     10
8.0       6
12.0      3
13.0      3
6.0       2
7.0       2
15.0      1
5.0       1
3.0       1
14.0      1
Name: hbp_val_pit, dtype: int64

In [843]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       964
/25     13
/23     11
/24     10
/22      6
/31      3
/26      3
/21      2
/16      2
/33      1
/13      1
/15      1
/32      1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [844]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [845]:
players["WP"].value_counts()

0.0     113
1.0      77
3.0      51
4.0      46
2.0      44
5.0      34
6.0      20
7.0      17
8.0      16
9.0      11
12.0      5
16.0      4
10.0      4
14.0      3
15.0      3
11.0      2
13.0      2
Name: WP, dtype: int64

In [846]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit
32,Gerry Arrigo,28,CIN,NL,20,34,31,1,5,0,0,0,0,0,0,2,11,0.161,0.212,0.161,0.373,4.0,5,2,0,1,0,0,1,L,NL,4.0,7.0,0.364,4.15,20.0,16.0,0.0,1.0,0.0,0.0,91.0,89.0,50.0,42.0,9.0,61.0,3.0,35.0,8.0,0.0,11.0,422.0,91.0,5.3,1.648,8.8,0.9,6.0,3.5,0.57,L,CIN,NL,20.0,16.0,1.0,91.0,10.0,1.0,8.0,1.0,0.0,0.9,,,0.89,0.45,P,,,,,,,,,,,,,,,,,,,,,,,29,CIN,9,20,16,20,20,20,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,2,12,12,14.0,32,0,0.0,,5,62,D (12-32),0.252,W,91.0,5,5.0,15,3.0,8.0,22,1,9.0,/23
38,Stan Bahnsen,24,NYY,AL,40,71,60,1,5,1,0,0,1,0,0,2,33,0.083,0.113,0.1,0.213,-39.0,6,1,0,9,0,0,1,R,AL,9.0,16.0,0.36,3.83,40.0,33.0,4.0,5.0,2.0,1.0,220.2,222.0,102.0,94.0,28.0,90.0,9.0,130.0,0.0,0.0,10.0,956.0,91.0,4.2,1.414,9.1,1.1,3.7,5.3,1.44,R,NYY,AL,40.0,33.0,5.0,220.1,52.0,13.0,36.0,3.0,4.0,0.942,,,2.0,1.23,P,,,,,,,,,,,,,,,,,,,,,,,32,NYY,3,40,33,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.025,,F,0,,0,0.0,,0.0,,1,11,17,18.0,36,0,0.0,,3,64,F (11-36),0.256,W,220.666667,6,3.0,13,5.0,8.0,22,0,0.0,
83,Dave Boswell,24,MIN,AL,40,103,94,7,16,2,1,2,9,0,0,2,29,0.17,0.196,0.277,0.472,29.0,26,3,1,6,0,0,1/H,R,AL,20.0,12.0,0.625,3.23,39.0,38.0,0.0,10.0,0.0,0.0,256.1,215.0,105.0,92.0,18.0,99.0,2.0,190.0,8.0,0.0,10.0,1070.0,115.0,3.19,1.225,7.5,0.6,3.5,6.7,1.92,R,MIN,AL,39.0,38.0,10.0,256.1,42.0,9.0,31.0,2.0,2.0,0.952,,,1.4,1.03,P,,,,,,,,,,,,,,,,,,,,,,,73,MIN,6,40,38,40,39,39,0,0,0,0,0,0,0,0,0,0,1,P,0.225,,D+,4,14.0,2,6.0,(16),0.0,,1,11,10,11.0,25,0,0.0,,6,61,D+14(16) (11-25),0.223,M,256.333333,7,3.0,13,6.0,9.0,23,0,0.0,
85,Jim Bouton,30,TOT,MLB,73,13,13,0,0,0,0,0,0,0,0,0,10,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,1,R,MLB,2.0,3.0,0.4,3.96,73.0,2.0,25.0,1.0,0.0,2.0,122.2,109.0,64.0,54.0,13.0,50.0,5.0,100.0,4.0,2.0,12.0,530.0,91.0,3.58,1.296,8.0,1.0,3.7,7.3,2.0,R,TOT,ZZ,73.0,2.0,1.0,122.2,31.0,10.0,20.0,1.0,1.0,0.968,,,2.2,0.41,P,,,,,,,,,,,,,,,,,,,,,,,75,2TM,8,73,2,73,73,73,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,28,28.0,54,0,0.0,,0,66,G (n-54),0.229,M,122.666667,2,3.0,13,7.0,10.0,24,0,0.0,
177,Tony Cloninger,28,CIN,NL,36,78,72,10,12,1,0,1,5,1,0,3,26,0.167,0.2,0.222,0.422,16.0,16,1,0,3,0,0,1/H,R,NL,11.0,17.0,0.393,5.03,35.0,34.0,1.0,6.0,2.0,0.0,189.2,184.0,123.0,106.0,24.0,103.0,4.0,103.0,5.0,2.0,14.0,854.0,75.0,4.78,1.513,8.7,1.1,4.9,4.9,1.0,R,CIN,NL,35.0,34.0,6.0,189.2,35.0,9.0,23.0,3.0,3.0,0.914,,,1.52,0.91,P,,,,,,,,,,,,,,,,,,,,,,,153,CIN,9,36,34,36,35,35,0,0,0,0,0,0,0,0,0,1,0,P,0.138889,,D,3,13.0,0,0.0,,0.077,*,1,11,12,13.0,31,0,0.0,,6,61,D13* (11-31),0.247,M,189.666667,5,4.0,14,4.0,8.0,22,0,0.0,
180,Joe Coleman,22,WSA,AL,40,92,84,2,9,1,0,0,4,0,0,4,40,0.107,0.148,0.119,0.267,-22.0,10,1,0,4,0,0,1,R,AL,12.0,13.0,0.48,3.27,40.0,36.0,2.0,12.0,4.0,1.0,247.2,222.0,102.0,90.0,26.0,100.0,7.0,182.0,6.0,0.0,12.0,1041.0,106.0,3.69,1.3,8.1,0.9,3.6,6.6,1.82,R,WSA,AL,40.0,36.0,12.0,247.1,58.0,21.0,35.0,2.0,3.0,0.966,,,2.04,1.4,P,,,,,,,,,,,,,,,,,,,,,,,156,WSA,5,40,36,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.1,,E,0,,0,0.0,,0.0,,2,12,16,18.0,36,0,0.0,,4,63,E (12-36),0.237,M,247.666667,6,3.0,13,6.0,9.0,23,0,0.0,
291,Woodie Fryman,29,PHI,NL,36,85,76,3,9,0,0,1,3,1,0,1,26,0.118,0.13,0.158,0.288,-19.0,12,0,0,8,0,0,1,R,NL,12.0,15.0,0.444,4.41,36.0,35.0,0.0,10.0,1.0,0.0,228.1,243.0,123.0,112.0,15.0,89.0,3.0,150.0,11.0,1.0,12.0,1010.0,81.0,3.36,1.454,9.6,0.6,3.5,5.9,1.69,L,PHI,NL,36.0,35.0,10.0,228.1,41.0,3.0,38.0,0.0,1.0,1.0,,,1.62,1.14,P,,,,,,,,,,,,,,,,,,,,,,,249,PHI,4,36,35,36,36,36,0,0,0,0,0,0,0,0,0,0,0,P,0.083333,,E+,4,14.0,0,0.0,,0.111,**,0,n,11,11.0,25,0,0.0,,4,63,E+14** (n-25),0.267,W,228.333333,6,3.0,13,5.0,8.0,22,0,0.0,
362,Jim Hannan,29,WSA,AL,35,56,52,2,6,0,0,0,0,0,0,1,29,0.115,0.132,0.115,0.247,-28.0,6,0,0,3,0,0,1,R,AL,7.0,6.0,0.538,3.64,35.0,28.0,1.0,1.0,1.0,0.0,158.1,138.0,73.0,64.0,17.0,91.0,2.0,72.0,2.0,0.0,10.0,685.0,95.0,4.76,1.446,7.8,1.0,5.2,4.1,0.79,R,WSA,AL,35.0,28.0,1.0,158.1,32.0,11.0,21.0,0.0,2.0,1.0,,,1.82,0.91,P,,,,,,,,,,,,,,,,,,,,,,,302,WSA,8,35,28,35,35,35,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,E+,0,,0,0.0,,0.0,,1,11,19,20.0,42,0,0.0,,4,63,E+ (11-42),0.233,M,158.333333,5,5.0,15,4.0,9.0,23,0,0.0,
402,Dennis Higgins,29,WSA,AL,55,12,11,1,1,0,0,0,1,0,0,0,5,0.091,0.091,0.091,0.182,-47.0,1,1,0,1,0,0,1,R,AL,10.0,9.0,0.526,3.48,55.0,0.0,35.0,0.0,0.0,16.0,85.1,79.0,42.0,33.0,7.0,56.0,7.0,71.0,3.0,0.0,15.0,383.0,100.0,3.99,1.582,8.3,0.7,5.9,7.5,1.27,R,WSA,AL,55.0,0.0,0.0,85.1,11.0,4.0,7.0,0.0,0.0,1.0,,,1.16,0.2,P,,,,,,,,,,,,,,,,,,,,,,,338,WSA,4,55,0,55,55,55,0,0,0,0,0,0,0,0,0,0,0,P,0.018182,,E,0,,0,0.0,,0.0,,0,n,15,15.0,33,0,0.0,,3,64,E (n-33),0.244,M,85.333333,2,5.0,15,7.0,12.0,26,0,0.0,
445,Tommy John,26,CHW,AL,33,88,79,6,9,2,0,0,1,0,0,7,27,0.114,0.184,0.139,0.323,-10.0,11,1,0,1,1,0,1,R,AL,9.0,11.0,0.45,3.25,33.0,33.0,0.0,6.0,2.0,0.0,232.1,230.0,91.0,84.0,16.0,90.0,10.0,128.0,1.0,1.0,15.0,984.0,119.0,3.48,1.377,8.9,0.6,3.5,5.0,1.42,L,CHW,AL,33.0,33.0,6.0,232.0,82.0,16.0,66.0,0.0,4.0,1.0,,,3.18,2.48,P,,,,,,,,,,,,,,,,,,,,,,,377,CHW,7,33,33,33,33,33,0,0,0,0,0,0,0,0,0,0,0,P,0.030303,,E+,0,,0,0.0,,0.0,,3,13,11,14.0,32,0,0.0,,4,63,E+ (13-32),0.258,W,232.333333,7,3.0,13,5.0,8.0,22,0,0.0,


In [847]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        331
[WP]    121
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [848]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    50
0.166667    11
0.125000    10
0.076923     9
0.200000     8
            ..
0.101449     1
0.148148     1
0.110236     1
0.047809     1
0.186047     1
Name: hr_rate_pit, Length: 265, dtype: int64

In [849]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     730
+    208
-     80
Name: gopher_ball, dtype: int64

In [850]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
5,Mike Adamson,21,BAL,AL,6,2,1,0,0,0,0,0,0,0,0,0,1,0.000,0.000,0.000,0.000,-100.0,0,0,0,1,0,0,/1,R,AL,0.0,1.0,0.000,4.50,6.0,0.0,1.0,0.0,0.0,0.0,8.0,10.0,4.0,4.0,0.0,6.0,2.0,2.0,0.0,0.0,1.0,38.0,83.0,4.26,2.000,11.3,0.0,6.8,2.3,0.33,R,BAL,AL,6.0,0.0,0.0,8.0,5.0,2.0,3.0,0.0,0.0,1.000,,,5.63,0.83,P,,,,,,,,,,,,,,,,,,,,,,,6,BAL,3,6,0,6,6,6,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G (n-36),0.312,Y,8.000000,1,6.0,16,2.0,8.0,22,0,0.0,,,0.000000,-
7,Hank Aguirre,38,CHC,NL,41,5,5,2,2,0,0,0,0,0,0,0,1,0.400,0.400,0.400,0.800,114.0,2,0,0,0,0,0,1,R,NL,1.0,0.0,1.000,2.60,41.0,0.0,9.0,0.0,0.0,1.0,45.0,45.0,13.0,13.0,2.0,12.0,1.0,19.0,2.0,0.0,1.0,186.0,156.0,3.18,1.267,9.0,0.4,2.4,3.8,1.58,L,CHC,NL,41.0,0.0,0.0,45.0,14.0,1.0,13.0,0.0,0.0,1.000,,,2.80,0.34,P,,,,,,,,,,,,,,,,,,,,,,,8,CHC,15,41,0,41,41,41,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,AAA,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,14,45,AAA (n-21),0.262,W,45.000000,1,2.0,12,4.0,6.0,16,0,0.0,,,0.044444,-
54,Fred Beene,26,BAL,AL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,0,0,0,/1,S,AL,0.0,0.0,,0.00,2.0,0.0,0.0,0.0,0.0,0.0,2.2,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,,3.63,1.125,6.8,0.0,3.4,0.0,0.00,R,BAL,AL,2.0,0.0,0.0,2.2,1.0,1.0,0.0,0.0,0.0,1.000,,,3.38,0.50,P,,,,,,,,,,,,,,,,,,,,,,,48,BAL,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.200,L,2.666667,1,3.0,13,0.0,0.0,n,0,0.0,,,0.000000,-
64,Frank Bertaina,25,BAL,AL,3,2,1,0,1,0,0,0,0,0,0,1,0,1.000,1.000,1.000,2.000,460.0,1,0,0,0,0,0,/1,L,AL,0.0,0.0,,0.00,3.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,20.0,,2.34,0.667,1.5,0.0,4.5,7.5,1.67,L,TOT,AL,17.0,5.0,0.0,41.2,12.0,3.0,7.0,2.0,0.0,0.833,,,2.16,0.59,P,,,,,,,,,,,,,,,,,,,,,,,54,2TM,6,17,5,17,17,17,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,AAA,0,,0,0.0,,0.0,,18,36,0,0.0,n,0,0.0,,18,41,AAA (36-n),0.059,J+,6.000000,2,5.0,15,9.0,14.0,32,0,0.0,,,0.000000,-
87,Jim Bouton,30,HOU,NL,16,4,4,0,0,0,0,0,0,0,0,0,3,0.000,0.000,0.000,0.000,-100.0,0,0,0,0,0,0,1,R,NL,0.0,2.0,0.000,4.11,16.0,1.0,7.0,1.0,0.0,1.0,30.2,32.0,16.0,14.0,1.0,12.0,1.0,32.0,2.0,0.0,4.0,135.0,87.0,2.22,1.435,9.4,0.3,3.5,9.4,2.67,R,TOT,ZZ,73.0,2.0,1.0,122.2,31.0,10.0,20.0,1.0,1.0,0.968,,,2.20,0.41,P,,,,,,,,,,,,,,,,,,,,,,,75,2TM,8,73,2,73,73,73,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.0,,0,n,27,27.0,53,0,0.0,,0,66,G (n-53),0.264,W,30.666667,2,3.0,13,9.0,12.0,26,1,13.0,/31,,0.031250,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
990,Ron Willis,25,HOU,NL,3,0,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,0.00,3.0,0.0,2.0,0.0,0.0,0.0,2.1,3.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,11.0,,0.79,1.286,11.6,0.0,0.0,7.7,,R,TOT,NL,29.0,0.0,0.0,34.2,13.0,5.0,8.0,0.0,0.0,1.000,,,3.38,0.45,P,,,,,,,,,,,,,,,,,,,,,,,828,2TM,4,29,0,29,29,29,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.273,W,2.333333,1,0.0,11,7.0,7.0,21,0,0.0,,,0.000000,-
1000,Dooley Womack,29,TOT,MLB,39,8,7,1,1,0,0,0,1,0,0,0,2,0.143,0.143,0.143,0.286,-19.0,1,0,0,1,0,0,1,L,MLB,4.0,2.0,0.667,3.29,39.0,0.0,9.0,0.0,0.0,0.0,65.2,64.0,25.0,24.0,1.0,23.0,7.0,40.0,3.0,0.0,7.0,276.0,109.0,2.68,1.325,8.8,0.1,3.2,5.5,1.74,R,TOT,ZZ,39.0,0.0,0.0,65.2,30.0,9.0,20.0,1.0,0.0,0.967,,,3.97,0.74,P,,,,,,,,,,,,,,,,,,,,,,,836,2TM,4,39,0,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.025641,,D,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,4,63,D (n-23),0.256,W,65.666667,2,3.0,13,5.0,8.0,22,0,0.0,,[WP],0.015625,-
1001,Dooley Womack,29,HOU,NL,30,7,6,1,1,0,0,0,1,0,0,0,1,0.167,0.167,0.167,0.333,-5.0,1,0,0,1,0,0,1,L,NL,2.0,1.0,0.667,3.51,30.0,0.0,6.0,0.0,0.0,0.0,51.1,49.0,21.0,20.0,1.0,20.0,5.0,32.0,3.0,0.0,7.0,217.0,101.0,2.86,1.344,8.6,0.2,3.5,5.6,1.60,R,TOT,ZZ,39.0,0.0,0.0,65.2,30.0,9.0,20.0,1.0,0.0,0.967,,,3.97,0.74,P,,,,,,,,,,,,,,,,,,,,,,,836,2TM,4,39,0,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.033333,,D,0,,0,0.0,,0.0,,0,n,5,5.0,15,0,0.0,,5,62,D (n-15),0.253,W,51.333333,2,3.0,13,5.0,8.0,22,0,0.0,,[WP],0.020408,-
1002,Dooley Womack,29,SEP,AL,9,1,1,0,0,0,0,0,0,0,0,0,1,0.000,0.000,0.000,0.000,-100.0,0,0,0,0,0,0,/1,L,AL,2.0,1.0,0.667,2.51,9.0,0.0,3.0,0.0,0.0,0.0,14.1,15.0,4.0,4.0,0.0,3.0,2.0,8.0,0.0,0.0,0.0,59.0,147.0,2.02,1.256,9.4,0.0,1.9,5.0,2.67,R,TOT,ZZ,39.0,0.0,0.0,65.2,30.0,9.0,20.0,1.0,0.0,0.967,,,3.97,0.74,P,,,,,,,,,,,,,,,,,,,,,,,836,2TM,4,39,0,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G (n-66),0.268,W,14.333333,2,2.0,12,5.0,7.0,21,0,0.0,,,0.000000,-


### Pitcher Control Number

In [851]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

11.0    118
12.0    109
13.0     64
14.0     43
10.0     39
9.0      23
15.0     16
16.0     12
18.0      5
17.0      5
8.0       4
20.0      4
7.0       3
22.0      2
19.0      2
36.0      1
0.0       1
21.0      1
Name: control_rate, dtype: int64

In [852]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    566
51     118
46     109
45      64
44      43
52      39
53      23
43      16
42      12
36       5
41       5
54       4
34       4
55       3
32       2
35       2
65       1
11       1
33       1
Name: PCN, dtype: int64

### Probable Hit Number

In [853]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     567
8     147
7      89
9      89
10     41
6      27
11     14
4      10
12      8
5       8
13      6
16      4
18      2
3       2
14      1
15      1
2       1
22      1
Name: hit_rate_pit, dtype: int64

In [854]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [855]:
players["hit_rate_pit"].value_counts()

8.0     147
7.0      89
9.0      89
10.0     41
6.0      27
11.0     14
4.0      10
12.0      8
5.0       8
13.0      6
16.0      4
18.0      2
3.0       2
14.0      1
2.0       1
15.0      1
0.0       1
22.0      1
Name: hit_rate_pit, dtype: int64

In [856]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    566
55     147
56      89
54      89
53      41
61      27
52      14
63      10
62       8
51       8
46       6
43       4
41       2
64       2
33       1
65       1
45       1
44       1
66       1
Name: PPH, dtype: int64

### Pitcher Rating

In [857]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [858]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

        566
+M2      32
W2       15
+W2      14
X2       11
       ... 
L3        1
+L6       1
Y6        1
+Z+3      1
-W3       1
Name: goph_lett_inn, Length: 118, dtype: int64

In [859]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      566
(13-22)         37
(13-23)         27
(13-21)         25
(12-21)         24
              ... 
(16-22/24)       1
(23-31)          1
(16-25/26)       1
(11-26)          1
(15-23/25)       1
Name: bb_k_hbp, Length: 100, dtype: int64

In [860]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               566
(13-22)         37
(13-23)         27
(13-21)         25
(12-21)         24
              ... 
(23-31)          1
(16-25/26)       1
(11-26)          1
(15-23/24)       1
(13-32/33)       1
Name: bb_k_hbp, Length: 100, dtype: int64

In [861]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

+M2 (14-25)          5
W6 (13-22)  [WP]     4
M7 (13-23)  [WP]     4
X2 (13-21)           4
+M2 (12-21)          3
                    ..
+L1 (13-22)          1
+X3 (14-22)  [WP]    1
-L3 (14-31)          1
+M3 (13-22)          1
K2 (13-25)  [WP]     1
Name: pitcher_rating, Length: 401, dtype: int64

In [862]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [863]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,...,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,...,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,...,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23),0.227,M,85.333333,2.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.106667,+,11.0,51.0,7.0,56.0,+M2,(14-23),+M2 (14-23) [WP]
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,...,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15),,,,,,,,,,0,0.0,,,,,,,,,,,
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,...,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,
5,Mike Adamson,21,BAL,AL,6,2,1,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,1,0,0,/1,R,AL,0.0,1.0,0.0,4.5,6.0,0.0,1.0,0.0,0.0,0.0,8.0,10.0,4.0,4.0,0.0,6.0,2.0,2.0,0.0,0.0,1.0,38.0,83.0,4.26,2.0,11.3,0.0,6.8,2.3,0.33,R,BAL,AL,6.0,0.0,0.0,8.0,5.0,2.0,3.0,0.0,0.0,1.0,,,5.63,0.83,P,,...,,,,,,,,,,,,,,,,,,6,BAL,3,6,0,6,6,6,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G (n-36),0.312,Y,8.0,1.0,6.0,16.0,2.0,8.0,22.0,0,0.0,,,0.0,-,15.0,43.0,9.0,54.0,-Y1,(16-22),-Y1 (16-22)
6,Tommie Agee,26,NYM,NL,149,635,565,97,153,23,4,26,76,12,9,59,137,0.271,0.342,0.464,0.806,122.0,262,5,3,6,2,2,*8/9H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,146.0,141.0,130.0,1265.0,346.0,334.0,7.0,5.0,0.0,0.986,11.0,10.0,2.43,2.34,OF,,...,,,,,,,,,,,,,,,,,,7,NYM,8,149,141,149,146,0,0,0,0,0,0,0,142,9,146,3,0,OF,0.510067,,B,6,16.0,1,7.0,(21),0.074,,3,13,8,11.0,25,0,0.0,,9,54,B16(21) (13-25),,,,,,,,,,0,0.0,,,,,,,,,,,
7,Hank Aguirre,38,CHC,NL,41,5,5,2,2,0,0,0,0,0,0,0,1,0.4,0.4,0.4,0.8,114.0,2,0,0,0,0,0,1,R,NL,1.0,0.0,1.0,2.6,41.0,0.0,9.0,0.0,0.0,1.0,45.0,45.0,13.0,13.0,2.0,12.0,1.0,19.0,2.0,0.0,1.0,186.0,156.0,3.18,1.267,9.0,0.4,2.4,3.8,1.58,L,CHC,NL,41.0,0.0,0.0,45.0,14.0,1.0,13.0,0.0,0.0,1.0,,,2.8,0.34,P,,...,,,,,,,,,,,,,,,,,,8,CHC,15,41,0,41,41,41,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,14,45,AAA (n-21),0.262,W,45.0,1.0,2.0,12.0,4.0,6.0,16.0,0,0.0,,,0.044444,-,11.0,51.0,9.0,54.0,-W1,(12-16),-W1 (12-16)
8,Jack Aker,28,TOT,AL,53,11,10,0,1,0,0,0,0,0,0,0,6,0.1,0.182,0.1,0.282,-18.0,1,0,1,0,0,0,1,R,AL,8.0,6.0,0.571,3.17,53.0,0.0,32.0,0.0,0.0,14.0,82.1,76.0,32.0,29.0,8.0,35.0,9.0,47.0,5.0,0.0,0.0,348.0,112.0,4.09,1.348,8.3,0.9,3.8,5.1,1.34,R,TOT,AL,53.0,0.0,0.0,82.2,32.0,14.0,18.0,0.0,4.0,1.0,,,3.48,0.6,P,,...,,,,,,,,,,,,,,,,,,9,2TM,6,53,0,53,53,53,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,E,0,,0,0.0,,0.0,,0,n,20,20.0,42,3,23.0,/45,3,64,E (n-42/45),0.247,M,82.333333,2.0,4.0,14.0,5.0,9.0,23.0,1,10.0,/24,,0.105263,+,12.0,46.0,8.0,55.0,+M2,(14-23/24),+M2 (14-23/24)
9,Jack Aker,28,SEP,AL,15,2,1,0,0,0,0,0,0,0,0,0,1,0.0,0.5,0.0,0.5,53.0,0,0,1,0,0,0,1,R,AL,0.0,2.0,0.0,7.56,15.0,0.0,9.0,0.0,0.0,3.0,16.2,25.0,15.0,14.0,4.0,13.0,4.0,7.0,1.0,0.0,0.0,85.0,49.0,7.31,2.28,13.5,2.2,7.0,3.8,0.54,R,TOT,AL,53.0,0.0,0.0,82.2,32.0,14.0,18.0,0.0,4.0,1.0,,,3.48,0.6,P,,...,,,,,,,,,,,,,,,,,,9,2TM,6,53,0,53,53,53,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,18,36.0,/66,0,66,G (n-36/66),0.352,Z+,16.666667,1.0,6.0,16.0,3.0,9.0,23.0,0,0.0,,,0.16,+,17.0,41.0,11.0,52.0,+Z+1,(16-23),+Z+1 (16-23)


In [864]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
1008,Woody Woodward,26,CIN,NL,97,274,241,36,63,12,0,0,15,3,2,24,40,0.261,0.333,0.311,0.645,78.0,75,7,2,7,0,3,6/H4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,95.0,78.0,60.0,698.2,411.0,148.0,249.0,14.0,36.0,0.966,-2.0,-4.0,5.11,4.18,SS-2B,,...,,,,,,,,,,,,,,,,,,840,CIN,7,97,78,97,93,0,0,0,2,0,93,0,0,0,0,0,9,SS,0.154639,,B,0,,0,0.0,,0.039,,3,13,5,8.0,22,0,0.0,,8,55,B (13-22),,,,,,,,,,0,0.0,,,,,,,,,,,
1009,Al Worthington,40,MIN,AL,46,6,5,0,0,0,0,0,0,0,0,0,4,0.0,0.0,0.0,0.0,-100.0,0,0,0,1,0,0,1,R,AL,4.0,1.0,0.8,4.57,46.0,0.0,19.0,0.0,0.0,3.0,61.0,65.0,31.0,31.0,7.0,20.0,2.0,51.0,0.0,0.0,1.0,257.0,81.0,3.31,1.393,9.6,1.0,3.0,7.5,2.55,R,MIN,AL,46.0,0.0,0.0,61.0,8.0,2.0,6.0,0.0,1.0,1.0,,,1.18,0.17,P,,...,,,,,,,,,,,,,,,,,,841,MIN,14,46,0,46,46,46,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,24,24.0,46,0,0.0,,0,66,G (n-46),0.274,W,61.0,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.107692,+,12.0,46.0,9.0,54.0,+W1,(13-24),+W1 (13-24)
1010,Clyde Wright,28,CAL,AL,40,12,11,0,2,0,0,0,0,0,0,1,6,0.182,0.25,0.182,0.432,26.0,2,0,0,0,0,0,1/H,R,AL,1.0,8.0,0.111,4.1,37.0,5.0,9.0,0.0,0.0,0.0,63.2,66.0,33.0,29.0,4.0,30.0,5.0,31.0,1.0,0.0,3.0,272.0,85.0,3.81,1.508,9.3,0.6,4.2,4.4,1.03,L,CAL,AL,37.0,5.0,0.0,63.2,13.0,1.0,12.0,0.0,1.0,1.0,,,1.84,0.35,P,,...,,,,,,,,,,,,,,,,,,842,CAL,4,40,5,40,37,37,0,0,0,0,0,0,0,0,0,0,3,P,0.0,,D+,0,,0,0.0,,0.0,,3,13,18,21.0,43,0,0.0,,6,61,D+ (13-43),0.274,W,63.666667,2.0,4.0,14.0,4.0,8.0,22.0,0,0.0,,,0.060606,,13.0,45.0,9.0,54.0,W2,(14-22),W2 (14-22)
1011,John Wyatt,34,OAK,AL,4,1,1,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,AL,0.0,1.0,0.0,5.4,4.0,0.0,1.0,0.0,0.0,0.0,8.1,8.0,5.0,5.0,0.0,6.0,1.0,5.0,2.0,0.0,2.0,40.0,67.0,4.19,1.68,8.6,0.0,6.5,5.4,0.83,R,OAK,AL,4.0,0.0,0.0,8.1,1.0,0.0,1.0,0.0,0.0,1.0,,,1.08,0.25,P,,...,,,,,,,,,,,,,,,,,,843,OAK,9,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G (n-66),0.25,M,8.333333,2.0,5.0,15.0,4.0,9.0,23.0,2,11.0,/25,,0.0,-,14.0,44.0,7.0,56.0,-M2,(15-23/25),-M2 (15-23/25)
1012,Jim Wynn,27,HOU,NL,149,653,495,113,133,17,1,33,87,23,7,148,142,0.269,0.436,0.507,0.943,167.0,251,5,3,2,5,14,*8,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,NL,149.0,148.0,139.0,1292.2,332.0,318.0,9.0,5.0,3.0,0.985,-14.0,-13.0,2.28,2.19,OF,,...,,,,,,,,,,,,,,,,,,844,HOU,7,149,148,149,149,0,0,0,0,0,0,0,149,0,149,0,0,OF,0.583893,,B,9,23.0,0,0.0,,0.099,*,8,22,8,16.0,34,0,0.0,,7,56,B23* (22-34),,,,,,,,,,0,0.0,,,,,,,,,,,
1013,Billy Wynne,25,CHW,AL,20,50,41,1,5,0,1,0,2,0,0,2,6,0.122,0.163,0.171,0.334,-9.0,7,1,0,7,0,0,1,L,AL,7.0,7.0,0.5,4.06,20.0,20.0,0.0,6.0,1.0,0.0,128.2,143.0,63.0,58.0,14.0,50.0,8.0,67.0,3.0,1.0,3.0,564.0,96.0,4.12,1.5,10.0,1.0,3.5,4.7,1.34,R,CHW,AL,20.0,20.0,6.0,128.2,30.0,11.0,19.0,0.0,2.0,1.0,,,2.1,1.5,P,,...,,,,,,,,,,,,,,,,,,845,CHW,3,20,20,20,20,20,0,0,0,0,0,0,0,0,0,0,0,P,0.1,,E+,0,,7,7.0,(21),0.0,,1,11,4,5.0,15,0,0.0,,4,63,E+(21) (11-15),0.28,X,128.666667,6.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.097902,,13.0,45.0,9.0,54.0,X6,(13-21),X6 (13-21)
1014,Carl Yastrzemski,29,BOS,AL,162,707,603,96,154,28,2,40,111,15,7,101,91,0.255,0.362,0.507,0.87,136.0,306,14,1,0,2,9,*73/8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,165.0,162.0,129.0,1391.1,471.0,427.0,38.0,6.0,31.0,0.987,13.0,11.0,3.01,2.82,OF-1B,,...,,,,,,,,,,,,,,,,,,846,BOS,9,162,162,162,162,0,0,22,0,0,0,140,4,0,143,0,0,OF,0.685185,#,B,9,23.0,0,0.0,,0.081,*,5,15,5,10.0,24,0,0.0,,8,55,#B23* (15-24),,,,,,,,,,0,0.0,,,,,,,,,,,
1015,Don Young,23,CHC,NL,101,323,272,36,65,12,3,6,27,1,5,38,74,0.239,0.343,0.371,0.714,91.0,101,4,5,8,0,5,8/H97,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,100.0,88.0,65.0,769.2,200.0,191.0,4.0,5.0,0.0,0.975,0.0,0.0,2.28,1.95,OF,,...,,,,,,,,,,,,,,,,,,847,CHC,2,101,88,101,100,0,0,0,0,0,0,3,94,8,100,2,6,OF,0.267327,,C+,3,13.0,2,5.0,(15),0.011,,4,14,8,12.0,26,1,13.0,/31,7,56,C+13(15) (14-26/31),,,,,,,,,,0,0.0,,,,,,,,,,,
1016,Chris Zachary,25,KCR,AL,8,2,2,0,1,0,0,0,0,0,0,0,1,0.5,0.5,0.5,1.0,181.0,1,0,0,0,0,0,/1,L,AL,0.0,1.0,0.0,7.85,8.0,2.0,2.0,0.0,0.0,0.0,18.1,27.0,17.0,16.0,4.0,7.0,0.0,6.0,0.0,0.0,4.0,87.0,48.0,5.84,1.855,13.3,2.0,3.4,2.9,0.86,R,KCR,AL,8.0,2.0,0.0,18.1,2.0,0.0,2.0,0.0,0.0,1.0,,,0.98,0.25,P,,...,,,,,,,,,,,,,,,,,,848,KCR,6,8,2,8,8,8,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,18,41,AAA (n-36),0.338,Z+,18.333333,2.0,3.0,13.0,2.0,5.0,15.0,0,0.0,,,0.148148,+,14.0,44.0,11.0,52.0,+Z+2,(13-15),+Z+2 (13-15)
1017,Bill Zepp,22,MIN,AL,4,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,AL,0.0,0.0,,6.75,4.0,0.0,1.0,0.0,0.0,0.0,5.1,6.0,7.0,4.0,1.0,4.0,1.0,2.0,0.0,0.0,0.0,27.0,59.0,6.45,1.875,10.1,1.7,6.8,3.4,0.5,R,MIN,AL,4.0,0.0,0.0,5.1,1.0,1.0,0.0,0.0,0.0,1.0,,,1.69,0.25,P,,...,,,,,,,,,,,,,,,,,,849,MIN,1st,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.261,W,5.333333,1.0,5.0,15.0,3.0,8.0,22.0,0,0.0,,,0.166667,+,13.0,45.0,8.0,55.0,+W1,(15-22),+W1 (15-22)


In [865]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [866]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [867]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [868]:
players["Primary_Pos_fld"].value_counts()

P     450
OF    224
C      97
2B     64
1B     60
SS     59
3B     48
Name: Primary_Pos_fld, dtype: int64

In [869]:
players["Primary_Pos_fld"].isnull().sum()

16

In [870]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.984917
2B    0.974281
3B    0.952354
C     0.984464
OF    0.971108
P     0.948482
SS    0.957862
Name: Fld%, dtype: float64

### Superior Rating

In [871]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [872]:
players["superior_rating"].value_counts()

     662
S    356
Name: superior_rating, dtype: int64

In [873]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,...,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,,
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,...,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,S
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,...,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23),0.227,M,85.333333,2.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.106667,+,11.0,51.0,7.0,56.0,+M2,(14-23),+M2 (14-23) [WP],
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,...,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15),,,,,,,,,,0,0.0,,,,,,,,,,,,
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,...,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S
5,Mike Adamson,21,BAL,AL,6,2,1,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,1,0,0,/1,R,AL,0.0,1.0,0.0,4.5,6.0,0.0,1.0,0.0,0.0,0.0,8.0,10.0,4.0,4.0,0.0,6.0,2.0,2.0,0.0,0.0,1.0,38.0,83.0,4.26,2.0,11.3,0.0,6.8,2.3,0.33,R,BAL,AL,6.0,0.0,0.0,8.0,5.0,2.0,3.0,0.0,0.0,1.0,,,5.63,0.83,P,,...,,,,,,,,,,,,,,,,,6,BAL,3,6,0,6,6,6,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G (n-36),0.312,Y,8.0,1.0,6.0,16.0,2.0,8.0,22.0,0,0.0,,,0.0,-,15.0,43.0,9.0,54.0,-Y1,(16-22),-Y1 (16-22),S
6,Tommie Agee,26,NYM,NL,149,635,565,97,153,23,4,26,76,12,9,59,137,0.271,0.342,0.464,0.806,122.0,262,5,3,6,2,2,*8/9H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,146.0,141.0,130.0,1265.0,346.0,334.0,7.0,5.0,0.0,0.986,11.0,10.0,2.43,2.34,OF,,...,,,,,,,,,,,,,,,,,7,NYM,8,149,141,149,146,0,0,0,0,0,0,0,142,9,146,3,0,OF,0.510067,,B,6,16.0,1,7.0,(21),0.074,,3,13,8,11.0,25,0,0.0,,9,54,B16(21) (13-25),,,,,,,,,,0,0.0,,,,,,,,,,,,
7,Hank Aguirre,38,CHC,NL,41,5,5,2,2,0,0,0,0,0,0,0,1,0.4,0.4,0.4,0.8,114.0,2,0,0,0,0,0,1,R,NL,1.0,0.0,1.0,2.6,41.0,0.0,9.0,0.0,0.0,1.0,45.0,45.0,13.0,13.0,2.0,12.0,1.0,19.0,2.0,0.0,1.0,186.0,156.0,3.18,1.267,9.0,0.4,2.4,3.8,1.58,L,CHC,NL,41.0,0.0,0.0,45.0,14.0,1.0,13.0,0.0,0.0,1.0,,,2.8,0.34,P,,...,,,,,,,,,,,,,,,,,8,CHC,15,41,0,41,41,41,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,14,45,AAA (n-21),0.262,W,45.0,1.0,2.0,12.0,4.0,6.0,16.0,0,0.0,,,0.044444,-,11.0,51.0,9.0,54.0,-W1,(12-16),-W1 (12-16),S
8,Jack Aker,28,TOT,AL,53,11,10,0,1,0,0,0,0,0,0,0,6,0.1,0.182,0.1,0.282,-18.0,1,0,1,0,0,0,1,R,AL,8.0,6.0,0.571,3.17,53.0,0.0,32.0,0.0,0.0,14.0,82.1,76.0,32.0,29.0,8.0,35.0,9.0,47.0,5.0,0.0,0.0,348.0,112.0,4.09,1.348,8.3,0.9,3.8,5.1,1.34,R,TOT,AL,53.0,0.0,0.0,82.2,32.0,14.0,18.0,0.0,4.0,1.0,,,3.48,0.6,P,,...,,,,,,,,,,,,,,,,,9,2TM,6,53,0,53,53,53,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,E,0,,0,0.0,,0.0,,0,n,20,20.0,42,3,23.0,/45,3,64,E (n-42/45),0.247,M,82.333333,2.0,4.0,14.0,5.0,9.0,23.0,1,10.0,/24,,0.105263,+,12.0,46.0,8.0,55.0,+M2,(14-23/24),+M2 (14-23/24),S
9,Jack Aker,28,SEP,AL,15,2,1,0,0,0,0,0,0,0,0,0,1,0.0,0.5,0.0,0.5,53.0,0,0,1,0,0,0,1,R,AL,0.0,2.0,0.0,7.56,15.0,0.0,9.0,0.0,0.0,3.0,16.2,25.0,15.0,14.0,4.0,13.0,4.0,7.0,1.0,0.0,0.0,85.0,49.0,7.31,2.28,13.5,2.2,7.0,3.8,0.54,R,TOT,AL,53.0,0.0,0.0,82.2,32.0,14.0,18.0,0.0,4.0,1.0,,,3.48,0.6,P,,...,,,,,,,,,,,,,,,,,9,2TM,6,53,0,53,53,53,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,18,36.0,/66,0,66,G (n-36/66),0.352,Z+,16.666667,1.0,6.0,16.0,3.0,9.0,23.0,0,0.0,,,0.16,+,17.0,41.0,11.0,52.0,+Z+1,(16-23),+Z+1 (16-23),S


### Arm Rating

In [874]:
players["G"].value_counts()

2.0      25
1.0      25
8.0      21
6.0      21
39.0     21
         ..
70.0      1
98.0      1
108.0     1
80.0      1
161.0     1
Name: G, Length: 164, dtype: int64

In [875]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.6294223223281828

In [876]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [877]:
players["arm_rating"].value_counts()

8    707
9    311
Name: arm_rating, dtype: int64

### Range Rating

In [878]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.3321958253049682

In [879]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    5.308363
2B    1.643537
3B    0.988488
C     4.089045
OF    1.362530
P     0.183019
SS    1.347834
Name: po_gp, dtype: float64

In [880]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [881]:
players["range_rating"].value_counts()

4    821
5    197
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [882]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [883]:
players["CS%"].value_counts()

0%     12
32%     7
33%     7
23%     6
45%     5
50%     5
41%     4
48%     3
26%     3
25%     3
54%     2
38%     2
27%     2
29%     2
40%     2
35%     2
31%     2
39%     2
55%     2
42%     2
44%     2
28%     1
20%     1
15%     1
46%     1
75%     1
43%     1
56%     1
13%     1
67%     1
19%     1
21%     1
8%      1
57%     1
34%     1
59%     1
37%     1
36%     1
24%     1
Name: CS%, dtype: int64

In [884]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.32139547272016

In [885]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [886]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    932
-2     27
-3     22
-1     20
       17
Name: cs_num, dtype: int64

In [887]:
players["G_cat"].isnull().sum()

913

In [888]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      940
-2     27
-3     22
-1     20
-4      9
Name: cs_num, dtype: int64

In [889]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num
60,Johnny Bench,21,CIN,NL,148,592,532,83,156,23,1,26,90,6,6,49,86,0.293,0.353,0.487,0.84,129.0,259,7,4,0,7,7,*2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,147.0,135.0,131.0,1249.0,876.0,793.0,76.0,7.0,10.0,0.992,4.0,4.0,6.26,5.91,C,CIN,...,4.0,4.0,4.0,6.26,5.91,14.0,63.0,30.0,40.0,57%,52,CIN,3,148,135,148,147,0,147,0,0,0,0,0,0,0,0,11,0,C,0.608108,#,B+,6,16.0,0,0.0,,0.038,,3,13,5,8.0,22,0,0.0,,9,54,#B+16 (13-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.513514,9,5.358108,4,0.571429,-4
207,Clay Dalrymple,32,BAL,AL,37,94,80,8,19,1,1,3,6,0,0,13,8,0.238,0.34,0.388,0.728,103.0,31,6,0,0,1,1,2H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,30.0,23.0,15.0,208.0,133.0,116.0,17.0,0.0,2.0,1.0,5.0,29.0,5.75,4.43,C,BAL,...,5.0,29.0,5.0,5.75,4.43,2.0,3.0,10.0,12.0,55%,179,BAL,10,37,23,37,30,0,30,0,0,0,0,0,0,0,0,10,0,C,0.162162,,C+,6,16.0,2,8.0,(22),0.0,,5,15,3,8.0,22,0,0.0,,7,56,C+16(22) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.459459,9,3.135135,4,0.545455,-4
259,Andy Etchebarren,26,BAL,AL,73,255,217,29,54,9,2,3,26,1,2,28,42,0.249,0.35,0.35,0.701,96.0,76,11,7,1,2,8,2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,72.0,59.0,50.0,556.0,411.0,380.0,27.0,4.0,1.0,0.99,6.0,13.0,6.59,5.65,C,BAL,...,6.0,13.0,6.0,6.59,5.65,3.0,17.0,11.0,16.0,59%,221,BAL,6,73,59,73,72,0,72,0,0,0,0,0,0,0,0,11,0,C,0.356164,,C+,2,12.0,1,3.0,(13),0.013,,4,14,6,10.0,24,1,11.0,/25,8,55,C+12(13) (14-24/25),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.369863,9,5.205479,4,0.592593,-4
336,Jerry Grote,26,NYM,NL,113,406,365,38,92,12,3,6,40,2,1,32,59,0.252,0.313,0.351,0.663,84.0,128,10,1,6,2,5,*2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYM,NL,112.0,100.0,95.0,918.2,788.0,718.0,63.0,7.0,11.0,0.991,11.0,14.0,7.65,6.97,C,NYM,...,11.0,14.0,11.0,7.65,6.97,4.0,36.0,31.0,40.0,56%,286,NYM,6,113,100,113,112,0,112,0,0,0,0,0,0,0,0,6,0,C,0.353982,,B,2,12.0,1,3.0,(13),0.019,,3,13,5,8.0,22,0,0.0,,8,55,B12(13) (13-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.557522,9,6.353982,4,0.56338,-4
397,Jack Hiatt,26,SFG,NL,69,245,194,18,38,4,0,7,34,0,0,48,58,0.196,0.352,0.325,0.677,92.0,63,6,0,1,2,5,2/H3,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,63.0,60.0,49.0,522.0,392.0,359.0,30.0,3.0,10.0,0.992,4.0,8.0,6.71,6.17,C-1B,SFG,...,4.0,10.0,4.0,6.57,6.08,4.0,16.0,16.0,19.0,54%,335,SFG,6,69,60,69,63,0,60,3,0,0,0,0,0,0,0,7,0,C,0.492754,,C,7,21.0,0,0.0,,0.0,,7,21,9,16.0,34,0,0.0,,6,61,C21 (21-34),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.434783,9,5.202899,4,0.542857,-4
563,Marty Martinez,27,HOU,NL,78,213,198,14,61,5,4,0,15,0,0,10,21,0.308,0.34,0.374,0.713,102.0,74,5,0,4,1,1,H765/241,S,NL,0.0,0.0,,13.5,1.0,0.0,1.0,0.0,0.0,0.0,0.2,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,43.0,22.01,1.5,13.5,13.5,0.0,0.0,,R,HOU,NL,62.0,49.0,27.0,397.0,154.0,77.0,66.0,11.0,7.0,0.929,-6.0,-19.0,3.24,2.31,OF-SS-3B-C-2B-P,HOU,...,-1.0,-34.0,-1.0,6.94,3.86,1.0,4.0,1.0,3.0,75%,475,HOU,4,78,49,78,59,1,7,0,1,15,17,21,0,0,21,21,2,OF,0.192308,,A,0,,2,2.0,(12),0.0,,2,12,4,6.0,16,0,0.0,,10,53,A(12) (12-16),0.333,Y,0.666667,1.0,0.0,11.0,0.0,0.0,n,0,0.0,,,1.0,+,12.0,46.0,12.0,51.0,+Y1,(11-n),+Y1 (11-n),,0.846154,9,0.987179,4,0.75,-4
639,Thurman Munson,22,NYY,AL,26,97,86,6,22,1,2,1,9,0,1,10,10,0.256,0.33,0.349,0.679,94.0,30,5,0,0,1,1,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,25.0,24.0,24.0,218.0,139.0,119.0,18.0,2.0,0.0,0.986,1.0,6.0,5.66,5.48,C,NYY,...,1.0,6.0,1.0,5.66,5.48,5.0,9.0,6.0,7.0,54%,543,NYY,1st,26,24,26,25,0,25,0,0,0,0,0,0,0,0,1,0,C,0.346154,,B,2,12.0,3,5.0,(15),0.0,,4,14,4,8.0,22,0,0.0,,8,55,B12(15) (14-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.692308,9,4.576923,4,0.538462,-4
900,Ken Suarez,26,CLE,AL,36,102,85,7,25,5,0,1,9,1,0,15,12,0.294,0.4,0.388,0.788,119.0,33,5,0,2,0,5,2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,36.0,29.0,22.0,267.2,213.0,191.0,20.0,2.0,3.0,0.991,2.0,9.0,7.09,5.86,C,CLE,...,2.0,9.0,2.0,7.09,5.86,4.0,11.0,9.0,11.0,55%,755,CLE,4,36,29,36,36,0,36,0,0,0,0,0,0,0,0,0,0,C,0.25,,B+,1,11.0,0,0.0,,0.029,,5,15,4,9.0,23,0,0.0,,9,54,B+11 (15-23),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.555556,9,5.305556,4,0.55,-4
928,Joe Torre,28,STL,NL,159,678,602,72,174,29,6,18,101,0,0,66,85,0.289,0.361,0.447,0.808,126.0,269,10,5,0,5,13,*32/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STL,NL,161.0,158.0,146.0,1406.2,1458.0,1360.0,91.0,7.0,117.0,0.995,-4.0,-3.0,9.28,9.01,1B-C,STL,...,1.0,8.0,1.0,6.05,5.76,2.0,7.0,3.0,6.0,67%,780,STL,10,159,158,159,159,0,18,143,0,0,0,0,0,0,0,1,0,1B,0.63522,#,B+,4,14.0,1,5.0,(15),0.0,,4,14,5,9.0,23,0,0.0,,9,54,#B+14(15) (14-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.572327,8,8.553459,5,0.666667,-4


### Fielder Rating

In [890]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

84        391
S84       223
94         79
95         74
85         56
S94        51
S85        35
S95        31
94 -2      24
94 -3      16
94 -1      14
94 -4       7
S94 -3      6
S94 -1      5
S94 -2      3
S94 -4      1
S85 -4      1
84 -1       1
Name: fielder_rating, dtype: int64

In [891]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [892]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [893]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [894]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,...,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.088435,9,2.034014,4,,,94
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,...,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.040816,8,1.326531,4,,,S84
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,...,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23),0.227,M,85.333333,2.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.106667,+,11.0,51.0,7.0,56.0,+M2,(14-23),+M2 (14-23) [WP],,0.410714,8,0.160714,4,,,84
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,...,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15),,,,,,,,,,0,0.0,,,,,,,,,,,,,2.214286,8,1.880952,4,,,84
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,...,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.25,9,1.125,4,,,S94


In [895]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [896]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.088435,9,2.034014,4,,,94
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,,,,,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.040816,8,1.326531,4,,,S84
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,,,,,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23),0.227,M,85.333333,2.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.106667,+,11.0,51.0,7.0,56.0,+M2,(14-23),+M2 (14-23) [WP],,0.410714,8,0.160714,4,,,84
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15),,,,,,,,,,0,0.0,,,,,,,,,,,,,2.214286,8,1.880952,4,,,84
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.25,9,1.125,4,,,S94


In [900]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating,Games_Played,DH
0,Hank Aaron,35,ATL,NL,147,639,547,100,164,30,3,44,97,9,10,87,47,0.3,0.396,0.607,1.003,177.0,332,14,2,0,3,19,*9/3H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,148.0,145.0,130.0,1273.2,317.0,299.0,13.0,5.0,6.0,0.984,6.0,6.0,2.2,2.11,OF-1B,,,,,,,,,,,,,,,,,,,,,,,1,ATL,16,147,145,147,145,0,0,4,0,0,0,0,0,144,144,2,0,OF,0.659864,#,B+,10,24.0,1,11.0,(25),0.051,,5,15,3,8.0,22,0,0.0,,9,54,#B+24(25) (15-22),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.088435,9,2.034014,4,,,94,147.0,0
1,Tommie Aaron,29,ATL,NL,49,66,60,13,15,2,0,1,5,0,1,6,6,0.25,0.318,0.333,0.652,82.0,20,1,0,0,0,0,H3/7,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,24.0,8.0,8.0,111.1,67.0,65.0,2.0,0.0,5.0,1.0,0.0,-1.0,5.42,2.79,OF-1B,,,,,,,,,,,,,,,,,,,,,,,2,ATL,5,49,8,49,24,0,0,16,0,0,0,8,0,0,8,24,3,OF,0.102041,,C+,2,12.0,0,0.0,,0.0,,3,13,3,6.0,16,0,0.0,,8,55,C+12 (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.040816,8,1.326531,4,,,S84,49.0,0
2,Ted Abernathy,36,CHC,NL,56,8,8,1,2,1,0,0,1,0,0,0,2,0.25,0.25,0.375,0.625,65.0,3,0,0,0,0,0,1,R,NL,4.0,3.0,0.571,3.16,56.0,0.0,20.0,0.0,0.0,3.0,85.1,75.0,38.0,30.0,8.0,42.0,11.0,55.0,1.0,1.0,5.0,374.0,128.0,3.95,1.371,7.9,0.8,4.4,5.8,1.31,R,CHC,NL,56.0,0.0,0.0,85.1,34.0,9.0,23.0,2.0,0.0,0.941,,,3.38,0.57,P,,,,,,,,,,,,,,,,,,,,,,,3,CHC,11,56,0,56,56,56,0,0,0,0,0,0,0,0,0,0,0,P,0.017857,,C+,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,9,54,C+ (n-23),0.227,M,85.333333,2.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.106667,+,11.0,51.0,7.0,56.0,+M2,(14-23),+M2 (14-23) [WP],,0.410714,8,0.160714,4,,,84,56.0,0
3,Jerry Adair,32,KCR,AL,126,461,432,29,108,9,1,5,48,1,3,20,36,0.25,0.285,0.31,0.596,67.0,134,24,3,2,4,4,*4/H65,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,118.0,113.0,96.0,980.0,525.0,237.0,279.0,9.0,42.0,0.983,-2.0,-2.0,4.74,4.37,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,4,KCR,12,126,113,126,118,0,0,0,109,1,8,0,0,0,0,9,0,2B,0.380952,,C+,2,12.0,0,0.0,,0.009,,2,12,3,5.0,15,0,0.0,,8,55,C+12 (12-15),,,,,,,,,,0,0.0,,,,,,,,,,,,,2.214286,8,1.880952,4,,,84,126.0,0
4,Doug Adams,26,CHW,AL,8,15,14,1,3,0,0,0,1,0,0,1,3,0.214,0.267,0.214,0.481,33.0,3,1,0,0,0,0,/H2,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,3.81,2.75,C,CHW,AL,4.0,3.0,2.0,26.0,11.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,3.81,2.75,1.0,0.0,0.0,0.0,,5,CHW,1st,8,3,8,4,0,4,0,0,0,0,0,0,0,0,5,0,C,0.125,,C,0,,0,0.0,,0.0,,2,12,7,9.0,23,0,0.0,,7,56,C (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.25,9,1.125,4,,,S94,8.0,0


In [897]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

2.0      28
6.0      26
1.0      24
8.0      23
4.0      22
         ..
84.0      1
136.0     1
79.0      1
146.0     1
163.0     1
Name: Games_Played, Length: 160, dtype: int64

In [902]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "pitcher_rating", "PCN", "PPH", 
                                "Bats", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Hank Aaron,35,ATL,147.0,,,OF-1B,94,#B+24(25) (15-22),54,,,,R,,OF,0,0,4,0,0,0,0,0,144,144,0,2,0
1,Tommie Aaron,29,ATL,49.0,,,OF-1B,S84,C+12 (13-16),55,,,,R,,OF,0,0,16,0,0,0,8,0,0,8,0,24,3
2,Ted Abernathy,36,CHC,56.0,0.0,20.0,P,84,C+ (n-23),54,+M2 (14-23) [WP],51.0,56.0,R,R,P,56,0,0,0,0,0,0,0,0,0,0,0,0
3,Jerry Adair,32,KCR,126.0,,,2B-SS-3B,84,C+12 (12-15),55,,,,R,,2B,0,0,0,109,1,8,0,0,0,0,0,9,0
4,Doug Adams,26,CHW,8.0,,,C,S94,C (12-23),56,,,,L,,C,0,4,0,0,0,0,0,0,0,0,0,5,0


In [903]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [904]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           450
OF          224
C            97
2B           64
1B           60
SS           59
3B           48
DH_PH_PR     16
              0
Name: Primary, dtype: int64

In [905]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Hank Aaron,35,ATL,147.0,,,OF-1B,94,#B+24(25) (15-22),54,,,,R,,OF,0,0,4,0,0,0,0,0,144,144,0,2,0
1,Tommie Aaron,29,ATL,49.0,,,OF-1B,S84,C+12 (13-16),55,,,,R,,OF,0,0,16,0,0,0,8,0,0,8,0,24,3
2,Ted Abernathy,36,CHC,56.0,0.0,20.0,P,84,C+ (n-23),54,+M2 (14-23) [WP],51.0,56.0,R,R,P,56,0,0,0,0,0,0,0,0,0,0,0,0
3,Jerry Adair,32,KCR,126.0,,,2B-SS-3B,84,C+12 (12-15),55,,,,R,,2B,0,0,0,109,1,8,0,0,0,0,0,9,0
4,Doug Adams,26,CHW,8.0,,,C,S94,C (12-23),56,,,,L,,C,0,4,0,0,0,0,0,0,0,0,0,5,0


In [906]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [907]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [908]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [909]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [910]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
228,Bob Didier,20,ATL,114.0,,,C,S94 -1,B (13-21),55,,,,S,,C,0,114,0,0,0,0,0,0,0,0,0,0,0
922,Bob Tillman,32,ATL,69.0,,,C,94,D+26 (13-25),61,,,,R,,C,0,69,0,0,0,0,0,0,0,0,0,1,0
420,Walt Hriniak,26,ATL,7.0,,,C,94 -1,D (22-26),63,,,,L,,C,0,25,0,0,0,0,0,0,0,0,0,16,0
158,Orlando Cepeda,31,ATL,154.0,,,1B,85,B15* (13-21),55,,,,R,,1B,0,0,153,0,0,0,0,0,0,0,0,2,0
285,Tito Francona,35,ATL,51.0,,,1B-OF,84,B+13 (14-21),54,,,,L,,1B,0,0,26,0,0,0,16,0,0,16,0,44,0
100,Jim Breazeale,19,ATL,2.0,,,1B,84,G (46-n),66,,,,L,,1B,0,0,1,0,0,0,0,0,0,0,0,1,0
615,Felix Millan,25,ATL,162.0,,,2B,85,B11(12)* (12-14),54,,,,R,,2B,0,0,0,162,0,0,0,0,0,0,0,0,0
90,Clete Boyer,32,ATL,144.0,,,3B,85,C+14 (14-24),55,,,,R,,3B,0,0,0,0,141,0,0,0,0,0,0,3,0
33,Bob Aspromonte,31,ATL,82.0,,,3B-OF-SS-2B,85,B12(13) (12-15),55,,,,R,,3B,0,0,0,2,23,18,24,0,0,24,0,27,2
261,Darrell Evans,22,ATL,12.0,,,3B,84,C+ (11-25),55,,,,L,,3B,0,0,0,0,6,0,0,0,0,0,0,6,0


In [911]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'ATL':                 Name  Age   Tm      G  GS  GF    Positions     DEF  \
 228       Bob Didier   20  ATL  114.0                    C  S94 -1   
 922      Bob Tillman   32  ATL   69.0                    C     94    
 420     Walt Hriniak   26  ATL    7.0                    C   94 -1   
 158   Orlando Cepeda   31  ATL  154.0                   1B     85    
 285    Tito Francona   35  ATL   51.0                1B-OF     84    
 100    Jim Breazeale   19  ATL    2.0                   1B     84    
 615     Felix Millan   25  ATL  162.0                   2B     85    
 90       Clete Boyer   32  ATL  144.0                   3B     85    
 33    Bob Aspromonte   31  ATL   82.0          3B-OF-SS-2B     85    
 261    Darrell Evans   22  ATL   12.0                   3B     84    
 434    Sonny Jackson   24  ATL   98.0                   SS     85    
 303      Gil Garrido   28  ATL   82.0                   SS    S84    
 0         Hank Aaron   35  ATL  147.0                OF-1B     94    

In [912]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [913]:
create_excel(my_dict)

25

## Format Excel file

In [923]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [915]:
# wb.sheetnames

In [922]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators"
}

In [917]:
# team_dict["ATL"]

In [924]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 8.43
width_2 = 15.0
width_3 = 17.0
width_4 = 6.33

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_1    
    sheet.column_dimensions['B'].width = width_3
    sheet.column_dimensions['C'].width = width_1
    sheet.column_dimensions['D'].width = width_1
    sheet.column_dimensions['E'].width = width_1
    sheet.column_dimensions['F'].width = width_1
    sheet.column_dimensions['G'].width = width_1
    sheet.column_dimensions['H'].width = width_2
    sheet.column_dimensions['I'].width = width_1
    sheet.column_dimensions['J'].width = width_3
    sheet.column_dimensions['K'].width = width_1
    sheet.column_dimensions['L'].width = width_3
    sheet.column_dimensions['M'].width = width_4
    sheet.column_dimensions['N'].width = width_4
    sheet.column_dimensions['O'].width = width_4
    sheet.column_dimensions['P'].width = width_4
    sheet.column_dimensions['Q'].width = width_4
    sheet.column_dimensions['R'].width = width_4
    sheet.column_dimensions['S'].width = width_4
    sheet.column_dimensions['T'].width = width_4
    sheet.column_dimensions['U'].width = width_4
    sheet.column_dimensions['V'].width = width_4
    sheet.column_dimensions['W'].width = width_4
    sheet.column_dimensions['X'].width = width_4
    sheet.column_dimensions['Y'].width = width_4
    sheet.column_dimensions['Z'].width = width_4
    sheet.column_dimensions['AA'].width = width_4
    sheet.column_dimensions['AB'].width = width_4
    sheet.column_dimensions['AC'].width = width_4
    sheet.column_dimensions['AD'].width = width_4
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment

            

wb.save('../data/' + year + ' rosters ' + ' formatted.xlsx')

# Clean up

## Remove unwanted files

In [925]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')