# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [189]:
year = '1977'

In [190]:
url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'
url_app = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-appearances-fielding.shtml'

In [191]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/1977-specialpos_c-fielding.shtml'

In [192]:
session_requests = requests.session()

result = session_requests.get(url_bat, headers = dict(referer = url_bat))
result.status_code

200

In [193]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [194]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [195]:
soup = BeautifulSoup(result.content, 'lxml')

In [196]:
# print(soup.prettify())

In [197]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [198]:
bat = pullTable(url_bat, 'players_standard_batting')

In [199]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [200]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [201]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [202]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [203]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [204]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [205]:
findTables(url_app)

['teams_appearances', 'players_players_appearances_fielding']

In [206]:
app = pullTable(url_app, 'players_players_appearances_fielding')

In [207]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
855,856.0,Pat Zachry,25.0,NYM,NL,19,43,42,0,6,0,0,0,1,0,0,0,13,0.143,0.143,0.143,0.286,-21.0,6,4,0,1,0,0,1
856,857.0,Joe Zdeb,24.0,KCR,AL,105,217,195,26,58,5,2,2,23,6,5,16,23,0.297,0.346,0.374,0.72,96.0,73,5,0,3,3,3,7H/9D58
857,858.0,George Zeber#,26.0,NYY,AL,25,75,65,8,21,3,0,3,10,0,0,9,11,0.323,0.405,0.508,0.913,149.0,33,1,0,1,0,1,4/56DH
858,859.0,Richie Zisk,28.0,CHW,AL,141,599,531,78,154,17,6,30,101,0,4,55,98,0.29,0.355,0.514,0.869,134.0,273,15,3,1,9,7,*9D/7H
859,,LgAvg per 600 PA,,,,190,600,535,69,141,24,4,13,65,11,6,51,82,0.263,0.328,0.399,0.726,,213,12,3,7,5,5,


In [208]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [209]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [210]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    539
L    245
S     75
Name: Bats, dtype: int64

In [211]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
854,855,Pat Zachry,25,CIN,NL,12,27,22,0,3,0,0,0,0,0,0,1,11,0.136,0.174,0.136,0.31,-16,3,0,0,4,0,0,1,R
855,856,Pat Zachry,25,NYM,NL,19,43,42,0,6,0,0,0,1,0,0,0,13,0.143,0.143,0.143,0.286,-21,6,4,0,1,0,0,1,R
856,857,Joe Zdeb,24,KCR,AL,105,217,195,26,58,5,2,2,23,6,5,16,23,0.297,0.346,0.374,0.72,96,73,5,0,3,3,3,7H/9D58,R
857,858,George Zeber,26,NYY,AL,25,75,65,8,21,3,0,3,10,0,0,9,11,0.323,0.405,0.508,0.913,149,33,1,0,1,0,1,4/56DH,S
858,859,Richie Zisk,28,CHW,AL,141,599,531,78,154,17,6,30,101,0,4,55,98,0.29,0.355,0.514,0.869,134,273,15,3,1,9,7,*9D/7H,R


In [212]:
bat.drop(columns=["Rk"], inplace=True)

In [213]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
854,Pat Zachry,25,CIN,NL,12,27,22,0,3,0,0,0,0,0,0,1,11,0.136,0.174,0.136,0.31,-16,3,0,0,4,0,0,1,R
855,Pat Zachry,25,NYM,NL,19,43,42,0,6,0,0,0,1,0,0,0,13,0.143,0.143,0.143,0.286,-21,6,4,0,1,0,0,1,R
856,Joe Zdeb,24,KCR,AL,105,217,195,26,58,5,2,2,23,6,5,16,23,0.297,0.346,0.374,0.72,96,73,5,0,3,3,3,7H/9D58,R
857,George Zeber,26,NYY,AL,25,75,65,8,21,3,0,3,10,0,0,9,11,0.323,0.405,0.508,0.913,149,33,1,0,1,0,1,4/56DH,S
858,Richie Zisk,28,CHW,AL,141,599,531,78,154,17,6,30,101,0,4,55,98,0.29,0.355,0.514,0.869,134,273,15,3,1,9,7,*9D/7H,R


In [214]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
454,455.0,Pat Zachry,25.0,TOT,NL,10,13,0.435,4.25,31,31,0,5,1,0,194.2,207,104,92,21,77,5,99,4,1,0,845,90,4.37,1.459,9.6,1.0,3.6,4.6,1.29
455,456.0,Pat Zachry,25.0,CIN,NL,3,7,0.3,5.04,12,12,0,3,0,0,75.0,78,45,42,7,29,1,36,1,0,0,321,78,4.19,1.427,9.4,0.8,3.5,4.3,1.24
456,457.0,Pat Zachry,25.0,NYM,NL,7,6,0.538,3.76,19,19,0,2,1,0,119.2,129,59,50,14,48,4,63,3,1,0,524,99,4.49,1.479,9.7,1.1,3.6,4.7,1.31
457,458.0,Geoff Zahn*,31.0,MIN,AL,12,14,0.462,4.68,34,32,0,7,1,0,198.0,234,116,103,20,66,4,88,5,0,5,870,85,4.24,1.515,10.6,0.9,3.0,4.0,1.33
458,,LgAvg per 180 IP,,,,10,10,0.499,4.04,51,20,16,4,1,4,180.0,182,91,81,18,66,6,103,4,1,6,772,99,4.03,1.378,9.1,0.9,3.3,5.1,1.56


In [215]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [216]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [217]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    315
L    143
Name: Throws, dtype: int64

In [218]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
453,454,Wilbur Wood,35,CHW,AL,7,8,0.467,4.99,24,18,2,5,1,0,122.2,139,75,68,10,50,0,42,10,0,0,548,83,4.58,1.541,10.2,0.7,3.7,3.1,0.84,L
454,455,Pat Zachry,25,TOT,NL,10,13,0.435,4.25,31,31,0,5,1,0,194.2,207,104,92,21,77,5,99,4,1,0,845,90,4.37,1.459,9.6,1.0,3.6,4.6,1.29,R
455,456,Pat Zachry,25,CIN,NL,3,7,0.3,5.04,12,12,0,3,0,0,75.0,78,45,42,7,29,1,36,1,0,0,321,78,4.19,1.427,9.4,0.8,3.5,4.3,1.24,R
456,457,Pat Zachry,25,NYM,NL,7,6,0.538,3.76,19,19,0,2,1,0,119.2,129,59,50,14,48,4,63,3,1,0,524,99,4.49,1.479,9.7,1.1,3.6,4.7,1.31,R
457,458,Geoff Zahn,31,MIN,AL,12,14,0.462,4.68,34,32,0,7,1,0,198.0,234,116,103,20,66,4,88,5,0,5,870,85,4.24,1.515,10.6,0.9,3.0,4.0,1.33,L


In [219]:
pit.drop(columns=["Rk"], inplace=True)

In [220]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [221]:
players.shape

(1067, 62)

In [222]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos Summary
886,887.0,Geoff Zahn,31.0,MIN,AL,34,32,7,198.0,58,16,40,2,5,0.966,,,2.55,1.65,P
887,888.0,Joe Zdeb,24.0,KCR,AL,94,41,31,451.1,100,93,4,3,0,0.97,1.0,2.0,1.93,1.03,OF-3B
888,889.0,George Zeber,26.0,NYY,AL,25,16,12,164.1,102,42,56,4,11,0.961,-1.0,-7.0,5.37,3.92,2B-3B-SS
889,890.0,Richie Zisk,28.0,CHW,AL,109,108,97,931.1,223,210,9,4,3,0.982,-9.0,-11.0,2.12,2.01,OF
890,,LgAvg,,,,198,154,120,1379.0,669,465,189,15,42,0.978,0.0,0.0,4.27,3.29,


In [223]:
fld = fld[fld["Name"] != "LgAvg"]

In [224]:
fld.drop(columns=["Rk"], inplace=True)

In [225]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [226]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rctch,RF/9,RF/G,PB,WP,SB,CS,CS%
96,97.0,Earl Williams,28.0,OAK,AL,36,33,16,269.0,188,165,21,2,2,0.989,2,9,2,6.22,5.17,2,10,26,20,43%
97,98.0,John Wockenfuss,28.0,DET,AL,37,33,31,296.2,198,175,20,3,2,0.985,0,0,0,5.92,5.27,5,4,20,12,38%
98,99.0,Butch Wynegar,21.0,MIN,AL,142,138,135,1247.0,765,676,84,5,8,0.993,4,4,4,5.49,5.35,10,41,81,60,43%
99,100.0,Steve Yeager,28.0,LAD,NL,123,119,90,1015.0,797,690,89,18,12,0.977,6,7,6,6.91,6.33,6,19,56,36,39%
100,,LgAvg,,,,20,17,15,156.0,110,97,11,2,1,0.985,0,0,0,6.21,5.29,1,5,12,7,37%


In [227]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [228]:
cat = cat[cat["Name"] != "LgAvg"]

In [229]:
cat.drop(columns=["Rk"], inplace=True)

In [230]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [231]:
app.tail()

Unnamed: 0,Rk,Name,Age,Tm,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
895,896,Pat Zachry,25,2TM,2,31,31,31,31,31,0,0,0,0,0,0,0,0,0,0,0,0
896,897,Geoff Zahn,31,MIN,5,34,32,0,34,34,0,0,0,0,0,0,0,0,0,0,0,0
897,898,Joe Zdeb,24,KCR,1st,105,41,105,94,0,0,0,0,1,0,87,1,8,93,4,27,12
898,899,George Zeber,26,NYY,1st,25,16,25,24,0,0,0,21,2,2,0,0,0,0,1,1,0
899,900,Richie Zisk,28,CHW,7,141,137,141,108,0,0,0,0,0,0,8,0,101,108,28,4,0


In [232]:
players = pd.merge(players, app, how='left', on=["Name", "Age"], suffixes=('', '_app'))

In [233]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [234]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,AL,1.0,1.0,1.0,9.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,,,,,,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,,,,,,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,,,,,,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0


In [235]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [236]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'RF/9_cat',
 'RF/G_cat',
 'PB',
 'WP_cat',
 'SB_cat',
 'CS_cat',
 'CS%',
 'Rk',
 'Tm_app',
 'Yrs',
 'G_app',
 'GS_app',
 'Batt

In [237]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [238]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     455
OF    226
C      93
1B     78
2B     75
SS     72
3B     58
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [239]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [240]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    206
0.166667     12
0.500000      9
0.333333      9
0.250000      9
           ... 
0.335821      1
0.464000      1
0.532374      1
0.116667      1
0.318182      1
Name: rbi_per_g, Length: 470, dtype: int64

In [241]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     1021
#      46
Name: clutch, dtype: int64

In [242]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
51,Kevin Bell,21,CHW,AL,9.0,32.0,28.0,4.0,5.0,1.0,0.0,1.0,6.0,0.0,0.0,3.0,8.0,0.179,0.25,0.321,0.571,55.0,9.0,0.0,0.0,0.0,1.0,0.0,/657,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,10.0,9.0,7.0,77.1,35.0,12.0,21.0,2.0,7.0,0.943,-1.0,-19.0,3.84,3.3,SS-3B-OF,,,,,,,,,,,,,,,,,,,,,,,59,CHW,2,9,9,9,9,0,0,0,0,4,5,1,0,0,1,0,0,0,SS,0.666667,#
53,Johnny Bench,29,CIN,NL,142.0,563.0,494.0,67.0,136.0,34.0,2.0,31.0,109.0,2.0,4.0,58.0,95.0,0.275,0.348,0.54,0.889,133.0,267.0,10.0,1.0,0.0,7.0,8.0,*2/7H359,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,148.0,134.0,105.0,1143.1,816.0,735.0,69.0,12.0,13.0,0.985,2.0,2.0,6.33,5.43,C-OF-1B-3B,CIN,NL,135.0,125.0,102.0,1065.1,781.0,705.0,66.0,10.0,10.0,0.987,2.0,2.0,2.0,6.51,5.71,3.0,39.0,64.0,42.0,40%,61,CIN,11,142,134,142,142,0,135,4,0,1,0,7,0,1,8,0,6,0,C,0.767606,#
59,Kurt Bevacqua,30,TEX,AL,39.0,104.0,96.0,13.0,32.0,7.0,2.0,5.0,28.0,0.0,1.0,6.0,13.0,0.333,0.365,0.604,0.97,159.0,58.0,6.0,0.0,0.0,2.0,1.0,H5/9734D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TEX,AL,35.0,17.0,7.0,182.0,74.0,42.0,31.0,1.0,4.0,0.986,2.0,16.0,3.61,2.09,OF-3B-2B-1B,,,,,,,,,,,,,,,,,,,,,,,67,TEX,7,39,18,39,30,0,0,5,5,11,0,6,0,8,14,3,16,3,OF,0.717949,#
73,Bobby Bonds,31,CAL,AL,158.0,679.0,592.0,103.0,156.0,23.0,9.0,37.0,115.0,41.0,18.0,74.0,141.0,0.264,0.342,0.52,0.862,136.0,308.0,9.0,2.0,1.0,10.0,5.0,*9D/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,140.0,139.0,131.0,1222.1,281.0,272.0,5.0,4.0,0.0,0.986,5.0,5.0,2.04,1.98,OF,,,,,,,,,,,,,,,,,,,,,,,80,CAL,10,158,157,158,140,0,0,0,0,0,0,0,0,140,140,18,1,0,OF,0.727848,#
88,George Brett,24,KCR,AL,139.0,627.0,564.0,105.0,176.0,32.0,13.0,22.0,88.0,14.0,12.0,55.0,24.0,0.312,0.373,0.532,0.905,142.0,300.0,12.0,2.0,3.0,3.0,9.0,*5/HD6,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,136.0,134.0,124.0,1185.0,461.0,115.0,325.0,21.0,33.0,0.954,16.0,16.0,3.34,3.24,3B-SS,,,,,,,,,,,,,,,,,,,,,,,95,KCR,5,139,135,139,135,0,0,0,0,135,1,0,0,0,0,3,3,1,3B,0.633094,#
106,Jeff Burroughs,26,ATL,NL,154.0,671.0,579.0,91.0,157.0,19.0,1.0,41.0,114.0,4.0,1.0,86.0,126.0,0.271,0.362,0.52,0.882,123.0,301.0,8.0,0.0,0.0,6.0,2.0,*9/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,154.0,153.0,138.0,1346.0,265.0,249.0,9.0,7.0,3.0,0.974,-26.0,-23.0,1.73,1.68,OF,,,,,,,,,,,,,,,,,,,,,,,115,ATL,8,154,153,154,154,0,0,0,0,0,0,0,0,154,154,0,1,0,OF,0.74026,#
123,Rod Carew,31,MIN,AL,155.0,694.0,616.0,128.0,239.0,38.0,16.0,14.0,100.0,23.0,13.0,69.0,55.0,0.388,0.449,0.57,1.019,178.0,351.0,6.0,3.0,1.0,5.0,15.0,*3/H4D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,155.0,148.0,137.0,1319.0,1597.0,1463.0,124.0,10.0,161.0,0.994,8.0,7.0,10.83,10.24,1B-2B,,,,,,,,,,,,,,,,,,,,,,,136,MIN,11,155,148,155,151,0,0,151,4,0,0,0,0,0,0,1,7,0,1B,0.645161,#
128,Rico Carty,37,CLE,AL,127.0,521.0,461.0,50.0,129.0,23.0,1.0,15.0,80.0,1.0,2.0,56.0,51.0,0.28,0.355,0.432,0.787,118.0,199.0,17.0,0.0,0.0,4.0,6.0,*D/H3,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,2.0,2.0,1.0,16.0,23.0,20.0,3.0,0.0,4.0,1.0,0.0,-8.0,12.94,11.5,1B,,,,,,,,,,,,,,,,,,,,,,,141,CLE,13,127,124,127,2,0,0,2,0,0,0,0,0,0,0,123,3,0,1B,0.629921,#
133,Ron Cey,29,LAD,NL,153.0,669.0,564.0,77.0,136.0,22.0,3.0,30.0,110.0,3.0,4.0,93.0,106.0,0.241,0.347,0.45,0.797,114.0,254.0,11.0,2.0,3.0,7.0,6.0,*5,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAD,NL,153.0,153.0,137.0,1352.2,502.0,138.0,346.0,18.0,29.0,0.964,4.0,3.0,3.22,3.16,3B,,,,,,,,,,,,,,,,,,,,,,,147,LAD,7,153,153,153,153,0,0,0,0,153,0,0,0,0,0,0,0,0,3B,0.718954,#
153,Al Cowens,25,KCR,AL,162.0,661.0,606.0,98.0,189.0,32.0,14.0,23.0,112.0,16.0,12.0,41.0,64.0,0.312,0.361,0.525,0.885,137.0,318.0,14.0,8.0,1.0,5.0,4.0,*98/HD,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCR,AL,159.0,151.0,136.0,1352.0,327.0,307.0,14.0,6.0,1.0,0.982,6.0,6.0,2.14,2.02,OF,,,,,,,,,,,,,,,,,,,,,,,172,KCR,4,162,151,162,158,0,0,0,0,0,0,0,27,142,158,2,6,0,OF,0.691358,#


### Letter Rating

In [243]:
players["BA"].value_counts()

0.000    98
0.250    20
0.333    13
0.167    12
0.241    12
         ..
0.178     1
0.137     1
0.170     1
0.339     1
0.181     1
Name: BA, Length: 217, dtype: int64

In [244]:
players["BA"].isnull().sum()

257

In [245]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [246]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [247]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      355
B      142
C+     136
B+     108
C       71
D+      59
A       55
D       43
E+      25
F       22
E       18
AAA     16
A+       7
AA       7
G+       3
Name: bat_letter, dtype: int64

In [248]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
40,Rick Baldwin,24,NYM,NL,40.0,4.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.5,0.5,0.5,1.0,177.0,2.0,0.0,0.0,0.0,0.0,0.0,1,L,NL,1.0,2.0,0.333,4.45,40.0,0.0,13.0,0.0,0.0,1.0,62.2,62.0,32.0,31.0,6.0,31.0,9.0,23.0,5.0,1.0,2.0,274.0,84.0,4.97,1.484,8.9,0.9,4.5,3.3,0.74,R,NYM,NL,40.0,0.0,0.0,62.2,18.0,6.0,12.0,0.0,3.0,1.0,,,2.59,0.45,P,,,,,,,,,,,,,,,,,,,,,,,43,NYM,3,40,0,40,40,40,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
110,Mike Caldwell,28,TOT,MLB,14.0,5.0,4.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.6,1.0,1.6,318.0,4.0,0.0,0.0,0.0,0.0,0.0,1,R,MLB,5.0,8.0,0.385,4.46,35.0,12.0,10.0,2.0,0.0,1.0,119.0,126.0,69.0,59.0,7.0,44.0,8.0,49.0,2.0,2.0,5.0,522.0,91.0,3.84,1.429,9.5,0.5,3.3,3.7,1.11,L,TOT,ZZ,35.0,12.0,2.0,119.0,38.0,7.0,31.0,0.0,3.0,1.0,,,2.87,1.09,P,,,,,,,,,,,,,,,,,,,,,,,123,2TM,7,35,12,14,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
111,Mike Caldwell,28,CIN,NL,14.0,5.0,4.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.6,1.0,1.6,318.0,4.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,0.0,0.0,,4.01,14.0,0.0,5.0,0.0,0.0,1.0,24.2,25.0,11.0,11.0,1.0,8.0,1.0,11.0,0.0,0.0,1.0,105.0,99.0,3.35,1.338,9.1,0.4,2.9,4.0,1.38,L,TOT,ZZ,35.0,12.0,2.0,119.0,38.0,7.0,31.0,0.0,3.0,1.0,,,2.87,1.09,P,,,,,,,,,,,,,,,,,,,,,,,123,2TM,7,35,12,14,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
207,Mike Easler,26,PIT,NL,10.0,19.0,18.0,3.0,8.0,2.0,0.0,1.0,5.0,0.0,0.0,0.0,1.0,0.444,0.421,0.722,1.143,198.0,13.0,0.0,0.0,0.0,1.0,0.0,/H97,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,4.0,3.0,3.0,30.0,7.0,7.0,0.0,0.0,0.0,1.0,0.0,12.0,2.1,1.75,OF,,,,,,,,,,,,,,,,,,,,,,,229,PIT,5,10,3,10,4,0,0,0,0,0,0,1,0,3,4,0,6,0,OF,0.5,,AAA
211,Rawly Eastwick,26,STL,NL,41.0,7.0,5.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,0.4,0.571,0.4,0.971,168.0,2.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.7,41.0,1.0,20.0,0.0,0.0,4.0,53.2,74.0,34.0,28.0,6.0,21.0,3.0,30.0,0.0,1.0,2.0,248.0,83.0,4.25,1.77,12.4,1.0,3.5,5.0,1.43,R,TOT,NL,64.0,1.0,0.0,97.0,11.0,5.0,6.0,0.0,0.0,1.0,,,1.02,0.17,P,,,,,,,,,,,,,,,,,,,,,,,231,2TM,4,64,1,64,64,64,0,0,0,0,0,0,0,0,0,0,0,0,P,0.02439,,AAA
249,Roger Freed,31,STL,NL,49.0,95.0,83.0,10.0,33.0,2.0,1.0,5.0,21.0,0.0,0.0,11.0,9.0,0.398,0.463,0.627,1.09,191.0,52.0,2.0,0.0,0.0,1.0,0.0,H3/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,STL,NL,24.0,19.0,5.0,142.2,115.0,107.0,7.0,1.0,13.0,0.991,-2.0,-17.0,7.19,4.75,1B-OF,,,,,,,,,,,,,,,,,,,,,,,270,STL,6,49,19,49,24,0,0,18,0,0,0,0,0,7,7,0,26,0,1B,0.428571,,AAA
459,Gene Locklear,27,NYY,AL,1.0,5.0,5.0,1.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.6,0.6,0.6,1.2,231.0,3.0,0.0,0.0,0.0,0.0,0.0,/7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,1.0,1.0,0.0,8.0,3.0,2.0,0.0,1.0,0.0,0.667,-1.0,-150.0,2.25,2.0,OF,,,,,,,,,,,,,,,,,,,,,,,484,NYY,5,1,1,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,OF,2.0,#,AAA
474,Mickey Mahler,24,ATL,NL,5.0,9.0,6.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.5,0.625,0.667,1.292,232.0,4.0,0.0,0.0,1.0,0.0,0.0,/1,S,NL,1.0,2.0,0.333,6.26,5.0,5.0,0.0,0.0,0.0,0.0,23.0,31.0,19.0,16.0,4.0,9.0,0.0,14.0,1.0,0.0,1.0,109.0,72.0,5.09,1.739,12.1,1.6,3.5,5.5,1.56,L,ATL,NL,5.0,5.0,0.0,23.0,4.0,0.0,3.0,1.0,1.0,0.75,,,1.17,0.6,P,,,,,,,,,,,,,,,,,,,,,,,500,ATL,1st,5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA
479,Mike Marshall,34,TOT,MLB,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,416.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,MLB,3.0,2.0,0.6,4.75,16.0,4.0,5.0,0.0,0.0,1.0,41.2,54.0,25.0,22.0,1.0,15.0,1.0,24.0,2.0,0.0,4.0,189.0,89.0,3.12,1.656,11.7,0.2,3.2,5.2,1.6,R,TOT,ZZ,16.0,4.0,0.0,41.2,10.0,2.0,7.0,1.0,1.0,0.9,,,1.94,0.56,P,,,,,,,,,,,,,,,,,,,,,,,505,2TM,10,16,4,4,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,AAA
480,Mike Marshall,34,ATL,NL,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,416.0,1.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,1.0,0.0,1.0,9.0,4.0,0.0,0.0,0.0,0.0,0.0,6.0,12.0,6.0,6.0,1.0,2.0,0.0,6.0,0.0,0.0,0.0,32.0,52.0,3.91,2.333,18.0,1.5,3.0,9.0,3.0,R,TOT,ZZ,16.0,4.0,0.0,41.2,10.0,2.0,7.0,1.0,1.0,0.9,,,1.94,0.56,P,,,,,,,,,,,,,,,,,,,,,,,505,2TM,10,16,4,4,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,0.25,,AAA


In [249]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.22399999999999998

### HR Number

In [250]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     617
2      83
1      73
4      68
3      63
5      49
6      39
7      21
9      18
8      17
12      7
10      5
18      4
24      1
17      1
11      1
Name: hr_rate, dtype: int64

In [251]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      617
12     83
11     73
14     68
13     63
15     49
16     39
21     21
23     18
22     17
26      7
24      5
36      4
46      1
35      1
25      1
Name: hr_num_bat, dtype: int64

### Triple Number

In [252]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     741
1     177
2      91
3      28
4      16
12      4
5       4
9       2
6       2
18      1
10      1
Name: triple_rate, dtype: int64

In [253]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     741
3.0      56
4.0      52
5.0      47
2.0      45
6.0      40
7.0      27
8.0      17
1.0      16
9.0      12
12.0      6
10.0      5
11.0      2
18.0      1
Name: triple_val, dtype: int64

In [254]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        741
(13)     56
(14)     52
(15)     47
(12)     45
(16)     40
(21)     27
(22)     17
(11)     16
(23)     12
(26)      6
(24)      5
(25)      2
(36)      1
Name: triple_num, dtype: int64

### Speed Rating

In [255]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [256]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    676
0.013     11
0.083      7
0.031      7
0.012      7
        ... 
0.412      1
0.120      1
0.086      1
0.325      1
0.153      1
Name: speed_score, Length: 162, dtype: int64

In [257]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [258]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        914
**       70
*        36
***      24
****     23
Name: speed_rating, dtype: int64

In [259]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
8,Matt Alexander,30,OAK,AL,90.0,47.0,42.0,24.0,10.0,1.0,0.0,0.0,2.0,26.0,14.0,4.0,6.0,0.238,0.304,0.262,0.566,59.0,11.0,0.0,0.0,1.0,0.0,0.0,H8D69/745,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,48.0,14.0,2.0,89.2,23.0,21.0,2.0,0.0,0.0,1.0,-2.0,-32.0,2.31,0.48,OF-2B-3B-SS,,,,,,,,,,,,,,,,,,,,,,,10,OAK,5,90,14,90,37,0,0,0,4,1,0,7,17,10,32,13,0,67,OF,0.022222,,C+,0,,0,0.0,,2.0,****
103,Glenn Burke,24,LAD,NL,83.0,175.0,169.0,16.0,43.0,8.0,0.0,1.0,13.0,13.0,5.0,5.0,22.0,0.254,0.28,0.32,0.6,62.0,54.0,3.0,1.0,0.0,0.0,1.0,8H/79,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAD,NL,74.0,35.0,27.0,432.0,102.0,98.0,1.0,3.0,0.0,0.971,-7.0,-21.0,2.06,1.34,OF,,,,,,,,,,,,,,,,,,,,,,,111,LAD,2,83,35,83,74,0,0,0,0,0,0,5,65,5,74,0,9,15,OF,0.156627,,B,1,11.0,0,0.0,,0.325,****
116,Joe Cannon,23,HOU,NL,9.0,17.0,17.0,3.0,2.0,2.0,0.0,0.0,1.0,1.0,1.0,0.0,5.0,0.118,0.118,0.235,0.353,-4.0,4.0,0.0,0.0,0.0,0.0,0.0,/H7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,NL,3.0,3.0,3.0,25.0,7.0,7.0,0.0,0.0,0.0,1.0,0.0,19.0,2.52,2.33,OF,,,,,,,,,,,,,,,,,,,,,,,131,HOU,1st,9,3,9,3,0,0,0,0,0,0,3,0,0,3,0,4,2,OF,0.111111,,E+,0,,0,0.0,,inf,****
131,Cesar Cedeno,26,HOU,NL,141.0,598.0,530.0,92.0,148.0,36.0,8.0,14.0,71.0,61.0,14.0,47.0,50.0,0.279,0.346,0.457,0.802,123.0,242.0,10.0,11.0,2.0,8.0,7.0,*8/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,NL,137.0,136.0,123.0,1184.2,350.0,335.0,14.0,1.0,2.0,0.997,5.0,5.0,2.65,2.55,OF,,,,,,,,,,,,,,,,,,,,,,,145,HOU,8,141,136,141,137,0,0,0,0,0,0,0,137,0,137,0,3,2,OF,0.503546,,B,3,13.0,2,5.0,(15),0.412,****
194,Miguel Dilone,22,PIT,NL,29.0,49.0,44.0,5.0,6.0,0.0,0.0,0.0,0.0,12.0,0.0,2.0,3.0,0.136,0.174,0.136,0.31,-16.0,6.0,0.0,0.0,3.0,0.0,0.0,H/789,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,17.0,7.0,4.0,85.0,22.0,21.0,1.0,0.0,0.0,1.0,2.0,21.0,2.33,1.29,OF,,,,,,,,,,,,,,,,,,,,,,,213,PIT,4,29,7,29,17,0,0,0,0,0,0,8,7,2,17,0,12,5,OF,0.0,,E+,0,,0,0.0,,1.5,****
195,Mike Dimmel,22,BAL,AL,25.0,6.0,5.0,8.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,1.0,0.0,0.0,9H/8,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,23.0,0.0,0.0,38.0,15.0,14.0,1.0,0.0,0.0,1.0,1.0,22.0,3.55,0.65,OF,,,,,,,,,,,,,,,,,,,,,,,214,BAL,1st,25,0,25,23,0,0,0,0,0,0,0,1,22,23,0,0,12,OF,0.0,,G,0,,0,0.0,,inf,****
314,Larry Harlow,25,BAL,AL,46.0,54.0,48.0,4.0,10.0,0.0,1.0,0.0,0.0,6.0,1.0,5.0,8.0,0.208,0.283,0.25,0.533,51.0,12.0,0.0,0.0,1.0,0.0,0.0,8H/9,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,38.0,13.0,12.0,158.2,53.0,47.0,0.0,6.0,0.0,0.887,-1.0,-5.0,2.67,1.24,OF,,,,,,,,,,,,,,,,,,,,,,,336,BAL,2,46,13,46,38,0,0,0,0,0,0,0,37,1,38,0,3,14,OF,0.0,,C,0,,4,4.0,(14),0.429,****
456,Larry Lintz,27,OAK,AL,41.0,40.0,30.0,11.0,4.0,1.0,0.0,0.0,0.0,13.0,5.0,8.0,13.0,0.133,0.333,0.167,0.5,44.0,5.0,0.0,1.0,1.0,0.0,1.0,4H/D65,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,31.0,9.0,7.0,114.0,67.0,29.0,37.0,1.0,9.0,0.985,2.0,17.0,5.21,2.13,2B-SS-3B,,,,,,,,,,,,,,,,,,,,,,,481,OAK,5,41,9,41,30,0,0,0,28,1,2,0,0,0,0,5,0,23,2B,0.0,,E+,0,,0,0.0,,1.083,****
475,Sheldon Mallory,23,OAK,AL,64.0,143.0,126.0,19.0,27.0,4.0,1.0,0.0,5.0,12.0,5.0,11.0,18.0,0.214,0.291,0.262,0.553,55.0,33.0,2.0,3.0,2.0,1.0,0.0,H98/7D3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,49.0,30.0,24.0,300.1,102.0,96.0,3.0,3.0,0.0,0.971,1.0,2.0,2.97,2.02,OF-1B,,,,,,,,,,,,,,,,,,,,,,,501,OAK,1st,64,31,64,48,0,0,4,0,0,0,9,16,22,45,8,9,17,OF,0.078125,,C,0,,1,1.0,(11),0.333,****
497,Bake McBride,28,TOT,NL,128.0,446.0,402.0,76.0,127.0,25.0,6.0,15.0,61.0,36.0,7.0,32.0,44.0,0.316,0.364,0.52,0.884,132.0,209.0,5.0,3.0,1.0,8.0,4.0,98H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,106.0,94.0,65.0,781.0,198.0,188.0,8.0,2.0,1.0,0.99,4.0,6.0,2.26,1.85,OF,,,,,,,,,,,,,,,,,,,,,,,522,2TM,5,128,95,128,105,0,0,0,0,0,0,0,52,55,105,0,27,4,OF,0.476562,,A,4,14.0,2,6.0,(16),0.31,****


### Base on Balls Number

In [260]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     430
3     174
2     159
4     117
1      87
5      50
6      23
7      15
8       6
12      2
9       2
18      1
10      1
Name: walk_rate, dtype: int64

In [261]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     430
13    174
12    159
14    117
11     87
15     50
16     23
21     15
22      6
26      2
23      2
36      1
24      1
Name: bb_num, dtype: int64

### Batter K Number

In [262]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     296
4     123
5     114
6      97
7      73
3      72
8      46
2      43
9      37
18     31
12     25
10     20
36     12
14     12
15     12
11     10
13      8
16      8
17      8
24      7
21      5
1       5
20      2
27      1
Name: k_rate, dtype: int64

In [263]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     296
9.0     100
8.0      95
7.0      94
6.0      75
10.0     67
12.0     53
5.0      47
11.0     41
18.0     34
4.0      32
15.0     21
13.0     20
14.0     19
36.0     12
3.0      10
16.0      8
24.0      8
17.0      7
21.0      6
20.0      5
19.0      4
2.0       4
27.0      3
23.0      3
22.0      2
28.0      1
Name: k_val, dtype: int64

In [264]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     296
23    100
22     95
21     94
16     75
24     67
26     53
15     47
25     41
36     34
14     32
33     21
31     20
32     19
66     12
13     10
46      8
34      8
35      7
43      6
42      5
12      4
41      4
53      3
45      3
44      2
54      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [265]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0    1016
1      45
2       4
5       1
4       1
Name: hbp_rate, dtype: int64

In [266]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [267]:
players["hbp_val"].value_counts()

0.0     1016
11.0       8
10.0       7
8.0        6
6.0        6
7.0        4
12.0       4
9.0        3
16.0       2
20.0       2
13.0       2
14.0       2
21.0       1
5.0        1
19.0       1
17.0       1
1.0        1
Name: hbp_val, dtype: int64

In [268]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       1016
/25       8
/24       7
/22       6
/16       6
/21       4
/26       4
/23       3
/42       2
/31       2
/34       2
/32       2
/11       1
/43       1
/15       1
/35       1
/41       1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [269]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     355
8     170
9     147
7      96
6      73
10     66
4      37
5      34
3      22
11     19
2      18
12     14
14      4
18      4
13      3
36      2
15      1
22      1
1       1
Name: hit_rate, dtype: int64

In [270]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    356
55    170
54    147
56     96
61     73
53     66
63     37
62     34
64     22
52     19
65     18
51     14
45      4
41      4
46      3
11      2
33      1
44      1
Name: PH_num_bat, dtype: int64

### Batter Rating

In [271]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [272]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " [" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + "]")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,AL,1.0,1.0,1.0,9.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22]
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,,,,,,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16]
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,,,,,,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36]
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,,,,,,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26]
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36]


In [273]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
1062,Mike Willis,26,TOR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,6.0,0.25,3.94,43.0,3.0,28.0,0.0,0.0,5.0,107.1,105.0,48.0,47.0,15.0,38.0,6.0,59.0,0.0,0.0,2.0,451.0,107.0,4.52,1.332,8.8,1.3,3.2,4.9,1.55,L,TOR,AL,43.0,3.0,0.0,107.1,26.0,6.0,19.0,1.0,1.0,0.962,,,2.1,0.58,P,,,,,,,,,,,,,,,,,,,,,,,878,TOR,1st,43,3,0,43,43,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1063,Jim Willoughby,28,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,6.0,2.0,0.75,4.94,31.0,0.0,16.0,0.0,0.0,2.0,54.2,54.0,32.0,30.0,5.0,18.0,3.0,33.0,2.0,0.0,2.0,234.0,92.0,3.82,1.317,8.9,0.8,3.0,5.4,1.83,R,BOS,AL,31.0,0.0,0.0,54.2,23.0,12.0,10.0,1.0,1.0,0.957,,,3.62,0.71,P,,,,,,,,,,,,,,,,,,,,,,,879,BOS,7,31,0,0,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1064,Rick Wise,31,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,11.0,5.0,0.688,4.77,26.0,20.0,2.0,4.0,2.0,0.0,128.1,151.0,68.0,68.0,19.0,28.0,1.0,85.0,4.0,1.0,1.0,555.0,95.0,4.09,1.395,10.6,1.3,2.0,6.0,3.04,R,BOS,AL,26.0,20.0,4.0,128.1,31.0,8.0,23.0,0.0,1.0,1.0,,,2.17,1.19,P,,,,,,,,,,,,,,,,,,,,,,,883,BOS,13,26,20,0,26,26,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1065,Wilbur Wood,35,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,7.0,8.0,0.467,4.99,24.0,18.0,2.0,5.0,1.0,0.0,122.2,139.0,75.0,68.0,10.0,50.0,0.0,42.0,10.0,0.0,0.0,548.0,83.0,4.58,1.541,10.2,0.7,3.7,3.1,0.84,L,CHW,AL,24.0,18.0,5.0,122.2,36.0,3.0,33.0,0.0,1.0,1.0,,,2.64,1.5,P,,,,,,,,,,,,,,,,,,,,,,,887,CHW,16,24,18,0,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1066,Geoff Zahn,31,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,12.0,14.0,0.462,4.68,34.0,32.0,0.0,7.0,1.0,0.0,198.0,234.0,116.0,103.0,20.0,66.0,4.0,88.0,5.0,0.0,5.0,870.0,85.0,4.24,1.515,10.6,0.9,3.0,4.0,1.33,L,MIN,AL,34.0,32.0,7.0,198.0,58.0,16.0,40.0,2.0,5.0,0.966,,,2.55,1.65,P,,,,,,,,,,,,,,,,,,,,,,,897,MIN,5,34,32,0,34,34,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [274]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.250,0.250,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.000,-1.0,-22.0,9.00,9.00,1B-C,DET,AL,1.0,1.0,1.0,9.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26,0,0.0,,0.000,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22]
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,,,,,,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12,0,0.0,,0.000,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16]
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.000,0.000,0.000,0.000,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.00,0.00,OF,,,,,,,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.000000,,G,0,,0,0.0,,0.000,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36]
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,,,,,,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26]
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.250,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.300,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.000,,,1.46,0.50,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36,0,0.0,,0.000,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,Mike Willis,26,TOR,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,2.0,6.0,0.250,3.94,43.0,3.0,28.0,0.0,0.0,5.0,107.1,105.0,48.0,47.0,15.0,38.0,6.0,59.0,0.0,0.0,2.0,451.0,107.0,4.52,1.332,8.8,1.3,3.2,4.9,1.55,L,TOR,AL,43.0,3.0,0.0,107.1,26.0,6.0,19.0,1.0,1.0,0.962,,,2.10,0.58,P,,,,,,,,,,,,,,,,,,,,,,,878,TOR,1st,43,3,0,43,43,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1063,Jim Willoughby,28,BOS,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,6.0,2.0,0.750,4.94,31.0,0.0,16.0,0.0,0.0,2.0,54.2,54.0,32.0,30.0,5.0,18.0,3.0,33.0,2.0,0.0,2.0,234.0,92.0,3.82,1.317,8.9,0.8,3.0,5.4,1.83,R,BOS,AL,31.0,0.0,0.0,54.2,23.0,12.0,10.0,1.0,1.0,0.957,,,3.62,0.71,P,,,,,,,,,,,,,,,,,,,,,,,879,BOS,7,31,0,0,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1064,Rick Wise,31,BOS,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,11.0,5.0,0.688,4.77,26.0,20.0,2.0,4.0,2.0,0.0,128.1,151.0,68.0,68.0,19.0,28.0,1.0,85.0,4.0,1.0,1.0,555.0,95.0,4.09,1.395,10.6,1.3,2.0,6.0,3.04,R,BOS,AL,26.0,20.0,4.0,128.1,31.0,8.0,23.0,0.0,1.0,1.000,,,2.17,1.19,P,,,,,,,,,,,,,,,,,,,,,,,883,BOS,13,26,20,0,26,26,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1065,Wilbur Wood,35,CHW,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,7.0,8.0,0.467,4.99,24.0,18.0,2.0,5.0,1.0,0.0,122.2,139.0,75.0,68.0,10.0,50.0,0.0,42.0,10.0,0.0,0.0,548.0,83.0,4.58,1.541,10.2,0.7,3.7,3.1,0.84,L,CHW,AL,24.0,18.0,5.0,122.2,36.0,3.0,33.0,0.0,1.0,1.000,,,2.64,1.50,P,,,,,,,,,,,,,,,,,,,,,,,887,CHW,16,24,18,0,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [275]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [276]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [277]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

### Pitcher Letter Rating

In [278]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.27423799126637577

In [279]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [280]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     634
W     147
M     108
X      89
L      29
Y      27
Z+     15
K      12
J+      4
J       2
Name: pit_letter, dtype: int64

In [281]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      609
W     147
M     108
X      89
L      29
Y      27
Z      25
Z+     15
K      12
J+      4
J       2
Name: pit_letter, dtype: int64

In [282]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.174

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [283]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

21.000000     5
14.333333     5
6.333333      4
6.000000      4
7.000000      4
3.000000      3
106.333333    3
4.333333      3
8.333333      3
7.666667      3
72.666667     3
87.333333     3
27.333333     3
1.333333      3
108.000000    3
2.000000      3
20.666667     3
22.333333     3
20.333333     3
2.333333      3
5.666667      3
18.333333     3
2.666667      3
98.666667     3
151.000000    3
Name: IP_real, dtype: int64

In [284]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

2.0    123
1.0     70
3.0     69
6.0     52
4.0     46
7.0     45
5.0     39
8.0     12
0.0      2
Name: IE, dtype: int64

In [285]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
917,Ed Farmer,27,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,17.0,,,,,,,0.0,R,BAL,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,247,BAL,5,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],1.0,Z,0.0,0.0
965,Rick Kreuger,28,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,9.0,,,,,,,,L,BOS,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,446,BOS,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],1.0,Z,0.0,0.0


In [286]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

2.0    123
1.0     72
3.0     69
6.0     52
4.0     46
7.0     45
5.0     39
8.0     12
Name: IE, dtype: int64

In [287]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
68,Vida Blue,27,OAK,AL,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,S,AL,14.0,19.0,0.424,3.83,38.0,38.0,0.0,16.0,1.0,0.0,279.2,284.0,138.0,119.0,23.0,86.0,5.0,157.0,1.0,0.0,11.0,1184.0,104.0,3.62,1.323,9.1,0.7,2.8,5.1,1.83,L,OAK,AL,38.0,38.0,16.0,279.2,51.0,6.0,42.0,3.0,2.0,0.941,,,1.54,1.26,P,,,,,,,,,,,,,,,,,,,,,,,75,OAK,9,38,38,2,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.259,W,279.666667,7.0
89,Ken Brett,28,TOT,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/H,L,AL,13.0,14.0,0.481,4.53,34.0,34.0,0.0,7.0,0.0,0.0,224.2,258.0,120.0,113.0,25.0,53.0,0.0,80.0,4.0,2.0,6.0,950.0,88.0,4.23,1.384,10.3,1.0,2.1,3.2,1.51,L,TOT,AL,34.0,34.0,7.0,224.2,61.0,11.0,48.0,2.0,6.0,0.967,,,2.36,1.74,P,,,,,,,,,,,,,,,,,,,,,,,96,2TM,10,35,34,1,34,34,0,0,0,0,0,0,0,0,0,0,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.289,X,224.666667,7.0
90,Ken Brett,28,CAL,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/H,L,AL,7.0,10.0,0.412,4.25,21.0,21.0,0.0,5.0,0.0,0.0,142.0,157.0,73.0,67.0,15.0,38.0,0.0,41.0,3.0,1.0,4.0,598.0,92.0,4.4,1.373,10.0,1.0,2.4,2.6,1.08,L,TOT,AL,34.0,34.0,7.0,224.2,61.0,11.0,48.0,2.0,6.0,0.967,,,2.36,1.74,P,,,,,,,,,,,,,,,,,,,,,,,96,2TM,10,35,34,1,34,34,0,0,0,0,0,0,0,0,0,0,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.282,X,142.0,7.0
115,John Candelaria,23,PIT,NL,33.0,88.0,80.0,5.0,18.0,3.0,2.0,0.0,11.0,0.0,1.0,4.0,18.0,0.225,0.262,0.313,0.574,53.0,25.0,0.0,0.0,4.0,0.0,0.0,1,L,NL,20.0,5.0,0.8,2.34,33.0,33.0,0.0,6.0,1.0,0.0,230.2,197.0,64.0,60.0,29.0,50.0,2.0,133.0,2.0,2.0,1.0,917.0,169.0,3.9,1.071,7.7,1.1,2.0,5.2,2.66,L,PIT,NL,33.0,33.0,6.0,230.2,37.0,6.0,30.0,1.0,3.0,0.973,,,1.4,1.09,P,,,,,,,,,,,,,,,,,,,,,,,129,PIT,3,33,33,33,33,33,0,0,0,0,0,0,0,0,0,0,0,0,P,0.333333,,C+,0,,4,4.0,(14),0.0,,2,12,7,9.0,23,0,0.0,,7,56,C+(14) [12-23],0.228,M,230.666667,7.0
124,Steve Carlton,32,PHI,NL,36.0,112.0,97.0,7.0,26.0,4.0,0.0,3.0,15.0,0.0,0.0,2.0,20.0,0.268,0.282,0.402,0.684,78.0,39.0,0.0,1.0,7.0,3.0,0.0,1,L,NL,23.0,10.0,0.697,2.64,36.0,36.0,0.0,17.0,2.0,0.0,283.0,229.0,99.0,83.0,25.0,89.0,5.0,198.0,4.0,7.0,3.0,1135.0,153.0,3.47,1.124,7.3,0.8,2.8,6.3,2.22,L,PHI,NL,36.0,36.0,17.0,283.0,57.0,4.0,52.0,1.0,2.0,0.982,,,1.78,1.56,P,,,,,,,,,,,,,,,,,,,,,,,137,PHI,13,36,36,36,36,36,0,0,0,0,0,0,0,0,0,0,0,0,P,0.416667,,B,4,14.0,0,0.0,,0.0,,1,11,6,7.0,21,0,0.0,,8,55,B14 [11-21],0.22,L,283.0,8.0
142,Jim Colborn,31,KCR,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,18.0,14.0,0.563,3.62,36.0,35.0,0.0,6.0,1.0,0.0,239.0,233.0,106.0,96.0,22.0,81.0,2.0,103.0,13.0,3.0,8.0,1023.0,113.0,4.25,1.314,8.8,0.8,3.1,3.9,1.27,R,KCR,AL,36.0,35.0,6.0,239.0,71.0,27.0,39.0,5.0,3.0,0.93,,,2.49,1.83,P,,,,,,,,,,,,,,,,,,,,,,,160,KCR,9,37,36,1,36,36,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.251,W,239.0,7.0
212,Dennis Eckersley,22,CLE,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/DH,R,AL,14.0,13.0,0.519,3.53,33.0,33.0,0.0,12.0,3.0,0.0,247.1,214.0,100.0,97.0,31.0,54.0,11.0,191.0,7.0,0.0,3.0,1006.0,112.0,3.56,1.084,7.8,1.1,2.0,7.0,3.54,R,CLE,AL,33.0,33.0,12.0,247.1,30.0,6.0,22.0,2.0,1.0,0.933,,,1.02,0.85,P,,,,,,,,,,,,,,,,,,,,,,,232,CLE,3,34,33,1,33,33,0,0,0,0,0,0,0,0,0,1,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.226,M,247.333333,7.0
275,Dave Goltz,28,MIN,AL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/DH,R,AL,20.0,11.0,0.645,3.36,39.0,39.0,0.0,19.0,2.0,0.0,303.0,284.0,129.0,113.0,23.0,91.0,4.0,186.0,2.0,1.0,9.0,1253.0,119.0,3.42,1.238,8.4,0.7,2.7,5.5,2.04,R,MIN,AL,39.0,39.0,19.0,303.0,66.0,20.0,41.0,5.0,4.0,0.924,,,1.81,1.56,P,,,,,,,,,,,,,,,,,,,,,,,297,MIN,6,41,40,2,39,39,0,0,0,0,0,0,0,0,0,1,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.245,M,303.0,8.0
299,Ron Guidry,26,NYY,AL,5.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/H,L,AL,16.0,7.0,0.696,2.82,31.0,25.0,4.0,9.0,5.0,1.0,210.2,174.0,72.0,66.0,12.0,65.0,2.0,176.0,0.0,0.0,6.0,850.0,140.0,2.73,1.134,7.4,0.5,2.8,7.5,2.71,L,NYY,AL,31.0,25.0,9.0,210.2,35.0,7.0,27.0,1.0,2.0,0.971,,,1.45,1.1,P,,,,,,,,,,,,,,,,,,,,,,,321,NYY,3,36,25,5,31,31,0,0,0,0,0,0,0,0,0,0,0,5,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.222,L,210.666667,7.0
305,Ed Halicki,26,SFG,NL,37.0,96.0,85.0,7.0,15.0,4.0,0.0,2.0,5.0,0.0,0.0,4.0,29.0,0.176,0.211,0.294,0.505,35.0,25.0,0.0,0.0,5.0,1.0,0.0,1,R,NL,16.0,12.0,0.571,3.32,37.0,37.0,0.0,7.0,2.0,0.0,257.2,241.0,105.0,95.0,27.0,70.0,5.0,168.0,7.0,3.0,1.0,1076.0,118.0,3.69,1.207,8.4,0.9,2.4,5.9,2.4,R,SFG,NL,37.0,37.0,7.0,257.2,42.0,15.0,24.0,3.0,1.0,0.929,,,1.36,1.05,P,,,,,,,,,,,,,,,,,,,,,,,327,SFG,4,37,37,37,37,37,0,0,0,0,0,0,0,0,0,0,0,0,P,0.135135,,D+,5,15.0,0,0.0,,0.0,,2,12,11,13.0,31,0,0.0,,6,61,D+15 [12-31],0.241,M,257.666667,7.0


In [288]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [289]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     620
3     177
4      90
2      89
5      47
1      11
6      10
8       9
7       8
9       2
18      1
12      1
11      1
10      1
Name: bb_rate, dtype: int64

In [290]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     177
4.0      90
2.0      89
5.0      47
1.0      11
0.0      11
6.0      10
8.0       9
7.0       8
9.0       2
11.0      1
18.0      1
12.0      1
10.0      1
Name: bb_rate, dtype: int64

In [291]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    609
13     177
14      90
12      89
15      47
11      22
16      10
22       9
21       8
23       2
25       1
26       1
36       1
24       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [292]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

4.0     134
5.0     107
3.0      63
6.0      58
7.0      33
2.0      28
0.0      10
8.0       9
1.0       6
9.0       3
10.0      3
11.0      3
14.0      1
Name: k_rate_pit, dtype: int64

In [293]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
500,Larry McCall,24,NYY,AL,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1H,L,AL,0.0,1.0,0.0,7.5,2.0,0.0,0.0,0.0,0.0,0.0,6.0,12.0,7.0,5.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,33.0,56.0,5.41,2.167,18.0,1.5,1.5,0.0,0.0,R,NYY,AL,2.0,0.0,0.0,6.0,2.0,1.0,0.0,1.0,0.0,0.5,,,1.5,0.5,P,,,,,,,,,,,,,,,,,,,,,,,523,NYY,1st,3,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,1,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.375,Z,6.0,3,1.0,11,0.0
510,Joey McLaughlin,20,ATL,NL,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,15.0,3.0,2.0,1.0,0.0,0.0,0.0,6.0,10.0,10.0,10.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,31.0,31.0,10.74,2.167,15.0,4.5,4.5,0.0,0.0,R,ATL,NL,3.0,2.0,0.0,6.0,3.0,0.0,3.0,0.0,0.0,1.0,,,4.5,1.0,P,,,,,,,,,,,,,,,,,,,,,,,537,ATL,1st,3,2,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.357,Z+,6.0,2,3.0,13,0.0
520,Mario Mendoza,26,PIT,NL,70.0,86.0,81.0,5.0,16.0,3.0,0.0,0.0,4.0,0.0,0.0,3.0,10.0,0.198,0.226,0.235,0.461,23.0,19.0,1.0,0.0,2.0,0.0,0.0,65/H1,R,NL,0.0,0.0,,13.5,1.0,0.0,1.0,0.0,0.0,0.0,2.0,3.0,3.0,3.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,10.0,35.0,12.24,2.5,13.5,4.5,9.0,0.0,0.0,R,PIT,NL,65.0,17.0,10.0,263.1,138.0,41.0,87.0,10.0,13.0,0.928,-1.0,-4.0,4.37,1.97,SS-3B-P,,,,,,,,,,,,,,,,,,,,,,,545,PIT,4,70,17,70,65,1,0,0,0,19,46,0,0,0,0,0,5,4,SS,0.057143,,C,0,,0,0.0,,0.0,,1,11,4,5.0,15,0,0.0,,7,56,C [11-15],0.375,Z,2.0,2,7.0,21,0.0
568,Jeff Newman,28,OAK,AL,94.0,172.0,162.0,17.0,36.0,9.0,0.0,4.0,15.0,2.0,0.0,4.0,24.0,0.222,0.244,0.352,0.596,63.0,57.0,2.0,1.0,4.0,1.0,1.0,2/1H,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,,5.74,1.0,9.0,0.0,0.0,0.0,,R,OAK,AL,95.0,42.0,31.0,467.1,296.0,251.0,36.0,9.0,5.0,0.97,2.0,5.0,5.53,3.02,C-P,OAK,AL,94.0,42.0,31.0,466.1,296.0,251.0,36.0,9.0,5.0,0.97,2.0,5.0,2.0,5.54,3.05,4.0,20.0,27.0,29.0,52%,600,OAK,2,94,42,94,94,1,94,0,0,0,0,0,0,0,0,0,1,0,C,0.159574,,C,4,14.0,0,0.0,,0.071,,1,11,5,6.0,16,0,0.0,,8,55,C14 [11-16],0.25,M,1.0,1,0.0,11,0.0
683,Ray Sadecki,36,NYM,NL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,1.0,0.0,6.0,4.0,0.0,2.0,0.0,0.0,0.0,3.0,3.0,2.0,2.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,14.0,70.0,10.07,2.0,9.0,3.0,9.0,0.0,0.0,L,NYM,NL,4.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,713,NYM,18,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.273,W,3.0,1,8.0,22,0.0
917,Ed Farmer,27,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,17.0,,,,,,,0.0,R,BAL,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,247,BAL,5,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],1.0,Z,0.0,1,18.0,36,0.0
965,Rick Kreuger,28,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,9.0,,,,,,,,L,BOS,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,446,BOS,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],1.0,Z,0.0,1,0.0,11,0.0
969,Gary Lance,28,KCR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,4.5,1.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,10.0,108.0,5.74,2.0,9.0,0.0,9.0,0.0,0.0,R,KCR,AL,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,459,KCR,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.25,M,2.0,2,7.0,21,0.0
991,Randy Miller,24,BAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,40.5,1.0,0.0,0.0,0.0,0.0,0.0,0.2,4.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,15.0,2.74,6.0,54.0,0.0,0.0,0.0,,R,BAL,AL,1.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,554,BAL,1st,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.667,Z,0.666667,1,0.0,11,0.0
1061,Randy Wiles,25,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,1.0,0.5,10.13,5.0,0.0,4.0,0.0,0.0,0.0,2.2,5.0,3.0,3.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,16.0,46.0,10.99,3.0,16.9,3.4,10.1,0.0,0.0,L,CHW,AL,5.0,0.0,0.0,2.2,1.0,0.0,1.0,0.0,0.0,1.0,,,3.38,0.2,P,,,,,,,,,,,,,,,,,,,,,,,873,CHW,1st,5,0,0,5,5,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.385,Z,2.666667,1,7.0,21,0.0


In [294]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

7.0     103
9.0      81
8.0      75
6.0      57
10.0     47
5.0      22
11.0     16
4.0      11
0.0      10
12.0      8
13.0      7
15.0      5
3.0       5
14.0      4
2.0       3
16.0      2
17.0      1
18.0      1
Name: k_val_pit, dtype: int64

In [295]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    609
21     103
23      81
22      75
16      57
24      47
15      22
25      16
14      11
n       10
26       8
31       7
13       5
33       5
32       4
12       3
34       2
36       1
35       1
Name: k_num_pit, dtype: int64

In [296]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [297]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    1035
1      26
3       3
2       2
7       1
Name: hbp_rate_pit, dtype: int64

In [298]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [299]:
players["hbp_val_pit"].value_counts()

0.0     1035
10.0       6
9.0        5
13.0       4
7.0        4
8.0        4
18.0       3
6.0        2
3.0        1
4.0        1
11.0       1
12.0       1
Name: hbp_val_pit, dtype: int64

In [300]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       1035
/24       6
/23       5
/21       4
/31       4
/22       4
/36       3
/16       2
/25       1
/14       1
/13       1
/26       1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [301]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [302]:
players["WP"].value_counts()

0.0     105
1.0      75
2.0      75
3.0      57
4.0      48
5.0      28
6.0      22
8.0      16
7.0      11
9.0       5
11.0      4
10.0      4
12.0      3
14.0      2
20.0      1
17.0      1
21.0      1
Name: WP, dtype: int64

In [303]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit
68,Vida Blue,27,OAK,AL,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,S,AL,14.0,19.0,0.424,3.83,38.0,38.0,0.0,16.0,1.0,0.0,279.2,284.0,138.0,119.0,23.0,86.0,5.0,157.0,1.0,0.0,11.0,1184.0,104.0,3.62,1.323,9.1,0.7,2.8,5.1,1.83,L,OAK,AL,38.0,38.0,16.0,279.2,51.0,6.0,42.0,3.0,2.0,0.941,,,1.54,1.26,P,,,,,,,,,,,,,,,,,,,,,,,75,OAK,9,38,38,2,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,36,36.0,66,0,0.0,,0,66,G [n-66],0.259,W,279.666667,7,3.0,13,5.0,8.0,22,0,0.0,
385,Tom Johnson,26,MIN,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,16.0,7.0,0.696,3.13,71.0,0.0,54.0,0.0,0.0,15.0,146.2,152.0,57.0,51.0,11.0,47.0,8.0,87.0,5.0,0.0,10.0,629.0,128.0,3.59,1.357,9.3,0.7,2.9,5.3,1.85,R,MIN,AL,71.0,0.0,0.0,146.2,36.0,9.0,26.0,1.0,0.0,0.972,,,2.15,0.49,P,,,,,,,,,,,,,,,,,,,,,,,410,MIN,4,71,0,1,71,71,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.263,W,146.666667,2,3.0,13,5.0,8.0,22,0,0.0,
450,Dennis Leonard,26,KCR,AL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1/D,R,AL,20.0,12.0,0.625,3.04,38.0,37.0,1.0,21.0,5.0,1.0,292.2,246.0,117.0,99.0,18.0,79.0,0.0,244.0,8.0,2.0,14.0,1186.0,134.0,2.76,1.11,7.6,0.6,2.4,7.5,3.09,R,KCR,AL,38.0,37.0,21.0,292.2,54.0,21.0,29.0,4.0,2.0,0.926,,,1.54,1.32,P,,,,,,,,,,,,,,,,,,,,,,,475,KCR,4,39,38,2,38,38,0,0,0,0,0,0,0,0,0,1,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.224,M,292.666667,8,2.0,12,7.0,9.0,23,0,0.0,
467,Sparky Lyle,32,NYY,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,13.0,5.0,0.722,2.17,72.0,0.0,60.0,0.0,0.0,26.0,137.0,131.0,41.0,33.0,7.0,33.0,6.0,68.0,2.0,0.0,10.0,554.0,183.0,3.18,1.197,8.6,0.5,2.2,4.5,2.06,L,NYY,AL,72.0,0.0,0.0,137.0,26.0,2.0,22.0,2.0,0.0,0.923,,,1.58,0.33,P,,,,,,,,,,,,,,,,,,,,,,,492,NYY,11,72,0,1,72,72,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.252,W,137.0,2,2.0,12,4.0,6.0,16,0,0.0,
570,Phil Niekro,38,ATL,NL,44.0,123.0,109.0,2.0,19.0,1.0,0.0,0.0,7.0,0.0,0.0,0.0,15.0,0.174,0.18,0.183,0.364,-6.0,20.0,3.0,1.0,12.0,1.0,0.0,1,R,NL,16.0,20.0,0.444,4.03,44.0,43.0,1.0,20.0,2.0,0.0,330.1,315.0,166.0,148.0,26.0,164.0,12.0,262.0,8.0,3.0,17.0,1428.0,111.0,3.74,1.45,8.6,0.7,4.5,7.1,1.6,R,ATL,NL,44.0,43.0,20.0,330.1,71.0,20.0,51.0,0.0,5.0,1.0,,,1.93,1.61,P,,,,,,,,,,,,,,,,,,,,,,,602,ATL,14,44,43,44,44,44,0,0,0,0,0,0,0,0,0,0,0,0,P,0.159091,,D+,0,,0,0.0,,0.0,,0,n,4,4.0,14,0,0.0,,6,61,D+ [n-14],0.251,W,330.333333,8,4.0,14,7.0,11.0,25,0,0.0,
580,Fred Norman,34,CIN,NL,35.0,84.0,73.0,4.0,8.0,1.0,0.0,0.0,2.0,0.0,0.0,5.0,26.0,0.11,0.167,0.123,0.29,-21.0,9.0,1.0,0.0,6.0,0.0,0.0,1,S,NL,14.0,13.0,0.519,3.38,35.0,34.0,0.0,8.0,1.0,0.0,221.1,200.0,97.0,83.0,28.0,98.0,9.0,160.0,3.0,1.0,11.0,945.0,116.0,4.31,1.346,8.1,1.1,4.0,6.5,1.63,L,CIN,NL,35.0,34.0,8.0,221.1,44.0,9.0,31.0,4.0,2.0,0.909,,,1.63,1.14,P,,,,,,,,,,,,,,,,,,,,,,,608,CIN,13,35,34,35,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.057143,,E,0,,0,0.0,,0.0,,2,12,11,13.0,31,0,0.0,,3,64,E [12-31],0.237,M,221.333333,6,4.0,14,6.0,10.0,24,0,0.0,
651,Jerry Reuss,28,PIT,NL,35.0,80.0,70.0,3.0,12.0,2.0,0.0,0.0,6.0,0.0,0.0,6.0,26.0,0.171,0.237,0.2,0.437,18.0,14.0,1.0,0.0,4.0,0.0,0.0,1/H,L,NL,10.0,13.0,0.435,4.11,33.0,33.0,0.0,8.0,2.0,0.0,208.0,225.0,109.0,95.0,11.0,71.0,2.0,116.0,4.0,1.0,11.0,894.0,96.0,3.39,1.423,9.7,0.5,3.1,5.0,1.63,L,PIT,NL,33.0,33.0,8.0,208.0,50.0,7.0,40.0,3.0,2.0,0.94,,,2.03,1.42,P,,,,,,,,,,,,,,,,,,,,,,,679,PIT,9,35,34,35,33,33,0,0,0,0,0,0,0,0,0,0,1,0,P,0.171429,,D+,0,,0,0.0,,0.0,,3,13,12,15.0,33,0,0.0,,5,62,D+ [13-33],0.275,W,208.0,6,3.0,13,5.0,8.0,22,0,0.0,
667,Steve Rogers,27,MON,NL,40.0,112.0,96.0,1.0,10.0,0.0,0.0,0.0,3.0,0.0,1.0,4.0,27.0,0.104,0.14,0.104,0.244,-32.0,10.0,1.0,0.0,12.0,0.0,0.0,1,R,NL,17.0,16.0,0.515,3.1,40.0,40.0,0.0,17.0,4.0,0.0,301.2,272.0,122.0,104.0,16.0,81.0,3.0,206.0,5.0,1.0,14.0,1235.0,123.0,2.92,1.17,8.1,0.5,2.4,6.1,2.54,R,MON,NL,40.0,40.0,17.0,301.2,90.0,20.0,63.0,7.0,5.0,0.922,,,2.48,2.08,P,,,,,,,,,,,,,,,,,,,,,,,695,MON,5,40,40,40,40,40,0,0,0,0,0,0,0,0,0,0,0,0,P,0.075,,E,0,,0,0.0,,0.0,,1,11,9,10.0,24,0,0.0,,3,64,E [11-24],0.237,M,301.666667,8,2.0,12,6.0,8.0,22,0,0.0,
689,Rick Sawyer,29,SDP,NL,56.0,27.0,20.0,2.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,5.0,8.0,0.15,0.32,0.15,0.47,38.0,3.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,7.0,6.0,0.538,5.84,56.0,9.0,15.0,0.0,0.0,0.0,111.0,136.0,77.0,72.0,15.0,55.0,11.0,45.0,7.0,2.0,10.0,508.0,61.0,5.36,1.721,11.0,1.2,4.5,3.6,0.82,R,SDP,NL,56.0,9.0,0.0,111.0,29.0,3.0,24.0,2.0,2.0,0.931,,,2.19,0.48,P,,,,,,,,,,,,,,,,,,,,,,,719,SDP,4,56,9,56,56,56,0,0,0,0,0,0,0,0,0,0,0,0,P,0.035714,,D,0,,0,0.0,,0.0,,7,21,11,18.0,36,0,0.0,,4,63,D [21-36],0.305,X,111.0,2,4.0,14,3.0,7.0,21,0,0.0,
876,Jim Bibby,32,CLE,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,12.0,13.0,0.48,3.57,37.0,30.0,5.0,9.0,2.0,2.0,206.2,197.0,100.0,82.0,17.0,73.0,2.0,141.0,4.0,4.0,12.0,876.0,111.0,3.56,1.306,8.6,0.7,3.2,6.1,1.93,R,CLE,AL,37.0,30.0,9.0,206.2,40.0,14.0,21.0,5.0,0.0,0.875,,,1.52,0.95,P,,,,,,,,,,,,,,,,,,,,,,,68,CLE,6,37,30,0,37,37,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.247,M,206.666667,6,3.0,13,6.0,9.0,23,0,0.0,


In [304]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        360
[WP]     98
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [305]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    39
0.142857     8
0.200000     7
0.333333     6
0.083333     6
            ..
0.108571     1
0.125984     1
0.099010     1
0.135417     1
0.118812     1
Name: hr_rate_pit, Length: 291, dtype: int64

In [306]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     781
+    209
-     77
Name: gopher_ball, dtype: int64

In [307]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
5,Santo Alcala,24,CIN,NL,7.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,1.0,1.0,0.500,5.74,7.0,2.0,2.0,0.0,0.0,0.0,15.2,22.0,11.0,10.0,1.0,7.0,1.0,9.0,1.0,1.0,2.0,73.0,70.0,3.95,1.851,12.6,0.6,4.0,5.2,1.29,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.000,,,1.46,0.50,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,24,24.0,46,0,0.0,,0,66,G [n-46],0.338,Z+,15.666667,2,3.0,13,4.0,7.0,21,0,0.0,,,0.045455,-
111,Mike Caldwell,28,CIN,NL,14.0,5.0,4.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.6,1.0,1.6,318.0,4.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,0.0,0.0,,4.01,14.0,0.0,5.0,0.0,0.0,1.0,24.2,25.0,11.0,11.0,1.0,8.0,1.0,11.0,0.0,0.0,1.0,105.0,99.0,3.35,1.338,9.1,0.4,2.9,4.0,1.38,L,TOT,ZZ,35.0,12.0,2.0,119.0,38.0,7.0,31.0,0.0,3.0,1.000,,,2.87,1.09,P,,,,,,,,,,,,,,,,,,,,,,,123,2TM,7,35,12,14,35,35,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,AAA,0,,0,0.0,,0.0,,7,21,0,0.0,n,0,0.0,,14,45,AAA [21-n],0.258,W,24.666667,2,3.0,13,4.0,7.0,21,0,0.0,,,0.040000,-
118,Doug Capilla,25,STL,NL,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,15.43,2.0,0.0,0.0,0.0,0.0,0.0,2.1,2.0,4.0,4.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,11.0,29.0,4.45,1.714,7.7,0.0,7.7,3.9,0.50,L,TOT,NL,24.0,16.0,1.0,108.2,22.0,6.0,13.0,3.0,0.0,0.864,,,1.57,0.79,P,,,,,,,,,,,,,,,,,,,,,,,132,2TM,2,24,16,24,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.222,L,2.333333,1,7.0,21,3.0,10.0,24,0,0.0,,,0.000000,-
170,John D'Acquisto,25,STL,NL,3.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,4.32,3.0,2.0,1.0,0.0,0.0,0.0,8.1,5.0,4.0,4.0,0.0,10.0,0.0,9.0,1.0,0.0,2.0,39.0,94.0,4.54,1.800,5.4,0.0,10.8,9.7,0.90,R,TOT,NL,20.0,14.0,0.0,52.1,12.0,4.0,6.0,2.0,3.0,0.833,,,1.72,0.50,P,,,,,,,,,,,,,,,,,,,,,,,189,2TM,5,20,14,20,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],0.179,K,8.333333,3,9.0,23,8.0,17.0,35,1,18.0,/36,,0.000000,-
196,Tom Dixon,22,HOU,NL,9.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,3.0,0.0,0.0,/1,R,NL,1.0,0.0,1.000,3.26,9.0,4.0,0.0,1.0,0.0,0.0,30.1,40.0,12.0,11.0,0.0,7.0,0.0,15.0,1.0,1.0,1.0,135.0,111.0,2.54,1.549,11.9,0.0,2.1,4.5,2.14,R,HOU,NL,9.0,4.0,1.0,30.1,10.0,2.0,6.0,2.0,0.0,0.800,,,2.37,0.89,P,,,,,,,,,,,,,,,,,,,,,,,215,HOU,1st,9,4,9,9,9,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,0,66,G [n-21],0.315,Y,30.333333,3,2.0,12,4.0,6.0,16,0,0.0,,,0.000000,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1042,Stan Thomas,27,NYY,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.000,7.11,3.0,0.0,2.0,0.0,0.0,0.0,6.1,7.0,7.0,5.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,31.0,59.0,4.32,1.737,9.9,0.0,5.7,1.4,0.25,R,TOT,AL,16.0,9.0,1.0,64.2,15.0,3.0,10.0,2.0,1.0,0.867,,,1.81,0.81,P,,,,,,,,,,,,,,,,,,,,,,,808,2TM,4,16,9,0,16,16,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.259,W,6.333333,2,5.0,15,1.0,6.0,16,0,0.0,,,0.000000,-
1051,Jim Umbarger,24,OAK,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,5.0,0.167,6.55,12.0,8.0,1.0,1.0,0.0,0.0,44.0,62.0,40.0,32.0,3.0,28.0,2.0,24.0,4.0,0.0,4.0,213.0,62.0,4.72,2.045,12.7,0.6,5.7,4.9,0.86,L,TOT,AL,15.0,10.0,1.0,57.0,12.0,0.0,11.0,1.0,1.0,0.917,,,1.74,0.73,P,,,,,,,,,,,,,,,,,,,,,,,834,2TM,3,15,10,0,15,15,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.343,Z+,44.000000,4,5.0,15,4.0,9.0,23,1,10.0,/24,,0.048387,-
1053,John Verhoeven,24,TOT,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,2.0,0.000,3.00,9.0,0.0,5.0,0.0,0.0,0.0,15.0,13.0,6.0,5.0,0.0,6.0,1.0,9.0,1.0,0.0,0.0,65.0,142.0,2.94,1.267,7.8,0.0,3.6,5.4,1.50,R,TOT,AL,9.0,0.0,0.0,15.0,7.0,1.0,6.0,0.0,0.0,1.000,,,4.20,0.78,P,,,,,,,,,,,,,,,,,,,,,,,842,2TM,2,9,0,0,9,9,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.224,M,15.000000,2,3.0,13,5.0,8.0,22,1,9.0,/23,,0.000000,-
1054,John Verhoeven,24,CAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,2.0,0.000,3.86,3.0,0.0,1.0,0.0,0.0,0.0,4.2,4.0,3.0,2.0,0.0,4.0,1.0,3.0,1.0,0.0,0.0,23.0,110.0,4.67,1.714,7.7,0.0,7.7,5.8,0.75,R,TOT,AL,9.0,0.0,0.0,15.0,7.0,1.0,6.0,0.0,0.0,1.000,,,4.20,0.78,P,,,,,,,,,,,,,,,,,,,,,,,842,2TM,2,9,0,0,9,9,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.222,L,4.666667,2,6.0,16,5.0,11.0,25,2,13.0,/31,,0.000000,-


### Pitcher Control Number

In [308]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

12.0    120
11.0    106
13.0     83
14.0     50
10.0     37
15.0     15
17.0     11
9.0       9
16.0      9
18.0      4
8.0       3
24.0      2
22.0      2
20.0      2
36.0      2
7.0       1
4.0       1
21.0      1
Name: control_rate, dtype: int64

In [309]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    609
46     120
51     106
45      83
44      50
52      37
43      15
41      11
42       9
53       9
36       4
54       3
11       2
34       2
32       2
26       2
33       1
55       1
62       1
Name: PCN, dtype: int64

### Probable Hit Number

In [310]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     609
8     137
9     126
10     66
7      57
11     23
6      15
12      8
14      5
13      5
4       5
15      2
18      2
5       2
24      1
20      1
22      1
2       1
36      1
Name: hit_rate_pit, dtype: int64

In [311]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [312]:
players["hit_rate_pit"].value_counts()

8.0     137
9.0     126
10.0     66
7.0      57
11.0     23
6.0      15
12.0      8
14.0      5
4.0       5
13.0      5
5.0       2
18.0      2
15.0      2
24.0      1
20.0      1
36.0      1
22.0      1
2.0       1
Name: hit_rate_pit, dtype: int64

In [313]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    609
55     137
54     126
53      66
56      57
52      23
61      15
51       8
46       5
45       5
63       5
41       2
44       2
62       2
33       1
31       1
35       1
65       1
11       1
Name: PPH, dtype: int64

### Pitcher Rating

In [314]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [315]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

       609
+M2     22
W6      15
+W6     15
W2      13
      ... 
W8       1
Z+4      1
-Y6      1
-X7      1
-L1      1
Name: goph_lett_inn, Length: 112, dtype: int64

In [316]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      609
(13-21)         64
(13-22)         34
(13-23)         26
(12-16)         25
              ... 
(21-36)          1
(36-n)           1
(13-26)          1
(16-24)          1
(12-15/16)       1
Name: bb_k_hbp, Length: 89, dtype: int64

In [317]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               609
(13-21)         64
(13-22)         34
(13-23)         26
(14-23)         25
              ... 
(11-12/13)       1
(16-24)          1
(22-26)          1
(11-13/14)       1
(15-32)          1
Name: bb_k_hbp, Length: 89, dtype: int64

In [318]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

+W6 (13-21)          5
+W7 (13-21)          4
+M2 (14-22)          4
W6 (13-21)  [WP]     3
+M2 (13-22)          3
                    ..
+X3 (12-16)          1
+X6 (13-22)  [WP]    1
+L4 (14-22)          1
+M2 (13-23)          1
+J1 (21-36)          1
Name: pitcher_rating, Length: 409, dtype: int64

In [319]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [320]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,...,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22],,,,,,,,,,0,0.0,,,,,,,,,,,
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,...,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16],,,,,,,,,,0,0.0,,,,,,,,,,,
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,...,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],,,,,,,,,,0,0.0,,,,,,,,,,,
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,...,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36],0.272,W,117.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.103175,+,13.0,45.0,9.0,54.0,+W3,(14-23),+W3 (14-23) [WP]
5,Santo Alcala,24,CIN,NL,7.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,1.0,1.0,0.5,5.74,7.0,2.0,2.0,0.0,0.0,0.0,15.2,22.0,11.0,10.0,1.0,7.0,1.0,9.0,1.0,1.0,2.0,73.0,70.0,3.95,1.851,12.6,0.6,4.0,5.2,1.29,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,24,24.0,46,0,0.0,,0,66,G [n-46],0.338,Z+,15.666667,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.045455,-,15.0,43.0,11.0,52.0,-Z+2,(13-21),-Z+2 (13-21)
6,Santo Alcala,24,MON,NL,31.0,27.0,25.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,13.0,0.08,0.08,0.2,0.28,-26.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,2.0,6.0,0.25,4.69,31.0,10.0,6.0,0.0,0.0,2.0,101.2,104.0,55.0,53.0,12.0,47.0,5.0,64.0,2.0,2.0,4.0,447.0,81.0,4.46,1.485,9.2,1.1,4.2,5.7,1.36,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.064516,,F,18,36.0,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,3,64,F36 [n-35],0.261,W,101.666667,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.115385,+,12.0,46.0,8.0,55.0,+W3,(14-23),+W3 (14-23)
7,Gary Alexander,24,SFG,NL,51.0,143.0,119.0,17.0,36.0,4.0,2.0,5.0,20.0,3.0,1.0,20.0,33.0,0.303,0.406,0.496,0.901,142.0,59.0,5.0,2.0,0.0,2.0,2.0,2H/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,34.0,31.0,28.0,276.1,188.0,174.0,8.0,6.0,0.0,0.968,-10.0,-43.0,5.93,5.35,C-OF,SFG,...,188.0,174.0,8.0,6.0,0.0,0.968,-10.0,-45.0,-10.0,6.1,5.52,2.0,13.0,46.0,7.0,13%,9,SFG,3,51,31,51,34,0,33,0,0,0,0,0,0,1,1,0,19,0,C,0.392157,,B+,5,15.0,2,7.0,(21),0.064,,5,15,8,13.0,31,1,14.0,/32,9,54,B+15(21) [15-31/32],,,,,,,,,,0,0.0,,,,,,,,,,,
8,Matt Alexander,30,OAK,AL,90.0,47.0,42.0,24.0,10.0,1.0,0.0,0.0,2.0,26.0,14.0,4.0,6.0,0.238,0.304,0.262,0.566,59.0,11.0,0.0,0.0,1.0,0.0,0.0,H8D69/745,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,48.0,14.0,2.0,89.2,23.0,21.0,2.0,0.0,0.0,1.0,-2.0,-32.0,2.31,0.48,OF-2B-3B-SS,,...,,,,,,,,,,,,,,,,,10,OAK,5,90,14,90,37,0,0,0,4,1,0,7,17,10,32,13,0,67,OF,0.022222,,C+,0,,0,0.0,,2.0,****,3,13,5,8.0,22,0,0.0,,8,55,C+**** [13-22],,,,,,,,,,0,0.0,,,,,,,,,,,
9,Dick Allen,35,OAK,AL,54.0,200.0,171.0,19.0,41.0,4.0,0.0,5.0,31.0,1.0,3.0,24.0,36.0,0.24,0.33,0.351,0.681,89.0,60.0,4.0,1.0,0.0,4.0,0.0,3/HD,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,50.0,50.0,24.0,387.0,433.0,389.0,37.0,7.0,36.0,0.984,0.0,-1.0,9.91,8.52,1B,,...,,,,,,,,,,,,,,,,,11,OAK,15,54,51,54,50,0,0,50,0,0,0,0,0,0,0,1,3,0,1B,0.574074,,C+,4,14.0,0,0.0,,0.018,,4,14,6,10.0,24,0,0.0,,7,56,C+14 [14-24],,,,,,,,,,0,0.0,,,,,,,,,,,


In [321]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
1057,Tom Walker,28,CAL,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,9.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,2.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,8.0,52.0,14.74,1.5,13.5,9.0,0.0,4.5,,R,TOT,ZZ,12.0,0.0,0.0,21.0,3.0,1.0,2.0,0.0,0.0,1.0,,,1.29,0.25,P,,...,,,,,,,,,,,,,,,,,848,2TM,6,12,0,11,12,12,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.375,Z,2.0,2,0.0,11,4.0,4.0,14,0,0.0,,,0.666667,+,14.0,44,14.0,45,+Z2,(11-14),+Z2 (11-14)
1058,Mike Wallace,26,TEX,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,7.56,5.0,0.0,0.0,0.0,0.0,0.0,8.1,10.0,7.0,7.0,1.0,10.0,0.0,2.0,0.0,0.0,1.0,43.0,57.0,7.42,2.4,10.8,1.1,10.8,2.2,0.2,L,TEX,AL,5.0,0.0,0.0,8.1,3.0,0.0,3.0,0.0,0.0,1.0,,,3.24,0.6,P,,...,,,,,,,,,,,,,,,,,850,TEX,5,5,0,0,5,5,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.303,X,8.333333,2,8.0,22,2.0,10.0,24,0,0.0,,,0.1,+,17.0,41,8.0,55,+X2,(22-24),+X2 (22-24)
1059,Gary Wheelock,25,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,6.0,9.0,0.4,4.89,17.0,17.0,0.0,2.0,0.0,0.0,88.1,94.0,58.0,48.0,16.0,26.0,0.0,47.0,2.0,0.0,3.0,382.0,84.0,4.98,1.358,9.6,1.6,2.6,4.8,1.81,R,SEA,AL,17.0,17.0,2.0,88.1,21.0,10.0,9.0,2.0,0.0,0.905,,,1.94,1.12,P,,...,,,,,,,,,,,,,,,,,863,SEA,2,17,17,0,17,17,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.266,W,88.333333,5,2.0,12,4.0,6.0,16,0,0.0,,,0.170213,+,11.0,51,9.0,54,+W5,(12-16),+W5 (12-16)
1060,Milt Wilcox,27,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,6.0,2.0,0.75,3.64,20.0,13.0,4.0,1.0,0.0,0.0,106.1,96.0,46.0,43.0,13.0,37.0,1.0,82.0,1.0,1.0,1.0,441.0,118.0,3.86,1.251,8.1,1.1,3.1,6.9,2.22,R,DET,AL,20.0,13.0,1.0,106.1,20.0,4.0,14.0,2.0,2.0,0.9,,,1.52,0.9,P,,...,,,,,,,,,,,,,,,,,872,DET,7,20,13,0,20,20,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.238,M,106.333333,5,3.0,13,7.0,10.0,24,0,0.0,,,0.135417,+,11.0,51,8.0,55,+M5,(13-24),+M5 (13-24)
1061,Randy Wiles,25,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,1.0,0.5,10.13,5.0,0.0,4.0,0.0,0.0,0.0,2.2,5.0,3.0,3.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,16.0,46.0,10.99,3.0,16.9,3.4,10.1,0.0,0.0,L,CHW,AL,5.0,0.0,0.0,2.2,1.0,0.0,1.0,0.0,0.0,1.0,,,3.38,0.2,P,,...,,,,,,,,,,,,,,,,,873,CHW,1st,5,0,0,5,5,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.385,Z,2.666667,1,7.0,21,0.0,0.0,n,0,0.0,,,0.2,+,18.0,36,11.0,52,+Z1,(21-n),+Z1 (21-n)
1062,Mike Willis,26,TOR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,6.0,0.25,3.94,43.0,3.0,28.0,0.0,0.0,5.0,107.1,105.0,48.0,47.0,15.0,38.0,6.0,59.0,0.0,0.0,2.0,451.0,107.0,4.52,1.332,8.8,1.3,3.2,4.9,1.55,L,TOR,AL,43.0,3.0,0.0,107.1,26.0,6.0,19.0,1.0,1.0,0.962,,,2.1,0.58,P,,...,,,,,,,,,,,,,,,,,878,TOR,1st,43,3,0,43,43,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.254,W,107.333333,2,3.0,13,5.0,8.0,22,0,0.0,,,0.142857,+,11.0,51,8.0,55,+W2,(13-22),+W2 (13-22)
1063,Jim Willoughby,28,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,6.0,2.0,0.75,4.94,31.0,0.0,16.0,0.0,0.0,2.0,54.2,54.0,32.0,30.0,5.0,18.0,3.0,33.0,2.0,0.0,2.0,234.0,92.0,3.82,1.317,8.9,0.8,3.0,5.4,1.83,R,BOS,AL,31.0,0.0,0.0,54.2,23.0,12.0,10.0,1.0,1.0,0.957,,,3.62,0.71,P,,...,,,,,,,,,,,,,,,,,879,BOS,7,31,0,0,31,31,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.252,W,54.666667,2,3.0,13,5.0,8.0,22,0,0.0,,,0.092593,,11.0,51,8.0,55,W2,(13-22),W2 (13-22)
1064,Rick Wise,31,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,11.0,5.0,0.688,4.77,26.0,20.0,2.0,4.0,2.0,0.0,128.1,151.0,68.0,68.0,19.0,28.0,1.0,85.0,4.0,1.0,1.0,555.0,95.0,4.09,1.395,10.6,1.3,2.0,6.0,3.04,R,BOS,AL,26.0,20.0,4.0,128.1,31.0,8.0,23.0,0.0,1.0,1.0,,,2.17,1.19,P,,...,,,,,,,,,,,,,,,,,883,BOS,13,26,20,0,26,26,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.289,X,128.333333,5,2.0,12,6.0,8.0,22,0,0.0,,,0.125828,+,12.0,46,10.0,53,+X5,(12-22),+X5 (12-22)
1065,Wilbur Wood,35,CHW,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,7.0,8.0,0.467,4.99,24.0,18.0,2.0,5.0,1.0,0.0,122.2,139.0,75.0,68.0,10.0,50.0,0.0,42.0,10.0,0.0,0.0,548.0,83.0,4.58,1.541,10.2,0.7,3.7,3.1,0.84,L,CHW,AL,24.0,18.0,5.0,122.2,36.0,3.0,33.0,0.0,1.0,1.0,,,2.64,1.5,P,,...,,,,,,,,,,,,,,,,,887,CHW,16,24,18,0,24,24,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.285,X,122.666667,5,3.0,13,3.0,6.0,16,1,7.0,/21,,0.071942,,13.0,45,9.0,54,X5,(13-16/21),X5 (13-16/21)
1066,Geoff Zahn,31,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,12.0,14.0,0.462,4.68,34.0,32.0,0.0,7.0,1.0,0.0,198.0,234.0,116.0,103.0,20.0,66.0,4.0,88.0,5.0,0.0,5.0,870.0,85.0,4.24,1.515,10.6,0.9,3.0,4.0,1.33,L,MIN,AL,34.0,32.0,7.0,198.0,58.0,16.0,40.0,2.0,5.0,0.966,,,2.55,1.65,P,,...,,,,,,,,,,,,,,,,,897,MIN,5,34,32,0,34,34,0,0,0,0,0,0,0,0,0,0,0,0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.293,X,198.0,6,3.0,13,4.0,7.0,21,0,0.0,,[WP],0.08547,,13.0,45,10.0,53,X6,(13-21),X6 (13-21) [WP]


In [322]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [323]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [324]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [325]:
players["Primary_Pos_fld"].value_counts()

P     455
OF    226
C      93
1B     78
2B     75
SS     72
3B     58
Name: Primary_Pos_fld, dtype: int64

In [326]:
players["Primary_Pos_fld"].isnull().sum()

10

In [327]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.985859
2B    0.974507
3B    0.963414
C     0.984022
OF    0.969794
P     0.943871
SS    0.962181
Name: Fld%, dtype: float64

### Superior Rating

In [328]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [329]:
players["superior_rating"].value_counts()

     714
S    353
Name: superior_rating, dtype: int64

In [330]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,...,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22],,,,,,,,,,0,0.0,,,,,,,,,,,,S
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,...,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16],,,,,,,,,,0,0.0,,,,,,,,,,,,
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,...,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],,,,,,,,,,0,0.0,,,,,,,,,,,,
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,...,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,,
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36],0.272,W,117.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.103175,+,13.0,45.0,9.0,54.0,+W3,(14-23),+W3 (14-23) [WP],S
5,Santo Alcala,24,CIN,NL,7.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,1.0,1.0,0.5,5.74,7.0,2.0,2.0,0.0,0.0,0.0,15.2,22.0,11.0,10.0,1.0,7.0,1.0,9.0,1.0,1.0,2.0,73.0,70.0,3.95,1.851,12.6,0.6,4.0,5.2,1.29,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,24,24.0,46,0,0.0,,0,66,G [n-46],0.338,Z+,15.666667,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.045455,-,15.0,43.0,11.0,52.0,-Z+2,(13-21),-Z+2 (13-21),S
6,Santo Alcala,24,MON,NL,31.0,27.0,25.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,13.0,0.08,0.08,0.2,0.28,-26.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,2.0,6.0,0.25,4.69,31.0,10.0,6.0,0.0,0.0,2.0,101.2,104.0,55.0,53.0,12.0,47.0,5.0,64.0,2.0,2.0,4.0,447.0,81.0,4.46,1.485,9.2,1.1,4.2,5.7,1.36,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.064516,,F,18,36.0,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,3,64,F36 [n-35],0.261,W,101.666667,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.115385,+,12.0,46.0,8.0,55.0,+W3,(14-23),+W3 (14-23),S
7,Gary Alexander,24,SFG,NL,51.0,143.0,119.0,17.0,36.0,4.0,2.0,5.0,20.0,3.0,1.0,20.0,33.0,0.303,0.406,0.496,0.901,142.0,59.0,5.0,2.0,0.0,2.0,2.0,2H/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SFG,NL,34.0,31.0,28.0,276.1,188.0,174.0,8.0,6.0,0.0,0.968,-10.0,-43.0,5.93,5.35,C-OF,SFG,...,174.0,8.0,6.0,0.0,0.968,-10.0,-45.0,-10.0,6.1,5.52,2.0,13.0,46.0,7.0,13%,9,SFG,3,51,31,51,34,0,33,0,0,0,0,0,0,1,1,0,19,0,C,0.392157,,B+,5,15.0,2,7.0,(21),0.064,,5,15,8,13.0,31,1,14.0,/32,9,54,B+15(21) [15-31/32],,,,,,,,,,0,0.0,,,,,,,,,,,,
8,Matt Alexander,30,OAK,AL,90.0,47.0,42.0,24.0,10.0,1.0,0.0,0.0,2.0,26.0,14.0,4.0,6.0,0.238,0.304,0.262,0.566,59.0,11.0,0.0,0.0,1.0,0.0,0.0,H8D69/745,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,48.0,14.0,2.0,89.2,23.0,21.0,2.0,0.0,0.0,1.0,-2.0,-32.0,2.31,0.48,OF-2B-3B-SS,,...,,,,,,,,,,,,,,,,10,OAK,5,90,14,90,37,0,0,0,4,1,0,7,17,10,32,13,0,67,OF,0.022222,,C+,0,,0,0.0,,2.0,****,3,13,5,8.0,22,0,0.0,,8,55,C+**** [13-22],,,,,,,,,,0,0.0,,,,,,,,,,,,S
9,Dick Allen,35,OAK,AL,54.0,200.0,171.0,19.0,41.0,4.0,0.0,5.0,31.0,1.0,3.0,24.0,36.0,0.24,0.33,0.351,0.681,89.0,60.0,4.0,1.0,0.0,4.0,0.0,3/HD,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OAK,AL,50.0,50.0,24.0,387.0,433.0,389.0,37.0,7.0,36.0,0.984,0.0,-1.0,9.91,8.52,1B,,...,,,,,,,,,,,,,,,,11,OAK,15,54,51,54,50,0,0,50,0,0,0,0,0,0,0,1,3,0,1B,0.574074,,C+,4,14.0,0,0.0,,0.018,,4,14,6,10.0,24,0,0.0,,7,56,C+14 [14-24],,,,,,,,,,0,0.0,,,,,,,,,,,,


### Arm Rating

In [331]:
players["G"].value_counts()

34.0     30
1.0      24
3.0      23
2.0      21
5.0      21
         ..
128.0     1
132.0     1
98.0      1
121.0     1
94.0      1
Name: G, Length: 158, dtype: int64

In [332]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.6693310140705703

In [333]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [334]:
players["arm_rating"].value_counts()

8    737
9    330
Name: arm_rating, dtype: int64

### Range Rating

In [335]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.2985561431476338

In [336]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    4.450346
2B    1.416129
3B    0.728850
C     3.801720
OF    1.478954
P     0.212445
SS    1.284694
Name: po_gp, dtype: float64

In [337]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [338]:
players["range_rating"].value_counts()

4    822
5    245
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [339]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [340]:
players["CS%"].value_counts()

0%     12
34%     6
33%     5
44%     5
28%     4
39%     4
38%     4
36%     4
43%     4
25%     4
42%     3
50%     3
35%     3
46%     3
41%     3
17%     3
31%     3
83%     3
29%     2
40%     2
11%     2
24%     2
30%     2
18%     1
37%     1
48%     1
57%     1
71%     1
45%     1
22%     1
13%     1
58%     1
52%     1
56%     1
10%     1
Name: CS%, dtype: int64

In [341]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.32800761827683617

In [342]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [343]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    977
-2     32
-3     22
       20
-1     16
Name: cs_num, dtype: int64

In [344]:
players["G_cat"].isnull().sum()

961

In [345]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      989
-2     32
-3     22
-1     16
-4      8
Name: cs_num, dtype: int64

In [346]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num
180,Bob Davis,25,SDP,NL,48.0,104.0,94.0,9.0,17.0,2.0,0.0,1.0,10.0,0.0,0.0,5.0,24.0,0.181,0.235,0.234,0.469,33.0,22.0,2.0,2.0,2.0,1.0,2.0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SDP,NL,46.0,31.0,15.0,263.1,159.0,136.0,19.0,4.0,2.0,0.975,4.0,18.0,5.3,3.37,C,SDP,...,18.0,4.0,5.3,3.37,0.0,8.0,12.0,16.0,57%,198,SDP,4,48,31,48,45,0,45,0,0,0,0,0,0,0,0,0,2,2,C,0.208333,,D+,2,12.0,0,0.0,,0.0,,2,12,8,10.0,24,1,11.0,/25,6,61,D+12 [12-24/25],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.395833,9,2.833333,4,0.571429,-4
187,Rick Dempsey,27,BAL,AL,91.0,314.0,270.0,27.0,61.0,7.0,4.0,3.0,34.0,2.0,3.0,34.0,34.0,0.226,0.314,0.315,0.629,77.0,85.0,9.0,2.0,5.0,3.0,1.0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,91.0,84.0,74.0,745.0,479.0,416.0,52.0,11.0,10.0,0.977,9.0,14.0,5.65,5.14,C,BAL,...,14.0,9.0,5.65,5.14,2.0,15.0,30.0,41.0,58%,206,BAL,9,91,84,91,91,0,91,0,0,0,0,0,0,0,0,0,2,0,C,0.373626,,C+,2,12.0,2,4.0,(14),0.024,,4,14,4,8.0,22,0,0.0,,7,56,C+12(14) [14-22],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.571429,9,4.571429,4,0.577465,-4
379,Cliff Johnson,29,TOT,MLB,107.0,339.0,286.0,46.0,85.0,16.0,0.0,22.0,54.0,0.0,2.0,43.0,53.0,0.297,0.407,0.584,0.991,171.0,167.0,3.0,10.0,0.0,0.0,2.0,7HD32/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,70.0,59.0,34.0,503.0,287.0,258.0,25.0,4.0,19.0,0.986,-1.0,-3.0,5.06,4.04,OF-1B-C,NYY,...,12.0,1.0,5.64,4.13,1.0,3.0,1.0,5.0,83%,404,2TM,6,107,76,107,65,0,15,21,0,0,0,33,0,4,34,25,26,0,OF,0.504673,,B+,9,23.0,0,0.0,,0.0,,5,15,6,11.0,25,1,12.0,/26,9,54,B+23 [15-25/26],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.233645,9,2.411215,5,0.833333,-4
380,Cliff Johnson,29,HOU,NL,51.0,171.0,144.0,22.0,43.0,8.0,0.0,10.0,23.0,0.0,1.0,23.0,30.0,0.299,0.409,0.563,0.972,169.0,81.0,1.0,4.0,0.0,0.0,2.0,7H3/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,70.0,59.0,34.0,503.0,287.0,258.0,25.0,4.0,19.0,0.986,-1.0,-3.0,5.06,4.04,OF-1B-C,NYY,...,12.0,1.0,5.64,4.13,1.0,3.0,1.0,5.0,83%,404,2TM,6,107,76,107,65,0,15,21,0,0,0,33,0,4,34,25,26,0,OF,0.45098,,B+,8,22.0,0,0.0,,0.0,,5,15,6,11.0,25,1,12.0,/26,9,54,B+22 [15-25/26],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.233645,9,2.411215,5,0.833333,-4
381,Cliff Johnson,29,NYY,AL,56.0,168.0,142.0,24.0,42.0,8.0,0.0,12.0,31.0,0.0,1.0,20.0,23.0,0.296,0.405,0.606,1.01,173.0,86.0,2.0,6.0,0.0,0.0,0.0,D2H3,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,70.0,59.0,34.0,503.0,287.0,258.0,25.0,4.0,19.0,0.986,-1.0,-3.0,5.06,4.04,OF-1B-C,NYY,...,12.0,1.0,5.64,4.13,1.0,3.0,1.0,5.0,83%,404,2TM,6,107,76,107,65,0,15,21,0,0,0,33,0,4,34,25,26,0,OF,0.553571,,B+,10,24.0,0,0.0,,0.0,,4,14,5,9.0,23,1,10.0,/24,9,54,B+24 [14-23/24],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.233645,9,2.411215,5,0.833333,-4
568,Jeff Newman,28,OAK,AL,94.0,172.0,162.0,17.0,36.0,9.0,0.0,4.0,15.0,2.0,0.0,4.0,24.0,0.222,0.244,0.352,0.596,63.0,57.0,2.0,1.0,4.0,1.0,1.0,2/1H,R,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,,5.74,1.0,9.0,0.0,0.0,0.0,,R,OAK,AL,95.0,42.0,31.0,467.1,296.0,251.0,36.0,9.0,5.0,0.97,2.0,5.0,5.53,3.02,C-P,OAK,...,5.0,2.0,5.54,3.05,4.0,20.0,27.0,29.0,52%,600,OAK,2,94,42,94,94,1,94,0,0,0,0,0,0,0,0,0,1,0,C,0.159574,,C,4,14.0,0,0.0,,0.071,,1,11,5,6.0,16,0,0.0,,8,55,C14 [11-16],0.25,M,1.0,1.0,0.0,11.0,0.0,0.0,n,7,7.0,/21,,0.0,-,14.0,44.0,7.0,56.0,-M1,(11-n/21),-M1 (11-n/21),,0.382979,9,2.670213,4,0.517857,-4
604,Lance Parrish,21,DET,AL,12.0,51.0,46.0,10.0,9.0,2.0,0.0,3.0,7.0,0.0,0.0,5.0,12.0,0.196,0.275,0.435,0.709,87.0,20.0,2.0,0.0,0.0,0.0,0.0,2,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,12.0,12.0,11.0,104.1,82.0,76.0,6.0,0.0,0.0,1.0,1.0,12.0,7.07,6.83,C,DET,...,12.0,1.0,7.07,6.83,1.0,5.0,2.0,5.0,71%,632,DET,1st,12,12,12,12,0,12,0,0,0,0,0,0,0,0,0,0,0,C,0.583333,,C,12,26.0,0,0.0,,0.0,,4,14,8,12.0,26,0,0.0,,6,61,C26 [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.5,9,6.333333,4,0.714286,-4
750,Jim Sundberg,26,TEX,AL,149.0,533.0,453.0,61.0,132.0,20.0,3.0,6.0,65.0,2.0,3.0,53.0,77.0,0.291,0.365,0.389,0.753,105.0,176.0,8.0,2.0,20.0,5.0,0.0,*2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TEX,AL,149.0,137.0,115.0,1214.0,909.0,801.0,103.0,5.0,12.0,0.994,14.0,14.0,6.7,6.07,C,TEX,...,14.0,14.0,6.7,6.07,5.0,38.0,43.0,55.0,56%,786,TEX,4,149,137,149,149,0,149,0,0,0,0,0,0,0,0,0,2,2,C,0.436242,,B+,2,12.0,1,3.0,(13),0.013,,4,14,5,9.0,23,0,0.0,,9,54,B+12(13) [14-23],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.691275,9,5.375839,4,0.561224,-4


### Fielder Rating

In [347]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

84        392
S84       233
95         94
94         83
85         81
S94        39
S95        37
S85        30
94 -2      28
94 -3      17
94 -1      12
S94 -2      4
S94 -1      4
S94 -3      4
94 -4       3
95 -4       3
S94 -4      2
84 -3       1
Name: fielder_rating, dtype: int64

In [348]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [349]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [350]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [351]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,...,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.066667,8,1.733333,4,0.0,,S84
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,...,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.031579,8,0.631579,4,,,84
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,...,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.0,8,0.0,4,,,84
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,...,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.190476,8,2.238095,4,,,84
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,...,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36],0.272,W,117.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.103175,+,13.0,45.0,9.0,54.0,+W3,(14-23),+W3 (14-23) [WP],S,0.289474,8,0.210526,4,,,S84


In [352]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [353]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,AL,1.0,1.0,1.0,9.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.066667,8,1.733333,4,0.0,,S84
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,,,,,,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.031579,8,0.631579,4,,,84
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,,,,,,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.0,8,0.0,4,,,84
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,,,,,,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.190476,8,2.238095,4,,,84
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36],0.272,W,117.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.103175,+,13.0,45.0,9.0,54.0,+W3,(14-23),+W3 (14-23) [WP],S,0.289474,8,0.210526,4,,,S84


In [354]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Bob Adams,25,DET,AL,15.0,24.0,24.0,2.0,6.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,5.0,0.25,0.25,0.542,0.792,105.0,13.0,0.0,0.0,0.0,0.0,0.0,H/32,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,3.0,3.0,3.0,27.0,27.0,26.0,1.0,0.0,1.0,1.0,-1.0,-22.0,9.0,9.0,1B-C,DET,AL,1.0,1.0,1.0,9.0,6.0,6.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.0,6.0,0.0,0.0,1.0,0.0,0%,3,DET,1st,15,3,15,3,0,1,2,0,0,0,0,0,0,0,0,12,0,1B,0.133333,,C+,12,26.0,0,0.0,,0.0,,0,n,8,8.0,22,0,0.0,,9,54,C+26 [n-22],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.066667,8,1.733333,4,0.0,,S84
1,Glenn Adams,29,MIN,AL,95.0,290.0,269.0,32.0,91.0,17.0,0.0,6.0,49.0,0.0,2.0,18.0,30.0,0.338,0.376,0.468,0.844,130.0,126.0,5.0,0.0,0.0,3.0,3.0,D97H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIN,AL,44.0,38.0,11.0,271.0,65.0,60.0,3.0,2.0,1.0,0.969,-3.0,-13.0,2.09,1.43,OF,,,,,,,,,,,,,,,,,,,,,,,4,MIN,3,95,80,95,44,0,0,0,0,0,0,16,0,28,44,46,14,0,OF,0.515789,,A+,2,12.0,0,0.0,,0.0,,2,12,4,6.0,16,0,0.0,,11,52,A+12 [12-16],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.031579,8,0.631579,4,,,84
2,Mike Adams,28,CHC,NL,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/87H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHC,NL,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,-120.0,0.0,0.0,OF,,,,,,,,,,,,,,,,,,,,,,,5,CHC,4,2,1,2,2,0,0,0,0,0,0,1,1,0,2,0,1,0,OF,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.0,8,0.0,4,,,84
3,Willie Aikens,22,CAL,AL,42.0,101.0,91.0,5.0,18.0,4.0,0.0,0.0,6.0,1.0,2.0,10.0,23.0,0.198,0.277,0.242,0.519,46.0,22.0,1.0,0.0,0.0,0.0,2.0,H3D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CAL,AL,13.0,12.0,6.0,99.0,105.0,94.0,8.0,3.0,10.0,0.971,0.0,-2.0,9.27,7.85,1B,,,,,,,,,,,,,,,,,,,,,,,6,CAL,1st,42,23,42,13,0,0,13,0,0,0,0,0,0,0,13,18,0,1B,0.142857,,C,0,,0,0.0,,0.042,,4,14,8,12.0,26,0,0.0,,6,61,C [14-26],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.190476,8,2.238095,4,,,84
4,Santo Alcala,24,TOT,NL,38.0,30.0,28.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.071,0.071,0.179,0.25,-34.0,5.0,0.0,0.0,2.0,0.0,0.0,1,R,NL,3.0,7.0,0.3,4.83,38.0,12.0,8.0,0.0,0.0,2.0,117.1,126.0,66.0,63.0,13.0,54.0,6.0,73.0,3.0,3.0,6.0,520.0,79.0,4.39,1.534,9.7,1.0,4.1,5.6,1.35,R,TOT,NL,38.0,12.0,0.0,117.1,19.0,8.0,11.0,0.0,0.0,1.0,,,1.46,0.5,P,,,,,,,,,,,,,,,,,,,,,,,7,2TM,2,38,12,38,38,38,0,0,0,0,0,0,0,0,0,0,0,0,P,0.052632,,F,18,36.0,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,2,65,F36 [n-36],0.272,W,117.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,[WP],0.103175,+,13.0,45.0,9.0,54.0,+W3,(14-23),+W3 (14-23) [WP],S,0.289474,8,0.210526,4,,,S84


In [355]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

3.0      24
13.0     23
2.0      22
1.0      22
4.0      22
         ..
161.0     1
77.0      1
88.0      1
98.0      1
135.0     1
Name: Games_Played, Length: 155, dtype: int64

In [356]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "Bats", "pitcher_rating", "PCN", 
                                "PPH", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "batter_rating": "Batter Rating",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "pitcher_rating": "Pitcher Rating",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Bob Adams,25,DET,15.0,,,1B-C,S84,C+26 [n-22],54,R,,,,,1B,0,1,2,0,0,0,0,0,0,0,0,12,0
1,Glenn Adams,29,MIN,95.0,,,OF,84,A+12 [12-16],52,L,,,,,OF,0,0,0,0,0,0,16,0,28,44,46,14,0
2,Mike Adams,28,CHC,2.0,,,OF,84,G [n-36],66,R,,,,,OF,0,0,0,0,0,0,1,1,0,2,0,1,0
3,Willie Aikens,22,CAL,42.0,,,1B,84,C [14-26],61,L,,,,,1B,0,0,13,0,0,0,0,0,0,0,13,18,0
4,Santo Alcala,24,TOT,38.0,12.0,8.0,P,S84,F36 [n-36],65,R,+W3 (14-23) [WP],45.0,54.0,R,P,38,0,0,0,0,0,0,0,0,0,0,0,0


In [357]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [358]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           455
OF          226
C            93
1B           78
2B           75
SS           72
3B           58
DH_PH_PR     10
              0
Name: Primary, dtype: int64

In [359]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Bob Adams,25,DET,15.0,,,1B-C,S84,C+26 [n-22],54,R,,,,,1B,0,1,2,0,0,0,0,0,0,0,0,12,0
1,Glenn Adams,29,MIN,95.0,,,OF,84,A+12 [12-16],52,L,,,,,OF,0,0,0,0,0,0,16,0,28,44,46,14,0
2,Mike Adams,28,CHC,2.0,,,OF,84,G [n-36],66,R,,,,,OF,0,0,0,0,0,0,1,1,0,2,0,1,0
3,Willie Aikens,22,CAL,42.0,,,1B,84,C [14-26],61,L,,,,,1B,0,0,13,0,0,0,0,0,0,0,13,18,0
4,Santo Alcala,24,TOT,38.0,12.0,8.0,P,S84,F36 [n-36],65,R,+W3 (14-23) [WP],45.0,54.0,R,P,38,0,0,0,0,0,0,0,0,0,0,0,0


In [360]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [361]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [362]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [363]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [364]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
622,Biff Pocoroba,23,ATL,113.0,,,C,94 -2,B+13 [15-22],54,S,,,,,C,0,100,0,0,0,0,0,0,0,0,0,23,0
574,Joe Nolan,26,ATL,62.0,,,C,S94,B+15 [15-23],54,L,,,,,C,0,19,0,0,0,0,0,0,0,0,0,48,0
152,Vic Correll,31,ATL,54.0,,,C,94 -1,C22 [15-26],61,R,,,,,C,0,49,0,0,0,0,0,0,0,0,0,7,0
559,Dale Murphy,21,ATL,18.0,,,C,94,#A13(15) [n-14],52,R,,,,,C,0,18,0,0,0,0,0,0,0,0,0,0,0
541,Willie Montanez,29,ATL,136.0,,,1B,85,B+15 [12-16],53,L,,,,,1B,0,0,134,0,0,0,0,0,0,0,0,2,0
598,Tom Paciorek,30,ATL,72.0,,,1B-OF-3B,84,C+13 [11-25],55,R,,,,,1B,0,0,32,0,1,0,4,2,3,9,0,35,0
271,Rod Gilbreath,24,ATL,128.0,,,2B-3B,85,C+13(14) [13-23],55,R,,,,,2B,0,0,0,122,1,0,0,0,0,0,0,6,1
676,Jerry Royster,24,ATL,140.0,,,3B-SS-2B-OF,85,C12(13)*** [13-22],56,R,,,,,3B,0,0,0,39,55,52,0,1,0,1,0,6,11
546,Junior Moore,24,ATL,112.0,,,3B-2B,84,B12(13) [13-16],55,R,,,,,3B,0,0,0,1,104,0,0,0,0,0,0,14,2
665,Pat Rockett,22,ATL,93.0,,,SS,85,B11 [13-21],55,R,,,,,SS,0,0,0,0,0,84,0,0,0,0,0,3,8


In [365]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'ATL':                  Name  Age   Tm      G  GS  GF    Positions    DEF  \
 622     Biff Pocoroba   23  ATL  113.0                    C  94 -2   
 574         Joe Nolan   26  ATL   62.0                    C   S94    
 152       Vic Correll   31  ATL   54.0                    C  94 -1   
 559       Dale Murphy   21  ATL   18.0                    C    94    
 541   Willie Montanez   29  ATL  136.0                   1B    85    
 598      Tom Paciorek   30  ATL   72.0             1B-OF-3B    84    
 271     Rod Gilbreath   24  ATL  128.0                2B-3B    85    
 676     Jerry Royster   24  ATL  140.0          3B-SS-2B-OF    85    
 546      Junior Moore   24  ATL  112.0                3B-2B    84    
 665       Pat Rockett   22  ATL   93.0                   SS    85    
 137     Darrel Chaney   29  ATL   74.0                SS-2B   S84    
 664    Craig Robinson   28  ATL   27.0                   SS   S84    
 52        Rob Belloir   28  ATL    6.0                   SS   S84    

In [366]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [367]:
create_excel(my_dict)

27

## Format Excel file

In [368]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [369]:
# wb.sheetnames

In [370]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "BRO": "Brooklyn Dodgers",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCA": "Kansas City Athletics",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MLN": "Milwaukee Braves",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYG": "New York Giants",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBD": "Tampa Bay Devil Rays",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators",
    "WSH": "Washington Senators"
}

In [371]:
# team_dict["ATL"]

In [372]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 3
width_2 = 4
width_3 = 5
width_4 = 6
width_5 = 8
width_6 = 12
width_7 = 14
width_8 = 22
width_9 = 24

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_4    
    sheet.column_dimensions['B'].width = width_8
    sheet.column_dimensions['C'].width = width_2
    sheet.column_dimensions['D'].width = width_3
    sheet.column_dimensions['E'].width = width_3
    sheet.column_dimensions['F'].width = width_2
    sheet.column_dimensions['G'].width = width_2
    sheet.column_dimensions['H'].width = width_7
    sheet.column_dimensions['I'].width = width_5
    sheet.column_dimensions['J'].width = width_9
    sheet.column_dimensions['K'].width = width_3
    sheet.column_dimensions['L'].width = width_1
    sheet.column_dimensions['M'].width = width_9
    sheet.column_dimensions['N'].width = width_3
    sheet.column_dimensions['O'].width = width_3
    sheet.column_dimensions['P'].width = width_1
    sheet.column_dimensions['Q'].width = width_6
    sheet.column_dimensions['R'].width = width_3
    sheet.column_dimensions['S'].width = width_3
    sheet.column_dimensions['T'].width = width_3
    sheet.column_dimensions['U'].width = width_3
    sheet.column_dimensions['V'].width = width_3
    sheet.column_dimensions['W'].width = width_3
    sheet.column_dimensions['X'].width = width_3
    sheet.column_dimensions['Y'].width = width_3
    sheet.column_dimensions['Z'].width = width_3
    sheet.column_dimensions['AA'].width = width_3
    sheet.column_dimensions['AB'].width = width_3
    sheet.column_dimensions['AC'].width = width_3
    sheet.column_dimensions['AD'].width = width_3
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment
            cell.font = Font(size = 14)
    
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
    sheet['A2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['B2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['C2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['D2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['E2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['F2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['G2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['H2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['I2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['J2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['K2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['L2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['M2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['N2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['O2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['P2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Q2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['R2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['S2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['T2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['U2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['V2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['W2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['X2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Y2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Z2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AA2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AB2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AC2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AD2'].font = Font(size = 14, bold = True, color='00FFFFFF')
            

wb.save('../data/' + year + ' rosters ' + ' formatted.xlsx')

# Clean up

## Remove unwanted files

In [373]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')