# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [3]:
year = '1955'

In [4]:
url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'
url_app = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-appearances-fielding.shtml'

In [5]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/1955-specialpos_c-fielding.shtml'

In [6]:
session_requests = requests.session()

result = session_requests.get(url_bat, headers = dict(referer = url_bat))
result.status_code

200

In [7]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [8]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [9]:
soup = BeautifulSoup(result.content, 'lxml')

In [10]:
# print(soup.prettify())

In [11]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [12]:
bat = pullTable(url_bat, 'players_standard_batting')

In [13]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [14]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [15]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [16]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [17]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [18]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [19]:
findTables(url_app)

['teams_appearances', 'players_players_appearances_fielding']

In [20]:
app = pullTable(url_app, 'players_players_appearances_fielding')

In [21]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,...,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
705,706.0,Don Zimmer,24.0,BRO,NL,88,309,280,38,67,...,0.443,0.731,88.0,124,7,2,4,4,5,46/5H
706,707.0,George Zuverink,30.0,TOT,AL,42,29,27,1,5,...,0.222,0.437,22.0,6,1,0,1,0,0,1
707,708.0,George Zuverink,30.0,DET,AL,14,4,4,0,0,...,0.0,0.0,-100.0,0,0,0,0,0,0,1
708,709.0,George Zuverink,30.0,BAL,AL,28,25,23,1,5,...,0.261,0.511,43.0,6,1,0,1,0,0,1
709,,LgAvg per 600 PA,,,,199,600,528,69,136,...,0.392,0.723,,207,12,3,7,4,5,


In [22]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [23]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [24]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    463
L    218
S     28
Name: Bats, dtype: int64

In [25]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
704,705,Gus Zernial,32,KCA,AL,120,454,413,62,105,...,0.812,116,210,17,3,0,8,1,*7H,R
705,706,Don Zimmer,24,BRO,NL,88,309,280,38,67,...,0.731,88,124,7,2,4,4,5,46/5H,R
706,707,George Zuverink,30,TOT,AL,42,29,27,1,5,...,0.437,22,6,1,0,1,0,0,1,R
707,708,George Zuverink,30,DET,AL,14,4,4,0,0,...,0.0,-100,0,0,0,0,0,0,1,R
708,709,George Zuverink,30,BAL,AL,28,25,23,1,5,...,0.511,43,6,1,0,1,0,0,1,R


In [26]:
bat.drop(columns=["Rk"], inplace=True)

In [27]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
704,Gus Zernial,32,KCA,AL,120,454,413,62,105,9,...,0.812,116,210,17,3,0,8,1,*7H,R
705,Don Zimmer,24,BRO,NL,88,309,280,38,67,10,...,0.731,88,124,7,2,4,4,5,46/5H,R
706,George Zuverink,30,TOT,AL,42,29,27,1,5,1,...,0.437,22,6,1,0,1,0,0,1,R
707,George Zuverink,30,DET,AL,14,4,4,0,0,0,...,0.0,-100,0,0,0,0,0,0,1,R
708,George Zuverink,30,BAL,AL,28,25,23,1,5,1,...,0.511,43,6,1,0,1,0,0,1,R


In [28]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,...,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
296,297.0,Early Wynn,35.0,CLE,AL,17,11,0.607,2.82,32,...,3,964,142,3.46,1.248,8.1,0.7,3.1,4.8,1.53
297,298.0,George Zuverink,30.0,TOT,AL,4,8,0.333,3.38,42,...,2,474,113,3.79,1.299,9.3,0.9,2.4,3.5,1.42
298,299.0,George Zuverink,30.0,DET,AL,0,5,0.0,6.99,14,...,0,141,56,5.79,1.835,12.1,1.9,4.4,4.1,0.93
299,300.0,George Zuverink,30.0,BAL,AL,4,3,0.571,2.19,28,...,2,333,173,3.13,1.124,8.3,0.5,1.8,3.2,1.82
300,,LgAvg per 180 IP,,,,10,10,0.495,4.01,49,...,4,777,100,4.0,1.395,8.9,0.9,3.7,4.4,1.2


In [29]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [30]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [31]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    214
L     86
Name: Throws, dtype: int64

In [32]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,...,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
295,296,Mel Wright,27,STL,NL,2,2,0.5,6.19,29,...,160,66,3.63,1.459,10.9,1.0,2.2,4.5,2.0,R
296,297,Early Wynn,35,CLE,AL,17,11,0.607,2.82,32,...,964,142,3.46,1.248,8.1,0.7,3.1,4.8,1.53,R
297,298,George Zuverink,30,TOT,AL,4,8,0.333,3.38,42,...,474,113,3.79,1.299,9.3,0.9,2.4,3.5,1.42,R
298,299,George Zuverink,30,DET,AL,0,5,0.0,6.99,14,...,141,56,5.79,1.835,12.1,1.9,4.4,4.1,0.93,R
299,300,George Zuverink,30,BAL,AL,4,3,0.571,2.19,28,...,333,173,3.13,1.124,8.3,0.5,1.8,3.2,1.82,R


In [33]:
pit.drop(columns=["Rk"], inplace=True)

In [34]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [35]:
players.shape

(709, 62)

In [36]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos Summary
582,583.0,Norm Zauchin,25.0,BOS,AL,126,125,124,1120.1,1227,1137,84,6,106,0.995,10.0,10.0,9.81,9.69,1B
583,584.0,Gus Zernial,32.0,KCA,AL,103,102,80,860.1,249,231,9,9,4,0.964,-2.0,-3.0,2.51,2.33,OF
584,585.0,Don Zimmer,24.0,BRO,NL,91,75,67,683.2,403,184,207,12,63,0.97,-2.0,-4.0,5.15,4.3,2B-SS-3B
585,586.0,George Zuverink,30.0,TOT,AL,42,6,0,114.2,37,9,25,3,2,0.919,,,2.67,0.81,P
586,,LgAvg,,,,177,141,115,1257.0,607,415,177,15,44,0.976,1.0,0.0,4.24,3.34,


In [37]:
fld = fld[fld["Name"] != "LgAvg"]

In [38]:
fld.drop(columns=["Rk"], inplace=True)

In [39]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [40]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,...,Rtot,Rtot/yr,Rctch,RF/9,RF/G,PB,WP,SB,CS,CS%
69,70.0,Wes Westrum,32.0,NYG,NL,68,51,23,439.2,311,...,1,3,1,6.28,4.51,4,7,13,11,46%
70,71.0,Charlie White,27.0,MLN,NL,10,9,6,75.0,41,...,-1,-16,-1,4.92,4.1,1,5,3,2,40%
71,72.0,Sammy White,27.0,BOS,AL,143,142,138,1255.2,754,...,1,1,1,5.32,5.19,5,27,39,35,47%
72,73.0,Red Wilson,26.0,DET,AL,72,62,56,586.1,322,...,-3,-6,-3,4.87,4.4,5,19,11,11,50%
73,,LgAvg,,,,17,15,12,132.0,85,...,0,0,0,5.67,4.81,1,3,4,4,47%


In [41]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [42]:
cat = cat[cat["Name"] != "LgAvg"]

In [43]:
cat.drop(columns=["Rk"], inplace=True)

In [44]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [45]:
app.tail()

Unnamed: 0,Rk,Name,Age,Tm,Yrs,G,GS,Batting,Defense,P,...,1B,2B,3B,SS,LF,CF,RF,OF,PH,PR
594,595,Bobby Young,30,2TM,6,77,64,77,71,0,...,0,69,2,0,0,0,0,0,7,3
595,596,Norm Zauchin,25,BOS,2,130,125,130,126,0,...,126,0,0,0,0,0,0,0,4,0
596,597,Gus Zernial,32,KCA,7,120,102,120,103,0,...,0,0,0,0,103,0,0,103,18,0
597,598,Don Zimmer,24,BRO,2,88,75,88,84,0,...,0,62,8,21,0,0,0,0,3,3
598,599,George Zuverink,30,2TM,4,42,6,42,42,42,...,0,0,0,0,0,0,0,0,0,0


In [46]:
players = pd.merge(players, app, how='left', on=["Name", "Age"], suffixes=('', '_app'))

In [47]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [48]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,...,0,27,0,0,30,0,104,126,2,0
1,Al Aber,27,DET,AL,39,19,17,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,...,0,0,0,0,0,0,0,0,0,0
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,...,4,0,0,0,13,58,46,96,20,4
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,...,0,6,51,0,0,0,0,0,25,17


In [49]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [50]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'RF/9_cat',
 'RF/G_cat',
 'PB',
 'WP_cat',
 'SB_cat',
 'CS_cat',
 'CS%',
 'Rk',
 'Tm_app',
 'Yrs',
 'G_app',
 'GS_app',
 'Batt

In [51]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [52]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     301
OF    153
C      70
3B     54
1B     46
SS     43
2B     35
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [53]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [54]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    237
0.142857      9
0.166667      9
0.111111      9
0.500000      8
           ... 
0.151899      1
0.663793      1
0.431373      1
0.361702      1
0.416107      1
Name: rbi_per_g, Length: 323, dtype: int64

In [55]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     682
#     33
Name: clutch, dtype: int64

In [56]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,...,0,30,0,104,126,2,0,OF,0.69281,#
27,Ernie Banks,24,CHC,NL,154,646,596,98,176,29,...,154,0,0,0,0,0,0,SS,0.75974,#
36,Gus Bell,26,CIN,NL,154,672,610,88,188,30,...,0,0,154,0,154,0,0,OF,0.675325,#
37,Lou Berberet,25,NYY,AL,2,6,5,1,2,0,...,0,0,0,0,0,1,0,C,1.0,#
38,Yogi Berra,30,NYY,AL,147,615,541,84,147,20,...,0,0,0,0,0,6,0,C,0.734694,#
55,Ray Boone,31,DET,AL,135,560,500,61,142,22,...,0,0,0,0,0,9,0,3B,0.859259,#
77,Smoky Burgess,28,TOT,NL,123,501,442,71,133,17,...,0,0,0,0,0,12,0,C,0.634146,#
79,Smoky Burgess,28,CIN,NL,116,477,421,67,129,15,...,0,0,0,0,0,12,0,C,0.663793,#
89,Roy Campanella,33,BRO,NL,123,522,446,81,142,20,...,0,0,0,0,0,4,0,C,0.869919,#
167,Del Ennis,30,PHI,NL,146,624,564,82,167,24,...,0,143,0,3,145,1,0,OF,0.821918,#


### Letter Rating

In [57]:
players["BA"].value_counts()

0.000    83
0.200    14
0.250    12
0.125    11
0.167    11
         ..
0.108     1
0.048     1
0.235     1
0.215     1
0.276     1
Name: BA, Length: 214, dtype: int64

In [58]:
players["BA"].isnull().sum()

45

In [59]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [60]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [61]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      129
B      101
C+      83
C       80
B+      61
D+      47
D       44
E       41
A       33
E+      27
AAA     24
F       19
A+      11
G+       8
AA       7
Name: bat_letter, dtype: int64

In [62]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
37,Lou Berberet,25,NYY,AL,2,6,5,1,2,0,...,0,0,0,0,1,0,C,1.0,#,AAA
43,Charlie Bishop,31,KCA,AL,4,2,2,0,1,0,...,0,0,0,0,0,0,P,0.0,,AAA
60,Roger Bowman,27,PIT,NL,7,3,2,0,1,0,...,0,0,0,0,0,0,P,0.0,,AAA
67,Dick Brodowski,22,BOS,AL,16,10,10,3,5,1,...,0,0,0,0,0,0,P,0.125,,AAA
70,Hal Brown,30,BOS,AL,2,1,1,0,1,0,...,0,0,0,0,0,10,P,0.5,,AAA
112,Rocky Colavito,21,CLE,AL,5,9,9,3,4,2,...,0,0,2,2,2,2,OF,0.0,,AAA
116,Joe Coleman,32,TOT,AL,23,8,7,2,5,1,...,0,0,0,0,0,0,P,0.043478,,AAA
117,Joe Coleman,32,BAL,AL,6,3,3,0,2,1,...,0,0,0,0,0,0,P,0.166667,,AAA
118,Joe Coleman,32,DET,AL,17,5,4,2,3,0,...,0,0,0,0,0,0,P,0.0,,AAA
200,Marion Fricano,31,KCA,AL,10,3,3,0,2,0,...,0,0,0,0,0,0,P,0.0,,AAA


In [63]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.223

### HR Number

In [64]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     423
2      51
1      43
3      42
5      33
4      32
6      24
7      19
8      16
9      13
12      7
10      5
36      3
24      1
11      1
16      1
18      1
Name: hr_rate, dtype: int64

In [65]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      423
12     51
11     43
13     42
15     33
14     32
16     24
21     19
22     16
23     13
26      7
24      5
66      3
36      1
34      1
25      1
46      1
Name: hr_num_bat, dtype: int64

### Triple Number

In [66]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     485
1     119
2      62
3      27
4      11
5       4
6       3
18      2
7       2
Name: triple_rate, dtype: int64

In [67]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     485
4.0      46
2.0      36
6.0      28
5.0      24
3.0      23
7.0      21
8.0      14
9.0      12
1.0       9
10.0      7
11.0      7
18.0      2
13.0      1
Name: triple_val, dtype: int64

In [68]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        485
(14)     46
(12)     36
(16)     28
(15)     24
(13)     23
(21)     21
(22)     14
(23)     12
(11)      9
(24)      7
(25)      7
(36)      2
(31)      1
Name: triple_num, dtype: int64

### Speed Rating

In [69]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [70]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    510
0.022      9
0.023      7
0.018      6
0.014      6
        ... 
0.168      1
0.065      1
0.044      1
0.138      1
0.084      1
Name: speed_score, Length: 93, dtype: int64

In [71]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [72]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        673
*        19
**       17
****      4
***       2
Name: speed_rating, dtype: int64

In [73]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
343,Kenny Kuhn,18,CLE,AL,4,7,6,0,2,0,...,0.0,,A,0,,0,0.0,,0.333,****
416,Sam Mele,33,BOS,AL,14,33,31,1,4,2,...,0.071429,,E+,0,,0,0.0,,0.5,****
495,Don Plarski,25,KCA,AL,8,11,11,0,1,0,...,0.0,,E,0,,0,0.0,,1.0,****
645,Marv Throneberry,21,NYY,AL,1,3,2,1,2,1,...,3.0,#,AAA,0,,0,0.0,,1.0,****


### Base on Balls Number

In [74]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     214
2     118
4     113
3     103
1      61
5      37
6      30
7      16
9       8
8       7
36      3
11      2
24      1
22      1
10      1
Name: walk_rate, dtype: int64

In [75]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     214
12    118
14    113
13    103
11     61
15     37
16     30
21     16
23      8
22      7
66      3
25      2
44      1
24      1
46      1
Name: bb_num, dtype: int64

### Batter K Number

In [76]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     109
4      96
3      84
6      63
5      59
2      52
7      46
9      38
8      29
11     21
12     20
13     15
18     14
10     14
14     10
36      9
16      7
1       7
22      5
15      4
19      3
17      2
21      2
30      1
23      1
24      1
25      1
26      1
27      1
Name: k_rate, dtype: int64

In [77]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     109
7.0      87
8.0      85
6.0      70
9.0      58
12.0     43
10.0     40
13.0     34
5.0      33
11.0     28
4.0      26
14.0     23
18.0     20
36.0      9
16.0      9
15.0      8
3.0       7
21.0      5
22.0      4
20.0      3
25.0      3
17.0      3
26.0      2
30.0      1
24.0      1
27.0      1
2.0       1
19.0      1
29.0      1
Name: k_val, dtype: int64

In [78]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     109
21     87
22     85
16     70
23     58
26     43
24     40
31     34
15     33
25     28
14     26
32     23
36     20
66      9
34      9
33      8
13      7
43      5
44      4
51      3
35      3
42      3
52      2
56      1
12      1
55      1
46      1
41      1
53      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [79]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0     665
1      42
2       3
3       2
36      1
5       1
4       1
Name: hbp_rate, dtype: int64

In [80]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [81]:
players["hbp_val"].value_counts()

0.0     665
9.0       8
13.0      6
12.0      5
10.0      5
8.0       5
14.0      3
11.0      3
6.0       2
7.0       2
5.0       2
3.0       1
18.0      1
1.0       1
17.0      1
4.0       1
16.0      1
15.0      1
19.0      1
36.0      1
Name: hbp_val, dtype: int64

In [82]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       665
/23      8
/31      6
/24      5
/22      5
/26      5
/25      3
/32      3
/21      2
/16      2
/15      2
/34      1
/33      1
/35      1
/66      1
/41      1
/11      1
/13      1
/14      1
/36      1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [83]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     128
8     112
7      87
9      79
6      68
5      48
10     47
3      41
4      39
2      14
12     13
11     12
13      5
24      5
18      5
1       4
36      3
14      2
22      2
16      1
Name: hit_rate, dtype: int64

In [84]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    132
55    112
56     87
54     79
61     68
62     48
53     47
64     41
63     39
65     14
51     13
52     12
41      5
46      5
31      5
11      3
33      2
45      2
43      1
Name: PH_num_bat, dtype: int64

### Batter Rating

In [85]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [86]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " (" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + ")")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,...,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16)
1,Al Aber,27,DET,AL,39,19,17,0,1,0,...,n,17,17.0,35,0,0.0,,2,65,F (n-35)
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,...,n,7,7.0,21,0,0.0,,5,62,D (n-21)
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,...,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32)
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,...,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23)


In [87]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
710,Gus Zernial,32,KCA,AL,120,454,413,62,105,9,...,12,7,9.0,23,0,0.0,,8,55,#B24(25) (12-23)
711,Don Zimmer,24,BRO,NL,88,309,280,38,67,10,...,12,8,10.0,24,0,0.0,,8,55,C+22(23)* (12-24)
712,George Zuverink,30,TOT,AL,42,29,27,1,5,1,...,11,9,10.0,24,0,0.0,,6,61,D+ (11-24)
713,George Zuverink,30,DET,AL,14,4,4,0,0,0,...,n,18,18.0,36,0,0.0,,0,66,G (n-36)
714,George Zuverink,30,BAL,AL,28,25,23,1,5,1,...,11,7,8.0,22,0,0.0,,7,56,C (11-22)


In [88]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,...,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,...,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16)
1,Al Aber,27,DET,AL,39,19,17,0,1,0,...,n,17,17.0,35,0,0.0,,2,65,F (n-35)
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,...,n,7,7.0,21,0,0.0,,5,62,D (n-21)
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,...,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32)
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,...,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
710,Gus Zernial,32,KCA,AL,120,454,413,62,105,9,...,12,7,9.0,23,0,0.0,,8,55,#B24(25) (12-23)
711,Don Zimmer,24,BRO,NL,88,309,280,38,67,10,...,12,8,10.0,24,0,0.0,,8,55,C+22(23)* (12-24)
712,George Zuverink,30,TOT,AL,42,29,27,1,5,1,...,11,9,10.0,24,0,0.0,,6,61,D+ (11-24)
713,George Zuverink,30,DET,AL,14,4,4,0,0,0,...,n,18,18.0,36,0,0.0,,0,66,G (n-36)


In [89]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [90]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [91]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

### Pitcher Letter Rating

In [92]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.27358085808580845

In [93]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [94]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     435
M      88
W      79
X      45
L      23
Y      19
Z+     12
K       6
J+      5
J       3
Name: pit_letter, dtype: int64

In [95]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      412
M      88
W      79
X      45
Z      23
L      23
Y      19
Z+     12
K       6
J+      5
J       3
Name: pit_letter, dtype: int64

In [96]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.175

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [97]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

2.000000      11
4.000000       5
3.000000       5
73.666667      5
1.666667       5
1.000000       4
15.333333      3
130.000000     3
17.000000      3
2.666667       3
2.333333       3
25.333333      3
11.666667      3
91.666667      2
11.000000      2
140.333333     2
44.666667      2
208.000000     2
25.666667      2
20.666667      2
19.000000      2
24.666667      2
3.333333       2
12.333333      2
18.333333      2
Name: IP_real, dtype: int64

In [98]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

2.0    102
3.0     48
1.0     36
4.0     35
5.0     35
6.0     28
7.0     17
0.0      2
Name: IE, dtype: int64

In [99]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
225,Charlie Gorin,27,MLN,NL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,0.0,,54.0,2.0,0.0,0.0,0.0,0.0,0.0,0.1,1.0,2.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,15.0,29.37,12.0,27.0,0.0,81.0,0.0,0.0,L,MLN,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,186,MLN,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.5,Z,0.333333,0.0
337,Dave Koslo,35,MLN,NL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,1.0,0.0,inf,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,17.0,,,,,,,,L,MLN,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,282,MLN,12,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),1.0,Z,0.0,0.0


In [100]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

2.0    102
3.0     48
1.0     38
4.0     35
5.0     35
6.0     28
7.0     17
Name: IE, dtype: int64

In [101]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
123,Gene Conley,24,MLN,NL,22,62,54,2,11,1,0,0,4,0,0,0,12,0.204,0.204,0.222,0.426,15.0,12,2,0,8,0,0,1,R,NL,11.0,7.0,0.611,4.16,22.0,21.0,1.0,10.0,0.0,0.0,158.0,152.0,81.0,73.0,23.0,52.0,9.0,107.0,1.0,0.0,2.0,669.0,91.0,3.91,1.291,8.7,1.3,3.0,6.1,2.06,R,MLN,NL,22.0,21.0,10.0,158.0,31.0,7.0,24.0,0.0,0.0,1.0,,,1.77,1.41,P,,,,,,,,,,,,,,,,,,,,,,,102,MLN,3,22,21,22,22,22,0,0,0,0,0,0,0,0,0,0,0,P,0.181818,,C,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,6,61,C (n-21),0.247,M,158.0,7.0
186,Ben Flowers,28,STL,NL,4,10,10,0,1,0,0,0,0,0,0,0,4,0.1,0.1,0.1,0.2,-46.0,1,1,0,0,0,0,/1,R,NL,1.0,0.0,1.0,3.62,4.0,4.0,0.0,0.0,0.0,0.0,27.1,27.0,12.0,11.0,1.0,12.0,0.0,19.0,0.0,0.0,0.0,120.0,113.0,2.77,1.427,8.9,0.3,4.0,6.3,1.58,R,TOT,ZZ,8.0,4.0,0.0,33.1,9.0,2.0,5.0,2.0,1.0,0.778,,,1.89,0.88,P,,,,,,,,,,,,,,,,,,,,,,,155,2TM,3,8,4,8,8,8,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,E,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,4,63,E (n-32),0.25,M,27.333333,7.0
189,Whitey Ford,26,NYY,AL,39,100,86,9,14,0,0,1,7,0,0,11,16,0.163,0.258,0.198,0.455,25.0,17,1,0,3,0,0,1,L,AL,18.0,7.0,0.72,2.63,39.0,33.0,4.0,18.0,5.0,2.0,253.2,188.0,83.0,74.0,20.0,113.0,7.0,137.0,1.0,1.0,7.0,1027.0,144.0,3.66,1.187,6.7,0.7,4.0,4.9,1.21,L,NYY,AL,39.0,33.0,18.0,253.2,52.0,10.0,41.0,1.0,1.0,0.981,,,1.81,1.31,P,,,,,,,,,,,,,,,,,,,,,,,158,NYY,4,39,33,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.179487,,D,3,13.0,0,0.0,,0.0,,4,14,6,10.0,24,0,0.0,,5,62,D13 (14-24),0.206,L,253.666667,7.0
209,Ned Garver,29,DET,AL,33,91,76,11,17,1,0,1,13,1,0,9,5,0.224,0.295,0.276,0.572,56.0,21,5,0,3,3,0,1,R,AL,12.0,16.0,0.429,3.98,33.0,32.0,1.0,16.0,1.0,0.0,230.2,251.0,115.0,102.0,21.0,67.0,10.0,83.0,5.0,1.0,6.0,992.0,98.0,3.77,1.379,9.8,0.8,2.6,3.2,1.24,R,DET,AL,33.0,32.0,16.0,230.2,60.0,17.0,40.0,3.0,4.0,0.95,,,2.22,1.73,P,,,,,,,,,,,,,,,,,,,,,,,174,DET,8,33,32,33,33,33,0,0,0,0,0,0,0,0,0,0,0,P,0.393939,,C+,2,12.0,0,0.0,,0.042,,4,14,2,6.0,16,0,0.0,,7,56,C+12 (14-16),0.273,W,230.666667,7.0
271,Billy Hoeft,23,DET,AL,36,96,82,9,17,2,0,0,4,0,0,10,25,0.207,0.293,0.232,0.525,45.0,19,1,0,4,0,0,1/H,L,AL,16.0,7.0,0.696,2.99,32.0,29.0,0.0,17.0,7.0,0.0,220.0,187.0,75.0,73.0,17.0,75.0,1.0,133.0,6.0,0.0,3.0,909.0,130.0,3.27,1.191,7.7,0.7,3.1,5.4,1.77,L,DET,AL,32.0,29.0,17.0,220.0,28.0,7.0,21.0,0.0,2.0,1.0,,,1.15,0.88,P,,,,,,,,,,,,,,,,,,,,,,,224,DET,4,36,29,36,32,32,0,0,0,0,0,0,0,0,0,0,4,P,0.111111,,C,0,,0,0.0,,0.0,,4,14,9,13.0,31,0,0.0,,6,61,C (14-31),0.226,M,220.0,7.0
305,Sam Jones,29,CHC,NL,36,85,77,2,14,1,0,0,5,0,0,3,27,0.182,0.213,0.195,0.407,10.0,15,4,0,5,0,0,1,R,NL,14.0,20.0,0.412,4.1,36.0,34.0,1.0,12.0,4.0,0.0,241.2,175.0,118.0,110.0,22.0,185.0,5.0,198.0,14.0,0.0,5.0,1070.0,101.0,4.38,1.49,6.5,0.8,6.9,7.4,1.07,R,CHC,NL,36.0,34.0,12.0,241.2,55.0,14.0,36.0,5.0,0.0,0.909,,,1.86,1.39,P,,,,,,,,,,,,,,,,,,,,,,,252,CHC,3,36,34,36,36,36,0,0,0,0,0,0,0,0,0,0,0,P,0.138889,,D+,0,,0,0.0,,0.0,,1,11,11,12.0,26,0,0.0,,6,61,D+ (11-26),0.201,L,241.666667,7.0
354,Frank Lary,25,DET,AL,36,91,82,4,16,2,0,0,2,0,0,4,29,0.195,0.233,0.22,0.452,24.0,18,1,0,5,0,0,1,R,AL,14.0,15.0,0.483,3.1,36.0,31.0,5.0,16.0,2.0,1.0,235.0,232.0,100.0,81.0,10.0,89.0,3.0,98.0,6.0,0.0,7.0,997.0,125.0,3.3,1.366,8.9,0.4,3.4,3.8,1.1,R,DET,AL,36.0,31.0,16.0,235.0,63.0,14.0,46.0,3.0,4.0,0.952,,,2.3,1.67,P,,,,,,,,,,,,,,,,,,,,,,,297,DET,2,36,31,36,36,36,0,0,0,0,0,0,0,0,0,0,0,P,0.055556,,D+,0,,0,0.0,,0.0,,2,12,11,13.0,31,0,0.0,,6,61,D+ (12-31),0.257,W,235.0,7.0
429,Paul Minner,31,CHC,NL,22,62,56,5,13,2,0,0,1,0,0,5,7,0.232,0.295,0.268,0.563,51.0,15,0,0,1,0,0,1,L,NL,9.0,9.0,0.5,3.48,22.0,22.0,0.0,7.0,1.0,0.0,157.2,173.0,67.0,61.0,15.0,47.0,10.0,53.0,1.0,0.0,2.0,675.0,119.0,3.84,1.395,9.9,0.9,2.7,3.0,1.13,L,CHC,NL,22.0,22.0,7.0,157.2,44.0,4.0,39.0,1.0,3.0,0.977,,,2.45,1.95,P,,,,,,,,,,,,,,,,,,,,,,,360,CHC,9,22,22,22,22,22,0,0,0,0,0,0,0,0,0,0,0,P,0.045455,,C+,0,,0,0.0,,0.0,,3,13,4,7.0,21,0,0.0,,8,55,C+ (13-21),0.276,W,157.666667,7.0
453,Don Newcombe,29,BRO,NL,57,125,117,18,42,9,1,7,23,1,0,6,18,0.359,0.395,0.632,1.028,164.0,74,1,1,1,0,0,1H,L,NL,20.0,5.0,0.8,3.2,34.0,31.0,1.0,17.0,1.0,0.0,233.2,222.0,103.0,83.0,35.0,38.0,1.0,143.0,1.0,0.0,0.0,943.0,128.0,3.59,1.113,8.6,1.3,1.5,5.5,3.76,R,BRO,NL,34.0,31.0,17.0,233.2,43.0,15.0,24.0,4.0,5.0,0.907,,,1.5,1.15,P,,,,,,,,,,,,,,,,,,,,,,,382,BRO,5,57,31,57,34,34,0,0,0,0,0,0,0,0,0,23,0,P,0.403509,,A+,6,16.0,1,7.0,(21),0.031,,2,12,5,7.0,21,0,0.0,,12,51,A+16(21) (12-21),0.246,M,233.666667,7.0
458,Willard Nixon,27,BOS,AL,31,81,69,7,18,4,1,0,7,0,0,8,20,0.261,0.338,0.348,0.685,79.0,24,2,0,4,0,0,1,L,AL,12.0,10.0,0.545,4.07,31.0,31.0,0.0,7.0,3.0,0.0,208.0,207.0,102.0,94.0,10.0,85.0,2.0,95.0,3.0,0.0,4.0,903.0,106.0,3.35,1.404,9.0,0.4,3.7,4.1,1.12,R,BOS,AL,31.0,31.0,7.0,208.0,62.0,18.0,43.0,1.0,3.0,0.984,,,2.64,1.97,P,,,,,,,,,,,,,,,,,,,,,,,387,BOS,6,31,31,31,31,31,0,0,0,0,0,0,0,0,0,0,0,P,0.225806,,B,0,,2,2.0,(12),0.0,,4,14,9,13.0,31,0,0.0,,8,55,B(12) (14-31),0.254,W,208.0,7.0


In [102]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [103]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     419
3      98
4      76
5      37
2      36
6      19
7       7
9       5
8       4
1       4
12      3
14      2
10      2
22      1
13      1
11      1
Name: bb_rate, dtype: int64

In [104]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     98
4.0     76
5.0     37
2.0     36
6.0     19
7.0      7
0.0      7
9.0      5
1.0      4
8.0      4
12.0     3
14.0     2
10.0     2
11.0     1
13.0     1
22.0     1
Name: bb_rate, dtype: int64

In [105]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    412
13      98
14      76
15      37
12      36
16      19
11      11
21       7
23       5
22       4
26       3
32       2
24       2
44       1
25       1
31       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [106]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

4.0    93
3.0    87
2.0    35
5.0    32
6.0    23
0.0    17
7.0    10
1.0     3
9.0     2
8.0     1
Name: k_rate_pit, dtype: int64

In [107]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
23,Fred Baczewski,29,CIN,NL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,0.0,,18.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,32.0,28.37,2.0,18.0,18.0,0.0,0.0,,L,CIN,NL,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,22,CIN,3,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.4,Z,1.0,1,0.0,11,0.0
35,Bill Bell,21,PIT,NL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,5.37,1.0,0.0,0.0,9.0,0.0,0.0,R,PIT,NL,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,34,PIT,2,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.0,J+,1.0,1,9.0,23,0.0
154,Sonny Dixon,30,KCA,AL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,S,AL,0.0,0.0,,16.2,2.0,0.0,1.0,0.0,0.0,0.0,1.2,6.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,31.0,10.17,3.6,32.4,5.4,0.0,0.0,,R,KCA,AL,2.0,0.0,0.0,1.2,1.0,1.0,0.0,0.0,0.0,1.0,,,5.4,0.5,P,,,,,,,,,,,,,,,,,,,,,,,131,KCA,3,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.545,Z,1.666667,1,0.0,11,0.0
205,Bill Froats,24,DET,AL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,7.0,,5.37,1.0,0.0,0.0,9.0,0.0,0.0,L,DET,AL,1.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,,,4.5,1.0,P,,,,,,,,,,,,,,,,,,,,,,,170,DET,1st,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.0,J+,2.0,2,10.0,24,0.0
225,Charlie Gorin,27,MLN,NL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,0.0,,54.0,2.0,0.0,0.0,0.0,0.0,0.0,0.1,1.0,2.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,15.0,29.37,12.0,27.0,0.0,81.0,0.0,0.0,L,MLN,NL,2.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,186,MLN,2,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.5,Z,0.333333,1,22.0,44,0.0
238,Bob Greenwood,27,PHI,NL,1,1,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,15.43,1.0,0.0,0.0,0.0,0.0,0.0,2.1,7.0,4.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,30.0,7.94,3.0,27.0,3.9,0.0,0.0,,R,PHI,NL,1.0,0.0,0.0,2.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,193,PHI,2,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.5,Z,2.333333,2,0.0,11,0.0
258,Bob Harrison,24,BAL,AL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,AL,0.0,0.0,,9.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,2.0,2.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,11.0,50.0,8.37,3.5,13.5,0.0,18.0,0.0,0.0,R,BAL,AL,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,211,BAL,1st,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.429,Z,2.0,2,13.0,31,0.0
330,Bobby Kline,26,WSH,AL,77,156,140,12,31,5,0,0,9,0,0,11,27,0.221,0.288,0.257,0.545,51.0,36,3,2,3,0,0,6/451H,R,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,4.0,3.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,20.0,18.37,5.0,36.0,9.0,9.0,0.0,0.0,R,WSH,AL,77.0,47.0,28.0,423.2,287.0,107.0,164.0,16.0,38.0,0.944,-1.0,-4.0,5.76,3.52,SS-2B-3B-P,,,,,,,,,,,,,,,,,,,,,,,275,WSH,1st,77,47,77,77,1,0,0,4,3,69,0,0,0,0,1,0,SS,0.116883,,C,0,,0,0.0,,0.0,,3,13,6,9.0,23,0,0.0,,7,56,C (13-23),0.571,Z,1.0,1,4.0,14,0.0
337,Dave Koslo,35,MLN,NL,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,L,NL,0.0,1.0,0.0,inf,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,17.0,,,,,,,,L,MLN,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,282,MLN,12,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),1.0,Z,0.0,1,0.0,11,0.0
481,Jim Pearce,30,CIN,NL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.0,,,,,0,0,0,0,0,0,/1,R,NL,0.0,1.0,0.0,10.8,2.0,1.0,0.0,0.0,0.0,0.0,3.1,8.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,43.0,2.37,2.4,21.6,0.0,0.0,0.0,,R,CIN,NL,2.0,1.0,0.0,3.1,1.0,1.0,0.0,0.0,0.0,1.0,,,2.7,0.5,P,,,,,,,,,,,,,,,,,,,,,,,408,CIN,5,2,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.444,Z,3.333333,2,0.0,11,0.0


In [108]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

7.0     71
6.0     58
9.0     43
8.0     37
5.0     21
10.0    20
0.0     17
12.0     9
4.0      6
11.0     6
13.0     4
14.0     4
15.0     3
3.0      2
18.0     1
20.0     1
Name: k_val_pit, dtype: int64

In [109]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    412
21      71
16      58
23      43
22      37
15      21
24      20
n       17
26       9
25       6
14       6
32       4
31       4
33       3
13       2
36       1
42       1
Name: k_num_pit, dtype: int64

In [110]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [111]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    684
1     22
2      8
3      1
Name: hbp_rate_pit, dtype: int64

In [112]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [113]:
players["hbp_val_pit"].value_counts()

0.0     684
8.0       6
10.0      6
12.0      3
7.0       3
9.0       2
6.0       2
11.0      2
15.0      2
2.0       1
14.0      1
5.0       1
17.0      1
16.0      1
Name: hbp_val_pit, dtype: int64

In [114]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       684
/24      6
/22      6
/21      3
/26      3
/25      2
/16      2
/23      2
/33      2
/12      1
/15      1
/32      1
/34      1
/35      1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [115]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [116]:
players["WP"].value_counts()

0.0     105
1.0      66
2.0      52
3.0      33
4.0      20
5.0       9
6.0       8
7.0       7
9.0       1
12.0      1
8.0       1
Name: WP, dtype: int64

In [117]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit
569,Herb Score,22,CLE,AL,33,95,84,5,10,1,0,0,2,0,1,3,40,0.119,0.149,0.131,0.28,-25.0,11,2,0,8,0,0,1,L,AL,16.0,10.0,0.615,2.85,33.0,32.0,0.0,11.0,2.0,0.0,227.1,158.0,85.0,72.0,18.0,154.0,1.0,245.0,1.0,0.0,12.0,978.0,141.0,3.29,1.372,6.3,0.7,6.1,9.7,1.59,L,CLE,AL,33.0,32.0,11.0,227.1,23.0,4.0,15.0,4.0,2.0,0.826,,,0.75,0.58,P,,,,,,,,,,,,,,,,,,,,,,,478,CLE,1st,33,32,33,33,33,0,0,0,0,0,0,0,0,0,0,0,P,0.060606,,E+,0,,0,0.0,,0.0,,1,11,15,16.0,34,0,0.0,,4,63,E+ (11-34),0.192,K,227.333333,7,6.0,16,9.0,15.0,33,0,0.0,


In [118]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        276
[WP]     27
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [119]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    32
0.142857     7
0.125000     6
0.100000     6
0.166667     6
            ..
0.075758     1
0.108333     1
0.157895     1
0.146552     1
0.132275     1
Name: hr_rate_pit, Length: 194, dtype: int64

In [120]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     502
+    152
-     61
Name: gopher_ball, dtype: int64

In [121]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
9,Hank Aguirre,24,CLE,AL,4,4,4,0,0,0,0,0,0,0,0,0,1,0.000,0.000,0.000,0.000,-100.0,0,0,0,0,0,0,/1,R,AL,2.0,0.0,1.000,1.42,4.0,1.0,1.0,1.0,1.0,0.0,12.2,6.0,3.0,2.0,0.0,12.0,0.0,6.0,0.0,0.0,1.0,54.0,290.0,4.26,1.421,4.3,0.0,8.5,4.3,0.50,L,CLE,AL,4.0,1.0,1.0,12.1,1.0,1.0,0.0,0.0,0.0,1.000,,,0.73,0.25,P,,,,,,,,,,,,,,,,,,,,,,,8,CLE,1st,4,1,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,9,9.0,23,0,0.0,,0,66,G (n-23),0.143,J,12.666667,3,8.0,22,4.0,12.0,26,0,0.0,,,0.000000,-
10,Bob Alexander,32,BAL,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,0,0,0,/1,R,AL,1.0,0.0,1.000,13.50,4.0,0.0,2.0,0.0,0.0,0.0,4.0,8.0,6.0,6.0,0.0,2.0,0.0,1.0,1.0,0.0,1.0,22.0,31.0,4.12,2.500,18.0,0.0,4.5,2.3,0.50,R,BAL,AL,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,,,,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,9,BAL,1st,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.421,Z,4.000000,1,3.0,13,2.0,5.0,15,2,7.0,/21,,0.000000,-
14,Vicente Amor,22,CHC,NL,4,0,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,0,0,0,/1,R,NL,0.0,1.0,0.000,4.50,4.0,0.0,3.0,0.0,0.0,0.0,6.0,11.0,3.0,3.0,0.0,3.0,1.0,3.0,0.0,1.0,1.0,31.0,97.0,2.87,2.333,16.5,0.0,4.5,4.5,1.00,R,CHC,NL,4.0,0.0,0.0,6.0,4.0,0.0,4.0,0.0,0.0,1.000,,,6.00,1.00,P,,,,,,,,,,,,,,,,,,,,,,,13,CHC,1st,4,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.393,Z,6.000000,2,3.0,13,3.0,6.0,16,0,0.0,,,0.000000,-
44,Bud Black,22,DET,AL,3,5,4,1,1,0,0,0,0,0,0,0,1,0.250,0.250,0.250,0.500,36.0,1,0,0,1,0,0,/1,R,AL,1.0,1.0,0.500,1.29,3.0,2.0,0.0,1.0,1.0,0.0,14.0,12.0,5.0,2.0,0.0,8.0,0.0,7.0,2.0,0.0,1.0,63.0,310.0,3.51,1.429,7.7,0.0,5.1,4.5,0.88,R,DET,AL,3.0,2.0,1.0,14.1,5.0,1.0,3.0,1.0,0.0,0.800,,,2.51,1.33,P,,,,,,,,,,,,,,,,,,,,,,,43,DET,2,3,2,3,3,3,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,C+,0,,0,0.0,,0.000,,0,n,7,7.0,21,0,0.0,,7,56,C+ (n-21),0.226,M,14.000000,5,5.0,15,4.0,9.0,23,1,10.0,/24,,0.000000,-
70,Hal Brown,30,BOS,AL,2,1,1,0,1,0,0,0,1,0,0,0,0,1.000,1.000,1.000,2.000,422.0,1,0,0,0,0,0,/1,R,AL,1.0,0.0,1.000,2.25,2.0,0.0,1.0,0.0,0.0,0.0,4.0,2.0,1.0,1.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,16.0,209.0,2.87,1.000,4.5,0.0,4.5,4.5,1.00,R,TOT,AL,17.0,5.0,1.0,61.0,12.0,3.0,7.0,2.0,1.0,0.833,,,1.48,0.59,P,,,,,,,,,,,,,,,,,,,,,,,64,2TM,5,27,5,27,17,17,0,0,0,0,0,0,0,0,0,0,10,P,0.500000,,AAA,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,36,11,AAA (n-n),0.143,J,4.000000,2,4.0,14,4.0,8.0,22,0,0.0,,,0.000000,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
643,Bob Thorpe,20,CHC,NL,2,0,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,3.00,2.0,0.0,2.0,0.0,0.0,0.0,3.0,4.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,154.0,2.37,1.333,12.0,0.0,0.0,0.0,,R,CHC,NL,2.0,0.0,0.0,3.0,2.0,0.0,2.0,0.0,0.0,1.000,,,6.00,1.00,P,,,,,,,,,,,,,,,,,,,,,,,542,CHC,1st,2,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.333,Y,3.000000,2,0.0,11,0.0,0.0,n,0,0.0,,,0.000000,-
688,Bob Wiesler,24,NYY,AL,16,15,14,1,2,0,0,0,1,0,0,0,3,0.143,0.133,0.143,0.276,-25.0,2,0,0,0,1,0,1,S,AL,0.0,2.0,0.000,3.91,16.0,7.0,5.0,0.0,0.0,0.0,53.0,39.0,27.0,23.0,1.0,49.0,0.0,22.0,1.0,1.0,2.0,236.0,97.0,4.61,1.660,6.6,0.2,8.3,3.7,0.45,L,NYY,AL,16.0,7.0,0.0,53.0,15.0,2.0,13.0,0.0,1.0,1.000,,,2.55,0.94,P,,,,,,,,,,,,,,,,,,,,,,,581,NYY,3,16,7,16,16,16,0,0,0,0,0,0,0,0,0,0,0,P,0.062500,,D,0,,0,0.0,,0.000,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.210,L,53.000000,3,7.0,21,3.0,10.0,24,0,0.0,,,0.025641,-
689,Bill Wight,33,TOT,AL,36,43,36,1,3,1,0,0,2,0,0,1,16,0.083,0.108,0.111,0.219,-39.0,4,0,0,6,0,0,1/3,L,AL,6.0,8.0,0.429,2.48,36.0,14.0,13.0,8.0,2.0,3.0,141.1,135.0,51.0,39.0,6.0,48.0,2.0,63.0,1.0,0.0,6.0,598.0,154.0,3.07,1.295,8.6,0.4,3.1,4.0,1.31,L,TOT,AL,37.0,14.0,8.0,142.0,55.0,3.0,49.0,3.0,1.0,0.945,0.0,0.0,3.30,1.41,P-1B,,,,,,,,,,,,,,,,,,,,,,,582,2TM,9,36,14,36,36,36,0,1,0,0,0,0,0,0,0,0,0,P,0.055556,,F,0,,0,0.0,,0.000,,1,11,13,14.0,32,0,0.0,,3,64,F (11-32),0.246,M,141.333333,4,3.0,13,4.0,7.0,21,0,0.0,,[WP],0.044444,-
690,Bill Wight,33,CLE,AL,17,1,0,0,0,0,0,0,0,0,0,0,0,0.000,,,,,0,0,0,1,0,0,1,L,AL,0.0,0.0,,2.63,17.0,0.0,8.0,0.0,0.0,1.0,24.0,24.0,8.0,7.0,0.0,9.0,1.0,9.0,0.0,0.0,1.0,102.0,155.0,2.74,1.375,9.0,0.0,3.4,3.4,1.00,L,TOT,AL,37.0,14.0,8.0,142.0,55.0,3.0,49.0,3.0,1.0,0.945,0.0,0.0,3.30,1.41,P-1B,,,,,,,,,,,,,,,,,,,,,,,582,2TM,9,36,14,36,36,36,0,1,0,0,0,0,0,0,0,0,0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G (n-n),0.258,W,24.000000,1,3.0,13,3.0,6.0,16,0,0.0,,,0.000000,-


### Pitcher Control Number

In [122]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

12.0    96
11.0    58
13.0    43
14.0    27
10.0    22
15.0    15
16.0    10
18.0     7
17.0     7
23.0     4
20.0     3
22.0     3
19.0     3
9.0      2
36.0     1
29.0     1
3.0      1
Name: control_rate, dtype: int64

In [123]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    412
46      96
51      58
45      43
44      27
52      22
43      15
42      10
36       7
41       7
31       4
32       3
34       3
35       3
53       2
21       1
63       1
11       1
Name: PCN, dtype: int64

### Probable Hit Number

In [124]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     416
8     103
9      67
7      44
10     32
6      14
11     13
14      4
16      4
4       4
12      3
13      2
18      2
21      1
15      1
17      1
20      1
5       1
3       1
36      1
Name: hit_rate_pit, dtype: int64

In [125]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [126]:
players["hit_rate_pit"].value_counts()

8.0     103
9.0      67
7.0      44
10.0     32
6.0      14
11.0     13
16.0      4
0.0       4
14.0      4
4.0       4
12.0      3
18.0      2
13.0      2
17.0      1
20.0      1
21.0      1
5.0       1
36.0      1
3.0       1
15.0      1
Name: hit_rate_pit, dtype: int64

In [127]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    412
55     103
54      67
56      44
53      32
61      14
52      13
43       4
45       4
63       4
66       4
51       3
41       2
46       2
34       1
44       1
42       1
62       1
11       1
35       1
64       1
Name: PPH, dtype: int64

### Pitcher Rating

In [128]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [129]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

        412
M2       13
+M2      11
+W3      11
+W2      11
       ... 
-M4       1
+K2       1
+X1       1
+J2       1
-Z+3      1
Name: goph_lett_inn, Length: 99, dtype: int64

In [130]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      412
(13-16)         36
(13-21)         33
(14-22)         23
(14-21)         21
              ... 
(14-15)          1
(22-26/33)       1
(13-16/22)       1
(26-33)          1
(23-26)          1
Name: bb_k_hbp, Length: 80, dtype: int64

In [131]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               412
(13-16)         36
(13-21)         33
(14-22)         23
(14-21)         21
              ... 
(12-24)          1
(32-42)          1
(22-26/33)       1
(16-21)          1
(15-16)          1
Name: bb_k_hbp, Length: 80, dtype: int64

In [132]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

-M2 (13-16)         4
+W5 (13-21)         4
M2 (14-21)          4
+Z1 (11-n)          3
M4 (14-22)          3
                   ..
+W5 (14-21/22)      1
W5 (14-22)          1
+Z1 (13-n)          1
+Z+3 (16-25)        1
+W6 (13-21)         1
Name: pitcher_rating, Length: 266, dtype: int64

In [133]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [134]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141.0,325,20,3,7,4,5,*974/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,153.0,151.0,138.0,1346.0,448.0,340.0,93.0,15.0,25.0,0.967,7.0,6.0,2.9,2.83,OF-2B,,...,,,,,,,,,,,,,,,,,,1,MLN,2,153,151,153,151,0,0,0,27,0,0,30,0,104,126,2,0,OF,0.69281,#,A,5,15.0,2,7.0,(21),0.018,,3,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,
1,Al Aber,27,DET,AL,39,19,17,0,1,0,0,0,0,0,0,0,9,0.059,0.059,0.059,0.118,-68.0,1,1,0,2,0,0,1,L,AL,6.0,3.0,0.667,3.38,39.0,1.0,22.0,0.0,0.0,3.0,80.0,86.0,32.0,30.0,9.0,28.0,1.0,37.0,0.0,0.0,1.0,346.0,116.0,3.95,1.425,9.7,1.0,3.2,4.2,1.32,L,DET,AL,39.0,1.0,0.0,80.1,24.0,4.0,17.0,3.0,2.0,0.875,,,2.35,0.54,P,,...,,,,,,,,,,,,,,,,,,2,DET,4,39,1,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,2,65,F (n-35),0.27,W,80.0,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.104651,+,12.0,46.0,9.0,54.0,+W2,(13-21),+W2 (13-21)
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,0,0,0,0,0,0,6,0.154,0.154,0.154,0.308,-15.0,4,1,0,4,0,0,1,R,AL,5.0,9.0,0.357,5.96,40.0,14.0,12.0,3.0,2.0,0.0,119.1,136.0,87.0,79.0,9.0,67.0,1.0,79.0,7.0,0.0,2.0,553.0,66.0,3.88,1.701,10.3,0.7,5.1,6.0,1.18,R,WSH,AL,40.0,14.0,3.0,119.1,31.0,8.0,22.0,1.0,2.0,0.968,,,2.26,0.75,P,,...,,,,,,,,,,,,,,,,,,3,WSH,1st,40,14,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.284,X,119.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.066176,,14.0,44.0,9.0,54.0,X3,(14-23),X3 (14-23)
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,3,6,32,2,8,89,69,0.243,0.413,0.359,0.773,118.0,111,6,3,3,3,2,89H7/3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,100.0,87.0,51.0,753.1,205.0,195.0,7.0,3.0,1.0,0.985,-7.0,-11.0,2.41,2.02,OF-1B,,...,,,,,,,,,,,,,,,,,,4,BAL,7,118,87,118,98,0,0,4,0,0,0,13,58,46,96,20,4,OF,0.271186,,C+,3,13.0,1,4.0,(14),0.014,,8,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32),,,,,,,,,,0,0.0,,,,,,,,,,,
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,3,2,23,2,0,24,25,0.251,0.352,0.386,0.738,93.0,66,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.25,,B,2,12.0,3,5.0,(15),0.037,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,
5,Bobby Adams,33,CIN,NL,64,176,150,23,41,11,2,2,20,2,0,20,21,0.273,0.368,0.413,0.781,103.0,62,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.3125,,B,2,12.0,2,4.0,(14),0.041,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(14) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,
6,Bobby Adams,33,CHW,AL,28,25,21,8,2,0,1,0,3,0,0,4,4,0.095,0.24,0.19,0.43,16.0,4,0,0,0,0,0,H/54,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.107143,,E,0,,18,18.0,(36),0.0,,6,16,6,12.0,26,0,0.0,,3,64,E(36) (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,
7,Joe Adcock,27,MLN,NL,84,324,288,40,76,14,0,15,45,0,2,31,44,0.264,0.339,0.469,0.807,116.0,135,13,2,2,1,3,3/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,78.0,77.0,75.0,690.1,777.0,725.0,44.0,8.0,68.0,0.99,-4.0,-8.0,10.03,9.86,1B,,...,,,,,,,,,,,,,,,,,,6,MLN,6,84,77,84,78,0,0,78,0,0,0,0,0,0,0,6,0,1B,0.535714,,B,7,21.0,0,0.0,,0.0,,3,13,5,8.0,22,0,0.0,,8,55,B21 (13-22),,,,,,,,,,0,0.0,,,,,,,,,,,
8,Harry Agganis,26,BOS,AL,25,94,83,11,26,10,1,0,10,2,0,10,10,0.313,0.383,0.458,0.841,118.0,38,3,0,0,1,0,3/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,20.0,20.0,20.0,181.0,225.0,208.0,14.0,3.0,15.0,0.987,3.0,17.0,11.04,11.1,1B,,...,,,,,,,,,,,,,,,,,,7,BOS,2,25,20,25,20,0,0,20,0,0,0,0,0,0,0,5,0,1B,0.4,,A,0,,1,1.0,(11),0.08,*,4,14,4,8.0,22,0,0.0,,10,53,A(11)* (14-22),,,,,,,,,,0,0.0,,,,,,,,,,,
9,Hank Aguirre,24,CLE,AL,4,4,4,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,AL,2.0,0.0,1.0,1.42,4.0,1.0,1.0,1.0,1.0,0.0,12.2,6.0,3.0,2.0,0.0,12.0,0.0,6.0,0.0,0.0,1.0,54.0,290.0,4.26,1.421,4.3,0.0,8.5,4.3,0.5,L,CLE,AL,4.0,1.0,1.0,12.1,1.0,1.0,0.0,0.0,0.0,1.0,,,0.73,0.25,P,,...,,,,,,,,,,,,,,,,,,8,CLE,1st,4,1,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,0,66,G (n-23),0.143,J,12.666667,3.0,8.0,22.0,4.0,12.0,26.0,0,0.0,,,0.0,-,12.0,46.0,4.0,63.0,-J3,(22-26),-J3 (22-26)


In [135]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
705,Eddie Yost,28,WSH,AL,122,490,375,64,91,17,5,7,48,4,3,95,54,0.243,0.407,0.371,0.778,115.0,139,7,11,6,3,0,*5H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,WSH,AL,107.0,105.0,99.0,899.0,336.0,100.0,217.0,19.0,22.0,0.943,-6.0,-8.0,3.17,2.96,3B,,...,,,,,,,,,,,,,,,,,,594,WSH,11,122,106,122,107,0,0,0,0,107,0,0,0,0,0,16,0,3B,0.393443,,C+,3,13.0,2,5.0,(15),0.024,,7,21,4,11.0,25,1,12.0,/26,7,56,C+13(15) (21-25/26),,,,,,,,,,0,0.0,,,,,,,,,,,
706,Bobby Young,30,TOT,AL,77,248,231,12,51,4,1,1,14,1,4,12,25,0.221,0.259,0.26,0.519,44.0,60,3,0,5,0,1,4H/5,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,70.0,64.0,52.0,551.0,330.0,145.0,180.0,5.0,56.0,0.985,-1.0,-2.0,5.31,4.64,2B-3B,,...,,,,,,,,,,,,,,,,,,595,2TM,6,77,64,77,71,0,0,0,69,2,0,0,0,0,0,7,3,2B,0.181818,,C,1,11.0,1,2.0,(12),0.018,,2,12,4,6.0,16,0,0.0,,7,56,C11(12) (12-16),,,,,,,,,,0,0.0,,,,,,,,,,,
707,Bobby Young,30,BAL,AL,59,201,186,5,37,3,0,1,8,1,4,11,23,0.199,0.244,0.231,0.475,33.0,43,3,0,4,0,1,4/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,70.0,64.0,52.0,551.0,330.0,145.0,180.0,5.0,56.0,0.985,-1.0,-2.0,5.31,4.64,2B-3B,,...,,,,,,,,,,,,,,,,,,595,2TM,6,77,64,77,71,0,0,0,69,2,0,0,0,0,0,7,3,2B,0.135593,,C,1,11.0,0,0.0,,0.023,,2,12,4,6.0,16,0,0.0,,7,56,C11 (12-16),,,,,,,,,,0,0.0,,,,,,,,,,,
708,Bobby Young,30,CLE,AL,18,47,45,7,14,1,1,0,6,0,0,1,2,0.311,0.326,0.378,0.704,86.0,17,0,0,1,0,0,4/H5,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,70.0,64.0,52.0,551.0,330.0,145.0,180.0,5.0,56.0,0.985,-1.0,-2.0,5.31,4.64,2B-3B,,...,,,,,,,,,,,,,,,,,,595,2TM,6,77,64,77,71,0,0,0,69,2,0,0,0,0,0,7,3,2B,0.333333,,A,0,,3,3.0,(13),0.0,,1,11,2,3.0,13,0,0.0,,11,52,A(13) (11-13),,,,,,,,,,0,0.0,,,,,,,,,,,
709,Norm Zauchin,25,BOS,AL,130,556,477,65,114,10,0,27,93,3,0,69,105,0.239,0.335,0.43,0.765,98.0,205,8,3,1,6,1,*3/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,126.0,125.0,124.0,1120.1,1227.0,1137.0,84.0,6.0,106.0,0.995,10.0,10.0,9.81,9.69,1B,,...,,,,,,,,,,,,,,,,,,596,BOS,2,130,125,130,126,0,0,126,0,0,0,0,0,0,0,4,0,1B,0.715385,#,C+,9,23.0,0,0.0,,0.02,,4,14,7,11.0,25,0,0.0,,7,56,#C+23 (14-25),,,,,,,,,,0,0.0,,,,,,,,,,,
710,Gus Zernial,32,KCA,AL,120,454,413,62,105,9,3,30,84,1,0,30,90,0.254,0.304,0.508,0.812,116.0,210,17,3,0,8,1,*7H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,KCA,AL,103.0,102.0,80.0,860.1,249.0,231.0,9.0,9.0,4.0,0.964,-2.0,-3.0,2.51,2.33,OF,,...,,,,,,,,,,,,,,,,,,597,KCA,7,120,102,120,103,0,0,0,0,0,0,103,0,0,103,18,0,OF,0.7,#,B,10,24.0,1,11.0,(25),0.01,,2,12,7,9.0,23,0,0.0,,8,55,#B24(25) (12-23),,,,,,,,,,0,0.0,,,,,,,,,,,
711,Don Zimmer,24,BRO,NL,88,309,280,38,67,10,1,15,50,5,3,19,66,0.239,0.289,0.443,0.731,88.0,124,7,2,4,4,5,46/5H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BRO,NL,91.0,75.0,67.0,683.2,403.0,184.0,207.0,12.0,63.0,0.97,-2.0,-4.0,5.15,4.3,2B-SS-3B,,...,,,,,,,,,,,,,,,,,,598,BRO,2,88,75,88,84,0,0,0,62,8,21,0,0,0,0,3,3,2B,0.568182,,C+,8,22.0,1,9.0,(23),0.081,*,2,12,8,10.0,24,0,0.0,,8,55,C+22(23)* (12-24),,,,,,,,,,0,0.0,,,,,,,,,,,
712,George Zuverink,30,TOT,AL,42,29,27,1,5,1,0,0,0,0,0,1,7,0.185,0.214,0.222,0.437,22.0,6,1,0,1,0,0,1,R,AL,4.0,8.0,0.333,3.38,42.0,6.0,22.0,0.0,0.0,4.0,114.2,118.0,55.0,43.0,11.0,31.0,4.0,44.0,5.0,0.0,2.0,474.0,113.0,3.79,1.299,9.3,0.9,2.4,3.5,1.42,R,TOT,AL,42.0,6.0,0.0,114.2,37.0,9.0,25.0,3.0,2.0,0.919,,,2.67,0.81,P,,...,,,,,,,,,,,,,,,,,,599,2TM,4,42,6,42,42,42,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D+,0,,0,0.0,,0.0,,1,11,9,10.0,24,0,0.0,,6,61,D+ (11-24),0.269,W,114.666667,3.0,2.0,12.0,3.0,5.0,15.0,0,0.0,,,0.09322,,12.0,46.0,9.0,54.0,W3,(12-15),W3 (12-15)
713,George Zuverink,30,DET,AL,14,4,4,0,0,0,0,0,0,0,0,0,2,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,1,R,AL,0.0,5.0,0.0,6.99,14.0,1.0,6.0,0.0,0.0,0.0,28.1,38.0,27.0,22.0,6.0,14.0,3.0,13.0,1.0,0.0,0.0,141.0,56.0,5.79,1.835,12.1,1.9,4.4,4.1,0.93,R,TOT,AL,42.0,6.0,0.0,114.2,37.0,9.0,25.0,3.0,2.0,0.919,,,2.67,0.81,P,,...,,,,,,,,,,,,,,,,,,599,2TM,4,42,6,42,42,42,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G (n-36),0.302,X,28.333333,2.0,4.0,14.0,3.0,7.0,21.0,0,0.0,,,0.157895,+,14.0,44.0,10.0,53.0,+X2,(14-21),+X2 (14-21)
714,George Zuverink,30,BAL,AL,28,25,23,1,5,1,0,0,0,0,0,1,5,0.217,0.25,0.261,0.511,43.0,6,1,0,1,0,0,1,R,AL,4.0,3.0,0.571,2.19,28.0,5.0,16.0,0.0,0.0,4.0,86.1,80.0,28.0,21.0,5.0,17.0,1.0,31.0,4.0,0.0,2.0,333.0,173.0,3.13,1.124,8.3,0.5,1.8,3.2,1.82,R,TOT,AL,42.0,6.0,0.0,114.2,37.0,9.0,25.0,3.0,2.0,0.919,,,2.67,0.81,P,,...,,,,,,,,,,,,,,,,,,599,2TM,4,42,6,42,42,42,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,C,0,,0,0.0,,0.0,,1,11,7,8.0,22,0,0.0,,7,56,C (11-22),0.256,W,86.333333,3.0,2.0,12.0,3.0,5.0,15.0,0,0.0,,,0.0625,,11.0,51.0,9.0,54.0,W3,(12-15),W3 (12-15)


In [136]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [137]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [138]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [139]:
players["Primary_Pos_fld"].value_counts()

P     301
OF    153
C      70
3B     54
1B     46
SS     43
2B     35
Name: Primary_Pos_fld, dtype: int64

In [140]:
players["Primary_Pos_fld"].isnull().sum()

13

In [141]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.991739
2B    0.968200
3B    0.957481
C     0.986132
OF    0.975187
P     0.949065
SS    0.954929
Name: Fld%, dtype: float64

### Superior Rating

In [142]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [143]:
players["superior_rating"].value_counts()

     469
S    246
Name: superior_rating, dtype: int64

In [144]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141.0,325,20,3,7,4,5,*974/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,153.0,151.0,138.0,1346.0,448.0,340.0,93.0,15.0,25.0,0.967,7.0,6.0,2.9,2.83,OF-2B,,...,,,,,,,,,,,,,,,,,1,MLN,2,153,151,153,151,0,0,0,27,0,0,30,0,104,126,2,0,OF,0.69281,#,A,5,15.0,2,7.0,(21),0.018,,3,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,
1,Al Aber,27,DET,AL,39,19,17,0,1,0,0,0,0,0,0,0,9,0.059,0.059,0.059,0.118,-68.0,1,1,0,2,0,0,1,L,AL,6.0,3.0,0.667,3.38,39.0,1.0,22.0,0.0,0.0,3.0,80.0,86.0,32.0,30.0,9.0,28.0,1.0,37.0,0.0,0.0,1.0,346.0,116.0,3.95,1.425,9.7,1.0,3.2,4.2,1.32,L,DET,AL,39.0,1.0,0.0,80.1,24.0,4.0,17.0,3.0,2.0,0.875,,,2.35,0.54,P,,...,,,,,,,,,,,,,,,,,2,DET,4,39,1,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,2,65,F (n-35),0.27,W,80.0,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.104651,+,12.0,46.0,9.0,54.0,+W2,(13-21),+W2 (13-21),
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,0,0,0,0,0,0,6,0.154,0.154,0.154,0.308,-15.0,4,1,0,4,0,0,1,R,AL,5.0,9.0,0.357,5.96,40.0,14.0,12.0,3.0,2.0,0.0,119.1,136.0,87.0,79.0,9.0,67.0,1.0,79.0,7.0,0.0,2.0,553.0,66.0,3.88,1.701,10.3,0.7,5.1,6.0,1.18,R,WSH,AL,40.0,14.0,3.0,119.1,31.0,8.0,22.0,1.0,2.0,0.968,,,2.26,0.75,P,,...,,,,,,,,,,,,,,,,,3,WSH,1st,40,14,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.284,X,119.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.066176,,14.0,44.0,9.0,54.0,X3,(14-23),X3 (14-23),
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,3,6,32,2,8,89,69,0.243,0.413,0.359,0.773,118.0,111,6,3,3,3,2,89H7/3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,100.0,87.0,51.0,753.1,205.0,195.0,7.0,3.0,1.0,0.985,-7.0,-11.0,2.41,2.02,OF-1B,,...,,,,,,,,,,,,,,,,,4,BAL,7,118,87,118,98,0,0,4,0,0,0,13,58,46,96,20,4,OF,0.271186,,C+,3,13.0,1,4.0,(14),0.014,,8,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32),,,,,,,,,,0,0.0,,,,,,,,,,,,
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,3,2,23,2,0,24,25,0.251,0.352,0.386,0.738,93.0,66,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.25,,B,2,12.0,3,5.0,(15),0.037,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,,
5,Bobby Adams,33,CIN,NL,64,176,150,23,41,11,2,2,20,2,0,20,21,0.273,0.368,0.413,0.781,103.0,62,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.3125,,B,2,12.0,2,4.0,(14),0.041,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(14) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,,
6,Bobby Adams,33,CHW,AL,28,25,21,8,2,0,1,0,3,0,0,4,4,0.095,0.24,0.19,0.43,16.0,4,0,0,0,0,0,H/54,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.107143,,E,0,,18,18.0,(36),0.0,,6,16,6,12.0,26,0,0.0,,3,64,E(36) (16-26),,,,,,,,,,0,0.0,,,,,,,,,,,,
7,Joe Adcock,27,MLN,NL,84,324,288,40,76,14,0,15,45,0,2,31,44,0.264,0.339,0.469,0.807,116.0,135,13,2,2,1,3,3/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,78.0,77.0,75.0,690.1,777.0,725.0,44.0,8.0,68.0,0.99,-4.0,-8.0,10.03,9.86,1B,,...,,,,,,,,,,,,,,,,,6,MLN,6,84,77,84,78,0,0,78,0,0,0,0,0,0,0,6,0,1B,0.535714,,B,7,21.0,0,0.0,,0.0,,3,13,5,8.0,22,0,0.0,,8,55,B21 (13-22),,,,,,,,,,0,0.0,,,,,,,,,,,,
8,Harry Agganis,26,BOS,AL,25,94,83,11,26,10,1,0,10,2,0,10,10,0.313,0.383,0.458,0.841,118.0,38,3,0,0,1,0,3/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,20.0,20.0,20.0,181.0,225.0,208.0,14.0,3.0,15.0,0.987,3.0,17.0,11.04,11.1,1B,,...,,,,,,,,,,,,,,,,,7,BOS,2,25,20,25,20,0,0,20,0,0,0,0,0,0,0,5,0,1B,0.4,,A,0,,1,1.0,(11),0.08,*,4,14,4,8.0,22,0,0.0,,10,53,A(11)* (14-22),,,,,,,,,,0,0.0,,,,,,,,,,,,
9,Hank Aguirre,24,CLE,AL,4,4,4,0,0,0,0,0,0,0,0,0,1,0.0,0.0,0.0,0.0,-100.0,0,0,0,0,0,0,/1,R,AL,2.0,0.0,1.0,1.42,4.0,1.0,1.0,1.0,1.0,0.0,12.2,6.0,3.0,2.0,0.0,12.0,0.0,6.0,0.0,0.0,1.0,54.0,290.0,4.26,1.421,4.3,0.0,8.5,4.3,0.5,L,CLE,AL,4.0,1.0,1.0,12.1,1.0,1.0,0.0,0.0,0.0,1.0,,,0.73,0.25,P,,...,,,,,,,,,,,,,,,,,8,CLE,1st,4,1,4,4,4,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,9,9.0,23,0,0.0,,0,66,G (n-23),0.143,J,12.666667,3.0,8.0,22.0,4.0,12.0,26.0,0,0.0,,,0.0,-,12.0,46.0,4.0,63.0,-J3,(22-26),-J3 (22-26),S


### Arm Rating

In [145]:
players["G"].value_counts()

2.0      35
1.0      27
7.0      19
4.0      17
33.0     16
         ..
73.0      1
51.0      1
110.0     1
85.0      1
139.0     1
Name: G, Length: 134, dtype: int64

In [146]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.6403176849772322

In [147]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [148]:
players["arm_rating"].value_counts()

8    483
9    232
Name: arm_rating, dtype: int64

### Range Rating

In [149]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.3113656585701265

In [150]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    5.301551
2B    1.988675
3B    0.828067
C     3.399080
OF    1.458032
P     0.172393
SS    1.150779
Name: po_gp, dtype: float64

In [151]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [152]:
players["range_rating"].value_counts()

4    570
5    145
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [153]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [154]:
players["CS%"].value_counts()

0%      13
50%      9
100%     7
42%      4
41%      4
52%      4
28%      3
49%      3
31%      2
40%      2
20%      2
44%      2
46%      2
59%      2
48%      2
47%      2
80%      1
29%      1
43%      1
56%      1
71%      1
33%      1
27%      1
38%      1
67%      1
Name: CS%, dtype: int64

In [155]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.4213395846570257

In [156]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [157]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    660
-3     25
       15
-2     10
-1      5
Name: cs_num, dtype: int64

In [158]:
players["G_cat"].isnull().sum()

635

In [159]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      665
-3     25
-4     10
-2     10
-1      5
Name: cs_num, dtype: int64

In [160]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num
38,Yogi Berra,30,NYY,AL,147,615,541,84,147,20,3,27,108,1,0,60,20,0.272,0.349,0.47,0.819,120.0,254,13,7,2,5,6,*2/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,145.0,141.0,133.0,1233.2,815.0,748.0,54.0,13.0,10.0,0.984,3.0,3.0,5.85,5.53,C,NYY,...,3.0,3.0,3.0,5.85,5.53,3.0,20.0,26.0,28.0,52%,37,NYY,10,147,141,147,145,0,145,0,0,0,0,0,0,0,0,6,0,C,0.734694,#,B,7,21.0,1,8.0,(22),0.006,,4,14,1,5.0,15,0,0.0,,9,54,#B21(22) (14-15),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.367347,9,5.088435,4,0.518519,-4
89,Roy Campanella,33,BRO,NL,123,522,446,81,142,20,1,32,107,2,3,56,41,0.318,0.395,0.583,0.978,152.0,260,14,6,5,9,9,*2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BRO,NL,121.0,117.0,111.0,1042.0,732.0,672.0,54.0,6.0,8.0,0.992,2.0,2.0,6.27,6.0,C,BRO,...,2.0,2.0,2.0,6.27,6.0,4.0,25.0,21.0,23.0,52%,76,BRO,8,123,118,123,121,0,121,0,0,0,0,0,0,0,0,4,0,C,0.869919,#,A,8,22.0,0,0.0,,0.013,,4,14,3,7.0,21,0,0.0,,10,53,#A22 (14-21),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.439024,9,5.463415,4,0.522727,-4
135,Del Crandall,25,MLN,NL,133,495,440,61,104,15,2,26,62,2,1,40,56,0.236,0.299,0.457,0.756,101.0,201,12,2,6,6,11,*2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,131.0,124.0,120.0,1123.0,688.0,611.0,67.0,10.0,8.0,0.985,7.0,7.0,5.43,5.18,C,MLN,...,7.0,7.0,7.0,5.43,5.18,7.0,12.0,27.0,34.0,56%,112,MLN,5,133,124,133,131,0,131,0,0,0,0,0,0,0,0,5,1,C,0.466165,,C+,9,23.0,1,10.0,(24),0.019,,3,13,4,7.0,21,0,0.0,,8,55,C+23(24) (13-21),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.503759,9,4.593985,4,0.557377,-4
182,Ed Fitz Gerald,31,WSH,AL,74,270,236,28,56,3,1,4,19,0,1,25,23,0.237,0.317,0.309,0.626,73.0,73,8,3,5,1,0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,WSH,AL,72.0,66.0,56.0,572.1,340.0,304.0,30.0,6.0,5.0,0.982,1.0,2.0,5.25,4.64,C,WSH,...,1.0,2.0,1.0,5.25,4.64,8.0,5.0,16.0,17.0,52%,153,WSH,8,74,66,74,72,0,72,0,0,0,0,0,0,0,0,4,1,C,0.256757,,C+,3,13.0,1,4.0,(14),0.0,,3,13,3,6.0,16,0,0.0,,7,56,C+13(14) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.405405,9,4.108108,4,0.515152,-4
187,Hank Foiles,26,CLE,AL,62,132,111,13,29,9,0,1,7,0,0,17,18,0.261,0.354,0.369,0.723,92.0,41,4,0,2,2,3,2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,41.0,30.0,20.0,261.0,248.0,222.0,23.0,3.0,7.0,0.988,1.0,5.0,8.45,5.98,C,CLE,...,1.0,5.0,1.0,8.45,5.98,3.0,10.0,9.0,13.0,59%,156,CLE,2,62,31,62,41,0,41,0,0,0,0,0,0,0,0,26,3,C,0.112903,,B,1,11.0,0,0.0,,0.0,,5,15,5,10.0,24,0,0.0,,8,55,B11 (15-24),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.370968,9,3.580645,4,0.590909,-4
310,Ray Katt,28,NYG,NL,124,355,326,27,70,7,2,7,28,0,0,22,38,0.215,0.268,0.313,0.581,53.0,102,13,2,4,1,4,*2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYG,NL,122.0,95.0,72.0,849.0,534.0,482.0,45.0,7.0,7.0,0.987,3.0,4.0,5.59,4.32,C,NYG,...,3.0,4.0,3.0,5.59,4.32,17.0,9.0,20.0,29.0,59%,257,NYG,4,124,95,124,122,0,122,0,0,0,0,0,0,0,0,3,0,C,0.225806,,C,4,14.0,1,5.0,(15),0.0,,2,12,4,6.0,16,0,0.0,,7,56,C14(15) (12-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.362903,9,3.887097,4,0.591837,-4
349,Hobie Landrith,25,CIN,NL,43,98,87,9,22,3,0,4,7,0,1,10,14,0.253,0.33,0.425,0.755,95.0,37,2,0,1,0,1,2H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CIN,NL,27.0,20.0,19.0,183.1,100.0,86.0,14.0,0.0,3.0,1.0,3.0,20.0,4.91,3.7,C,CIN,...,3.0,20.0,3.0,4.91,3.7,1.0,2.0,2.0,5.0,71%,292,CIN,6,43,20,43,27,0,27,0,0,0,0,0,0,0,0,18,0,C,0.162791,,B,7,21.0,0,0.0,,0.0,,4,14,5,9.0,23,0,0.0,,8,55,B21 (14-23),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.325581,9,2.0,4,0.714286,-4
449,Hal Naragon,26,CLE,AL,57,143,127,12,41,9,2,1,14,1,0,15,8,0.323,0.394,0.449,0.843,124.0,57,3,0,1,0,0,2H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CLE,AL,52.0,32.0,18.0,300.0,216.0,199.0,15.0,2.0,3.0,0.991,3.0,12.0,6.42,4.12,C,CLE,...,3.0,12.0,3.0,6.42,4.12,2.0,9.0,5.0,10.0,67%,378,CLE,3,57,32,57,52,0,52,0,0,0,0,0,0,0,0,16,0,C,0.245614,,A,1,11.0,2,3.0,(13),0.023,,4,14,2,6.0,16,0,0.0,,10,53,A11(13) (14-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.263158,9,3.491228,4,0.666667,-4
577,Jack Shepard,24,PIT,NL,94,301,264,24,63,10,2,2,23,1,0,33,25,0.239,0.321,0.314,0.635,72.0,83,8,0,2,2,3,2H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PIT,NL,77.0,71.0,59.0,595.0,328.0,288.0,34.0,6.0,6.0,0.982,0.0,0.0,4.87,4.18,C,PIT,...,0.0,0.0,0.0,4.87,4.18,13.0,6.0,20.0,22.0,52%,484,PIT,3,94,71,94,77,0,77,0,0,0,0,0,0,0,0,21,0,C,0.244681,,C+,1,11.0,1,2.0,(12),0.012,,4,14,3,7.0,21,0,0.0,,8,55,C+11(12) (14-21),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.361702,9,3.06383,4,0.52381,-4
581,Charlie Silvera,30,NYY,AL,14,32,26,1,5,0,0,0,1,0,0,6,4,0.192,0.344,0.192,0.536,49.0,5,0,0,0,0,0,2/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,11.0,7.0,4.0,72.2,51.0,47.0,4.0,0.0,1.0,1.0,1.0,17.0,6.32,4.64,C,NYY,...,1.0,17.0,1.0,6.32,4.64,1.0,2.0,1.0,4.0,80%,488,NYY,8,14,7,14,11,0,11,0,0,0,0,0,0,0,0,3,0,C,0.071429,,D+,0,,0,0.0,,0.0,,7,21,4,11.0,25,0,0.0,,6,61,D+ (21-25),,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.285714,9,3.357143,4,0.8,-4


### Fielder Rating

In [161]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

84        251
S84       148
94         79
85         55
95         48
S94        42
S85        28
94 -3      17
S95        14
94 -4       8
94 -2       8
S94 -3      7
S94 -1      3
S94 -2      2
94 -1       2
S94 -4      2
84 -3       1
Name: fielder_rating, dtype: int64

In [162]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [163]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [164]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [165]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,...,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141.0,325,20,3,7,4,5,*974/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,153.0,151.0,138.0,1346.0,448.0,340.0,93.0,15.0,25.0,0.967,7.0,6.0,2.9,2.83,OF-2B,,...,,,,,,,,,,1,MLN,2,153,151,153,151,0,0,0,27,0,0,30,0,104,126,2,0,OF,0.69281,#,A,5,15.0,2,7.0,(21),0.018,,3,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.607843,9,2.222222,5,,,95
1,Al Aber,27,DET,AL,39,19,17,0,1,0,0,0,0,0,0,0,9,0.059,0.059,0.059,0.118,-68.0,1,1,0,2,0,0,1,L,AL,6.0,3.0,0.667,3.38,39.0,1.0,22.0,0.0,0.0,3.0,80.0,86.0,32.0,30.0,9.0,28.0,1.0,37.0,0.0,0.0,1.0,346.0,116.0,3.95,1.425,9.7,1.0,3.2,4.2,1.32,L,DET,AL,39.0,1.0,0.0,80.1,24.0,4.0,17.0,3.0,2.0,0.875,,,2.35,0.54,P,,...,,,,,,,,,,2,DET,4,39,1,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,2,65,F (n-35),0.27,W,80.0,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.104651,+,12.0,46.0,9.0,54.0,+W2,(13-21),+W2 (13-21),,0.435897,8,0.102564,4,,,84
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,0,0,0,0,0,0,6,0.154,0.154,0.154,0.308,-15.0,4,1,0,4,0,0,1,R,AL,5.0,9.0,0.357,5.96,40.0,14.0,12.0,3.0,2.0,0.0,119.1,136.0,87.0,79.0,9.0,67.0,1.0,79.0,7.0,0.0,2.0,553.0,66.0,3.88,1.701,10.3,0.7,5.1,6.0,1.18,R,WSH,AL,40.0,14.0,3.0,119.1,31.0,8.0,22.0,1.0,2.0,0.968,,,2.26,0.75,P,,...,,,,,,,,,,3,WSH,1st,40,14,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.284,X,119.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.066176,,14.0,44.0,9.0,54.0,X3,(14-23),X3 (14-23),,0.55,8,0.2,4,,,84
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,3,6,32,2,8,89,69,0.243,0.413,0.359,0.773,118.0,111,6,3,3,3,2,89H7/3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,100.0,87.0,51.0,753.1,205.0,195.0,7.0,3.0,1.0,0.985,-7.0,-11.0,2.41,2.02,OF-1B,,...,,,,,,,,,,4,BAL,7,118,87,118,98,0,0,4,0,0,0,13,58,46,96,20,4,OF,0.271186,,C+,3,13.0,1,4.0,(14),0.014,,8,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.059322,8,1.652542,4,,,84
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,3,2,23,2,0,24,25,0.251,0.352,0.386,0.738,93.0,66,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,...,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.25,,B,2,12.0,3,5.0,(15),0.037,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,,,1.141304,8,0.51087,4,,,84


In [166]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'RF/9', 'RF/G', 'Pos_Summary_fld',
       'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat', 'CG_cat', 'Inn_cat', 'Ch_cat',
       'PO_cat', 'A_cat', 'E_cat', 'DP_cat', 'Fld%_cat', 'Rtot_cat',
       'Rtot/yr_cat', 'Rctch', 'RF/9_cat', 'RF/G_cat', 'PB', 'WP_cat',
       'SB_cat', 'CS_cat', 'CS%', 'Rk', 'Tm_app', 'Yrs', 'G_app', 'GS_app',
       'Batting', 'Defense'

In [167]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141.0,325,20,3,7,4,5,*974/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,153.0,151.0,138.0,1346.0,448.0,340.0,93.0,15.0,25.0,0.967,7.0,6.0,2.9,2.83,OF-2B,,,,,,,,,,,,,,,,,,,,,,,1,MLN,2,153,151,153,151,0,0,0,27,0,0,30,0,104,126,2,0,OF,0.69281,#,A,5,15.0,2,7.0,(21),0.018,,3,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.607843,9,2.222222,5,,,95
1,Al Aber,27,DET,AL,39,19,17,0,1,0,0,0,0,0,0,0,9,0.059,0.059,0.059,0.118,-68.0,1,1,0,2,0,0,1,L,AL,6.0,3.0,0.667,3.38,39.0,1.0,22.0,0.0,0.0,3.0,80.0,86.0,32.0,30.0,9.0,28.0,1.0,37.0,0.0,0.0,1.0,346.0,116.0,3.95,1.425,9.7,1.0,3.2,4.2,1.32,L,DET,AL,39.0,1.0,0.0,80.1,24.0,4.0,17.0,3.0,2.0,0.875,,,2.35,0.54,P,,,,,,,,,,,,,,,,,,,,,,,2,DET,4,39,1,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,2,65,F (n-35),0.27,W,80.0,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.104651,+,12.0,46.0,9.0,54.0,+W2,(13-21),+W2 (13-21),,0.435897,8,0.102564,4,,,84
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,0,0,0,0,0,0,6,0.154,0.154,0.154,0.308,-15.0,4,1,0,4,0,0,1,R,AL,5.0,9.0,0.357,5.96,40.0,14.0,12.0,3.0,2.0,0.0,119.1,136.0,87.0,79.0,9.0,67.0,1.0,79.0,7.0,0.0,2.0,553.0,66.0,3.88,1.701,10.3,0.7,5.1,6.0,1.18,R,WSH,AL,40.0,14.0,3.0,119.1,31.0,8.0,22.0,1.0,2.0,0.968,,,2.26,0.75,P,,,,,,,,,,,,,,,,,,,,,,,3,WSH,1st,40,14,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.284,X,119.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.066176,,14.0,44.0,9.0,54.0,X3,(14-23),X3 (14-23),,0.55,8,0.2,4,,,84
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,3,6,32,2,8,89,69,0.243,0.413,0.359,0.773,118.0,111,6,3,3,3,2,89H7/3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,100.0,87.0,51.0,753.1,205.0,195.0,7.0,3.0,1.0,0.985,-7.0,-11.0,2.41,2.02,OF-1B,,,,,,,,,,,,,,,,,,,,,,,4,BAL,7,118,87,118,98,0,0,4,0,0,0,13,58,46,96,20,4,OF,0.271186,,C+,3,13.0,1,4.0,(14),0.014,,8,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.059322,8,1.652542,4,,,84
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,3,2,23,2,0,24,25,0.251,0.352,0.386,0.738,93.0,66,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,,,,,,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.25,,B,2,12.0,3,5.0,(15),0.037,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,,,1.141304,8,0.51087,4,,,84


In [168]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,Rk,Tm_app,Yrs,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating,DH
0,Hank Aaron,21,MLN,NL,153,665,602,105,189,37,9,27,106,3,1,49,61,0.314,0.366,0.54,0.906,141.0,325,20,3,7,4,5,*974/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MLN,NL,153.0,151.0,138.0,1346.0,448.0,340.0,93.0,15.0,25.0,0.967,7.0,6.0,2.9,2.83,OF-2B,,,,,,,,,,,,,,,,,,,,,,,1,MLN,2,153,151,153,151,0,0,0,27,0,0,30,0,104,126,2,0,OF,0.69281,#,A,5,15.0,2,7.0,(21),0.018,,3,13,3,6.0,16,0,0.0,,10,53,#A15(21) (13-16),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.607843,9,2.222222,5,,,95,0
1,Al Aber,27,DET,AL,39,19,17,0,1,0,0,0,0,0,0,0,9,0.059,0.059,0.059,0.118,-68.0,1,1,0,2,0,0,1,L,AL,6.0,3.0,0.667,3.38,39.0,1.0,22.0,0.0,0.0,3.0,80.0,86.0,32.0,30.0,9.0,28.0,1.0,37.0,0.0,0.0,1.0,346.0,116.0,3.95,1.425,9.7,1.0,3.2,4.2,1.32,L,DET,AL,39.0,1.0,0.0,80.1,24.0,4.0,17.0,3.0,2.0,0.875,,,2.35,0.54,P,,,,,,,,,,,,,,,,,,,,,,,2,DET,4,39,1,39,39,39,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,F,0,,0,0.0,,0.0,,0,n,17,17.0,35,0,0.0,,2,65,F (n-35),0.27,W,80.0,2.0,3.0,13.0,4.0,7.0,21.0,0,0.0,,,0.104651,+,12.0,46.0,9.0,54.0,+W2,(13-21),+W2 (13-21),,0.435897,8,0.102564,4,,,84,0
2,Ted Abernathy,22,WSH,AL,40,31,26,1,4,0,0,0,0,0,0,0,6,0.154,0.154,0.154,0.308,-15.0,4,1,0,4,0,0,1,R,AL,5.0,9.0,0.357,5.96,40.0,14.0,12.0,3.0,2.0,0.0,119.1,136.0,87.0,79.0,9.0,67.0,1.0,79.0,7.0,0.0,2.0,553.0,66.0,3.88,1.701,10.3,0.7,5.1,6.0,1.18,R,WSH,AL,40.0,14.0,3.0,119.1,31.0,8.0,22.0,1.0,2.0,0.968,,,2.26,0.75,P,,,,,,,,,,,,,,,,,,,,,,,3,WSH,1st,40,14,40,40,40,0,0,0,0,0,0,0,0,0,0,0,P,0.0,,D,0,,0,0.0,,0.0,,0,n,7,7.0,21,0,0.0,,5,62,D (n-21),0.284,X,119.333333,3.0,4.0,14.0,5.0,9.0,23.0,0,0.0,,,0.066176,,14.0,44.0,9.0,54.0,X3,(14-23),X3 (14-23),,0.55,8,0.2,4,,,84,0
3,Cal Abrams,31,BAL,AL,118,407,309,56,75,12,3,6,32,2,8,89,69,0.243,0.413,0.359,0.773,118.0,111,6,3,3,3,2,89H7/3,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BAL,AL,100.0,87.0,51.0,753.1,205.0,195.0,7.0,3.0,1.0,0.985,-7.0,-11.0,2.41,2.02,OF-1B,,,,,,,,,,,,,,,,,,,,,,,4,BAL,7,118,87,118,98,0,0,4,0,0,0,13,58,46,96,20,4,OF,0.271186,,C+,3,13.0,1,4.0,(14),0.014,,8,22,6,14.0,32,0,0.0,,7,56,C+13(14) (22-32),,,,,,,,,,0,0.0,,,,,,,,,,,,,0.059322,8,1.652542,4,,,84,0
4,Bobby Adams,33,TOT,MLB,92,201,171,31,43,11,3,2,23,2,0,24,25,0.251,0.352,0.386,0.738,93.0,66,3,3,2,1,1,5H/4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,ZZ,57.0,45.0,30.0,370.2,157.0,47.0,105.0,5.0,13.0,0.968,9.0,28.0,3.69,2.67,3B-2B,,,,,,,,,,,,,,,,,,,,,,,5,2TM,10,92,45,92,56,0,0,0,6,51,0,0,0,0,0,25,17,3B,0.25,,B,2,12.0,3,5.0,(15),0.037,,4,14,4,8.0,22,1,9.0,/23,8,55,B12(15) (14-22/23),,,,,,,,,,0,0.0,,,,,,,,,,,,,1.141304,8,0.51087,4,,,84,0


In [169]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

2.0      31
1.0      28
7.0      21
6.0      18
4.0      17
         ..
91.0      1
93.0      1
81.0      1
121.0     1
143.0     1
Name: Games_Played, Length: 143, dtype: int64

In [170]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "pitcher_rating", "PCN", "PPH", 
                                "Bats", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Hank Aaron,21,MLN,153.0,,,OF-2B,95,#A15(21) (13-16),53,,,,R,,OF,0,0,0,27,0,0,30,0,104,126,0,2,0
1,Al Aber,27,DET,39.0,1.0,22.0,P,84,F (n-35),65,+W2 (13-21),46.0,54.0,L,L,P,39,0,0,0,0,0,0,0,0,0,0,0,0
2,Ted Abernathy,22,WSH,40.0,14.0,12.0,P,84,D (n-21),62,X3 (14-23),44.0,54.0,R,R,P,40,0,0,0,0,0,0,0,0,0,0,0,0
3,Cal Abrams,31,BAL,118.0,,,OF-1B,84,C+13(14) (22-32),56,,,,L,,OF,0,0,4,0,0,0,13,58,46,96,0,20,4
4,Bobby Adams,33,TOT,92.0,,,3B-2B,84,B12(15) (14-22/23),55,,,,R,,3B,0,0,0,6,51,0,0,0,0,0,0,25,17


In [171]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [172]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           301
OF          153
C            70
3B           54
1B           46
SS           43
2B           35
DH_PH_PR     13
              0
Name: Primary, dtype: int64

In [173]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Hank Aaron,21,MLN,153,,,OF-2B,95,#A15(21) (13-16),53,,,,R,,OF,0,0,0,27,0,0,30,0,104,126,0,2,0
1,Al Aber,27,DET,39,1.0,22.0,P,84,F (n-35),65,+W2 (13-21),46.0,54.0,L,L,P,39,0,0,0,0,0,0,0,0,0,0,0,0
2,Ted Abernathy,22,WSH,40,14.0,12.0,P,84,D (n-21),62,X3 (14-23),44.0,54.0,R,R,P,40,0,0,0,0,0,0,0,0,0,0,0,0
3,Cal Abrams,31,BAL,118,,,OF-1B,84,C+13(14) (22-32),56,,,,L,,OF,0,0,4,0,0,0,13,58,46,96,0,20,4
4,Bobby Adams,33,TOT,92,,,3B-2B,84,B12(15) (14-22/23),55,,,,R,,3B,0,0,0,6,51,0,0,0,0,0,0,25,17


In [174]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [175]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [176]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [177]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [178]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,batter_rating,BPH,pitcher_rating,PCN,PPH,B,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
598,Hal Smith,24,BAL,135,,,C,94 -3,B11(12) (12-14),54,,,,R,,C,0,125,0,0,0,0,0,0,0,0,0,12,0
443,Les Moss,30,BAL,29,,,C,S94 -1,A+14 (14-16),52,,,,R,,C,0,49,0,0,0,0,0,0,0,0,0,17,0
210,Tom Gastall,23,BAL,20,,,C,94,D (14-24),62,,,,R,,C,0,15,0,0,0,0,0,0,0,0,0,7,2
652,Gus Triandos,24,BAL,140,,,1B-C-3B,84 -3,B13(14) (13-21),54,,,,R,,1B,0,36,103,0,1,0,0,0,0,0,0,15,0
250,Bob Hale,21,BAL,67,,,1B,84,A+(11) (11-15),51,,,,L,,1B,0,0,44,0,0,0,0,0,0,0,0,27,0
670,Eddie Waitkus,35,BAL,38,,,1B,84,B(12) (14-22),55,,,,L,,1B,0,0,57,0,0,0,0,0,0,0,0,17,0
396,Fred Marsh,31,BAL,89,,,2B-3B-SS,85,C11(12) (14-21),56,,,,R,,2B,0,0,0,76,18,16,0,0,0,0,0,1,1
707,Bobby Young,30,BAL,59,,,2B-3B,S84,C11 (12-16),56,,,,L,,2B,0,0,0,69,2,0,0,0,0,0,0,7,3
362,Don Leppert,24,BAL,40,,,2B,84,E+(14) (14-22),63,,,,L,,2B,0,0,0,35,0,0,0,0,0,0,0,2,7
94,Wayne Causey,18,BAL,68,,,3B-2B-SS,84,D+11(12) (13-22),61,,,,L,,3B,0,0,0,7,55,1,0,0,0,0,0,9,3


In [179]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'BAL':                 Name  Age   Tm    G  GS  GF Positions     DEF  \
 598        Hal Smith   24  BAL  135                 C   94 -3   
 443         Les Moss   30  BAL   29                 C  S94 -1   
 210      Tom Gastall   23  BAL   20                 C     94    
 652     Gus Triandos   24  BAL  140           1B-C-3B   84 -3   
 250         Bob Hale   21  BAL   67                1B     84    
 670    Eddie Waitkus   35  BAL   38                1B     84    
 396       Fred Marsh   31  BAL   89          2B-3B-SS     85    
 707      Bobby Young   30  BAL   59             2B-3B    S84    
 362      Don Leppert   24  BAL   40                2B     84    
 94      Wayne Causey   18  BAL   68          3B-2B-SS     84    
 131        Billy Cox   35  BAL   53          3B-2B-SS    S85    
 511       Jim Pyburn   22  BAL   39             3B-OF    S84    
 318      Bob Kennedy   34  BAL   26          3B-OF-1B     85    
 390     Hank Majeski   38  BAL   16             3B-2B    S84    
 57

In [180]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [181]:
create_excel(my_dict)

17

## Format Excel file

In [200]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [183]:
# wb.sheetnames

In [199]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "BRO": "Brooklyn Dodgers",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCA": "Kansas City Athletics",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MLN": "Milwaukee Braves",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYG": "New York Giants",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBD": "Tampa Bay Devil Rays",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators",
    "WSH": "Washington Senators"
}

In [185]:
# team_dict["ATL"]

In [201]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 8.43
width_2 = 15.0
width_3 = 23.0
width_4 = 6.33
width_5 = 12.83

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_1    
    sheet.column_dimensions['B'].width = width_3
    sheet.column_dimensions['C'].width = width_1
    sheet.column_dimensions['D'].width = width_1
    sheet.column_dimensions['E'].width = width_1
    sheet.column_dimensions['F'].width = width_1
    sheet.column_dimensions['G'].width = width_1
    sheet.column_dimensions['H'].width = width_2
    sheet.column_dimensions['I'].width = width_1
    sheet.column_dimensions['J'].width = width_3
    sheet.column_dimensions['K'].width = width_1
    sheet.column_dimensions['L'].width = width_3
    sheet.column_dimensions['M'].width = width_4
    sheet.column_dimensions['N'].width = width_4
    sheet.column_dimensions['O'].width = width_4
    sheet.column_dimensions['P'].width = width_4
    sheet.column_dimensions['Q'].width = width_5
    sheet.column_dimensions['R'].width = width_4
    sheet.column_dimensions['S'].width = width_4
    sheet.column_dimensions['T'].width = width_4
    sheet.column_dimensions['U'].width = width_4
    sheet.column_dimensions['V'].width = width_4
    sheet.column_dimensions['W'].width = width_4
    sheet.column_dimensions['X'].width = width_4
    sheet.column_dimensions['Y'].width = width_4
    sheet.column_dimensions['Z'].width = width_4
    sheet.column_dimensions['AA'].width = width_4
    sheet.column_dimensions['AB'].width = width_4
    sheet.column_dimensions['AC'].width = width_4
    sheet.column_dimensions['AD'].width = width_4
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment
            cell.font = Font(size = 14)
    
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
            

wb.save('../data/' + year + ' rosters ' + ' formatted.xlsx')

# Clean up

## Remove unwanted files

In [202]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')