# Description

Python code to scrape player data from baseball-reference.com and rate players using SherCo PLUS ratings. Ratings are effective for any season since and including 1950.

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from lxml import html
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, parse_qs

from selenium import webdriver
import time

import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import Border, Side, PatternFill, Font, GradientFill, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.formatting import Rule
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.dimensions import ColumnDimension, DimensionHolder

from bisect import bisect

import os

# Scrape season data

***NOTE: SIMPLY CHANGE THE YEAR VALUE THEN RUN ALL CELLS BELOW. THE RESULT WILL BE A FORMATTED EXCEL FILE WITH THAT YEAR'S PLAYER RATINGS. COMMENT OUT THE CLEAN UP SECTION IF YOU DON'T WANT TO REMOVE INTERIM FILES***

In [1037]:
year = '2017'

In [1038]:
url_season = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standings.shtml'

url_bat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-batting.shtml'
url_pit = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-pitching.shtml'
url_fld = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-standard-fielding.shtml'
url_cat = 'https://www.baseball-reference.com/leagues/MLB/' + year + '-specialpos_c-fielding.shtml'

In [1039]:
url_cat

'https://www.baseball-reference.com/leagues/MLB/2017-specialpos_c-fielding.shtml'

In [1040]:
session_requests = requests.session()

result = session_requests.get(url_season, headers = dict(referer = url_season))
result.status_code

200

In [1041]:
# https://github.com/BenKite/baseball_data/blob/master/baseballReferenceScrape.py
def findTables(url):
    res = requests.get(url)
    ## The next two lines get around the issue with comments breaking the parsing.
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    divs = soup.find_all('div', id = "content")
    divs = divs[0].find_all("div", id=re.compile("^all"))
    ids = []
    for div in divs:
        searchme = str(div.find_all("table"))
        x = searchme[searchme.find("id=") + 3: searchme.find(">")]
        x = x.replace("\"", "")
        if len(x) > 0:
            ids.append(x)
    return(ids)

In [1042]:
findTables(url_season)

['postseason',
 'standings_E',
 'standings_E',
 'standings_C',
 'standings_W',
 'standings_E',
 'standings_E',
 'standings_C',
 'standings_W',
 'expanded_standings_overall']

In [1043]:
def pullTable(url, tableID):
    res = requests.get(url)
    ## Work around comments
    comm = re.compile("<!--|-->")
    soup = BeautifulSoup(comm.sub("", res.text), 'lxml')
    tables = soup.find_all('table', id = tableID)
    data_rows = tables[0].find_all('tr')
    data_header = tables[0].find_all('thead')
    data_header = data_header[0].find_all("tr")
    data_header = data_header[0].find_all("th")
    game_data = [[td.getText() for td in data_rows[i].find_all(['th','td'])]
        for i in range(len(data_rows))
        ]
    data = pd.DataFrame(game_data)
    header = []
    for i in range(len(data.columns)):
        header.append(data_header[i].getText())
    data.columns = header
    data = data.loc[data[header[0]] != header[0]]
    data = data.reset_index(drop = True)
    return(data)

In [1044]:
season = pullTable(url_season, 'expanded_standings_overall')
season.head()

Unnamed: 0,Rk,Tm,Lg,G,W,L,W-L%,R,RA,Rdiff,SOS,SRS,pythWL,Luck,vEast,vCent,vWest,Inter,Home,Road,ExInn,1Run,vRHP,vLHP,≥.500,<.500
0,1,LAD,NL,162,104,58,0.642,4.8,3.6,1.2,-0.2,0.9,102-60,2,24-10,23-9,41-35,16-4,57-24,47-34,8-4,25-19,71-43,33-15,36-33,68-25
1,2,CLE,AL,162,102,60,0.63,5.0,3.5,1.6,-0.1,1.5,108-54,-6,22-12,50-26,24-8,6-14,49-32,53-28,4-2,20-15,66-36,36-24,27-22,75-38
2,3,HOU,AL,162,101,61,0.624,5.5,4.3,1.2,0.0,1.2,99-63,2,21-13,15-17,50-26,15-5,48-33,53-28,4-4,19-13,80-37,21-24,18-15,83-46
3,4,WSN,NL,162,97,65,0.599,5.1,4.1,0.9,-0.3,0.6,96-66,1,47-29,19-15,21-11,10-10,47-34,50-31,7-4,30-21,75-50,22-15,23-19,74-46
4,5,BOS,AL,162,93,69,0.574,4.8,4.1,0.7,0.1,0.8,93-69,0,41-35,20-14,16-16,16-4,48-33,45-36,15-3,22-19,75-50,18-19,27-23,66-46


In [1045]:
season["Tm"] = [re.sub('w‑', '', x) for x in season["Tm"]]
season["Tm"] = [re.sub('y‑', '', x) for x in season["Tm"]]

In [1046]:
season

Unnamed: 0,Rk,Tm,Lg,G,W,L,W-L%,R,RA,Rdiff,SOS,SRS,pythWL,Luck,vEast,vCent,vWest,Inter,Home,Road,ExInn,1Run,vRHP,vLHP,≥.500,<.500
0,1.0,LAD,NL,162.0,104,58,0.642,4.8,3.6,1.2,-0.2,0.9,102-60,2.0,24-10,23-9,41-35,16-4,57-24,47-34,8-4,25-19,71-43,33-15,36-33,68-25
1,2.0,CLE,AL,162.0,102,60,0.63,5.0,3.5,1.6,-0.1,1.5,108-54,-6.0,22-12,50-26,24-8,6-14,49-32,53-28,4-2,20-15,66-36,36-24,27-22,75-38
2,3.0,HOU,AL,162.0,101,61,0.624,5.5,4.3,1.2,0.0,1.2,99-63,2.0,21-13,15-17,50-26,15-5,48-33,53-28,4-4,19-13,80-37,21-24,18-15,83-46
3,4.0,WSN,NL,162.0,97,65,0.599,5.1,4.1,0.9,-0.3,0.6,96-66,1.0,47-29,19-15,21-11,10-10,47-34,50-31,7-4,30-21,75-50,22-15,23-19,74-46
4,5.0,BOS,AL,162.0,93,69,0.574,4.8,4.1,0.7,0.1,0.8,93-69,0.0,41-35,20-14,16-16,16-4,48-33,45-36,15-3,22-19,75-50,18-19,27-23,66-46
5,6.0,ARI,NL,162.0,93,69,0.574,5.0,4.1,0.9,-0.2,0.8,96-66,-3.0,19-14,17-16,45-31,12-8,52-29,41-40,9-3,29-23,68-52,25-17,39-35,54-34
6,7.0,CHC,NL,162.0,92,70,0.568,5.1,4.3,0.8,-0.2,0.6,93-69,-1.0,21-13,46-30,13-19,12-8,48-33,44-37,7-3,26-17,71-56,21-14,35-35,57-35
7,8.0,NYY,AL,162.0,91,71,0.562,5.3,4.1,1.2,0.1,1.3,100-62,-9.0,44-32,18-15,14-19,15-5,51-30,40-41,5-6,18-26,67-48,24-23,26-22,65-49
8,9.0,COL,NL,162.0,87,75,0.537,5.1,4.7,0.4,-0.1,0.3,87-75,0.0,17-16,18-15,42-34,10-10,46-35,41-40,2-3,21-14,59-55,28-20,37-35,50-40
9,10.0,MIL,NL,162.0,86,76,0.531,4.5,4.3,0.2,-0.1,0.1,85-77,1.0,18-14,40-36,17-17,11-9,46-38,40-38,5-11,25-22,69-59,17-17,37-38,49-38


In [1047]:
my_teams = list(season["Tm"])
my_teams.remove("Avg")
my_teams

['LAD',
 'CLE',
 'HOU',
 'WSN',
 'BOS',
 'ARI',
 'CHC',
 'NYY',
 'COL',
 'MIL',
 'MIN',
 'STL',
 'LAA',
 'TBR',
 'KCR',
 'SEA',
 'TEX',
 'MIA',
 'TOR',
 'PIT',
 'BAL',
 'OAK',
 'ATL',
 'SDP',
 'NYM',
 'CIN',
 'CHW',
 'PHI',
 'SFG',
 'DET']

In [1048]:
team_url = 'https://www.baseball-reference.com/teams/NYY/' + year + '.shtml'

In [1049]:
findTables(team_url)

['team_batting',
 'team_pitching',
 'appearances',
 'coaches',
 'standard_fielding',
 'players_value_batting',
 'players_value_pitching']

In [1050]:
appearances = pullTable(team_url, 'appearances')
appearances.head()

Unnamed: 0,Name,Age,Unnamed: 3,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,WAR,Salary,Unnamed: 29
0,Miguel Andujar,22,do DO,R,R,"6' 0""",211,"Mar 2, 1995",1st,5,1,5,3,0,0,0,0,3,0,0,0,0,0,2,1,0,0.2,,
1,Tyler Austin,25,us US,R,R,"6' 2""",220,"Sep 6, 1991",2,20,11,20,14,0,0,8,0,0,0,0,0,7,7,6,3,0,-0.3,"$544,000",
2,Dellin Betances,29,us US,R,R,"6' 8""",265,"Mar 23, 1988",6,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,1.5,"$3,000,000",All-Star
3,Greg Bird,24,us US,L,R,"6' 4""",220,"Nov 9, 1992",2,48,41,48,46,0,0,46,0,0,0,0,0,0,0,2,6,0,0.0,"$545,500",
4,Chris Carter,30,us US,R,R,"6' 4""",245,"Dec 18, 1986",8,62,49,62,56,0,0,56,0,0,0,0,0,2,2,1,6,0,-0.8,"$3,500,000",


In [1051]:
appearances["Tm"] = "NYY"
appearances.head()

Unnamed: 0,Name,Age,Unnamed: 3,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,WAR,Salary,Unnamed: 29,Tm
0,Miguel Andujar,22,do DO,R,R,"6' 0""",211,"Mar 2, 1995",1st,5,1,5,3,0,0,0,0,3,0,0,0,0,0,2,1,0,0.2,,,NYY
1,Tyler Austin,25,us US,R,R,"6' 2""",220,"Sep 6, 1991",2,20,11,20,14,0,0,8,0,0,0,0,0,7,7,6,3,0,-0.3,"$544,000",,NYY
2,Dellin Betances,29,us US,R,R,"6' 8""",265,"Mar 23, 1988",6,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,1.5,"$3,000,000",All-Star,NYY
3,Greg Bird,24,us US,L,R,"6' 4""",220,"Nov 9, 1992",2,48,41,48,46,0,0,46,0,0,0,0,0,0,0,2,6,0,0.0,"$545,500",,NYY
4,Chris Carter,30,us US,R,R,"6' 4""",245,"Dec 18, 1986",8,62,49,62,56,0,0,56,0,0,0,0,0,2,2,1,6,0,-0.8,"$3,500,000",,NYY


In [1052]:
appearances.drop(columns=["WAR", "Salary", ""])

Unnamed: 0,Name,Age,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
0,Miguel Andujar,22,R,R,"6' 0""",211,"Mar 2, 1995",1st,5,1,5,3,0,0,0,0,3,0,0,0,0,0,2,1,0,NYY
1,Tyler Austin,25,R,R,"6' 2""",220,"Sep 6, 1991",2,20,11,20,14,0,0,8,0,0,0,0,0,7,7,6,3,0,NYY
2,Dellin Betances,29,R,R,"6' 8""",265,"Mar 23, 1988",6,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,NYY
3,Greg Bird,24,L,R,"6' 4""",220,"Nov 9, 1992",2,48,41,48,46,0,0,46,0,0,0,0,0,0,0,2,6,0,NYY
4,Chris Carter,30,R,R,"6' 4""",245,"Dec 18, 1986",8,62,49,62,56,0,0,56,0,0,0,0,0,2,2,1,6,0,NYY
5,Starlin Castro,27,R,R,"6' 2""",220,"Mar 24, 1990",8,112,110,112,109,0,0,0,109,0,0,0,0,0,0,2,2,0,NYY
6,Luis Cessa,25,R,R,"6' 0""",208,"Apr 25, 1992",2,10,5,0,10,10,0,0,0,0,0,0,0,0,0,0,0,0,NYY
7,Aroldis Chapman,29,L,L,"6' 4""",218,"Feb 28, 1988",8,52,0,7,52,52,0,0,0,0,0,0,0,0,0,0,0,0,NYY
8,Ji-Man Choi,26,L,R,"6' 1""",260,"May 19, 1991",2,6,4,6,6,0,0,6,0,0,0,0,0,0,0,0,2,0,NYY
9,Tyler Clippard,32,R,R,"6' 3""",200,"Feb 14, 1985",11,40,0,5,40,40,0,0,0,0,0,0,0,0,0,0,0,0,NYY


In [1053]:
# Now create function to pull appearances by team for all teams into one dataframe
base_app_url = 'https://www.baseball-reference.com/teams/'

def get_apps(year, teams_list):
    app = pd.DataFrame()
    team_count = 0
    for team in teams_list:
        one_team = pullTable(base_app_url + team + '/' + year + '.shtml', 'appearances')
        one_team = one_team.drop(columns=["", "WAR", "Salary"], errors='ignore')
        one_team["Tm"] = team        
        app = app.append(one_team, ignore_index=True)
        time.sleep(5)
        team_count += 1
        
    return app
        

In [1054]:
appearances = get_apps(year, my_teams)

In [1055]:
appearances.head()

Unnamed: 0,Name,Age,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
0,Luis Avilan,27,L,L,"6' 2""",220,"Jul 19, 1989",6,61,0,58,61,61,0,0,0,0,0,0,0,0,0,0,0,0,LAD
1,Pedro Baez,29,R,R,"6' 0""",232,"Mar 11, 1988",4,66,0,61,66,66,0,0,0,0,0,0,0,0,0,0,0,0,LAD
2,Austin Barnes,27,R,R,"5' 10""",187,"Dec 28, 1989",3,102,53,102,76,0,55,0,21,1,0,0,0,0,0,1,34,0,LAD
3,Cody Bellinger,21,L,L,"6' 4""",203,"Jul 13, 1995",1st,132,127,132,128,0,0,93,0,0,0,39,4,5,46,1,4,0,LAD
4,Walker Buehler,22,R,R,"6' 2""",185,"Jul 28, 1994",1st,8,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,LAD


In [1056]:
appearances.tail()

Unnamed: 0,Name,Age,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
1489,Drew VerHagen,26,R,R,"6' 6""",230,"Oct 22, 1990",4,24,2,2,24,24,0,0,0,0,0,0,0,0,0,0,0,0,DET
1490,Justin Verlander,34,R,R,"6' 5""",235,"Feb 20, 1983",13,28,28,2,28,28,0,0,0,0,0,0,0,0,0,0,0,0,DET
1491,Alex Wilson,30,R,R,"6' 0""",227,"Nov 3, 1986",5,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,DET
1492,Justin Wilson,29,L,L,"6' 2""",205,"Aug 18, 1987",6,42,0,2,42,42,0,0,0,0,0,0,0,0,0,0,0,0,DET
1493,Jordan Zimmermann,31,R,R,"6' 2""",225,"May 23, 1986",9,29,29,3,29,29,0,0,0,0,0,0,0,0,0,0,0,0,DET


In [1057]:
findTables(url_bat)

['teams_standard_batting', 'players_standard_batting']

In [1058]:
bat = pullTable(url_bat, 'players_standard_batting')

In [1059]:
findTables(url_pit)

['teams_standard_pitching', 'players_standard_pitching']

In [1060]:
pit = pullTable(url_pit, 'players_standard_pitching')

In [1061]:
findTables(url_fld)

['teams_standard_fielding', 'players_players_standard_fielding_fielding']

In [1062]:
fld = pullTable(url_fld, 'players_players_standard_fielding_fielding')

In [1063]:
findTables(url_cat)

['teams_standard_fielding',
 'teams_advanced_fielding_c',
 'teams_advanced_fielding_c_baserunning',
 'players_players_standard_fielding_fielding',
 'players_players_advanced_fielding_c_fielding',
 'players_players_advanced_fielding_c_baserunning_fielding']

In [1064]:
cat = pullTable(url_cat, 'players_players_standard_fielding_fielding')

In [1065]:
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
1449,1450.0,Jordan Zimmermann,31.0,DET,AL,3,6,6,0,1,0,0,0,0,0,0,0,1,0.167,0.167,0.167,0.333,-10.0,1,0,0,0,0,0,1
1450,1451.0,Ben Zobrist#,36.0,CHC,NL,128,496,435,58,101,20,3,12,50,2,2,54,71,0.232,0.318,0.375,0.693,79.0,163,13,2,2,3,2,479H/36
1451,1452.0,Mike Zunino,26.0,SEA,AL,124,435,387,52,97,25,0,25,64,1,0,39,160,0.251,0.331,0.509,0.84,125.0,197,8,8,0,1,0,*2/HD
1452,1453.0,Tony Zych,26.0,SEA,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1
1453,,LgAvg per 600 PA,,,,200,600,536,73,136,27,3,20,70,8,3,51,130,0.254,0.324,0.425,0.749,,228,12,6,3,4,3,


In [1066]:
bat = bat[bat["Name"] != "LgAvg per 600 PA"]

In [1067]:
def how_bats(names):
    bats = ""
    for name in names:
        if name.rfind("#") > -1:
            bats = "S"
        elif name.rfind("*") > -1:
            bats = "L"
        else:
            bats = "R"
    return bats

In [1068]:
bat["Bats"] = bat["Name"].apply(how_bats)
bat["Bats"].value_counts()

R    917
L    418
S    118
Name: Bats, dtype: int64

In [1069]:
bat["Name"] = [re.sub("[*#]", "", name) for name in bat["Name"]]
bat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1448,1449,Ryan Zimmerman,32,WSN,NL,144,576,524,90,159,33,0,36,108,1,0,44,126,0.303,0.358,0.573,0.93,134.0,300,16,3,0,5,1,*3/HD,R
1449,1450,Jordan Zimmermann,31,DET,AL,3,6,6,0,1,0,0,0,0,0,0,0,1,0.167,0.167,0.167,0.333,-10.0,1,0,0,0,0,0,1,R
1450,1451,Ben Zobrist,36,CHC,NL,128,496,435,58,101,20,3,12,50,2,2,54,71,0.232,0.318,0.375,0.693,79.0,163,13,2,2,3,2,479H/36,S
1451,1452,Mike Zunino,26,SEA,AL,124,435,387,52,97,25,0,25,64,1,0,39,160,0.251,0.331,0.509,0.84,125.0,197,8,8,0,1,0,*2/HD,R
1452,1453,Tony Zych,26,SEA,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,R


In [1070]:
bat.drop(columns=["Rk"], inplace=True)

In [1071]:
bat.tail()

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Bats
1448,Ryan Zimmerman,32,WSN,NL,144,576,524,90,159,33,0,36,108,1,0,44,126,0.303,0.358,0.573,0.93,134.0,300,16,3,0,5,1,*3/HD,R
1449,Jordan Zimmermann,31,DET,AL,3,6,6,0,1,0,0,0,0,0,0,0,1,0.167,0.167,0.167,0.333,-10.0,1,0,0,0,0,0,1,R
1450,Ben Zobrist,36,CHC,NL,128,496,435,58,101,20,3,12,50,2,2,54,71,0.232,0.318,0.375,0.693,79.0,163,13,2,2,3,2,479H/36,S
1451,Mike Zunino,26,SEA,AL,124,435,387,52,97,25,0,25,64,1,0,39,160,0.251,0.331,0.509,0.84,125.0,197,8,8,0,1,0,*2/HD,R
1452,Tony Zych,26,SEA,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,R


In [1072]:
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
919,920.0,Rob Zastryzny*,25.0,CHC,NL,0,0,,8.31,4,0,0,0,0,0,13.0,19,13,12,2,7,0,11,1,0,0,62,54,5.31,2.0,13.2,1.4,4.8,7.6,1.57
920,921.0,Brad Ziegler,37.0,MIA,NL,1,4,0.2,4.79,53,0,21,0,0,10,47.0,57,29,25,1,16,6,26,6,0,1,211,83,3.73,1.553,10.9,0.2,3.1,5.0,1.63
921,922.0,Jordan Zimmermann,31.0,DET,AL,8,13,0.381,6.08,29,29,0,0,0,0,160.0,204,111,108,29,44,2,103,7,0,3,713,74,5.18,1.55,11.5,1.6,2.5,5.8,2.34
922,923.0,Tony Zych,26.0,SEA,AL,6,3,0.667,2.66,45,0,7,0,0,1,40.2,30,12,12,2,21,3,35,5,1,1,173,158,3.99,1.254,6.6,0.4,4.6,7.7,1.67
923,,LgAvg per 180 IP,,,,10,10,0.494,4.37,88,20,21,0,0,5,180.0,176,94,87,25,66,4,167,7,1,8,771,101,4.36,1.343,8.8,1.3,3.3,8.3,2.54


In [1073]:
pit = pit[pit["Name"] != "LgAvg per 180 IP"]

In [1074]:
def how_throws(names):
    throws = ""
    for name in names:
        if name.rfind("*") > -1:
            throws = "L"
        else:
            throws = "R"
    return throws

In [1075]:
pit["Throws"] = pit["Name"].apply(how_throws)
pit["Throws"].value_counts()

R    686
L    237
Name: Throws, dtype: int64

In [1076]:
pit["Name"] = [re.sub("[*#]", "", name) for name in pit["Name"]]
pit.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws
918,919,Chris Young,38,KCR,AL,0,0,,7.5,14,2,5,0,0,0,30.0,47,27,25,7,14,2,22,1,0,1,148,60,6.22,2.033,14.1,2.1,4.2,6.6,1.57,R
919,920,Rob Zastryzny,25,CHC,NL,0,0,,8.31,4,0,0,0,0,0,13.0,19,13,12,2,7,0,11,1,0,0,62,54,5.31,2.0,13.2,1.4,4.8,7.6,1.57,L
920,921,Brad Ziegler,37,MIA,NL,1,4,0.2,4.79,53,0,21,0,0,10,47.0,57,29,25,1,16,6,26,6,0,1,211,83,3.73,1.553,10.9,0.2,3.1,5.0,1.63,R
921,922,Jordan Zimmermann,31,DET,AL,8,13,0.381,6.08,29,29,0,0,0,0,160.0,204,111,108,29,44,2,103,7,0,3,713,74,5.18,1.55,11.5,1.6,2.5,5.8,2.34,R
922,923,Tony Zych,26,SEA,AL,6,3,0.667,2.66,45,0,7,0,0,1,40.2,30,12,12,2,21,3,35,5,1,1,173,158,3.99,1.254,6.6,0.4,4.6,7.7,1.67,R


In [1077]:
pit.drop(columns=["Rk"], inplace=True)

In [1078]:
players = pd.merge(bat, pit, how="outer", on=["Name", "Tm", "Age"], suffixes=('_bat', '_pit'))

In [1079]:
players.shape

(1631, 62)

In [1080]:
fld.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary
1350,1351.0,Jordan Zimmermann,31.0,DET,AL,29,29,0,160.0,14,7,6,1,0,0.929,,,-3.0,-4.0,0.0,0.73,0.45,P
1351,1352.0,Ben Zobrist,36.0,CHC,NL,152,105,49,915.2,340,188,149,3,39,0.991,2.0,3.0,4.0,5.0,2.0,3.31,2.22,2B-OF-SS-1B
1352,1353.0,Mike Zunino,26.0,SEA,AL,120,112,105,994.0,957,894,56,7,5,0.993,-6.0,-7.0,4.0,5.0,-4.0,8.6,7.92,C
1353,1354.0,Tony Zych,26.0,SEA,AL,45,0,0,40.2,4,1,3,0,0,1.0,,,0.0,0.0,0.0,0.89,0.09,P
1354,,LgAvg,,,,216,149,114,1324.0,613,445,158,10,42,0.984,0.0,0.0,,,,4.1,2.8,


In [1081]:
fld = fld[fld["Name"] != "LgAvg"]

In [1082]:
fld.drop(columns=["Rk"], inplace=True)

In [1083]:
players = pd.merge(players, fld, how="left", on=["Name", "Age"], suffixes=('', '_fld'))

In [1084]:
cat.tail()

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rctch,Rdrs,Rdrs/yr,Rgood,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9,RF/G,PB,WP,SB,CS,CS%
121,122.0,Chad Wallach,25.0,CIN,NL,3,2,2,21.0,22,20,1,1,0,0.955,0,0,0,1,57,0,0,1,0,0,0,0,9.0,7.0,0,0,1,0,0%
122,123.0,Matt Wieters,31.0,WSN,NL,118,113,102,1003.2,1074,1023,43,8,5,0.993,1,1,1,-5,-6,-2,0,-2,0,-4,0,3,9.56,9.03,5,28,57,19,25%
123,124.0,Tony Wolters,25.0,COL,NL,77,66,61,586.1,573,528,43,2,5,0.997,3,6,3,0,0,1,0,1,0,-2,4,-3,8.76,7.42,7,28,22,14,39%
124,125.0,Mike Zunino,26.0,SEA,AL,120,112,105,994.0,957,894,56,7,5,0.993,-6,-7,-6,4,5,-4,0,0,0,3,1,4,8.6,7.92,10,52,54,17,24%
125,,LgAvg,,,,20,18,15,158.0,158,148,9,1,1,0.993,0,0,0,0,0,0,0,0,0,0,0,0,8.92,7.78,1,7,9,3,27%


In [1085]:
cat.drop_duplicates(subset=["Name"], keep='first', inplace=True)

In [1086]:
cat = cat[cat["Name"] != "LgAvg"]

In [1087]:
cat.drop(columns=["Rk"], inplace=True)

In [1088]:
players = pd.merge(players, cat, how='left', on=["Name", "Age"], suffixes=('', '_cat'))

In [1089]:
appearances.tail()

Unnamed: 0,Name,Age,B,T,Ht,Wt,DoB,Yrs,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
1489,Drew VerHagen,26,R,R,"6' 6""",230,"Oct 22, 1990",4,24,2,2,24,24,0,0,0,0,0,0,0,0,0,0,0,0,DET
1490,Justin Verlander,34,R,R,"6' 5""",235,"Feb 20, 1983",13,28,28,2,28,28,0,0,0,0,0,0,0,0,0,0,0,0,DET
1491,Alex Wilson,30,R,R,"6' 0""",227,"Nov 3, 1986",5,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,DET
1492,Justin Wilson,29,L,L,"6' 2""",205,"Aug 18, 1987",6,42,0,2,42,42,0,0,0,0,0,0,0,0,0,0,0,0,DET
1493,Jordan Zimmermann,31,R,R,"6' 2""",225,"May 23, 1986",9,29,29,3,29,29,0,0,0,0,0,0,0,0,0,0,0,0,DET


In [1090]:
appearances.columns

Index(['Name', 'Age', 'B', 'T', 'Ht', 'Wt', 'DoB', 'Yrs', 'G', 'GS', 'Batting',
       'Defense', 'P', 'C', '1B', '2B', '3B', 'SS', 'LF', 'CF', 'RF', 'OF',
       'DH', 'PH', 'PR', 'Tm'],
      dtype='object')

In [1091]:
appearances = appearances.drop(columns=["", "B", "T", "Ht", "Wt", "DoB", "Yrs", "WAR", "Salary", ""], axis=1,
                               errors='ignore')
appearances.head()

Unnamed: 0,Name,Age,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
0,Luis Avilan,27,61,0,58,61,61,0,0,0,0,0,0,0,0,0,0,0,0,LAD
1,Pedro Baez,29,66,0,61,66,66,0,0,0,0,0,0,0,0,0,0,0,0,LAD
2,Austin Barnes,27,102,53,102,76,0,55,0,21,1,0,0,0,0,0,1,34,0,LAD
3,Cody Bellinger,21,132,127,132,128,0,0,93,0,0,0,39,4,5,46,1,4,0,LAD
4,Walker Buehler,22,8,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,LAD


In [1092]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%
0,Fernando Abad,31,BOS,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48,0,0,43.2,6,0,6,0,1,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,José Abreu,30,CHW,AL,156,675,621,95,189,43,6,33,102,3,0,35,119,0.304,0.354,0.552,0.906,141.0,343,21,15,0,4,6,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139,138,130,1197.0,1221,1135,78,8,130,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Cristhian Adames,25,COL,NL,12,14,13,1,0,0,0,0,0,0,0,1,6,0.0,0.071,0.0,0.071,-80.0,0,0,0,0,0,0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3,1,0,9.0,4,3,1,0,0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Austin Adams,26,WSN,NL,6,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6,0,0,5.0,1,1,0,0,0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Lane Adams,27,ATL,NL,85,122,109,19,30,4,1,5,20,10,0,10,37,0.275,0.339,0.468,0.807,109.0,51,3,1,1,1,0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41,13,9,172.0,51,48,1,2,0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [1093]:
players.shape

(1631, 113)

In [1094]:
pd.set_option('display.max_rows', 1000)
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%
0,Fernando Abad,31,BOS,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,L,AL,2,1,.667,3.30,48,0,15,0,0,1,43.2,40,18,16,4,14,1,37,1,1,0,182,139,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48,0,0,43.2,6,0,6,0,1,1.000,,,1,5,0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,José Abreu,30,CHW,AL,156,675,621,95,189,43,6,33,102,3,0,35,119,.304,.354,.552,.906,141,343,21,15,0,4,6,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139,138,130,1197.0,1221,1135,78,8,130,.993,3,3,2,2,-1,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Cristhian Adames,25,COL,NL,12,14,13,1,0,0,0,0,0,0,0,1,6,.000,.071,.000,.071,-80,0,0,0,0,0,0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3,1,0,9.0,4,3,1,0,0,1.000,-1,-93,-1,-133,0,4.00,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Austin Adams,26,WSN,NL,6,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,/1,R,NL,0,0,,3.60,6,0,3,0,0,0,5.0,4,4,2,0,8,0,10,1,0,1,29,133,4.56,2.400,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6,0,0,5.0,1,1,0,0,0,1.000,,,0,0,0,1.80,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Lane Adams,27,ATL,NL,85,122,109,19,30,4,1,5,20,10,0,10,37,.275,.339,.468,.807,109,51,3,1,1,1,0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41,13,9,172.0,51,48,1,2,0,.961,1,5,,,,2.56,1.20,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,Blake Wood,31,LAA,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,2,0,1.000,4.76,17,0,2,0,0,0,17.0,20,9,9,3,4,1,22,0,0,1,73,91,3.57,1.412,10.6,1.6,2.1,11.6,5.50,R,TOT,ZZ,72,0,0,74.1,16,9,7,0,0,1.000,,,,,,1.94,0.22,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1627,Hunter Wood,23,TBR,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,0,0,,0.00,1,0,1,0,0,0,0.1,0,0,0,0,0,0,0,0,0,0,1,,3.16,0.000,0.0,0.0,0.0,0.0,,R,TBR,AL,1,0,0,0.1,0,0,0,0,0,,,,0,0,,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1628,Daniel Wright,26,LAA,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,0,1,.000,4.58,5,2,3,0,0,0,19.2,21,12,10,1,8,0,11,2,0,1,85,94,4.23,1.475,9.6,0.5,3.7,5.0,1.38,R,LAA,AL,5,2,0,19.2,3,1,2,0,0,1.000,,,1,10,0,1.37,0.60,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1629,Steven Wright,32,BOS,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,1,3,.250,8.25,5,5,0,0,0,0,24.0,40,24,22,9,5,1,13,2,0,1,114,56,7.82,1.875,15.0,3.4,1.9,4.9,2.60,R,BOS,AL,5,5,0,24.0,3,2,0,1,0,.667,,,-1,-8,0,0.75,0.40,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [1095]:
players["Name"] = players["Name"].str.strip()

In [1096]:
players["Name"] = [' '.join(x.split()) for x in players["Name"]]

In [1097]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%
0,Fernando Abad,31,BOS,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48,0,0,43.2,6,0,6,0,1,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,José Abreu,30,CHW,AL,156,675,621,95,189,43,6,33,102,3,0,35,119,0.304,0.354,0.552,0.906,141.0,343,21,15,0,4,6,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139,138,130,1197.0,1221,1135,78,8,130,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Cristhian Adames,25,COL,NL,12,14,13,1,0,0,0,0,0,0,0,1,6,0.0,0.071,0.0,0.071,-80.0,0,0,0,0,0,0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3,1,0,9.0,4,3,1,0,0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Austin Adams,26,WSN,NL,6,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6,0,0,5.0,1,1,0,0,0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Lane Adams,27,ATL,NL,85,122,109,19,30,4,1,5,20,10,0,10,37,0.275,0.339,0.468,0.807,109.0,51,3,1,1,1,0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41,13,9,172.0,51,48,1,2,0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [1098]:
appearances["Name"] = [' '.join(x.split()) for x in appearances["Name"]]
appearances.head()

Unnamed: 0,Name,Age,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
0,Luis Avilan,27,61,0,58,61,61,0,0,0,0,0,0,0,0,0,0,0,0,LAD
1,Pedro Baez,29,66,0,61,66,66,0,0,0,0,0,0,0,0,0,0,0,0,LAD
2,Austin Barnes,27,102,53,102,76,0,55,0,21,1,0,0,0,0,0,1,34,0,LAD
3,Cody Bellinger,21,132,127,132,128,0,0,93,0,0,0,39,4,5,46,1,4,0,LAD
4,Walker Buehler,22,8,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,LAD


In [1099]:
appearances["Name"] = appearances["Name"].str.replace(" HOF", "")
appearances

Unnamed: 0,Name,Age,G,GS,Batting,Defense,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR,Tm
0,Luis Avilan,27,61,0,58,61,61,0,0,0,0,0,0,0,0,0,0,0,0,LAD
1,Pedro Baez,29,66,0,61,66,66,0,0,0,0,0,0,0,0,0,0,0,0,LAD
2,Austin Barnes,27,102,53,102,76,0,55,0,21,1,0,0,0,0,0,1,34,0,LAD
3,Cody Bellinger,21,132,127,132,128,0,0,93,0,0,0,39,4,5,46,1,4,0,LAD
4,Walker Buehler,22,8,0,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,LAD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1489,Drew VerHagen,26,24,2,2,24,24,0,0,0,0,0,0,0,0,0,0,0,0,DET
1490,Justin Verlander,34,28,28,2,28,28,0,0,0,0,0,0,0,0,0,0,0,0,DET
1491,Alex Wilson,30,66,0,6,66,66,0,0,0,0,0,0,0,0,0,0,0,0,DET
1492,Justin Wilson,29,42,0,2,42,42,0,0,0,0,0,0,0,0,0,0,0,0,DET


In [1100]:
players = pd.merge(players, appearances, how='left', on=["Name", "Tm", "Age"], suffixes=('', '_app'))

In [1101]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
0,Fernando Abad,31,BOS,AL,4,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48,0,0,43.2,6,0,6,0,1,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48,0,4,48,48,0,0,0,0,0,0,0,0,0,0,0,0
1,José Abreu,30,CHW,AL,156,675,621,95,189,43,6,33,102,3,0,35,119,0.304,0.354,0.552,0.906,141.0,343,21,15,0,4,6,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139,138,130,1197.0,1221,1135,78,8,130,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,156,156,156,139,0,0,139,0,0,0,0,0,0,0,18,0,0
2,Cristhian Adames,25,COL,NL,12,14,13,1,0,0,0,0,0,0,0,1,6,0.0,0.071,0.0,0.071,-80.0,0,0,0,0,0,0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3,1,0,9.0,4,3,1,0,0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,1,12,2,0,0,1,1,0,1,0,0,0,0,0,10,0
3,Austin Adams,26,WSN,NL,6,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,0,0,0,0,0,0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6,0,0,5.0,1,1,0,0,0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,0,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0
4,Lane Adams,27,ATL,NL,85,122,109,19,30,4,1,5,20,10,0,10,37,0.275,0.339,0.468,0.807,109.0,51,3,1,1,1,0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41,13,9,172.0,51,48,1,2,0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,85,13,85,41,0,0,0,0,0,0,27,11,7,41,0,51,9


In [1102]:
players.to_csv("../data/player stats" + " - " + year + ".csv", index=False)

# Rate Players

In [1103]:
players = pd.read_csv("../data/player stats - " + year + ".csv")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0


In [1104]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR
1626,Blake Wood,31,LAA,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,2.0,0.0,1.0,4.76,17.0,0.0,2.0,0.0,0.0,0.0,17.0,20.0,9.0,9.0,3.0,4.0,1.0,22.0,0.0,0.0,1.0,73.0,91.0,3.57,1.412,10.6,1.6,2.1,11.6,5.5,R,TOT,ZZ,72.0,0.0,0.0,74.1,16.0,9.0,7.0,0.0,0.0,1.0,,,,,,1.94,0.22,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17.0,0.0,0.0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1627,Hunter Wood,23,TBR,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.16,0.0,0.0,0.0,0.0,0.0,,R,TBR,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1628,Daniel Wright,26,LAA,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,0.0,1.0,0.0,4.58,5.0,2.0,3.0,0.0,0.0,0.0,19.2,21.0,12.0,10.0,1.0,8.0,0.0,11.0,2.0,0.0,1.0,85.0,94.0,4.23,1.475,9.6,0.5,3.7,5.0,1.38,R,LAA,AL,5.0,2.0,0.0,19.2,3.0,1.0,2.0,0.0,0.0,1.0,,,1.0,10.0,0.0,1.37,0.6,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,2.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1629,Steven Wright,32,BOS,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,1.0,3.0,0.25,8.25,5.0,5.0,0.0,0.0,0.0,0.0,24.0,40.0,24.0,22.0,9.0,5.0,1.0,13.0,2.0,0.0,1.0,114.0,56.0,7.82,1.875,15.0,3.4,1.9,4.9,2.6,R,BOS,AL,5.0,5.0,0.0,24.0,3.0,2.0,0.0,1.0,0.0,0.667,,,-1.0,-8.0,0.0,0.75,0.4,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1630,Kirby Yates,30,LAA,,,,,,,,,,,,,,,,,,,,,,,,,,,,AL,0.0,0.0,,18.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,32.0,27.16,2.0,18.0,18.0,0.0,9.0,,R,TOT,ZZ,62.0,0.0,0.0,56.2,2.0,0.0,2.0,0.0,0.0,1.0,,,,,,0.32,0.03,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [1105]:
pd.set_option('max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1106]:
list(players.columns.values)

['Name',
 'Age',
 'Tm',
 'Lg_bat',
 'G_bat',
 'PA',
 'AB',
 'R_bat',
 'H_bat',
 '2B',
 '3B',
 'HR_bat',
 'RBI',
 'SB',
 'CS',
 'BB_bat',
 'SO_bat',
 'BA',
 'OBP',
 'SLG',
 'OPS',
 'OPS+',
 'TB',
 'GDP',
 'HBP_bat',
 'SH',
 'SF',
 'IBB_bat',
 'Pos\xa0Summary',
 'Bats',
 'Lg_pit',
 'W',
 'L',
 'W-L%',
 'ERA',
 'G_pit',
 'GS',
 'GF',
 'CG',
 'SHO',
 'SV',
 'IP',
 'H_pit',
 'R_pit',
 'ER',
 'HR_pit',
 'BB_pit',
 'IBB_pit',
 'SO_pit',
 'HBP_pit',
 'BK',
 'WP',
 'BF',
 'ERA+',
 'FIP',
 'WHIP',
 'H9',
 'HR9',
 'BB9',
 'SO9',
 'SO/W',
 'Throws',
 'Tm_fld',
 'Lg',
 'G',
 'GS_fld',
 'CG_fld',
 'Inn',
 'Ch',
 'PO',
 'A',
 'E',
 'DP',
 'Fld%',
 'Rtot',
 'Rtot/yr',
 'Rdrs',
 'Rdrs/yr',
 'Rgood',
 'RF/9',
 'RF/G',
 'Pos\xa0Summary_fld',
 'Tm_cat',
 'Lg_cat',
 'G_cat',
 'GS_cat',
 'CG_cat',
 'Inn_cat',
 'Ch_cat',
 'PO_cat',
 'A_cat',
 'E_cat',
 'DP_cat',
 'Fld%_cat',
 'Rtot_cat',
 'Rtot/yr_cat',
 'Rctch',
 'Rdrs_cat',
 'Rdrs/yr_cat',
 'Rgood_cat',
 'Rair',
 'Rrange',
 'Rthrow',
 'RszC',
 'RsbC',
 'Re

## Create primary position variable

In [1107]:
players.rename(columns={
    "Pos\xa0Summary": "Pos_Summary",
    "Pos\xa0Summary_fld": "Pos_Summary_fld"
}, inplace=True)

In [1108]:
players["Primary_Pos_fld"] = players['Pos_Summary_fld'].str.split("-").str[0]
players["Primary_Pos_fld"].value_counts()

P     903
OF    284
C     118
2B     88
3B     83
SS     77
1B     74
Name: Primary_Pos_fld, dtype: int64

## Batter Ratings

In [1109]:
# pd.set_option('display.max_columns', 200)
# players.columns

### Clutch Rating

In [1110]:
players["rbi_per_g"] = players["RBI"] / players["G_bat"]
players["rbi_per_g"].value_counts()

0.000000    677
0.333333     17
0.500000     15
0.200000     12
0.166667      8
0.100000      8
0.400000      8
0.250000      7
0.066667      7
0.062500      6
0.125000      6
0.083333      6
0.153846      6
0.285714      6
0.272727      5
0.142857      5
0.181818      5
1.000000      5
0.074074      4
0.071429      4
0.148148      4
0.090909      4
0.363636      4
0.600000      4
0.375000      4
0.437500      4
0.666667      4
0.050000      3
0.240000      3
0.384615      3
0.111111      3
0.052632      3
0.183333      3
0.190476      3
0.214286      3
0.428571      3
0.058824      3
0.136364      3
0.300000      3
0.571429      3
0.176471      3
0.277778      3
0.060606      3
0.210526      3
0.120000      3
0.230769      3
0.216216      3
0.055556      3
0.235294      3
0.344828      3
0.530612      2
0.458333      2
0.329268      2
0.103448      2
0.232143      2
0.450980      2
0.064516      2
0.555556      2
0.228571      2
0.324324      2
0.407407      2
0.461538      2
0.194444

In [1111]:
players["clutch"] = (round(players["rbi_per_g"], 3) >= .6).astype(int)
players["clutch"] = players["clutch"].map({0: "", 1: "#"}).astype(str)
players["clutch"].value_counts()

     1543
#      88
Name: clutch, dtype: int64

In [1112]:
players[players["clutch"] == "#"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#
13,Andrew Albers,31,SEA,AL,1.0,3.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,453.0,1.0,0.0,0.0,2.0,0.0,0.0,/1,R,AL,5.0,1.0,0.833,3.51,9.0,6.0,2.0,0.0,0.0,1.0,41.0,43.0,22.0,16.0,6.0,10.0,0.0,37.0,2.0,0.0,1.0,178.0,119.0,4.13,1.293,9.4,1.3,2.2,8.1,3.7,L,SEA,AL,9.0,6.0,0.0,41.0,6.0,1.0,5.0,0.0,0.0,1.0,,,1.0,5.0,0.0,1.32,0.67,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.0,6.0,1.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,1.0,#
31,Aaron Altherr,26,PHI,NL,107.0,412.0,372.0,58.0,101.0,24.0,5.0,19.0,65.0,5.0,4.0,32.0,104.0,0.272,0.34,0.516,0.856,122.0,192.0,12.0,7.0,0.0,1.0,2.0,798/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,NL,106.0,92.0,73.0,837.1,221.0,211.0,7.0,3.0,4.0,0.986,-4.0,-5.0,,,,2.34,2.06,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,107.0,92.0,107.0,106.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,19.0,50.0,106.0,0.0,7.0,0.0,OF,0.607477,#
45,Miguel Andujar,22,NYY,AL,5.0,8.0,7.0,0.0,4.0,2.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.571,0.625,0.857,1.482,286.0,6.0,0.0,0.0,0.0,0.0,0.0,/5DH,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,3.0,0.0,0.0,9.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,27.0,0.0,0.0,0.0,2.0,0.67,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,1.0,5.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,3B,0.8,#
49,Nori Aoki,35,TOR,AL,12.0,34.0,32.0,4.0,9.0,1.0,0.0,3.0,8.0,0.0,0.0,1.0,5.0,0.281,0.294,0.594,0.888,128.0,19.0,0.0,0.0,0.0,1.0,0.0,/97H8,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,1.33,OF-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.0,7.0,12.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,8.0,10.0,0.0,2.0,0.0,OF,0.666667,#
55,Nolan Arenado,26,COL,NL,159.0,680.0,606.0,100.0,187.0,43.0,7.0,37.0,130.0,3.0,2.0,62.0,106.0,0.309,0.373,0.586,0.959,130.0,355.0,21.0,4.0,1.0,6.0,9.0,*5/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,157.0,156.0,140.0,1343.1,423.0,103.0,311.0,9.0,39.0,0.979,10.0,9.0,15.0,13.0,3.0,2.77,2.64,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,159.0,156.0,159.0,157.0,0.0,0.0,0.0,0.0,157.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3B,0.81761,#
105,Cody Bellinger,21,LAD,NL,132.0,548.0,480.0,87.0,128.0,26.0,4.0,39.0,97.0,10.0,3.0,64.0,146.0,0.267,0.352,0.581,0.933,143.0,279.0,5.0,1.0,0.0,3.0,13.0,37/98HD,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LAD,NL,139.0,126.0,107.0,1115.0,768.0,725.0,38.0,5.0,62.0,0.993,-5.0,-6.0,0.0,0.0,1.0,6.16,5.49,1B-OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,132.0,127.0,132.0,128.0,0.0,0.0,93.0,0.0,0.0,0.0,39.0,4.0,5.0,46.0,1.0,4.0,0.0,1B,0.734848,#
108,Adrian Beltre,38,TEX,AL,94.0,389.0,340.0,47.0,106.0,22.0,1.0,17.0,71.0,1.0,0.0,39.0,52.0,0.312,0.383,0.532,0.915,132.0,181.0,7.0,4.0,0.0,6.0,2.0,5D/H,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TEX,AL,65.0,65.0,53.0,552.1,190.0,50.0,135.0,5.0,13.0,0.974,11.0,25.0,9.0,20.0,0.0,3.01,2.85,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,94.0,93.0,94.0,65.0,0.0,0.0,0.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0,3B,0.755319,#
119,Mookie Betts,24,BOS,AL,153.0,712.0,628.0,101.0,166.0,46.0,2.0,24.0,102.0,26.0,3.0,77.0,79.0,0.264,0.344,0.459,0.803,108.0,288.0,9.0,2.0,0.0,5.0,9.0,*9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BOS,AL,153.0,153.0,148.0,1389.1,379.0,366.0,8.0,5.0,1.0,0.987,22.0,19.0,,,,2.42,2.44,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,153.0,153.0,153.0,153.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,153.0,153.0,0.0,0.0,0.0,OF,0.666667,#
125,Charlie Blackmon,30,COL,NL,159.0,725.0,644.0,137.0,213.0,35.0,14.0,37.0,104.0,14.0,10.0,65.0,135.0,0.331,0.399,0.601,1.0,141.0,387.0,4.0,10.0,3.0,3.0,9.0,*8/H,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,158.0,158.0,142.0,1366.1,347.0,339.0,4.0,4.0,3.0,0.988,-4.0,-3.0,,,,2.26,2.17,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,159.0,158.0,159.0,158.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,158.0,0.0,158.0,0.0,1.0,0.0,OF,0.654088,#


### Letter Rating

In [1113]:
players["BA"].value_counts()

0.000    201
0.200     23
0.250     22
0.167     18
0.231     17
0.143     16
0.500     15
0.333     13
0.255     13
0.259     13
0.273     12
0.241     10
0.253     10
0.235     10
1.000     10
0.264      9
0.125      9
0.288      9
0.091      9
0.211      8
0.233      8
0.267      8
0.271      8
0.249      8
0.222      8
0.154      8
0.282      7
0.217      7
0.230      7
0.246      7
0.225      7
0.111      7
0.272      7
0.232      7
0.276      7
0.286      6
0.265      6
0.100      6
0.270      6
0.263      6
0.256      6
0.266      6
0.170      6
0.260      6
0.279      5
0.193      5
0.277      5
0.216      5
0.275      5
0.208      5
0.293      5
0.274      5
0.238      5
0.290      5
0.300      5
0.158      5
0.285      5
0.262      5
0.242      5
0.251      5
0.308      5
0.261      5
0.220      5
0.206      5
0.303      5
0.247      5
0.254      5
0.281      5
0.268      5
0.278      5
0.284      5
0.243      5
0.283      4
0.234      4
0.297      4
0.083      4
0.239      4

In [1114]:
players["BA"].isnull().sum()

530

In [1115]:
players["BA"].replace(np.nan, 0.000, inplace=True)

In [1116]:
break_points = [
    0.029,
    0.057,
    0.084,
    0.112,
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362,
    0.390
]

letters = [
    "G",
    "G+",
    "F",
    "E",
    "E+",
    "D",
    "D+",
    "C",
    "C+",
    "B",
    "B+",
    "A",
    "A+",
    "AA",
    "AAA"
]

def batter_letter(bat_avg, breakpoints=break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg)
    return letter_grades[i]

In [1117]:
players["bat_letter"] = [batter_letter(avg) for avg in players["BA"]]
players["bat_letter"].value_counts()

G      733
B      185
C+     168
C      115
B+      97
D       81
D+      58
A       48
E       37
AAA     29
E+      28
F       25
G+      16
A+       7
AA       4
Name: bat_letter, dtype: int64

In [1118]:
players[players["bat_letter"] == "AAA"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter
13,Andrew Albers,31,SEA,AL,1.0,3.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,453.0,1.0,0.0,0.0,2.0,0.0,0.0,/1,R,AL,5.0,1.0,0.833,3.51,9.0,6.0,2.0,0.0,0.0,1.0,41.0,43.0,22.0,16.0,6.0,10.0,0.0,37.0,2.0,0.0,1.0,178.0,119.0,4.13,1.293,9.4,1.3,2.2,8.1,3.7,L,SEA,AL,9.0,6.0,0.0,41.0,6.0,1.0,5.0,0.0,0.0,1.0,,,1.0,5.0,0.0,1.32,0.67,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.0,6.0,1.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,1.0,#,AAA
45,Miguel Andujar,22,NYY,AL,5.0,8.0,7.0,0.0,4.0,2.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.571,0.625,0.857,1.482,286.0,6.0,0.0,0.0,0.0,0.0,0.0,/5DH,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,3.0,0.0,0.0,9.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,27.0,0.0,0.0,0.0,2.0,0.67,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,1.0,5.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,3B,0.8,#,AAA
126,Aaron Blair,25,ATL,NL,1.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,2.0,429.0,1.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,1.0,0.0,15.0,1.0,1.0,0.0,0.0,0.0,0.0,3.0,5.0,5.0,5.0,1.0,5.0,0.0,3.0,0.0,0.0,1.0,19.0,32.0,10.49,3.333,15.0,3.0,15.0,9.0,0.6,R,ATL,NL,1.0,1.0,0.0,3.0,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,3.0,1.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,AAA
170,Jake Buchanan,27,CIN,NL,4.0,3.0,2.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.5,0.667,0.5,1.167,213.0,1.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,8.16,5.0,0.0,3.0,0.0,0.0,0.0,14.1,24.0,13.0,13.0,1.0,7.0,2.0,4.0,3.0,0.0,0.0,77.0,56.0,5.6,2.163,15.1,0.6,4.4,2.5,0.57,R,CIN,NL,5.0,0.0,0.0,14.1,4.0,0.0,3.0,1.0,0.0,0.75,,,0.0,0.0,0.0,1.88,0.6,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,0.0,4.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.25,,AAA
225,Darrell Ceciliani,27,TOR,AL,3.0,5.0,5.0,2.0,2.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.4,0.4,1.2,1.6,301.0,6.0,0.0,0.0,0.0,0.0,0.0,/H8D7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOR,AL,2.0,1.0,0.0,6.0,2.0,2.0,0.0,0.0,0.0,1.0,1.0,120.0,,,,3.0,1.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,OF,1.0,#,AAA
295,Chase d'Arnaud,30,BOS,AL,2.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,430.0,1.0,0.0,0.0,0.0,0.0,0.0,/H4,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,23.0,12.0,8.0,116.1,54.0,25.0,26.0,3.0,10.0,0.944,-5.0,-51.0,,0.0,,3.95,2.22,SS-2B-OF-3B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,SS,0.0,,AAA
354,Danny Duffy,28,KCR,AL,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.5,1.0,169.0,1.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,9.0,10.0,0.474,3.81,24.0,24.0,0.0,0.0,0.0,0.0,146.1,143.0,67.0,62.0,13.0,41.0,0.0,130.0,4.0,2.0,2.0,609.0,117.0,3.46,1.257,8.8,0.8,2.5,8.0,3.17,L,KCR,AL,24.0,24.0,0.0,146.1,17.0,5.0,12.0,0.0,1.0,1.0,,,-2.0,-3.0,0.0,1.05,0.71,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24.0,24.0,1.0,24.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,AAA
373,Dietrich Enns,26,MIN,AL,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.5,1.0,170.0,1.0,0.0,0.0,0.0,0.0,0.0,/1,L,AL,0.0,0.0,,6.75,2.0,1.0,0.0,0.0,0.0,0.0,4.0,7.0,4.0,3.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,21.0,71.0,9.41,2.0,15.8,4.5,2.3,4.5,2.0,L,MIN,AL,2.0,1.0,0.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,1.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,AAA
451,Yovani Gallardo,31,SEA,AL,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,1.0,176.0,1.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,5.0,10.0,0.333,5.72,28.0,22.0,4.0,0.0,0.0,1.0,130.2,138.0,84.0,83.0,24.0,60.0,3.0,94.0,2.0,0.0,7.0,578.0,73.0,5.53,1.515,9.5,1.7,4.1,6.5,1.57,R,SEA,AL,28.0,22.0,0.0,130.2,25.0,8.0,17.0,0.0,2.0,1.0,,,0.0,0.0,0.0,1.72,0.89,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,28.0,22.0,1.0,28.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,AAA
487,Sean Gilmartin,27,NYM,NL,2.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0,678.0,2.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,13.5,2.0,0.0,1.0,0.0,0.0,0.0,3.1,8.0,5.0,5.0,2.0,1.0,0.0,4.0,0.0,0.0,1.0,19.0,34.0,9.46,2.7,21.6,5.4,2.7,10.8,4.0,L,NYM,NL,2.0,0.0,0.0,3.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.5,,AAA


In [1119]:
players[players["bat_letter"] == "C+"]["BA"].min()

0.223

### HR Number

In [1120]:
players["hr_rate"] = round(players["HR_bat"] / players["H_bat"] * 36, 0)
players["hr_rate"].replace(np.nan, 0, inplace=True)
players["hr_rate"] = players["hr_rate"].astype(int)
players["hr_rate"].value_counts()

0     1032
4      100
3       74
5       72
6       71
7       65
2       60
9       32
8       31
10      21
1       19
12      17
18      10
11       6
13       6
14       5
15       3
24       2
16       2
36       2
19       1
Name: hr_rate, dtype: int64

In [1121]:
players["hr_num_bat"] = players["hr_rate"].map({
    0: "",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["hr_num_bat"].value_counts()

      1032
14     100
13      74
15      72
16      71
21      65
12      60
23      32
22      31
24      21
11      19
26      17
36      10
31       6
25       6
32       5
33       3
34       2
66       2
46       2
41       1
Name: hr_num_bat, dtype: int64

### Triple Number

In [1122]:
players["triple_rate"] = round(players["3B"] / players["H_bat"] * 36, 0)
players["triple_rate"].replace(np.nan, 0, inplace=True)
players["triple_rate"] = players["triple_rate"].astype(int)
players["triple_rate"].value_counts()

0     1350
1      169
2       68
3       15
4        9
5        5
12       4
9        4
7        4
6        2
29       1
Name: triple_rate, dtype: int64

In [1123]:
players.loc[(players["triple_rate"] == 0), "triple_val"] = 0
players.loc[(players["triple_rate"] > 0), "triple_val"] = players["hr_rate"] + players["triple_rate"]
players["triple_val"].value_counts()

0.0     1350
6.0       42
4.0       40
5.0       35
7.0       35
8.0       26
3.0       25
2.0       20
9.0       20
10.0      14
12.0       6
1.0        5
11.0       5
13.0       2
16.0       2
21.0       1
18.0       1
17.0       1
29.0       1
Name: triple_val, dtype: int64

In [1124]:
players["triple_num"] = players["triple_val"].map({
    0: "",
    1: "(11)",
    2: "(12)",
    3: "(13)",
    4: "(14)",
    5: "(15)",
    6: "(16)",
    7: "(21)",
    8: "(22)",
    9: "(23)",
    10: "(24)",
    11: "(25)",
    12: "(26)",
    13: "(31)",
    14: "(32)",
    15: "(33)",
    16: "(34)",
    17: "(35)",
    18: "(36)",
    19: "(41)",
    20: "(42)",
    21: "(43)",
    22: "(44)",
    23: "(45)",
    24: "(46)",
    25: "(51)",
    26: "(52)",
    27: "(53)",
    28: "(54)",
    29: "(55)",
    30: "(56)",
    31: "(61)",
    32: "(62)",
    33: "(63)",
    34: "(64)",
    35: "(65)",
    36: "(66)"
}).astype(str)
players["triple_num"].value_counts()

        1350
(16)      42
(14)      40
(21)      35
(15)      35
(22)      26
(13)      25
(12)      20
(23)      20
(24)      14
(26)       6
(25)       5
(11)       5
(31)       2
(34)       2
(43)       1
(35)       1
(36)       1
(55)       1
Name: triple_num, dtype: int64

### Speed Rating

In [1125]:
# pd.set_option('display.max_seq_items', 200)
# players.columns

In [1126]:
players["speed_score"] = round(players["SB"] / ((players["H_bat"] + players["BB_bat"] + players["HBP_bat"]) - \
                                          (players["2B"] + players["3B"] + players["HR_bat"])), 3)
players["speed_score"].replace(np.nan, 0.000, inplace=True)
players["speed_score"].value_counts()

0.000    1193
0.010      12
0.100       9
0.014       8
0.029       8
0.048       7
0.021       7
0.045       7
0.023       7
0.013       7
0.024       7
0.111       7
0.016       6
0.027       6
0.036       5
0.069       5
0.030       5
0.083       5
0.012       5
0.067       5
0.500       5
0.018       5
0.011       5
0.059       5
0.050       5
0.026       5
0.333       4
0.008       4
0.065       4
0.033       4
0.053       4
0.040       4
0.032       4
0.079       4
0.062       4
0.073       4
0.006       4
0.056       4
0.009       4
0.019       4
0.126       4
0.037       4
0.143       4
0.028       4
0.125       4
0.091       4
0.022       4
0.025       4
0.055       4
0.031       4
0.043       4
0.250       3
0.049       3
0.054       3
0.061       3
0.098       3
0.086       3
0.105       3
0.034       3
0.088       3
0.007       3
0.017       3
0.148       3
0.135       2
0.085       2
0.089       2
2.000       2
0.136       2
0.041       2
0.167       2
0.121       2
0.182 

In [1127]:
speed_breaks = [
    0.075,
    0.100,
    0.200,
    0.300
]

ratings = [
    "",
    "*",
    "**",
    "***",
    "****"
]

def speed_rate(speed, breakpoints=speed_breaks, speed_rates=ratings):
    i = bisect(breakpoints, speed)
    return speed_rates[i]

In [1128]:
players["speed_rating"] = [speed_rate(rate) for rate in players["speed_score"]]
players["speed_rating"].value_counts()

        1448
**        85
*         44
***       30
****      24
Name: speed_rating, dtype: int64

In [1129]:
players[players["speed_rating"] == "****"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****
45,Miguel Andujar,22,NYY,AL,5.0,8.0,7.0,0.0,4.0,2.0,0.0,0.0,4.0,1.0,0.0,1.0,0.0,0.571,0.625,0.857,1.482,286.0,6.0,0.0,0.0,0.0,0.0,0.0,/5DH,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NYY,AL,3.0,0.0,0.0,9.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,27.0,0.0,0.0,0.0,2.0,0.67,3B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,1.0,5.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,3B,0.8,#,AAA,0,,0,0.0,,0.333,****
115,Quintin Berry,32,MIL,NL,7.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,/H78,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,MIL,NL,3.0,0.0,0.0,5.0,3.0,3.0,0.0,0.0,0.0,1.0,1.0,288.0,,,,5.4,1.0,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,0.0,7.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,3.0,0.0,1.0,5.0,OF,0.0,,G,0,,0,0.0,,inf,****
292,Chase d'Arnaud,30,TOT,MLB,35.0,62.0,58.0,12.0,11.0,2.0,0.0,1.0,3.0,5.0,1.0,4.0,20.0,0.19,0.242,0.276,0.518,38.0,16.0,0.0,0.0,0.0,0.0,1.0,H6/47539,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,23.0,12.0,8.0,116.1,54.0,25.0,26.0,3.0,10.0,0.944,-5.0,-51.0,,0.0,,3.95,2.22,SS-2B-OF-3B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SS,0.085714,,D+,3,13.0,0,0.0,,0.417,****
293,Chase d'Arnaud,30,TOT,NL,33.0,61.0,57.0,10.0,10.0,2.0,0.0,1.0,3.0,5.0,1.0,4.0,20.0,0.175,0.23,0.263,0.493,31.0,15.0,0.0,0.0,0.0,0.0,1.0,H6/74539,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,23.0,12.0,8.0,116.1,54.0,25.0,26.0,3.0,10.0,0.944,-5.0,-51.0,,0.0,,3.95,2.22,SS-2B-OF-3B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,SS,0.090909,,D+,4,14.0,0,0.0,,0.455,****
296,Chase d'Arnaud,30,SDP,NL,22.0,51.0,49.0,5.0,7.0,2.0,0.0,1.0,3.0,5.0,1.0,2.0,17.0,0.143,0.176,0.245,0.421,11.0,12.0,0.0,0.0,0.0,0.0,1.0,6/H4539,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,MLB,23.0,12.0,8.0,116.1,54.0,25.0,26.0,3.0,10.0,0.944,-5.0,-51.0,,0.0,,3.95,2.22,SS-2B-OF-3B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22.0,10.0,22.0,15.0,0.0,0.0,1.0,3.0,2.0,10.0,0.0,0.0,1.0,1.0,0.0,8.0,1.0,SS,0.136364,,D,5,15.0,0,0.0,,0.833,****
305,Rajai Davis,36,TOT,AL,117.0,366.0,336.0,56.0,79.0,19.0,2.0,5.0,20.0,29.0,7.0,27.0,83.0,0.235,0.293,0.348,0.641,74.0,117.0,12.0,1.0,1.0,1.0,1.0,*87H/9D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,112.0,81.0,71.0,762.2,186.0,178.0,5.0,3.0,2.0,0.984,3.0,5.0,,,,2.16,1.63,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OF,0.17094,,C+,2,12.0,1,3.0,(13),0.358,****
306,Rajai Davis,36,OAK,AL,100.0,328.0,300.0,49.0,70.0,17.0,2.0,5.0,18.0,26.0,6.0,26.0,70.0,0.233,0.294,0.353,0.647,76.0,106.0,10.0,0.0,1.0,1.0,1.0,87H/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,112.0,81.0,71.0,762.2,186.0,178.0,5.0,3.0,2.0,0.984,3.0,5.0,,,,2.16,1.63,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,100.0,73.0,100.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,79.0,3.0,98.0,0.0,15.0,2.0,OF,0.18,,C+,3,13.0,1,4.0,(14),0.361,****
307,Rajai Davis,36,BOS,AL,17.0,38.0,36.0,7.0,9.0,2.0,0.0,0.0,2.0,3.0,1.0,1.0,13.0,0.25,0.289,0.306,0.595,57.0,11.0,2.0,1.0,0.0,0.0,0.0,/78H9D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,112.0,81.0,71.0,762.2,186.0,178.0,5.0,3.0,2.0,0.984,3.0,5.0,,,,2.16,1.63,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17.0,8.0,17.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,4.0,4.0,14.0,2.0,1.0,3.0,OF,0.117647,,C+,0,,0,0.0,,0.333,****
383,Danny Espinosa,30,SEA,AL,8.0,17.0,16.0,2.0,3.0,2.0,0.0,0.0,2.0,1.0,0.0,1.0,7.0,0.188,0.235,0.313,0.548,48.0,5.0,0.0,0.0,0.0,0.0,0.0,/53H64,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,AL,88.0,74.0,67.0,683.2,326.0,135.0,185.0,6.0,40.0,0.982,2.0,4.0,,-2.0,,4.21,3.64,2B-3B-SS-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.0,4.0,8.0,8.0,0.0,0.0,2.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2B,0.25,,D+,0,,0,0.0,,0.5,****


### Base on Balls Number

In [1130]:
players["walk_rate"] = round(players["BB_bat"] / players["PA"] * 36, 0)
players["walk_rate"].replace(np.nan, 0, inplace=True)
players["walk_rate"] = players["walk_rate"].astype(int)
players["walk_rate"].value_counts()

0     850
2     210
3     189
4     160
1     100
5      67
6      18
7      12
8       6
18      5
9       5
36      4
12      2
10      2
14      1
Name: walk_rate, dtype: int64

In [1131]:
players["bb_num"] = players["walk_rate"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num"].value_counts()

n     850
12    210
13    189
14    160
11    100
15     67
16     18
21     12
22      6
36      5
23      5
66      4
26      2
24      2
32      1
Name: bb_num, dtype: int64

### Batter K Number

In [1132]:
players["k_rate"] = round(players["SO_bat"] / players["PA"] * 36, 0)
players["k_rate"].replace(np.nan, 0, inplace=True)
players["k_rate"] = players["k_rate"].astype(int)
players["k_rate"].value_counts()

0     595
8     148
7     102
9      89
6      81
10     76
5      69
11     69
12     64
36     55
18     51
14     42
13     41
4      30
15     20
24     13
16     13
22     10
19      9
21      9
3       9
17      8
26      5
20      5
27      4
25      3
30      3
29      2
1       2
31      1
23      1
28      1
2       1
Name: k_rate, dtype: int64

In [1133]:
players.loc[(players["k_rate"] == 0), "k_val"] = 0
players.loc[(players["k_rate"] > 0), "k_val"] = players["walk_rate"] + players["k_rate"]
players["k_val"].value_counts()

0.0     595
11.0    113
12.0    104
10.0     96
13.0     83
14.0     75
9.0      74
8.0      69
18.0     65
36.0     59
16.0     51
7.0      41
15.0     40
6.0      31
17.0     25
20.0     17
24.0     16
22.0     14
19.0     12
5.0      10
21.0     10
27.0      7
26.0      6
4.0       5
25.0      3
28.0      2
30.0      2
29.0      1
1.0       1
32.0      1
2.0       1
31.0      1
23.0      1
Name: k_val, dtype: int64

In [1134]:
players["k_num"] = players["k_val"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num"].value_counts()

n     595
25    113
26    104
24     96
31     83
32     75
23     74
22     69
36     65
66     59
34     51
21     41
33     40
16     31
35     25
42     17
46     16
44     14
41     12
43     10
15     10
53      7
52      6
14      5
51      3
54      2
56      2
55      1
62      1
11      1
12      1
45      1
61      1
Name: k_num, dtype: int64

### Batter HBP Rating

In [1135]:
players["hbp_rate"] = round(players["HBP_bat"] / players["PA"] * 36, 0)
players["hbp_rate"].replace(np.nan, 0, inplace=True)
players["hbp_rate"] = players["hbp_rate"].astype(int)
players["hbp_rate"].value_counts()

0    1446
1     162
2      15
4       4
3       2
6       1
5       1
Name: hbp_rate, dtype: int64

In [1136]:
players.loc[(players["hbp_rate"] == 0), "hbp_val"] = 0
players.loc[(players["hbp_rate"] > 0), "hbp_val"] = players["k_val"] + players["hbp_rate"]

In [1137]:
players["hbp_val"].value_counts()

0.0     1446
12.0      27
13.0      25
11.0      24
14.0      20
10.0      13
9.0       11
15.0      11
16.0      11
17.0      10
19.0       9
18.0       5
7.0        4
20.0       3
21.0       2
22.0       2
8.0        2
24.0       2
26.0       1
23.0       1
6.0        1
5.0        1
Name: hbp_val, dtype: int64

In [1138]:
players["hbp_num"] = players["hbp_val"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num"].value_counts()

       1446
/26      27
/31      25
/25      24
/32      20
/24      13
/34      11
/33      11
/23      11
/35      10
/41       9
/36       5
/21       4
/42       3
/44       2
/46       2
/22       2
/43       2
/52       1
/45       1
/16       1
/15       1
Name: hbp_num, dtype: int64

### Probable Hit Number

In [1139]:
players["hit_rate"] = round(players["H_bat"] / players["PA"] * 36, 0)
players["hit_rate"].replace(np.nan, 0, inplace=True)
players["hit_rate"] = players["hit_rate"].astype(int)
players["hit_rate"].value_counts()

0     731
8     196
9     166
7     135
6      79
5      75
10     68
4      51
3      36
2      24
11     20
12     14
18     12
1      12
36      8
14      2
24      2
Name: hit_rate, dtype: int64

In [1140]:
players["PH_num_bat"] = players["hit_rate"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PH_num_bat"].value_counts()

66    743
55    196
54    166
56    135
61     79
62     75
53     68
63     51
64     36
65     24
52     20
51     14
41     12
11      8
45      2
31      2
Name: PH_num_bat, dtype: int64

### Batter Rating

In [1141]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1142]:
players["batter_rating"] = (players["clutch"] + players["bat_letter"] + \
                            players["hr_num_bat"] + players["triple_num"] + \
                            players["speed_rating"] + " [" + players["bb_num"] + \
                            "-" + players["k_num"] + players["hbp_num"] + "]")
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23]
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36]
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32]


In [1143]:
players.tail()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
1626,Blake Wood,31,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,0.0,1.0,4.76,17.0,0.0,2.0,0.0,0.0,0.0,17.0,20.0,9.0,9.0,3.0,4.0,1.0,22.0,0.0,0.0,1.0,73.0,91.0,3.57,1.412,10.6,1.6,2.1,11.6,5.5,R,TOT,ZZ,72.0,0.0,0.0,74.1,16.0,9.0,7.0,0.0,0.0,1.0,,,,,,1.94,0.22,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17.0,0.0,0.0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1627,Hunter Wood,23,TBR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.16,0.0,0.0,0.0,0.0,0.0,,R,TBR,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1628,Daniel Wright,26,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,4.58,5.0,2.0,3.0,0.0,0.0,0.0,19.2,21.0,12.0,10.0,1.0,8.0,0.0,11.0,2.0,0.0,1.0,85.0,94.0,4.23,1.475,9.6,0.5,3.7,5.0,1.38,R,LAA,AL,5.0,2.0,0.0,19.2,3.0,1.0,2.0,0.0,0.0,1.0,,,1.0,10.0,0.0,1.37,0.6,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,2.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1629,Steven Wright,32,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,3.0,0.25,8.25,5.0,5.0,0.0,0.0,0.0,0.0,24.0,40.0,24.0,22.0,9.0,5.0,1.0,13.0,2.0,0.0,1.0,114.0,56.0,7.82,1.875,15.0,3.4,1.9,4.9,2.6,R,BOS,AL,5.0,5.0,0.0,24.0,3.0,2.0,0.0,1.0,0.0,0.667,,,-1.0,-8.0,0.0,0.75,0.4,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1630,Kirby Yates,30,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,18.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,32.0,27.16,2.0,18.0,18.0,0.0,9.0,,R,TOT,ZZ,62.0,0.0,0.0,56.2,2.0,0.0,2.0,0.0,0.0,1.0,,,,,,0.32,0.03,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [1144]:
players

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.30,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.000,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23]
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.000,0.071,0.000,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.000,-1.0,-93.0,-1.0,-133.0,0.0,4.00,1.33,SS-2B-1B,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.000000,,G,0,,0,0.0,,0.000,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36]
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.60,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.400,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.000,,,0.0,0.0,0.0,1.80,0.17,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.000000,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.20,OF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,Blake Wood,31,LAA,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,2.0,0.0,1.000,4.76,17.0,0.0,2.0,0.0,0.0,0.0,17.0,20.0,9.0,9.0,3.0,4.0,1.0,22.0,0.0,0.0,1.0,73.0,91.0,3.57,1.412,10.6,1.6,2.1,11.6,5.50,R,TOT,ZZ,72.0,0.0,0.0,74.1,16.0,9.0,7.0,0.0,0.0,1.000,,,,,,1.94,0.22,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17.0,0.0,0.0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1627,Hunter Wood,23,TBR,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,0.0,,0.00,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.16,0.000,0.0,0.0,0.0,0.0,,R,TBR,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.00,0.00,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1628,Daniel Wright,26,LAA,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,0.0,1.0,0.000,4.58,5.0,2.0,3.0,0.0,0.0,0.0,19.2,21.0,12.0,10.0,1.0,8.0,0.0,11.0,2.0,0.0,1.0,85.0,94.0,4.23,1.475,9.6,0.5,3.7,5.0,1.38,R,LAA,AL,5.0,2.0,0.0,19.2,3.0,1.0,2.0,0.0,0.0,1.000,,,1.0,10.0,0.0,1.37,0.60,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,2.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]
1629,Steven Wright,32,BOS,,,,,,,,,,,,,,,0.000,,,,,,,,,,,,,AL,1.0,3.0,0.250,8.25,5.0,5.0,0.0,0.0,0.0,0.0,24.0,40.0,24.0,22.0,9.0,5.0,1.0,13.0,2.0,0.0,1.0,114.0,56.0,7.82,1.875,15.0,3.4,1.9,4.9,2.60,R,BOS,AL,5.0,5.0,0.0,24.0,3.0,2.0,0.0,1.0,0.0,0.667,,,-1.0,-8.0,0.0,0.75,0.40,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.000,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n]


In [1145]:
players.to_csv("../data/player stats - " + year + " - with batter ratings.csv", index=False)

## Pitcher Ratings

In [1146]:
players = pd.read_csv("../data/player stats - " + year + " - with batter ratings.csv")

In [1147]:
pd.set_option('display.max_seq_items', 150)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_num', 'k_rate', 'k_val', 'k_num', 'hbp_rate', 'hbp_val', 'hbp_num',
       'hit_rate', 'PH_num_bat', 'batter_rating'],
      dtype='object', length=152)

### Pitcher Letter Rating

In [1148]:
players["BAA"] = round(players["H_pit"] /(players["BF"] - (players["BB_pit"] + players["HBP_pit"])),3)
players["BAA"].mean()

0.2647734627831717

In [1149]:
baa_break_points = [
    0.140,
    0.168,
    0.196,
    0.223,
    0.251,
    0.279,
    0.307,
    0.335,
    0.362
]

letters = [
    "J+",
    "J",
    "K",
    "L",
    "M",
    "W",
    "X",
    "Y",
    "Z+",
    "Z"
]

def pitcher_letter(bat_avg_against, breakpoints=baa_break_points, letter_grades=letters):
    i = bisect(breakpoints, bat_avg_against)
    return letter_grades[i]

In [1150]:
players["pit_letter"] = [pitcher_letter(avg) for avg in players["BAA"]]
players["pit_letter"].value_counts()

Z     786
W     205
M     200
X     122
L     120
Y      64
K      47
Z+     31
J+     28
J      28
Name: pit_letter, dtype: int64

In [1151]:
players.loc[(players["IP"].isnull()), "pit_letter"] = ""
players["pit_letter"].value_counts()

      703
W     205
M     200
X     122
L     120
Z      83
Y      64
K      47
Z+     31
J+     28
J      28
Name: pit_letter, dtype: int64

In [1152]:
players[players["pit_letter"] == "K"]["BAA"].min()

0.169

### Innings of Effectiveness Number

**NOTE** - IP is stored as .0, .1, .2 for full, one third, and two-thirds, so these need to be set to their true decimal values before any calculations using IP can be done.

In [1153]:
players["IP_real"] = round(players["IP"]) + (10 * (players["IP"] - round(players["IP"])) / 3)
players["IP_real"].value_counts().head(25)

1.000000     29
5.000000     11
57.333333     9
2.000000      9
6.000000      8
10.333333     8
13.000000     8
4.000000      8
7.666667      8
27.000000     7
5.666667      7
20.000000     7
21.000000     7
8.000000      7
58.666667     7
14.333333     7
7.333333      7
0.666667      7
6.666667      7
8.333333      7
3.666667      7
55.666667     7
8.666667      6
62.000000     6
62.666667     6
Name: IP_real, dtype: int64

In [1154]:
players["IE"] = round(players["IP_real"] / players ["G_pit"], 0)
players["IE"].value_counts()

1.0    514
5.0    111
6.0     97
2.0     73
3.0     57
4.0     55
0.0     15
7.0      6
Name: IE, dtype: int64

In [1155]:
pd.set_option('display.max_columns', 160)
players[players["IE"] == 0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
127,Andres Blanco,33,PHI,NL,80.0,144.0,130.0,10.0,25.0,4.0,0.0,3.0,13.0,1.0,0.0,12.0,34.0,0.192,0.257,0.292,0.549,45.0,38.0,1.0,0.0,0.0,2.0,0.0,H543/6D1,S,NL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,33.0,42.16,3.0,27.0,27.0,0.0,0.0,,R,PHI,NL,47.0,20.0,15.0,219.1,86.0,38.0,46.0,2.0,7.0,0.977,2.0,10.0,1.0,5.0,0.0,3.45,1.79,2B-3B-1B-SS-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,80.0,20.0,80.0,46.0,1.0,0.0,11.0,15.0,16.0,4.0,0.0,0.0,0.0,0.0,1.0,35.0,0.0,2B,0.1625,,D+,4,14.0,0,0.0,,0.033,,3,13,8,11.0,25,0,0.0,,6,61,D+14 [13-25],0.5,Z,0.333333,0.0
284,Kyle Crockett,25,CLE,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,AL,0.0,0.0,,10.8,4.0,0.0,0.0,0.0,0.0,0.0,1.2,4.0,2.0,2.0,0.0,1.0,0.0,2.0,1.0,0.0,0.0,11.0,51.0,4.36,3.0,21.6,0.0,5.4,10.8,2.0,L,CLE,AL,4.0,0.0,0.0,1.2,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,1.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.444,Z,1.666667,0.0
375,Jake Esch,27,SDP,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,,,,,,,,0.0,R,SDP,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],,Z,0.0,0.0
708,Matt Koch,26,ARI,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,inf,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,6.0,,,,,,,0.0,R,ARI,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],1.0,Z,0.0,0.0
1129,Andrew Romine,31,DET,AL,124.0,348.0,318.0,45.0,74.0,17.0,2.0,4.0,25.0,6.0,4.0,22.0,67.0,0.233,0.289,0.336,0.625,67.0,107.0,7.0,4.0,2.0,2.0,0.0,4853H796/D12,S,AL,0.0,0.0,,0.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,6.0,,9.16,2.0,9.0,0.0,9.0,0.0,0.0,R,DET,AL,136.0,79.0,61.0,752.2,305.0,210.0,92.0,3.0,24.0,0.99,-5.0,-7.0,-3.0,-5.0,0.0,3.61,2.22,OF-2B-SS-3B-1B-P-C,DET,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,124.0,79.0,124.0,113.0,2.0,1.0,22.0,27.0,23.0,10.0,18.0,24.0,11.0,51.0,7.0,10.0,12.0,OF,0.201613,,C+,2,12.0,1,3.0,(13),0.078,*,2,12,7,9.0,23,0,0.0,,8,55,C+12(13)* [12-23],0.25,M,1.0,0.0
1154,Dan Runzler,32,PIT,NL,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,4.5,8.0,0.0,0.0,0.0,0.0,0.0,4.0,7.0,4.0,2.0,2.0,2.0,0.0,4.0,0.0,0.0,0.0,20.0,103.0,9.16,2.25,15.8,4.5,4.5,9.0,2.0,L,PIT,NL,8.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.0,0.0,8.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.389,Z,4.0,0.0
1160,Marc Rzepczynski,31,SEA,AL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,2.0,0.5,4.02,64.0,0.0,6.0,0.0,0.0,1.0,31.1,29.0,16.0,14.0,2.0,20.0,5.0,25.0,1.0,0.0,3.0,137.0,105.0,4.4,1.564,8.3,0.6,5.7,7.2,1.25,L,SEA,AL,64.0,0.0,0.0,31.1,7.0,2.0,4.0,1.0,0.0,0.857,,,-2.0,-13.0,-1.0,1.72,0.09,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,64.0,0.0,6.0,64.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.25,M,31.333333,0.0
1390,Wei-Chung Wang,25,MIL,NL,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,L,NL,0.0,0.0,,13.5,8.0,0.0,1.0,0.0,0.0,0.0,1.1,5.0,2.0,2.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,9.0,41.0,9.91,3.75,33.8,6.8,0.0,13.5,,L,MIL,NL,8.0,0.0,0.0,1.1,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,6.75,0.13,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.0,0.0,8.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.556,Z,1.333333,0.0
1474,Xavier Cedeno,30,TBR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,1.0,0.5,12.0,9.0,0.0,0.0,0.0,0.0,0.0,3.0,7.0,5.0,4.0,3.0,4.0,1.0,0.0,0.0,0.0,0.0,21.0,39.0,20.16,3.667,21.0,9.0,12.0,0.0,0.0,L,TBR,AL,9.0,0.0,0.0,3.0,3.0,0.0,2.0,1.0,0.0,0.667,,,0.0,0.0,1.0,6.0,0.22,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.0,0.0,0.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.412,Z,3.0,0.0
1481,William Cuevas,26,DET,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,108.0,1.0,0.0,0.0,0.0,0.0,0.0,0.1,3.0,4.0,4.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,5.0,8.0,6.16,9.0,81.0,0.0,0.0,27.0,,R,DET,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.75,Z,0.333333,0.0


In [1156]:
# Reset 0 Innings of Effectiveness to 1 (can't have 0 in SherCo)
players["IE"].replace(0, 1, inplace=True)
players["IE"].value_counts()

1.0    529
5.0    111
6.0     97
2.0     73
3.0     57
4.0     55
7.0      6
Name: IE, dtype: int64

In [1157]:
players[players["IE"] >= 7]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE
176,Madison Bumgarner,27,SFG,NL,15.0,36.0,34.0,4.0,7.0,0.0,0.0,3.0,5.0,0.0,0.0,2.0,11.0,0.206,0.25,0.471,0.721,84.0,16.0,1.0,0.0,0.0,0.0,0.0,1.0,R,NL,4.0,9.0,0.308,3.32,17.0,17.0,0.0,1.0,0.0,0.0,111.0,101.0,41.0,41.0,17.0,20.0,3.0,101.0,3.0,0.0,0.0,450.0,128.0,3.95,1.09,8.2,1.4,1.6,8.2,5.05,L,SFG,NL,17.0,17.0,1.0,111.0,12.0,2.0,10.0,0.0,1.0,1.0,,,2.0,4.0,0.0,0.97,0.71,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17.0,17.0,15.0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.333333,,C,15,33.0,0,0.0,,0.0,,2,12,11,13.0,31,0,0.0,,7,56,C33 [12-31],0.237,M,111.0,7.0
448,Michael Fulmer,24,DET,AL,2.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,-100.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,R,AL,10.0,12.0,0.455,3.83,25.0,25.0,0.0,1.0,0.0,0.0,164.2,150.0,80.0,70.0,13.0,40.0,2.0,114.0,8.0,1.0,3.0,676.0,117.0,3.67,1.154,8.2,0.7,2.2,6.2,2.85,R,DET,AL,25.0,25.0,1.0,164.2,20.0,9.0,10.0,1.0,0.0,0.95,,,-2.0,-2.0,0.0,1.04,0.76,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,25.0,25.0,2.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,0,66,G [n-32],0.239,M,164.666667,7.0
705,Corey Kluber,31,CLE,AL,1.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,1.0,164.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,R,AL,18.0,4.0,0.818,2.25,29.0,29.0,0.0,5.0,3.0,0.0,203.2,141.0,56.0,51.0,21.0,36.0,2.0,265.0,5.0,0.0,4.0,777.0,202.0,2.5,0.869,6.2,0.9,1.6,11.7,7.36,R,CLE,AL,29.0,29.0,5.0,203.2,32.0,15.0,16.0,1.0,1.0,0.969,,,-1.0,-1.0,0.0,1.37,1.07,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,29.0,29.0,1.0,29.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,AAA,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,18,41,AAA [n-n],0.192,K,203.666667,7.0
1165,Chris Sale,28,BOS,AL,1.0,3.0,3.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333,0.333,0.667,1.0,152.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,L,AL,17.0,8.0,0.68,2.9,32.0,32.0,0.0,1.0,0.0,0.0,214.1,165.0,73.0,69.0,24.0,43.0,0.0,308.0,8.0,0.0,3.0,851.0,157.0,2.45,0.97,6.9,1.0,1.8,12.9,7.16,L,BOS,AL,32.0,32.0,1.0,214.1,30.0,8.0,22.0,0.0,3.0,1.0,,,2.0,2.0,0.0,1.26,0.94,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32.0,32.0,1.0,32.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,A,0,,0,0.0,,0.0,,0,n,12,12.0,26,0,0.0,,12,51,A [n-26],0.206,L,214.333333,7.0
1504,Jaime Garcia,30,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,0.0,1.0,4.05,1.0,1.0,0.0,0.0,0.0,0.0,6.2,8.0,3.0,3.0,0.0,3.0,0.0,7.0,0.0,0.0,0.0,29.0,115.0,2.41,1.65,10.8,0.0,4.1,9.5,2.33,L,TOT,ZZ,27.0,27.0,0.0,157.0,34.0,4.0,28.0,2.0,2.0,0.941,,,,,,1.83,1.19,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.308,Y,6.666667,7.0
1616,Justin Verlander,34,HOU,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,5.0,0.0,1.0,1.06,5.0,5.0,0.0,0.0,0.0,0.0,34.0,17.0,4.0,4.0,4.0,5.0,0.0,43.0,1.0,0.0,0.0,120.0,389.0,2.69,0.647,4.5,1.1,1.3,11.4,8.6,R,TOT,AL,33.0,33.0,0.0,206.0,24.0,10.0,13.0,1.0,1.0,0.958,,,,,,1.0,0.7,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.149,J,34.0,7.0


In [1158]:
players["IE"] = players["IE"].astype('Int64')

### Base on Balls Number

In [1159]:
players["bb_rate"] = round(players["BB_pit"] / players["BF"] * 36, 0)
players["bb_rate"].replace(np.nan, 0, inplace=True)
players["bb_rate"] = players["bb_rate"].astype(int)
players["bb_rate"].value_counts()

0     745
3     301
4     200
2     187
5      80
6      35
1      34
7      15
9      10
8       8
12      7
10      4
24      1
14      1
18      1
21      1
36      1
Name: bb_rate, dtype: int64

In [1160]:
players.loc[(players["BF"].isnull()), "bb_rate"] = np.nan
players["bb_rate"].value_counts()

3.0     301
4.0     200
2.0     187
5.0      80
0.0      42
6.0      35
1.0      34
7.0      15
9.0      10
8.0       8
12.0      7
10.0      4
18.0      1
36.0      1
21.0      1
14.0      1
24.0      1
Name: bb_rate, dtype: int64

In [1161]:
players["bb_num_pit"] = players["bb_rate"].map({
    0: "11",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["bb_num_pit"].value_counts()

nan    703
13     301
14     200
12     187
15      80
11      76
16      35
21      15
23      10
22       8
26       7
24       4
43       1
46       1
32       1
36       1
66       1
Name: bb_num_pit, dtype: int64

### Strikeout Number

In [1162]:
players["k_rate_pit"] = round(players["SO_pit"] / players["BF"] * 36, 0)
players["k_rate_pit"].value_counts()

7.0     158
8.0     144
6.0     123
9.0     106
10.0     89
5.0      85
4.0      46
0.0      38
11.0     37
12.0     32
3.0      21
2.0      14
13.0     13
14.0     10
15.0      6
18.0      3
1.0       2
16.0      1
Name: k_rate_pit, dtype: int64

In [1163]:
players[players["k_rate_pit"]==0]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit
46,Nori Aoki,35,TOT,MLB,109.0,374.0,336.0,48.0,93.0,20.0,2.0,5.0,35.0,10.0,2.0,29.0,44.0,0.277,0.335,0.393,0.728,98.0,132.0,12.0,3.0,1.0,5.0,1.0,79H/8D1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,26.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,1.33,OF-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OF,0.321101,,B,2,12.0,1,3.0,(13),0.102,**,3,13,4,7.0,21,0,0.0,,9,54,B12(13)** [13-21],0.25,M,1.0,1,12.0,26,0.0
47,Nori Aoki,35,TOT,AL,82.0,258.0,234.0,32.0,64.0,13.0,1.0,5.0,27.0,5.0,2.0,16.0,34.0,0.274,0.319,0.402,0.721,96.0,94.0,11.0,2.0,1.0,5.0,1.0,79/H8D1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,26.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,1.33,OF-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OF,0.329268,,B,3,13.0,1,4.0,(14),0.079,*,2,12,5,7.0,21,0,0.0,,9,54,B13(14)* [12-21],0.25,M,1.0,1,12.0,26,0.0
48,Nori Aoki,35,HOU,AL,70.0,224.0,202.0,28.0,55.0,12.0,1.0,2.0,19.0,5.0,2.0,15.0,29.0,0.272,0.323,0.371,0.694,91.0,75.0,11.0,2.0,1.0,4.0,1.0,79/HD1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,21.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,1.33,OF-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,71.0,56.0,70.0,68.0,1.0,0.0,0.0,0.0,0.0,0.0,56.0,0.0,16.0,67.0,1.0,5.0,1.0,OF,0.271429,,B,1,11.0,1,2.0,(12),0.088,*,2,12,5,7.0,21,0,0.0,,9,54,B11(12)* [12-21],0.25,M,1.0,1,12.0,26,0.0
71,Erick Aybar,33,SDP,NL,108.0,370.0,333.0,37.0,78.0,15.0,1.0,7.0,22.0,11.0,4.0,28.0,57.0,0.234,0.3,0.348,0.648,73.0,116.0,5.0,4.0,3.0,2.0,2.0,6H/1,S,NL,0.0,0.0,,0.0,2.0,0.0,2.0,0.0,0.0,0.0,1.1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,,5.41,0.75,0.0,0.0,6.8,0.0,0.0,R,SDP,NL,101.0,93.0,79.0,794.1,399.0,117.0,273.0,9.0,63.0,0.977,-6.0,-9.0,-9.0,-2.0,-1.0,4.42,3.86,SS-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,108.0,93.0,108.0,100.0,2.0,0.0,0.0,0.0,0.0,99.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,SS,0.203704,,C+,3,13.0,0,0.0,,0.126,**,3,13,6,9.0,23,0,0.0,,8,55,C+13** [13-23],0.0,J+,1.333333,1,9.0,23,0.0
127,Andres Blanco,33,PHI,NL,80.0,144.0,130.0,10.0,25.0,4.0,0.0,3.0,13.0,1.0,0.0,12.0,34.0,0.192,0.257,0.292,0.549,45.0,38.0,1.0,0.0,0.0,2.0,0.0,H543/6D1,S,NL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,33.0,42.16,3.0,27.0,27.0,0.0,0.0,,R,PHI,NL,47.0,20.0,15.0,219.1,86.0,38.0,46.0,2.0,7.0,0.977,2.0,10.0,1.0,5.0,0.0,3.45,1.79,2B-3B-1B-SS-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,80.0,20.0,80.0,46.0,1.0,0.0,11.0,15.0,16.0,4.0,0.0,0.0,0.0,0.0,1.0,35.0,0.0,2B,0.1625,,D+,4,14.0,0,0.0,,0.033,,3,13,8,11.0,25,0,0.0,,6,61,D+14 [13-25],0.5,Z,0.333333,1,0.0,11,0.0
152,Rob Brantly,27,CHW,AL,13.0,36.0,31.0,4.0,9.0,1.0,0.0,2.0,5.0,0.0,0.0,3.0,14.0,0.29,0.389,0.516,0.905,143.0,16.0,0.0,2.0,0.0,0.0,0.0,/2DH1,L,AL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,65.0,16.16,1.0,9.0,9.0,0.0,0.0,,R,CHW,AL,7.0,5.0,4.0,43.0,26.0,24.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5.44,3.71,C-P,CHW,AL,6.0,5.0,4.0,42.0,24.0,23.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.14,4.0,0.0,2.0,4.0,1.0,20%,14.0,9.0,13.0,7.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,C,0.384615,,B+,8,22.0,0,0.0,,0.0,,3,13,14,17.0,35,2,19.0,/41,9,54,B+22 [13-35/41],0.25,M,1.0,1,0.0,11,0.0
322,Daniel Descalso,30,ARI,NL,130.0,398.0,344.0,47.0,80.0,16.0,5.0,10.0,51.0,4.0,0.0,48.0,89.0,0.233,0.332,0.395,0.727,84.0,136.0,6.0,4.0,0.0,2.0,0.0,47H35/1D6,L,NL,0.0,0.0,,0.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,,3.16,0.0,0.0,0.0,0.0,0.0,,R,ARI,NL,118.0,83.0,54.0,752.2,325.0,188.0,126.0,11.0,43.0,0.966,-4.0,-6.0,-1.0,-2.0,-1.0,3.75,2.66,2B-OF-1B-3B-P-SS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,131.0,83.0,130.0,105.0,2.0,0.0,19.0,45.0,15.0,1.0,36.0,0.0,0.0,36.0,1.0,35.0,0.0,2B,0.392308,,C+,4,14.0,2,6.0,(16),0.04,,4,14,8,12.0,26,0,0.0,,7,56,C+14(16) [14-26],0.0,J+,2.0,1,0.0,11,0.0
375,Jake Esch,27,SDP,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,,,,,,,,0.0,R,SDP,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,0.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],,Z,0.0,1,36.0,66,0.0
439,Mike Freeman,29,SEA,AL,16.0,34.0,30.0,3.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,9.0,0.067,0.176,0.167,0.343,-5.0,5.0,0.0,0.0,0.0,0.0,1.0,/H345D1,L,AL,0.0,0.0,,9.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,63.0,3.16,3.0,27.0,0.0,0.0,0.0,,R,TOT,MLB,23.0,10.0,6.0,117.2,66.0,33.0,30.0,3.0,10.0,0.955,-2.0,-17.0,,10.0,,4.82,2.74,SS-2B-3B-1B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16.0,6.0,16.0,7.0,1.0,0.0,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,8.0,1.0,SS,0.0625,,F,18,36.0,0,0.0,,0.0,,4,14,10,14.0,32,0,0.0,,2,65,F36 [14-32],0.5,Z,1.0,1,0.0,11,0.0
482,Scooter Gennett,27,CIN,NL,141.0,497.0,461.0,80.0,136.0,22.0,3.0,27.0,97.0,3.0,2.0,30.0,114.0,0.295,0.342,0.531,0.874,123.0,245.0,15.0,4.0,0.0,2.0,1.0,4H5/79D1,L,NL,0.0,0.0,,18.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,7.0,33.0,22.16,3.0,18.0,9.0,9.0,0.0,0.0,R,CIN,NL,125.0,105.0,87.0,937.2,409.0,184.0,216.0,9.0,41.0,0.978,-12.0,-15.0,-14.0,-18.0,-1.0,3.84,3.2,2B-OF-3B-P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,141.0,108.0,141.0,121.0,1.0,0.0,0.0,99.0,10.0,0.0,9.0,0.0,6.0,15.0,3.0,23.0,0.0,2B,0.687943,#,B+,7,21.0,1,8.0,(22),0.025,,2,12,8,10.0,24,0,0.0,,10,53,#B+21(22) [12-24],0.4,Z,1.0,1,5.0,15,0.0


In [1164]:
players.loc[(players["k_rate_pit"] == 0), "k_val_pit"] = 0
players.loc[(players["k_rate_pit"] > 0), "k_val_pit"] = players["bb_rate"] + players["k_rate_pit"]
players["k_val_pit"].value_counts()

11.0    147
10.0    133
12.0    112
9.0      93
8.0      87
13.0     83
7.0      51
14.0     49
15.0     45
0.0      38
6.0      19
17.0     18
16.0     16
18.0     11
5.0      10
20.0      4
4.0       4
36.0      2
1.0       1
21.0      1
19.0      1
26.0      1
23.0      1
22.0      1
Name: k_val_pit, dtype: int64

In [1165]:
players["k_num_pit"] = players["k_val_pit"].map({
    0: "n",
    1: "11",
    2: "12",
    3: "13",
    4: "14",
    5: "15",
    6: "16",
    7: "21",
    8: "22",
    9: "23",
    10: "24",
    11: "25",
    12: "26",
    13: "31",
    14: "32",
    15: "33",
    16: "34",
    17: "35",
    18: "36",
    19: "41",
    20: "42",
    21: "43",
    22: "44",
    23: "45",
    24: "46",
    25: "51",
    26: "52",
    27: "53",
    28: "54",
    29: "55",
    30: "56",
    31: "61",
    32: "62",
    33: "63",
    34: "64",
    35: "65",
    36: "66"
}).astype(str)
players["k_num_pit"].value_counts()

nan    703
25     147
24     133
26     112
23      93
22      87
31      83
21      51
32      49
33      45
n       38
16      19
35      18
34      16
36      11
15      10
42       4
14       4
66       2
11       1
41       1
44       1
45       1
43       1
52       1
Name: k_num_pit, dtype: int64

In [1166]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit


### Hit Batter Number

In [1167]:
players["hbp_rate_pit"] = round(players["HBP_pit"] / players["BF"] * 36, 0)
players["hbp_rate_pit"].replace(np.nan, 0, inplace=True)
players["hbp_rate_pit"] = players["hbp_rate_pit"].astype(int)
players["hbp_rate_pit"].value_counts()

0    1403
1     199
2      17
3       6
6       2
4       2
7       1
5       1
Name: hbp_rate_pit, dtype: int64

In [1168]:
players.loc[(players["hbp_rate_pit"] == 0), "hbp_val_pit"] = 0
players.loc[(players["hbp_rate_pit"] > 0), "hbp_val_pit"] = players["k_val_pit"] + players["hbp_rate_pit"]

In [1169]:
players["hbp_val_pit"].value_counts()

0.0     1403
11.0      44
12.0      38
10.0      27
9.0       24
13.0      20
14.0      16
15.0      15
16.0      13
8.0        9
7.0        7
6.0        3
18.0       3
19.0       2
17.0       2
22.0       1
21.0       1
24.0       1
5.0        1
23.0       1
Name: hbp_val_pit, dtype: int64

In [1170]:
players["hbp_num_pit"] = players["hbp_val_pit"].map({
    0: "",
    1: "/11",
    2: "/12",
    3: "/13",
    4: "/14",
    5: "/15",
    6: "/16",
    7: "/21",
    8: "/22",
    9: "/23",
    10: "/24",
    11: "/25",
    12: "/26",
    13: "/31",
    14: "/32",
    15: "/33",
    16: "/34",
    17: "/35",
    18: "/36",
    19: "/41",
    20: "/42",
    21: "/43",
    22: "/44",
    23: "/45",
    24: "/46",
    25: "/51",
    26: "/52",
    27: "/53",
    28: "/54",
    29: "/55",
    30: "/56",
    31: "/61",
    32: "/62",
    33: "/63",
    34: "/64",
    35: "/65",
    36: "/66"
}).astype(str)
players["hbp_num_pit"].value_counts()

       1403
/25      44
/26      38
/24      27
/23      24
/31      20
/32      16
/33      15
/34      13
/22       9
/21       7
/16       3
/36       3
/41       2
/35       2
/15       1
/44       1
/45       1
/46       1
/43       1
Name: hbp_num_pit, dtype: int64

### Wild Pitch Rating

In [1171]:
pd.set_option('display.max_seq_items', 200)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1172]:
players["WP"].value_counts()

0.0     298
1.0     197
2.0     129
3.0      88
4.0      68
5.0      45
6.0      30
7.0      26
8.0      16
9.0       7
11.0      7
10.0      5
12.0      5
14.0      2
15.0      2
13.0      2
16.0      1
Name: WP, dtype: int64

In [1173]:
players[players["WP"] >= 10]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,G_cat,GS_cat,CG_cat,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit
53,Chris Archer,28,TBR,AL,3.0,8.0,8.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,0.25,0.25,0.25,0.5,38.0,2.0,1.0,0.0,0.0,0.0,0.0,1,R,AL,10.0,12.0,0.455,4.07,34.0,34.0,0.0,0.0,0.0,0.0,201.0,193.0,101.0,91.0,27.0,60.0,0.0,249.0,5.0,0.0,15.0,852.0,103.0,3.4,1.259,8.6,1.2,2.7,11.1,4.15,R,TBR,AL,34.0,34.0,0.0,201.0,15.0,8.0,6.0,1.0,0.0,0.933,,,-3.0,-3.0,0.0,0.63,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,34.0,34.0,3.0,34.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.333333,,C+,0,,0,0.0,,0.0,,0,n,14,14.0,32,0,0.0,,9,54,C+ [n-32],0.245,M,201.0,6,3.0,13,11.0,14.0,32,0,0.0,
57,Jake Arrieta,31,CHC,NL,29.0,64.0,61.0,2.0,8.0,0.0,1.0,1.0,5.0,0.0,0.0,1.0,32.0,0.131,0.145,0.213,0.358,-9.0,13.0,0.0,0.0,2.0,0.0,0.0,1/H,R,NL,14.0,10.0,0.583,3.53,30.0,30.0,0.0,0.0,0.0,0.0,168.1,150.0,82.0,66.0,23.0,55.0,3.0,163.0,10.0,0.0,14.0,707.0,124.0,4.16,1.218,8.0,1.2,2.9,8.7,2.96,R,CHC,NL,30.0,30.0,0.0,168.1,43.0,15.0,26.0,2.0,2.0,0.953,,,-3.0,-4.0,-1.0,2.19,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32.0,30.0,29.0,30.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,P,0.172414,,E+,4,14.0,4,8.0,(22),0.0,,1,11,18,19.0,41,0,0.0,,4,63,E+14(22) [11-41],0.234,M,168.333333,6,3.0,13,8.0,11.0,25,1,12.0,/26
189,Trevor Cahill,29,TOT,MLB,10.0,18.0,14.0,1.0,4.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,4.0,0.286,0.333,0.357,0.69,86.0,5.0,0.0,0.0,3.0,0.0,0.0,1,R,MLB,4.0,3.0,0.571,4.93,21.0,14.0,1.0,0.0,0.0,0.0,84.0,91.0,50.0,46.0,16.0,45.0,1.0,87.0,3.0,0.0,16.0,381.0,88.0,5.28,1.619,9.8,1.7,4.8,9.3,1.93,R,TOT,ZZ,21.0,14.0,0.0,84.0,23.0,3.0,20.0,0.0,3.0,1.0,,,,,,2.46,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,P,0.2,,B+,0,,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,8,55,B+ [12-24],0.273,W,84.0,4,4.0,14,8.0,12.0,26,0,0.0,
190,Trevor Cahill,29,SDP,NL,9.0,17.0,13.0,1.0,4.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,3.0,0.308,0.357,0.385,0.742,99.0,5.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,4.0,3.0,0.571,3.69,11.0,11.0,0.0,0.0,0.0,0.0,61.0,58.0,29.0,25.0,6.0,24.0,1.0,72.0,3.0,0.0,14.0,263.0,115.0,3.4,1.344,8.6,0.9,3.5,10.6,3.0,R,TOT,ZZ,21.0,14.0,0.0,84.0,23.0,3.0,20.0,0.0,3.0,1.0,,,,,,2.46,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,11.0,9.0,11.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.222222,,A,0,,0,0.0,,0.0,,2,12,6,8.0,22,0,0.0,,8,55,A [12-22],0.246,M,61.0,6,3.0,13,10.0,13.0,31,0,0.0,
209,Carlos Carrasco,30,CLE,AL,2.0,5.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.2,0.0,0.2,-40.0,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,18.0,6.0,0.75,3.29,32.0,32.0,0.0,1.0,0.0,0.0,200.0,173.0,73.0,73.0,21.0,46.0,2.0,226.0,10.0,0.0,10.0,798.0,139.0,3.1,1.095,7.8,0.9,2.1,10.2,4.91,R,CLE,AL,32.0,32.0,1.0,200.0,21.0,4.0,17.0,0.0,1.0,1.0,,,2.0,2.0,0.0,0.95,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32.0,32.0,2.0,32.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,7,21,7,14.0,32,0,0.0,,0,66,G [21-32],0.233,M,200.0,6,2.0,12,10.0,12.0,26,0,0.0,
213,Andrew Cashner,30,TEX,AL,2.0,7.0,5.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.2,0.167,0.2,0.367,-6.0,1.0,0.0,0.0,1.0,1.0,0.0,1,R,AL,11.0,11.0,0.5,3.4,28.0,28.0,0.0,0.0,0.0,0.0,166.2,156.0,75.0,63.0,15.0,64.0,0.0,86.0,9.0,1.0,10.0,704.0,142.0,4.61,1.32,8.4,0.8,3.5,4.6,1.34,R,TEX,AL,28.0,28.0,0.0,166.2,25.0,5.0,20.0,0.0,1.0,1.0,,,1.0,1.0,0.0,1.35,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,28.0,28.0,2.0,28.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.5,,C,0,,0,0.0,,0.0,,0,n,10,10.0,24,0,0.0,,5,62,C [n-24],0.247,M,166.666667,6,3.0,13,4.0,7.0,21,0,0.0,
235,Tyler Chatwood,27,COL,NL,33.0,47.0,39.0,5.0,6.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,9.0,0.154,0.175,0.179,0.354,-12.0,7.0,0.0,0.0,7.0,0.0,0.0,1/H,R,NL,8.0,15.0,0.348,4.69,33.0,25.0,3.0,1.0,1.0,1.0,147.2,136.0,79.0,77.0,20.0,77.0,2.0,120.0,4.0,2.0,12.0,631.0,108.0,4.94,1.442,8.3,1.2,4.7,7.3,1.56,R,COL,NL,33.0,25.0,1.0,147.2,43.0,6.0,37.0,0.0,3.0,1.0,,,9.0,12.0,1.0,2.62,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35.0,25.0,33.0,33.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,P,0.060606,,D,0,,0,0.0,,0.0,,1,11,7,8.0,22,0,0.0,,5,62,D [11-22],0.247,M,147.666667,4,4.0,14,7.0,11.0,25,0,0.0,
249,Tyler Clippard,32,TOT,AL,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,2.0,8.0,0.2,4.77,67.0,0.0,23.0,0.0,0.0,5.0,60.1,47.0,33.0,32.0,10.0,31.0,1.0,72.0,2.0,0.0,11.0,264.0,93.0,4.57,1.293,7.0,1.5,4.6,10.7,2.32,R,TOT,AL,67.0,0.0,0.0,60.1,8.0,1.0,5.0,2.0,0.0,0.75,,,,,,0.9,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.203,L,60.333333,1,4.0,14,10.0,14.0,32,0,0.0,
271,Patrick Corbin,27,ARI,NL,33.0,70.0,56.0,6.0,7.0,0.0,1.0,0.0,0.0,0.0,0.0,6.0,28.0,0.125,0.21,0.161,0.37,-3.0,9.0,0.0,0.0,8.0,0.0,0.0,1/H,L,NL,14.0,13.0,0.519,4.03,33.0,32.0,0.0,0.0,0.0,0.0,189.2,208.0,97.0,85.0,26.0,61.0,8.0,178.0,3.0,0.0,10.0,826.0,116.0,4.08,1.418,9.9,1.2,2.9,8.4,2.92,L,ARI,NL,33.0,32.0,0.0,189.2,46.0,12.0,34.0,0.0,1.0,1.0,,,4.0,4.0,0.0,2.18,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35.0,32.0,33.0,33.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,P,0.0,,E+,0,,5,5.0,(15),0.0,,3,13,14,17.0,35,0,0.0,,4,63,E+(15) [13-35],0.273,W,189.666667,6,3.0,13,8.0,11.0,25,0,0.0,
298,Yu Darvish,30,TOT,MLB,9.0,20.0,17.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,14.0,0.059,0.111,0.059,0.17,-52.0,1.0,0.0,0.0,2.0,0.0,0.0,1,R,MLB,10.0,12.0,0.455,3.86,31.0,31.0,0.0,0.0,0.0,0.0,186.2,159.0,83.0,80.0,27.0,58.0,1.0,209.0,6.0,1.0,12.0,766.0,120.0,3.83,1.163,7.7,1.3,2.8,10.1,3.6,R,TOT,ZZ,31.0,31.0,0.0,186.2,27.0,8.0,18.0,1.0,3.0,0.963,,,,,,1.25,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,P,0.0,,F,0,,0,0.0,,0.0,,2,12,25,27.0,53,0,0.0,,2,65,F [12-53],0.226,M,186.666667,6,3.0,13,10.0,13.0,31,0,0.0,


In [1174]:
players.loc[(players["WP"] < 5), "WP_num"] = ""
players.loc[(players["WP"] >= 5), "WP_num"] = "[WP]"
players["WP_num"].value_counts()

        780
[WP]    148
Name: WP_num, dtype: int64

### Gopher Ball Rating

In [1175]:
players["hr_rate_pit"] = players["HR_pit"] / players["H_pit"]
players["hr_rate_pit"].value_counts()

0.000000    92
0.142857    29
0.166667    25
0.250000    22
0.200000    20
0.100000    16
0.125000    13
0.111111    13
0.181818    13
0.222222    12
0.076923    11
0.153846     9
0.333333     9
0.500000     8
0.090909     8
0.285714     8
0.062500     8
0.105263     8
0.230769     7
0.095238     7
0.190476     7
0.189189     7
0.272727     7
0.122807     7
0.187500     6
0.083333     6
0.103448     6
0.080000     5
0.071429     5
0.235294     5
0.047619     5
0.150943     5
0.117647     5
0.156250     5
0.133333     5
0.173913     5
0.086957     5
0.171429     4
0.263158     4
0.227273     4
0.147059     4
0.145833     4
0.150000     4
0.148936     4
0.148148     4
0.052632     4
0.120000     4
1.000000     4
0.093750     4
0.066667     4
0.114650     4
0.214286     4
0.160000     4
0.137255     4
0.176471     4
0.210526     4
0.266667     4
0.136364     4
0.041667     4
0.104478     4
0.294118     3
0.081081     3
0.312500     3
0.085106     3
0.115385     3
0.055556     3
0.070175  

In [1176]:
players["gopher_ball"] = ""
players.loc[(players["hr_rate_pit"] >= .1), "gopher_ball"] = "+"
players.loc[(players["hr_rate_pit"] <= .05), "gopher_ball"] = "-"
players["gopher_ball"].value_counts()

     855
+    662
-    114
Name: gopher_ball, dtype: int64

In [1177]:
players[players["gopher_ball"] == "-"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Inn_cat,Ch_cat,PO_cat,A_cat,E_cat,DP_cat,Fld%_cat,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,...,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1,10.0,24,12.0,22.0,44,1,23.0,/45,,0.0,-
16,Al Alburquerque,31,TOT,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,0.0,2.0,0.0,2.5,21.0,0.0,7.0,0.0,0.0,0.0,18.0,10.0,5.0,5.0,0.0,8.0,0.0,14.0,0.0,0.0,0.0,71.0,182.0,2.94,1.0,5.0,0.0,4.0,7.0,1.75,R,TOT,AL,21.0,0.0,0.0,18.0,9.0,2.0,7.0,0.0,0.0,1.0,,,,,,4.5,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.159,J,18.0,1,4.0,14,7.0,11.0,25,0,0.0,,,0.0,-
17,Al Alburquerque,31,KCR,AL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,AL,0.0,1.0,0.0,3.6,11.0,0.0,5.0,0.0,0.0,0.0,10.0,7.0,4.0,4.0,0.0,6.0,0.0,9.0,0.0,0.0,0.0,42.0,128.0,3.16,1.3,6.3,0.0,5.4,8.1,1.5,R,TOT,AL,21.0,0.0,0.0,18.0,9.0,2.0,7.0,0.0,0.0,1.0,,,,,,4.5,...,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,0.0,1.0,11.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.194,K,10.0,1,5.0,15,8.0,13.0,31,0,0.0,,,0.0,-
20,Scott Alexander,27,KCR,AL,4.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,5.0,4.0,0.556,2.48,58.0,0.0,9.0,0.0,0.0,4.0,69.0,62.0,23.0,19.0,3.0,28.0,0.0,59.0,0.0,0.0,3.0,283.0,181.0,3.23,1.304,8.1,0.4,3.7,7.7,2.11,L,KCR,AL,58.0,0.0,0.0,69.0,15.0,6.0,7.0,2.0,0.0,0.867,,,0.0,0.0,0.0,1.7,...,,,,,,,,,,,,,,,,,,,,,,,,,,,58.0,0.0,4.0,58.0,58.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,12,12.0,26,0,0.0,,0,66,G [n-26],0.243,M,69.0,1,4.0,14,8.0,12.0,26,0,0.0,,,0.048387,-
41,Drew Anderson,23,PHI,NL,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,23.14,2.0,0.0,1.0,0.0,0.0,0.0,2.1,6.0,7.0,6.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,14.0,21.0,2.73,3.0,23.1,0.0,3.9,7.7,2.0,R,PHI,NL,2.0,0.0,0.0,2.1,0.0,0.0,0.0,0.0,0.0,,,,-1.0,-86.0,,0.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.462,Z,2.333333,1,3.0,13,5.0,8.0,22,0,0.0,,,0.0,-
46,Nori Aoki,35,TOT,MLB,109.0,374.0,336.0,48.0,93.0,20.0,2.0,5.0,35.0,10.0,2.0,29.0,44.0,0.277,0.335,0.393,0.728,98.0,132.0,12.0,3.0,1.0,5.0,1.0,79H/8D1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,26.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OF,0.321101,,B,2,12.0,1,3.0,(13),0.102,**,3,13,4,7.0,21,0,0.0,,9,54,B12(13)** [13-21],0.25,M,1.0,1,12.0,26,0.0,0.0,n,0,0.0,,,0.0,-
47,Nori Aoki,35,TOT,AL,82.0,258.0,234.0,32.0,64.0,13.0,1.0,5.0,27.0,5.0,2.0,16.0,34.0,0.274,0.319,0.402,0.721,96.0,94.0,11.0,2.0,1.0,5.0,1.0,79/H8D1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,26.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,OF,0.329268,,B,3,13.0,1,4.0,(14),0.079,*,2,12,5,7.0,21,0,0.0,,9,54,B13(14)* [12-21],0.25,M,1.0,1,12.0,26,0.0,0.0,n,0,0.0,,,0.0,-
48,Nori Aoki,35,HOU,AL,70.0,224.0,202.0,28.0,55.0,12.0,1.0,2.0,19.0,5.0,2.0,15.0,29.0,0.272,0.323,0.371,0.694,91.0,75.0,11.0,2.0,1.0,4.0,1.0,79/HD1,L,AL,0.0,0.0,,27.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,21.0,9.16,3.0,9.0,0.0,18.0,0.0,0.0,R,TOT,MLB,105.0,87.0,56.0,766.0,141.0,133.0,7.0,1.0,1.0,0.993,-11.0,-18.0,,0.0,,1.64,...,,,,,,,,,,,,,,,,,,,,,,,,,,,71.0,56.0,70.0,68.0,1.0,0.0,0.0,0.0,0.0,0.0,56.0,0.0,16.0,67.0,1.0,5.0,1.0,OF,0.271429,,B,1,11.0,1,2.0,(12),0.088,*,2,12,5,7.0,21,0,0.0,,9,54,B11(12)* [12-21],0.25,M,1.0,1,12.0,26,0.0,0.0,n,0,0.0,,,0.0,-
52,Victor Arano,22,PHI,NL,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,R,NL,1.0,0.0,1.0,1.69,10.0,0.0,2.0,0.0,0.0,0.0,10.2,6.0,2.0,2.0,0.0,4.0,0.0,13.0,0.0,0.0,0.0,42.0,263.0,1.85,0.938,5.1,0.0,3.4,11.0,3.25,R,PHI,NL,10.0,0.0,0.0,10.2,1.0,0.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,0.84,...,,,,,,,,,,,,,,,,,,,,,,,,,,,10.0,0.0,10.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.158,J,10.666667,1,3.0,13,11.0,14.0,32,0,0.0,,,0.0,-
68,Luis Avilan,27,LAD,NL,58.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,1,L,NL,2.0,3.0,0.4,2.93,61.0,0.0,5.0,0.0,0.0,0.0,46.0,42.0,16.0,15.0,2.0,22.0,3.0,52.0,1.0,0.0,1.0,194.0,142.0,2.96,1.391,8.2,0.4,4.3,10.2,2.36,L,LAD,NL,61.0,0.0,0.0,46.0,6.0,2.0,3.0,1.0,1.0,0.833,,,0.0,0.0,0.0,0.98,...,,,,,,,,,,,,,,,,,,,,,,,,,,,61.0,0.0,58.0,61.0,61.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,18,18.0,36,0,0.0,,0,66,G [n-36],0.246,M,46.0,1,4.0,14,10.0,14.0,32,0,0.0,,,0.047619,-


### Pitcher Control Number

In [1178]:
players["control_rate"] = round((players["BB_pit"] + players["HBP_pit"] + players["H_pit"]) / 
                                players["BF"] * 36, 0)
players["control_rate"].value_counts()

12.0    197
11.0    170
13.0    139
10.0    106
14.0     80
9.0      45
15.0     39
16.0     33
8.0      25
17.0     20
18.0     16
19.0     11
20.0      8
7.0       8
0.0       7
22.0      5
21.0      4
6.0       3
36.0      2
30.0      2
24.0      2
29.0      2
5.0       1
3.0       1
26.0      1
4.0       1
Name: control_rate, dtype: int64

In [1179]:
players["PCN"] = players["control_rate"].map({
    0: "65",
    1: "65",
    2: "64",
    3: "63",
    4: "62",
    5: "61",
    6: "56",
    7: "55",
    8: "54",
    9: "53",
    10: "52",
    11: "51",
    12: "46",
    13: "45",
    14: "44",
    15: "43",
    16: "42",
    17: "41",
    18: "36",
    19: "35",
    20: "34",
    21: "33",
    22: "32",
    23: "31",
    24: "26",
    25: "25",
    26: "24",
    27: "23",
    28: "22",
    29: "21",
    30: "16",
    31: "15",
    32: "14",
    33: "13",
    34: "12",
    35: "11",
    36: "11"
}).astype(str)
players["PCN"].value_counts()

nan    703
46     197
51     170
45     139
52     106
44      80
53      45
43      39
42      33
54      25
41      20
36      16
35      11
55       8
34       8
65       7
32       5
33       4
56       3
26       2
16       2
11       2
21       2
24       1
61       1
62       1
63       1
Name: PCN, dtype: int64

### Probable Hit Number

In [1180]:
players["hit_rate_pit"] = round(players["H_pit"] / players["BF"] * 36, 0)
players["hit_rate_pit"].replace(np.nan, 0, inplace=True)
players["hit_rate_pit"] = players["hit_rate_pit"].astype(int)
players["hit_rate_pit"].value_counts()

0     717
8     233
9     192
7     150
10     87
6      74
11     47
5      32
14     22
12     22
13     12
4      12
15      8
18      5
22      4
3       4
16      3
17      3
24      1
20      1
2       1
27      1
Name: hit_rate_pit, dtype: int64

In [1181]:
players.loc[(players["BF"].isnull()), "hit_rate_pit"] = np.nan

In [1182]:
players["hit_rate_pit"].value_counts()

8.0     233
9.0     192
7.0     150
10.0     87
6.0      74
11.0     47
5.0      32
14.0     22
12.0     22
0.0      14
4.0      12
13.0     12
15.0      8
18.0      5
3.0       4
22.0      4
16.0      3
17.0      3
20.0      1
2.0       1
24.0      1
27.0      1
Name: hit_rate_pit, dtype: int64

In [1183]:
players["PPH"] = players["hit_rate_pit"].map({
    0: "66",
    1: "66",
    2: "65",
    3: "64",
    4: "63",
    5: "62",
    6: "61",
    7: "56",
    8: "55",
    9: "54",
    10: "53",
    11: "52",
    12: "51",
    13: "46",
    14: "45",
    15: "44",
    16: "43",
    17: "42",
    18: "41",
    19: "36",
    20: "35",
    21: "34",
    22: "33",
    23: "32",
    24: "31",
    25: "26",
    26: "25",
    27: "24",
    28: "23",
    29: "22",
    30: "21",
    31: "16",
    32: "15",
    33: "14",
    34: "13",
    35: "12",
    36: "11"
}).astype(str)
players["PPH"].value_counts()

nan    703
55     233
54     192
56     150
53      87
61      74
52      47
62      32
45      22
51      22
66      14
46      12
63      12
44       8
41       5
64       4
33       4
42       3
43       3
35       1
31       1
24       1
65       1
Name: PPH, dtype: int64

### Pitcher Rating

In [1184]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1185]:
players["goph_lett_inn"] = players["gopher_ball"] + players["pit_letter"] + players["IE"].astype(str)
players.loc[(players["IP"].isnull()), "goph_lett_inn"] = ""
players["goph_lett_inn"].value_counts()

        703
+M1      73
+W1      66
+L1      51
+W5      40
+X1      35
+M6      33
+W6      28
L1       28
+K1      26
+Z1      26
+X5      25
M1       25
-M1      21
-Z1      19
+Y1      19
+J1      16
+M5      16
+W4      14
J+1      13
+Z2      13
X1       13
+L6      12
W1       11
+W2      10
+W3      10
-L1       9
+X2       9
+Z+1      9
+X4       9
+Y3       8
-X1       8
+Y2       8
+J+1      8
+M3       8
+Z+2      8
-Y1       8
+M2       7
+X6       7
+Y4       7
W6        7
-W1       7
-J1       7
+X3       6
W5        6
-K1       6
-J+1      6
+Z+3      5
+L5       5
Z1        5
K1        5
+M4       5
Y1        5
+L4       4
+Y5       4
-Z2       4
X5        3
+Z4       3
+L2       3
Z3        3
+K2       3
+Z3       3
+Z5       3
+L3       2
L6        2
-Z+1      2
M6        2
+K6       2
W3        2
-M2       2
Z4        2
+J2       2
M5        2
-X4       2
Z+4       2
-Z+3      1
J1        1
+M7       1
+J+2      1
-W6       1
Z2        1
-W5       1
K5        1
-M5 

In [1186]:
players["bb_k_hbp"] = "(" + players["bb_num_pit"] + "-" + players["k_num_pit"] + players["hbp_num_pit"] + ") "
players["bb_k_hbp"].value_counts()

(nan-nan)      703
(13-25)         50
(13-24)         47
(14-25)         31
(13-26)         30
(13-31)         25
(12-22)         25
(14-26)         23
(13-23)         22
(14-31)         20
(12-24)         20
(11-n)          19
(14-24/25)      19
(12-21)         19
(12-25)         19
(12-23)         18
(13-25/26)      17
(12-26)         15
(13-22)         15
(13-24/25)      13
(13-23/24)      13
(14-23)         13
(14-32)         13
(15-26)         12
(15-33)         11
(14-24)         11
(13-21)         10
(11-22)          9
(11-21)          9
(14-25/26)       9
(12-31)          9
(15-31)          8
(14-33)          8
(14-34)          8
(13-22/23)       8
(13-26/31)       7
(12-22/23)       7
(13-31/32)       7
(12-23/24)       6
(14-22)          6
(12-16)          6
(13-33)          5
(11-26)          5
(16-26)          5
(12-24/25)       5
(15-25)          5
(12-21/22)       5
(15-32)          5
(13-32)          5
(26-n)           5
(12-32)          5
(12-25/26)       5
(15-24)     

In [1187]:
players.loc[(players["IP"].isnull()), "bb_k_hbp"] = ""
players["bb_k_hbp"].value_counts()

               703
(13-25)         50
(13-24)         47
(14-25)         31
(13-26)         30
(12-22)         25
(13-31)         25
(14-26)         23
(13-23)         22
(14-31)         20
(12-24)         20
(11-n)          19
(14-24/25)      19
(12-21)         19
(12-25)         19
(12-23)         18
(13-25/26)      17
(13-22)         15
(12-26)         15
(13-24/25)      13
(14-32)         13
(13-23/24)      13
(14-23)         13
(15-26)         12
(15-33)         11
(14-24)         11
(13-21)         10
(11-21)          9
(12-31)          9
(11-22)          9
(14-25/26)       9
(15-31)          8
(13-22/23)       8
(14-34)          8
(14-33)          8
(13-26/31)       7
(13-31/32)       7
(12-22/23)       7
(12-16)          6
(14-22)          6
(12-23/24)       6
(16-26)          5
(15-25)          5
(12-21/22)       5
(15-32)          5
(12-15)          5
(13-33)          5
(12-25/26)       5
(11-26)          5
(12-24/25)       5
(13-32)          5
(14-32/33)       5
(15-24)     

In [1188]:
players["pitcher_rating"] = players["goph_lett_inn"] + " " + players["bb_k_hbp"] + " " + players["WP_num"]
players["pitcher_rating"].value_counts()

+M1 (13-26)             7
+M1 (13-24)             5
+M6 (13-26)  [WP]       5
-M1 (11-n)              5
+W5 (13-23)             5
+M1 (13-31)             5
+M6 (13-25)             5
+L1 (13-25)             4
+W6 (13-25)             4
+W5 (14-26)             4
+W1 (14-31)             4
+W1 (13-26)             4
J+1 (11-n)              4
+M6 (13-31)  [WP]       3
+L1 (13-26)             3
+L1 (13-33)             3
+Z1 (11-n)              3
+M1 (13-25)             3
+M1 (14-26)             3
+X1 (12-24/25)          3
+L6 (13-25)             3
+W5 (13-24)             3
+Z1 (14-25)             3
-M1 (26-n)              3
+X5 (13-24)             3
+Y5 (12-21)             3
+W6 (13-25)  [WP]       3
+W6 (13-24)  [WP]       3
+W4 (13-24)             3
+W1 (12-25)             3
+W1 (13-31)             3
+W1 (15-33)             3
+W6 (12-21)             3
+W1 (12-24)             3
+M6 (13-24/25)  [WP]    2
+L1 (14-31/32)          2
-Z1 (26-n)              2
+X2 (14-23)             2
+Y1 (12-22) 

In [1189]:
players[players["Name"] == "Dwight\xa0Gooden"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating


In [1190]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,...,,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.24,M,43.666667,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(13-24),+M1 (13-24)
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,...,,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23],,,,,,,,,,0,0.0,,,,,,,,,,,
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,...,,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36],,,,,,,,,,0,0.0,,,,,,,,,,,
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,...,,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1.0,10.0,24.0,12.0,22.0,44.0,1,23.0,/45,,0.0,-,16.0,42.0,5.0,62.0,-L1,(24-44/45),-L1 (24-44/45)
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,...,,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32],,,,,,,,,,0,0.0,,,,,,,,,,,
5,Matt Adams,28,TOT,NL,131.0,367.0,339.0,46.0,93.0,22.0,1.0,20.0,65.0,0.0,0.0,23.0,88.0,0.274,0.319,0.522,0.841,115.0,177.0,5.0,1.0,0.0,4.0,5.0,3H7/D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1B,0.496183,,B,8,22.0,0,0.0,,0.0,,2,12,9,11.0,25,0,0.0,,9,54,B22 [12-25],,,,,,,,,,0,0.0,,,,,,,,,,,
6,Matt Adams,28,STL,NL,31.0,53.0,48.0,4.0,14.0,2.0,0.0,1.0,7.0,0.0,0.0,4.0,17.0,0.292,0.34,0.396,0.735,94.0,19.0,0.0,0.0,0.0,1.0,0.0,H/73D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,,31.0,8.0,31.0,9.0,0.0,0.0,3.0,0.0,0.0,0.0,6.0,0.0,0.0,6.0,2.0,22.0,0.0,1B,0.225806,,B+,3,13.0,0,0.0,,0.0,,3,13,12,15.0,33,0,0.0,,10,53,B+13 [13-33],,,,,,,,,,0,0.0,,,,,,,,,,,
7,Matt Adams,28,ATL,NL,100.0,314.0,291.0,42.0,79.0,20.0,1.0,19.0,58.0,0.0,0.0,19.0,71.0,0.271,0.315,0.543,0.858,119.0,158.0,5.0,1.0,0.0,3.0,5.0,3H7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,,100.0,69.0,100.0,72.0,0.0,0.0,59.0,0.0,0.0,0.0,13.0,0.0,0.0,13.0,0.0,29.0,0.0,1B,0.58,,B,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,9,54,B23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,
8,Jim Adduci,32,DET,AL,29.0,93.0,83.0,14.0,20.0,6.0,2.0,1.0,10.0,1.0,1.0,10.0,27.0,0.241,0.323,0.398,0.72,91.0,33.0,1.0,0.0,0.0,0.0,0.0,9/HD,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,26.0,22.0,20.0,194.0,52.0,50.0,2.0,0.0,0.0,1.0,-4.0,-22.0,,,,2.41,...,,,,,,,,,,,,,,,,,,,,29.0,22.0,29.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,2.0,2.0,3.0,OF,0.344828,,C+,2,12.0,4,6.0,(16),0.048,,4,14,10,14.0,32,0,0.0,,8,55,C+12(16) [14-32],,,,,,,,,,0,0.0,,,,,,,,,,,
9,Tim Adleman,29,CIN,NL,27.0,32.0,29.0,0.0,3.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,10.0,0.103,0.103,0.138,0.241,-38.0,4.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,5.0,11.0,0.313,5.52,30.0,20.0,4.0,0.0,0.0,0.0,122.1,124.0,79.0,75.0,29.0,51.0,1.0,108.0,6.0,1.0,1.0,531.0,81.0,5.87,1.431,9.1,2.1,3.8,7.9,2.12,R,CIN,NL,30.0,20.0,0.0,122.1,27.0,15.0,12.0,0.0,1.0,1.0,,,2.0,3.0,0.0,1.99,...,,,,,,,,,,,,,,,,,,,,30.0,20.0,27.0,30.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.074074,,E,0,,0,0.0,,0.0,,0,n,11,11.0,25,0,0.0,,3,64,E [n-25],0.262,W,122.333333,4.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.233871,+,12.0,46.0,8.0,55.0,+W4,(13-24),+W4 (13-24)


In [1191]:
players.tail(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rtot_cat,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating
1621,Tyler Webb,26,NYY,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,4.5,7.0,0.0,2.0,0.0,0.0,0.0,6.0,3.0,3.0,3.0,1.0,4.0,0.0,5.0,0.0,0.0,0.0,23.0,106.0,5.66,1.167,4.5,1.5,6.0,7.5,1.25,L,TOT,ZZ,9.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.0,...,,,,,,,,,,,,,,,,,,,,7.0,0.0,0.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.158,J,6.0,1,6.0,16,8.0,14.0,32,0,0.0,,,0.333333,+,11.0,51,5.0,62,+J1,(16-32),+J1 (16-32)
1622,Ryan Weber,26,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,2.45,1.0,1.0,0.0,0.0,0.0,0.0,3.2,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,186.0,3.16,0.818,7.4,0.0,0.0,0.0,,R,SEA,AL,1.0,1.0,0.0,3.2,2.0,1.0,1.0,0.0,0.0,1.0,,,0.0,0.0,0.0,4.91,...,,,,,,,,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.214,L,3.666667,4,0.0,11,0.0,0.0,n,0,0.0,,,0.0,-,8.0,54,8.0,55,-L4,(11-n),-L4 (11-n)
1623,Rob Whalen,23,SEA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,6.14,2.0,1.0,1.0,0.0,0.0,0.0,7.1,7.0,5.0,5.0,1.0,2.0,0.0,2.0,2.0,0.0,0.0,31.0,71.0,6.02,1.227,8.6,1.2,2.5,2.5,1.0,R,SEA,AL,2.0,1.0,0.0,7.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,...,,,,,,,,,,,,,,,,,,,,2.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.259,W,7.333333,4,2.0,12,2.0,4.0,14,2,6.0,/16,,0.142857,+,13.0,45,8.0,55,+W4,(12-14/16),+W4 (12-14/16)
1624,Jason Wheeler,26,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,9.0,2.0,0.0,1.0,0.0,0.0,0.0,3.0,6.0,5.0,3.0,1.0,4.0,0.0,0.0,0.0,0.0,0.0,18.0,55.0,11.49,3.333,18.0,3.0,12.0,0.0,0.0,L,MIN,AL,2.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,0.0,0.0,...,,,,,,,,,,,,,,,,,,,,2.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.429,Z,3.0,2,8.0,22,0.0,0.0,n,0,0.0,,,0.166667,+,20.0,34,12.0,51,+Z2,(22-n),+Z2 (22-n)
1625,Adam Wilk,29,MIN,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,7.84,3.0,1.0,0.0,0.0,0.0,0.0,10.1,16.0,9.0,9.0,3.0,8.0,1.0,6.0,0.0,0.0,2.0,54.0,58.0,8.09,2.323,13.9,2.6,7.0,5.2,0.75,L,TOT,ZZ,4.0,2.0,0.0,14.0,7.0,5.0,2.0,0.0,0.0,1.0,,,,,,4.5,...,,,,,,,,,,,,,,,,,,,,3.0,1.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.348,Z+,10.333333,3,5.0,15,4.0,9.0,23,0,0.0,,,0.1875,+,16.0,42,11.0,52,+Z+3,(15-23),+Z+3 (15-23)
1626,Blake Wood,31,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,2.0,0.0,1.0,4.76,17.0,0.0,2.0,0.0,0.0,0.0,17.0,20.0,9.0,9.0,3.0,4.0,1.0,22.0,0.0,0.0,1.0,73.0,91.0,3.57,1.412,10.6,1.6,2.1,11.6,5.5,R,TOT,ZZ,72.0,0.0,0.0,74.1,16.0,9.0,7.0,0.0,0.0,1.0,,,,,,1.94,...,,,,,,,,,,,,,,,,,,,,17.0,0.0,0.0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.29,X,17.0,1,2.0,12,11.0,13.0,31,0,0.0,,,0.15,+,12.0,46,10.0,53,+X1,(12-31),+X1 (12-31)
1627,Hunter Wood,23,TBR,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,3.16,0.0,0.0,0.0,0.0,0.0,,R,TBR,AL,1.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,,,,0.0,0.0,,0.0,...,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.0,J+,0.333333,1,0.0,11,0.0,0.0,n,0,0.0,,,,,0.0,65,0.0,66,J+1,(11-n),J+1 (11-n)
1628,Daniel Wright,26,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,1.0,0.0,4.58,5.0,2.0,3.0,0.0,0.0,0.0,19.2,21.0,12.0,10.0,1.0,8.0,0.0,11.0,2.0,0.0,1.0,85.0,94.0,4.23,1.475,9.6,0.5,3.7,5.0,1.38,R,LAA,AL,5.0,2.0,0.0,19.2,3.0,1.0,2.0,0.0,0.0,1.0,,,1.0,10.0,0.0,1.37,...,,,,,,,,,,,,,,,,,,,,5.0,2.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.28,X,19.666667,4,3.0,13,5.0,8.0,22,1,9.0,/23,,0.047619,-,13.0,45,9.0,54,-X4,(13-22/23),-X4 (13-22/23)
1629,Steven Wright,32,BOS,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,1.0,3.0,0.25,8.25,5.0,5.0,0.0,0.0,0.0,0.0,24.0,40.0,24.0,22.0,9.0,5.0,1.0,13.0,2.0,0.0,1.0,114.0,56.0,7.82,1.875,15.0,3.4,1.9,4.9,2.6,R,BOS,AL,5.0,5.0,0.0,24.0,3.0,2.0,0.0,1.0,0.0,0.667,,,-1.0,-8.0,0.0,0.75,...,,,,,,,,,,,,,,,,,,,,5.0,5.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.374,Z,24.0,5,2.0,12,4.0,6.0,16,1,7.0,/21,,0.225,+,15.0,43,13.0,46,+Z5,(12-16/21),+Z5 (12-16/21)
1630,Kirby Yates,30,LAA,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,AL,0.0,0.0,,18.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,32.0,27.16,2.0,18.0,18.0,0.0,9.0,,R,TOT,ZZ,62.0,0.0,0.0,56.2,2.0,0.0,2.0,0.0,0.0,1.0,,,,,,0.32,...,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.4,Z,1.0,1,0.0,11,7.0,7.0,21,0,0.0,,,1.0,+,14.0,44,14.0,45,+Z1,(11-21),+Z1 (11-21)


In [1192]:
players.to_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv", index=False)

## Fielding Ratings

In [1193]:
players = pd.read_csv("../data/player stats - " + year + " - with batter and pitcher ratings.csv")

In [1194]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1195]:
players["Primary_Pos_fld"].value_counts()

P     903
OF    284
C     118
2B     88
3B     83
SS     77
1B     74
Name: Primary_Pos_fld, dtype: int64

In [1196]:
players["Primary_Pos_fld"].isnull().sum()

4

In [1197]:
players.groupby("Primary_Pos_fld")["Fld%"].mean()

Primary_Pos_fld
1B    0.990838
2B    0.981784
3B    0.962512
C     0.990847
OF    0.983018
P     0.947876
SS    0.972208
Name: Fld%, dtype: float64

### Superior Rating

In [1198]:
players["superior_rating"] = ""
players.loc[(players["Primary_Pos_fld"] == "P") & (players["Fld%"] >= 0.980), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "C") & (players["Fld%"] >= 0.993), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["Fld%"] >= 0.995), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["Fld%"] >= 0.984), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["Fld%"] >= 0.971), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["Fld%"] >= 0.973), "superior_rating"] = "S"
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["Fld%"] >= 0.990), "superior_rating"] = "S"

In [1199]:
players["superior_rating"].value_counts()

S    883
     748
Name: superior_rating, dtype: int64

In [1200]:
players.head(10)

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,...,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.24,M,43.666667,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(13-24),+M1 (13-24),S
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,...,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23],,,,,,,,,,0,0.0,,,,,,,,,,,,
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,...,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36],,,,,,,,,,0,0.0,,,,,,,,,,,,S
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,...,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1.0,10.0,24.0,12.0,22.0,44.0,1,23.0,/45,,0.0,-,16.0,42.0,5.0,62.0,-L1,(24-44/45),-L1 (24-44/45),S
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,...,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32],,,,,,,,,,0,0.0,,,,,,,,,,,,
5,Matt Adams,28,TOT,NL,131.0,367.0,339.0,46.0,93.0,22.0,1.0,20.0,65.0,0.0,0.0,23.0,88.0,0.274,0.319,0.522,0.841,115.0,177.0,5.0,1.0,0.0,4.0,5.0,3H7/D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1B,0.496183,,B,8,22.0,0,0.0,,0.0,,2,12,9,11.0,25,0,0.0,,9,54,B22 [12-25],,,,,,,,,,0,0.0,,,,,,,,,,,,
6,Matt Adams,28,STL,NL,31.0,53.0,48.0,4.0,14.0,2.0,0.0,1.0,7.0,0.0,0.0,4.0,17.0,0.292,0.34,0.396,0.735,94.0,19.0,0.0,0.0,0.0,1.0,0.0,H/73D,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,31.0,8.0,31.0,9.0,0.0,0.0,3.0,0.0,0.0,0.0,6.0,0.0,0.0,6.0,2.0,22.0,0.0,1B,0.225806,,B+,3,13.0,0,0.0,,0.0,,3,13,12,15.0,33,0,0.0,,10,53,B+13 [13-33],,,,,,,,,,0,0.0,,,,,,,,,,,,
7,Matt Adams,28,ATL,NL,100.0,314.0,291.0,42.0,79.0,20.0,1.0,19.0,58.0,0.0,0.0,19.0,71.0,0.271,0.315,0.543,0.858,119.0,158.0,5.0,1.0,0.0,3.0,5.0,3H7,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TOT,NL,81.0,76.0,55.0,644.1,563.0,524.0,32.0,7.0,43.0,0.988,0.0,0.0,,,,7.77,...,,,,,,,,,,,,,,,,,,,100.0,69.0,100.0,72.0,0.0,0.0,59.0,0.0,0.0,0.0,13.0,0.0,0.0,13.0,0.0,29.0,0.0,1B,0.58,,B,9,23.0,0,0.0,,0.0,,2,12,8,10.0,24,0,0.0,,9,54,B23 [12-24],,,,,,,,,,0,0.0,,,,,,,,,,,,
8,Jim Adduci,32,DET,AL,29.0,93.0,83.0,14.0,20.0,6.0,2.0,1.0,10.0,1.0,1.0,10.0,27.0,0.241,0.323,0.398,0.72,91.0,33.0,1.0,0.0,0.0,0.0,0.0,9/HD,L,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,DET,AL,26.0,22.0,20.0,194.0,52.0,50.0,2.0,0.0,0.0,1.0,-4.0,-22.0,,,,2.41,...,,,,,,,,,,,,,,,,,,,29.0,22.0,29.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,2.0,2.0,3.0,OF,0.344828,,C+,2,12.0,4,6.0,(16),0.048,,4,14,10,14.0,32,0,0.0,,8,55,C+12(16) [14-32],,,,,,,,,,0,0.0,,,,,,,,,,,,S
9,Tim Adleman,29,CIN,NL,27.0,32.0,29.0,0.0,3.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,10.0,0.103,0.103,0.138,0.241,-38.0,4.0,0.0,0.0,3.0,0.0,0.0,1,R,NL,5.0,11.0,0.313,5.52,30.0,20.0,4.0,0.0,0.0,0.0,122.1,124.0,79.0,75.0,29.0,51.0,1.0,108.0,6.0,1.0,1.0,531.0,81.0,5.87,1.431,9.1,2.1,3.8,7.9,2.12,R,CIN,NL,30.0,20.0,0.0,122.1,27.0,15.0,12.0,0.0,1.0,1.0,,,2.0,3.0,0.0,1.99,...,,,,,,,,,,,,,,,,,,,30.0,20.0,27.0,30.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.074074,,E,0,,0,0.0,,0.0,,0,n,11,11.0,25,0,0.0,,3,64,E [n-25],0.262,W,122.333333,4.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.233871,+,12.0,46.0,8.0,55.0,+W4,(13-24),+W4 (13-24),S


### Arm Rating

In [1201]:
players["G"].value_counts()

10.0     38
27.0     35
5.0      33
1.0      33
4.0      33
3.0      32
2.0      32
6.0      31
8.0      29
11.0     28
9.0      28
23.0     27
32.0     26
12.0     25
7.0      25
21.0     24
24.0     24
16.0     24
29.0     24
20.0     23
31.0     22
61.0     22
25.0     22
30.0     22
37.0     21
33.0     21
13.0     21
28.0     20
18.0     20
26.0     20
46.0     20
14.0     19
65.0     18
17.0     16
72.0     16
43.0     16
53.0     16
64.0     16
137.0    16
71.0     16
19.0     15
50.0     15
49.0     15
40.0     14
15.0     14
38.0     13
34.0     13
62.0     13
22.0     13
58.0     13
77.0     12
68.0     12
69.0     11
52.0     11
70.0     11
55.0     10
97.0     10
67.0     10
82.0     10
60.0     10
57.0     10
59.0      9
135.0     9
96.0      9
54.0      9
78.0      8
139.0     8
66.0      8
63.0      8
81.0      8
115.0     8
109.0     8
76.0      7
105.0     7
95.0      7
79.0      7
151.0     7
47.0      7
73.0      7
146.0     7
145.0     6
41.0      6
88.0      6
56.0

In [1202]:
players["a_gp"] = players["A"] / players["G_app"]
players["a_gp"].mean()

0.6472517983208471

In [1203]:
players["arm_rating"] = 8
players.loc[(players["Primary_Pos_fld"] == "P") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "C"), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["a_gp"] >= 0.7), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["a_gp"] >= 2.0), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["a_gp"] >= 2.8), "arm_rating"] = 9
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["a_gp"] >= 0.08), "arm_rating"] = 9

In [1204]:
players["arm_rating"].value_counts()

8    1282
9     349
Name: arm_rating, dtype: int64

### Range Rating

In [1205]:
players["po_gp"] = players["PO"] / players["G_app"]
players["po_gp"].mean()

1.4265379913479446

In [1206]:
players.groupby("Primary_Pos_fld")["po_gp"].mean()

Primary_Pos_fld
1B    5.394306
2B    2.037301
3B    1.004085
C     6.444333
OF    1.969227
P     0.187643
SS    1.561018
Name: po_gp, dtype: float64

In [1207]:
players["range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "P") & (players["po_gp"] >= 0.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "C"), "range_rating"] = 4
players.loc[(players["Primary_Pos_fld"] == "1B") & (players["po_gp"] >= 8.3), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "2B") & (players["po_gp"] >= 2.1), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "3B") & (players["po_gp"] >= 0.8), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "SS") & (players["po_gp"] >= 1.6), "range_rating"] = 5
players.loc[(players["Primary_Pos_fld"] == "OF") & (players["po_gp"] >= 2.1), "range_rating"] = 5

In [1208]:
players["range_rating"].value_counts()

4    1369
5     262
Name: range_rating, dtype: int64

### Catcher Caught Stealing Rate

In [1209]:
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B', '3B', 'HR_bat', 'RBI', 'SB', 'CS', 'BB_bat', 'SO_bat', 'BA',
       'OBP', 'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP_bat', 'SH', 'SF',
       'IBB_bat', 'Pos_Summary', 'Bats', 'Lg_pit', 'W', 'L', 'W-L%', 'ERA',
       'G_pit', 'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H_pit', 'R_pit', 'ER',
       'HR_pit', 'BB_pit', 'IBB_pit', 'SO_pit', 'HBP_pit', 'BK', 'WP', 'BF',
       'ERA+', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W', 'Throws',
       'Tm_fld', 'Lg', 'G', 'GS_fld', 'CG_fld', 'Inn', 'Ch', 'PO', 'A', 'E',
       'DP', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood', 'RF/9',
       'RF/G', 'Pos_Summary_fld', 'Tm_cat', 'Lg_cat', 'G_cat', 'GS_cat',
       'CG_cat', 'Inn_cat', 'Ch_cat', 'PO_cat', 'A_cat', 'E_cat', 'DP_cat',
       'Fld%_cat', 'Rtot_cat', 'Rtot/yr_cat', 'Rctch', 'Rdrs_cat',
       'Rdrs/yr_cat', 'Rgood_cat', 'Rair', 'Rrange', 'Rthrow', 'RszC', 'RsbC',
       'RerC', 'RF/9_cat

In [1210]:
players["CS%"].value_counts()

0%      16
20%      7
27%      6
25%      6
37%      5
24%      5
33%      5
23%      5
17%      5
14%      4
8%       4
40%      4
38%      3
42%      3
31%      3
29%      3
30%      3
13%      2
36%      2
39%      2
44%      2
21%      2
35%      2
32%      2
50%      2
26%      2
10%      2
6%       1
43%      1
11%      1
18%      1
45%      1
100%     1
28%      1
49%      1
19%      1
Name: CS%, dtype: int64

In [1211]:
players["cs_rate"] = players["CS_cat"] / (players["SB_cat"] + players["CS_cat"])
players["cs_rate"].mean()

0.24205786589049944

In [1212]:
cs_break_points = [
    0.21,
    0.31,
    0.41,
    0.51
]

rating = [
    "",
    "-1",
    "-2",
    "-3",
    "-4"
]

def cs_rating(cs_rate, breakpoints=cs_break_points, ratings=rating):
    i = bisect(breakpoints, cs_rate)
    return ratings[i]

In [1213]:
players["cs_num"] = [cs_rating(rate) for rate in players["cs_rate"]]
players["cs_num"].value_counts()

-4    1516
        44
-1      36
-2      25
-3      10
Name: cs_num, dtype: int64

In [1214]:
players["G_cat"].isnull().sum()

1506

In [1215]:
players.loc[(players["G_cat"].isnull()), "cs_num"] = ""
players.loc[(players["SB_cat"] == 0), "cs_num"] = ""
players["cs_num"].value_counts()

      1560
-1      36
-2      25
-3      10
Name: cs_num, dtype: int64

In [1216]:
players[players["cs_num"] == "-4"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num


### Fielder Rating

In [1217]:
players["fielder_rating"] = (players["superior_rating"] + 
                             players["arm_rating"].astype(str) +
                             players["range_rating"].astype(str) + 
                             " " + 
                             players["cs_num"]
)
players["fielder_rating"].value_counts()

S84       644
84        503
S94        78
95         76
94         73
85         72
S85        62
S95        52
S94 -1     24
S94 -2     17
94 -1      12
94 -2       7
S94 -3      6
94 -3       4
84 -2       1
Name: fielder_rating, dtype: int64

In [1218]:
players[players["fielder_rating"] == "95 -2"]

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating


In [1219]:
players.to_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv", index=False)

# Save teams to separate Excel tabs

In [1220]:
players = pd.read_csv("../data/player stats - " + year + " - with batter pitcher and fielder ratings.csv")

In [1221]:
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,...,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,...,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.24,M,43.666667,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(13-24),+M1 (13-24),S,0.125,8,0.0,4,,,S84
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,...,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.5,8,7.275641,4,,,84
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,...,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.083333,8,0.25,4,,,S84
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,...,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1.0,10.0,24.0,12.0,22.0,44.0,1,23.0,/45,,0.0,-,16.0,42.0,5.0,62.0,-L1,(24-44/45),-L1 (24-44/45),S,0.0,8,0.166667,4,,,S84
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,...,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.011765,8,0.564706,4,,,84


In [1222]:
pd.set_option('display.max_seq_items', 175)
players.columns

Index(['Name', 'Age', 'Tm', 'Lg_bat', 'G_bat', 'PA', 'AB', 'R_bat', 'H_bat',
       '2B',
       ...
       'bb_k_hbp', 'pitcher_rating', 'superior_rating', 'a_gp', 'arm_rating',
       'po_gp', 'range_rating', 'cs_rate', 'cs_num', 'fielder_rating'],
      dtype='object', length=182)

In [1223]:
pd.set_option('display.max_columns', 175)
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,...,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.24,M,43.666667,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(13-24),+M1 (13-24),S,0.125,8,0.0,4,,,S84
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,...,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.5,8,7.275641,4,,,84
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,...,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.083333,8,0.25,4,,,S84
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,...,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1.0,10.0,24.0,12.0,22.0,44.0,1,23.0,/45,,0.0,-,16.0,42.0,5.0,62.0,-L1,(24-44/45),-L1 (24-44/45),S,0.0,8,0.166667,4,,,S84
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,...,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.011765,8,0.564706,4,,,84


In [1224]:
if 'DH' not in players:
    players['DH'] = 0
players.head()

Unnamed: 0,Name,Age,Tm,Lg_bat,G_bat,PA,AB,R_bat,H_bat,2B,3B,HR_bat,RBI,SB,CS,BB_bat,SO_bat,BA,OBP,SLG,OPS,OPS+,TB,GDP,HBP_bat,SH,SF,IBB_bat,Pos_Summary,Bats,Lg_pit,W,L,W-L%,ERA,G_pit,GS,GF,CG,SHO,SV,IP,H_pit,R_pit,ER,HR_pit,BB_pit,IBB_pit,SO_pit,HBP_pit,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Throws,Tm_fld,Lg,G,GS_fld,CG_fld,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos_Summary_fld,Tm_cat,Lg_cat,G_cat,GS_cat,CG_cat,...,Rtot/yr_cat,Rctch,Rdrs_cat,Rdrs/yr_cat,Rgood_cat,Rair,Rrange,Rthrow,RszC,RsbC,RerC,RF/9_cat,RF/G_cat,PB,WP_cat,SB_cat,CS_cat,CS%,G_app,GS_app,Batting,Defense,P,C,1B,2B_app,3B_app,SS,LF,CF,RF,OF,DH,PH,PR,Primary_Pos_fld,rbi_per_g,clutch,bat_letter,hr_rate,hr_num_bat,triple_rate,triple_val,triple_num,speed_score,speed_rating,walk_rate,bb_num,k_rate,k_val,k_num,hbp_rate,hbp_val,hbp_num,hit_rate,PH_num_bat,batter_rating,BAA,pit_letter,IP_real,IE,bb_rate,bb_num_pit,k_rate_pit,k_val_pit,k_num_pit,hbp_rate_pit,hbp_val_pit,hbp_num_pit,WP_num,hr_rate_pit,gopher_ball,control_rate,PCN,hit_rate_pit,PPH,goph_lett_inn,bb_k_hbp,pitcher_rating,superior_rating,a_gp,arm_rating,po_gp,range_rating,cs_rate,cs_num,fielder_rating
0,Fernando Abad,31,BOS,AL,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,L,AL,2.0,1.0,0.667,3.3,48.0,0.0,15.0,0.0,0.0,1.0,43.2,40.0,18.0,16.0,4.0,14.0,1.0,37.0,1.0,1.0,0.0,182.0,139.0,3.68,1.237,8.2,0.8,2.9,7.6,2.64,L,BOS,AL,48.0,0.0,0.0,43.2,6.0,0.0,6.0,0.0,1.0,1.0,,,1.0,5.0,0.0,1.24,0.13,P,,,,,,...,,,,,,,,,,,,,,,,,,,48.0,0.0,4.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.24,M,43.666667,1.0,3.0,13.0,7.0,10.0,24.0,0,0.0,,,0.1,+,11.0,51.0,8.0,55.0,+M1,(13-24),+M1 (13-24),S,0.125,8,0.0,4,,,S84
1,José Abreu,30,CHW,AL,156.0,675.0,621.0,95.0,189.0,43.0,6.0,33.0,102.0,3.0,0.0,35.0,119.0,0.304,0.354,0.552,0.906,141.0,343.0,21.0,15.0,0.0,4.0,6.0,*3D,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHW,AL,139.0,138.0,130.0,1197.0,1221.0,1135.0,78.0,8.0,130.0,0.993,3.0,3.0,2.0,2.0,-1.0,9.12,8.73,1B,,,,,,...,,,,,,,,,,,,,,,,,,,156.0,156.0,156.0,139.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1B,0.653846,#,B+,6,16.0,1,7.0,(21),0.019,,2,12,6,8.0,22,1,9.0,/23,10,53,#B+16(21) [12-22/23],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.5,8,7.275641,4,,,84
2,Cristhian Adames,25,COL,NL,12.0,14.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.071,0.0,0.071,-80.0,0.0,0.0,0.0,0.0,0.0,0.0,H/346,S,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,COL,NL,3.0,1.0,0.0,9.0,4.0,3.0,1.0,0.0,0.0,1.0,-1.0,-93.0,-1.0,-133.0,0.0,4.0,1.33,SS-2B-1B,,,,,,...,,,,,,,,,,,,,,,,,,,12.0,1.0,12.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,SS,0.0,,G,0,,0,0.0,,0.0,,3,13,15,18.0,36,0,0.0,,0,66,G [13-36],,,,,,,,,,0,0.0,,,,,,,,,,,,S,0.083333,8,0.25,4,,,S84
3,Austin Adams,26,WSN,NL,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,/1,R,NL,0.0,0.0,,3.6,6.0,0.0,3.0,0.0,0.0,0.0,5.0,4.0,4.0,2.0,0.0,8.0,0.0,10.0,1.0,0.0,1.0,29.0,133.0,4.56,2.4,7.2,0.0,14.4,18.0,1.25,R,WSN,NL,6.0,0.0,0.0,5.0,1.0,1.0,0.0,0.0,0.0,1.0,,,0.0,0.0,0.0,1.8,0.17,P,,,,,,...,,,,,,,,,,,,,,,,,,,6.0,0.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,0.0,,G,0,,0,0.0,,0.0,,0,n,0,0.0,n,0,0.0,,0,66,G [n-n],0.2,L,5.0,1.0,10.0,24.0,12.0,22.0,44.0,1,23.0,/45,,0.0,-,16.0,42.0,5.0,62.0,-L1,(24-44/45),-L1 (24-44/45),S,0.0,8,0.166667,4,,,S84
4,Lane Adams,27,ATL,NL,85.0,122.0,109.0,19.0,30.0,4.0,1.0,5.0,20.0,10.0,0.0,10.0,37.0,0.275,0.339,0.468,0.807,109.0,51.0,3.0,1.0,1.0,1.0,0.0,H78/9,R,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ATL,NL,41.0,13.0,9.0,172.0,51.0,48.0,1.0,2.0,0.0,0.961,1.0,5.0,,,,2.56,1.2,OF,,,,,,...,,,,,,,,,,,,,,,,,,,85.0,13.0,85.0,41.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0,OF,0.235294,,B,6,16.0,1,7.0,(21),0.323,****,3,13,11,14.0,32,0,0.0,,9,54,B16(21)**** [13-32],,,,,,,,,,0,0.0,,,,,,,,,,,,,0.011765,8,0.564706,4,,,84


In [1225]:
# fix games played column
players.loc[(players["Primary_Pos_fld"] == "P"), "Games_Played"] = players["G_pit"]
players.loc[(players["Primary_Pos_fld"] != "P"), "Games_Played"] = players["G_bat"]
players["Games_Played"].value_counts()

1.0      46
11.0     36
6.0      36
7.0      34
2.0      32
9.0      32
4.0      32
13.0     32
32.0     30
3.0      30
8.0      30
5.0      30
27.0     28
29.0     27
10.0     27
24.0     25
12.0     25
20.0     24
28.0     24
16.0     24
15.0     23
25.0     22
33.0     22
37.0     22
22.0     22
21.0     21
26.0     20
17.0     20
23.0     19
19.0     19
14.0     19
30.0     19
31.0     19
18.0     18
38.0     16
34.0     16
70.0     14
46.0     14
61.0     14
73.0     13
50.0     13
39.0     12
57.0     12
69.0     12
62.0     12
71.0     11
53.0     11
52.0     11
64.0     11
66.0     11
60.0     10
49.0     10
48.0     10
35.0     10
40.0      9
65.0      9
44.0      9
77.0      8
68.0      8
56.0      8
51.0      8
54.0      8
155.0     8
63.0      8
145.0     8
43.0      8
129.0     8
47.0      7
58.0      7
100.0     7
141.0     7
42.0      7
128.0     7
156.0     7
41.0      7
147.0     7
111.0     6
59.0      6
159.0     6
108.0     6
149.0     6
148.0     6
140.0     6
45.0

In [1226]:
players_short = players.loc[:, ["Name", "Age", "Tm", "Games_Played", "GS", "GF", "Pos_Summary_fld", 
                                "fielder_rating", "batter_rating", "PH_num_bat", "Bats", "pitcher_rating", "PCN", 
                                "PPH", "Throws", "Primary_Pos_fld", "P", "C", "1B", "2B_app", "3B_app", "SS", 
                                "LF", "CF", "RF", "OF", "DH", "PH", "PR"]]
players_short.rename(columns={
    "Games_Played": "G",
    "Pos_Summary_fld": "Positions",
    "fielder_rating": "DEF",
    "batter_rating": "Batter Rating",
    "PH_num_bat": "BPH",
    "Bats": "B",
    "pitcher_rating": "Pitcher Rating",
    "Throws": "T",
    "Primary_Pos_fld": "Primary",
    "2B_app": "2B",
    "3B_app": "3B"
}, inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Fernando Abad,31,BOS,48.0,0.0,15.0,P,S84,G [n-n],66,L,+M1 (13-24),51.0,55.0,L,P,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,José Abreu,30,CHW,156.0,,,1B,84,#B+16(21) [12-22/23],53,R,,,,,1B,0.0,0.0,139.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0
2,Cristhian Adames,25,COL,12.0,,,SS-2B-1B,S84,G [13-36],66,S,,,,,SS,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
3,Austin Adams,26,WSN,6.0,0.0,3.0,P,S84,G [n-n],66,R,-L1 (24-44/45),42.0,62.0,R,P,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Lane Adams,27,ATL,85.0,,,OF,84,B16(21)**** [13-32],54,R,,,,,OF,0.0,0.0,0.0,0.0,0.0,0.0,27.0,11.0,7.0,41.0,0.0,51.0,9.0


In [1227]:
players_short["Primary"].replace(np.nan, "DH_PH_PR", inplace=True)

In [1228]:
pos_cat_dtype = pd.api.types.CategoricalDtype(categories=["C", "1B", "2B", "3B", "SS", "OF", "DH_PH_PR", "P", ""],
                                              ordered=True) 
players_short["Primary"] = players_short['Primary'].astype(pos_cat_dtype)
players_short["Primary"].value_counts()

P           903
OF          284
C           118
2B           88
3B           83
SS           77
1B           74
DH_PH_PR      4
              0
Name: Primary, dtype: int64

In [1229]:
players_short.fillna("", inplace=True)
players_short.head()

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
0,Fernando Abad,31,BOS,48.0,0.0,15.0,P,S84,G [n-n],66,L,+M1 (13-24),51.0,55.0,L,P,48,0,0,0,0,0,0,0,0,0,0,0,0
1,José Abreu,30,CHW,156.0,,,1B,84,#B+16(21) [12-22/23],53,R,,,,,1B,0,0,139,0,0,0,0,0,0,0,18,0,0
2,Cristhian Adames,25,COL,12.0,,,SS-2B-1B,S84,G [13-36],66,S,,,,,SS,0,0,1,1,0,1,0,0,0,0,0,10,0
3,Austin Adams,26,WSN,6.0,0.0,3.0,P,S84,G [n-n],66,R,-L1 (24-44/45),42.0,62.0,R,P,6,0,0,0,0,0,0,0,0,0,0,0,0
4,Lane Adams,27,ATL,85.0,,,OF,84,B16(21)**** [13-32],54,R,,,,,OF,0,0,0,0,0,0,27,11,7,41,0,51,9


In [1230]:
players_short["Name"] = players_short["Name"].str.replace("\xa0", " ")

In [1231]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [1232]:
players_short.drop_duplicates(["Name", "Tm"], keep='first', inplace=True)

In [1233]:
# players_short[players_short["Name"] == "Steve Carlton"]

In [1234]:
players_short = players_short.sort_values(["Tm", "Primary", "G", "GS", "GF"], 
                                          ascending = (True, True, False, False, False))
players_short.head(50)

Unnamed: 0,Name,Age,Tm,G,GS,GF,Positions,DEF,Batter Rating,BPH,B,Pitcher Rating,PCN,PPH,T,Primary,P,C,1B,2B,3B,SS,LF,CF,RF,OF,DH,PH,PR
608,Chris Herrmann,29,ARI,106.0,,,C-OF-1B,S94 -1,D+23* [14-31],61,L,,,,,C,0,45,5,0,0,0,22,0,2,24,0,37,6
639,Chris Iannetta,34,ARI,89.0,,,C-3B,94 -1,B23 [14-32/33],55,R,,,,,C,0,78,0,0,1,0,0,0,0,0,0,15,0
823,Jeff Mathis,34,ARI,60.0,,,C,S94 -3,C12(14) [12-31],56,R,,,,,C,0,58,0,0,0,0,0,0,0,0,0,2,0
914,John Ryan Murphy,26,ARI,5.0,,,C,S94,D [n-15],62,R,,,,,C,0,5,0,0,0,0,0,0,0,0,0,0,0
497,Paul Goldschmidt,29,ARI,155.0,,,1B,S84,#B+22(23)* [15-31],54,R,,,,,1B,0,0,151,0,0,0,0,0,0,0,2,2,0
1384,Christian Walker,26,ARI,11.0,,,1B,84,C+46 [12-32/41],56,R,,,,,1B,0,0,1,0,0,0,0,0,0,0,1,10,0
348,Brandon Drury,24,ARI,135.0,,,2B-3B,84,B14(15) [12-24],54,R,,,,,2B,0,0,0,114,1,0,0,0,0,0,3,20,0
322,Daniel Descalso,30,ARI,130.0,0.0,2.0,2B-OF-1B-3B-P-SS,84,C+14(16) [14-26],56,L,J+1 (11-n),65.0,66.0,R,2B,2,0,19,45,15,1,36,0,0,36,1,35,0
725,Jake Lamb,26,ARI,149.0,,,3B,84,#C+22(23) [15-32],55,L,,,,,3B,0,0,0,0,144,0,0,0,0,0,1,5,0
1358,Ildemaro Vargas,25,ARI,12.0,,,3B-2B,84,A [n-22],52,S,,,,,3B,0,0,0,3,2,0,0,0,0,0,0,7,2


In [1235]:
my_dict = {index: group_teams for index, group_teams in players_short.groupby('Tm')}
my_dict

{'ARI':                    Name  Age   Tm      G  GS  GF         Positions     DEF  \
 608      Chris Herrmann   29  ARI  106.0                   C-OF-1B  S94 -1   
 639      Chris Iannetta   34  ARI   89.0                      C-3B   94 -1   
 823         Jeff Mathis   34  ARI   60.0                         C  S94 -3   
 914    John Ryan Murphy   26  ARI    5.0                         C    S94    
 497    Paul Goldschmidt   29  ARI  155.0                        1B    S84    
 1384   Christian Walker   26  ARI   11.0                        1B     84    
 348       Brandon Drury   24  ARI  135.0                     2B-3B     84    
 322     Daniel Descalso   30  ARI  130.0   0   2  2B-OF-1B-3B-P-SS     84    
 725           Jake Lamb   26  ARI  149.0                        3B     84    
 1358    Ildemaro Vargas   25  ARI   12.0                     3B-2B     84    
 972        Chris Owings   25  ARI   97.0                  SS-OF-2B     84    
 806         Ketel Marte   23  ARI   73.0    

In [1236]:
# https://stackoverflow.com/questions/21981820/creating-multiple-excel-worksheets-using-data-in-a-pandas-dataframe/21984437
writer = pd.ExcelWriter('../data/' + year + ' rosters ' + '.xlsx', engine='xlsxwriter')

def create_excel(dictionary):
    count = 0
    for i, j in dictionary.items():
        dictionary[i].to_excel(writer, sheet_name=i)
        count += 1
    
    writer.save()
    return count

In [1237]:
create_excel(my_dict)

31

## Format Excel file

In [1238]:
wb = openpyxl.load_workbook('../data/' + year + ' rosters ' + '.xlsx')

In [1239]:
# wb.sheetnames

In [1240]:
team_dict = {
    "ANA": "Anaheim Angels",
    "ARI": "Arizona Diamondbacks",
    "ATL": "Atlanta Braves",
    "BAL": "Baltimore Orioles",
    "BOS": "Boston Red Sox",
    "BRO": "Brooklyn Dodgers",
    "CAL": "California Angels",
    "CHC": "Chicago Cubs",
    "CHW": "Chicago White Sox",
    "CIN": "Cincinnati Reds",
    "CLE": "Cleveland Indians",
    "COL": "Colorado Rockies",
    "DET": "Detroit Tigers",
    "FLA": "Florida Marlins",
    "HOU": "Houston Astros",
    "KCA": "Kansas City Athletics",
    "KCR": "Kansas City Royals",
    "LAA": "Los Angeles Angels",
    "LAD": "Los Angeles Dodgers",
    "MIA": "Miami Marlins",
    "MLN": "Milwaukee Braves",
    "MIL": "Milwaukee Brewers",
    "MIN": "Minnesota Twins",
    "MON": "Montreal Expos",
    "NYG": "New York Giants",
    "NYM": "New York Mets",
    "NYY": "New York Yankees",
    "OAK": "Oakland A's",
    "PHI": "Philadelphia Phillies",
    "PIT": "Pittsburgh Pirates",
    "SDP": "San Diego Padres",
    "SEA": "Seattle Mariners",
    "SEP": "Seattle Pilots",
    "SFG": "San Francisco Giants",
    "STL": "St. Louis Cardinals",
    "TBD": "Tampa Bay Devil Rays",
    "TBR": "Tampa Bay Rays",
    "TEX": "Texas Rangers",
    "TOR": "Toronto Blue Jays",
    "TOT": "Muli-team Totals",
    "WSN": "Washington Nationals",
    "WSA": "Washington Senators",
    "WSH": "Washington Senators"
}

In [1241]:
# team_dict["ATL"]

In [1242]:
header_fill = openpyxl.styles.colors.Color(rgb='00FFFFFF')
style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                       showLastColumn=False, showRowStripes=True, showColumnStripes=False)
border = Border(left=Side(border_style='thin', color='FF000000'),
                right=Side(border_style='thin', color='FF000000'),
                top=Side(border_style='thin', color='FF000000'),
                bottom=Side(border_style='thin', color='FF000000')
)
alignment = Alignment(horizontal='center')
width_1 = 3
width_2 = 4
width_3 = 5
width_4 = 6
width_5 = 8
width_6 = 12
width_7 = 14
width_8 = 22
width_9 = 24

for sheet in wb:

    sheetname = sheet.title
    sheet.insert_rows(1)
    row_count = sheet.max_row
    column_count = sheet.max_column
    max_cell = "A2:" + str(get_column_letter(column_count)) + str(row_count) + ""
    all_cells = "A1:" + str(get_column_letter(column_count)) + str(row_count) + ""

    sheet.merge_cells('A1:AD1')
    sheet['A1'].alignment = Alignment(horizontal='center')
    sheet['A1'].fill = PatternFill(patternType='solid', fgColor=header_fill)
    sheet['A1'].value = year + ' ' + team_dict[sheetname]
    sheet['A2'].value = "ID"
    tab = Table(displayName="Table" + sheetname, ref=max_cell)
    tab.tableStyleInfo = style
    sheet.add_table(tab)
    sheet.column_dimensions['A'].width = width_4    
    sheet.column_dimensions['B'].width = width_8
    sheet.column_dimensions['C'].width = width_2
    sheet.column_dimensions['D'].width = width_3
    sheet.column_dimensions['E'].width = width_3
    sheet.column_dimensions['F'].width = width_2
    sheet.column_dimensions['G'].width = width_2
    sheet.column_dimensions['H'].width = width_7
    sheet.column_dimensions['I'].width = width_5
    sheet.column_dimensions['J'].width = width_9
    sheet.column_dimensions['K'].width = width_3
    sheet.column_dimensions['L'].width = width_1
    sheet.column_dimensions['M'].width = width_9
    sheet.column_dimensions['N'].width = width_3
    sheet.column_dimensions['O'].width = width_3
    sheet.column_dimensions['P'].width = width_1
    sheet.column_dimensions['Q'].width = width_6
    sheet.column_dimensions['R'].width = width_3
    sheet.column_dimensions['S'].width = width_3
    sheet.column_dimensions['T'].width = width_3
    sheet.column_dimensions['U'].width = width_3
    sheet.column_dimensions['V'].width = width_3
    sheet.column_dimensions['W'].width = width_3
    sheet.column_dimensions['X'].width = width_3
    sheet.column_dimensions['Y'].width = width_3
    sheet.column_dimensions['Z'].width = width_3
    sheet.column_dimensions['AA'].width = width_3
    sheet.column_dimensions['AB'].width = width_3
    sheet.column_dimensions['AC'].width = width_3
    sheet.column_dimensions['AD'].width = width_3
    
    rows = sheet[max_cell]
    for row in rows:
        for cell in row:
            cell.border = border
            cell.alignment = alignment
            cell.font = Font(size = 14)
    
    sheet['A1'].font = Font(size = 32, bold = True, color='005A80B8')
    sheet['A2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['B2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['C2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['D2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['E2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['F2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['G2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['H2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['I2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['J2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['K2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['L2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['M2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['N2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['O2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['P2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Q2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['R2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['S2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['T2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['U2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['V2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['W2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['X2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Y2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['Z2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AA2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AB2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AC2'].font = Font(size = 14, bold = True, color='00FFFFFF')
    sheet['AD2'].font = Font(size = 14, bold = True, color='00FFFFFF')
            

wb.save('../data/' + year + ' rosters ' + 'formatted.xlsx')

# Clean up

## Remove unwanted files

In [1243]:
os.remove('../data/' + year + ' rosters .xlsx')
os.remove('../data/player stats - ' + year + ' - with batter and pitcher ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter pitcher and fielder ratings.csv')
os.remove('../data/player stats - ' + year + ' - with batter ratings.csv')
os.remove('../data/player stats - ' + year + '.csv')