# 2021 Baseball Stats

Instructions:

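- Download the four batter and pitcher stat CSV files (complete-season and split-season): `MLB-stats-2021-b.csv`, `MLB-stats-2021-b-split.csv`, `MLB-stats-2021-p.csv`, and `MLB-stats-2021-p-split.csv`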

- Upload the downloaded CSV files to the session storage



# Imports and startup

In [1]:
# Necessary imports

import math
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

from datetime import datetime
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from ipywidgets import interactive
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import interact, Layout
from bokeh.io import output_notebook
from prettytable import PrettyTable
import locale
import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# For number formatting: adopt the user's default locale settings.
locale.setlocale(locale.LC_ALL, '')

# Call once to configure Bokeh to display plots inline in the notebook
output_notebook()

# Style of plots. Matplotlib 3.6 renamed the bundled seaborn styles to
# 'seaborn-v0_8*' and later releases dropped the old names, so prefer the new
# name when this installation provides it and fall back to the legacy one.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in matplotlib.style.available else 'seaborn'
matplotlib.style.use(plot_style)

# Quick startup: read the four 2021 season tables from the working directory.
#   btc - batters, complete season
#   bts - batters, split season (played for different teams)
#   ptc - pitchers, complete season
#   pts - pitchers, split season
btc, bts, ptc, pts = (
    pd.read_csv(csv_name)
    for csv_name in (
        'MLB-stats-2021-b.csv',
        'MLB-stats-2021-b-split.csv',
        'MLB-stats-2021-p.csv',
        'MLB-stats-2021-p-split.csv',
    )
)

#store player index values
# Position -> player name lookups. They follow each table's own length instead
# of hard-coded row counts, so a CSV with more or fewer players no longer
# produces NaN padding or silently dropped names.
bi = btc['Name'].reset_index(drop=True) #batter index
pi = ptc['Name'].reset_index(drop=True) #pitcher index

#split pitchers into starters and relievers
#(qualified as a starter if they started in over half of their games)
# GS * 2 > G is the same test as G / GS < 2 for any pitcher with at least one
# start, but it never divides, so pitchers with zero starts are classified as
# relievers without relying on division-by-zero behaviour. Working on whole
# columns also removes the dependency on a fixed number of pitcher rows.
isStarter = ptc['GS'] * 2 > ptc['G']
startersIndex = ptc.index[isStarter.to_numpy()].tolist()
relieversIndex = ptc.index[(~isStarter).to_numpy()].tolist()
starters = ptc.loc[startersIndex]
relievers = ptc.loc[relieversIndex]

# Batter Functions

Print out all batters (complete stats)

In [None]:
# Show the complete-season batter table (btc) as rich notebook output.
display(btc)

Display a team's stats individually

In [None]:
# Rows of the split-season tables whose team code is 'NYY'.
nyyB = bts.loc[bts['Team'] == 'NYY']
# The split-season pitcher table is named `pts`; the previous `pt` was never
# defined and raised NameError before the batter rows could be shown.
nyyP = pts.loc[pts['Team'] == 'NYY']
nyyB


Sort players by specific stat

In [None]:
# Batters ordered by the 'H' column, largest value first.
bt_sort_H = btc.sort_values('H', ascending=False)
bt_sort_H

Dropdown menu of stats to display with option for descending or ascending

In [None]:
def sortByStat(stat, direction):
  """Return the complete-season batter table sorted by one column.

  Args:
    stat: Name of a column in `btc` to sort by.
    direction: 'Ascending' sorts smallest-first; any other value (the
      dropdown offers 'Descending') sorts largest-first.

  Returns:
    A sorted copy of `btc`.
  """
  # The flag used to be inverted, so choosing 'Ascending' sorted descending.
  return btc.sort_values(by=stat, ascending=(direction == 'Ascending'))

# Build two dropdowns (column to sort by, sort direction) and re-run
# sortByStat whenever either selection changes.
interact(sortByStat, stat = list(btc.columns), direction = ['Ascending', 'Descending'])

Filters out players by a minimum number of at bats

In [None]:
# Qualified batters: strictly more than 100 at-bats ('AB').
bt_qual = btc[btc['AB'].gt(100)]

def sortByStat_100ab(stat, direction):
  """Sort batters by a column, restricting most stats to qualified batters.

  Args:
    stat: Column to sort by.
    direction: 'Ascending' sorts smallest-first; any other value (the
      dropdown offers 'Descending') sorts largest-first.

  Returns:
    The sorted table. For 'Name', 'Team', 'G' and 'AB' every row of `btc`
    is included; for any other column only the rows of `bt_qual` are sorted.
  """
  # Previously inverted: 'Ascending' produced a descending sort.
  ascending = direction == 'Ascending'
  # These four columns are sorted over all batters; everything else is
  # limited to the at-bat-filtered table.
  source = btc if stat in ('Name', 'Team', 'G', 'AB') else bt_qual
  return source.sort_values(by=stat, ascending=ascending)

# Same two dropdowns as the unfiltered sorter, wired to the variant that
# applies the at-bat filter.
interact(sortByStat_100ab, stat = list(btc.columns), direction = ['Ascending', 'Descending'])

Bar chart for top batters for a certain stat

In [None]:
def bar_plotB(stat, direction):
  """Show a bar chart of the first 100 qualified batters ordered by a stat.

  Only batters with more than 100 at-bats ('AB') are plotted.

  Args:
    stat: Column of `btc` used both for ordering and for the bar heights.
    direction: 'Ascending' orders smallest-first; any other value (the
      dropdown offers 'Descending') orders largest-first.
  """
  bt_qual = btc.loc[btc['AB'] > 100]
  # The flag used to be inverted ('Ascending' sorted descending).
  bt_sorted = bt_qual.sort_values(by=stat, ascending=(direction == 'Ascending'))
  fig = px.bar(bt_sorted.head(100), x='Name', y=stat, width=1300, height=600)
  fig.update_layout(title_text="Top Players in Hitting Stats in MLB (2021)")
  fig.update_xaxes(rangeslider_visible=True)
  fig.show()

# `bt_sorted` exists only inside bar_plotB, so referencing it here raised
# NameError; the selectable stats come from the batter table itself
# (skipping its first two columns, as before).
interact(bar_plotB, stat=list(btc.columns)[2:], direction = ['Ascending', 'Descending'])

Search for player by name

In [None]:
# Look a batter up by exact full name and report when the name is unknown.
search = input("Enter a player's full name to search for: ")
searchResult = btc.loc[btc['Name'] == search]
if search in list(bi):
  display(searchResult)
else:
  print('This player does not exist.')

Compare two players' stats

In [None]:
def compareTwo (player1, player2):
  """Return the complete-season rows for two batters, in the order given.

  Args:
    player1: Exact full name of the first batter.
    player2: Exact full name of the second batter.

  Returns:
    A two-row DataFrame selected from `btc`.

  Raises:
    ValueError: If either name is not present in the batter index `bi`.
  """
  names = list(bi)
  searchArray = [names.index(player1), names.index(player2)]
  return btc.loc[searchArray]

search1 = input("Enter a player's full name to search for: ")
search2 = input("Enter another player's full name to search for: ")
#exception handling for nonexistent players: report the problem with the same
#message as the other player lookups instead of an uncaught traceback
try:
  display(compareTwo(search1, search2))
except ValueError:
  print('This player does not exist.')

Search for player by name and a specific stat by abbreviation

In [None]:
#exception handling for input of nonexistent player/stat
try:
  searchName = input("Enter a player's full name to search for: ")
  searchStat = input("Enter a stat to view (use abbreviation): ")
  statValue = btc.loc[list(bi).index(searchName)].at[searchStat]
  try:
    print(round(statValue, 3)) #rounds stats to 3 decimal places
  except TypeError:
    # Text columns such as 'Name' or 'Team' cannot be rounded; show them as-is
    # instead of failing with an uncaught TypeError.
    print(statValue)
except ValueError:
  print('This player does not exist.')
except KeyError:
  print('This stat does not exist.')

Use stored stats to calculate other stats (HR/PA = home run percentage)

In [None]:
# Home-run percentage for one batter: HR divided by PA, expressed as a percent.
judgeRow = btc.loc[list(bi).index('Aaron Judge')]
aaron_judge_hr = round(judgeRow.at['HR'], 3)
aaron_judge_pa = round(judgeRow.at['PA'], 3)
aaron_judge_hrpercent = round(aaron_judge_hr / aaron_judge_pa * 100, 3)
aaron_judge_hrpercent

Method for finding all players' HR/PA

In [2]:
def find_hrPercent():
  """Compute every batter's home-run percentage (HR / PA * 100).

  Returns:
    A list with one float per row of `btc`, in row order, rounded to 3
    decimal places. A row with zero plate appearances yields NaN (or inf
    when its HR value is non-zero) rather than raising.
  """
  # Column arithmetic replaces a per-name lookup that re-scanned the index
  # for every player and, for players sharing a name, always read the first
  # match's stats.
  hrPercent = (btc['HR'] / btc['PA'] * 100).round(3)
  return hrPercent.tolist()

Add HR% to each player's profile in the DataFrame

In [None]:
# Append the HR% column only when it is missing, so re-running this cell
# cannot create a duplicate column.
if 'HR%' not in btc.columns:
  btc['HR%'] = find_hrPercent()
btc

Display bar chart for top players for a calculated stat

In [None]:
# Re-create the batter chart controls from btc's current columns.
# NOTE(review): this offers columns from position 4 onward, while the earlier
# bar_plotB cell slices from position 2 - confirm which offset is intended.
interact(bar_plotB, stat=list(btc.columns)[4:], direction = ['Ascending', 'Descending'])

Calculate overall stats of all players combined

In [None]:
# pandas summary statistics (describe) for the complete-season batter table.
btc.describe()

# Pitcher Functions

Display pitchers

In [None]:
# Show the complete-season pitcher table (ptc) as rich notebook output.
display(ptc)

Bar chart for top STARTERS for a chosen stat

In [None]:
def bar_plotStarters(stat, direction):
  """Show a bar chart of the first 100 qualified starters ordered by a stat.

  Only rows of `starters` with an 'IP' value above 30 are plotted.

  Args:
    stat: Column of `starters` used both for ordering and for the bar heights.
    direction: 'Ascending' orders smallest-first; any other value (the
      dropdown offers 'Descending') orders largest-first.
  """
  # Filter with the starters' own IP column rather than a mask built from the
  # full pitcher table, so the selection does not depend on index alignment.
  pt_qual = starters.loc[starters['IP'] > 30]
  # The flag used to be inverted ('Ascending' sorted descending).
  pt_sorted = pt_qual.sort_values(by=stat, ascending=(direction == 'Ascending'))
  fig = px.bar(pt_sorted.head(100), x='Name', y=stat, width=1300, height=600)
  fig.update_layout(title_text="Top Starters in Pitching Stats in MLB (2021)")
  fig.update_xaxes(rangeslider_visible=True)
  fig.show()

# `pt_sorted` exists only inside bar_plotStarters, so referencing it here
# raised NameError; the selectable stats come from the starters table itself
# (skipping its first two columns, as before).
interact(bar_plotStarters, stat=list(starters.columns)[2:], direction = ['Ascending', 'Descending'])

Bar chart for top RELIEVERS for a chosen stat

In [None]:
def bar_plotRelievers(stat, direction):
  """Show a bar chart of the first 100 qualified relievers ordered by a stat.

  Only rows of `relievers` with an 'IP' value above 30 are plotted.

  Args:
    stat: Column of `relievers` used both for ordering and for the bar heights.
    direction: 'Ascending' orders smallest-first; any other value (the
      dropdown offers 'Descending') orders largest-first.
  """
  # Filter with the relievers' own IP column rather than a mask built from
  # the full pitcher table, so the selection does not depend on index alignment.
  pt_qual = relievers.loc[relievers['IP'] > 30]
  # The flag used to be inverted ('Ascending' sorted descending).
  pt_sorted = pt_qual.sort_values(by=stat, ascending=(direction == 'Ascending'))
  fig = px.bar(pt_sorted.head(100), x='Name', y=stat, width=1300, height=600)
  # The title was copy-pasted from the starters chart; this one plots relievers.
  fig.update_layout(title_text="Top Relievers in Pitching Stats in MLB (2021)")
  fig.update_xaxes(rangeslider_visible=True)
  fig.show()

# `pt_sorted` exists only inside bar_plotRelievers, so referencing it here
# raised NameError; the selectable stats come from the relievers table itself
# (skipping its first two columns, as before).
interact(bar_plotRelievers, stat=list(relievers.columns)[2:], direction = ['Ascending', 'Descending'])