## Sunburst plot allowing for quick look up of player stats

#### Goal is to start with divisions --> teams --> player  --> shows percentile stats

In [2]:
import pybaseball as pb
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go 
import numpy as np

In [3]:
pd.set_option('display.max_columns', None)

### Preparing table 
#### Using the baseball savant table and going to add a League column and Division Column based on team

In [4]:
savant_df = pd.read_csv('/Users/charliecoleman/Desktop/Git/Repositories/baseball_analytics/csv_files/baseball_savant.csv')

In [7]:
al = ['BOS','TOR', 'NYY', 'TBR', 'BAL', 'CLE', 'DET', 'CHW', 'MIN', 'KCR', 'LAA', 'SEA', 'OAK', 'HOU', 'TEX']
nl = list(savant_df['Team'][~savant_df['Team'].isin(al)].unique()[1:])

In [11]:
# Creating the League column
conditions =[
    (savant_df['Team'].isin(al)),
    (savant_df['Team'].isin(nl)),
    (savant_df['Team'] == '- - -')
]
values = ['AL', 'NL', '- - -']
savant_df['League'] = np.select(conditions, values)

In [13]:
# List of AL division Team
ale = al[:5]
alc = al[5:10]
alw = al[10:]

# List of AL division Team
nle = ['PHI', 'WSN', 'ATL', 'NYM', 'MIA']
nlc = ['PIT', 'STL', 'CIN', 'MIL', 'CHC'] 
nlw = ['LAD', 'ARI', 'COL', 'SFG', 'SDP']

In [16]:
conditions = [
    (savant_df['Team'].isin(ale)),
    (savant_df['Team'].isin(alc)),
    (savant_df['Team'].isin(alw)),
    (savant_df['Team'].isin(nle)),
    (savant_df['Team'].isin(nlc)),
    (savant_df['Team'].isin(nlw)),
    (savant_df['Team'] == '- - -')
]
values = ['AL East', 'AL Central',' AL West', 'NL East', 'NL Central', 'NL West', '- - -']
savant_df['Division'] = np.select(conditions, values)

In [18]:
# League and Division added
savant_df[['Name', 'Team', 'League', 'Division']]

Unnamed: 0,Name,Team,League,Division
0,Trea Turner,- - -,- - -,- - -
1,Vladimir Guerrero Jr.,TOR,AL,AL East
2,Marcus Semien,TOR,AL,AL East
3,Juan Soto,WSN,NL,NL East
4,Bryce Harper,PHI,NL,NL East
...,...,...,...,...
227,Jurickson Profar,SDP,NL,NL West
228,Jarred Kelenic,SEA,AL,AL West
229,Jackie Bradley Jr.,MIL,NL,NL Central
230,Cody Bellinger,LAD,NL,NL West


In [None]:
# Adding Barrels to the savant_df
fg_df = fg_df = pb.batting_stats(2021, qual=200)
savant_df = pd.merge(savant_df, fg_df[["Name", "Barrels"]], on="Name", how="left" )

In [22]:
# Removing players that played for more than one team this year - will come back to plotting them 
one_team = savant_df[savant_df["Team"] != '- - -' ]
two_team = savant_df[savant_df["Team"] == '- - -' ]

### Tables are ready - time to plot
#### First plot is just players that didn't change team
#### Plotting Barrels - quite a good way of seeing which league, division and team have the best offense - then can see which player contributed the most

In [104]:
fig = px.sunburst(
    data_frame=one_team,
    path=["League", "Division", "Team", "Name"],
    values="Barrels",
    maxdepth=2,
    hover_name="League",
    hover_data={"Team":False, "Barrel%_percentile":True, "Barrels":True},
    template="seaborn",
    title="Number of Barrels broken down for each League, Division, Team and Player"
)
fig.update_traces(textinfo='label+value')
# fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))

#### This plot is finished - a bit dissappointed that can't really plot percentiles as it sums the percentages which looks bad 

In [103]:
savant_df

Unnamed: 0.1,Unnamed: 0,Name,ID,Year,Team,PA,POS,xwOBA_percentile,xBA_percentile,xSLG_percentile,xISO_percentile,xOBP_percentile,Barrel_percentile,Barrel%_percentile,ExitVelocity_percentile,HardHit%_percentile,K%_percentile,BB%_percentile,Whiff%_percentile,SprintSpeed_percentile,OAA_percentile,wOBA,xwOBA,xwOBA_minus_wOBA,BA,xBA,xBA_minus_BA,SLG,xSLG,xSLG_minus_SLG,K%,BB%,SwStr%,HardHit%,Barrel%,AvgExitVelo,MaxExitVelo,OAA,Spd,wRC+,WAR,League,Division
0,0,Trea Turner,607208,2021,- - -,646.0,SS,85.0,98.0,80.0,55.0,81.0,69.0,40.0,54.0,77.0,74.0,15.0,64.0,100.0,82.0,0.386,0.362,0.024,0.328,0.303,0.025,0.536,0.484,0.052,0.170,0.063,0.102,0.460,7.4,89.6,112.2,3.0,6.4,142,6.9,- - -,- - -
1,1,Vladimir Guerrero Jr.,665489,2021,TOR,698.0,1B,99.0,98.0,98.0,95.0,98.0,100.0,90.0,99.0,98.0,82.0,87.0,28.0,45.0,17.0,0.419,0.416,0.003,0.311,0.306,0.005,0.601,0.591,0.010,0.158,0.123,0.124,0.552,15.1,95.1,117.4,-3.0,3.1,166,6.7,AL,AL East
2,2,Marcus Semien,543760,2021,TOR,724.0,SS,55.0,35.0,66.0,72.0,38.0,86.0,65.0,60.0,52.0,58.0,54.0,72.0,87.0,89.0,0.368,0.331,0.037,0.265,0.244,0.021,0.538,0.453,0.085,0.202,0.091,0.087,0.413,9.8,89.7,109.5,5.0,5.3,131,6.6,AL,AL East
3,3,Juan Soto,665742,2021,WSN,654.0,LF,100.0,97.0,92.0,85.0,100.0,90.0,84.0,94.0,94.0,90.0,100.0,80.0,52.0,86.0,0.420,0.430,-0.010,0.313,0.304,0.009,0.534,0.544,-0.010,0.142,0.222,0.063,0.524,13.3,93.0,116.6,4.0,3.5,163,6.6,NL,NL East
4,4,Bryce Harper,547180,2021,PHI,599.0,RF,99.0,97.0,99.0,97.0,99.0,97.0,97.0,90.0,91.0,43.0,99.0,11.0,69.0,9.0,0.431,0.430,0.001,0.309,0.301,0.008,0.615,0.610,0.005,0.224,0.167,0.144,0.492,18.1,92.5,116.3,-5.0,4.3,170,6.6,NL,NL East
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,227,Jurickson Profar,595777,2021,SDP,412.0,2B,22.0,25.0,5.0,4.0,65.0,4.0,7.0,4.0,9.0,82.0,86.0,87.0,35.0,4.0,0.292,0.302,-0.010,0.227,0.238,-0.011,0.320,0.323,-0.003,0.158,0.119,0.074,0.295,2.7,85.3,107.6,-7.0,4.9,85,-0.7,NL,NL West
228,228,Jarred Kelenic,672284,2021,SEA,377.0,CF,31.0,8.0,41.0,62.0,18.0,36.0,65.0,19.0,38.0,11.0,61.0,35.0,54.0,22.0,0.270,0.310,-0.040,0.181,0.218,-0.037,0.350,0.408,-0.058,0.281,0.095,0.118,0.392,9.9,87.5,110.9,-2.0,4.3,73,-0.7,AL,AL West
229,229,Jackie Bradley Jr.,598265,2021,MIL,428.0,CF,0.0,1.0,1.0,6.0,1.0,13.0,20.0,60.0,49.0,6.0,18.0,5.0,55.0,86.0,0.224,0.257,-0.033,0.163,0.198,-0.035,0.261,0.294,-0.033,0.308,0.065,0.155,0.403,5.0,89.7,108.4,4.0,6.0,35,-0.8,NL,NL Central
230,230,Cody Bellinger,641355,2021,LAD,350.0,CF,5.0,3.0,14.0,35.0,4.0,18.0,38.0,50.0,20.0,17.0,50.0,17.0,74.0,74.0,0.237,0.281,-0.044,0.165,0.208,-0.043,0.302,0.357,-0.055,0.269,0.089,0.142,0.344,7.1,89.3,107.4,2.0,5.0,48,-0.8,NL,NL West


### Just going to plot some percentile values even though it sums the percentages but still helpful

#### Speed Percentile Sunburst

In [111]:
fig = px.sunburst(
    data_frame=one_team,
    path=["League", "Division", "Team", "Name"],
    values="SprintSpeed_percentile",
    maxdepth=2,
    hover_name="League",
    hover_data={"Team":False, "Spd":True},
    template="seaborn",
    title="Number of Barrels broken down for each League, Division, Team and Player"
)
fig.update_traces(textinfo='label+value')

#### OAA percentile 

In [120]:
fig = px.sunburst(
    data_frame=one_team,
    path=["League", "Division", "Team", "Name"],
    values="OAA_percentile",
    maxdepth=2,
    hover_name="League",
    hover_data={"Team":False, "OAA":True},
    template="seaborn",
    title="OAA percentile for each League, Division, Team and Player"
)
fig.update_traces(textinfo='label+value')

#### Looking at these plots it's a good way of seeing the differences in the Leagues where AL is more offence focused whereas NL has a higher proportion of spped good defenders

#### To make these graphs a bit fairer we want to look at the same amount of players for each team
#### First we need to find the minimum number of players a team has and limit the rest of the teams to that number and sort players by WAR

In [118]:
unique_names = savant_df['Team'].unique()
dict = {}
for i in range(0, len(unique_names)):
    count = savant_df['Name'][savant_df["Team"] == unique_names[i]].nunique()
    dict[unique_names[i]] = count

In [119]:
dict

{'- - -': 25,
 'TOR': 7,
 'WSN': 5,
 'PHI': 10,
 'CLE': 5,
 'SDP': 9,
 'HOU': 8,
 'NYY': 8,
 'PIT': 6,
 'SFG': 7,
 'STL': 8,
 'BAL': 7,
 'TBR': 8,
 'BOS': 8,
 'LAA': 5,
 'OAK': 8,
 'LAD': 9,
 'ATL': 6,
 'CHW': 6,
 'KCR': 7,
 'CIN': 9,
 'MIN': 6,
 'SEA': 7,
 'NYM': 9,
 'COL': 8,
 'DET': 7,
 'ARI': 7,
 'MIL': 6,
 'TEX': 4,
 'MIA': 3,
 'CHC': 4}

### Turns out the minimum is 3 so not really worth doing since the sample size will be too small