In [1]:
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np

In [21]:
# Load the two source CSV files from the parent directory.
# Forward slashes are used so the relative paths resolve on Windows, macOS and
# Linux alike, and so the literals contain no backslash escapes ('\p' is an
# invalid escape sequence that newer Python versions warn about).
movement = pd.read_csv('../pitch_movement.csv')
outcomes = pd.read_csv('../pitch-arsenal-stats.csv')

# Join the two tables on pitcher, pitch type and season. merge() defaults to an
# inner join, so only rows present in both files are kept.
arsenal = movement.merge(outcomes, on=['pitcher_id', 'pitch_type', 'year'])
#arsenal.to_csv('arsenal.csv')
arsenal.describe()

Unnamed: 0,year,pitcher_id,avg_speed,pitches_thrown,total_pitches,pitches_per_game,pitcher_break_z,league_break_z,diff_z,rise,...,ba,slg,woba,whiff_percent,k_percent,put_away,est_ba,est_slg,est_woba,hard_hit_percent
count,3426.0,3426.0,3426.0,3426.0,3426.0,3426.0,3426.0,3426.0,3426.0,3426.0,...,3426.0,3426.0,3426.0,3426.0,3426.0,3425.0,3426.0,3426.0,3426.0,3426.0
mean,2021.499124,606482.857268,87.764361,364.44279,1398.77087,8.636102,30.156509,-30.06258,-0.100671,-0.687099,...,0.241151,0.397412,0.317387,26.413631,23.003269,18.768905,0.240226,0.392156,0.308306,36.947636
std,0.500072,63372.240992,5.874571,270.752626,753.190338,4.650295,13.117999,12.705216,3.009087,10.840025,...,0.07551,0.14774,0.087737,10.375969,11.824378,6.794912,0.058325,0.122134,0.072453,10.822396
min,2021.0,425794.0,65.8,32.0,486.0,3.0,8.2,-73.5,-14.4,-52.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.041,0.059,0.071,0.0
25%,2021.0,572971.0,83.7,171.0,815.0,5.030864,18.9,-36.6,-1.9,-7.0,...,0.193,0.3,0.261,18.6,14.4,14.3,0.202,0.312,0.259,30.0
50%,2021.0,621244.0,87.6,292.0,1086.0,6.703704,29.1,-29.6,-0.1,0.0,...,0.239,0.387,0.315,25.5,21.75,18.8,0.24,0.385,0.307,36.9
75%,2022.0,657376.0,92.9,479.0,1946.25,12.013889,37.575,-19.6,1.7,6.0,...,0.288,0.479,0.372,33.1,30.5,22.9,0.278,0.464,0.355,44.3
max,2022.0,693821.0,100.8,1861.0,3274.0,20.209877,74.3,-9.3,10.3,41.0,...,0.568,1.231,0.75,66.9,84.8,57.1,0.534,1.088,0.675,76.9


In [11]:
# Page header followed by two select boxes that drive the filtering further down:
# one for the season to look at and one for the pitch type of interest.
# The options of each box are the distinct values found in the merged data.

st.header('MLB pitch comp app')

st.write("""
##### Use velocity and movement filters to discover comparable pitches
""")

# season selector
year_array = arsenal['year'].unique()
selected_year = st.selectbox(
    label="What year would you like to observe?",
    options=year_array,
)

# pitch type selector
pitch_type_array = arsenal['pitch_type_name'].unique()
pitch_type = st.selectbox(
    label='Select Pitch:',
    options=pitch_type_array,
)

In [14]:
# Range sliders for velocity and the two movement components.
# Each slider is bounded by the min/max observed in the data and starts out
# spanning the whole range, so nothing is narrowed until the user moves a handle.

# velocity: lower bound truncated to an int, upper bound is int(max + 1)
min_velo = int(arsenal['avg_speed'].min())
max_velo = int(arsenal['avg_speed'].max() + 1)

velo_range = st.slider(
    "Choose Velocity Range",
    min_value=min_velo,
    max_value=max_velo,
    value=(min_velo, max_velo),
)

# percent_rank_diff_z: bounds taken directly from the column
min_vert = arsenal['percent_rank_diff_z'].min()
max_vert = arsenal['percent_rank_diff_z'].max()

vert_range = st.slider(
    "Choose Rise/Drop % vs. League Average",
    min_value=min_vert,
    max_value=max_vert,
    value=(min_vert, max_vert),
)

# percent_rank_diff_x: bounds taken directly from the column
min_break = arsenal['percent_rank_diff_x'].min()
max_break = arsenal['percent_rank_diff_x'].max()

break_range = st.slider(
    "Choose Break % vs. League Average",
    min_value=min_break,
    max_value=max_break,
    value=(min_break, max_break),
)

In [7]:
# Filter the merged data with the constraints chosen in the select boxes and sliders.
#
# The range checks are inclusive on both ends. The movement sliders default to
# the exact min/max of their columns, so a strict comparison would drop the
# rows holding those extreme values even when the user has not narrowed
# anything. Series.between() includes both endpoints by default.
selection_mask = (
    arsenal['year'].eq(selected_year)
    & arsenal['pitch_type_name'].eq(pitch_type)
    & arsenal['avg_speed'].between(velo_range[0], velo_range[1])
    & arsenal['percent_rank_diff_z'].between(vert_range[0], vert_range[1])
    & arsenal['percent_rank_diff_x'].between(break_range[0], break_range[1])
)

filtered_pitchers = arsenal[selection_mask]

st.table(filtered_pitchers)

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

In [24]:
# Scatter plot relating one pitch characteristic (x) to one outcome statistic (y).
# Only the rows that survived the filters above are plotted.

st.header('Outcome analysis')
st.write("""
###### Use the filtered data to plot an outcome statistic vs. velocity or a movement metric
z = vertical movement, x = horizontal movement
""")

# columns offered for each axis
fig1_x_axis = ['avg_speed', 'percent_rank_diff_z', 'percent_rank_diff_x']
fig1_y_axis = ['whiff_percent', 'run_value_per_100', 'woba']

fig1_x_choice = st.selectbox(label='Input Metric: ', options=fig1_x_axis)
fig1_y_choice = st.selectbox(label='Output Metric: ', options=fig1_y_axis)

# hovering a point shows the 'name' column alongside the plotted values
fig1 = px.scatter(
    filtered_pitchers,
    x=fig1_x_choice,
    y=fig1_y_choice,
    hover_data=['name'],
)

# bold title of the form "<y> vs <x>"
fig1.update_layout(title=f"<b> {fig1_y_choice} vs {fig1_x_choice} </b>")

st.plotly_chart(fig1)

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

In [13]:
# Interactive histogram showing how the distribution of a chosen outcome
# statistic differs by pitch type. This chart uses the full (unfiltered) data.

st.header('What pitches are getting the results?')
st.write("""
###### Use the full data set to map out a distribution of each pitch type vs. its outcome
""")


# offer the same outcome metrics as the scatter plot's y axis
fig2_x_axis=fig1_y_axis

fig2_choice = st.selectbox(
    'Outcome Metric: ',
    fig2_x_axis)

# one colour per pitch_type value
fig2 = px.histogram(arsenal, x=fig2_choice, color='pitch_type')

# title spelling corrected ("Distribution")
fig2.update_layout(
title="<b> Distribution of Pitch Types by {} </b>".format(fig2_choice))

st.plotly_chart(fig2)

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)