In [4]:
import numpy as np
import pandas as pd
import streamlit as st
from termcolor import colored
import matplotlib.pyplot as plt

# Load the pitch-by-pitch data (one row per pitch) from the tab-separated file.
pitcher_data = pd.read_csv(
    'starting_pitcher_data_pitchtype_2018-2019.tsv',
    delimiter='\t',
)

# Preview the first five rows to check which columns were read.
pitcher_data.head(5)

Unnamed: 0,game_pk,game_date,event_inning,top_inning_sw,pitcher,pitch_hand,player_at_bat,n_thruorder_pitcher,hit_into_play_group,is_strike,is_ball,is_strikeout,is_walk,api_pitch_type,pitch_number_thisgame,alan_active_spinrate,api_p_release_spin_rate,event_description,batter_name,pitcher_name
0,529406,2018-03-29,1,Y,502042,R,605141,1.0,field_out,0,0,0.0,0.0,FF,1,1672.89822,2045.595,Mookie Betts flies out sharply to center field...,Mookie Betts,Chris Archer
1,529406,2018-03-29,1,Y,502042,R,643217,1.0,,1,0,,,FF,2,1855.568629,2159.595,,Andrew Benintendi,Chris Archer
2,529406,2018-03-29,1,Y,502042,R,643217,1.0,,1,0,,,FF,3,1925.181992,2212.629,,Andrew Benintendi,Chris Archer
3,529406,2018-03-29,1,Y,502042,R,643217,1.0,field_out,0,0,0.0,0.0,CH,4,1699.629121,1832.357,"Andrew Benintendi grounds out, second baseman ...",Andrew Benintendi,Chris Archer
4,529406,2018-03-29,1,Y,502042,R,434670,1.0,,1,0,,,FF,5,2019.269462,2254.516,,Hanley Ramirez,Chris Archer


### Drop columns that are not needed for this analysis

In [5]:
# Discard the half-inning flag, pitcher handedness, per-pitch outcome flags and
# the free-text play description; none of them is referenced by the cells that
# follow in this part of the notebook.
pitcher_data = pitcher_data.drop(
    [
        'top_inning_sw',
        'pitch_hand',
        'hit_into_play_group',
        'is_strike',
        'is_ball',
        'is_strikeout',
        'is_walk',
        'event_description',
    ],
    axis='columns',
)

### Add effective_spin column (active spin as a fraction of raw spin)

In [6]:
# effective_spin = active spin rate / raw release spin rate.
# The result is stored as a ratio (e.g. 0.82), not multiplied by 100.
# Rows where either spin value is missing come out as NaN.
# NOTE(review): a few ratios exceed 1 (a later preview shows 1.04796) — confirm
# whether such rows should be clipped or filtered before analysis.
pitcher_data['effective_spin'] = pitcher_data['alan_active_spinrate'].div(
    pitcher_data['api_p_release_spin_rate']
)

pitcher_data.head(5)

Unnamed: 0,game_pk,game_date,event_inning,pitcher,player_at_bat,n_thruorder_pitcher,api_pitch_type,pitch_number_thisgame,alan_active_spinrate,api_p_release_spin_rate,batter_name,pitcher_name,effective_spin
0,529406,2018-03-29,1,502042,605141,1.0,FF,1,1672.89822,2045.595,Mookie Betts,Chris Archer,0.817805
1,529406,2018-03-29,1,502042,643217,1.0,FF,2,1855.568629,2159.595,Andrew Benintendi,Chris Archer,0.859221
2,529406,2018-03-29,1,502042,643217,1.0,FF,3,1925.181992,2212.629,Andrew Benintendi,Chris Archer,0.870088
3,529406,2018-03-29,1,502042,643217,1.0,CH,4,1699.629121,1832.357,Andrew Benintendi,Chris Archer,0.927564
4,529406,2018-03-29,1,502042,434670,1.0,FF,5,2019.269462,2254.516,Hanley Ramirez,Chris Archer,0.895655


In [7]:
# Reorder the columns so that related fields sit next to each other.
# Selecting by label keeps only the columns listed here.
pitcher_data = pitcher_data.loc[
    :,
    [
        # game identifiers
        'game_pk', 'game_date',
        # pitcher and batter (id followed by name)
        'pitcher', 'pitcher_name',
        'player_at_bat', 'batter_name',
        # in-game context for the pitch
        'event_inning', 'n_thruorder_pitcher', 'pitch_number_thisgame',
        # pitch type and spin measurements
        'api_pitch_type',
        'alan_active_spinrate', 'api_p_release_spin_rate', 'effective_spin',
    ]
]

pitcher_data.head(5)

Unnamed: 0,game_pk,game_date,pitcher,pitcher_name,player_at_bat,batter_name,event_inning,n_thruorder_pitcher,pitch_number_thisgame,api_pitch_type,alan_active_spinrate,api_p_release_spin_rate,effective_spin
0,529406,2018-03-29,502042,Chris Archer,605141,Mookie Betts,1,1.0,1,FF,1672.89822,2045.595,0.817805
1,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,2,FF,1855.568629,2159.595,0.859221
2,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,3,FF,1925.181992,2212.629,0.870088
3,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,4,CH,1699.629121,1832.357,0.927564
4,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,5,FF,2019.269462,2254.516,0.895655


### Rename columns to shorter, more descriptive names

In [8]:
# Replace the source-specific column names with shorter ones.
# Columns not listed in the mapping keep their current names.
pitcher_data = pitcher_data.rename(
    {
        'pitcher': 'pitcher_id',
        'player_at_bat': 'batter_id',
        'event_inning': 'inning',
        'n_thruorder_pitcher': 'times_thru_order',
        'pitch_number_thisgame': 'pitch_count',
        'api_pitch_type': 'pitch_type',
        'alan_active_spinrate': 'active_spin',
        'api_p_release_spin_rate': 'raw_spin',
    },
    axis='columns',
)

# Show the first 50 rows to inspect the renamed columns.
pitcher_data.head(50)

Unnamed: 0,game_pk,game_date,pitcher_id,pitcher_name,batter_id,batter_name,inning,times_thru_order,pitch_count,pitch_type,active_spin,raw_spin,effective_spin
0,529406,2018-03-29,502042,Chris Archer,605141,Mookie Betts,1,1.0,1,FF,1672.89822,2045.595,0.817805
1,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,2,FF,1855.568629,2159.595,0.859221
2,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,3,FF,1925.181992,2212.629,0.870088
3,529406,2018-03-29,502042,Chris Archer,643217,Andrew Benintendi,1,1.0,4,CH,1699.629121,1832.357,0.927564
4,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,5,FF,2019.269462,2254.516,0.895655
5,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,6,FF,2282.154729,2177.711,1.04796
6,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,7,SL,670.291024,2507.549,0.267309
7,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,8,FF,1887.901829,2215.465,0.852147
8,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,9,SL,,,
9,529406,2018-03-29,502042,Chris Archer,434670,Hanley Ramirez,1,1.0,10,SL,398.721782,886.282,0.449881
