# Notebook Contents

- [Imports](#Imports)
- [Data](#Data)
- [Features](#Features)
- [Clustering](#Clustering)
    - [4-Seam](#4-Seam-Fastball)
    - [Cutter](#Cutter)
    - [Sinker](#Sinker)
    - [Slider](#Slider)
    - [Curveball](#Curveball)
    - [Changeup](#Changeup)

# Imports

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, k_means
from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

# Data

In [2]:
data = pd.read_csv('../data/model-pitches.csv')
data.drop(columns = ['Unnamed: 0'], inplace = True) 
data.dropna(inplace = True)

pd.set_option('max_columns', None)
print(data.shape)
data.head(5)

(116889, 31)


Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,spin_axis,pfx_-x,pfx_z,bauer_units,effective_speed,release_pos_x,release_pos_z,release_extension,release_pos_y,plate_-x,plate_x,plate_z,delta_run_exp,stand,events,description,hit_distance_sc,exit_velo,launch_angle,launch_speed_angle,xba,xwobacon,woba_value,woba_denom,babip_value,iso_value
0,"Smith, Will",L,FF,92.3,2330.0,148.0,-8.28,16.56,25.24377,92.8,1.4,6.8,6.5,54.03,0.69,-0.69,2.83,-0.073,R,out,hit_into_play,13.0,95.2,-13.0,2.0,0.174,0.158,0.0,1.0,0.0,0.0
4,"Smith, Will",L,FF,91.2,2281.0,143.0,-7.56,15.36,25.010965,90.9,1.49,6.66,6.3,54.15,0.31,-0.31,2.8,-0.189,L,out,hit_into_play,9.0,93.3,-18.0,2.0,0.1,0.09,0.0,1.0,0.0,0.0
9,"Gsellman, Robert",R,SI,94.3,1982.0,221.0,16.2,10.56,21.018028,94.4,-1.2,6.05,6.5,53.98,0.26,-0.26,1.78,-0.061,R,out,hit_into_play,140.0,75.3,65.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0
18,"Gsellman, Robert",R,SL,90.5,2133.0,216.0,-0.12,7.44,23.569061,91.1,-1.51,5.93,6.3,54.23,-0.01,0.01,2.23,-0.173,L,out,hit_into_play,294.0,96.8,18.0,4.0,0.409,0.457,0.0,1.0,0.0,0.0
27,"Martin, Chris",R,FF,94.9,2224.0,216.0,9.0,14.16,23.435195,95.5,-2.71,6.38,6.6,53.86,0.1,-0.1,1.94,-0.163,R,out,hit_into_play,308.0,91.8,29.0,3.0,0.109,0.156,0.0,1.0,0.0,0.0


Pitch Types:

4-Seam, Cutter, Sinker, Slider, Curveball, Changeup

# Features

In [3]:
features = data[['pitch_type', 'p_throws', 'velo', 'spin_rate', 'spin_axis', 'pfx_-x', 'pfx_z', 
                 'bauer_units', 'effective_speed', 'release_pos_x', 'release_pos_z', 'release_extension', 
                 'launch_speed_angle', 'xba', 'xwobacon', 'woba_value', 'babip_value', 'iso_value']]

In [4]:
ff = features.loc[features['pitch_type'] == 'FF']
print('4-Seam shape:', ff.shape)
ff_r = features.loc[(features['pitch_type'] == 'FF') & (features['p_throws'] == 'R')]
print('RHP 4-Seam shape:', ff_r.shape)
ff_l = features.loc[(features['pitch_type'] == 'FF') & (features['p_throws'] == 'L')]
print('LHP 4-Seam shape:', ff_l.shape, '\n')
fc = features.loc[features['pitch_type'] == 'FC']
print('Cutter shape:', fc.shape)
fc_r = features.loc[(features['pitch_type'] == 'FC') & (features['p_throws'] == 'R')]
print('RHP Cutter shape:', fc_r.shape)
fc_l = features.loc[(features['pitch_type'] == 'FC') & (features['p_throws'] == 'L')]
print('LHP Cutter shape:', fc_l.shape, '\n')
si = features.loc[features['pitch_type'] == 'SI']
print('Sinker shape:', si.shape)
si_r = features.loc[(features['pitch_type'] == 'SI') & (features['p_throws'] == 'R')]
print('RHP Sinker shape:', si_r.shape)
si_l = features.loc[(features['pitch_type'] == 'SI') & (features['p_throws'] == 'L')]
print('LHP Sinker shape:', si_l.shape, '\n')
sl = features.loc[features['pitch_type'] == 'SL']
print('Slider shape:', sl.shape)
sl_r = features.loc[(features['pitch_type'] == 'SL') & (features['p_throws'] == 'R')]
print('RHP Slider shape:', sl_r.shape)
sl_l = features.loc[(features['pitch_type'] == 'SL') & (features['p_throws'] == 'L')]
print('LHP Slider shape:', sl_l.shape, '\n')
cu = features.loc[features['pitch_type'] == 'CU']
print('Curveball shape:', cu.shape)
cu_r = features.loc[(features['pitch_type'] == 'CU') & (features['p_throws'] == 'R')]
print('RHP Curveball shape:', cu_r.shape)
cu_l = features.loc[(features['pitch_type'] == 'CU') & (features['p_throws'] == 'L')]
print('LHP Curveball shape:', cu_l.shape, '\n')
ch = features.loc[features['pitch_type'] == 'CH']
print('Changeup shape:', ch.shape)
ch_r = features.loc[(features['pitch_type'] == 'CH') & (features['p_throws'] == 'R')]
print('RHP Changeup shape:', ch_r.shape)
ch_l = features.loc[(features['pitch_type'] == 'CH') & (features['p_throws'] == 'L')]
print('LHP Changeup shape:', ch_l.shape)

4-Seam shape: (40378, 18)
RHP 4-Seam shape: (28478, 18)
LHP 4-Seam shape: (11900, 18) 

Cutter shape: (8698, 18)
RHP Cutter shape: (5588, 18)
LHP Cutter shape: (3110, 18) 

Sinker shape: (21979, 18)
RHP Sinker shape: (15373, 18)
LHP Sinker shape: (6606, 18) 

Slider shape: (21518, 18)
RHP Slider shape: (16153, 18)
LHP Slider shape: (5365, 18) 

Curveball shape: (8439, 18)
RHP Curveball shape: (5743, 18)
LHP Curveball shape: (2696, 18) 

Changeup shape: (15877, 18)
RHP Changeup shape: (9626, 18)
LHP Changeup shape: (6251, 18)


# Clustering

## 4-Seam

### RHP

In [5]:
features_ff_r = ff_r.select_dtypes([np.number])
X_ff_r = features_ff_r

ss = StandardScaler()
X_ff_r_scaled = ss.fit_transform(X_ff_r)
X_ff_r_scaled[:,:] *= -1

pca_ff_r = PCA().fit_transform(X_ff_r_scaled)
model_ff_r = pd.DataFrame(data = pca_ff_r, columns = X_ff_r.columns)

km_ff_r = KMeans(n_clusters = 2, 
               random_state = 1)
km_ff_r.fit(model_ff_r)
label_ff_r = km_ff_r.fit_predict(model_ff_r)

print('Number of iterations:', km_ff_r.n_iter_)
print('Number of features:', km_ff_r.n_features_in_)
print('Number of clusters:', km_ff_r.n_clusters)
print('Inertia:', km_ff_r.inertia_, '\n')
print('Predicted clusters to points: ', label_ff_r[:10])

Number of iterations: 16
Number of features: 16
Number of clusters: 2
Inertia: 373015.2451411064 

Predicted clusters to points:  [1 1 1 1 1 1 1 0 0 1]


### LHP

In [6]:
features_ff_l = ff_l.select_dtypes([np.number])
X_ff_l = features_ff_l

ss = StandardScaler()
X_ff_l_scaled = ss.fit_transform(X_ff_l)
X_ff_l_scaled[:,:] *= -1

pca_ff_l = PCA().fit_transform(X_ff_l_scaled)
model_ff_l = pd.DataFrame(data = pca_ff_l, columns = X_ff_l.columns)

km_ff_l = KMeans(n_clusters = 2, 
               random_state = 1)
km_ff_l.fit(model_ff_l)
label_ff_l = km_ff_l.fit_predict(model_ff_l)

print('Number of iterations:', km_ff_l.n_iter_)
print('Number of features:', km_ff_l.n_features_in_)
print('Number of clusters:', km_ff_l.n_clusters)
print('Inertia:', km_ff_l.inertia_, '\n')
print('Predicted clusters to points: ', label_ff_l[:10])

Number of iterations: 6
Number of features: 16
Number of clusters: 2
Inertia: 155677.14136560383 

Predicted clusters to points:  [1 1 1 1 1 1 1 1 1 0]


### Cluster Labels - RHP 4-Seam

In [7]:
X_ff_r['label'] = label_ff_r
model_ff_r['label'] = label_ff_r

print(X_ff_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  93.930248    93.983436
spin_rate           2268.416903  2272.253473
spin_axis            212.080435   212.220599
pfx_-x                 7.199347     7.088940
pfx_z                 15.686448    15.653048
bauer_units           24.151112    24.178103
effective_speed       94.109867    94.173650
release_pos_x         -1.752105    -1.766247
release_pos_z          5.904354     5.883257
release_extension      6.386213     6.393965
launch_speed_angle     4.702390     2.798840
xba                    0.684870     0.180219
xwobacon               0.870645     0.188524
woba_value             1.116291     0.083932
babip_value            0.704996     0.082693
iso_value              0.843548     0.003613


### Cluster Labels - LHP 4-Seam

In [8]:
X_ff_l['label'] = label_ff_l
model_ff_l['label'] = label_ff_l

print(X_ff_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  92.613703    92.655902
spin_rate           2237.858108  2235.748537
spin_axis            146.283514   146.851829
pfx_-x                -7.641827    -7.277180
pfx_z                 15.867827    15.757346
bauer_units           24.169470    24.135393
effective_speed       92.609730    92.679268
release_pos_x          1.872503     1.861596
release_pos_z          6.011459     5.999227
release_extension      6.288676     6.301537
launch_speed_angle     4.657027     2.740610
xba                    0.687714     0.178375
xwobacon               0.861038     0.184757
woba_value             1.107270     0.087311
babip_value            0.718649     0.084756
iso_value              0.803243     0.003293


## Cutter

### RHP

In [9]:
features_fc_r = fc_r.select_dtypes([np.number])
X_fc_r = features_fc_r

X_fc_r_scaled = ss.fit_transform(X_fc_r)
X_fc_r_scaled[:,:] *= -1

pca_fc_r = PCA().fit_transform(X_fc_r_scaled)
model_fc_r = pd.DataFrame(data = pca_fc_r, columns = X_fc_r.columns)

km_fc_r = KMeans(n_clusters = 2, 
               random_state = 1)

km_fc_r.fit(model_fc_r)
label_fc_r = km_fc_r.fit_predict(model_fc_r)

print('Number of iterations:', km_fc_r.n_iter_)
print('Number of features:', km_fc_r.n_features_in_)
print('Number of clusters:', km_fc_r.n_clusters)
print('Inertia:', km_fc_r.inertia_, '\n')
print("Predicted clusters to points: ", label_fc_r[:10])

Number of iterations: 9
Number of features: 16
Number of clusters: 2
Inertia: 73051.53948568924 

Predicted clusters to points:  [0 0 1 0 1 1 0 0 1 1]


### LHP

In [10]:
features_fc_l = fc_l.select_dtypes([np.number])
X_fc_l = features_fc_l

X_fc_l_scaled = ss.fit_transform(X_fc_l)
X_fc_l_scaled[:,:] *= -1

pca_fc_l = PCA().fit_transform(X_fc_l_scaled)
model_fc_l = pd.DataFrame(data = pca_fc_l, columns = X_fc_l.columns)

km_fc_l = KMeans(n_clusters = 2, 
               random_state = 1)

km_fc_l.fit(model_fc_l)
label_fc_l = km_fc_r.fit_predict(model_fc_l)

print('Number of iterations:', km_fc_l.n_iter_)
print('Number of features:', km_fc_l.n_features_in_)
print('Number of clusters:', km_fc_l.n_clusters)
print('Inertia:', km_fc_l.inertia_, '\n')
print("Predicted clusters to points: ", label_fc_l[:10])

Number of iterations: 25
Number of features: 16
Number of clusters: 2
Inertia: 40625.42947442321 

Predicted clusters to points:  [1 1 0 0 1 0 1 0 1 0]


### Cluster Labels - RHP Cutter

In [11]:
X_fc_r['label'] = label_fc_r
model_fc_r['label'] = label_fc_r

print(X_fc_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  89.468263    89.197436
spin_rate           2408.765856  2411.333333
spin_axis            176.885435   176.593496
pfx_-x                -2.877052    -2.802026
pfx_z                  7.795578     7.786191
bauer_units           26.944691    27.054388
effective_speed       89.751366    89.458599
release_pos_x         -1.722181    -1.728780
release_pos_z          5.962299     5.945791
release_extension      6.294034     6.278361
launch_speed_angle     2.599900     4.536585
xba                    0.179632     0.674115
xwobacon               0.181171     0.818328
woba_value             0.086124     1.090994
babip_value            0.080973     0.747342
iso_value              0.001755     0.742339


### Cluster Labels - LHP Cutter

In [12]:
X_fc_l['label'] = label_fc_l
model_fc_l['label'] = label_fc_l

print(X_fc_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  86.776821    86.592232
spin_rate           2266.653005  2266.457330
spin_axis            170.829235   170.376368
pfx_-x                 1.686776     1.499606
pfx_z                  7.888852     8.057199
bauer_units           26.130115    26.186852
effective_speed       86.865073    86.679103
release_pos_x          2.099850     2.159759
release_pos_z          5.822158     5.781433
release_extension      6.189663     6.195405
launch_speed_angle     2.617486     4.551422
xba                    0.180357     0.671353
xwobacon               0.181276     0.813269
woba_value             0.090870     1.102298
babip_value            0.086521     0.757112
iso_value              0.003188     0.757112


## Sinker

### RHP

In [13]:
features_si_r = si_r.select_dtypes([np.number])
X_si_r = features_si_r

X_si_r_scaled = ss.fit_transform(X_si_r)
X_si_r_scaled[:,:] *= -1

pca_si_r = PCA().fit_transform(X_si_r_scaled)
model_si_r = pd.DataFrame(data = pca_si_r, columns = X_si_r.columns)

km_si_r = KMeans(n_clusters = 2, 
                 random_state = 1)

km_si_r.fit(model_si_r)
label_si_r = km_si_r.fit_predict(model_si_r)

print('Number of iterations:', km_si_r.n_iter_)
print('Number of features:', km_si_r.n_features_in_)
print('Number of clusters:', km_si_r.n_clusters)
print('Inertia:', km_si_r.inertia_, '\n')
print("Predicted clusters to points: ", label_si_r[:10])

Number of iterations: 10
Number of features: 16
Number of clusters: 2
Inertia: 201854.06024813667 

Predicted clusters to points:  [0 0 0 1 0 0 1 0 0 1]


### LHP

In [14]:
features_si_l = si_l.select_dtypes([np.number])
X_si_l = features_si_l

X_si_l_scaled = ss.fit_transform(X_si_l)
X_si_l_scaled[:,:] *= -1

pca_si_l = PCA().fit_transform(X_si_l_scaled)
model_si_l = pd.DataFrame(data = pca_si_l, columns = X_si_l.columns)

km_si_l = KMeans(n_clusters = 2, 
                 random_state = 1)

km_si_l.fit(model_si_l)
label_si_l = km_si_l.fit_predict(model_si_l)

print('Number of iterations:', km_si_l.n_iter_)
print('Number of features:', km_si_l.n_features_in_)
print('Number of clusters:', km_si_l.n_clusters)
print('Inertia:', km_si_l.inertia_, '\n')
print("Predicted clusters to points: ", label_si_l[:10])

Number of iterations: 14
Number of features: 16
Number of clusters: 2
Inertia: 87298.26830274735 

Predicted clusters to points:  [1 1 1 0 1 1 1 1 1 0]


### Cluster Labels - RHP Sinker

In [15]:
X_si_r['label'] = label_si_r
model_si_r['label'] = label_si_r

print(X_si_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  93.439930    93.161034
spin_rate           2136.700456  2133.907054
spin_axis            221.510692   220.747140
pfx_-x                14.859816    14.509504
pfx_z                  8.770393     9.197016
bauer_units           22.871579    22.910033
effective_speed       93.461537    93.165348
release_pos_x         -1.896111    -1.881826
release_pos_z          5.699458     5.719566
release_extension      6.308661     6.289061
launch_speed_angle     2.444216     4.501430
xba                    0.195148     0.680031
xwobacon               0.191651     0.796198
woba_value             0.118471     1.048236
babip_value            0.114431     0.776692
iso_value              0.004205     0.633460


### Cluster Labels - LHP Sinker

In [16]:
X_si_l['label'] = label_si_l
model_si_l['label'] = label_si_l

print(X_si_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  91.878677    92.038078
spin_rate           2069.562056  2071.939083
spin_axis            135.240569   134.979276
pfx_-x               -14.756654   -15.000276
pfx_z                  9.393527     8.759623
bauer_units           22.526015    22.512613
effective_speed       91.851340    91.963471
release_pos_x          2.125943     2.124005
release_pos_z          5.792663     5.788149
release_extension      6.288573     6.263575
launch_speed_angle     4.552214     2.450911
xba                    0.681553     0.193395
xwobacon               0.805402     0.190394
woba_value             1.017824     0.126408
babip_value            0.733734     0.125393
iso_value              0.637507     0.004815


## Slider

### RHP

In [17]:
features_sl_r = sl_r.select_dtypes([np.number])
X_sl_r = features_sl_r

X_sl_r_scaled = ss.fit_transform(X_sl_r)
X_sl_r_scaled[:,:] *= -1

pca_sl_r = PCA().fit_transform(X_sl_r_scaled)
model_sl_r = pd.DataFrame(data = pca_sl_r, columns = X_sl_r.columns)

km_sl_r = KMeans(n_clusters = 2, random_state = 1)

km_sl_r.fit(model_sl_r)
label_sl_r = km_sl_r.fit_predict(model_sl_r)

print('Number of iterations:', km_sl_r.n_iter_)
print('Number of features:', km_sl_r.n_features_in_)
print('Number of clusters:', km_sl_r.n_clusters)
print('Inertia:', km_sl_r.inertia_, '\n')
print("Predicted clusters to points: ", label_sl_r[:10])

Number of iterations: 7
Number of features: 16
Number of clusters: 2
Inertia: 210817.22225737496 

Predicted clusters to points:  [1 1 0 0 0 1 1 1 1 1]


### LHP

In [18]:
features_sl_l = sl_l.select_dtypes([np.number])
X_sl_l = features_sl_l

X_sl_l_scaled = ss.fit_transform(X_sl_l)
X_sl_l_scaled[:,:] *= -1

pca_sl_l = PCA().fit_transform(X_sl_l_scaled)
model_sl_l = pd.DataFrame(data = pca_sl_l, columns = X_sl_l.columns)

km_sl_l = KMeans(n_clusters = 2, random_state = 1)

km_sl_l.fit(model_sl_l)
label_sl_l = km_sl_l.fit_predict(model_sl_l)

print('Number of iterations:', km_sl_l.n_iter_)
print('Number of features:', km_sl_l.n_features_in_)
print('Number of clusters:', km_sl_l.n_clusters)
print('Inertia:', km_sl_l.inertia_, '\n')
print("Predicted clusters to points: ", label_sl_l[:10])

Number of iterations: 7
Number of features: 16
Number of clusters: 2
Inertia: 69992.63479030851 

Predicted clusters to points:  [0 1 1 1 1 1 0 0 1 1]


### Cluster Labels - RHP Slider

In [19]:
X_sl_r['label'] = label_sl_r
model_sl_r['label'] = label_sl_r

print(X_sl_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  84.810380    84.856657
spin_rate           2417.960009  2426.553068
spin_axis            110.686587   111.281234
pfx_-x                -5.951795    -6.161265
pfx_z                  1.735754     1.853734
bauer_units           28.550439    28.643810
effective_speed       84.921658    84.977081
release_pos_x         -1.936670    -1.922959
release_pos_z          5.794172     5.773906
release_extension      6.241294     6.257571
launch_speed_angle     4.546169     2.626474
xba                    0.672872     0.169677
xwobacon               0.818526     0.171846
woba_value             1.122590     0.090630
babip_value            0.733318     0.083746
iso_value              0.819591     0.004700


### Cluster Labels - LHP Slider

In [20]:
X_sl_l['label'] = label_sl_l
model_sl_l['label'] = label_sl_l

print(X_sl_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  83.883925    83.918027
spin_rate           2335.762224  2346.716942
spin_axis            246.592766   247.318440
pfx_-x                 5.217227     5.531808
pfx_z                  1.520375     1.470775
bauer_units           27.883487    28.005239
effective_speed       83.976758    83.981405
release_pos_x          1.965358     1.988166
release_pos_z          5.887455     5.880194
release_extension      6.244809     6.230243
launch_speed_angle     4.611520     2.602273
xba                    0.690307     0.177446
xwobacon               0.849422     0.179525
woba_value             1.116477     0.095622
babip_value            0.721366     0.091426
iso_value              0.817147     0.004907


## Curveball

### RHP

In [21]:
features_cu_r = cu_r.select_dtypes([np.number])
X_cu_r = features_cu_r

X_cu_r_scaled = ss.fit_transform(X_cu_r)
X_cu_r_scaled[:,:] *= -1

pca_cu_r = PCA().fit_transform(X_cu_r_scaled)
model_cu_r = pd.DataFrame(data = pca_cu_r, columns = X_cu_r.columns)

km_cu_r = KMeans(n_clusters = 2, random_state = 1)

km_cu_r.fit(model_cu_r)
label_cu_r = km_cu_r.fit_predict(model_cu_r)

print('Number of iterations:', km_cu_r.n_iter_)
print('Number of features:', km_cu_r.n_features_in_)
print('Number of clusters:', km_cu_r.n_clusters)
print('Inertia:', km_cu_r.inertia_, '\n')
print("Predicted clusters to points: ", label_cu_r[:10])

Number of iterations: 16
Number of features: 16
Number of clusters: 2
Inertia: 75407.59015019915 

Predicted clusters to points:  [0 1 0 0 0 0 0 0 1 0]


### LHP

In [22]:
features_cu_l = cu_l.select_dtypes([np.number])
X_cu_l = features_cu_l

X_cu_l_scaled = ss.fit_transform(X_cu_l)
X_cu_l_scaled[:,:] *= -1

pca_cu_l = PCA().fit_transform(X_cu_l_scaled)
model_cu_l = pd.DataFrame(data = pca_cu_l, columns = X_cu_l.columns)

km_cu_l = KMeans(n_clusters = 2, random_state = 1)

km_cu_l.fit(model_cu_l)
label_cu_l = km_cu_l.fit_predict(model_cu_l)

print('Number of iterations:', km_cu_l.n_iter_)
print('Number of features:', km_cu_l.n_features_in_)
print('Number of clusters:', km_cu_l.n_clusters)
print('Inertia:', km_cu_l.inertia_, '\n')
print("Predicted clusters to points: ", label_cu_l[:10])

Number of iterations: 11
Number of features: 16
Number of clusters: 2
Inertia: 35259.19383230242 

Predicted clusters to points:  [0 1 0 1 0 0 1 0 0 1]


### Cluster Labels - RHP Curveball

In [23]:
X_cu_r['label'] = label_cu_r
model_cu_r['label'] = label_cu_r

print(X_cu_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  79.079004    78.900240
spin_rate           2540.346578  2520.439376
spin_axis             45.297032    43.390756
pfx_-x                -9.802001    -9.519184
pfx_z                 -9.282060    -9.497863
bauer_units           32.175000    32.007423
effective_speed       78.887785    78.685774
release_pos_x         -1.637221    -1.656020
release_pos_z          5.969956     5.972461
release_extension      6.241599     6.216927
launch_speed_angle     2.633799     4.428571
xba                    0.186647     0.661477
xwobacon               0.187629     0.778336
woba_value             0.079765     1.074850
babip_value            0.074319     0.761705
iso_value              0.001472     0.711885


### Cluster Labels - LHP Curveball

In [24]:
X_cu_l['label'] = label_cu_l
model_cu_l['label'] = label_cu_l

print(X_cu_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  77.621218    77.400253
spin_rate           2437.867122  2389.720960
spin_axis            293.908088   297.957071
pfx_-x                 8.600231     8.089697
pfx_z                 -7.146996    -7.351061
bauer_units           31.475532    30.948112
effective_speed       77.250473    77.067045
release_pos_x          1.922967     1.984735
release_pos_z          5.976061     5.998220
release_extension      6.106460     6.116540
launch_speed_angle     2.584034     4.444444
xba                    0.181229     0.671086
xwobacon               0.181639     0.789523
woba_value             0.091675     1.082702
babip_value            0.086660     0.765152
iso_value              0.002626     0.702020


## Changeup

### RHP

In [25]:
features_ch_r = ch_r.select_dtypes([np.number])
X_ch_r = features_ch_r

X_ch_r_scaled = ss.fit_transform(X_ch_r)
X_ch_r_scaled[:,:] *= -1

pca_ch_r = PCA().fit_transform(X_ch_r)
model_ch_r = pd.DataFrame(data = pca_ch_r, columns = X_ch_r.columns)

km_ch_r = KMeans(n_clusters = 2, random_state = 1)

km_ch_r.fit(model_ch_r)
label_ch_r = km_ch_r.fit_predict(model_ch_r)

print('Number of iterations:', km_ch_r.n_iter_)
print('Number of features:', km_ch_r.n_features_in_)
print('Number of clusters:', km_ch_r.n_clusters)
print('Inertia:', km_ch_r.inertia_, '\n')
print("Predicted clusters to points: ", label_ch_r[:10])

Number of iterations: 12
Number of features: 16
Number of clusters: 2
Inertia: 308025617.97422105 

Predicted clusters to points:  [0 0 1 0 0 0 0 1 1 1]


### LHP

In [26]:
features_ch_l = ch_l.select_dtypes([np.number])
X_ch_l = features_ch_l

X_ch_l_scaled = ss.fit_transform(X_ch_l)
X_ch_l_scaled[:,:] *= -1

pca_ch_l = PCA().fit_transform(X_ch_l)
model_ch_l = pd.DataFrame(data = pca_ch_l, columns = X_ch_l.columns)

km_ch_l = KMeans(n_clusters = 2, random_state = 1)

km_ch_l.fit(model_ch_l)
label_ch_l = km_ch_l.fit_predict(model_ch_l)

print('Number of iterations:', km_ch_l.n_iter_)
print('Number of features:', km_ch_l.n_features_in_)
print('Number of clusters:', km_ch_l.n_clusters)
print('Inertia:', km_ch_l.inertia_, '\n')
print("Predicted clusters to points: ", label_ch_l[:10])

Number of iterations: 7
Number of features: 16
Number of clusters: 2
Inertia: 186939315.76588735 

Predicted clusters to points:  [0 0 0 0 0 0 0 0 0 0]


### Cluster Labels - RHP Changeup

In [27]:
X_ch_r['label'] = label_ch_r
model_ch_r['label'] = label_ch_r

print(X_ch_r.groupby(by = 'label').mean().T)

label                         0            1
velo                  85.150080    85.830530
spin_rate           1554.704235  2022.018830
spin_axis            235.687046   239.047201
pfx_-x                12.801042    14.918082
pfx_z                  6.378033     6.441617
bauer_units           18.281275    23.610074
effective_speed       85.420237    85.747979
release_pos_x         -1.876170    -2.040256
release_pos_z          5.832249     5.718619
release_extension      6.392805     6.224354
launch_speed_angle     3.058657     3.045945
xba                    0.313725     0.311579
xwobacon               0.348350     0.341704
woba_value             0.362272     0.353628
babip_value            0.256424     0.263369
iso_value              0.215311     0.190560


### Cluster Labels - LHP Changeup

In [28]:
X_ch_l['label'] = label_ch_l
model_ch_l['label'] = label_ch_l

print(X_ch_l.groupby(by = 'label').mean().T)

label                         0            1
velo                  83.287261    83.847652
spin_rate           1561.541289  2058.894691
spin_axis            123.590131   120.131637
pfx_-x               -12.879063   -15.569882
pfx_z                  7.181118     8.659851
bauer_units           18.776078    24.581479
effective_speed       83.340030    83.602238
release_pos_x          2.085660     1.964204
release_pos_z          5.865209     5.781654
release_extension      6.270493     6.171698
launch_speed_angle     3.039023     3.050900
xba                    0.321317     0.298887
xwobacon               0.351219     0.329034
woba_value             0.363847     0.338833
babip_value            0.281219     0.243967
iso_value              0.178499     0.193506
