# Notebook Contents

- [Imports](#Imports)
- [Data](#Data)
- [Features](#Features)
- [Clustering](#Clustering)
    - [4-Seam](#4-Seam-Fastball)
    - [Cutter](#Cutter)
    - [Sinker](#Sinker)
    - [Slider](#Slider)
    - [Curveball](#Curveball)
    - [Changeup](#Changeup)

# Imports

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, k_means
from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

# Data

In [2]:
data = pd.read_csv('../data/model-pitches-rv.csv', index_col = [0])

pd.set_option('max_columns', None)
print(data.shape)
data.head(5)

(705396, 70)


Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,spin_axis,pfx_-x,pfx_z,bauer_units,effective_speed,release_pos_-x,release_pos_x,release_pos_z,release_extension,release_pos_y,plate_-x,plate_x,plate_z,type,balls,strikes,pitch_count,delta_run_exp,stand,bb_type,description,events,hit_distance_sc,exit_velo,launch_angle,launch_speed_angle,woba_value,woba_denom,xba,xwoba,babip_value,iso_value,at_bat_number,pitch_number,inning,inning_topbot,home_score,away_score,post_home_score,post_away_score,on_1b,on_2b,on_3b,outs_when_up,arm_angle,slot,is_strike,is_ball,final_pitch_ab,out_to_end_inning,home_runs,away_runs,runs,re,re_change,re_end_state,re24,lin_weight_above_avg,lin_weight_above_outs,woba_scale,lin_weights_above_avg_scale,lin_weights_above_outs_scale,woba,wraa_change,rv
0,"Smith, Will",L,FF,92.3,2330.0,148.0,-8.28,16.56,25.24377,92.8,-1.4,1.4,6.8,6.5,54.03,0.69,-0.69,2.83,X,1,2,1-2,-0.073,R,ground_ball,hit_into_play,field_out,13.0,95.2,-13.0,2.0,0.0,1.0,0.174,0.158,0.0,0.0,61,4,9,0,5,0,5,0,0,0,0,2,36.0,overhead,1,0,1,1,0,0,0,0.11,-0.11,0.0,-0.11,-0.271,0.0,1.209,-0.328,0.0,0.222,-0.152,-0.262
1,"Smith, Will",L,SL,80.6,2254.0,315.0,9.24,5.76,27.965261,81.2,-1.6,1.6,6.64,6.4,54.15,0.71,-0.71,2.62,S,1,1,1-1,-0.027,R,,strike,,108.0,75.3,75.0,,,,,,,,61,3,9,0,5,0,5,0,0,0,0,2,45.8,3/4,1,0,0,0,0,0,0,0.11,0.0,0.11,0.0,,,1.209,0.0,0.0,0.3,-0.053,-0.053
2,"Smith, Will",L,CU,75.5,1940.0,328.0,7.8,-6.12,25.695364,75.2,-1.46,1.46,6.88,6.2,54.34,0.04,-0.04,2.46,S,1,0,1-0,-0.02,R,,strike,,157.0,83.5,65.0,,,,,,,,61,2,9,0,5,0,5,0,0,0,0,2,38.4,overhead,1,0,0,0,0,0,0,0.11,0.0,0.11,0.0,,,1.209,0.0,0.0,0.356,-0.038,-0.038
3,"Smith, Will",L,CU,75.0,2017.0,330.0,8.28,-8.28,26.893333,74.5,-1.53,1.53,6.83,5.9,54.61,-2.1,2.1,3.89,B,0,0,0-0,0.016,R,,ball,,,,,,,,,,,,61,1,9,0,5,0,5,0,0,0,0,2,38.4,overhead,0,1,0,0,0,0,0,0.11,0.0,0.11,0.0,,,1.209,0.0,0.0,0.314,0.028,0.028
4,"Smith, Will",L,FF,91.2,2281.0,143.0,-7.56,15.36,25.010965,90.9,-1.49,1.49,6.66,6.3,54.15,0.31,-0.31,2.8,X,1,0,1-0,-0.189,L,ground_ball,hit_into_play,field_out,9.0,93.3,-18.0,2.0,0.0,1.0,0.1,0.09,0.0,0.0,60,2,9,0,5,0,5,0,0,0,0,1,36.0,overhead,1,0,1,0,0,0,0,0.29,-0.18,0.11,-0.18,-0.271,0.0,1.209,-0.328,0.0,0.356,-0.038,-0.218


Pitch Types:

4-Seam, Cutter, Sinker, Slider, Curveball, Changeup

# Features

In [3]:
features = data[['velo', 'spin_rate', 'pfx_-x', 'pfx_z', 'release_extension', 
                 'release_pos_x', 'release_pos_z', 'arm_angle', 'plate_x', 
                 'plate_z', 'rv', 'pitch_type', 'p_throws', 'stand']]

In [4]:
ff = features.loc[features['pitch_type'] == 'FF']
print('4-Seam shape:', ff.shape)
ff_r = features.loc[(features['pitch_type'] == 'FF') & (features['p_throws'] == 'R')]
print('RHP 4-Seam shape:', ff_r.shape)
ff_l = features.loc[(features['pitch_type'] == 'FF') & (features['p_throws'] == 'L')]
print('LHP 4-Seam shape:', ff_l.shape, '\n')
fc = features.loc[features['pitch_type'] == 'FC']
print('Cutter shape:', fc.shape)
fc_r = features.loc[(features['pitch_type'] == 'FC') & (features['p_throws'] == 'R')]
print('RHP Cutter shape:', fc_r.shape)
fc_l = features.loc[(features['pitch_type'] == 'FC') & (features['p_throws'] == 'L')]
print('LHP Cutter shape:', fc_l.shape, '\n')
si = features.loc[features['pitch_type'] == 'SI']
print('Sinker shape:', si.shape)
si_r = features.loc[(features['pitch_type'] == 'SI') & (features['p_throws'] == 'R')]
print('RHP Sinker shape:', si_r.shape)
si_l = features.loc[(features['pitch_type'] == 'SI') & (features['p_throws'] == 'L')]
print('LHP Sinker shape:', si_l.shape, '\n')
sl = features.loc[features['pitch_type'] == 'SL']
print('Slider shape:', sl.shape)
sl_r = features.loc[(features['pitch_type'] == 'SL') & (features['p_throws'] == 'R')]
print('RHP Slider shape:', sl_r.shape)
sl_l = features.loc[(features['pitch_type'] == 'SL') & (features['p_throws'] == 'L')]
print('LHP Slider shape:', sl_l.shape, '\n')
cu = features.loc[features['pitch_type'] == 'CU']
print('Curveball shape:', cu.shape)
cu_r = features.loc[(features['pitch_type'] == 'CU') & (features['p_throws'] == 'R')]
print('RHP Curveball shape:', cu_r.shape)
cu_l = features.loc[(features['pitch_type'] == 'CU') & (features['p_throws'] == 'L')]
print('LHP Curveball shape:', cu_l.shape, '\n')
ch = features.loc[features['pitch_type'] == 'CH']
print('Changeup shape:', ch.shape)
ch_r = features.loc[(features['pitch_type'] == 'CH') & (features['p_throws'] == 'R')]
print('RHP Changeup shape:', ch_r.shape)
ch_l = features.loc[(features['pitch_type'] == 'CH') & (features['p_throws'] == 'L')]
print('LHP Changeup shape:', ch_l.shape)

4-Seam shape: (249663, 14)
RHP 4-Seam shape: (176369, 14)
LHP 4-Seam shape: (73294, 14) 

Cutter shape: (47442, 14)
RHP Cutter shape: (31180, 14)
LHP Cutter shape: (16262, 14) 

Sinker shape: (109145, 14)
RHP Sinker shape: (75675, 14)
LHP Sinker shape: (33470, 14) 

Slider shape: (135533, 14)
RHP Slider shape: (100753, 14)
LHP Slider shape: (34780, 14) 

Curveball shape: (58770, 14)
RHP Curveball shape: (39992, 14)
LHP Curveball shape: (18778, 14) 

Changeup shape: (80330, 14)
RHP Changeup shape: (50653, 14)
LHP Changeup shape: (29677, 14)


# Clustering

## 4-Seam

### RHP

In [5]:
features_ff_r = ff_r.select_dtypes([np.number])
X_ff_r = features_ff_r

ss = StandardScaler()
X_ff_r_scaled = ss.fit_transform(X_ff_r)
X_ff_r_scaled[:,:] *= -1

pca_ff_r = PCA().fit_transform(X_ff_r_scaled)
model_ff_r = pd.DataFrame(data = pca_ff_r, columns = X_ff_r.columns)

km_ff_r = KMeans(n_clusters = 2, 
               random_state = 1)
km_ff_r.fit(model_ff_r)
label_ff_r = km_ff_r.fit_predict(model_ff_r)

print('Number of iterations:', km_ff_r.n_iter_)
print('Number of features:', km_ff_r.n_features_in_)
print('Number of clusters:', km_ff_r.n_clusters)
print('Inertia:', km_ff_r.inertia_, '\n')
print('Predicted clusters to points: ', label_ff_r[:10])

Number of iterations: 8
Number of features: 11
Number of clusters: 2
Inertia: 1657404.2635047587 

Predicted clusters to points:  [1 1 1 0 0 0 0 0 0 0]


### LHP

In [6]:
features_ff_l = ff_l.select_dtypes([np.number])
X_ff_l = features_ff_l

ss = StandardScaler()
X_ff_l_scaled = ss.fit_transform(X_ff_l)
X_ff_l_scaled[:,:] *= -1

pca_ff_l = PCA().fit_transform(X_ff_l_scaled)
model_ff_l = pd.DataFrame(data = pca_ff_l, columns = X_ff_l.columns)

km_ff_l = KMeans(n_clusters = 2, 
               random_state = 1)
km_ff_l.fit(model_ff_l)
label_ff_l = km_ff_l.fit_predict(model_ff_l)

print('Number of iterations:', km_ff_l.n_iter_)
print('Number of features:', km_ff_l.n_features_in_)
print('Number of clusters:', km_ff_l.n_clusters)
print('Inertia:', km_ff_l.inertia_, '\n')
print('Predicted clusters to points: ', label_ff_l[:10])

Number of iterations: 8
Number of features: 11
Number of clusters: 2
Inertia: 687023.2105281404 

Predicted clusters to points:  [1 1 1 1 1 1 1 1 1 1]


### Cluster Labels - RHP 4-Seam

In [7]:
X_ff_r['label'] = label_ff_r
model_ff_r['label'] = label_ff_r

print(X_ff_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 94.150068    93.999385
spin_rate          2316.958590  2244.258914
pfx_-x                5.876507     8.607555
pfx_z                17.491145    14.420394
release_extension     6.379305     6.419449
release_pos_x        -1.295864    -2.344052
release_pos_z         6.098714     5.640221
arm_angle            24.593898    39.044232
plate_x               0.101667    -0.045807
plate_z               2.773357     2.725154
rv                   -0.024059    -0.024406


### Cluster Labels - LHP 4-Seam

In [8]:
X_ff_l['label'] = label_ff_l
model_ff_l['label'] = label_ff_l

print(X_ff_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 92.871790    92.727757
spin_rate          2221.748320  2270.378398
pfx_-x               -9.298097    -6.132056
pfx_z                14.862905    17.177429
release_extension     6.346536     6.255089
release_pos_x         2.494034     1.434551
release_pos_z         5.731918     6.183181
arm_angle            82.083719    41.754777
plate_x              -0.033746    -0.105000
plate_z               2.704317     2.740986
rv                   -0.025582    -0.023639


## Cutter

### RHP

In [9]:
features_fc_r = fc_r.select_dtypes([np.number])
X_fc_r = features_fc_r

X_fc_r_scaled = ss.fit_transform(X_fc_r)
X_fc_r_scaled[:,:] *= -1

pca_fc_r = PCA().fit_transform(X_fc_r_scaled)
model_fc_r = pd.DataFrame(data = pca_fc_r, columns = X_fc_r.columns)

km_fc_r = KMeans(n_clusters = 2, 
               random_state = 1)

km_fc_r.fit(model_fc_r)
label_fc_r = km_fc_r.fit_predict(model_fc_r)

print('Number of iterations:', km_fc_r.n_iter_)
print('Number of features:', km_fc_r.n_features_in_)
print('Number of clusters:', km_fc_r.n_clusters)
print('Inertia:', km_fc_r.inertia_, '\n')
print("Predicted clusters to points: ", label_fc_r[:10])

Number of iterations: 11
Number of features: 11
Number of clusters: 2
Inertia: 292572.29810798983 

Predicted clusters to points:  [1 1 1 1 1 1 0 0 0 0]


### LHP

In [10]:
features_fc_l = fc_l.select_dtypes([np.number])
X_fc_l = features_fc_l

X_fc_l_scaled = ss.fit_transform(X_fc_l)
X_fc_l_scaled[:,:] *= -1

pca_fc_l = PCA().fit_transform(X_fc_l_scaled)
model_fc_l = pd.DataFrame(data = pca_fc_l, columns = X_fc_l.columns)

km_fc_l = KMeans(n_clusters = 2, 
               random_state = 1)

km_fc_l.fit(model_fc_l)
label_fc_l = km_fc_r.fit_predict(model_fc_l)

print('Number of iterations:', km_fc_l.n_iter_)
print('Number of features:', km_fc_l.n_features_in_)
print('Number of clusters:', km_fc_l.n_clusters)
print('Inertia:', km_fc_l.inertia_, '\n')
print("Predicted clusters to points: ", label_fc_l[:10])

Number of iterations: 14
Number of features: 11
Number of clusters: 2
Inertia: 147751.1262909977 

Predicted clusters to points:  [0 0 0 0 0 0 0 0 0 0]


### Cluster Labels - RHP Cutter

In [11]:
X_fc_r['label'] = label_fc_r
model_fc_r['label'] = label_fc_r

print(X_fc_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 89.315693    89.587565
spin_rate          2496.910619  2347.088612
pfx_-x               -4.411322    -1.540695
pfx_z                 7.297632     8.539033
release_extension     6.281366     6.316436
release_pos_x        -1.208928    -2.309777
release_pos_z         6.109475     5.769408
arm_angle            23.128584    37.909527
plate_x               0.471198     0.342269
plate_z               2.202869     2.413689
rv                   -0.029421    -0.018127


### Cluster Labels - LHP Cutter

In [12]:
X_fc_l['label'] = label_fc_l
model_fc_l['label'] = label_fc_l

print(X_fc_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 87.134740    86.219876
spin_rate          2326.978748  2183.202161
pfx_-x                2.235573     0.943448
pfx_z                 7.898992     8.006080
release_extension     6.014424     6.430753
release_pos_x         1.653919     2.768992
release_pos_z         6.045132     5.519646
arm_angle            50.988806    94.401332
plate_x              -0.470832    -0.101117
plate_z               2.319337     2.429122
rv                   -0.025978    -0.023026


## Sinker

### RHP

In [13]:
features_si_r = si_r.select_dtypes([np.number])
X_si_r = features_si_r

X_si_r_scaled = ss.fit_transform(X_si_r)
X_si_r_scaled[:,:] *= -1

pca_si_r = PCA().fit_transform(X_si_r_scaled)
model_si_r = pd.DataFrame(data = pca_si_r, columns = X_si_r.columns)

km_si_r = KMeans(n_clusters = 2, 
                 random_state = 1)

km_si_r.fit(model_si_r)
label_si_r = km_si_r.fit_predict(model_si_r)

print('Number of iterations:', km_si_r.n_iter_)
print('Number of features:', km_si_r.n_features_in_)
print('Number of clusters:', km_si_r.n_clusters)
print('Inertia:', km_si_r.inertia_, '\n')
print("Predicted clusters to points: ", label_si_r[:10])

Number of iterations: 12
Number of features: 11
Number of clusters: 2
Inertia: 712717.8219386875 

Predicted clusters to points:  [0 0 0 0 0 0 0 1 1 1]


### LHP

In [14]:
features_si_l = si_l.select_dtypes([np.number])
X_si_l = features_si_l

X_si_l_scaled = ss.fit_transform(X_si_l)
X_si_l_scaled[:,:] *= -1

pca_si_l = PCA().fit_transform(X_si_l_scaled)
model_si_l = pd.DataFrame(data = pca_si_l, columns = X_si_l.columns)

km_si_l = KMeans(n_clusters = 2, 
                 random_state = 1)

km_si_l.fit(model_si_l)
label_si_l = km_si_l.fit_predict(model_si_l)

print('Number of iterations:', km_si_l.n_iter_)
print('Number of features:', km_si_l.n_features_in_)
print('Number of clusters:', km_si_l.n_clusters)
print('Inertia:', km_si_l.inertia_, '\n')
print("Predicted clusters to points: ", label_si_l[:10])

Number of iterations: 6
Number of features: 11
Number of clusters: 2
Inertia: 307675.6180323935 

Predicted clusters to points:  [0 0 1 1 1 1 1 1 1 1]


### Cluster Labels - RHP Sinker

In [15]:
X_si_r['label'] = label_si_r
model_si_r['label'] = label_si_r

print(X_si_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 94.191338    92.312558
spin_rate          2168.322617  2110.492290
pfx_-x               14.024325    15.803129
pfx_z                10.705027     7.082527
release_extension     6.308550     6.325699
release_pos_x        -1.441713    -2.488337
release_pos_z         5.897599     5.447137
arm_angle            27.380730    41.599229
plate_x              -0.169562    -0.243801
plate_z               2.373930     2.271731
rv                   -0.020269    -0.010709


### Cluster Labels - LHP Sinker

In [16]:
X_si_l['label'] = label_si_l
model_si_l['label'] = label_si_l

print(X_si_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 91.389825    92.495981
spin_rate          2018.016841  2107.781685
pfx_-x              -16.179602   -14.372530
pfx_z                 6.563751    10.787048
release_extension     6.423489     6.217359
release_pos_x         2.888786     1.721551
release_pos_z         5.336435     6.013484
arm_angle           101.034444    53.205515
plate_x               0.271250     0.171142
plate_z               2.339134     2.348063
rv                   -0.025111    -0.014442


## Slider

### RHP

In [17]:
features_sl_r = sl_r.select_dtypes([np.number])
X_sl_r = features_sl_r

X_sl_r_scaled = ss.fit_transform(X_sl_r)
X_sl_r_scaled[:,:] *= -1

pca_sl_r = PCA().fit_transform(X_sl_r_scaled)
model_sl_r = pd.DataFrame(data = pca_sl_r, columns = X_sl_r.columns)

km_sl_r = KMeans(n_clusters = 2, random_state = 1)

km_sl_r.fit(model_sl_r)
label_sl_r = km_sl_r.fit_predict(model_sl_r)

print('Number of iterations:', km_sl_r.n_iter_)
print('Number of features:', km_sl_r.n_features_in_)
print('Number of clusters:', km_sl_r.n_clusters)
print('Inertia:', km_sl_r.inertia_, '\n')
print("Predicted clusters to points: ", label_sl_r[:10])

Number of iterations: 22
Number of features: 11
Number of clusters: 2
Inertia: 953440.964893037 

Predicted clusters to points:  [0 0 0 0 1 0 0 0 1 1]


### LHP

In [18]:
features_sl_l = sl_l.select_dtypes([np.number])
X_sl_l = features_sl_l

X_sl_l_scaled = ss.fit_transform(X_sl_l)
X_sl_l_scaled[:,:] *= -1

pca_sl_l = PCA().fit_transform(X_sl_l_scaled)
model_sl_l = pd.DataFrame(data = pca_sl_l, columns = X_sl_l.columns)

km_sl_l = KMeans(n_clusters = 2, random_state = 1)

km_sl_l.fit(model_sl_l)
label_sl_l = km_sl_l.fit_predict(model_sl_l)

print('Number of iterations:', km_sl_l.n_iter_)
print('Number of features:', km_sl_l.n_features_in_)
print('Number of clusters:', km_sl_l.n_clusters)
print('Inertia:', km_sl_l.inertia_, '\n')
print("Predicted clusters to points: ", label_sl_l[:10])

Number of iterations: 21
Number of features: 11
Number of clusters: 2
Inertia: 326848.08955771895 

Predicted clusters to points:  [0 0 0 0 0 0 0 0 0 0]


### Cluster Labels - RHP Slider

In [19]:
X_sl_r['label'] = label_sl_r
model_sl_r['label'] = label_sl_r

print(X_sl_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 86.119960    81.960869
spin_rate          2407.139553  2513.310630
pfx_-x               -4.215567   -11.408072
pfx_z                 2.138894     0.950985
release_extension     6.225575     6.348711
release_pos_x        -1.682138    -2.569835
release_pos_z         5.954205     5.316307
arm_angle            29.970596    42.829514
plate_x               0.500928     0.320731
plate_z               1.856788     2.011423
rv                   -0.038424    -0.043431


### Cluster Labels - LHP Slider

In [20]:
X_sl_l['label'] = label_sl_l
model_sl_l['label'] = label_sl_l

print(X_sl_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 84.746523    82.126266
spin_rate          2313.466480  2453.579482
pfx_-x                4.236420     9.125104
pfx_z                 2.387635    -0.609765
release_extension     6.296487     6.143085
release_pos_x         1.729412     2.581197
release_pos_z         6.059561     5.473641
arm_angle            52.540963    89.263410
plate_x              -0.441919    -0.282268
plate_z               1.798075     1.922830
rv                   -0.042656    -0.042102


## Curveball

### RHP

In [21]:
features_cu_r = cu_r.select_dtypes([np.number])
X_cu_r = features_cu_r

X_cu_r_scaled = ss.fit_transform(X_cu_r)
X_cu_r_scaled[:,:] *= -1

pca_cu_r = PCA().fit_transform(X_cu_r_scaled)
model_cu_r = pd.DataFrame(data = pca_cu_r, columns = X_cu_r.columns)

km_cu_r = KMeans(n_clusters = 2, random_state = 1)

km_cu_r.fit(model_cu_r)
label_cu_r = km_cu_r.fit_predict(model_cu_r)

print('Number of iterations:', km_cu_r.n_iter_)
print('Number of features:', km_cu_r.n_features_in_)
print('Number of clusters:', km_cu_r.n_clusters)
print('Inertia:', km_cu_r.inertia_, '\n')
print("Predicted clusters to points: ", label_cu_r[:10])

Number of iterations: 12
Number of features: 11
Number of clusters: 2
Inertia: 376274.0943036844 

Predicted clusters to points:  [1 1 1 1 1 1 1 1 1 1]


### LHP

In [22]:
features_cu_l = cu_l.select_dtypes([np.number])
X_cu_l = features_cu_l

X_cu_l_scaled = ss.fit_transform(X_cu_l)
X_cu_l_scaled[:,:] *= -1

pca_cu_l = PCA().fit_transform(X_cu_l_scaled)
model_cu_l = pd.DataFrame(data = pca_cu_l, columns = X_cu_l.columns)

km_cu_l = KMeans(n_clusters = 2, random_state = 1)

km_cu_l.fit(model_cu_l)
label_cu_l = km_cu_l.fit_predict(model_cu_l)

print('Number of iterations:', km_cu_l.n_iter_)
print('Number of features:', km_cu_l.n_features_in_)
print('Number of clusters:', km_cu_l.n_clusters)
print('Inertia:', km_cu_l.inertia_, '\n')
print("Predicted clusters to points: ", label_cu_l[:10])

Number of iterations: 9
Number of features: 11
Number of clusters: 2
Inertia: 170834.96729459992 

Predicted clusters to points:  [1 1 0 1 1 1 1 1 1 1]


### Cluster Labels - RHP Curveball

In [23]:
X_cu_r['label'] = label_cu_r
model_cu_r['label'] = label_cu_r

print(X_cu_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 78.920958    79.540736
spin_rate          2524.845337  2553.113222
pfx_-x               -8.171655   -11.267108
pfx_z               -11.393815    -7.009066
release_extension     6.254420     6.237362
release_pos_x        -1.181202    -2.182511
release_pos_z         6.167669     5.742017
arm_angle            22.527294    36.800368
plate_x               0.167276     0.203130
plate_z               1.788731     1.860487
rv                   -0.032961    -0.041101


### Cluster Labels - LHP Curveball

In [24]:
X_cu_l['label'] = label_cu_l
model_cu_l['label'] = label_cu_l

print(X_cu_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 78.257255    77.414513
spin_rate          2275.773816  2543.363644
pfx_-x                6.533309     9.652736
pfx_z                -2.869493   -10.465993
release_extension     6.149725     6.112003
release_pos_x         2.664879     1.439308
release_pos_z         5.746441     6.155055
arm_angle            88.150055    42.226733
plate_x              -0.236858    -0.141054
plate_z               1.779667     1.903775
rv                   -0.035454    -0.044090


## Changeup

### RHP

In [25]:
features_ch_r = ch_r.select_dtypes([np.number])
X_ch_r = features_ch_r

X_ch_r_scaled = ss.fit_transform(X_ch_r)
X_ch_r_scaled[:,:] *= -1

pca_ch_r = PCA().fit_transform(X_ch_r)
model_ch_r = pd.DataFrame(data = pca_ch_r, columns = X_ch_r.columns)

km_ch_r = KMeans(n_clusters = 2, random_state = 1)

km_ch_r.fit(model_ch_r)
label_ch_r = km_ch_r.fit_predict(model_ch_r)

print('Number of iterations:', km_ch_r.n_iter_)
print('Number of features:', km_ch_r.n_features_in_)
print('Number of clusters:', km_ch_r.n_clusters)
print('Inertia:', km_ch_r.inertia_, '\n')
print("Predicted clusters to points: ", label_ch_r[:10])

Number of iterations: 10
Number of features: 11
Number of clusters: 2
Inertia: 1773202735.8894036 

Predicted clusters to points:  [1 1 1 1 1 0 1 0 0 0]


### LHP

In [26]:
features_ch_l = ch_l.select_dtypes([np.number])
X_ch_l = features_ch_l

X_ch_l_scaled = ss.fit_transform(X_ch_l)
X_ch_l_scaled[:,:] *= -1

pca_ch_l = PCA().fit_transform(X_ch_l)
model_ch_l = pd.DataFrame(data = pca_ch_l, columns = X_ch_l.columns)

km_ch_l = KMeans(n_clusters = 2, random_state = 1)

km_ch_l.fit(model_ch_l)
label_ch_l = km_ch_l.fit_predict(model_ch_l)

print('Number of iterations:', km_ch_l.n_iter_)
print('Number of features:', km_ch_l.n_features_in_)
print('Number of clusters:', km_ch_l.n_clusters)
print('Inertia:', km_ch_l.inertia_, '\n')
print("Predicted clusters to points: ", label_ch_l[:10])

Number of iterations: 13
Number of features: 11
Number of clusters: 2
Inertia: 920657635.8181262 

Predicted clusters to points:  [0 0 0 0 0 0 0 0 0 0]


### Cluster Labels - RHP Changeup

In [27]:
X_ch_r['label'] = label_ch_r
model_ch_r['label'] = label_ch_r

print(X_ch_r.groupby(by = 'label').mean().T)

label                        0            1
velo                 85.956297    85.248104
spin_rate          2051.217743  1564.045441
pfx_-x               15.165121    12.986649
pfx_z                 6.321285     6.256120
release_extension     6.248753     6.404805
release_pos_x        -2.030620    -1.881358
release_pos_z         5.692416     5.821656
arm_angle            35.284248    32.871275
plate_x              -0.292740    -0.385455
plate_z               1.803669     1.862081
rv                   -0.034579    -0.030490


### Cluster Labels - LHP Changeup

In [28]:
X_ch_l['label'] = label_ch_l
model_ch_l['label'] = label_ch_l

print(X_ch_l.groupby(by = 'label').mean().T)

label                        0            1
velo                 83.426990    83.837590
spin_rate          1559.444192  2051.185899
pfx_-x              -12.960786   -15.581257
pfx_z                 7.156511     8.830455
release_extension     6.279377     6.185499
release_pos_x         2.082711     1.965572
release_pos_z         5.867780     5.794954
arm_angle            66.777709    64.322455
plate_x               0.520617     0.424509
plate_z               1.877259     1.856021
rv                   -0.030789    -0.032614
