In [33]:
import tensorflow

In [34]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, top_k_accuracy_score, average_precision_score, \
    roc_auc_score
from sklearn.model_selection import StratifiedKFold, GridSearchCV
import xgboost as xgb
from xgboost import XGBRFClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.utils import class_weight

In [35]:
# Constants
game_sit_feat = ['month', 'year', 'inning', 'inning_topbot', 
                 'outs', 'strikes', 'balls', 'pitch_number', 
                 'on_1b', 'on_2b', 'on_3b', 'score_diff', 
                 'of_std', 'of_strat', 'if_std', 'if_strat', 'if_shift']
wo_recursive_feat = game_sit_feat + ['Pitcher_Tend_FF', 'Pitcher_Tend_CU', 'Pitcher_Tend_CH', 'Pitcher_Tend_SL', 'Pitcher_Tend_SI', 
                                     'Pitcher_Strike_Tend_FF', 'Pitcher_Strike_Tend_CU', 'Pitcher_Strike_Tend_CH', 'Pitcher_Strike_Tend_SL', 'Pitcher_Strike_Tend_SI', 
                                     'batter_stance', 'Strike_Tend_FF', 'Strike_Tend_CU', 'Strike_Tend_CH', 'Strike_Tend_SL', 'Strike_Tend_SI', 'Overall_Strike_Tend', 
                                     'Slug_Avg_FF', 'Slug_Avg_CU', 'Slug_Avg_CH', 'Slug_Avg_SL', 'Slug_Avg_SI', 'Overall_Slug_Avg', 
                                     'Zone_1_Strike_Pcnt', 'Zone_2_Strike_Pcnt', 'Zone_3_Strike_Pcnt', 'Zone_4_Strike_Pcnt', 'Zone_5_Strike_Pcnt', 'Zone_6_Strike_Pcnt', 'Zone_7_Strike_Pcnt', 'Zone_8_Strike_Pcnt', 'Zone_9_Strike_Pcnt', 'Zone_11_Strike_Pcnt', 'Zone_12_Strike_Pcnt', 'Zone_13_Strike_Pcnt', 'Zone_14_Strike_Pcnt']

# Import the data
import pandas as pd
verlander_df = pd.read_csv("./VerlanderOnlyDataset.csv", index_col=0)

X_whole = pd.read_csv("./VerlanderOnly_X_train.csv").to_numpy()
y = pd.read_csv("./VerlanderOnly_y_train.csv").to_numpy()

verlander_cols = np.array(pd.read_csv("./VerlanderOnlyDataset.csv", index_col=0).columns)
game_sit_cols = np.where(np.isin(verlander_cols, game_sit_feat))[0] - 1
wo_recursive_cols = np.where(np.isin(verlander_cols, wo_recursive_feat))[0] - 1

X_game_sit = X_whole[:, game_sit_cols]
X_wo_recurse = X_whole[:, wo_recursive_cols]

# K-Fold Cross Validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


In [36]:
def keras_mlp(train_data, train_labels, val_data, val_labels):
    num_features = train_data.shape[1]
    le = LabelEncoder()
    le.fit(train_labels)
    encoded_y_train = le.transform(train_labels)
    encoded_y_test = le.transform(val_labels)
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(encoded_y_train),
                                                      y=encoded_y_train)
    num_classes = 5
    weights = {}
    for i in range(num_classes):
        weights[i] = class_weights[i]
    model = Sequential()
    # First outer layer expects input of shape (num_features, ) and has a first hidden layer with 12 nodes with relu activation
    model.add(Dense(100, input_shape=(num_features,), activation='relu'))
    # Second hidden layer has 30 nodes with relu activation
    model.add(Dense(50, activation='relu'))
    model.add(Dense(25, activation='relu'))
    # Final layer has num_classes nodes and done with softmax
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=tensorflow.keras.optimizers.Adam(learning_rate=1e-3), metrics=['accuracy'])
    model.fit(train_data, encoded_y_train, class_weight=weights, epochs=300, batch_size=100, verbose=0)
    test_results = model.evaluate(val_data, encoded_y_test, verbose=0)
    print("Test Loss", test_results[0])
    print("Test Accuracy", test_results[1])
    train_results = model.evaluate(train_data, encoded_y_train, verbose=0)
    print("Train Loss", train_results[0])
    print("Train Accuracy", train_results[1])
    return model, test_results, train_results

In [37]:
def normalize_data(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    x_bar = scaler.transform(data)
    return x_bar


In [38]:
keras_mlp_models_x_whole = []
keras_mlp_test_accuracies_x_whole = []
keras_mlp_test_loss_x_whole = []
keras_mlp_train_accuracies_x_whole = []
keras_mlp_train_loss_x_whole = []
for train_index, test_index in skf.split(X_whole, y):
    print("Whole")
    X_train, X_test = X_whole[train_index], X_whole[test_index]
    X_train, X_test = normalize_data(X_train), normalize_data(X_test)
    y_train, y_test = y[train_index], y[test_index]
   
   
    model, test_results, train_results = keras_mlp(X_train, y_train, X_test, y_test)
    keras_mlp_models_x_whole.append(model)
    keras_mlp_test_accuracies_x_whole.append(test_results[1])
    keras_mlp_test_loss_x_whole.append(test_results[0])
    keras_mlp_train_accuracies_x_whole.append(train_results[1])
    keras_mlp_train_loss_x_whole.append(train_results[0])
    

keras_mlp_models_x_game_sit = []
keras_mlp_test_accuracies_x_game_sit = []
keras_mlp_test_loss_x_game_sit = []
keras_mlp_train_accuracies_x_game_sit = []
keras_mlp_train_loss_x_game_sit = []
for train_index, test_index in skf.split(X_game_sit, y):
    print("Situational")
    X_train, X_test = X_game_sit[train_index], X_game_sit[test_index]
    X_train, X_test = normalize_data(X_train), normalize_data(X_test)
    y_train, y_test = y[train_index], y[test_index]
   
   
    model, test_results, train_results = keras_mlp(X_train, y_train, X_test, y_test)
    keras_mlp_models_x_game_sit.append(model)
    keras_mlp_test_accuracies_x_game_sit.append(test_results[1])
    keras_mlp_test_loss_x_game_sit.append(test_results[0])
    keras_mlp_train_accuracies_x_game_sit.append(train_results[1])
    keras_mlp_train_loss_x_game_sit.append(train_results[0])
    
keras_mlp_models_x_wo_recurse = []
keras_mlp_test_accuracies_x_wo_recurse = []
keras_mlp_test_loss_x_wo_recurse = []
keras_mlp_train_accuracies_x_wo_recurse = []
keras_mlp_train_loss_x_wo_recurse = []
for train_index, test_index in skf.split(X_wo_recurse, y):
    print("Without Recursive")
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    X_train, X_test = normalize_data(X_train), normalize_data(X_test)
    y_train, y_test = y[train_index], y[test_index]
   
   
    model, test_results, train_results = keras_mlp(X_train, y_train, X_test, y_test)
    keras_mlp_models_x_wo_recurse.append(model)
    keras_mlp_test_accuracies_x_wo_recurse.append(test_results[1])
    keras_mlp_test_loss_x_wo_recurse.append(test_results[0])
    keras_mlp_train_accuracies_x_wo_recurse.append(train_results[1])
    keras_mlp_train_loss_x_wo_recurse.append(train_results[0])


Whole


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 2.582458972930908
Test Accuracy 0.41422221064567566
Train Loss 0.5862472653388977
Train Accuracy 0.7508112192153931
Whole


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 2.7057340145111084
Test Accuracy 0.3955555558204651
Train Loss 0.6737123727798462
Train Accuracy 0.7003600597381592
Whole


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 2.4556095600128174
Test Accuracy 0.4110952913761139
Train Loss 0.6034297943115234
Train Accuracy 0.7389990091323853
Whole


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 2.426325559616089
Test Accuracy 0.3933143615722656
Train Loss 0.627400279045105
Train Accuracy 0.7242866158485413
Whole


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 2.8264546394348145
Test Accuracy 0.43812233209609985
Train Loss 0.5942274928092957
Train Accuracy 0.7309983372688293
Situational


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.4873929023742676
Test Accuracy 0.35715556144714355
Train Loss 1.0554794073104858
Train Accuracy 0.4899764358997345
Situational


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.4980677366256714
Test Accuracy 0.35875555872917175
Train Loss 1.0831718444824219
Train Accuracy 0.482597678899765
Situational


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5030874013900757
Test Accuracy 0.33961594104766846
Train Loss 1.088485598564148
Train Accuracy 0.4648413062095642
Situational


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5594249963760376
Test Accuracy 0.337660014629364
Train Loss 1.0845954418182373
Train Accuracy 0.4716419279575348
Situational


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5217909812927246
Test Accuracy 0.3573968708515167
Train Loss 1.0579602718353271
Train Accuracy 0.4986220896244049
Without Recursive


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5378210544586182
Test Accuracy 0.37262222170829773
Train Loss 0.9088283181190491
Train Accuracy 0.5558074116706848
Without Recursive


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5893865823745728
Test Accuracy 0.3473777770996094
Train Loss 0.9791963696479797
Train Accuracy 0.5094457268714905
Without Recursive


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.5479117631912231
Test Accuracy 0.36237552762031555
Train Loss 0.9520666003227234
Train Accuracy 0.537381112575531
Without Recursive


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.6053253412246704
Test Accuracy 0.3296585977077484
Train Loss 0.9190033674240112
Train Accuracy 0.55342698097229
Without Recursive


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Test Loss 1.498254418373108
Test Accuracy 0.4030938744544983
Train Loss 0.956719160079956
Train Accuracy 0.5280025005340576


In [39]:
print("X whole train accuracy")
avg_train_accuracy = np.mean(keras_mlp_train_accuracies_x_whole)
print(avg_train_accuracy)
print("X whole test accuracy")
avg_test_accuracy = np.mean(keras_mlp_test_accuracies_x_whole)
print(avg_test_accuracy)
print("X sit train accuracy")
avg_train_accuracy = np.mean(keras_mlp_train_accuracies_x_game_sit)
print(avg_train_accuracy)
print("X sit test accuracy")
avg_test_accuracy = np.mean(keras_mlp_test_accuracies_x_game_sit)
print(avg_test_accuracy)
print("X without recursive train accuracy")
avg_train_accuracy = np.mean(keras_mlp_train_accuracies_x_wo_recurse)
print(avg_train_accuracy)
print("X without recursive test accuracy")
avg_test_accuracy = np.mean(keras_mlp_test_accuracies_x_wo_recurse)
print(avg_test_accuracy)

X whole train accuracy
0.7290910482406616
X whole test accuracy
0.410461950302124
X sit train accuracy
0.48153588771820066
X sit test accuracy
0.3501167893409729
X without recursive train accuracy
0.5368127465248108
X without recursive test accuracy
0.3630255997180939


In [40]:
import pickle
with open('stdz-weights-nodropout-whole.pkl', 'wb') as f:
    pickle.dump(keras_mlp_models_x_whole, f)
with open('stdz-weights-nodropout-sit.pkl', 'wb') as f:
    pickle.dump(keras_mlp_models_x_game_sit, f)
with open('stdz-weights-nodropout-worecursive.pkl', 'wb') as f:
    pickle.dump(keras_mlp_models_x_wo_recurse, f)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dense_2
.........vars
............0
............1
......dense_3
.........vars
............0
............1
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2022-12-06 19:38:29         2697
metadata.json                                  2022-12-06 19:38:29           64
variables.h5                                   2022-12-06 19:38:30       226

File Name                                             Modified             Size
config.json                                    2022-12-06 19:38:31         2694
metadata.json                                  2022-12-06 19:38:31           64
variables.h5                                   2022-12-06 19:38:31       126936
Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dense_2
.........vars
............0
............1
......dense_3
.........vars
............0
............1
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive savi

In [None]:
print()