In [1]:
# Libraries
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [2]:
# Constants
# Pitch-type codes shared by every per-pitch feature family below.
_PITCH_CODES = ('FF', 'CU', 'CH', 'SL', 'SI')

# Game-situation features (17 columns): date, inning/count state, base
# runners, score differential and defensive alignment flags.
game_sit_feat = [
    'month', 'year', 'inning', 'inning_topbot',
    'outs', 'strikes', 'balls', 'pitch_number',
    'on_1b', 'on_2b', 'on_3b', 'score_diff',
    'of_std', 'of_strat', 'if_std', 'if_strat', 'if_shift',
]

# Everything except the recursive features (53 columns): the game-situation
# block followed by pitcher tendencies, batter tendencies and zone rates.
wo_recursive_feat = (
    game_sit_feat
    + [f'Pitcher_Tend_{code}' for code in _PITCH_CODES]
    + [f'Pitcher_Strike_Tend_{code}' for code in _PITCH_CODES]
    + ['batter_stance']
    + [f'Strike_Tend_{code}' for code in _PITCH_CODES]
    + ['Overall_Strike_Tend']
    + [f'Slug_Avg_{code}' for code in _PITCH_CODES]
    + ['Overall_Slug_Avg']
    # Zones 1-9 and 11-14; there is no zone 10 in the feature set.
    + [f'Zone_{zone}_Strike_Pcnt' for zone in (*range(1, 10), *range(11, 15))]
)

In [3]:
# Import the data
def _feature_col_idx(dataset_cols, feature_names, offset=1):
    """Map feature names to column positions in ``X_whole``.

    Parameters
    ----------
    dataset_cols : array-like of str
        Column names of the full dataset CSV (index column excluded).
    feature_names : list of str
        Names of the features to select.
    offset : int, default 1
        Shift between a column's position in ``dataset_cols`` and its
        position in ``X_whole``.

    Returns
    -------
    numpy.ndarray of int
        Column indices into ``X_whole``, in dataset column order.

    Raises
    ------
    ValueError
        If a requested feature is absent from ``dataset_cols`` (``np.isin``
        would otherwise drop it silently) or if the offset would produce a
        negative index (which NumPy would silently wrap to the last column).
    """
    missing = sorted(set(feature_names) - set(dataset_cols))
    if missing:
        raise ValueError(f"Features not found in dataset columns: {missing}")

    idx = np.where(np.isin(dataset_cols, feature_names))[0] - offset
    if (idx < 0).any():
        raise ValueError(
            f"Column offset {offset} produces negative indices; "
            "check the column alignment between the dataset and X_whole."
        )
    return idx


X_whole = pd.read_csv("../data/VerlanderOnly_X_train.csv").to_numpy()
# First column of the label CSV as a 1-D array.
y = pd.read_csv("../data/VerlanderOnly_y_train.csv").to_numpy().T[0]

verlander_cols = np.array(pd.read_csv("../data/VerlanderOnlyDataset.csv", index_col=0).columns)

# NOTE(review): the offset of 1 assumes X_whole holds the dataset's columns
# minus its first one (presumably the target) -- confirm against the script
# that produced the X/y train CSVs.
game_sit_cols = _feature_col_idx(verlander_cols, game_sit_feat)
wo_recursive_cols = _feature_col_idx(verlander_cols, wo_recursive_feat)

X_game_sit = X_whole[:, game_sit_cols]
X_wo_recurse = X_whole[:, wo_recursive_cols]

## Non-Regularized

In [4]:
# Without Recursive Features: Non-regularized (53 features)
#
# 5-fold stratified cross-validation of an MLP on the raw (unscaled)
# X_wo_recurse matrix. The fitted model and its train/test accuracy are kept
# per fold; the three lists are index-aligned with the fold order, which the
# ensemble cell relies on when it looks models up by fold number.
mlp_wo_recurse_models = []
mlp_wo_recurse_train_accs = []
mlp_wo_recurse_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_wo_recurse, y):
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[70, 35, 20],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(X_train, y_train)

    test_acc = model.score(X_test, y_test)
    train_acc = model.score(X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_wo_recurse_train_accs.append(train_acc)
    mlp_wo_recurse_test_accs.append(test_acc)
    mlp_wo_recurse_models.append(model)

Train Accuracy:  0.6202160288038405
Test Accuracy:  0.5569777777777778
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 224  703 4315   18  365]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
Train Accuracy:  0.6151042361203716
Test Accuracy:  0.5706666666666667
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 319  687 4500   13  106]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
Train Accuracy:  0.6160103120277358
Test Accuracy:  0.5638335704125178
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 268  462 4594   23  277]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.6245444039470175
Test Accuracy:  0.5677453769559033
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 209  571 4567   18  259]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.6183660769846209
Test Accuracy:  0.5670341394025604
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 313  501 4425   19  366]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]


In [5]:
# Game Situational Features: Non-regularized (17 features)
#
# 5-fold stratified cross-validation of an MLP on the raw (unscaled)
# X_game_sit matrix. The fitted model and its train/test accuracy are kept
# per fold, index-aligned with the fold order.
mlp_game_sit_models = []
mlp_game_sit_train_accs = []
mlp_game_sit_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_game_sit, y):
    X_train, X_test = X_game_sit[train_index], X_game_sit[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[50, 25, 10],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(X_train, y_train)

    test_acc = model.score(X_test, y_test)
    train_acc = model.score(X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_game_sit_train_accs.append(train_acc)
    mlp_game_sit_test_accs.append(test_acc)
    mlp_game_sit_models.append(model)

Train Accuracy:  0.5932791038805174
Test Accuracy:  0.5800888888888889
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  21  497 5085    8   14]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
Train Accuracy:  0.5929235009112326
Test Accuracy:  0.5841777777777778
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  46  372 5170   16   21]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
Train Accuracy:  0.5968975020001778
Test Accuracy:  0.5812588904694168
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  45  480 5082   13    4]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.5928526980176015
Test Accuracy:  0.5832147937411095
Predictions:
['CH' 'CU' 'FF' 'SI']
[  29  454 5124   17]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.591385900968975
Test Accuracy:  0.5844594594594594
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  19  450 5126   13   16]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]


In [6]:
# All Features: Non-regularized (100 features)
#
# 5-fold stratified cross-validation of an MLP on the raw (unscaled) X_whole
# matrix. The fitted model and its train/test accuracy are kept per fold,
# index-aligned with the fold order.
mlp_whole_models = []
mlp_whole_train_accs = []
mlp_whole_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_whole, y):
    X_train, X_test = X_whole[train_index], X_whole[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    # NOTE(review): batch_size is 50 here but 100 in the scaled and PCA
    # "all features" runs, which confounds the comparison -- confirm intent.
    model = MLPClassifier(hidden_layer_sizes=[100, 50, 25],
                          learning_rate_init=0.0001,
                          batch_size=50,
                          max_iter=1000,
                          random_state=42)

    model.fit(X_train, y_train)

    test_acc = model.score(X_test, y_test)
    train_acc = model.score(X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_whole_train_accs.append(train_acc)
    mlp_whole_test_accs.append(test_acc)
    mlp_whole_models.append(model)

Train Accuracy:  0.7589456371960706
Test Accuracy:  0.5313777777777777
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 548  890 3648   31  508]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]




Train Accuracy:  0.771436191492199
Test Accuracy:  0.5182222222222223
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 638  759 3504   20  704]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]




Train Accuracy:  0.7650902302426882
Test Accuracy:  0.5302275960170697
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 633  716 3764   32  479]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]




Train Accuracy:  0.764645746288559
Test Accuracy:  0.5145803698435277
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 706  743 3622   25  528]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]




Train Accuracy:  0.7597564227931372
Test Accuracy:  0.5304054054054054
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 548  606 3825   34  611]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]


## Regularized (min-max scaled inputs)

In [7]:
# Without Recursive Features: Regularized (53 features)
#
# Same 5-fold stratified CV as the non-regularized run, but the inputs are
# rescaled with a MinMaxScaler fit on the training fold only (the test fold
# is transformed with the training statistics, so nothing leaks). Models and
# accuracies are kept per fold; the ensemble cell looks models up by fold
# index.
mlp_wo_recurse_reg_models = []
mlp_wo_recurse_reg_train_accs = []
mlp_wo_recurse_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_wo_recurse, y):
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[70, 35, 20],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(scaled_X_train, y_train)

    test_acc = model.score(scaled_X_test, y_test)
    train_acc = model.score(scaled_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(scaled_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_wo_recurse_reg_train_accs.append(train_acc)
    mlp_wo_recurse_reg_test_accs.append(test_acc)
    mlp_wo_recurse_reg_models.append(model)

Train Accuracy:  0.6090589856425301
Test Accuracy:  0.5644444444444444
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 219  610 4462   18  316]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
Train Accuracy:  0.6131928701604659
Test Accuracy:  0.5664
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 204  732 4444   17  228]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
Train Accuracy:  0.6106320561827718
Test Accuracy:  0.5654338549075392
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 167  503 4584   27  343]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.6074317717130412
Test Accuracy:  0.5691678520625889
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 182  549 4636   18  239]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.6134323050937861
Test Accuracy:  0.5771692745376956
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 108  476 4795   19  226]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]


In [8]:
# Game Situational Features: Regularized (17 features)
#
# 5-fold stratified CV of an MLP on X_game_sit, with the inputs rescaled by
# a MinMaxScaler fit on the training fold only (the test fold is transformed
# with the training statistics). Models and accuracies are kept per fold.
mlp_game_sit_reg_models = []
mlp_game_sit_reg_train_accs = []
mlp_game_sit_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_game_sit, y):
    X_train, X_test = X_game_sit[train_index], X_game_sit[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[50, 25, 10],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(scaled_X_train, y_train)

    test_acc = model.score(scaled_X_test, y_test)
    train_acc = model.score(scaled_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(scaled_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_game_sit_reg_train_accs.append(train_acc)
    mlp_game_sit_reg_test_accs.append(test_acc)
    mlp_game_sit_reg_models.append(model)

Train Accuracy:  0.5925234475707872
Test Accuracy:  0.5781333333333334
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  41  463 5099    7   15]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
Train Accuracy:  0.591767791261057
Test Accuracy:  0.5843555555555555
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  13  501 5094   14    3]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
Train Accuracy:  0.5914747977598008
Test Accuracy:  0.5819701280227596
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  46  365 5202   10    1]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.5909414170148458
Test Accuracy:  0.5860597439544808
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  32  377 5188   16   11]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.5902746910836518
Test Accuracy:  0.5892603129445235
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  30  350 5191   11   42]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]


In [9]:
# All Features: Regularized (100 features)
#
# 5-fold stratified CV of an MLP on X_whole, with the inputs rescaled by a
# MinMaxScaler fit on the training fold only (the test fold is transformed
# with the training statistics). Models and accuracies are kept per fold.
mlp_whole_reg_models = []
mlp_whole_reg_train_accs = []
mlp_whole_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_whole, y):
    X_train, X_test = X_whole[train_index], X_whole[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[100, 50, 25],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(scaled_X_train, y_train)

    test_acc = model.score(scaled_X_test, y_test)
    train_acc = model.score(scaled_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(scaled_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_whole_reg_train_accs.append(train_acc)
    mlp_whole_reg_test_accs.append(test_acc)
    mlp_whole_reg_models.append(model)

Train Accuracy:  0.7393874738854069
Test Accuracy:  0.5235555555555556
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 517  934 3723   30  421]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]




Train Accuracy:  0.7331199715517625
Test Accuracy:  0.5340444444444444
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 455  943 3721   19  487]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]




Train Accuracy:  0.7322873144279491
Test Accuracy:  0.52649359886202
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 496  962 3570   29  567]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]




Train Accuracy:  0.7481998399857765
Test Accuracy:  0.5280938833570412
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 543  696 3774   28  583]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
Train Accuracy:  0.7470441817050405
Test Accuracy:  0.5248933143669986
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 479  919 3705   29  492]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]




In [10]:
# Without Recursive Features: PCA 95% (53 features)
#
# 5-fold stratified CV of an MLP on X_wo_recurse after min-max scaling and a
# PCA that keeps enough components to explain 95% of the variance. Scaler
# and PCA are both fit on the training fold only and then applied to the
# test fold. Models and accuracies are kept per fold; the ensemble cell
# looks models up by fold index.
mlp_wo_recurse_pca_models = []
mlp_wo_recurse_pca_train_accs = []
mlp_wo_recurse_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_wo_recurse, y):
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # A fractional n_components selects the number of components by
    # explained variance; the count can therefore differ between folds.
    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    print("PCA Reduced Dimensions: ", reduced_X_train.shape)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[70, 35, 20],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(reduced_X_train, y_train)

    test_acc = model.score(reduced_X_test, y_test)
    train_acc = model.score(reduced_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(reduced_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_wo_recurse_pca_train_accs.append(train_acc)
    mlp_wo_recurse_pca_test_accs.append(test_acc)
    mlp_wo_recurse_pca_models.append(model)

PCA Reduced Dimensions:  (22497, 18)




Train Accuracy:  0.6326176823576477
Test Accuracy:  0.5482666666666667
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 320  630 4312   14  349]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
PCA Reduced Dimensions:  (22497, 19)




Train Accuracy:  0.6276392407876605
Test Accuracy:  0.5569777777777778
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 196  814 4355   17  243]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
PCA Reduced Dimensions:  (22498, 18)
Train Accuracy:  0.6233887456662814
Test Accuracy:  0.5665007112375533
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 241  583 4438   22  340]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 19)




Train Accuracy:  0.6352120188461197
Test Accuracy:  0.5417852062588905
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 277  592 4394   23  338]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 19)
Train Accuracy:  0.6281891723708775
Test Accuracy:  0.5533428165007113
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 273  517 4442   16  376]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]




In [22]:
# Sanity check: (n_samples, n_features) of the matrix without recursive features.
X_wo_recurse.shape

(28122, 53)

In [11]:
# Game Situational Features: PCA 95% (17 features)
#
# 5-fold stratified CV of an MLP on X_game_sit after min-max scaling and a
# PCA that keeps enough components to explain 95% of the variance. Scaler
# and PCA are both fit on the training fold only and then applied to the
# test fold. Models and accuracies are kept per fold.
mlp_game_sit_pca_models = []
mlp_game_sit_pca_train_accs = []
mlp_game_sit_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_game_sit, y):
    X_train, X_test = X_game_sit[train_index], X_game_sit[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # A fractional n_components selects the number of components by
    # explained variance.
    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    print("PCA Reduced Dimensions: ", reduced_X_train.shape)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[50, 25, 10],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(reduced_X_train, y_train)

    test_acc = model.score(reduced_X_test, y_test)
    train_acc = model.score(reduced_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(reduced_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_game_sit_pca_train_accs.append(train_acc)
    mlp_game_sit_pca_test_accs.append(test_acc)
    mlp_game_sit_pca_models.append(model)

PCA Reduced Dimensions:  (22497, 12)
Train Accuracy:  0.5952793705827444
Test Accuracy:  0.5813333333333334
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  27  498 5088    8    4]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
PCA Reduced Dimensions:  (22497, 12)
Train Accuracy:  0.5912788371782904
Test Accuracy:  0.5832888888888889
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[   8  426 5174   12    5]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
PCA Reduced Dimensions:  (22498, 12)
Train Accuracy:  0.590185794292826
Test Accuracy:  0.5796586059743954
Predictions:
['CU' 'FF' 'SI']
[ 359 5255   10]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 12)
Train Accuracy:  0.594363943461641
Test Accuracy:  0.5828591749644382
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[   8  414 5164    7   31]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 12)
Train Accuracy:  0.5912081073873233
T

In [12]:
# All Features: PCA 95% (100 features)
#
# 5-fold stratified CV of an MLP on X_whole after min-max scaling and a PCA
# that keeps enough components to explain 95% of the variance. Scaler and
# PCA are both fit on the training fold only and then applied to the test
# fold. Models and accuracies are kept per fold.
mlp_whole_pca_models = []
mlp_whole_pca_train_accs = []
mlp_whole_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_whole, y):
    X_train, X_test = X_whole[train_index], X_whole[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    # A fractional n_components selects the number of components by
    # explained variance.
    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    print("PCA Reduced Dimensions: ", reduced_X_train.shape)

    # random_state pins weight initialisation and mini-batch shuffling so a
    # rerun of this cell should reproduce the same per-fold numbers.
    model = MLPClassifier(hidden_layer_sizes=[100, 50, 25],
                          learning_rate_init=0.0001,
                          batch_size=100,
                          max_iter=1000,
                          random_state=42)

    model.fit(reduced_X_train, y_train)

    test_acc = model.score(reduced_X_test, y_test)
    train_acc = model.score(reduced_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Class distribution of predictions vs. ground truth on the held-out
    # fold. Predict once and reuse the result for both printed lines.
    pred_labels, pred_counts = np.unique(model.predict(reduced_X_test), return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts)
    print("Actual:")
    print(true_labels)
    print(true_counts)

    mlp_whole_pca_train_accs.append(train_acc)
    mlp_whole_pca_test_accs.append(test_acc)
    mlp_whole_pca_models.append(model)

PCA Reduced Dimensions:  (22497, 35)




Train Accuracy:  0.748144197004045
Test Accuracy:  0.5121777777777777
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 556  955 3444   30  640]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613]
PCA Reduced Dimensions:  (22497, 35)




Train Accuracy:  0.7563675156687558
Test Accuracy:  0.5127111111111111
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 518  980 3527   23  577]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613]
PCA Reduced Dimensions:  (22498, 35)




Train Accuracy:  0.7486887723353187
Test Accuracy:  0.5181365576102418
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 477  995 3565   32  555]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 35)




Train Accuracy:  0.7477998044270602
Test Accuracy:  0.5181365576102418
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 551  781 3746   24  522]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612]
PCA Reduced Dimensions:  (22498, 35)
Train Accuracy:  0.7445995199573295
Test Accuracy:  0.5280938833570412
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 558  932 3699   28  407]
Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612]




## Ensemble

In [15]:
from sklearn.linear_model import LogisticRegression

In [20]:
# Ensemble of MLP models
#
# Hard-voting ensemble of the three "without recursive features" MLPs that
# were fit per fold in earlier cells: raw inputs, min-max scaled inputs, and
# scaled + PCA inputs. The same StratifiedKFold configuration is used so that
# fold i here lines up with the i-th model in each of those lists.
def _majority_vote(votes):
    """Return the most frequent label in each row of ``votes``.

    Parameters
    ----------
    votes : numpy.ndarray, shape (n_samples, n_voters)
        One column of predicted labels per ensemble member.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Winning label per row. Ties (e.g. all voters disagree) go to the
        label that sorts first, which matches taking ``argmax`` over the
        counts returned by ``np.unique`` on a single row.
    """
    if votes.size == 0:
        return np.array([], dtype=votes.dtype)
    classes, codes = np.unique(votes, return_inverse=True)
    # The inverse may come back flat or input-shaped depending on the NumPy
    # version; normalise it to (n_samples, n_voters).
    codes = codes.reshape(votes.shape)
    # tallies[i, k] = number of voters that chose classes[k] for sample i.
    tallies = (codes[:, :, np.newaxis] == np.arange(len(classes))).sum(axis=1)
    # argmax returns the first maximum, i.e. the lowest-sorted label on ties.
    return classes[tallies.argmax(axis=1)]


ensemble_models = []
ensemble_train_accs = []
ensemble_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_index, test_index) in enumerate(skf.split(X_wo_recurse, y)):
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Refit the preprocessing on this fold's training rows so each member
    # model receives inputs prepared the same way as when it was trained.
    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    mlp_pca_model = mlp_wo_recurse_pca_models[fold]
    mlp_stdz_model = mlp_wo_recurse_reg_models[fold]
    mlp_model = mlp_wo_recurse_models[fold]

    # Hard label predictions (not probabilities), one column per member.
    votes_train = np.column_stack([mlp_pca_model.predict(reduced_X_train),
                                   mlp_stdz_model.predict(scaled_X_train),
                                   mlp_model.predict(X_train)])
    votes_test = np.column_stack([mlp_pca_model.predict(reduced_X_test),
                                  mlp_stdz_model.predict(scaled_X_test),
                                  mlp_model.predict(X_test)])

    train_pred = _majority_vote(votes_train)
    test_pred = _majority_vote(votes_test)

    train_acc = np.mean(train_pred == y_train)
    test_acc = np.mean(test_pred == y_test)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc, "\n")

    pred_labels, pred_counts = np.unique(test_pred, return_counts=True)
    true_labels, true_counts = np.unique(y_test, return_counts=True)
    print("Predictions:")
    print(pred_labels)
    print(pred_counts, "\n")
    print("Actual:")
    print(true_labels)
    print(true_counts, "\n")

    # Record per-fold results; these lists were previously declared but
    # never filled, so the ensemble could not be summarised afterwards.
    ensemble_models.append((mlp_pca_model, mlp_stdz_model, mlp_model))
    ensemble_train_accs.append(train_acc)
    ensemble_test_accs.append(test_acc)

Train Accuracy:  0.6299062097168512
Test Accuracy:  0.5683555555555555 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 205  597 4549   16  258] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613] 

Train Accuracy:  0.6282170956127484
Test Accuracy:  0.5733333333333334 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 197  680 4605   16  127] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613] 

Train Accuracy:  0.6263667881589474
Test Accuracy:  0.5737908961593172 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 197  457 4704   21  245] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.6291225886745488
Test Accuracy:  0.5720128022759602 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 178  532 4720   17  177] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.629611521024091
Test Accuracy:  0.5825035561877667 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 179  453 4755   17  220] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']


In [21]:
# Mean train / test accuracy across the five folds, one line per experiment.
# Row order: (all features, without recursive, game situational) for the
# non-regularized, regularized and PCA runs respectively.
accuracy_pairs = [
    (mlp_whole_train_accs, mlp_whole_test_accs),
    (mlp_wo_recurse_train_accs, mlp_wo_recurse_test_accs),
    (mlp_game_sit_train_accs, mlp_game_sit_test_accs),
    (mlp_whole_reg_train_accs, mlp_whole_reg_test_accs),
    (mlp_wo_recurse_reg_train_accs, mlp_wo_recurse_reg_test_accs),
    (mlp_game_sit_reg_train_accs, mlp_game_sit_reg_test_accs),
    (mlp_whole_pca_train_accs, mlp_whole_pca_test_accs),
    (mlp_wo_recurse_pca_train_accs, mlp_wo_recurse_pca_test_accs),
    (mlp_game_sit_pca_train_accs, mlp_game_sit_pca_test_accs),
]

for fold_train_accs, fold_test_accs in accuracy_pairs:
    print(np.mean(fold_train_accs), "\t", np.mean(fold_test_accs))

0.7639748456025308 	 0.5249626742532005
0.6188482115767172 	 0.5652515062430853
0.593467741155701 	 0.5826399620673305
0.7400077563111871 	 0.5274161593172119
0.6107495977585191 	 0.5685230851904536
0.5913964289380285 	 0.5839558147621305
0.7491199618785018 	 0.5178511774932828
0.6294093720057173 	 0.5533746356883199
0.5924632105805651 	 0.5824977019124388
