In [1]:
# Libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [2]:
# Constants
# Name lists used to select column subsets from the full feature matrix.

# Pitch-type codes that appear as suffixes in the per-pitch feature names.
_PITCH_CODES = ('FF', 'CU', 'CH', 'SL', 'SI')
# Zone numbers that appear in the Zone_*_Strike_Pcnt names (1-9 and 11-14; 10 is not listed).
_ZONE_IDS = (*range(1, 10), *range(11, 15))

# Game-situation feature names (17 entries).
game_sit_feat = [
    'month', 'year', 'inning', 'inning_topbot',
    'outs', 'strikes', 'balls', 'pitch_number',
    'on_1b', 'on_2b', 'on_3b', 'score_diff',
    'of_std', 'of_strat', 'if_std', 'if_strat', 'if_shift',
]

# Game-situation names plus pitcher / batter tendency names (53 entries in total).
# The order below reproduces the original hand-written list exactly.
wo_recursive_feat = (
    game_sit_feat
    + [f'Pitcher_Tend_{code}' for code in _PITCH_CODES]
    + [f'Pitcher_Strike_Tend_{code}' for code in _PITCH_CODES]
    + ['batter_stance']
    + [f'Strike_Tend_{code}' for code in _PITCH_CODES]
    + ['Overall_Strike_Tend']
    + [f'Slug_Avg_{code}' for code in _PITCH_CODES]
    + ['Overall_Slug_Avg']
    + [f'Zone_{zone}_Strike_Pcnt' for zone in _ZONE_IDS]
)

In [3]:
# Import the data
# Training feature matrix and the matching label vector (first column of the y file).
X_whole = pd.read_csv("../data/VerlanderOnly_X_train.csv").to_numpy()
y = pd.read_csv("../data/VerlanderOnly_y_train.csv").to_numpy().T[0]

# Only the column names of the full dataset are needed, so read the header alone
# (nrows=0) instead of parsing every row of the file.
verlander_cols = np.array(
    pd.read_csv("../data/VerlanderOnlyDataset.csv", index_col=0, nrows=0).columns
)

# Translate feature names into column positions of X_whole.
# NOTE(review): the "- 1" assumes X_whole holds the dataset's columns in the same
# order but without the first one (verlander_cols[0]) — TODO confirm against the
# script that produced VerlanderOnly_X_train.csv.
# np.isin silently ignores names that are not present, so a typo in a feature list
# shrinks the selection instead of raising.
game_sit_cols = np.where(np.isin(verlander_cols, game_sit_feat))[0] - 1
wo_recursive_cols = np.where(np.isin(verlander_cols, wo_recursive_feat))[0] - 1

# Column subsets used by the modelling cells.
X_game_sit = X_whole[:, game_sit_cols]
X_wo_recurse = X_whole[:, wo_recursive_cols]

## Non-Regularized (unscaled features)

In [4]:
# Logistic Regression - Without Recursive Features: Non-regularized (53 features)
# NOTE(review): LogisticRegression is used with its defaults apart from max_iter, and
# scikit-learn's default penalty is L2, so "Non-regularized" here appears to mean
# "features are not scaled" — confirm the intended meaning.
# NOTE(review): the recorded output shows the solver hitting max_iter on these raw inputs.
log_reg_wo_recurse_models = []
log_reg_wo_recurse_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 5-fold stratified CV: fit on the training part, score on the held-out part,
# and keep both the fitted model and its accuracy for each fold.
for fit_idx, holdout_idx in skf.split(X_wo_recurse, y):
    fold_model = LogisticRegression(max_iter=1000).fit(X_wo_recurse[fit_idx], y[fit_idx])

    holdout_acc = fold_model.score(X_wo_recurse[holdout_idx], y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_wo_recurse_models.append(fold_model)
    log_reg_wo_recurse_test_accs.append(holdout_acc)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5802666666666667


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5784888888888889


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5739687055476529


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5753911806543386
Test Accuracy:  0.5812588904694168


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
# Logistic Regression - Game Situational Features: Non-regularized (17 features)
# NOTE(review): LogisticRegression is used with its defaults apart from max_iter, and
# scikit-learn's default penalty is L2, so "Non-regularized" here appears to mean
# "features are not scaled" — confirm the intended meaning.
log_reg_game_sit_models = []
log_reg_game_sit_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 5-fold stratified CV: fit on the training part, score on the held-out part,
# and keep both the fitted model and its accuracy for each fold.
for fit_idx, holdout_idx in skf.split(X_game_sit, y):
    fold_model = LogisticRegression(max_iter=1000).fit(X_game_sit[fit_idx], y[fit_idx])

    holdout_acc = fold_model.score(X_game_sit[holdout_idx], y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_game_sit_models.append(fold_model)
    log_reg_game_sit_test_accs.append(holdout_acc)

Test Accuracy:  0.5783111111111111
Test Accuracy:  0.5779555555555556
Test Accuracy:  0.5764580369843528
Test Accuracy:  0.5785917496443812
Test Accuracy:  0.582325746799431


In [6]:
# Logistic Regression - All Features: Non-regularized (100 features)
# NOTE(review): LogisticRegression is used with its defaults apart from max_iter, and
# scikit-learn's default penalty is L2, so "Non-regularized" here appears to mean
# "features are not scaled" — confirm the intended meaning.
# NOTE(review): the recorded output shows the solver hitting max_iter on these raw inputs.
log_reg_whole_models = []
log_reg_whole_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 5-fold stratified CV: fit on the training part, score on the held-out part,
# and keep both the fitted model and its accuracy for each fold.
for fit_idx, holdout_idx in skf.split(X_whole, y):
    fold_model = LogisticRegression(max_iter=1000).fit(X_whole[fit_idx], y[fit_idx])

    holdout_acc = fold_model.score(X_whole[holdout_idx], y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_whole_models.append(fold_model)
    log_reg_whole_test_accs.append(holdout_acc)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5850666666666666


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5886222222222223


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5825035561877667


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Accuracy:  0.5864153627311522
Test Accuracy:  0.5890825035561877


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Regularized (min-max scaled features)

In [7]:
# Logistic Regression - Without Recursive Features: Regularized (53 features)
# NOTE(review): the only difference from the "Non-regularized" cell for this feature
# set is the MinMaxScaler step; the LogisticRegression arguments are identical, so
# "Regularized" here effectively means "min-max scaled" — confirm the intended meaning.
log_reg_wo_recurse_reg_models = []
log_reg_wo_recurse_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fit_idx, holdout_idx in skf.split(X_wo_recurse, y):
    # The scaler is fitted on the training part only and then reused on the
    # held-out part, so no held-out statistics leak into preprocessing.
    fold_scaler = MinMaxScaler()
    fit_X = fold_scaler.fit_transform(X_wo_recurse[fit_idx])
    holdout_X = fold_scaler.transform(X_wo_recurse[holdout_idx])

    fold_model = LogisticRegression(max_iter=1000).fit(fit_X, y[fit_idx])

    holdout_acc = fold_model.score(holdout_X, y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_wo_recurse_reg_models.append(fold_model)
    log_reg_wo_recurse_reg_test_accs.append(holdout_acc)

Test Accuracy:  0.5799111111111112
Test Accuracy:  0.5783111111111111
Test Accuracy:  0.5755689900426743
Test Accuracy:  0.5785917496443812
Test Accuracy:  0.5832147937411095


In [8]:
# Logistic Regression - Game Situational Features: Regularized (17 features)
# NOTE(review): the only difference from the "Non-regularized" cell for this feature
# set is the MinMaxScaler step; the LogisticRegression arguments are identical, so
# "Regularized" here effectively means "min-max scaled" — confirm the intended meaning.
log_reg_game_sit_reg_models = []
log_reg_game_sit_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fit_idx, holdout_idx in skf.split(X_game_sit, y):
    # The scaler is fitted on the training part only and then reused on the
    # held-out part, so no held-out statistics leak into preprocessing.
    fold_scaler = MinMaxScaler()
    fit_X = fold_scaler.fit_transform(X_game_sit[fit_idx])
    holdout_X = fold_scaler.transform(X_game_sit[holdout_idx])

    fold_model = LogisticRegression(max_iter=1000).fit(fit_X, y[fit_idx])

    holdout_acc = fold_model.score(holdout_X, y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_game_sit_reg_models.append(fold_model)
    log_reg_game_sit_reg_test_accs.append(holdout_acc)

Test Accuracy:  0.5779555555555556
Test Accuracy:  0.5776
Test Accuracy:  0.5764580369843528
Test Accuracy:  0.5787695590327169
Test Accuracy:  0.5821479374110953


In [9]:
# Logistic Regression - All Features: Regularized (100 features)
# NOTE(review): the only difference from the "Non-regularized" cell for this feature
# set is the MinMaxScaler step; the LogisticRegression arguments are identical, so
# "Regularized" here effectively means "min-max scaled" — confirm the intended meaning.
log_reg_whole_reg_models = []
log_reg_whole_reg_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fit_idx, holdout_idx in skf.split(X_whole, y):
    # The scaler is fitted on the training part only and then reused on the
    # held-out part, so no held-out statistics leak into preprocessing.
    fold_scaler = MinMaxScaler()
    fit_X = fold_scaler.fit_transform(X_whole[fit_idx])
    holdout_X = fold_scaler.transform(X_whole[holdout_idx])

    fold_model = LogisticRegression(max_iter=1000).fit(fit_X, y[fit_idx])

    holdout_acc = fold_model.score(holdout_X, y[holdout_idx])
    print("Test Accuracy: ", holdout_acc)

    log_reg_whole_reg_models.append(fold_model)
    log_reg_whole_reg_test_accs.append(holdout_acc)

Test Accuracy:  0.5873777777777778
Test Accuracy:  0.5889777777777778
Test Accuracy:  0.5830369843527738
Test Accuracy:  0.5880156472261735
Test Accuracy:  0.5890825035561877


In [10]:
# Logistic Regression - Without Recursive Features: PCA 95% (53 input features)
# Per fold: min-max scale -> PCA keeping enough components for 95% of the variance
# -> logistic regression on the reduced inputs.
log_reg_wo_recurse_pca_models = []
log_reg_wo_recurse_pca_train_accs = []  # stored as in the whole-features PCA cell
log_reg_wo_recurse_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_wo_recurse, y):
    X_train, X_test = X_wo_recurse[train_index], X_wo_recurse[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Scaler and PCA are fitted on the training fold only, then applied to the test fold.
    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    model = LogisticRegression(max_iter=1000)
    model.fit(reduced_X_train, y_train)

    train_acc = model.score(reduced_X_train, y_train)
    test_acc = model.score(reduced_X_test, y_test)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Predict once and tally once: the class distributions below show which pitch
    # types the model actually predicts compared with the true labels.
    test_preds = model.predict(reduced_X_test)
    pred_labels, pred_counts = np.unique(test_preds, return_counts=True)
    actual_labels, actual_counts = np.unique(y_test, return_counts=True)

    print("Predictions:")
    print(pred_labels)
    print(pred_counts, "\n")
    print("Actual:")
    print(actual_labels)
    print(actual_counts, "\n")

    log_reg_wo_recurse_pca_train_accs.append(train_acc)
    log_reg_wo_recurse_pca_test_accs.append(test_acc)
    log_reg_wo_recurse_pca_models.append(model)

Train Accuracy:  0.5769658176645776
Test Accuracy:  0.5758222222222222
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[   9  340 5181   11   84] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613] 

Train Accuracy:  0.577454771747344
Test Accuracy:  0.5767111111111111
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  20  317 5186   14   88] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613] 

Train Accuracy:  0.5774291048093164
Test Accuracy:  0.5720128022759602
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  11  347 5124   12  130] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5777402435772069
Test Accuracy:  0.5741465149359887
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[   8  329 5159   16  112] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5764067917148191
Test Accuracy:  0.5809032716927454
Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  10  276 5238   15   85] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938

In [11]:
# Logistic Regression - Game Situational Features: PCA 95% (17 input features)
# Per fold: min-max scale -> PCA keeping enough components for 95% of the variance
# -> logistic regression on the reduced inputs.
log_reg_game_sit_pca_models = []
log_reg_game_sit_pca_train_accs = []  # stored as in the whole-features PCA cell
log_reg_game_sit_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_game_sit, y):
    X_train, X_test = X_game_sit[train_index], X_game_sit[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Scaler and PCA are fitted on the training fold only, then applied to the test fold.
    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    model = LogisticRegression(max_iter=1000)
    model.fit(reduced_X_train, y_train)

    train_acc = model.score(reduced_X_train, y_train)
    test_acc = model.score(reduced_X_test, y_test)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc)

    # Predict once and tally once: the class distributions below show which pitch
    # types the model actually predicts compared with the true labels.
    test_preds = model.predict(reduced_X_test)
    pred_labels, pred_counts = np.unique(test_preds, return_counts=True)
    actual_labels, actual_counts = np.unique(y_test, return_counts=True)

    print("Predictions:")
    print(pred_labels)
    print(pred_counts, "\n")
    print("Actual:")
    print(actual_labels)
    print(actual_counts, "\n")

    log_reg_game_sit_pca_train_accs.append(train_acc)
    log_reg_game_sit_pca_test_accs.append(test_acc)
    log_reg_game_sit_pca_models.append(model)

Train Accuracy:  0.5781215273147531
Test Accuracy:  0.5774222222222222
Predictions:
['CU' 'FF' 'SL']
[ 352 5251   22] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613] 

Train Accuracy:  0.57789927545895
Test Accuracy:  0.5772444444444444
Predictions:
['CU' 'FF' 'SL']
[ 332 5269   24] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613] 

Train Accuracy:  0.5780513823450973
Test Accuracy:  0.5743243243243243
Predictions:
['CU' 'FF' 'SL']
[ 364 5228   32] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5773846564139035
Test Accuracy:  0.5764580369843528
Predictions:
['CU' 'FF' 'SL']
[ 339 5257   28] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5761401013423415
Test Accuracy:  0.5807254623044097
Predictions:
['CU' 'FF' 'SL']
[ 285 5317   22] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  938 3278   38  612] 



In [12]:
# Logistic Regression - Whole Features: PCA 95% (100 input features)
# Per fold: min-max scale -> PCA keeping enough components for 95% of the variance
# -> logistic regression on the reduced inputs.
log_reg_whole_pca_models = []
log_reg_whole_pca_train_accs = []
log_reg_whole_pca_test_accs = []

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in skf.split(X_whole, y):
    X_train, X_test = X_whole[train_index], X_whole[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Scaler and PCA are fitted on the training fold only, then applied to the test fold.
    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    pca = PCA(n_components=0.95, svd_solver='full')
    reduced_X_train = pca.fit_transform(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    model = LogisticRegression(max_iter=1000)
    model.fit(reduced_X_train, y_train)

    test_acc = model.score(reduced_X_test, y_test)
    train_acc = model.score(reduced_X_train, y_train)
    print("Train Accuracy: ", train_acc)
    print("Test Accuracy: ", test_acc, "\n")

    # Predict once and tally once: the class distributions below show which pitch
    # types the model actually predicts compared with the true labels.
    test_preds = model.predict(reduced_X_test)
    pred_labels, pred_counts = np.unique(test_preds, return_counts=True)
    actual_labels, actual_counts = np.unique(y_test, return_counts=True)

    print("Predictions:")
    print(pred_labels)
    print(pred_counts, "\n")
    print("Actual:")
    print(actual_labels)
    print(actual_counts, "\n")

    log_reg_whole_pca_train_accs.append(train_acc)
    log_reg_whole_pca_test_accs.append(test_acc)
    log_reg_whole_pca_models.append(model)

Train Accuracy:  0.5851891363292884
Test Accuracy:  0.5866666666666667 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  45  421 4979   24  156] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 758  939 3277   38  613] 

Train Accuracy:  0.5867893496910699
Test Accuracy:  0.5852444444444445 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  63  395 4977   30  160] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3277   39  613] 

Train Accuracy:  0.5856965063561206
Test Accuracy:  0.5796586059743954 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  46  405 4947   28  198] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5855631611698817
Test Accuracy:  0.5812588904694168 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  51  355 4995   30  193] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']
[ 757  939 3278   38  612] 

Train Accuracy:  0.5853409191928172
Test Accuracy:  0.5869487908961594 

Predictions:
['CH' 'CU' 'FF' 'SI' 'SL']
[  34  325 5065   21  179] 

Actual:
['CH' 'CU' 'FF' 'SI' 'SL']

## Ensemble

In [19]:
# Ensemble of LogReg Models
# For now this cell only reports how often the three whole-feature models
# (PCA-reduced, min-max scaled, raw) predict the same label on each held-out fold.
#
# TODO: build the stacked ensemble. The earlier draft stacked the three `.predict`
# outputs and fitted a LogisticRegression on them, but `.predict` returns class
# labels (strings such as 'FF'), not probabilities; use `predict_proba` (or encode
# the labels) as the meta-features, then fill the three lists below.
ensemble_models = []       # not populated yet — reserved for the stacking step
ensemble_train_accs = []   # not populated yet
ensemble_test_accs = []    # not populated yet

# Same splitter settings as the training cells, so fold i here lines up with the
# i-th model stored in each log_reg_whole_*_models list.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_index, test_index) in enumerate(skf.split(X_whole, y)):
    X_train, X_test = X_whole[train_index], X_whole[test_index]

    # Rebuild this fold's preprocessing (the training cells did not keep their
    # fitted scaler / PCA objects).
    scaler = MinMaxScaler()
    scaled_X_train = scaler.fit_transform(X_train)
    scaled_X_test = scaler.transform(X_test)

    pca = PCA(n_components=0.95, svd_solver='full')
    pca.fit(scaled_X_train)
    reduced_X_test = pca.transform(scaled_X_test)

    log_reg_pca_model = log_reg_whole_pca_models[fold]
    log_reg_stdz_model = log_reg_whole_reg_models[fold]
    log_reg_model = log_reg_whole_models[fold]

    # Hard label predictions on the held-out fold, each model fed the
    # representation it was trained on.
    pca_preds_test = log_reg_pca_model.predict(reduced_X_test)
    stdz_preds_test = log_reg_stdz_model.predict(scaled_X_test)
    raw_preds_test = log_reg_model.predict(X_test)

    # Fraction of held-out samples on which each pair of models agrees.
    print("Agreement PCA-STDZ: ", np.mean(pca_preds_test == stdz_preds_test))
    print("Agreement PCA-REGULAR: ", np.mean(pca_preds_test == raw_preds_test))
    print("Agreement STDZ-REGULAR: ", np.mean(stdz_preds_test == raw_preds_test))

Agreement PCA-STDZ:  0.9552
Agreement PCA-REGULAR:  0.9571555555555555
Agreement STDZ-REGULAR:  0.9882666666666666
Agreement PCA-STDZ:  0.9523555555555555
Agreement PCA-REGULAR:  0.9521777777777778
Agreement STDZ-REGULAR:  0.9880888888888889
Agreement PCA-STDZ:  0.94950213371266
Agreement PCA-REGULAR:  0.94950213371266
Agreement STDZ-REGULAR:  0.9907539118065434
Agreement PCA-STDZ:  0.9541251778093883
Agreement PCA-REGULAR:  0.954302987197724
Agreement STDZ-REGULAR:  0.9905761024182077
Agreement PCA-STDZ:  0.9557254623044097
Agreement PCA-REGULAR:  0.9589260312944523
Agreement STDZ-REGULAR:  0.9893314366998578


In [28]:
# Mean cross-validated test accuracy per experiment, one line each, in this order:
#   unscaled ("Non-regularized"): all features, without recursive, game situational
#   min-max scaled ("Regularized"): all features, without recursive, game situational
#   PCA 95%: all features (mean train accuracy printed first), without recursive, game situational
for fold_accs in (
    log_reg_whole_test_accs,
    log_reg_wo_recurse_test_accs,
    log_reg_game_sit_test_accs,
    log_reg_whole_reg_test_accs,
    log_reg_wo_recurse_reg_test_accs,
    log_reg_game_sit_reg_test_accs,
):
    print("\t", np.mean(fold_accs))

# The whole-features PCA run is the only one with stored train accuracies.
print(np.mean(log_reg_whole_pca_train_accs), "\t", np.mean(log_reg_whole_pca_test_accs))

for fold_accs in (log_reg_wo_recurse_pca_test_accs, log_reg_game_sit_pca_test_accs):
    print("\t", np.mean(fold_accs))

	 0.5863380622727992
	 0.5778748664453928
	 0.5787284400189664
	 0.5872981381381381
	 0.5791195511300774
	 0.5785862177967441
0.5857158145478356 	 0.5839554796902166
	 0.5759191844476056
	 0.5772348980559506


In [29]:
# Display the dataset's column names, for reference when choosing feature subsets.
verlander_cols

array(['pitch_type', 'month', 'year', 'inning', 'inning_topbot', 'outs',
       'strikes', 'balls', 'pitch_number', 'on_1b', 'on_2b', 'on_3b',
       'score_diff', 'of_std', 'of_strat', 'if_std', 'if_strat',
       'if_shift', 'Pitcher_Tend_FF', 'Pitcher_Tend_CU',
       'Pitcher_Tend_CH', 'Pitcher_Tend_SL', 'Pitcher_Tend_SI',
       'Pitcher_Strike_Tend_FF', 'Pitcher_Strike_Tend_CU',
       'Pitcher_Strike_Tend_CH', 'Pitcher_Strike_Tend_SL',
       'Pitcher_Strike_Tend_SI', 'PrevPitch_FF', 'PrevPitch_CU',
       'PrevPitch_CH', 'PrevPitch_SL', 'PrevPitch_SI', 'PrevPitch_Strike',
       'PrevPitch_Ball', 'PrevPitch_InPlay', 'Prev5_Pcnt_FF',
       'Prev5_FF_Strike', 'Prev5_Pcnt_CU', 'Prev5_CU_Strike',
       'Prev5_Pcnt_CH', 'Prev5_CH_Strike', 'Prev5_Pcnt_SL',
       'Prev5_SL_Strike', 'Prev5_Pcnt_SI', 'Prev5_SI_Strike',
       'Prev10_Pcnt_FF', 'Prev10_FF_Strike', 'Prev10_Pcnt_CU',
       'Prev10_CU_Strike', 'Prev10_Pcnt_CH', 'Prev10_CH_Strike',
       'Prev10_Pcnt_SL', 'Prev10_SL_Str