In [124]:
# Import our dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [125]:
mlb_df = pd.read_csv("Resources/mlb_complete.csv")
mlb_df.drop(columns=['Unnamed: 0','W','L','W-L%'], inplace=True)
mlb_df.head()

Unnamed: 0,Tm,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,...,BF,LOB_y,DefEff,Ch,PO,A,E,DP,post_season,Year
0,Arizona Diamondbacks_2022,26.5,4.33,5351,702,1232,262,24,173,658,...,6065,1051,0.704,5746,4290,1370,86,134,False,2022
1,Atlanta Braves_2022,27.5,4.87,5509,789,1394,298,11,243,753,...,6031,1101,0.701,5803,4344,1382,77,110,True,2022
2,Baltimore Orioles_2022,27.0,4.16,5429,674,1281,275,25,171,639,...,6058,1092,0.69,5920,4300,1529,91,151,False,2022
3,Boston Red Sox_2022,28.8,4.54,5539,735,1427,352,12,155,704,...,6167,1109,0.683,5825,4293,1447,85,134,False,2022
4,Chicago Cubs_2022,27.9,4.06,5425,657,1293,265,31,159,620,...,6162,1130,0.697,5880,4331,1453,96,139,False,2022


In [126]:
features_df = mlb_df.drop(columns=['Tm','post_season','Year'])
target_df = mlb_df['post_season'].astype(int)

In [127]:
features_df.shape

(150, 36)

In [128]:
features_df

Unnamed: 0,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,SB,...,SO_y,HBP_y,BF,LOB_y,DefEff,Ch,PO,A,E,DP
0,26.5,4.33,5351,702,1232,262,24,173,658,104,...,1216,59,6065,1051,0.704,5746,4290,1370,86,134
1,27.5,4.87,5509,789,1394,298,11,243,753,87,...,1554,62,6031,1101,0.701,5803,4344,1382,77,110
2,27.0,4.16,5429,674,1281,275,25,171,639,95,...,1214,64,6058,1092,0.690,5920,4300,1529,91,151
3,28.8,4.54,5539,735,1427,352,12,155,704,52,...,1346,72,6167,1109,0.683,5825,4293,1447,85,134
4,27.9,4.06,5425,657,1293,265,31,159,620,111,...,1383,73,6162,1130,0.697,5880,4331,1453,96,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,28.0,4.69,5498,759,1369,248,9,205,725,63,...,1337,67,6271,1214,0.685,6039,4366,1540,133,151
146,27.1,4.42,5475,716,1415,274,43,150,664,128,...,1421,53,5992,1001,0.708,5913,4345,1483,85,136
147,27.4,4.55,5453,737,1308,266,24,194,696,74,...,1121,72,6231,1090,0.682,5971,4293,1558,120,168
148,28.9,4.38,5477,709,1336,320,16,217,680,47,...,1298,67,6265,1132,0.678,5849,4301,1447,101,138


In [129]:
X_train, X_test, y_train, y_test = train_test_split(features_df, target_df, random_state=78)
X_scaler = skl.preprocessing.StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [130]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=36))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [131]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [132]:
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

INFO:tensorflow:Oracle triggered exit


In [133]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 3,
 'num_layers': 2,
 'units_0': 9,
 'units_1': 7,
 'units_2': 5,
 'units_3': 7,
 'units_4': 1,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [134]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.5004 - accuracy: 0.8421 - 279ms/epoch - 139ms/step
Loss: 0.5004073977470398, Accuracy: 0.8421052694320679
