In [1]:
!pip install keras-tuner



In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import matplotlib as plt
import sklearn as skl
import tensorflow as tf

# Import our input dataset
stats_df = pd.read_csv('stats_target_clean.csv')
stats_df.head()

Unnamed: 0,Position,Season,Tm,Team_Name,Lg,G,W,L,W/L%,MOV,...,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,2020-21,WAS,Washington Wizards,NBA,72,34,38,0.472,-1.83,...,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,0,2020-21,UTA,Utah Jazz,NBA,72,52,20,0.722,9.25,...,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,0,2020-21,TOR,Toronto Raptors,NBA,72,27,45,0.375,-0.47,...,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,0,2020-21,SAS,San Antonio Spurs,NBA,72,33,39,0.458,-1.74,...,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,0,2020-21,SAC,Sacramento Kings,NBA,72,31,41,0.431,-3.68,...,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [3]:
stats_df.drop('Lg', axis=1, inplace=True)

In [4]:
stats_df.drop('Season', axis=1, inplace=True)

In [5]:
stats_df.head()

Unnamed: 0,Position,Tm,Team_Name,G,W,L,W/L%,MOV,SOS,SRS,...,eFG%,TS%,TOV%,ORB%,FT/FGA,opp_eFG%,opp_TS%,opp_TOV%,opp_ORB%,opp_FT/FGA
0,0,WAS,Washington Wizards,72,34,38,0.472,-1.83,-0.01,-1.85,...,0.531,0.569,12.3,21.3,0.221,0.539,0.576,12.5,22.4,0.217
1,0,UTA,Utah Jazz,72,52,20,0.722,9.25,-0.29,8.97,...,0.563,0.597,12.7,24.5,0.195,0.507,0.537,10.3,20.7,0.159
2,0,TOR,Toronto Raptors,72,27,45,0.375,-0.47,-0.07,-0.54,...,0.529,0.567,11.9,20.8,0.196,0.543,0.584,14.4,23.7,0.234
3,0,SAS,San Antonio Spurs,72,33,39,0.458,-1.74,0.15,-1.58,...,0.517,0.554,10.2,20.0,0.192,0.541,0.57,11.8,22.7,0.174
4,0,SAC,Sacramento Kings,72,31,41,0.431,-3.68,0.23,-3.45,...,0.549,0.578,12.0,21.3,0.185,0.557,0.591,12.1,25.0,0.199


In [7]:
# Generate our categorical variable list
stats_cat = stats_df.dtypes[stats_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
stats_df[stats_cat].nunique()

Tm           38
Team_Name    34
dtype: int64

In [8]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(stats_df[stats_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(stats_cat)
encode_df.head()

Unnamed: 0,Tm_ATL,Tm_BOS,Tm_BRK,Tm_CHA,Tm_CHH,Tm_CHI,Tm_CHO,Tm_CLE,Tm_DAL,Tm_DEN,...,Team_Name_Philadelphia 76ers,Team_Name_Phoenix Suns,Team_Name_Portland Trail Blazers,Team_Name_Sacramento Kings,Team_Name_San Antonio Spurs,Team_Name_Seattle Supersonics,Team_Name_Toronto Raptors,Team_Name_Utah Jazz,Team_Name_Vancouver Grizzlies,Team_Name_Washington Wizards
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Merge one-hot encoded features and drop the originals
stats_df = stats_df.merge(encode_df,left_index=True, right_index=True)
stats_df = stats_df.drop(stats_cat,1)
stats_df.head(20)

Unnamed: 0,Position,G,W,L,W/L%,MOV,SOS,SRS,Pace,ORtg,...,Team_Name_Philadelphia 76ers,Team_Name_Phoenix Suns,Team_Name_Portland Trail Blazers,Team_Name_Sacramento Kings,Team_Name_San Antonio Spurs,Team_Name_Seattle Supersonics,Team_Name_Toronto Raptors,Team_Name_Utah Jazz,Team_Name_Vancouver Grizzlies,Team_Name_Washington Wizards
0,0,72,34,38,0.472,-1.83,-0.01,-1.85,104.1,111.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0,72,52,20,0.722,9.25,-0.29,8.97,98.5,117.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0,72,27,45,0.375,-0.47,-0.07,-0.54,99.2,112.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0,72,33,39,0.458,-1.74,0.15,-1.58,98.9,111.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0,72,31,41,0.431,-3.68,0.23,-3.45,100.0,113.6,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0,72,42,30,0.583,1.79,0.01,1.81,98.4,117.8,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2,72,51,21,0.708,5.82,-0.15,5.67,97.2,117.2,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0,72,49,23,0.681,5.58,-0.31,5.28,99.5,113.2,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0,72,21,51,0.292,-9.31,0.29,-9.02,98.7,105.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0,72,22,50,0.306,-10.64,0.51,-10.13,101.0,103.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Remove Position target from features data
y = stats_df.Position
X = stats_df.drop(columns=["Position"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=2))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [12]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [13]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
activation        |sigmoid           |?                 
first_units       |5                 |?                 
num_layers        |3                 |?                 
units_0           |7                 |?                 
tuner/epochs      |3                 |?                 
tuner/initial_e...|0                 |?                 
tuner/bracket     |2                 |?                 
tuner/round       |0                 |?                 

Epoch 1/3


ValueError: in user code:

    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py:787 train_step
        y_pred = self(x, training=True)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\boyer\anaconda3\envs\mlenv\lib\site-packages\keras\engine\input_spec.py:254 assert_input_compatibility
        ' but received input with shape ' + display_shape(x.shape))

    ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 2 but received input with shape (None, 92)
