# Tuning with keras-tuner

In [None]:
# Import our dependencies

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras_tuner as kt
import pandas as pd
import tensorflow as tf

# Functions

In [None]:
def load_dataset(filepath="../Resources/charity_data.csv"):
    """ Reads dataset csv and returns pandas dataframe

    Parameters
    ----------
    filepath : str, path-like or file-like, optional
        Where to read the CSV from. Defaults to the path this notebook
        has always used, so calling with no arguments is unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents, with no cleaning applied.
    """

    # low_memory=False has pandas parse the file in one pass instead of
    # in chunks, which avoids mixed-type inference within a column.
    return pd.read_csv(filepath, encoding="utf-8", low_memory=False)

In [None]:
def clean_dataset(a_df):
    """ Returns a cleaned copy: duplicate rows removed, rows containing
    NA removed, the EIN and NAME columns dropped, and the index
    renumbered from zero. The input dataframe is not modified. """

    unwanted_columns = ["EIN", "NAME"]

    # Duplicates are detected while EIN/NAME are still present, so two
    # rows only count as duplicates if those columns match as well.
    cleaned = (
        a_df.drop_duplicates()
        .dropna()
        .drop(columns=unwanted_columns)
        .reset_index(drop=True)
    )

    return cleaned

In [None]:
def examine_dataset(a_df):
    """ Prints summary info for the dataframe, then the value counts of
    every column that has more than 10 unique values """

    a_df.info()

    print("\n\n")

    # Columns with more than 10 distinct values get their value counts printed.
    wide_columns = [name for name in a_df.columns if a_df[name].nunique() > 10]

    for name in wide_columns:
        counts = a_df[name].value_counts()
        print(f"{name}\n\n{counts}\n\n")

In [None]:
def reduce_cats(a_col, a_cutoff, a_df=None):
    """ Bins the rare values of one column into 'Other', in place

    Parameters
    ----------
    a_col : str
        Name of the column to bin (a column label, not a series).
    a_cutoff : int
        Values occurring fewer than this many times are replaced
        with the string "Other".
    a_df : pandas.DataFrame, optional
        Dataframe to modify. When omitted, the notebook-level `df`
        is used, so existing two-argument calls behave as before.

    Returns
    -------
    None
        The column is overwritten on the dataframe; the before/after
        value counts are printed.
    """

    frame = df if a_df is None else a_df

    counts_before = frame[a_col].value_counts()

    print(f"BEFORE: \n\n{counts_before}\n\n")

    types_to_replace = counts_before[counts_before < int(a_cutoff)].index.tolist()

    # One list-based replace instead of one full-column pass per rare
    # value; the result is the same but it scales to columns with
    # thousands of distinct values.
    if types_to_replace:
        frame[a_col] = frame[a_col].replace(types_to_replace, "Other")

    # Check to make sure binning was successful

    print(f"AFTER: \n\n{frame[a_col].value_counts()}\n\n")

In [None]:
def encode_df(a_df):
    """ Returns a dataframe holding the one-hot encoding of every
    object-dtype column followed by the "IS_SUCCESSFUL" column.

    Columns that are neither object-dtype nor "IS_SUCCESSFUL" are not
    carried over into the result. """

    object_columns = [
        name for name, dtype in a_df.dtypes.items() if dtype == "object"
    ]

    print(f"CATEGORIES FOR EACH CATEGORICAL FEATURE ENCODED:\n\n{a_df[object_columns].nunique()}\n\n")

    # Dummy columns are named "<column>_<value>".
    pieces = [
        pd.get_dummies(a_df[name], prefix=name, prefix_sep='_')
        for name in object_columns
    ]

    # The target goes last.
    pieces.append(a_df["IS_SUCCESSFUL"])

    return pd.concat(pieces, axis=1)

In [None]:
def pre_process(a_df):
    """ Make X,y ... train_test_split ... scale, fit and transform

    Parameters
    ----------
    a_df : pandas.DataFrame
        Encoded dataframe containing the "IS_SUCCESSFUL" target
        column; every other column is used as a feature.

    Returns
    -------
    tuple
        (input_dim, X_train_scaled, X_test_scaled, y_train, y_test),
        where input_dim is the number of feature columns.
    """

    # Split our preprocessed data into our features and target arrays.
    # Read from the argument; the previous version ignored it and used
    # the notebook-level `enc_df` instead.

    y = a_df["IS_SUCCESSFUL"].values
    X = a_df.drop(["IS_SUCCESSFUL"], axis=1).values

    # Split the preprocessed data into a training and testing dataset

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Fit the scaler on the training split only, then apply the same
    # transform to both splits so no test statistics leak into scaling.

    X_scaler = StandardScaler().fit(X_train)

    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    print(f"TRAIN SCALED SHAPE: {X_train_scaled.shape}")
    print(f"TEST SCALED SHAPE: {X_test_scaled.shape}")

    input_dim = X_train_scaled.shape[1]

    return input_dim, X_train_scaled, X_test_scaled, y_train, y_test

In [None]:
def create_model(hp, input_dim=None):
    """ Builds and compiles a tunable binary classifier for keras-tuner

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Supplies the searched values: the shared hidden activation,
        the width of the first layer, the number of additional hidden
        layers, and the width of each of those.
    input_dim : int, optional
        Number of input features. When omitted, the notebook-level
        `input_dim` (as returned by pre_process) is used if it exists;
        otherwise the historical default of 43 applies.

    Returns
    -------
    tf.keras.Model
        Compiled Sequential model with a single sigmoid output unit.
    """

    # A hard-coded width breaks fitting whenever binning cutoffs change
    # the number of encoded columns, so prefer the value computed from
    # the data and keep 43 only as the last resort.

    if input_dim is None:
        input_dim = globals().get("input_dim", 43)

    # Instantiate a Sequential model

    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers.

    activation = hp.Choice('activation', ['relu','tanh','sigmoid'])

    # Allow kerastuner to decide the number of neurons in first layer and also
    # the activation function.

    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=1), activation=activation, input_dim=input_dim))

    # Allow kerastuner to decide the number of hidden layers and number of
    # neurons in each one

    for i in range(hp.Int('num_layers', 1, 4)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    # Define the output layer
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [None]:
def find_best():
    """ Uses keras-tuner to find best model specs

    Reads the notebook-level X_train_scaled, y_train, X_test_scaled and
    y_test. Prints the best hyperparameters, the best model's loss and
    accuracy, and its summary. Returns nothing.

    NOTE(review): the same test split is used as validation data during
    the search and for the final evaluation, so the reported accuracy is
    likely optimistic - consider holding out a separate validation split.
    """

    tuner = kt.Hyperband(
        create_model,
        objective="val_accuracy",
        max_epochs=10,
        hyperband_iterations=2)

    # Run the kerastuner search for best hyperparameters
    # NOTE(review): Hyperband appears to set the per-trial epoch count
    # itself (bounded by max_epochs), so epochs=20 is probably
    # overridden - confirm against the keras-tuner documentation.

    tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

    # Get best model hyperparameters

    best_hyper = tuner.get_best_hyperparameters(1)[0]

    print(best_hyper.values)

    # Evaluate best model against full test data

    best_model = tuner.get_best_models(1)[0]
    model_loss, model_accuracy = best_model.evaluate(X_test_scaled, y_test, verbose=2)

    print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

    # Summarize the best model. summary() prints the table itself and
    # returns None, so wrapping it in print() emitted a stray "None".

    best_model.summary()

# Call functions

In [None]:
# Load the CSV and clean it; `df` is the notebook-wide dataframe that
# reduce_cats() reads and modifies in later cells.
df = clean_dataset(load_dataset())

# Print summary info plus value counts for columns with > 10 unique values
examine_dataset(df)

In [None]:
# Bin columns with > 10 unique values
# Each call overwrites the named column on the notebook-level `df`:
# values occurring fewer times than the cutoff become "Other".

reduce_cats("APPLICATION_TYPE", 500)

reduce_cats("ASK_AMT", 25_000)

reduce_cats("CLASSIFICATION", 1800)

In [None]:
# Preview the first rows of `df` after binning
df.head()

In [None]:
# One-hot encode the object columns; the target column is appended last
enc_df = encode_df(df)

# Display the encoded dataframe as the cell output
enc_df

In [None]:
# Split and scale; these names are read as globals by find_best() and the training cell below
input_dim, X_train_scaled, X_test_scaled, y_train, y_test = pre_process(enc_df)

In [None]:
# Run the Hyperband search and print the best hyperparameters, metrics and model summary
find_best()

# Compile, Train and Evaluate the Model

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# NOTE(review): the layer widths and tanh activation are hand-entered,
# presumably copied from a tuner run - confirm they match the latest search.

nn = tf.keras.models.Sequential()

# First hidden layer. It also declares the input width, which is taken
# from pre_process() rather than hard-coded so that it always matches
# the number of columns in X_train_scaled.

nn.add(tf.keras.layers.Dense(units=7, input_dim=input_dim, activation="tanh"))

# Second hidden layer

nn.add(tf.keras.layers.Dense(units=9, activation="tanh"))

# Third hidden layer

nn.add(tf.keras.layers.Dense(units=9, activation="tanh"))

# Fourth hidden layer

nn.add(tf.keras.layers.Dense(units=7, activation="tanh"))

# Output layer: one sigmoid unit for the binary target

nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model

nn.summary()

# Compile the model

nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model

fit_model = nn.fit(X_train_scaled, y_train, epochs=10)

# Evaluate the model using the test data

model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Export our model to HDF5 file

nn.save("../Models/nn_optimized.h5")

# Best result: 

### 