# Helper Functions

In [1]:
def limit_unique(df, max_value, columns_to_limit):
    """
    Cap the number of distinct values in each listed column by binning the
    least frequent values into a single "Other" category.

    For every column in ``columns_to_limit`` the ``max_value - 1`` most
    frequent values (in the order returned by ``value_counts()``) are kept and
    every remaining value is replaced with the string "Other". The column
    therefore ends up with at most ``max_value`` distinct non-missing values.
    Values tied on frequency are kept or binned according to the order in
    which ``value_counts()`` lists them.

    The dataframe is modified in place. After each column is processed its
    value counts and number of unique values are printed so the binning can
    be checked by eye.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose columns are binned in place.
    max_value : int
        Maximum number of distinct values to leave in each column, counting
        the "Other" bin. Must be at least 1.
    columns_to_limit : iterable
        Labels of the columns of ``df`` to process.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``max_value`` is smaller than 1.
    """
    if max_value < 1:
        raise ValueError("max_value must be at least 1")

    for col in columns_to_limit:
        # value_counts() lists values from most to least frequent
        total_counts = df[col].value_counts()

        # Pick the values to bin by rank rather than by a count cutoff:
        # a cutoff keeps every value tied with the last retained one, which
        # can leave more than max_value unique values, and it fails outright
        # when there is no retained value to take the cutoff from.
        replace_values = list(total_counts.index[max_value - 1:])

        # One replace call handles every rare value in a single pass
        if replace_values:
            df[col] = df[col].replace(replace_values, "Other")

        # Check to make sure binning was successful
        print(df[col].value_counts())
        print(f'Number of unique values: {df[col].nunique()}\n')

In [2]:
def create_model(hp):
    """
    Build and compile a sigmoid-output Keras network whose architecture is
    picked through the tuner's hyperparameter object.

    Tuned hyperparameters, in the order they are registered:
      * 'activation_layer_0' / 'units_layer_0' - first hidden layer
      * 'num_layers' - number of further hidden layers, from 1 up to
        max_hidden_layers - 1
      * 'units_layer_{i}' / 'activation_layer_{i}' - one pair for each
        further hidden layer

    Relies on names defined outside this function: tf, activation_functions,
    max_num_neurons, step_count, number_input_features, max_hidden_layers and
    optimiser.

    Parameters
    ----------
    hp
        Hyperparameter container supplied by kerastuner; only its ``Choice``
        and ``Int`` methods are used here.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy
    metric). No dropout or kernel regularisation is applied.
    """
    dense = tf.keras.layers.Dense
    model = tf.keras.models.Sequential()

    # First hidden layer: the activation is chosen before the width so the
    # hyperparameters are registered in a fixed order
    first_activation = hp.Choice('activation_layer_0', activation_functions)
    first_units = hp.Int(
        'units_layer_0',
        min_value=1,
        max_value=max_num_neurons,
        step=step_count,
    )
    model.add(dense(
        units=first_units,
        activation=first_activation,
        input_dim=number_input_features,
    ))

    # Let the tuner decide how many more hidden layers follow the first one
    extra_layers = hp.Int(
        'num_layers',
        min_value=1,
        max_value=max_hidden_layers - 1,
    )

    # Layer numbering continues from 1 because layer 0 was added above
    for layer_idx in range(1, extra_layers + 1):
        # Width first, then activation, each tuned independently per layer
        layer_units = hp.Int(
            f'units_layer_{layer_idx}',
            min_value=1,
            max_value=max_num_neurons,
            step=step_count,
        )
        layer_activation = hp.Choice(
            f'activation_layer_{layer_idx}',
            activation_functions,
        )
        model.add(dense(units=layer_units, activation=layer_activation))

    # Single sigmoid unit as the output layer
    model.add(dense(units=1, activation="sigmoid"))

    model.compile(
        loss="binary_crossentropy",
        optimizer=optimiser,
        metrics=["accuracy"],
    )

    return model