# Imports

In [None]:
import numpy as np
import pandas as pd
from   sklearn.model_selection import train_test_split
from   sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
import tensorflow as tf
from   tensorflow.keras.callbacks import ModelCheckpoint

# Model Training

## Get Encoded data

In [None]:
# Load the pre-processed charity data set from the CSV file in the working directory
model_df = pd.read_csv(
    filepath_or_buffer="charity_data.preprocess.2.one_hot_encoded.csv",
)

In [None]:
# Baseline size of the data set, taken as the number of non-null
# IS_SUCCESSFUL entries (Series.count() skips missing values)
row_count = model_df["IS_SUCCESSFUL"].count()
print(f"The number of rows in the data set is: {row_count}")

## Model Parameters

In [None]:
# --- Outlier handling ---------------------------------------------------
# Rows whose ASK_AMT is greater than this threshold are removed before
# training. Lower the value to experiment with stricter outlier removal.
feature_ask_amt_outlier_le_threshold = 100_000_000_000_000

# --- Category binning ---------------------------------------------------
# Category values of APPLICATION_TYPE / CLASSIFICATION whose row count is
# less than or equal to the matching threshold are binned into "Other".
# A threshold of 0 (or less) disables binning for that feature.
feature_appl_binning_le_row_cnt = 10
feature_class_binning_le_row_cnt = 4

# --- Network architecture -----------------------------------------------
# Node count and activation function for each hidden layer. Hidden layers
# 2 and 3 are only added to the network when their node count is > 0.
hidden_layer_1_nodes, hidden_layer_1_act_func = 75, "relu"
hidden_layer_2_nodes, hidden_layer_2_act_func = 150, "relu"
hidden_layer_3_nodes, hidden_layer_3_act_func = 10, "relu"

# Output layer: a single sigmoid node
output_layer_nodes, output_layer_act_func = 1, "sigmoid"

### Remove ASK_AMT Feature Outliers

In [None]:
# Keep only the rows whose ASK_AMT is at or below the outlier threshold.
# An explicit copy is taken because later cells assign columns back into
# model_df; assigning into the result of a boolean-mask selection makes
# pandas emit a SettingWithCopyWarning.
model_df = model_df[model_df["ASK_AMT"] <= feature_ask_amt_outlier_le_threshold].copy()

In [None]:
# Rows removed by the outlier filter: baseline count minus the current
# count of non-null IS_SUCCESSFUL entries
row_count_removed = row_count - model_df["IS_SUCCESSFUL"].count()
print(f"Eliminating ASK_AMT outliers over {feature_ask_amt_outlier_le_threshold} dollars removed {row_count_removed} rows")

### Bin features APPLICATION_TYPE and CLASSIFICATION

In [None]:
def remapp(classification, remap_class_list):
    """Return "Other" for a binned category value, else the value itself.

    Args:
        classification: The category value to inspect.
        remap_class_list: Container of category values that are being
            binned; membership is tested with ``in``.

    Returns:
        The string "Other" when ``classification`` is found in
        ``remap_class_list``; otherwise ``classification`` unchanged.
    """
    return "Other" if classification in remap_class_list else classification

In [None]:
def set_feature_binning(feature_Name, binning_le_row_cnt):
    """Bin the rare category values of a ``model_df`` column into "Other".

    Every category value of ``model_df[feature_Name]`` whose row count is
    less than or equal to ``binning_le_row_cnt`` is replaced by the string
    "Other". The module-level ``model_df`` is modified in place and a
    summary line is printed.

    Args:
        feature_Name: Name of the ``model_df`` column to bin.
        binning_le_row_cnt: Row-count threshold. Binning is performed only
            when this value is greater than 0.

    Returns:
        None.
    """
    # A threshold that is not greater than 0 disables binning entirely.
    if not binning_le_row_cnt > 0:
        return

    # Row count of every distinct category value of the feature.
    category_row_counts = model_df[feature_Name].value_counts()

    # Category values at or below the threshold are the ones to be binned.
    rare_categories = category_row_counts[category_row_counts <= binning_le_row_cnt].index

    # Vectorized replacement: keep each value that is not rare and put
    # "Other" everywhere else (avoids a Python-level call per row).
    is_rare = model_df[feature_Name].isin(rare_categories)
    model_df[feature_Name] = model_df[feature_Name].where(~is_rare, "Other")

    # Report how many distinct category values remain after binning.
    feature_updated_cat_count = model_df[feature_Name].nunique()
    print(f" This feature {feature_Name} has the binning row count threshold of {binning_le_row_cnt} and has reduced that catigory values to {feature_updated_cat_count}")

In [None]:
# Number of distinct (non-null) APPLICATION_TYPE values before binning
unique_class_count = len(model_df["APPLICATION_TYPE"].dropna().unique())
print(f"The number of unique catigory values for the feature APPLICATION_TYPE is {unique_class_count}")

In [None]:
# Number of distinct (non-null) CLASSIFICATION values before binning
unique_class_count = len(model_df["CLASSIFICATION"].dropna().unique())
print(f"The number of unique catigory values for the feature CLASSIFICATION is {unique_class_count}")

In [None]:
# Bin the rare APPLICATION_TYPE values using the configured threshold
set_feature_binning(
    feature_Name="APPLICATION_TYPE",
    binning_le_row_cnt=feature_appl_binning_le_row_cnt,
)

In [None]:
# Bin the rare CLASSIFICATION values using the configured threshold
set_feature_binning(
    feature_Name="CLASSIFICATION",
    binning_le_row_cnt=feature_class_binning_le_row_cnt,
)

### Encoding Categorical Fields APPLICATION_TYPE and CLASSIFICATION After Binning

In [None]:
# Create a OneHotEncoder instance that returns a dense integer array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and
# later releases removed the old name, so try the new keyword first and
# fall back to the old one on releases that do not know it.
try:
    encoder = OneHotEncoder(sparse_output=False, dtype=np.int64)
except TypeError:
    encoder = OneHotEncoder(sparse=False, dtype=np.int64)

# Fit and transform the OneHotEncoder using the categorical variable list.
# The encoded frame reuses model_df's index: rows may have been removed by
# the ASK_AMT outlier filter, and a fresh 0..n-1 index would misalign (or
# drop) rows when the two frames are later merged on their indexes.
encoded_df = pd.DataFrame(
    encoder.fit_transform(model_df[["APPLICATION_TYPE", "CLASSIFICATION"]]),
    index=model_df.index,
)

# Add the encoded variable names to the DataFrame. `get_feature_names` was
# superseded by `get_feature_names_out`; use whichever this release offers.
if hasattr(encoder, "get_feature_names_out"):
    encoded_df.columns = encoder.get_feature_names_out(["APPLICATION_TYPE", "CLASSIFICATION"])
else:
    encoded_df.columns = encoder.get_feature_names(["APPLICATION_TYPE", "CLASSIFICATION"])

In [None]:
# Merge one-hot encoded features and drop the originals
model_df.drop(["APPLICATION_TYPE","CLASSIFICATION"],1, inplace=True)
model_df = model_df.merge(encoded_df,left_index=True, right_index=True)

In [None]:
# Report, for every one-hot column derived from APPLICATION_TYPE or
# CLASSIFICATION, how many distinct values the column holds
for column_name in model_df.columns:
    if not column_name.startswith(("APPLICATION_TYPE", "CLASSIFICATION")):
        continue
    print(f"The column:[{column_name}] has [{len(model_df[column_name].unique())}] values")

### Scale ASK_AMT feature

In [None]:
# Standardize ASK_AMT with a StandardScaler.
# NOTE(review): the scaler is fitted on the full data set before the
# train/test split, so test rows influence the scaling statistics - confirm
# this is intended.
scaler = StandardScaler()

# StandardScaler expects a 2-D input, hence the single-column reshape
X_ASK_AMT_scaler = scaler.fit(
    model_df["ASK_AMT"].to_numpy().reshape(-1, 1)
)

# Replace the raw ASK_AMT values with their scaled counterparts
model_df["ASK_AMT"] = X_ASK_AMT_scaler.transform(
    model_df["ASK_AMT"].to_numpy().reshape(-1, 1)
)

### Create training and testing data sets

In [None]:
# Split our preprocessed data into our features and target arrays.
# The target is the IS_SUCCESSFUL column; every other column is a feature.
y = model_df["IS_SUCCESSFUL"].values
# `columns=` replaces the positional axis argument, which pandas 2.0 and
# later no longer accept.
X = model_df.drop(columns=["IS_SUCCESSFUL"]).values

# Split the preprocessed data into a training and testing dataset.
# A fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

### Train neural network

In [None]:
# Define the model - a sequential deep neural net whose input width equals
# the number of feature columns in the training data
number_input_features = len(X_train[0])

nn = tf.keras.models.Sequential()

# First hidden layer (always added); it also declares the input dimension
nn.add(
    tf.keras.layers.Dense(
        units=hidden_layer_1_nodes,
        input_dim=number_input_features,
        activation=hidden_layer_1_act_func,
    )
)

# Second and third hidden layers are optional: each one is added only when
# its configured node count is greater than 0
for _layer_nodes, _layer_act_func in (
    (hidden_layer_2_nodes, hidden_layer_2_act_func),
    (hidden_layer_3_nodes, hidden_layer_3_act_func),
):
    if _layer_nodes > 0:
        nn.add(tf.keras.layers.Dense(units=_layer_nodes, activation=_layer_act_func))

# Output layer
nn.add(
    tf.keras.layers.Dense(
        units=output_layer_nodes,
        activation=output_layer_act_func,
    )
)

# Check the structure of the model
nn.summary()

In [None]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

### Accuracy and Error Data for Model Training

In [None]:
# Create a DataFrame containing training history
history_df = pd.DataFrame(fit_model.history, index=range(1,len(fit_model.history["loss"])+1))

In [None]:
# Plot the training loss per epoch as a line chart
print("Model loss grapth")
history_df.plot.line(y="loss")

In [None]:
# Plot the training accuracy per epoch as a line chart
print("Model accuracy grapth")
history_df.plot.line(y="accuracy")

In [None]:
# Evaluate the trained model on the held-out test split; evaluate() yields
# the loss followed by the accuracy metric configured at compile time
model_loss, model_accuracy = nn.evaluate(x=X_test, y=y_test, verbose=2)
print(f"Test set statistics:  Loss: {model_loss}, Accuracy: {model_accuracy}")

### Save Model if Accuracy >= 0.75

In [None]:
# Export the model to an HDF5 file, but only when the test-set accuracy
# reaches the 0.75 target
if model_accuracy >= 0.75:
    nn.save(filepath="charity_analysis_trained.h5")