## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Import and read the charity_data.csv.
# pandas is already available as `pd` from the dependency imports at the top
# of this cell, so it is not imported a second time here.
CHARITY_DATA_URL = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
application_df = pd.read_csv(CHARITY_DATA_URL)
application_df.head(2)

In [None]:
import seaborn as sns

In [None]:
# Bar chart of how many rows fall into each IS_SUCCESSFUL class.
sns.countplot(data=application_df, x="IS_SUCCESSFUL")

## This bar plot shows that the data is relatively well balanced.

In [None]:
# 'EIN' and 'NAME' are identification columns that are not useful as
# features, so they are removed before any encoding takes place.
id_columns = ["EIN", "NAME"]
application_df = application_df.drop(columns=id_columns)
application_df.head(2)

In [None]:
# Count the distinct values in every column to see which ones need binning.
unique_counts = application_df.nunique()
unique_counts

In [None]:
# Frequency of each APPLICATION_TYPE value; used below to pick a binning cutoff.
application_type_counts = application_df.loc[:, "APPLICATION_TYPE"].value_counts()
application_type_counts

In [None]:
# Choose a cutoff value and create a list of application types to be replaced.
# Application types that occur this many times or fewer are folded into a
# single "Other" bucket.
APPLICATION_TYPE_CUTOFF = 156
application_types_to_replace = list(
    application_type_counts[application_type_counts <= APPLICATION_TYPE_CUTOFF].index
)

# Replace in dataframe: relabel every rare type in one vectorized pass rather
# than issuing a separate full-column `replace` call for each value.
is_rare_application_type = application_df['APPLICATION_TYPE'].isin(application_types_to_replace)
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].mask(
    is_rare_application_type, "Other"
)

# Check to make sure binning was successful
application_df['APPLICATION_TYPE'].value_counts()


In [None]:
# Frequency of each CLASSIFICATION value; used below to pick a binning cutoff.
classification_value_count = application_df["CLASSIFICATION"].value_counts()
classification_value_count

In [None]:
# Show only the classifications that appear more than once.
appears_more_than_once = classification_value_count > 1
classification_value_count[appears_more_than_once]

In [None]:
# Choose a cutoff value and create a list of classifications to be replaced.
# Classifications that occur this many times or fewer are folded into a
# single "Other" bucket.
CLASSIFICATION_CUTOFF = 777
classifications_to_replace = list(
    classification_value_count[classification_value_count <= CLASSIFICATION_CUTOFF].index
)

# Replace in dataframe: relabel every rare classification in one vectorized
# pass rather than issuing a separate full-column `replace` call per value.
is_rare_classification = application_df['CLASSIFICATION'].isin(classifications_to_replace)
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].mask(
    is_rare_classification, "Other"
)

# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()

In [None]:
# Display the DataFrame after the column drops and binning steps above.
application_df

In [None]:
# One-hot encode the categorical columns with `pd.get_dummies`.
# NOTE: despite its name, `scaled_df` is only encoded at this point; the
# feature scaling is done later with StandardScaler.
scaled_df = pd.get_dummies(data=application_df)
scaled_df.head()

In [None]:
# Shape of the encoded data before splitting it into training and testing sets
scaled_df.shape

In [None]:
# Class counts of the target column in the encoded data.
scaled_df["IS_SUCCESSFUL"].value_counts()

In [None]:
# Separate the target (IS_SUCCESSFUL) from the feature columns and convert
# both to arrays.
target_column = 'IS_SUCCESSFUL'
y = scaled_df[target_column].values
X = scaled_df.drop(columns=target_column).values

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.3,
)


In [None]:
# Create the StandardScaler and fit it on the training features only, so the
# test features are scaled with statistics learned from the training split.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# The split above was made with test_size = 0.3, so X_train holds the
# remaining 70 percent of the rows (30 percent went to X_test).
X_train.shape

## Compile, Train and Evaluate the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Define the model: a feed-forward network with two hidden layers whose input
# width is taken from the number of columns in a training row.
n_input_features = len(X_train[0])

nn = Sequential()

# Hidden layer 1: 80 ReLU units, fed by the input features
nn.add(Dense(80, activation='relu', input_dim=n_input_features))

# Hidden layer 2: 30 ReLU units
nn.add(Dense(30, activation='relu'))

# Output layer: one sigmoid unit for the binary classification target
nn.add(Dense(1, activation="sigmoid"))

# Print the layer-by-layer structure of the model
nn.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 200 epochs on the scaled training data, scoring the scaled test
# split as validation data.
nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=200,
    validation_data=(X_test_scaled, y_test),
)

In [None]:
# Tabulate the per-epoch metrics stored in the model's training history.
training_history = nn.history.history
model_losses_df = pd.DataFrame(training_history)
model_losses_df.head()

In [None]:
# Plot loss against val_loss to check for overfitting and to see how the two
# curves change relative to each other.
loss_columns = ['loss', 'val_loss']
model_losses_df[loss_columns].plot()

## In the above plot, `loss` is the loss on the training set, while `val_loss` is the loss on the test set. The plot shows that the validation loss increased while the training loss decreased, which indicates that the model is overfitting.

In [None]:
# Score the trained model on the scaled test split and report both metrics.
test_metrics = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

## The evaluation shows an accuracy of around 72 percent after 200 epochs. However, the plot makes it evident that 200 epochs is too many, because the loss and the val_loss no longer track each other. As a result, I decided to add an early stopping mechanism so that training stops automatically once the val_loss is no longer improving.

In [None]:
# Adding an early stopping (callback) mechanism
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# This callback stops the training when the validation loss (`val_loss`) has
# not improved for 25 consecutive epochs. `restore_best_weights=True` rolls the
# model back to the weights from its best epoch, so the non-improving
# "patience" epochs are not what gets evaluated afterwards.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=25,
    restore_best_weights=True,
)

In [None]:
# Train the model with early stopping.
# `nn` has already been fitted for 200 epochs in an earlier cell; calling
# `fit` again would continue from those weights, so the early-stopping run
# would start from an already over-trained model. Re-create the same
# architecture with freshly initialised weights and compile it before training.
nn = tf.keras.models.clone_model(nn)
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
nn.fit(
    X_train_scaled,
    y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=200,
    callbacks=[early_stop],
)

In [None]:
# Rebuild the metrics table from the history of the early-stopping run.
early_stop_history = nn.history.history
model_losses_df = pd.DataFrame(data=early_stop_history)
model_losses_df.head()

In [None]:
# Plot loss against val_loss for the run that used the early stopping
# callback, to check for overfitting and compare how the two curves change.
curves_to_plot = ['loss', 'val_loss']
model_losses_df.loc[:, curves_to_plot].plot()

## Not a good result at all. Let's add Dropout layers and see whether the model improves.

In [None]:
# Measure loss and accuracy on the scaled test data after the early-stopping run.
evaluation = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss = evaluation[0]
model_accuracy = evaluation[1]
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

## To prevent overfitting, I added Dropout layers from the TensorFlow library. A Dropout layer randomly sets input units to 0 with a frequency of `rate` at each step during training, which helps prevent overfitting. In my case, I decided to drop half of the inputs (`rate = 0.5`) at each training step.

In [None]:
# Dropout
from tensorflow.keras.layers import Dropout

In [None]:
# Define the model - deep neural net with a Dropout layer after each hidden layer.

# Take the input width from the scaled training data instead of hard-coding
# it, so the model still builds if the binning cutoffs (and therefore the
# number of one-hot columns) change.
n_input_features = X_train_scaled.shape[1]

nn = Sequential()

# First hidden layer
nn.add(Dense(units = 80,activation = 'relu', input_dim = n_input_features, name = 'layer1'))
# Dropout layer: randomly zeroes half of the first hidden layer's outputs during training
nn.add(Dropout(0.5))
# Second hidden layer
nn.add(Dense(units = 30,activation = 'relu', name = 'layer2'))
# Dropout layer: randomly zeroes half of the second hidden layer's outputs during training
nn.add(Dropout(0.5))
# Output layer for binary classification problem using sigmoid activation function
nn.add(Dense(units = 1,activation = "sigmoid"))
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the dropout model for up to 200 epochs, validating on the scaled test
# split and passing the early stopping callback.
fit_options = {
    "validation_data": (X_test_scaled, y_test),
    "epochs": 200,
    "callbacks": [early_stop],
}
nn.fit(X_train_scaled, y_train, **fit_options)

In [None]:
# Rebuild the metrics table from the history of the dropout model's training.
dropout_history = nn.history.history
model_losses_df = pd.DataFrame(dropout_history)
model_losses_df.head()

In [None]:
# Plot loss against val_loss for the dropout model.
dropout_curves = ['loss', 'val_loss']
model_losses_df[dropout_curves].plot()

## This is the type of plot that I was expecting. Both the loss and the val_loss decrease and converge. Therefore, the Dropout layers have improved our model. The accuracy has also improved slightly, as shown below.

In [None]:
# Score the dropout model on the scaled test data (silent mode) and report
# both metrics.
dropout_metrics = nn.evaluate(X_test_scaled, y_test, verbose=0)
model_loss, model_accuracy = dropout_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

## I did not use Google Colab, and I did not save the model as an HDF5 file.