# Model Optimization Attempts

## Attempt 1: further consolidation of the features, addition of another hidden layer, bump up the node count
###  Preprocessing the Data for a Neural Network

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import preproc_funcs as pf

#  Import and read the charity_data.csv.
# NOTE(review): nothing in this cell actually reads a CSV or assigns
# `model_data_df`, yet the cells below use `model_data_df`. Presumably the
# frame is meant to be built here (possibly via `preproc_funcs`) -- TODO confirm
# and add the load/preprocess step so the notebook survives Restart & Run All.
# pandas is already imported a few lines above; this repeat import is redundant.
import pandas as pd 

In [None]:
# Split our preprocessed data into our features and target arrays
y = model_data_df["IS_SUCCESSFUL"].values
# Name the axis explicitly with `columns=`: passing it positionally
# (`drop([...], 1)`) is deprecated in pandas 1.3+ and rejected by pandas 2.x.
X = model_data_df.drop(columns=["IS_SUCCESSFUL"]).values

# Split the preprocessed data into a training and testing dataset
# (the fixed random_state keeps the split reproducible between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Fit a StandardScaler on the training features only, so that no statistics
# from the test split influence the scaling
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Count how many training rows fall into each target class
from collections import Counter

train_class_counts = Counter(y_train)
train_class_counts

### Compile, Train and Evaluate the Model

In [None]:
# Network shape: width of the input plus the width of each hidden layer
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 66
hidden_nodes_layer2 = 33
hidden_nodes_layer3 = 10

nn_new = tf.keras.models.Sequential()

# First hidden layer also declares the input width
nn_new.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer1,
    input_dim=number_input_features,
    activation="relu",
))

# Second and third hidden layers, appended in order
for layer_width in (hidden_nodes_layer2, hidden_nodes_layer3):
    nn_new.add(tf.keras.layers.Dense(units=layer_width, activation="relu"))

# Output layer: one sigmoid unit for the binary target
nn_new.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Print the layer-by-layer structure
nn_new.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_new.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Checkpoint filename template; `{epoch:02d}` is filled in by the callback
checkpoint_path = "checkpoints_new/weights.{epoch:02d}.hdf5"

# Make sure the target directory exists before training starts
os.makedirs("checkpoints_new/", exist_ok=True)

In [None]:
# Create a callback that saves the model's weights every 5 epochs.
# `period` is a deprecated ModelCheckpoint argument; its replacement,
# `save_freq`, counts batches, so the 5-epoch interval is converted to batches.
batch_size = 32  # Keras' default for fit(), spelled out so the arithmetic below matches
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size  # ceil division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * steps_per_epoch)

# Train the model
fit_model = nn_new.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=batch_size,
    callbacks=[cp_callback])

In [None]:
# Score the trained network on the held-out test split
test_metrics = nn_new.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model to an HDF5 file
nn_new.save(filepath="trained_application_new_v1.h5")

In [None]:
# Collect the per-epoch training metrics into a DataFrame indexed from epoch 1
epoch_count = len(fit_model.history["loss"])
history_df = pd.DataFrame(fit_model.history, index=range(1, epoch_count + 1))

# Training loss per epoch
history_df.plot(y="loss")

In [None]:
# Training accuracy per epoch
history_df.plot.line(y="accuracy")

## Attempt 2: Give the model all of the data

In [None]:
# Split our preprocessed data into our features and target arrays
y = model_data_df["IS_SUCCESSFUL"].values
# Name the axis explicitly with `columns=`: passing it positionally
# (`drop([...], 1)`) is deprecated in pandas 1.3+ and rejected by pandas 2.x.
X = model_data_df.drop(columns=["IS_SUCCESSFUL"]).values

# Split the preprocessed data into a training and testing dataset
# (the fixed random_state keeps the split reproducible between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Fit a StandardScaler on the training features only, so that no statistics
# from the test split influence the scaling
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### Compile, Train and Evaluate the Model

In [None]:
# Network shape: the first hidden layer is three times as wide as the input
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 3*number_input_features
hidden_nodes_layer2 = 150
hidden_nodes_layer3 = 50

nn_new_v2 = tf.keras.models.Sequential()

# First hidden layer also declares the input width
nn_new_v2.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer1,
    input_dim=number_input_features,
    activation="relu",
))

# Second and third hidden layers, appended in order
for layer_width in (hidden_nodes_layer2, hidden_nodes_layer3):
    nn_new_v2.add(tf.keras.layers.Dense(units=layer_width, activation="relu"))

# Output layer: one sigmoid unit for the binary target
nn_new_v2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Print the layer-by-layer structure
nn_new_v2.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_new_v2.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Checkpoint filename template; `{epoch:02d}` is filled in by the callback
checkpoint_path = "checkpoints_new_v2/weights.{epoch:02d}.hdf5"

# Make sure the target directory exists before training starts
os.makedirs("checkpoints_new_v2/", exist_ok=True)

In [None]:
# Create a callback that saves the model's weights every 5 epochs.
# `period` is a deprecated ModelCheckpoint argument; its replacement,
# `save_freq`, counts batches, so the 5-epoch interval is converted to batches.
batch_size = 32  # Keras' default for fit(), spelled out so the arithmetic below matches
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size  # ceil division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * steps_per_epoch)

# Train the model
fit_model = nn_new_v2.fit(
    X_train_scaled,
    y_train,
    epochs=200,
    batch_size=batch_size,
    callbacks=[cp_callback])

In [None]:
# Score the trained network on the held-out test split
test_metrics = nn_new_v2.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model to an HDF5 file
nn_new_v2.save(filepath="trained_application_new_v2.h5")

In [None]:
# Collect the per-epoch training metrics into a DataFrame indexed from epoch 1
epoch_count = len(fit_model.history["loss"])
history_df = pd.DataFrame(fit_model.history, index=range(1, epoch_count + 1))

# Training loss per epoch
history_df.plot(y="loss")

In [None]:
# Training accuracy per epoch
history_df.plot.line(y="accuracy")

## Attempt 3: give the model all the data but tweak model parameters

In [None]:
# Split our preprocessed data into our features and target arrays
y = model_data_df["IS_SUCCESSFUL"].values
# Name the axis explicitly with `columns=`: passing it positionally
# (`drop([...], 1)`) is deprecated in pandas 1.3+ and rejected by pandas 2.x.
X = model_data_df.drop(columns=["IS_SUCCESSFUL"]).values

# Split the preprocessed data into a training and testing dataset
# (the fixed random_state keeps the split reproducible between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Fit a StandardScaler on the training features only, so that no statistics
# from the test split influence the scaling
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### Compile, Train and Evaluate the Model

In [None]:
# Network shape: a smaller two-hidden-layer network
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 50
hidden_nodes_layer2 = 10

nn_new_v3 = tf.keras.models.Sequential()

# Layers listed input-to-output; the first one also declares the input width
layer_stack = [
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="relu",
    ),
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Output layer: one sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
]
for layer in layer_stack:
    nn_new_v3.add(layer)

# Print the layer-by-layer structure
nn_new_v3.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_new_v3.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Checkpoint filename template; `{epoch:02d}` is filled in by the callback
checkpoint_path = "checkpoints_new_v3/weights.{epoch:02d}.hdf5"

# Make sure the target directory exists before training starts
os.makedirs("checkpoints_new_v3/", exist_ok=True)

In [None]:
# Create a callback that saves the model's weights every 5 epochs.
# `period` is a deprecated ModelCheckpoint argument; its replacement,
# `save_freq`, counts batches, so the 5-epoch interval is converted to batches.
batch_size = 32  # Keras' default for fit(), spelled out so the arithmetic below matches
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size  # ceil division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * steps_per_epoch)

# Train the model
fit_model = nn_new_v3.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=batch_size,
    callbacks=[cp_callback])

In [None]:
# Score the trained network on the held-out test split
test_metrics = nn_new_v3.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model to an HDF5 file
nn_new_v3.save(filepath="trained_application_new_v3.h5")

In [None]:
# Collect the per-epoch training metrics into a DataFrame indexed from epoch 1
epoch_count = len(fit_model.history["loss"])
history_df = pd.DataFrame(fit_model.history, index=range(1, epoch_count + 1))

# Training loss per epoch
history_df.plot(y="loss")

In [None]:
# Training accuracy per epoch
history_df.plot.line(y="accuracy")

## Attempt 4

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 150
hidden_nodes_layer2 = 75
hidden_nodes_layer3 = 50
hidden_nodes_layer4 = 25

nn_new_v4 = tf.keras.models.Sequential()

# First hidden layer (also declares the input width)
nn_new_v4.add(
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1, 
        input_dim=number_input_features, 
        activation="relu"
    )
)
# Second hidden layer
nn_new_v4.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer2, 
    activation="relu"
))

# Third hidden layer
# (previously reused hidden_nodes_layer2, leaving hidden_nodes_layer3 unused)
nn_new_v4.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer3, 
    activation="relu"
))

# Fourth hidden layer
# (previously reused hidden_nodes_layer2, leaving hidden_nodes_layer4 unused)
nn_new_v4.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer4, 
    activation="relu"
))

# Output layer
nn_new_v4.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_new_v4.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_new_v4.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Define the checkpoint path and filenames
# (`os` and `ModelCheckpoint` are already imported in the notebook's first
# cell, so they are not re-imported here)
os.makedirs("checkpoints_new_v4/",exist_ok=True)
checkpoint_path = "checkpoints_new_v4/weights.{epoch:02d}.hdf5"

In [None]:
# Create a callback that saves the model's weights every 5 epochs.
# `period` is a deprecated ModelCheckpoint argument; its replacement,
# `save_freq`, counts batches, so the 5-epoch interval is converted to batches.
batch_size = 32  # Keras' default for fit(), spelled out so the arithmetic below matches
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size  # ceil division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * steps_per_epoch)

# Train the model
fit_model = nn_new_v4.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=batch_size,
    callbacks=[cp_callback])

In [None]:
# Score the trained network on the held-out test split
test_metrics = nn_new_v4.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model to an HDF5 file
nn_new_v4.save(filepath="trained_application_new_v4.h5")

In [None]:
# Collect the per-epoch training metrics into a DataFrame indexed from epoch 1
epoch_count = len(fit_model.history["loss"])
history_df = pd.DataFrame(fit_model.history, index=range(1, epoch_count + 1))

# Training loss per epoch
history_df.plot(y="loss")

In [None]:
# Training accuracy per epoch
history_df.plot.line(y="accuracy")

## Attempt 5

In [None]:
# Split our preprocessed data into our features and target arrays
y = model_data_df["IS_SUCCESSFUL"].values
# Name the axis explicitly with `columns=`: passing it positionally
# (`drop([...], 1)`) is deprecated in pandas 1.3+ and rejected by pandas 2.x.
X = model_data_df.drop(columns=["IS_SUCCESSFUL"]).values

# Split the preprocessed data into a training and testing dataset
# (the fixed random_state keeps the split reproducible between runs)
# NOTE(review): no scaling cell follows this split in Attempt 5; the training
# and evaluation cells below use X_train_scaled / X_test_scaled left over from
# an earlier cell -- confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# Layer widths are derived with integer division so that `units` is always an
# int (np.floor returns a float, which is not a valid layer width type).
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 3*number_input_features
hidden_nodes_layer2 = hidden_nodes_layer1 // 2
# NOTE(review): layer 3 is sized from layer 1, exactly as originally written,
# so it has the same width as layer 2 -- confirm whether a further halving
# (from hidden_nodes_layer2) was intended.
hidden_nodes_layer3 = hidden_nodes_layer1 // 2
hidden_nodes_layer4 = hidden_nodes_layer2 // 2

nn_new_v5 = tf.keras.models.Sequential()

# First hidden layer (also declares the input width)
nn_new_v5.add(
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1, 
        input_dim=number_input_features, 
        activation="relu"
    )
)
# Second hidden layer
nn_new_v5.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer2, 
    activation="relu"
))

# Third hidden layer
# (previously reused hidden_nodes_layer2, leaving hidden_nodes_layer3 unused)
nn_new_v5.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer3, 
    activation="relu"
))

# Fourth hidden layer
# (previously reused hidden_nodes_layer2, leaving hidden_nodes_layer4 unused)
nn_new_v5.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer4, 
    activation="relu"
))

# Output layer
nn_new_v5.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_new_v5.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn_new_v5.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Checkpoint filename template; `{epoch:02d}` is filled in by the callback
checkpoint_path = "checkpoints_new_v5/weights.{epoch:02d}.hdf5"

# Make sure the target directory exists before training starts
os.makedirs("checkpoints_new_v5/", exist_ok=True)

In [None]:
# Create a callback that saves the model's weights every 5 epochs.
# `period` is a deprecated ModelCheckpoint argument; its replacement,
# `save_freq`, counts batches, so the 5-epoch interval is converted to batches.
batch_size = 32  # Keras' default for fit(), spelled out so the arithmetic below matches
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size  # ceil division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * steps_per_epoch)

# Train the model
fit_model = nn_new_v5.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=batch_size,
    callbacks=[cp_callback])

In [None]:
# Score the trained network on the held-out test split
test_metrics = nn_new_v5.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Persist the trained model to an HDF5 file
nn_new_v5.save(filepath="trained_application_new_v5.h5")

In [None]:
# Collect the per-epoch training metrics into a DataFrame indexed from epoch 1
epoch_count = len(fit_model.history["loss"])
history_df = pd.DataFrame(fit_model.history, index=range(1, epoch_count + 1))

# Training loss per epoch
history_df.plot(y="loss")

In [None]:
# Training accuracy per epoch
history_df.plot.line(y="accuracy")