In [None]:
! pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/128.9 kB[0m [31m3.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.6 kt-legacy-1.0.5


## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import numpy as np
from scipy import stats
# Import the kerastuner library
import keras_tuner as kt


# Read charity_data.csv into a DataFrame (pandas is already imported above) and preview it
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [None]:
# Remove the non-beneficial ID columns, 'EIN' and 'NAME', and preview the result
application_df = application_df.drop(columns=['EIN', 'NAME'])
application_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [None]:
# Summarise the remaining columns: dtype and non-null count for each
application_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34299 entries, 0 to 34298
Data columns (total 10 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   APPLICATION_TYPE        34299 non-null  object
 1   AFFILIATION             34299 non-null  object
 2   CLASSIFICATION          34299 non-null  object
 3   USE_CASE                34299 non-null  object
 4   ORGANIZATION            34299 non-null  object
 5   STATUS                  34299 non-null  int64 
 6   INCOME_AMT              34299 non-null  object
 7   SPECIAL_CONSIDERATIONS  34299 non-null  object
 8   ASK_AMT                 34299 non-null  int64 
 9   IS_SUCCESSFUL           34299 non-null  int64 
dtypes: int64(3), object(7)
memory usage: 2.6+ MB


In [None]:
# Count the distinct values in each column; high-cardinality categoricals are candidates for binning
application_df.nunique()

APPLICATION_TYPE            17
AFFILIATION                  6
CLASSIFICATION              71
USE_CASE                     5
ORGANIZATION                 4
STATUS                       2
INCOME_AMT                   9
SPECIAL_CONSIDERATIONS       2
ASK_AMT                   8747
IS_SUCCESSFUL                2
dtype: int64

In [None]:
# Count rows per APPLICATION_TYPE to see which types are rare enough to bin
app_type_vc = application_df.APPLICATION_TYPE.value_counts()
print(app_type_vc)

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64


In [None]:
# Application types with fewer than 500 rows are pooled into a single bucket
app_type_filter = app_type_vc.loc[app_type_vc.lt(500)]
application_types_to_replace = list(app_type_filter.index)

# Relabel every rare application type as "Other" in one vectorised assignment
is_rare_app_type = application_df['APPLICATION_TYPE'].isin(application_types_to_replace)
application_df.loc[is_rare_app_type, 'APPLICATION_TYPE'] = "Other"

# Confirm the binning: only the frequent types plus "Other" should remain
application_df['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

In [None]:
# Count rows per CLASSIFICATION to see which classes are rare enough to bin
classification_vc = application_df.CLASSIFICATION.value_counts()
print(classification_vc)

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: CLASSIFICATION, Length: 71, dtype: int64


In [None]:
# Show only the classifications that occur more than once, to help pick a cutoff
appears_more_than_once = classification_vc.gt(1)
print(classification_vc.loc[appears_more_than_once])


C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
C2800       95
C7100       75
C1300       58
C1280       50
C1230       36
C1400       34
C7200       32
C2300       32
C1240       30
C8000       20
C7120       18
C1500       16
C1800       15
C6000       15
C1250       14
C8200       11
C1238       10
C1278       10
C1235        9
C1237        9
C7210        7
C2400        6
C1720        6
C4100        6
C1257        5
C1600        5
C1260        3
C2710        3
C0           3
C3200        2
C1234        2
C1246        2
C1267        2
C1256        2
Name: CLASSIFICATION, dtype: int64


In [None]:
# Classifications with fewer than 700 rows are pooled into a single bucket
classification_filter = classification_vc.loc[classification_vc.lt(700)]
classifications_to_replace = list(classification_filter.index)

# Relabel every rare classification as "Other" in one vectorised assignment
is_rare_classification = application_df['CLASSIFICATION'].isin(classifications_to_replace)
application_df.loc[is_rare_classification, 'CLASSIFICATION'] = "Other"

# Confirm the binning: only the frequent classes plus "Other" should remain
application_df['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
Other     1484
C7000      777
Name: CLASSIFICATION, dtype: int64

In [None]:
# SPECIAL_CONSIDERATIONS holds 'Y'/'N'; encode it as 1/0 so it is treated as numeric
mapping = {'Y': 1, 'N': 0}

# Any value missing from `mapping` becomes NaN, so run this cell only once per load
application_df = application_df.assign(
    SPECIAL_CONSIDERATIONS=lambda frame: frame['SPECIAL_CONSIDERATIONS'].map(mapping)
)

In [None]:
# One-hot encode the categorical (object-dtype) columns with `pd.get_dummies`

# Collect the names of every object-dtype column
categorical_columns = [
    column for column, dtype in application_df.dtypes.items() if dtype == "object"
]

# Encode just those columns; each becomes a set of <column>_<value> indicators
categorical_frame = application_df[categorical_columns]
dummy_columns = pd.get_dummies(categorical_frame, columns=categorical_columns)

# Keep the numeric columns first, then append the indicator columns
numeric_frame = application_df.drop(columns=categorical_columns)
application_df_encoded = pd.concat([numeric_frame, dummy_columns], axis=1)

application_df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34299 entries, 0 to 34298
Data columns (total 44 columns):
 #   Column                        Non-Null Count  Dtype
---  ------                        --------------  -----
 0   STATUS                        34299 non-null  int64
 1   SPECIAL_CONSIDERATIONS        34299 non-null  int64
 2   ASK_AMT                       34299 non-null  int64
 3   IS_SUCCESSFUL                 34299 non-null  int64
 4   APPLICATION_TYPE_Other        34299 non-null  uint8
 5   APPLICATION_TYPE_T10          34299 non-null  uint8
 6   APPLICATION_TYPE_T19          34299 non-null  uint8
 7   APPLICATION_TYPE_T3           34299 non-null  uint8
 8   APPLICATION_TYPE_T4           34299 non-null  uint8
 9   APPLICATION_TYPE_T5           34299 non-null  uint8
 10  APPLICATION_TYPE_T6           34299 non-null  uint8
 11  APPLICATION_TYPE_T7           34299 non-null  uint8
 12  APPLICATION_TYPE_T8           34299 non-null  uint8
 13  AFFILIATION_CompanySponsored  3

In [None]:
# Separate the target (IS_SUCCESSFUL) from the feature columns
y = application_df_encoded['IS_SUCCESSFUL']
X = application_df_encoded.drop(columns='IS_SUCCESSFUL')

# Stratified train/test split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)


In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Learn the scaling parameters from the training split only
X_scaler = scaler.fit(X_train)

# Apply the same fitted scaling to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Compile, Train and Evaluate the Model

In [None]:
# Baseline network: two ReLU hidden layers (80 and 30 units) and a sigmoid output
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Output layer: a single sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 80)                3520      
                                                                 
 dense_4 (Dense)             (None, 30)                2430      
                                                                 
 dense_5 (Dense)             (None, 1)                 31        
                                                                 
Total params: 5981 (23.36 KB)
Trainable params: 5981 (23.36 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Define a callback to save model weights every five epochs.
# `save_freq` is counted in batches, so five epochs are converted to a batch count.
# The earlier `save_best_only=True` / `monitor="val_loss"` setup never wrote a file,
# because `fit` runs without validation data and `val_loss` is therefore unavailable;
# `period` is also deprecated in favour of `save_freq`.
# NOTE(review): the other training cells in this notebook reuse this file name,
# so each run overwrites the weights saved by the previous one.
batch_size = 32  # Keras' default, made explicit so the step count matches `fit`
steps_per_epoch = int(np.ceil(len(X_train_scaled) / batch_size))

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="model_weights_1.h5",     # Filepath to save the model weights
    save_weights_only=True,            # Save only the model weights
    save_freq=5 * steps_per_epoch,     # Save at the end of every fifth epoch
    verbose=1,                         # Report each save
)

# Train the model with the checkpoint callback attached
nn.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=batch_size,
    callbacks=[checkpoint_callback]
)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100



Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100



Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100



Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100



Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100



Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100



Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100



Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100



Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100



Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100



Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100



Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100



Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100



Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100



Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100



Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100



Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100



Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100



Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100



Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100





<keras.src.callbacks.History at 0x7930beabbf70>

In [None]:
# Score the trained model on the held-out test split
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.5625 - accuracy: 0.7278 - 509ms/epoch - 2ms/step
Loss: 0.5624700784683228, Accuracy: 0.7278134226799011


In [None]:
# Persist the trained baseline model as an HDF5 file
nn.save(filepath="AlphabetSoupCharity.h5")

  saving_api.save_model(


## Model Optimization

### Attempt 1: Steps to Optimize Model

1. Remove outliers by dropping training rows whose scaled feature values exceed 3 in absolute value (|X_train_scaled| > 3)
2. Adding a hidden layer
3. Increasing no. of neurons to 100, 60, and 30 respectively
4. Increasing no. of epochs to 150

In [None]:
# Removing outliers

# Only features with more than two distinct values are checked for outliers.
# Binary / one-hot columns are skipped: once standardised, any category present in
# fewer than ~10% of rows has |z| > 3, so thresholding them would discard every row
# belonging to a rare category instead of removing genuine outliers.
continuous_mask = (X_train.nunique() > 2).to_numpy()

# X_train_scaled is already standardised on the training data, so its values are the z-scores
z_scores = np.abs(X_train_scaled[:, continuous_mask])

# Set a Z-score threshold to identify outliers (|Z-score| > 3)
threshold = 3

# Flag rows where any checked feature exceeds the threshold
outlier_mask = (z_scores > threshold).any(axis=1)

# Remove flagged rows from the scaled training features and the matching labels
X_train_no_outliers = X_train_scaled[~outlier_mask]
y_train_no_outliers = y_train[~outlier_mask]

# Checking no. of rows left in training data
print(len(X_train_no_outliers))

13374


In [None]:
# Attempt 1 network: three ReLU hidden layers (100, 60, 30 units) and a sigmoid output
number_input_features = X_train_no_outliers.shape[1]
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 60
hidden_nodes_layer3 = 30

nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"),
    # Output layer: a single sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 100)               4400      
                                                                 
 dense_7 (Dense)             (None, 60)                6060      
                                                                 
 dense_8 (Dense)             (None, 30)                1830      
                                                                 
 dense_9 (Dense)             (None, 1)                 31        
                                                                 
Total params: 12321 (48.13 KB)
Trainable params: 12321 (48.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Define a callback to save model weights every five epochs.
# `save_freq` is counted in batches, so five epochs are converted to a batch count.
# The earlier `save_best_only=True` / `monitor="val_loss"` setup never wrote a file,
# because `fit` runs without validation data and `val_loss` is therefore unavailable;
# `period` is also deprecated in favour of `save_freq`.
# NOTE(review): the other training cells in this notebook reuse this file name,
# so each run overwrites the weights saved by the previous one.
batch_size = 32  # Keras' default, made explicit so the step count matches `fit`
steps_per_epoch = int(np.ceil(len(X_train_no_outliers) / batch_size))

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="model_weights_1.h5",     # Filepath to save the model weights
    save_weights_only=True,            # Save only the model weights
    save_freq=5 * steps_per_epoch,     # Save at the end of every fifth epoch
    verbose=1,                         # Report each save
)

# Train on the outlier-filtered training data with the checkpoint callback attached
nn.fit(
    X_train_no_outliers, y_train_no_outliers,
    epochs=150,
    batch_size=batch_size,
    callbacks=[checkpoint_callback]
)



Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150



Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150



Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150



Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150



Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150



Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150



Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150



Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150



Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150



Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150



Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150



Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150



Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150



Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150



Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150



Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150



Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150



Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150



Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150



Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150



Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150



Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150



Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150



Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150



Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150



Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150



Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150



Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150



Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150



Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150





<keras.src.callbacks.History at 0x7930be88f940>

In [None]:
# Score the Attempt 1 model on the held-out test split
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 11.8970 - accuracy: 0.6248 - 524ms/epoch - 2ms/step
Loss: 11.896970748901367, Accuracy: 0.6248396635055542


In [None]:
# Persist the Attempt 1 model as an HDF5 file
nn.save(filepath="AlphabetSoupCharity_Optimization_1.h5")

### Attempt 2: Steps to Optimize Model

1. Adding a hidden layer
2. Increasing no. of neurons to 150, 80, and 50 respectively
3. Increasing no. of epochs to 150
4. Change activation function to tanh

In [None]:
# Attempt 2 network: three tanh hidden layers (150, 80, 50 units) and a sigmoid output
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 150
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 50

nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"),
    # Third hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="tanh"),
    # Output layer: a single sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 150)               6600      
                                                                 
 dense_11 (Dense)            (None, 80)                12080     
                                                                 
 dense_12 (Dense)            (None, 50)                4050      
                                                                 
 dense_13 (Dense)            (None, 1)                 51        
                                                                 
Total params: 22781 (88.99 KB)
Trainable params: 22781 (88.99 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Define a callback to save model weights every five epochs.
# `save_freq` is counted in batches, so five epochs are converted to a batch count.
# The earlier `save_best_only=True` / `monitor="val_loss"` setup never wrote a file,
# because `fit` runs without validation data and `val_loss` is therefore unavailable;
# `period` is also deprecated in favour of `save_freq`.
# NOTE(review): the other training cells in this notebook reuse this file name,
# so each run overwrites the weights saved by the previous one.
batch_size = 32  # Keras' default, made explicit so the step count matches `fit`
steps_per_epoch = int(np.ceil(len(X_train_scaled) / batch_size))

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="model_weights_1.h5",     # Filepath to save the model weights
    save_weights_only=True,            # Save only the model weights
    save_freq=5 * steps_per_epoch,     # Save at the end of every fifth epoch
    verbose=1,                         # Report each save
)

# Train the model with the checkpoint callback attached
nn.fit(
    X_train_scaled, y_train,
    epochs=150,
    batch_size=batch_size,
    callbacks=[checkpoint_callback])



Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150



Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150



Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150



Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150



Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150



Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150



Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150



Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150



Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150



Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150



Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150



Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150



Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150



Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150



Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150



Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150



Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150



Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150



Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150



Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150



Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150



Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150



Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150



Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150



Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150



Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150



Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150



Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150



Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150



Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150





<keras.src.callbacks.History at 0x7930be85a4d0>

In [None]:
 # Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.5669 - accuracy: 0.7298 - 643ms/epoch - 2ms/step
Loss: 0.5669205784797668, Accuracy: 0.7297959327697754


In [None]:
# Persist the Attempt 2 model as an HDF5 file
nn.save(filepath="AlphabetSoupCharity_Optimization_2.h5")


### Attempt 3: Steps to Optimize Model

1. Adding another hidden layer
2. Increasing no. of neurons to 150, 80, and 60 respectively
3. Training for 100 epochs
4. Change activation function to tanh
5. Removing Classification and Application Type columns

In [None]:
# Rebuild the feature matrix without the target and without any
# APPLICATION_TYPE / CLASSIFICATION indicator columns
excluded_columns = [
    column
    for column in application_df_encoded.columns
    if 'APPLICATION_TYPE' in str(column) or 'CLASSIFICATION' in str(column)
]
X = application_df_encoded.drop(columns='IS_SUCCESSFUL').drop(columns=excluded_columns)

y = application_df_encoded['IS_SUCCESSFUL']

# Same stratified split and seed as before; this rebinds X_train/X_test/y_train/y_test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)


In [None]:
# Refit the existing scaler on the reduced training features
X_scaler = scaler.fit(X_train)

# Apply the refitted scaling to both splits (overwrites the earlier scaled arrays)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Attempt 3 network: three tanh hidden layers (150, 80, 60 units) and a sigmoid output
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 150
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 60

nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"),
    # Third hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="tanh"),
    # Output layer: a single sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_14 (Dense)            (None, 150)               4200      
                                                                 
 dense_15 (Dense)            (None, 80)                12080     
                                                                 
 dense_16 (Dense)            (None, 60)                4860      
                                                                 
 dense_17 (Dense)            (None, 1)                 61        
                                                                 
Total params: 21201 (82.82 KB)
Trainable params: 21201 (82.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 100 epochs and keep the returned History object
fit_model = nn.fit(x=X_train_scaled, y=y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Score the Attempt 3 model on the held-out test split
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.6017 - accuracy: 0.6982 - 527ms/epoch - 2ms/step
Loss: 0.601708710193634, Accuracy: 0.6981924176216125


In [None]:
# Persist the Attempt 3 model as an HDF5 file
nn.save(filepath="AlphabetSoupCharity_Optimization_3.h5")

  saving_api.save_model(


## Using Keras Tuner to optimize model

In [None]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp, input_dim=None):
    """Build and compile a Sequential binary classifier from tuner hyperparameters.

    Args:
        hp: KerasTuner hyperparameter container; queried for `activation`,
            `first_units`, `num_layers` and `units_<i>`.
        input_dim: Number of input features. When omitted it is read from the
            notebook-level `X_train_scaled`, so the network always matches the
            data being searched. A hard-coded width stops matching as soon as
            the feature set changes.

    Returns:
        The compiled Keras model (binary cross-entropy loss, Adam, accuracy metric).
    """
    if input_dim is None:
        input_dim = X_train_scaled.shape[1]

    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=150,
        step=10), activation=activation, input_dim=input_dim))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=80,
            step=5),
            activation=activation))

    # Single sigmoid unit for the binary target
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [None]:
# Hold out part of the training data for tuning. Validating trials on the test set
# would leak it into model selection and make the final test score optimistic.
X_tune, X_val, y_tune, y_val = train_test_split(
    X_train_scaled, y_train, random_state=1, stratify=y_train
)

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=30,
    hyperband_iterations=2)


# Run the kerastuner search for best hyperparameters.
# No `epochs` argument is passed: Hyperband sets the epoch count of every trial
# itself (up to `max_epochs`), so a value given here would be ignored.
tuner.search(X_tune, y_tune, validation_data=(X_val, y_val))

# Get best model hyperparameters (printed, since a bare mid-cell expression is not displayed)
best_hyper = tuner.get_best_hyperparameters(1)[0]
print(best_hyper.values)

# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Trial 179 Complete [00h 02m 28s]
val_accuracy: 0.7321282625198364

Best val_accuracy So Far: 0.7329446077346802
Total elapsed time: 01h 32m 42s

Search: Running Trial #180

Value             |Best Value So Far |Hyperparameter
sigmoid           |relu              |activation
41                |141               |first_units
1                 |3                 |num_layers
6                 |41                |units_0
16                |6                 |units_1
56                |51                |units_2
71                |71                |units_3
26                |16                |units_4
6                 |21                |units_5
30                |30                |tuner/epochs
0                 |0                 |tuner/initial_epoch
0                 |0                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/30

KeyboardInterrupt: ignored

### Keras Tuner Results:

Stopped running the loop at Trial 179. It appears the additional improvements in accuracy are quite minute.

Best Model So Far:

*   Activation: relu
*   Input Layer Nodes: 141
*   No. of hidden layers: 3
*   Layer 1 Nodes: 41
*   Layer 2 Nodes: 6
*   Layer 3 Nodes: 51
*   Output Layer: Sigmoid, 1 Node

Accuracy: 73.3%

In [None]:
# Network using the tuner's best values: four ReLU layers (141, 41, 6, 51 units) and a sigmoid output
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 141
hidden_nodes_layer2 = 41
hidden_nodes_layer3 = 6
hidden_nodes_layer4 = 51

nn = tf.keras.models.Sequential([
    # Input hidden layer (also declares the input width)
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"),
    # First hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"),
    # Output layer: a single sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_32 (Dense)            (None, 141)               3948      
                                                                 
 dense_33 (Dense)            (None, 41)                5822      
                                                                 
 dense_34 (Dense)            (None, 6)                 252       
                                                                 
 dense_35 (Dense)            (None, 51)                357       
                                                                 
 dense_36 (Dense)            (None, 1)                 52        
                                                                 
Total params: 10431 (40.75 KB)
Trainable params: 10431 (40.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the tuner-derived model for 100 epochs (no callbacks are attached here)
nn.fit(x=X_train_scaled, y=y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7930b37afe50>

In [None]:
# Score the tuner-derived model on the held-out test split
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.6043 - accuracy: 0.6988 - 750ms/epoch - 3ms/step
Loss: 0.6042799353599548, Accuracy: 0.6987755298614502
