In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import numpy as np

#  Import and read the charity_data.csv.
import pandas as pd
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
# Drop the non-beneficial ID columns, 'EIN' and 'NAME'.
new_columns = ['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE', 'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS', 'ASK_AMT', 'IS_SUCCESSFUL']
clean_df = application_df[new_columns].copy()
clean_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
app_counts = clean_df.value_counts('APPLICATION_TYPE')
application_types_to_replace =  app_counts[app_counts <500].index.tolist()

# Replace in dataframe
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure binning was successful
application_df['APPLICATION_TYPE'].value_counts()

APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

In [4]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
class_counts = clean_df.value_counts('CLASSIFICATION')
classifications_to_replace = class_counts[class_counts <1000].index.tolist()

# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")

# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
Other     2261
C3000     1918
C2100     1883
Name: count, dtype: int64

In [5]:
# Convert categorical data to numeric with `pd.get_dummies`
cols_to_convert = ['USE_CASE', 'INCOME_AMT']
dummy_df = pd.get_dummies(clean_df,columns = cols_to_convert,  prefix=cols_to_convert)
dummy_df.head()

Unnamed: 0,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,ORGANIZATION,STATUS,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,USE_CASE_CommunityServ,USE_CASE_Heathcare,...,USE_CASE_ProductDev,INCOME_AMT_0,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M
0,T10,Independent,C1000,Association,1,N,5000,1,False,False,...,True,True,False,False,False,False,False,False,False,False
1,T3,Independent,C2000,Co-operative,1,N,108590,1,False,False,...,False,False,True,False,False,False,False,False,False,False
2,T5,CompanySponsored,C3000,Association,1,N,5000,0,False,False,...,True,True,False,False,False,False,False,False,False,False
3,T3,CompanySponsored,C2000,Trust,1,N,6692,1,False,False,...,False,False,False,True,False,False,False,False,False,False
4,T3,Independent,C1000,Trust,1,N,142590,1,False,True,...,False,False,False,False,True,False,False,False,False,False


In [6]:
#Look at Ask Amt value counts
ask_counts = clean_df.value_counts('ASK_AMT')
filt_ask_counts = ask_counts.loc[ask_counts >1]
print(filt_ask_counts)

ASK_AMT
5000     25398
63981        3
10478        3
15583        3
6725         3
         ...  
5255         2
5731         2
5175         2
5470         2
5179         2
Name: count, Length: 152, dtype: int64


In [7]:
#Create bins of Ask Amt
# Define the bin edges
bin_edges = [0, 5000, 10000, 50000, 100000, 500000, 1000000, float('inf')]

# Define the bin labels
bin_labels = ['0-5000','5001-10000', '10001-50000', '50001-100000', '100001-500000', '500001-1000000', '1000000+']

# Create bins using pd.cut()
clean_df['ASK_AMT_bins'] = pd.cut(clean_df['ASK_AMT'], bins=bin_edges, labels=bin_labels)

# Display the counts of values in each bin
bin_counts = clean_df['ASK_AMT_bins'].value_counts().sort_index()
print(bin_counts)

ASK_AMT_bins
0-5000            25398
5001-10000          549
10001-50000        2398
50001-100000       1423
100001-500000      2304
500001-1000000      650
1000000+           1577
Name: count, dtype: int64


In [8]:
#Create new DF for PCA
pca_df = dummy_df[['USE_CASE_CommunityServ', 'USE_CASE_Heathcare', 'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev', 'IS_SUCCESSFUL', 'INCOME_AMT_0', 'INCOME_AMT_1-9999', 'INCOME_AMT_10000-24999', 'INCOME_AMT_100000-499999', 'INCOME_AMT_10M-50M', 'INCOME_AMT_1M-5M',
            'INCOME_AMT_25000-99999', 'INCOME_AMT_50M+', 'INCOME_AMT_5M-10M']]
merged_df = pd.merge(pca_df, clean_df[['ASK_AMT_bins']], left_index=True, right_index=True)
merged_df = pd.get_dummies(merged_df, columns=['ASK_AMT_bins'])
pca_df_numeric = merged_df.select_dtypes(include=[np.number])
# Create a PCA model instance
pca = PCA(n_components=21)

# Use the PCA model with `fit_transform`
pca_result = pca.fit_transform(merged_df)
scaled_pca = pd.DataFrame(data=pca_result)
# Get the PCA components
pca_components = pd.DataFrame(pca.components_, columns=merged_df.columns)

# Print the PCA components
print("PCA Components:")
print(pca_components)


PCA Components:
    USE_CASE_CommunityServ  USE_CASE_Heathcare  USE_CASE_Other  \
0                -0.006163            0.007144        0.000098   
1                 0.019880            0.008874        0.000147   
2                 0.005370            0.001999       -0.000172   
3                 0.000821            0.002083       -0.000148   
4                -0.000966            0.048785        0.000306   
5                -0.000476           -0.012104        0.000067   
6                 0.002952           -0.009097        0.000506   
7                -0.009726           -0.021172        0.000179   
8                -0.034302           -0.062247        0.000717   
9                -0.013750           -0.014841       -0.000127   
10                0.073969            0.033646        0.000815   
11                0.747812            0.129551        0.001964   
12               -0.033109            0.015479       -0.001340   
13               -0.132510            0.081510       -0.0016

In [9]:
# Split our preprocessed data into our features and target arrays, with fewer inputs than in original model, using only use case
X = dummy_df[['USE_CASE_CommunityServ', 'USE_CASE_Heathcare', 'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev']]
y = dummy_df["IS_SUCCESSFUL"]

# Split the preprocessed data into a training and testing dataset
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, random_state=42)

In [10]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train2)

# Scale the data
X_train_scaled2 = X_scaler.transform(X_train2)
X_test_scaled2 = X_scaler.transform(X_test2)

In [11]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn2 = tf.keras.models.Sequential()

# Add the input layer
nn2.add(tf.keras.layers.Input(shape=(X_train_scaled2.shape[1],)))

# First hidden layer
nn2.add(tf.keras.layers.Dense(units=5, activation='relu', input_dim=5))

# Second hidden layer
nn2.add(tf.keras.layers.Dense(units=5, activation='relu'))

# Output layer
nn2.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 5)                 30        
                                                                 
 dense_1 (Dense)             (None, 5)                 30        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 66 (264.00 Byte)
Trainable params: 66 (264.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [13]:
# Train the model
fit_nn2 = nn2.fit(X_train2, y_train2, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn2.evaluate(X_test_scaled2, y_test2,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.9057 - accuracy: 0.4718 - 492ms/epoch - 2ms/step
Loss: 0.9056698679924011, Accuracy: 0.47183674573898315


In [15]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn3 = tf.keras.models.Sequential()

# Add the input layer
nn3.add(tf.keras.layers.Input(shape=(X_train_scaled2.shape[1],)))

# First hidden layer
nn3.add(tf.keras.layers.Dense(units=5, activation='relu', input_dim=5))

# Second hidden layer and adjust units
nn3.add(tf.keras.layers.Dense(units=3, activation='relu'))

#Third hidden layer with increased neurons
nn3.add(tf.keras.layers.Dense(units=25, activation='relu'))

# Output layer
nn3.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn3.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 5)                 30        
                                                                 
 dense_4 (Dense)             (None, 3)                 18        
                                                                 
 dense_5 (Dense)             (None, 25)                100       
                                                                 
 dense_6 (Dense)             (None, 1)                 26        
                                                                 
Total params: 174 (696.00 Byte)
Trainable params: 174 (696.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# Compile the model
nn3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [17]:
# Train the model
fit_nn3 = nn3.fit(X_train_scaled2, y_train2, epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [18]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn3.evaluate(X_test_scaled2,y_test2,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.6903 - accuracy: 0.5369 - 447ms/epoch - 2ms/step
Loss: 0.6902990341186523, Accuracy: 0.5369096398353577


In [19]:
merged_df.head()
# Split our preprocessed data into our features and target arrays,  using only ask_amt
X3 = merged_df[['ASK_AMT_bins_0-5000', 'ASK_AMT_bins_5001-10000',  'ASK_AMT_bins_10001-50000', 'ASK_AMT_bins_50001-100000',  'ASK_AMT_bins_100001-500000', 'ASK_AMT_bins_500001-1000000',  'ASK_AMT_bins_1000000+']]
y3 = merged_df["IS_SUCCESSFUL"]

# Split the preprocessed data into a training and testing dataset
X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, random_state=42)

In [20]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train3)

# Scale the data
X_train_scaled3 = X_scaler.transform(X_train3)
X_test_scaled3 = X_scaler.transform(X_test3)

In [21]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn4 = tf.keras.models.Sequential()

# Add the input layer
nn4.add(tf.keras.layers.Input(shape=(X_train_scaled3.shape[1],)))

# First hidden layer
nn4.add(tf.keras.layers.Dense(units=7, activation='relu', input_dim=7))

# Second hidden layer
nn4.add(tf.keras.layers.Dense(units=7, activation='relu', input_dim=7))

# Third hidden layer, and increase number of neurons
nn4.add(tf.keras.layers.Dense(units=25, activation='relu', input_dim=7))

# Output layer
nn4.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn4.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 7)                 56        
                                                                 
 dense_8 (Dense)             (None, 7)                 56        
                                                                 
 dense_9 (Dense)             (None, 25)                200       
                                                                 
 dense_10 (Dense)            (None, 1)                 26        
                                                                 
Total params: 338 (1.32 KB)
Trainable params: 338 (1.32 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
# Compile the model
nn4.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [23]:
#Increase # of Epochs
fit_nn4 = nn4.fit(X_train3, y_train3, epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [24]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn4.evaluate(X_test_scaled3,y_test3,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.7143 - accuracy: 0.5355 - 456ms/epoch - 2ms/step
Loss: 0.7142576575279236, Accuracy: 0.5355101823806763


In [25]:
#Rerun the model with Tanh & Softmax
#Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn5 = tf.keras.models.Sequential()

# Add the input layer
nn5.add(tf.keras.layers.Input(shape=(X_train_scaled3.shape[1],)))

# First hidden layer
nn5.add(tf.keras.layers.Dense(units=7, activation='tanh', input_dim=7))

# Second hidden layer
nn5.add(tf.keras.layers.Dense(units=7, activation='tanh', input_dim=7))

# Third hidden layer, and increase number of neurons
nn5.add(tf.keras.layers.Dense(units=17, activation='tanh', input_dim=7))

# Output layer
nn5.add(tf.keras.layers.Dense(units=1, activation='softmax'))

# Check the structure of the model
nn5.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 7)                 56        
                                                                 
 dense_12 (Dense)            (None, 7)                 56        
                                                                 
 dense_13 (Dense)            (None, 17)                136       
                                                                 
 dense_14 (Dense)            (None, 1)                 18        
                                                                 
Total params: 266 (1.04 KB)
Trainable params: 266 (1.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
# Compile the model
nn5.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# Run the model
fit_nn5 = nn5.fit(X_train3, y_train3, epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [27]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn5.evaluate(X_test_scaled3,y_test3,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.6909 - accuracy: 0.5343 - 458ms/epoch - 2ms/step
Loss: 0.6909289956092834, Accuracy: 0.5343440175056458


In [28]:
# Split our preprocessed data into our features and target arrays
X4 = dummy_df[['USE_CASE_CommunityServ', 'USE_CASE_Heathcare', 'USE_CASE_Other', 'USE_CASE_Preservation', 'USE_CASE_ProductDev', 'INCOME_AMT_0', 'INCOME_AMT_1-9999', 'INCOME_AMT_10000-24999', 'INCOME_AMT_100000-499999', 'INCOME_AMT_10M-50M', 'INCOME_AMT_1M-5M', 'INCOME_AMT_25000-99999', 'INCOME_AMT_50M+', 'INCOME_AMT_5M-10M']]
y4 = dummy_df["IS_SUCCESSFUL"]

# Split the preprocessed data into a training and testing dataset
X_train4, X_test4, y_train4, y_test4 = train_test_split(X4, y4, random_state=42)

In [29]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler4 = scaler.fit(X_train4)

# Scale the data
X_train_scaled4 = X_scaler4.transform(X_train4)
X_test_scaled4 = X_scaler4.transform(X_test4)

In [40]:
#Rerun the model with Tanh & Softmax
#Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
nn6 = tf.keras.models.Sequential()

# Add the input layer
nn6.add(tf.keras.layers.Input(shape=(X_train_scaled4.shape[1],)))

# First hidden layer
nn6.add(tf.keras.layers.Dense(units=7, activation='relu', input_dim=7))

# Second hidden layer
nn6.add(tf.keras.layers.Dense(units=7, activation='relu', input_dim=7))

# Third hidden layer, and increase number of neurons
nn6.add(tf.keras.layers.Dense(units=25, activation='relu', input_dim=7))

# Output layer
nn6.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn6.summary()


Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_25 (Dense)            (None, 7)                 105       
                                                                 
 dense_26 (Dense)            (None, 7)                 56        
                                                                 
 dense_27 (Dense)            (None, 25)                200       
                                                                 
 dense_28 (Dense)            (None, 1)                 26        
                                                                 
Total params: 387 (1.51 KB)
Trainable params: 387 (1.51 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [41]:
# Compile the model
nn6.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# Run the model
fit_nn6 = nn6.fit(X_train_scaled4, y_train4, epochs=75)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


In [42]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn6.evaluate(X_test_scaled4,y_test4,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 1s - loss: 0.6857 - accuracy: 0.5447 - 509ms/epoch - 2ms/step
Loss: 0.6857037544250488, Accuracy: 0.5447230339050293
