<a href="https://colab.research.google.com/github/dhockenb/venture_funding_classifier/blob/main/GC_venture_funding_with_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [59]:
# Import required modules and dependencies
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [None]:
# Upload applicants_data.csv to Google Colab.
# The upload widget blocks until a file is picked, so it is skipped when the CSV
# is already in the working directory. That keeps "Restart & Run All" unattended
# and lets the notebook run outside Colab when the file is supplied manually.
import os

csv_file = None  # stays None when no upload was needed
if not os.path.exists("applicants_data.csv"):
    from google.colab import files

    csv_file = files.upload()

In [None]:
# Load applicants_data.csv from the working directory into a pandas DataFrame
applicant_data_df = pd.read_csv(filepath_or_buffer="applicants_data.csv")

# Preview the first rows of the loaded data
applicant_data_df.head()


In [None]:
# Review the data types associated with the columns.
# The bare expression on the last line is what the cell displays.
applicant_data_df.dtypes


In [None]:
# Drop the 'EIN' and 'NAME' columns from the DataFrame.
# The result is bound back to the same name, so a second run of this cell would
# look for columns that are already gone; errors="ignore" turns that re-run into
# a no-op instead of a KeyError.
applicant_data_df = applicant_data_df.drop(columns=["EIN", "NAME"], errors="ignore")

# Review the DataFrame
applicant_data_df.head()


In [None]:
# Names of the columns that are one-hot encoded further down
categorical_variables = [
    "APPLICATION_TYPE",
    "AFFILIATION",
    "CLASSIFICATION",
    "USE_CASE",
    "ORGANIZATION",
    "INCOME_AMT",
    "SPECIAL_CONSIDERATIONS",
]

# Show the list
categorical_variables


In [65]:
# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and later
# releases dropped the old name, so try the current spelling first and fall
# back to the legacy one only on releases that do not know it.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)


In [66]:
# Fit the encoder on the categorical columns and one-hot encode them in one step
encoded_data = enc.fit_transform(applicant_data_df.loc[:, categorical_variables])


In [None]:
# Create a DataFrame with the encoded variables.
# `get_feature_names` was removed from scikit-learn in 1.2; use its replacement
# `get_feature_names_out` and fall back only on releases that predate it.
if hasattr(enc, "get_feature_names_out"):
    encoded_columns = enc.get_feature_names_out(categorical_variables)
else:
    encoded_columns = enc.get_feature_names(categorical_variables)

# Reuse the source frame's index so the column-wise concat with the numeric
# columns lines up row for row.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=applicant_data_df.index,
)

# Review the DataFrame
encoded_df.head()


In [None]:
# Add the numerical variables from the original DataFrame to the OneHotEncoder DataFrame.
# The columns to remove are exactly the ones that were encoded, so reuse
# `categorical_variables` instead of repeating the list by hand.
applicant_num_data_df = applicant_data_df.drop(columns=categorical_variables)
encoded_df2 = pd.concat([applicant_num_data_df, encoded_df], axis=1)

# A column-wise concat aligns on the index; a row-count change would mean the
# two frames did not line up.
assert len(encoded_df2) == len(applicant_data_df), "row mismatch after concat"

# Review the DataFrame
encoded_df2.head()


In [None]:
# Target set: the IS_SUCCESSFUL column, kept as a one-column DataFrame
y = encoded_df2.loc[:, ['IS_SUCCESSFUL']]

# Preview the target
y.head()


In [None]:
# Feature set: every column except the IS_SUCCESSFUL target
X = encoded_df2.drop(columns=['IS_SUCCESSFUL'])

# Preview the features
X.head()


In [124]:
# Hold out a test set; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)


In [125]:
# Standardisation is learned from the training features only
scaler = StandardScaler()

# fit() hands back the scaler itself, so X_scaler and scaler are the same object
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


## Binary classification model using a deep neural network with two hidden layers and the ReLU activation function

In [None]:
# Define the number of inputs (features) to the model.
# The column count is read from the frame's shape, which does not need a first
# row to exist the way len(X_train.iloc[0]) does.
number_input_features = X_train.shape[1]

# Review the number of features
number_input_features


In [74]:
# Define the number of neurons in the output layer
# (a single neuron for this model)
number_output_neurons = 1

In [None]:
# First hidden layer width: half the input feature count, rounded up
hidden_nodes_layer1 = number_input_features // 2 + number_input_features % 2

# Show the resulting width
hidden_nodes_layer1


In [None]:
# Second hidden layer width: half the first hidden layer's width, rounded up
hidden_nodes_layer2 = hidden_nodes_layer1 // 2 + hidden_nodes_layer1 % 2

# Show the resulting width
hidden_nodes_layer2


In [77]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and training-time shuffling repeat from run to run. The value
# matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(1)

# Create the Sequential model instance
nn = Sequential()


In [78]:
# First hidden layer; it also declares how many input features the model takes
nn.add(
    Dense(
        hidden_nodes_layer1,
        activation="relu",
        input_dim=number_input_features,
    )
)


In [79]:
# Second hidden layer, also ReLU-activated
nn.add(
    Dense(
        hidden_nodes_layer2,
        activation="relu",
    )
)


In [80]:
# Add the output layer to the model specifying the number of output neurons and activation function.
# The width comes from number_output_neurons rather than a repeated literal, so
# the setting defined earlier in the notebook is the one actually used.
nn.add(Dense(units=number_output_neurons, activation="sigmoid"))


In [None]:
# Display the Sequential model summary
# (printed by the call itself)
nn.summary()


In [82]:
# Compile with binary cross-entropy loss, the Adam optimizer, and accuracy as the reported metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train on the scaled training data for 50 epochs and keep the returned fit result
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
# Score the trained model on the held-out test data
model_loss, model_accuracy = nn.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)

# Report the loss and accuracy
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [85]:
# Destination file for the baseline model
file_path = "AlphabetSoup.h5"

# Write the model to that HDF5 file
nn.save(filepath=file_path)


## Alternative Model 1 with three hidden layers

In [None]:
# Define the number of inputs (features) to the model.
# The column count is read from the frame's shape, which does not need a first
# row to exist the way len(X_train.iloc[0]) does.
number_input_features_A1 = X_train.shape[1]

# Review the number of features
number_input_features_A1

In [87]:
# Define the number of neurons in the output layer
# (a single neuron for Alternative Model 1)
number_output_neurons_A1 = 1

In [None]:
# Define the number of hidden nodes for the first hidden layer.
# This reads the section's own number_input_features_A1 (not the baseline
# model's number_input_features), so the section does not depend on the
# baseline cells having been run with the same feature set.
hidden_nodes_layer1_A1 = (number_input_features_A1 + 1) // 2

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1_A1

In [None]:
# Second hidden layer width: half the first hidden layer's width, rounded up
hidden_nodes_layer2_A1 = hidden_nodes_layer1_A1 // 2 + hidden_nodes_layer1_A1 % 2

# Show the resulting width
hidden_nodes_layer2_A1

In [None]:
# Third hidden layer width: half the second hidden layer's width, rounded up
hidden_nodes_layer3_A1 = hidden_nodes_layer2_A1 // 2 + hidden_nodes_layer2_A1 % 2

# Show the resulting width
hidden_nodes_layer3_A1

In [91]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and training-time shuffling repeat from run to run. The value
# matches the random_state used for the first train/test split.
tf.keras.utils.set_random_seed(1)

# Create the Sequential model instance
nn_A1 = Sequential()

In [None]:
# Hidden layers: three ReLU layers; only the first one declares the input width
nn_A1.add(Dense(units=hidden_nodes_layer1_A1, activation="relu", input_dim=number_input_features_A1))
nn_A1.add(Dense(units=hidden_nodes_layer2_A1, activation="relu"))
nn_A1.add(Dense(units=hidden_nodes_layer3_A1, activation="relu"))

# Output layer: sized by number_output_neurons_A1 rather than a repeated
# literal, so the setting defined for this model is the one actually used
nn_A1.add(Dense(units=number_output_neurons_A1, activation="sigmoid"))

# Check the structure of the model
nn_A1.summary()

In [93]:
# Compile with binary cross-entropy loss, the Adam optimizer, and accuracy as the reported metric
nn_A1.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train on the scaled training data for 50 epochs and keep the returned fit result
fit_model_A1 = nn_A1.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
print("Alternative Model 1 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data.
# The results go into _A1-suffixed names, matching nn_A1 / fit_model_A1, so the
# baseline model's model_loss / model_accuracy are not overwritten and remain
# available for comparison.
model_loss_A1, model_accuracy_A1 = nn_A1.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss_A1}, Accuracy: {model_accuracy_A1}")

## Alternative Model 2 with the ELU activation function


In [None]:
# Define the number of inputs (features) to the model.
# The column count is read from the frame's shape, which does not need a first
# row to exist the way len(X_train.iloc[0]) does.
number_input_features = X_train.shape[1]

# Review the number of features
number_input_features

In [96]:
# Define the number of neurons in the output layer
# (a single neuron for Alternative Model 2)
number_output_neurons_A2 = 1


In [None]:
# First hidden layer width: half the input feature count, rounded up
hidden_nodes_layer1_A2 = number_input_features // 2 + number_input_features % 2

# Show the resulting width
hidden_nodes_layer1_A2

In [None]:
# Second hidden layer width: half the first hidden layer's width, rounded up
hidden_nodes_layer2_A2 = hidden_nodes_layer1_A2 // 2 + hidden_nodes_layer1_A2 % 2

# Show the resulting width
hidden_nodes_layer2_A2

In [99]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and training-time shuffling repeat from run to run. The value
# matches the random_state used for the first train/test split.
tf.keras.utils.set_random_seed(1)

# Create the Sequential model instance
nn_A2 = Sequential()

In [None]:
# Hidden layers: two ELU layers; only the first one declares the input width
nn_A2.add(Dense(units=hidden_nodes_layer1_A2, activation="elu", input_dim=number_input_features))
nn_A2.add(Dense(units=hidden_nodes_layer2_A2, activation="elu"))

# Output layer: sized by number_output_neurons_A2 rather than a repeated
# literal, so the setting defined for this model is the one actually used
nn_A2.add(Dense(units=number_output_neurons_A2, activation="sigmoid"))

# Check the structure of the model
nn_A2.summary()


In [101]:
# Compile with binary cross-entropy loss, the Adam optimizer, and accuracy as the reported metric
nn_A2.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


In [None]:
# Train on the scaled training data for 50 epochs and keep the returned fit result
fit_model_A2 = nn_A2.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

In [None]:
print("Alternative Model 2 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data.
# The results go into _A2-suffixed names, matching nn_A2 / fit_model_A2, so the
# metrics of the models evaluated earlier are not overwritten and remain
# available for comparison.
model_loss_A2, model_accuracy_A2 = nn_A2.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss_A2}, Accuracy: {model_accuracy_A2}")

## Alternative Model 3: removing ASK_AMT as a feature

In [None]:
# Feature set for Alternative Model 3: everything except ASK_AMT and the target.
# NOTE(review): this rebinds X, which the earlier models were built from; cells
# above that read X should not be re-run after this point without first
# re-running the original feature-definition cell.
X = encoded_df2.drop(columns=['ASK_AMT', 'IS_SUCCESSFUL'])

# Preview the reduced feature set
X.head()

In [104]:
# Split the reduced feature set into training and testing data.
# random_state=1 matches the first split in this notebook: the split is
# repeatable, and the reduced feature set is split with the same shuffle seed
# as the full one.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [105]:
# Standardisation is learned from the new training features only
scaler = StandardScaler()

# fit() hands back the scaler itself, so X_scaler and scaler are the same object
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

In [107]:
# Define the number of neurons in the output layer
# (a single neuron for Alternative Model 3)
number_output_neurons_A3 = 1

In [None]:
# First hidden layer width: half the input feature count, rounded up
hidden_nodes_layer1_A3 = number_input_features // 2 + number_input_features % 2

# Show the resulting width
hidden_nodes_layer1_A3

In [None]:
# Second hidden layer width: half the first hidden layer's width, rounded up
hidden_nodes_layer2_A3 = hidden_nodes_layer1_A3 // 2 + hidden_nodes_layer1_A3 % 2

# Show the resulting width
hidden_nodes_layer2_A3

In [110]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and training-time shuffling repeat from run to run. The value
# matches the random_state used for the first train/test split.
tf.keras.utils.set_random_seed(1)

# Create the Sequential model instance
nn_A3 = Sequential()

In [None]:
# Hidden layers: two ReLU layers; only the first one declares the input width
nn_A3.add(Dense(units=hidden_nodes_layer1_A3, activation="relu", input_dim=number_input_features))
nn_A3.add(Dense(units=hidden_nodes_layer2_A3, activation="relu"))

# Output layer: sized by number_output_neurons_A3 rather than a repeated
# literal, so the setting defined for this model is the one actually used
nn_A3.add(Dense(units=number_output_neurons_A3, activation="sigmoid"))

# Check the structure of the model
nn_A3.summary()

In [112]:
# Compile with binary cross-entropy loss, the Adam optimizer, and accuracy as the reported metric
nn_A3.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the scaled training data for 50 epochs and keep the returned fit result
fit_model_A3 = nn_A3.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

In [None]:
print("Alternative Model 3 Results")

# Evaluate the model loss and accuracy metrics using the evaluate method and the test data.
# The results go into _A3-suffixed names, matching nn_A3 / fit_model_A3, so the
# metrics of the models evaluated earlier are not overwritten and remain
# available for comparison.
model_loss_A3, model_accuracy_A3 = nn_A3.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss_A3}, Accuracy: {model_accuracy_A3}")

In [130]:
# Destination file for Alternative Model 1
file_path = "AlphabetSoup_A1.h5"

# Write the model to that HDF5 file
nn_A1.save(filepath=file_path)


In [131]:
# Destination file for Alternative Model 2
file_path = "AlphabetSoup_A2.h5"

# Write the model to that HDF5 file
nn_A2.save(filepath=file_path)


In [132]:
# Destination file for Alternative Model 3
file_path = "AlphabetSoup_A3.h5"

# Write the model to that HDF5 file
nn_A3.save(filepath=file_path)