In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf


In [None]:
# Read the charity dataset straight from its hosted CSV into a DataFrame.
# `data_url` is kept as a separate name so the source location is easy to swap.
data_url = (
    "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
)
df = pd.read_csv(filepath_or_buffer=data_url)


In [None]:
# Preprocessing steps
def bin_rare_categories(series, cutoff, other_label="Other"):
    """Collapse infrequent values of a categorical column into one label.

    Parameters
    ----------
    series : pd.Series
        Column whose rare values should be merged.
    cutoff : int
        Values that occur fewer than `cutoff` times are replaced.
    other_label : str, default "Other"
        Label written in place of every rare value.

    Returns
    -------
    tuple of (pd.Series, list)
        The binned column and the list of values that were replaced.
    """
    counts = series.value_counts()
    rare_values = counts[counts < cutoff].index.tolist()
    # A single vectorised pass replaces every rare value at once, instead of
    # one full-column .replace() call per rare value. value_counts() skips
    # NaN, so missing entries are never in `rare_values` and stay untouched.
    binned = series.where(~series.isin(rare_values), other_label)
    return binned, rare_values


# Drop the non-beneficial columns 'EIN' and 'NAME'.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['EIN', 'NAME'], errors='ignore')

# Determine the number of unique values in each column
unique_counts = df.nunique()
print("Number of unique values in each column:")
print(unique_counts)

# Look at APPLICATION_TYPE value counts for binning
application_type_counts = df['APPLICATION_TYPE'].value_counts()
print("APPLICATION_TYPE value counts:")
print(application_type_counts)

# Application types seen fewer than 500 times are merged into "Other"
cutoff_value = 500
binned_application_type, application_types_to_replace = bin_rare_categories(
    df['APPLICATION_TYPE'], cutoff_value
)
df['APPLICATION_TYPE'] = binned_application_type

# Look at CLASSIFICATION value counts for binning
classification_counts = df['CLASSIFICATION'].value_counts()

# Classifications seen fewer than 100 times are merged into "Other"
cutoff_value = 100
binned_classification, classifications_to_replace = bin_rare_categories(
    df['CLASSIFICATION'], cutoff_value
)
df['CLASSIFICATION'] = binned_classification

# Convert categorical data to numeric with pd.get_dummies.
# dtype=int pins the indicator columns to integers; without it pandas >= 2.0
# emits bool columns, so the encoded frame would differ between versions.
df_encoded = pd.get_dummies(df, dtype=int)


In [None]:
# Separate the target column from the feature matrix.
y = df_encoded['IS_SUCCESSFUL']
X = df_encoded.drop('IS_SUCCESSFUL', axis=1)

# Hold out a test set; stratifying on y keeps the class balance equal in both
# partitions and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training features only, so nothing
# from the test set leaks into the scaler.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both partitions.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


In [None]:
# Create a new neural network model with optimizations

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation is repeatable across Restart & Run All.
# 42 matches the random_state already used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Number of feature columns; .shape[1] does not require a first row to exist,
# unlike len(X_train_scaled[0]).
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 20

# Initialize the Sequential model
nn_optimized = tf.keras.models.Sequential()

# Declare the input shape explicitly rather than through the legacy
# `input_dim` argument on the first Dense layer.
nn_optimized.add(tf.keras.Input(shape=(number_input_features,)))

# Three ReLU hidden layers of decreasing width (100 -> 50 -> 20)
for layer_units in (hidden_nodes_layer1, hidden_nodes_layer2, hidden_nodes_layer3):
    nn_optimized.add(tf.keras.layers.Dense(units=layer_units, activation='relu'))

# Output layer: one sigmoid unit for the binary IS_SUCCESSFUL target
nn_optimized.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn_optimized.summary()


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the tracked metric.
nn_optimized.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the scaled training data for 50 epochs; the returned History object
# is kept so the per-epoch metrics can be inspected afterwards.
fit_model_optimized = nn_optimized.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)


In [None]:
# Score the trained network on the held-out test set. With the metrics set at
# compile time, evaluate() returns the loss followed by the accuracy.
model_loss, model_accuracy = nn_optimized.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


In [None]:
# Persist the trained network to disk; the .h5 extension selects the HDF5 format.
nn_optimized.save(filepath="AlphabetSoupCharity_Optimization.h5")
