In [1]:
# Import necessary libraries
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the charity_data.csv into a Pandas DataFrame
url = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"
df = pd.read_csv(url)





In [2]:
# Data Preprocessing
y = df['IS_SUCCESSFUL']
X = df.drop(['EIN', 'NAME', 'IS_SUCCESSFUL'], axis=1)

In [3]:
# Determine the number of unique values in each column.
unique_values = X.nunique()

# For columns with more than 10 unique values, determine the number of data points for each unique value.
for column in X.columns:
    if unique_values[column] > 10:
        print(f"{column} value counts:\n{X[column].value_counts()}\n")

APPLICATION_TYPE value counts:
APPLICATION_TYPE
T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: count, dtype: int64

CLASSIFICATION value counts:
CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C4120        1
C8210        1
C2561        1
C4500        1
C2150        1
Name: count, Length: 71, dtype: int64

ASK_AMT value counts:
ASK_AMT
5000        25398
10478           3
15583           3
63981           3
6725            3
            ...  
5371754         1
30060           1
43091152        1
18683           1
36500179        1
Name: count, Length: 8747, dtype: int64



In [4]:
# Choose a cutoff value and create a list of categorical variables to be replaced with "Other"
cutoff_value = 500
columns_to_replace = []
for column in X.columns:
    if unique_values[column] > 10:
        value_counts = X[column].value_counts()
        columns_to_replace.extend(list(value_counts[value_counts < cutoff_value].index))

In [5]:
# Replace in the dataframe
for column in X.columns:
    if unique_values[column] > 10:
        X[column] = X[column].replace(columns_to_replace, "Other")

# Check to make sure binning was successful
for column in X.columns:
    if unique_values[column] > 10:
        print(f"{column} value counts after binning:\n{X[column].value_counts()}\n")

APPLICATION_TYPE value counts after binning:
APPLICATION_TYPE
T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: count, dtype: int64

CLASSIFICATION value counts after binning:
CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
Other     1484
C7000      777
Name: count, dtype: int64

ASK_AMT value counts after binning:
ASK_AMT
5000     25398
Other     8901
Name: count, dtype: int64



In [6]:
# Use pd.get_dummies() to encode categorical variables
X_encoded = pd.get_dummies(X)

In [7]:
# Split the preprocessed data into a features array, X, and a target array, y.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, random_state=42, stratify=y)

In [8]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the training and testing features datasets
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Design a neural network model
nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Dense(units=80, input_dim=len(X_train.columns), activation='relu'))
nn.add(tf.keras.layers.Dense(units=30, activation='relu'))
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))




In [10]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])




In [11]:
# Create a callback that saves the model's weights every five epochs
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    "AlphabetSoupCharity.h5", save_weights_only=True, save_best_only=True, monitor='accuracy', mode='max', verbose=2, period=5
)




In [12]:
# Train the model
nn.fit(X_train_scaled, y_train, epochs=25, callbacks=[checkpoint_callback])

Epoch 1/25


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 5: accuracy improved from -inf to 0.73472, saving model to AlphabetSoupCharity.h5
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 10: accuracy improved from 0.73472 to 0.73577, saving model to AlphabetSoupCharity.h5
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 15: accuracy improved from 0.73577 to 0.73760, saving model to AlphabetSoupCharity.h5
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 20: accuracy improved from 0.73760 to 0.73869, saving model to AlphabetSoupCharity.h5
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 25: accuracy did not improve from 0.73869


<keras.src.callbacks.History at 0x1c5f123c410>

In [13]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.5538 - accuracy: 0.7242 - 292ms/epoch - 1ms/step
Loss: 0.5538214445114136, Accuracy: 0.7241982221603394


In [14]:
# Save and export the results to an HDF5 file
nn.save("AlphabetSoupCharity.h5")

  saving_api.save_model(
