In [1]:
#Preprocess the Data

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout, BatchNormalization

application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
application_df = application_df.drop(columns=['EIN', 'NAME'])

In [3]:
print(application_df.columns)

Index(['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE',
       'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS',
       'ASK_AMT', 'IS_SUCCESSFUL'],
      dtype='object')


In [4]:
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
classification_counts = application_df['CLASSIFICATION'].value_counts()

In [5]:
threshold = 10
application_types_to_replace = application_type_counts[application_type_counts < threshold].index.tolist()
classification_types_to_replace = classification_counts[classification_counts < threshold].index.tolist()

for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app, "Other")

for cls in classification_types_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls, "Other")\

application_df['CLASSIFICATION'].value_counts()

CLASSIFICATION
C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
Other       98
C2800       95
C7100       75
C1300       58
C1280       50
C1230       36
C1400       34
C7200       32
C2300       32
C1240       30
C8000       20
C7120       18
C1500       16
C1800       15
C6000       15
C1250       14
C8200       11
C1238       10
C1278       10
Name: count, dtype: int64

In [6]:
application_df = pd.get_dummies(application_df, columns=['APPLICATION_TYPE', 'AFFILIATION', 'CLASSIFICATION', 'USE_CASE', 'ORGANIZATION', 'STATUS', 'INCOME_AMT', 'SPECIAL_CONSIDERATIONS'], drop_first=True)
application_df.head()

Unnamed: 0,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_T10,APPLICATION_TYPE_T12,APPLICATION_TYPE_T13,APPLICATION_TYPE_T19,APPLICATION_TYPE_T2,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,...,STATUS_1,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M,SPECIAL_CONSIDERATIONS_Y
0,5000,1,True,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
1,108590,1,False,False,False,False,False,True,False,False,...,True,True,False,False,False,False,False,False,False,False
2,5000,0,False,False,False,False,False,False,False,True,...,True,False,False,False,False,False,False,False,False,False
3,6692,1,False,False,False,False,False,True,False,False,...,True,False,True,False,False,False,False,False,False,False
4,142590,1,False,False,False,False,False,True,False,False,...,True,False,False,True,False,False,False,False,False,False


In [7]:
print(application_df.columns)

Index(['ASK_AMT', 'IS_SUCCESSFUL', 'APPLICATION_TYPE_T10',
       'APPLICATION_TYPE_T12', 'APPLICATION_TYPE_T13', 'APPLICATION_TYPE_T19',
       'APPLICATION_TYPE_T2', 'APPLICATION_TYPE_T3', 'APPLICATION_TYPE_T4',
       'APPLICATION_TYPE_T5', 'APPLICATION_TYPE_T6', 'APPLICATION_TYPE_T7',
       'APPLICATION_TYPE_T8', 'APPLICATION_TYPE_T9',
       'AFFILIATION_Family/Parent', 'AFFILIATION_Independent',
       'AFFILIATION_National', 'AFFILIATION_Other', 'AFFILIATION_Regional',
       'CLASSIFICATION_C1200', 'CLASSIFICATION_C1230', 'CLASSIFICATION_C1238',
       'CLASSIFICATION_C1240', 'CLASSIFICATION_C1250', 'CLASSIFICATION_C1270',
       'CLASSIFICATION_C1278', 'CLASSIFICATION_C1280', 'CLASSIFICATION_C1300',
       'CLASSIFICATION_C1400', 'CLASSIFICATION_C1500', 'CLASSIFICATION_C1700',
       'CLASSIFICATION_C1800', 'CLASSIFICATION_C2000', 'CLASSIFICATION_C2100',
       'CLASSIFICATION_C2300', 'CLASSIFICATION_C2700', 'CLASSIFICATION_C2800',
       'CLASSIFICATION_C3000', 'CLASSIFICATI

In [10]:
y = application_df['IS_SUCCESSFUL']
X = application_df.drop(columns=['IS_SUCCESSFUL'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=78)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
#Optimize the Model

nn = tf.keras.models.Sequential()

nn.add(tf.keras.layers.Dense(units=128, input_dim=X_train_scaled.shape[1], activation='relu'))

nn.add(BatchNormalization())

nn.add(Dropout(0.2))

nn.add(tf.keras.layers.Dense(units=64, activation='relu'))

nn.add(BatchNormalization())

nn.add(Dropout(0.2))

nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [13]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [14]:
history = nn.fit(X_train_scaled, y_train, epochs=100, batch_size=32,
                 validation_data=(X_test_scaled, y_test), callbacks=[early_stopping])

Epoch 1/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.6626 - loss: 0.6644 - val_accuracy: 0.7203 - val_loss: 0.5691
Epoch 2/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7167 - loss: 0.5737 - val_accuracy: 0.7219 - val_loss: 0.5640
Epoch 3/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7252 - loss: 0.5634 - val_accuracy: 0.7216 - val_loss: 0.5611
Epoch 4/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7300 - loss: 0.5607 - val_accuracy: 0.7219 - val_loss: 0.5625
Epoch 5/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7257 - loss: 0.5627 - val_accuracy: 0.7210 - val_loss: 0.5582
Epoch 6/100
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7363 - loss: 0.5522 - val_accuracy: 0.7227 - val_loss: 0.5583
Epoch 7/100
[1m858/85

In [15]:
loss, accuracy = nn.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 952us/step - accuracy: 0.7321 - loss: 0.5448
Test Loss: 0.5528444647789001
Test Accuracy: 0.7249271273612976


In [16]:
nn.save('AlphabetSoupCharity_Optimization.h5')

