In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import keras_tuner as kt

#  Import and read the charity_data.csv.
import pandas as pd 
application_df = pd.read_csv("Resources/charity_data.csv")
application_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [2]:
application_drop = application_df.drop(columns=['EIN','NAME'])
app_type_counts = application_drop['APPLICATION_TYPE'].value_counts()
replace_application = app_type_counts.loc[app_type_counts < 400].keys()
for app in replace_application:
    application_df.APPLICATION_TYPE = application_df.APPLICATION_TYPE.replace(app,"Other")
classification_counts = application_drop['CLASSIFICATION'].value_counts()
replace_class = classification_counts.loc[
    classification_counts <= 1880].keys()
for cls in replace_class:
    application_df.CLASSIFICATION = application_df.CLASSIFICATION.replace(cls,"Other")
application_cat = application_drop.dtypes[
    application_drop.dtypes == 'object'].index.tolist()

In [3]:
enc = OneHotEncoder(sparse=False)
encode_df = pd.DataFrame(enc.fit_transform(application_drop[application_cat]))
encode_df.columns = enc.get_feature_names_out(application_cat)
application_df = application_drop.merge(encode_df, left_index=True, right_index=True)
application_df = application_df.drop(application_cat, 1)


  application_df = application_df.drop(application_cat, 1)


In [4]:
y = application_df['IS_SUCCESSFUL']
X = application_df.drop(['IS_SUCCESSFUL'],1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train = y_train.values.reshape(-1,1)

  X = application_df.drop(['IS_SUCCESSFUL'],1)


In [5]:
from tensorflow import keras
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:05d}.hdf5"
checkpoint_two = "checkpoints/weights_2.{epoch:05d}.hdf5"


In [6]:
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch'
)
cp_2 = ModelCheckpoint(
    filepath=checkpoint_two,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch'
)
stop_early = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    verbose=1
)

In [7]:
def build_model(hp):
    model = keras.Sequential()
    activation = hp.Choice('activation', ['relu','tanh','sigmoid'])
    model.add(keras.layers.Dense(
        hp.Choice('units', [16,32]),
        activation = activation,
        input_dim=116
    ))

    for i in range(hp.Int('num_layers', 1, 6)):
        model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
        min_value=1,
        max_value=10,
        step=2),
        activation=activation))
    
    model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])

    return model


In [8]:
build_model(kt.HyperParameters())

2023-03-26 15:51:27.900369: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-26 15:51:27.900781: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



<keras.engine.sequential.Sequential at 0x15a1ed220>

In [9]:
from keras_tuner.tuners import hyperband
 
tuner = kt.tuners.Hyperband(
    build_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2
)

In [None]:
tuner.search(X_train_scaled,y_train,
             epochs=20,
             callbacks=[cp_callback, stop_early],
             validation_data=(X_test_scaled,y_test))

In [None]:
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2023-03-25 22:25:43.558978: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


268/268 - 2s - loss: 0.5519 - accuracy: 0.7349 - 2s/epoch - 7ms/step
Loss: 0.5519341230392456, Accuracy: 0.7349271178245544


In [None]:
best_hyper = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Number of Layers: {best_hyper.get('units')}, Layers: {best_hyper.get('num_layers')}, activation: {best_hyper.get('activation')}")

Number of Layers: 16, Layers: 2, activation: relu


In [None]:
tuner.get_best_hyperparameters(1)[0].values

{'activation': 'relu',
 'units': 16,
 'num_layers': 2,
 'units_0': 5,
 'units_1': 9,
 'units_2': 5,
 'units_3': 1,
 'units_4': 5,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

### Second Attempt
---

In [11]:
import tensorflow_addons as tfa

In [12]:
def build_model_2(hp):
    model = keras.Sequential()
    activation = hp.Choice('activation', ['relu','sigmoid','tanh', 'softmax'])
    model.add(keras.layers.Dense(
        units=64,
        activation = activation,
        input_dim=116
    ))
    # for i in range(hp.Int('num_layers', 3, 4)):
    #     model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
    #     min_value=10,
    #     max_value=20,
    #     step=5),
    #     activation=activation))
    model.add(tf.keras.layers.Dense(units=32, activation=activation))
    model.add(tf.keras.layers.Dense(units=16, activation=activation))
    
    model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    model.compile(loss=tfa.losses.SigmoidFocalCrossEntropy(), optimizer='adam', metrics=['accuracy'])

    return model


In [13]:
tuner2 = kt.tuners.Hyperband(
    build_model_2,
    objective="val_accuracy",
    max_epochs=10,
    hyperband_iterations=3,
    overwrite=True
)

In [14]:
tuner2.search(
    X_train_scaled,y_train,
    epochs=20,
    validation_data=(X_test_scaled,y_test))

Trial 4 Complete [00h 00m 17s]
val_accuracy: 0.6193585991859436

Best val_accuracy So Far: 0.6735860109329224
Total elapsed time: 00h 01m 10s
INFO:tensorflow:Oracle triggered exit


### Third Attempt
---

In [None]:
application_drop = application_df.drop(columns=['EIN','NAME'])
app_type_counts = application_drop['APPLICATION_TYPE'].value_counts()
replace_application = app_type_counts.loc[app_type_counts < 400].keys()
for app in replace_application:
    application_df.APPLICATION_TYPE = application_df.APPLICATION_TYPE.replace(app,"Other")
classification_counts = application_drop['CLASSIFICATION'].value_counts()
replace_class = classification_counts.loc[
    classification_counts <= 1880].keys()
for cls in replace_class:
    application_df.CLASSIFICATION = application_df.CLASSIFICATION.replace(cls,"Other")
application_cat = application_drop.dtypes[
    application_drop.dtypes == 'object'].index.tolist()