## Preprocessing

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import itertools
from joblib import Parallel, delayed

In [2]:
# Load the charity dataset from the Resources folder into a DataFrame.
data = pd.read_csv(filepath_or_buffer="Resources/charity_data.csv")

In [3]:
# Remove the EIN and NAME columns before any encoding takes place.
data = data.drop(labels=["EIN", "NAME"], axis="columns")


In [4]:
# Fold APPLICATION_TYPE values that occur fewer than 100 times into "Other".
app_type_counts = data["APPLICATION_TYPE"].value_counts()
app_type_to_replace = app_type_counts.loc[app_type_counts.lt(100)].index
data["APPLICATION_TYPE"] = data["APPLICATION_TYPE"].replace(
    to_replace=app_type_to_replace,
    value="Other",
)

# Same treatment for CLASSIFICATION values that occur fewer than 100 times.
classification_counts = data["CLASSIFICATION"].value_counts()
classification_to_replace = classification_counts.loc[classification_counts.lt(100)].index
data["CLASSIFICATION"] = data["CLASSIFICATION"].replace(
    to_replace=classification_to_replace,
    value="Other",
)


In [5]:
# One-hot encode the listed columns; drop_first=True omits the first level
# of each encoded column.
data_encoded = pd.get_dummies(
    data=data,
    columns=[
        "APPLICATION_TYPE",
        "CLASSIFICATION",
        "AFFILIATION",
        "USE_CASE",
        "ORGANIZATION",
        "INCOME_AMT",
        "SPECIAL_CONSIDERATIONS",
    ],
    drop_first=True,
)


In [6]:
# Split the encoded frame into the target vector (IS_SUCCESSFUL) and the
# feature matrix (every other column), both as NumPy arrays.
y = data_encoded["IS_SUCCESSFUL"].to_numpy()
X = data_encoded.drop(columns=["IS_SUCCESSFUL"]).to_numpy()

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Fit the scaler on the training features only, then apply the same fitted
# transformation to both the training and the test features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)


In [9]:
# Number of feature columns in the scaled training matrix; used as the width
# of the network's input.
number_input_features = np.shape(X_train_scaled)[1]

In [10]:


def train_model(hidden_layer, neuron, optimizer):
    """Build, train and evaluate one fully connected binary classifier.

    The network has ``hidden_layer`` ReLU Dense layers of ``neuron`` units
    each, followed by a single sigmoid output unit. It is trained on the
    module-level ``X_train_scaled`` / ``y_train`` arrays and evaluated on
    ``X_test_scaled`` / ``y_test``.

    Args:
        hidden_layer: Number of hidden Dense layers; must be at least 1.
        neuron: Number of units in every hidden layer.
        optimizer: Optimizer passed straight to ``model.compile``
            (e.g. ``'adam'``).

    Returns:
        dict with keys ``'hidden_layers'``, ``'neurons_per_layer'``,
        ``'optimizer'``, ``'loss'`` and ``'accuracy'``; the last two are the
        values returned by ``model.evaluate`` on the test arrays.

    Raises:
        ValueError: If ``hidden_layer`` is smaller than 1. Without this check
            a request for 0 layers would still build one hidden layer and the
            returned row would misreport the architecture.
    """
    if hidden_layer < 1:
        raise ValueError(f"hidden_layer must be >= 1, got {hidden_layer!r}")

    # Build the model
    model = Sequential()

    # Declare the input width with an explicit Input object instead of the
    # deprecated `input_dim=` argument on the first Dense layer.
    model.add(tf.keras.Input(shape=(number_input_features,)))

    # Add hidden layers
    for _ in range(hidden_layer):
        model.add(Dense(units=neuron, activation='relu'))

    # Add output layer
    model.add(Dense(units=1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model. validation_split=0.2 withholds 20% of the training rows
    # from fitting; the resulting validation metrics are not used below.
    model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0, validation_split=0.2)

    # Evaluate the model.
    # NOTE(review): these metrics come from the test arrays, so choosing the
    # best configuration from them gives an optimistic estimate.
    loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

    return {
        'hidden_layers': hidden_layer,
        'neurons_per_layer': neuron,
        'optimizer': optimizer,
        'loss': float(loss),
        'accuracy': float(accuracy)
    }

# Define the hyperparameter grid: every combination of layer count, layer
# width and optimizer is trained once.
hidden_layers = [1, 2, 3]
neurons_per_layer = [32, 64, 128]
optimizers = ['adam', 'sgd', 'rmsprop']
combinations = list(itertools.product(hidden_layers, neurons_per_layer, optimizers))

# Train models in parallel
n_jobs = -1  # Set to the number of CPU cores or -1 to use all available cores
results = Parallel(n_jobs=n_jobs)(delayed(train_model)(hl, n, opt) for hl, n, opt in combinations)

# Build the results table in one step from the list of result dicts.
# DataFrame.append was removed in pandas 2.0, so the previous
# "empty frame + append" approach raises AttributeError there.
results_df = pd.DataFrame(
    results,
    columns=['hidden_layers', 'neurons_per_layer', 'optimizer', 'loss', 'accuracy'],
)

# Save the results to a CSV file
results_df.to_csv('model_optimization_results.csv', index=False)

results_df
