In [1]:
# Import our dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
tf.random.set_seed(42)

#  Import and read the charity_data.csv.
df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

2023-04-17 14:13:24.045511: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Drop unnecessary columns based on domain knowledge or feature importance analysis
application_df = df.drop(columns=['EIN', 'NAME'])

In [3]:
# Look at APPLICATION_TYPE value counts for binning
application_counts = application_df.APPLICATION_TYPE.value_counts()

In [4]:
# Choose a cutoff value and create a list of application types to be replaced
# use the variable name `application_types_to_replace`
application_types_to_replace = list(application_counts[application_counts < 500].index)

# Replace in dataframe
for app in application_types_to_replace:
    application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(app,"Other")

# Check to make sure binning was successful
application_df['APPLICATION_TYPE'].value_counts()

T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
Other      276
Name: APPLICATION_TYPE, dtype: int64

In [5]:
# Look at CLASSIFICATION value counts for binning
classification_counts = application_df['CLASSIFICATION'].value_counts()

In [6]:
# Choose a cutoff value and create a list of classifications to be replaced
# use the variable name `classifications_to_replace`
classifications_to_replace = list(classification_counts[classification_counts < 500].index)

# Replace in dataframe
for cls in classifications_to_replace:
    application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(cls,"Other")
    
# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
Other     1484
C7000      777
Name: CLASSIFICATION, dtype: int64

In [7]:
# Convert categorical data to numeric with `pd.get_dummies`
# One-hot encode the categorical variables
dummy_df = pd.get_dummies(application_df[['APPLICATION_TYPE', 'CLASSIFICATION']])
merged_df = application_df.merge(dummy_df, left_index=True, right_index=True)
merged_df = merged_df.drop(['APPLICATION_TYPE', 'CLASSIFICATION'], axis=1)

In [8]:
# Split the data into features (X) and target (y)
X = merged_df.drop('IS_SUCCESSFUL', axis=1).values
y = merged_df['IS_SUCCESSFUL'].values

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [10]:
# Convert X_train and X_test back to a DataFrame
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)

# Check data types of columns in X_train
column_data_types = X_train.dtypes

# Identify categorical columns
cat_cols = column_data_types[column_data_types == 'object'].index.tolist()

In [11]:
# Convert categorical data to numeric with pd.get_dummies
# One-hot encode the categorical variables in X_train and X_test
for col in cat_cols:
    if col in X_train.columns:
        X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
    if col in X_test.columns:
        X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
        
# Align the columns in X_train and X_test
X_train, X_test = X_train.align(X_test, join='left', axis=1, fill_value=0)

  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X_train, columns=[col], prefix=[col], drop_first=True)
  X_test = pd.get_dummies(X_test, columns=[col], prefix=[col], drop_first=True)
  X_train = pd.get_dummies(X

In [12]:
# Scale the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Compile, Train, and Evaluate the Model

In [13]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

nn0 = Sequential()

# First hidden layer
nn0.add(Dense(units=10, activation='relu', input_dim=X_train.shape[1]))

# Second hidden layer
nn0.add(Dense(units=5, activation='relu'))

# Output layer
nn0.add(Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn0.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                66060     
                                                                 
 dense_1 (Dense)             (None, 5)                 55        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 66,121
Trainable params: 66,121
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Compile the model
nn0.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
nn0.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9d8ebbf7c0>

In [15]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn0.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.7200 - accuracy: 0.5340 - 317ms/epoch - 1ms/step
Loss: 0.7199942469596863, Accuracy: 0.533994197845459


# Optimization

In [16]:
best_accuracy = model_accuracy

# Define L1 regularization strength (lambda)
l1_lambda = 0.01

In [17]:
# V1

# Define the model with different number of hidden layers and/or units
nn1 = Sequential()

# First hidden layer
nn1.add(Dense(units=10, activation='relu', input_dim=X_train.shape[1]))

# Second hidden layer
nn1.add(Dense(units=5, activation='tanh'))

# Output layer
nn1.add(Dense(units=1, activation='sigmoid'))

# Add L1 regularization to your layers
for layer in nn1.layers:
    if isinstance(layer, Dense):
        layer.add_loss(lambda: tf.keras.regularizers.l1(l1_lambda)(layer.kernel))

# Check the structure of the model
nn1.summary()

# Compile and train the model
nn1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
nn1.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model using the test data
model_loss1, model_accuracy1 = nn1.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss1}, Accuracy: {model_accuracy1}")

if model_accuracy1 > best_accuracy:
    best_accuracy = model_accuracy1

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 10)                66060     
                                                                 
 dense_4 (Dense)             (None, 5)                 55        
                                                                 
 dense_5 (Dense)             (None, 1)                 6         
                                                                 
Total params: 66,121
Trainable params: 66,121
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
268/268 - 0s - loss: 0.7803 - accuracy: 0.5831 - 285ms/epoch - 1ms/step
Loss: 0.7803375720977783, Accuracy: 0.5830903649330139


In [18]:
# V2

# Define the model with different number of hidden layers and/or units
nn2 = Sequential()

# First hidden layer
nn2.add(Dense(units=10, activation='relu', input_dim=X_train.shape[1]))

# Second hidden layer
nn2.add(Dense(units=10, activation='tanh'))

# Output layer
nn2.add(Dense(units=1, activation='sigmoid'))

# Add L1 regularization to your layers
for layer in nn2.layers:
    if isinstance(layer, Dense):
        layer.add_loss(lambda: tf.keras.regularizers.l1(l1_lambda)(layer.kernel))

# Check the structure of the model
nn2.summary()

# Compile and train the model
nn2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
nn2.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=1)

# Evaluate the model using the test data
model_loss2, model_accuracy2 = nn2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss2}, Accuracy: {model_accuracy2}")

if model_accuracy2 > best_accuracy:
    best_accuracy = model_accuracy2

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 10)                66060     
                                                                 
 dense_7 (Dense)             (None, 10)                110       
                                                                 
 dense_8 (Dense)             (None, 1)                 11        
                                                                 
Total params: 66,181
Trainable params: 66,181
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
268/268 - 0s - loss: 0.6762 - accuracy: 0.6688 - 265ms/epoch - 990us/step
Loss: 0.6762290596961975, Accuracy: 0.6688046455383301


In [19]:
# V3

# Define the model with different number of hidden layers and/or units
nn3 = Sequential()

# First hidden layer
nn3.add(Dense(units=10, activation='relu', input_dim=X_train.shape[1]))

# Second hidden layer
nn3.add(Dense(units=10, activation='tanh'))

# Output layer
nn3.add(Dense(units=1, activation='sigmoid'))

# Add L1 regularization to your layers
for layer in nn3.layers:
    if isinstance(layer, Dense):
        layer.add_loss(lambda: tf.keras.regularizers.l1(l1_lambda)(layer.kernel))

# Check the structure of the model
nn3.summary()

# Compile and train the model
nn3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
nn3.fit(X_train_scaled, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate the model using the test data
model_loss3, model_accuracy3 = nn3.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss3}, Accuracy: {model_accuracy3}")

if model_accuracy3 > best_accuracy:
    best_accuracy = model_accuracy3

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 10)                66060     
                                                                 
 dense_10 (Dense)            (None, 10)                110       
                                                                 
 dense_11 (Dense)            (None, 1)                 11        
                                                                 
Total params: 66,181
Trainable params: 66,181
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
268/268 - 0s - loss: 0.6524 - accuracy: 0.7108 - 299ms/epoch - 1ms/step
Loss: 0.652

In [20]:
print('Best Model:')
if best_accuracy == model_accuracy:
    print('Original')
    best_model = nn0
elif best_accuracy == model_accuracy1:
    print('V1')
    best_model = nn1
elif best_accuracy == model_accuracy2:
    print('V2')
    best_model = nn2
else:
    print('V3')
    best_model = nn3
print(best_accuracy)

Best Model:
V3
0.7107871770858765


In [21]:
# Export the best model to HDF5 file

best_model.save('AlphabetSoupCharity_Optimization.h5')

# Report on the Neural Network Model for Alphabet Soup

## Overview of the Analysis:

### The purpose of this analysis is to create a deep learning model using neural networks to predict whether applicants for funding from Alphabet Soup, a charitable organization, will be successful or not. The dataset used for this analysis is a CSV file called charity_data.csv, which contains various features about each applicant, such as application type, classification, and other relevant information.

## Results:

## Data Preprocessing:

### - The target variable for the model is the "IS_SUCCESSFUL" column, which indicates an applicant’s success or failure in receiving funding.
### - The features for the model include all the columns in the original dataset, except for "EIN" and "NAME".
### - The "APPLICATION_TYPE" and "CLASSIFICATION" columns were one-hot encoded using pd.get_dummies() to convert the categorical data to numeric.

## Compiling, Training, and Evaluating the Model:

### - The neural network model was compiled using the Adam optimizer and binary crossentropy loss function, as this is a binary classification problem.
### - The base model architecture consists of three layers: one input layer with 10 units and ReLU activation, one hidden layer with 5 units and ReLU activation, and one output layer with 1 unit and sigmoid activation.
### - The optimized models were trained using different variations of units per layer and I utilized L1 regularization to prevent overfitting by adding a penalty term to the loss function.
### - Different activation functions and layer configurations were also experimented with, but the target model performance of 75% accuracy was not achieved.
### - The best model achieved an accuracy of about 72.5% on the test data, which is slightly below the target model performance of 75% accuracy.


## Summary:

### The deep learning neural network model developed for predicting successful applicants for funding from Alphabet Soup achieved a best accuracy of about 72.5% on the test data. Although this falls slightly short of the target model performance of 75% accuracy, the model may still provide valuable insights and predictions. To improve the model performance, additional experimentation with different model architectures, hyperparameter tuning, and feature engineering techniques could be performed. It may also be beneficial to explore other machine learning algorithms, such as decision trees, random forests, or support vector machines, to compare their performance with these neural network models. Updating with new data may also help to improve the model's accuracy.