In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Load the dataset
application_df = pd.read_csv("https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv")

# Display first few rows
application_df.head()


Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
application_df = application_df.drop(columns=['EIN', 'NAME'])


In [4]:
# Bin application types
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_types_to_replace = application_type_counts[application_type_counts < 500].index.tolist()
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(application_types_to_replace, "Other")

# Bin classifications
classification_counts = application_df['CLASSIFICATION'].value_counts()
classifications_to_replace = classification_counts[classification_counts < 1000].index.tolist()
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(classifications_to_replace, "Other")


In [5]:
application_df = pd.get_dummies(application_df, drop_first=True)


In [6]:
y = application_df['IS_SUCCESSFUL'].values
X = application_df.drop(columns=['IS_SUCCESSFUL']).values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
print(X_train.dtype)  # Should be a numeric type (e.g., float32)
print(y_train.dtype)  # Should be a numeric type (e.g., int32 or float32)


object
int64


In [9]:
import numpy as np

# Convert X_train and X_test to float32
X_train = np.asarray(X_train).astype('float32')
X_test = np.asarray(X_test).astype('float32')

# Convert y_train and y_test to int32
y_train = np.asarray(y_train).astype('int32')
y_test = np.asarray(y_test).astype('int32')


In [10]:
# Check for missing values
print(pd.DataFrame(X_train).isnull().sum())

# Fill or handle missing values if any
X_train = pd.DataFrame(X_train).fillna(0).values
X_test = pd.DataFrame(X_test).fillna(0).values


0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
35    0
dtype: int64


In [11]:
# Define the baseline model
baseline_nn = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(units=80, activation='relu'),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])
baseline_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
baseline_history = baseline_nn.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=2)


Epoch 1/50
686/686 - 3s - 4ms/step - accuracy: 0.4900 - loss: 46639.3438 - val_accuracy: 0.4829 - val_loss: 6441.6304
Epoch 2/50
686/686 - 2s - 3ms/step - accuracy: 0.4882 - loss: 58676.4062 - val_accuracy: 0.4829 - val_loss: 1382.2343
Epoch 3/50
686/686 - 2s - 3ms/step - accuracy: 0.4881 - loss: 57743.4922 - val_accuracy: 0.5171 - val_loss: 184046.5156
Epoch 4/50
686/686 - 2s - 3ms/step - accuracy: 0.5125 - loss: 61645.3320 - val_accuracy: 0.4829 - val_loss: 9044.6035
Epoch 5/50
686/686 - 1s - 2ms/step - accuracy: 0.5036 - loss: 44033.9961 - val_accuracy: 0.4829 - val_loss: 6133.5239
Epoch 6/50
686/686 - 2s - 3ms/step - accuracy: 0.5003 - loss: 15745.7578 - val_accuracy: 0.5171 - val_loss: 10992.1807
Epoch 7/50
686/686 - 1s - 2ms/step - accuracy: 0.5010 - loss: 47927.5508 - val_accuracy: 0.5171 - val_loss: 31949.8477
Epoch 8/50
686/686 - 1s - 2ms/step - accuracy: 0.4781 - loss: 20430.0645 - val_accuracy: 0.4829 - val_loss: 825.9550
Epoch 9/50
686/686 - 1s - 2ms/step - accuracy: 0.4880

In [13]:
# Evaluate the model on the test data
test_loss, test_accuracy = baseline_nn.evaluate(X_test, y_test, verbose=2)

# Print the results
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


215/215 - 0s - 1ms/step - accuracy: 0.5341 - loss: 0.6909
Test Loss: 0.6908509135246277
Test Accuracy: 53.41%
