## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Import and read the charity_data.csv.
# pandas is already imported as `pd` together with the other dependencies,
# so it is not imported a second time here.
CHARITY_DATA_URL = "https://static.bc-edx.com/data/dl-1-2/m21/lms/starter/charity_data.csv"

application_df = pd.read_csv(CHARITY_DATA_URL)
application_df.head()

In [None]:
# Remove the identifier columns ('EIN' and 'NAME'); they are treated as non-beneficial here.
id_columns = ['EIN', 'NAME']
application_df.drop(columns=id_columns, inplace=True)
application_df.head()

In [None]:
# Count the distinct values held by every column and print the result.
unique_value_counts = application_df.nunique()
print(unique_value_counts)

In [None]:
# Frequency of each APPLICATION_TYPE value, used to decide what to bin
application_type_counts = application_df['APPLICATION_TYPE'].value_counts()
application_type_counts

In [None]:
# Application types selected for binning into a single bucket
# use the variable name `application_types_to_replace`
application_types_to_replace = [
    'T9', 'T13', 'T12', 'T2', 'T25',
    'T14', 'T15', 'T29', 'T17', 'T10',
]

# Relabel every listed type as "Other" with a single list-based replace
application_df['APPLICATION_TYPE'] = application_df['APPLICATION_TYPE'].replace(
    application_types_to_replace, "Other"
)

# Check to make sure binning was successful
application_df['APPLICATION_TYPE'].value_counts()

In [None]:
# Frequency of each CLASSIFICATION value, used to decide what to bin
classification_counts = application_df['CLASSIFICATION'].value_counts()
classification_counts

In [None]:
# Show only the CLASSIFICATION values that occur more than once
classification_counts = application_df['CLASSIFICATION'].value_counts()
classification_counts[classification_counts > 1]

In [None]:
# Classification codes selected for binning into a single bucket
# use the variable name `classifications_to_replace`
classifications_to_replace = [
    'C1700', 'C4000', 'C5000', 'C1270', 'C2700', 'C2800', 'C7100', 'C1300',
    'C1280', 'C1230', 'C1400', 'C2300', 'C7200', 'C1240', 'C8000', 'C7120',
    'C1500', 'C1800', 'C6000', 'C1250', 'C8200', 'C1278', 'C1238', 'C1235',
    'C1237', 'C7210', 'C1720', 'C2400', 'C4100', 'C1257', 'C1600', 'C1260',
    'C0', 'C2710', 'C1246', 'C1234', 'C1267', 'C1256', 'C3200', 'C2380',
    'C8210', 'C4200', 'C1236', 'C1248', 'C1370', 'C1820', 'C2570', 'C6100',
    'C1245', 'C1580', 'C1570', 'C1732', 'C2561', 'C4500', 'C2170', 'C2500',
    'C2190', 'C1283', 'C4120', 'C5200', 'C1900', 'C2150', 'C2600', 'C1728',
    'C3700',
]

# Relabel every listed classification as "Other" with a single list-based replace
application_df['CLASSIFICATION'] = application_df['CLASSIFICATION'].replace(
    classifications_to_replace, "Other"
)

# Check to make sure binning was successful
application_df['CLASSIFICATION'].value_counts()

In [None]:
# Convert categorical data to numeric with `pd.get_dummies`.
# Every group of indicator columns is prefixed with the name of its source column.
# Without a prefix the new columns are named after the raw category labels, so a
# label that appears in more than one source column (APPLICATION_TYPE contains the
# binned label "Other"; other columns may as well) would yield several columns with
# the same name once the frames are concatenated.
affiliation_dummies = pd.get_dummies(application_df['AFFILIATION'], prefix='AFFILIATION')
use_case_dummies = pd.get_dummies(application_df['USE_CASE'], prefix='USE_CASE')
organization_dummies = pd.get_dummies(application_df['ORGANIZATION'], prefix='ORGANIZATION')
special_consideration_dummies = pd.get_dummies(
    application_df['SPECIAL_CONSIDERATIONS'], prefix='SPECIAL_CONSIDERATIONS'
)
income_amount_dummies = pd.get_dummies(application_df['INCOME_AMT'], prefix='INCOME_AMT')
application_type_dummies = pd.get_dummies(
    application_df['APPLICATION_TYPE'], prefix='APPLICATION_TYPE'
)

In [None]:
# CLASSIFICATION holds string codes (e.g. 'C1000', 'Other') and was binned earlier,
# but it was never encoded or dropped. Left in place it ends up in the feature
# matrix and the StandardScaler cannot convert it to a number, so encode it here.
classification_dummies = pd.get_dummies(
    application_df['CLASSIFICATION'], prefix='CLASSIFICATION'
)

# Append all indicator columns in one concat, keeping the original column order
encoded_frames = [
    affiliation_dummies,
    use_case_dummies,
    organization_dummies,
    special_consideration_dummies,
    income_amount_dummies,
    application_type_dummies,
    classification_dummies,
]
application_df = pd.concat([application_df, *encoded_frames], axis=1)

# Drop the raw categorical columns now that their encoded versions are present
encoded_source_columns = [
    'AFFILIATION',
    'USE_CASE',
    'ORGANIZATION',
    'SPECIAL_CONSIDERATIONS',
    'INCOME_AMT',
    'APPLICATION_TYPE',
    'CLASSIFICATION',
]
application_df.drop(columns=encoded_source_columns, inplace=True)
application_df.head()

In [None]:
# Separate the preprocessed frame into the feature matrix (X) and the target vector (y)
target_column = 'IS_SUCCESSFUL'
X = application_df.drop(columns=[target_column])
y = application_df[target_column]

In [None]:
# Preview the first five feature rows
X.iloc[:5]

In [None]:
# Preview the first five target values
y.iloc[:5]

In [None]:
# Split the preprocessed data into training and testing sets.
# `stratify=y` keeps the class proportions of the target equal in both splits;
# `random_state` makes the split reproducible.
split_parts = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler on the training features only
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

## Compile, Train and Evaluate the Model

In [None]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
# The layer widths and the epoch count below are starting values to tune, not fixed requirements.
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30
training_epochs = 100

nn = tf.keras.models.Sequential()

# Declare the input width up front so the model is built before `summary()` is called
nn.add(tf.keras.Input(shape=(number_input_features,)))

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer: one sigmoid unit for the binary IS_SUCCESSFUL target
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

# Compile the model; the "accuracy" metric is what makes `evaluate` return (loss, accuracy)
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=training_epochs)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export our model to HDF5 file
#  YOUR CODE GOES HERE