In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import sweetviz as sv
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Read the customer dataset from disk, then preview its first row as a sanity check.
CUSTOMER_CSV = "./data/customer_data.csv"
df = pd.read_csv(CUSTOMER_CSV)
df.head(n=1)

In [None]:
# Run sweetviz's automated analysis over the full dataframe.
report = sv.analyze(df)
# Emit the analysis as an HTML report (called with sweetviz's default arguments).
report.show_html()

In [None]:
# Split the dataframe into independent variables (X) and the target variable (y).
# iloc[rows, columns]: keep every row; features are the columns from position 3
# up to (but excluding) the last column, and the target is the last column.
feature_columns = slice(3, -1)
target_column = -1
X = df.iloc[:, feature_columns].values
y = df.iloc[:, target_column].values

In [None]:
# Encoding categorical data

## Gender (feature column 2): replace each category with an integer label
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes

## Geography (feature column 1): one-hot encode it and pass every other column through untouched
geography_step = ('encoder', OneHotEncoder(), [1])
ct = ColumnTransformer(transformers=[geography_step], remainder='passthrough')
X_encoded = ct.fit_transform(X)
X = np.array(X_encoded)

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Scale Features
sc = StandardScaler()
# Learn the scaling statistics from the training set only, then scale it.
X_train = sc.fit_transform(X_train)
# Reuse the training statistics for the test set. Refitting here would scale
# train and test inconsistently and would leave `sc` holding test-set
# statistics for any later sc.transform(...) call on new samples.
X_test = sc.transform(X_test)

In [None]:
# Build ANN
## Initialize ANN
# NB:
# - output neurons = 1 because we are dealing with a binary output
# - "sigmoid" on the output layer lets us read the prediction as a probability
# - use "softmax" for non-binary (multi-class) classification
# - for binary classification use the "binary_crossentropy" loss
# - for non-binary classification use "categorical_crossentropy" or others

def create_model():
    """Build and compile the binary-classification ANN.

    Architecture: two hidden Dense layers of 6 ReLU units each, followed by a
    single sigmoid output unit. No input shape is declared here, so the model
    is built when it first sees data.

    Returns:
        A compiled tf.keras Sequential model using the "adam" optimizer, the
        "binary_crossentropy" loss, and "accuracy" as the tracked metric.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units=6, activation="relu"),  # first hidden layer
        tf.keras.layers.Dense(units=6, activation="relu"),  # second hidden layer
        # Sigmoid keeps the output in (0, 1), so it is a valid probability for
        # binary_crossentropy and for the 0.5 decision threshold used when
        # testing. ReLU is unbounded above and outputs exactly 0 for negative
        # pre-activations, which is not a probability.
        tf.keras.layers.Dense(units=1, activation="sigmoid"),  # output layer
    ])

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

model = create_model()

## Train the ANN on the training set
BATCH_SIZE = 32   # samples per gradient update
EPOCHS = 100      # full passes over the training data
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

In [None]:
# Test model
y_pred = model.predict(X_test)
# Threshold the model outputs at 0.5 (True = predicted 1, False = predicted 0)
y_pred = y_pred > 0.5
# Print predictions next to the ground truth: column 0 = predicted, column 1 = actual
predicted_col = y_pred.reshape(-1, 1)
actual_col = y_test.reshape(-1, 1)
print(np.concatenate((predicted_col, actual_col), axis=1))

In [None]:
# Confusion matrix of true labels vs. thresholded predictions, printed as text
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Overall accuracy is the cell's displayed value
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Make prediction
## Predict probability of customer leaving the bank
# One sample with 12 values, already in the encoded feature layout.
# NOTE(review): presumably the three leading values are the one-hot Geography
# columns followed by the passthrough features - confirm against the encoder output.
new_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
# Apply the same scaler used for the training features before predicting.
new_customer_scaled = sc.transform(new_customer)
model.predict(new_customer_scaled)

In [None]:
# Save model as HDF5
MODEL_PATH = "./saved_model/my_model.h5"
model.save(MODEL_PATH)

# Load the saved file back into a separate model object
new_model = tf.keras.models.load_model(MODEL_PATH)

# Show the model architecture
new_model.summary()

### To improve model performance, load weights from checkpoints
See the TensorFlow tutorial on saving and loading models: <https://www.tensorflow.org/tutorials/keras/save_and_load>