# DP21: TensorFlow/Keras

## Install dependencies

In [None]:
!pip install tensorflow
!pip install Keras
!pip install shap

## Import Libraries

In [None]:
import pandas as pd
import numpy as np 
import sklearn

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt  
from sklearn import datasets, metrics, model_selection, svm
from sklearn import metrics
from matplotlib import pyplot
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss
import shap


# Keras specific
import keras
from keras.models import Sequential
from keras.layers import Dense

#Domo
import domojupyter as domo

## Read data from Domo and inspect

In [None]:
# NOTE(review): `output` is not defined in any cell visible above this one, so on a
# fresh kernel (Restart & Run All) this cell presumably raises NameError — confirm
# where `output` is meant to be built, or move this write to after that point.
# Presumably writes `output` to the Domo dataset named 'Retail SHAP1' — TODO confirm.
import domojupyter as domo
domo.write_dataframe(output, 'Retail SHAP1')

In [None]:
# Load every row of the Domo input registered as 'data' into `df`,
# then display its column names for inspection.
df = domo.read_dataframe(
    'data',
    query='SELECT * FROM table',
)
df.columns

In [None]:
# Feature columns that are one-hot encoded before being fed to the model.
CATEGORICAL_COLUMNS = [
    'MKT_CHANNEL',
    'AGE_BAND',
]
# Target column, kept as a one-element list so that df[TARGET_COLUMN]
# selects a DataFrame rather than a Series.
TARGET_COLUMN = ['ORDERED']

## Prep data for model

In [None]:
# Split: hold out 20% of the rows for evaluation; random_state fixes the shuffle
# so the split is reproducible.
X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(
    df[CATEGORICAL_COLUMNS],
    df[TARGET_COLUMN],
    test_size=0.20,
    random_state=40,
)

# One-hot encoding.
# The training split defines the feature columns. The test split is encoded
# separately and then re-indexed onto the training columns, so a category that
# happens to be absent from one split cannot change the number or order of
# features (missing columns are filled with 0; columns unseen in training are dropped).
# Casting to float32 gives the model a numeric matrix regardless of which dtype
# the installed pandas version uses for dummy columns.
X_train_dummies = pd.get_dummies(X_train_df).astype('float32')
X_test_dummies = (
    pd.get_dummies(X_test_df)
    .reindex(columns=X_train_dummies.columns, fill_value=0)
    .astype('float32')
)

# Get matrix values
X_train = X_train_dummies.values
y_train = y_train_df.values
X_test = X_test_dummies.values
y_test = y_test_df.values

# Inspect: train and test feature matrices must have the same number of columns.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

## Define Keras model

In [None]:
# A single sigmoid unit on top of the one-hot features, trained with
# binary cross-entropy.
model = Sequential()
# The input width is taken from the encoded training matrix instead of a
# hard-coded 9, so the model still builds if the set of categories changes.
model.add(Dense(1, input_dim=X_train.shape[1], activation="sigmoid"))
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['binary_accuracy'])

# NOTE(review): no framework random seed is set, so fitted weights can differ between runs.
model.fit(
  x=X_train,
  y=y_train,
  shuffle=True,
  epochs=5,
  batch_size=16
)

In [None]:
# Print the layer/parameter summary of the fitted model.
model.summary()

## Validate Model

In [None]:
# Score the held-out test matrix. With the sigmoid output layer defined above,
# each prediction is a value in [0, 1]; these are thresholded into labels later.
preds = model.predict(X_test)
preds

## Evaluate Model

### ROC Values

In [None]:
# ROC curve points computed from the raw predicted scores on the test set.
fpr, tpr, thresholds = metrics.roc_curve(y_test, preds)

# One single-column frame per ROC component ...
df_fpr = pd.DataFrame({'fpr': fpr})
df_tpr = pd.DataFrame({'tpr': tpr})
df_thres = pd.DataFrame({'thresholds': thresholds})

# ... stitched side by side (all three share the same default 0..n-1 index).
df_roc = pd.concat([df_fpr, df_tpr, df_thres], axis=1)
df_roc.head()

In [None]:
# Plot ROC curve
pyplot.plot(fpr, tpr, marker='.', label='Logistic')
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
# Title the figure so it stands alone when the notebook is skimmed.
pyplot.title('ROC Curve')
# The series label above is only rendered if a legend is drawn.
pyplot.legend()
pyplot.show()

### Accuracy

In [None]:
# Attach the predicted probabilities to the (un-encoded) test features.

# Test features restricted to the training feature column names.
X_test_df1 = pd.DataFrame(data=X_test_df, columns=list(X_train_df.columns))
preds_test_df = pd.DataFrame(data=preds, columns=['pred_prob'])

# The index is reset so rows line up positionally with the prediction frame.
df1 = pd.concat(
    [X_test_df1.reset_index(drop=True), preds_test_df],
    axis=1,
)

# Hard label: 0 when the probability is at or below 0.6, otherwise 1.
df1['ordered_pred'] = np.where(df1['pred_prob'].le(0.6), 0, 1)
df1.head()

In [None]:
# Accuracy: share of test rows whose thresholded prediction matches the actual label.
accuracy = accuracy_score(
    y_true=y_test,
    y_pred=df1['ordered_pred'],
)
accuracy

### Confusion Matrix

In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out as cm[actual, predicted]. With labels=[0, 1]:
#   cm[0, 0] = actual 0 / predicted 0    cm[0, 1] = actual 0 / predicted 1
#   cm[1, 0] = actual 1 / predicted 0    cm[1, 1] = actual 1 / predicted 1
# Passing labels explicitly keeps the matrix 2x2 even if a class is absent.
# NOTE(review): assumes the target is coded 1 = purchased, 0 = not purchased — confirm.
cm = confusion_matrix(y_test, df1['ordered_pred'], labels=[0, 1])

# Each "Actual - ..." column lists [predicted purchased, predicted not purchased],
# matching the row labels in 'Predicted/Actual'.
d = {
    'Predicted/Actual': ['Predicted - Purchased', 'Predicted - Not Purchased'],
    'Actual - Purchased': [cm[1, 1], cm[1, 0]],
    'Actual - Not Purchased': [cm[0, 1], cm[0, 0]],
}
df_conf_matrix = pd.DataFrame(data=d)
df_conf_matrix

### ROC AUC

In [None]:
# ROC AUC is a ranking metric, so it is scored on the predicted probabilities.
# Scoring the thresholded 0/1 labels collapses the curve to a single operating
# point and would disagree with the ROC curve computed above from the raw scores.
roc_auc = roc_auc_score(y_test, df1['pred_prob'])
roc_auc

### F1 Score

In [None]:
# Macro-averaged F1 of the thresholded predictions against the actual labels.
f1 = f1_score(
    y_true=y_test,
    y_pred=df1['ordered_pred'],
    average='macro',
)
f1

### Log Loss

In [None]:
# Log loss is defined on predicted probabilities, so it is scored on `pred_prob`
# rather than the thresholded 0/1 labels.
# The function is called through the `metrics` module because this cell rebinds
# the name `log_loss` to the resulting value: calling the bare imported
# `log_loss(...)` here would fail the second time the cell is run.
log_loss = metrics.log_loss(y_test, df1['pred_prob'])
log_loss

### SHAP - Feature Importance

In [None]:
# Summarize the test set (X_test, not the training set) with shap.kmeans to
# accelerate analysis; the second argument, 5, is the k passed to shap.kmeans.
df_test_normed_summary = shap.kmeans(X_test, 5)

# Instantiate an explainer from the model's predict function, using the
# k-means summary of the test set as its background data
explainer = shap.KernelExplainer(model.predict, df_test_normed_summary)

# Extract Shapley values for every row of the test set
shap_values = explainer.shap_values(X_test)

In [None]:
# Plot SHAP values against the one-hot encoded test features (column names come
# from X_test_dummies).
# NOTE(review): the [0] index assumes shap_values is a sequence with one entry per
# model output; the return shape of KernelExplainer.shap_values may depend on the
# installed shap version — confirm.
shap.summary_plot(shap_values[0], X_test_dummies)

## Predict on Prod

In [None]:
# Load every row of the Domo input registered as 'Retail | Prod' into `prod`,
# then display its column names for inspection.
prod = domo.read_dataframe(
    'Retail | Prod',
    query='SELECT * FROM table',
)
prod.columns

In [None]:
# One-hot encoding.
# The encoded prod frame is re-indexed onto the training feature columns so the
# matrix passed to the model has exactly the columns, in exactly the order, it
# was fit on. Categories missing from prod become all-zero columns; categories
# that were not present at training time are dropped.
X_prod_dummies = (
    pd.get_dummies(prod[CATEGORICAL_COLUMNS])
    .reindex(columns=X_train_dummies.columns, fill_value=0)
    .astype('float32')
)

# Get matrix values
X_prod = X_prod_dummies.values

# Inspect: the second dimension should equal the training feature count.
print(X_prod.shape)

In [None]:
# Predict: score the encoded prod matrix with the fitted model and display the
# raw model outputs.
preds_prod = model.predict(X_prod)
preds_prod

In [None]:
# Combine: attach the predicted probabilities to the original prod rows.
preds_prod_df = pd.DataFrame(data=preds_prod, columns=['pred_prob'])

# The index is reset so rows line up positionally with the prediction frame.
prod_df1 = pd.concat(
    [prod.reset_index(drop=True), preds_prod_df],
    axis=1,
)

# Hard label: 0 when the probability is at or below 0.6, otherwise 1.
prod_df1['ordered_pred'] = np.where(prod_df1['pred_prob'].le(0.6), 0, 1)
prod_df1.head()