In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score,recall_score,precision_score
from explainerdashboard import ClassifierExplainer, ExplainerDashboard


The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`

The dash_table package is deprecated. Please replace
`import dash_table` with `from dash import dash_table`

Also, if you're using any of the table format helpers (e.g. Group), replace 
`from dash_table.Format import Group` with 
`from dash.dash_table.Format import Group`


In [2]:
feature_descriptions={'State':'Customer’s state',
'Account Length':'Integer number showing the duration of activity for customer account',
'Area Code':'Area code of customer',
'Phone':'Phone number of customer',
"Int'l Plan":'Binary indicator showing whether the customer has international calling plan',
'VMail Plan':'Indicator of voice mail plan',
'VMail Message':'The number of voicemail messages',
'Day Mins':'The number of minutes the customer used the service during day time',
'Day Calls':'Discrete attribute indicating the total number of calls during day time',
'Day Charge':'Charges for using the service during day time',
'Eve Mins':'The number of minutes the customer used the service during evening time',
'Eve Calls':' The number of calls during evening time',
'Eve Charge':' Charges for using the service during evening time',
'Night Mins':' Number of minutes the customer used the service during night time',
'Night Calls':'The number of calls during night time',
'Night Charge':'Charges for using the service during night time',
'Intl Mins':'Number of minutes the customer used the service to make international calls',
'Intl Calls':'The number of international calls',
'Intl Charge':'Charges for international calls',
'CustServ Calls':'The number of calls to customer support service'
}

# Import dataset

In [3]:
df=pd.read_csv('churn.csv')

# Data Transformation

### Transform phone and binary attributes

In [4]:
df['Phone_Number']='('+df['Area Code'].astype('str')+')'+df['Phone']
df["Int'l Plan"]=df["Int'l Plan"].apply(lambda i:0 if i=="no" else 1)
df['VMail Plan']=df['VMail Plan'].apply(lambda i:0 if i=="no" else 1)
df['Churn?']=df['Churn?'].apply(lambda c:0 if c=="False." else 1)

In [5]:
df.index=df['Phone_Number']

In [6]:
df=df.drop(columns=['Area Code','Phone','Phone_Number'])

### Divide into attributes and class

In [7]:
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

### Encoding State column

In [8]:
X=pd.get_dummies(X,columns=['State'])

### Splitting the dataset into the Training set and Test set

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [10]:
phone_numbers=X_test.index.tolist()

# Building model using Decision Tree

### Training the Decision Tree Classification model on the Training set

In [11]:
def decision_tree_class():
    classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
    classifier.fit(X_train, y_train)
    return classifier

In [12]:
dt_class=decision_tree_class()

In [13]:
explainer_dt_all = ClassifierExplainer(dt_class, X_test, y_test, 
                                descriptions=feature_descriptions,
                                labels=['Not Churn', 'Churn'], 
                                cats=['State'],
                                idxs = phone_numbers, 
                                index_name = "Phone Number", 
                                target = "Churn", 
                                )
db_dt_all=ExplainerDashboard(explainer_dt_all, 
                        title="Churn Prediction by Decision Tree", 
                        shap_interaction=False,
                        )
db_dt_all.run(port=8050)

Note: model_output=='probability', so assuming that raw shap output of DecisionTreeClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...
Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating predictions...
Calculating pred_percentiles...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explain

 * Running on http://0.0.0.0:8050/ (Press CTRL+C to quit)
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET / HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:45] "[37mGET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:03:46] "

### Making the Confusion Matrix for Decision Tree

In [14]:
def cm_scores(classifier):
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    print(cm)
    print(' ')
    print('True Positive is {}; True Negative is {}; False Positive is {}; False Negative is {}.\n'.format(tp,tn,fp,fn))
    accuracy=round(accuracy_score(y_test, y_pred),2)
    recall=round(recall_score(y_test,y_pred),2)
    precision=round(precision_score(y_test,y_pred),2)
    print('Accuracy is {}; recall is {}; precision is {}.'.format(accuracy,recall,precision))

In [15]:
cm_scores(dt_class)

[[672  47]
 [ 24  91]]
 
True Positive is 91; True Negative is 672; False Positive is 47; False Negative is 24.

Accuracy is 0.91; recall is 0.79; precision is 0.66.


# Building model using Naive Bayes

### Training the Naive Bayes Classification model on the Training set

In [16]:
def naive_bayes_class():
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    return classifier

In [17]:
nb_class=naive_bayes_class()

In [21]:
explainer_nb_all = ClassifierExplainer(nb_class, X_test, y_test, 
                                descriptions=feature_descriptions,
                                labels=['Not Churn', 'Churn'], 
                                cats=['State'],
                                idxs = phone_numbers, 
                                index_name = "Phone Number", 
                                target = "Churn", 
                                )
db_nb_all=ExplainerDashboard(explainer_nb_all, 
                        title="Churn Prediction by Naive Bayes", 
                        shap_interaction=False,
                        )
db_nb_all.run(port=8050)

Note: shap values for shap='kernel' normally get calculated against X_background, but paramater X_background=None, so setting X_background=shap.sample(X, 50)...
Generating self.shap_explainer = shap.KernelExplainer(model, X, link='identity')
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...


  0%|          | 0/834 [00:00<?, ?it/s]

Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating predictions...
Calculating pred_percentiles...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...
Starting ExplainerDashboard on http://192.168.2.227:8050
Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

Dash is running on http://0.0.0.0:8050/

 * Serving Flask app "explainerdashboard.dashboards" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://0.0.0.0:8050/ (Press CTRL+C to quit)
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET / HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:14] "[37mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:15] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:15] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
192.168.2.227 - - [13/Nov/2021 09:37:15] "[37mPOST /_dash-update-component HTTP/1.1[0m" 204 -
192.168.2.227 - - [13/Nov/2021 09:37:15] "

### Making the Confusion Matrix for Decision Tree

In [18]:
cm_scores(nb_class)

[[424 295]
 [ 49  66]]
 
True Positive is 66; True Negative is 424; False Positive is 295; False Negative is 49.

Accuracy is 0.59; recall is 0.57; precision is 0.18.
