### Requirements

#### Import Modules

In [1]:
# IMPORT MODULES

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from termcolor import colored
from matplotlib import pyplot as plt
import dash_html_components as html
import dash_core_components as dcc
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from explainerdashboard import ClassifierExplainer, RegressionExplainer, ExplainerDashboard

from sklearn.utils.class_weight import compute_class_weight
import ipywidgets as widgets
import itertools

#### Display Settings

In [2]:
# Lift pandas' display truncation for both columns and rows (None = no limit).
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

#### Functions

In [2]:
def quickly_check(dataframe):
    """
    Print a quick structural summary of the dataset passed in.

    Sections printed, in order:

    Shape info
    Types info
    NA info (missing-value count per column)
    Summary quantiles (numeric columns only)

    Parameters
    ----------
        dataframe: dataframe
            The pandas DataFrame to summarise. It is only read, never modified.

    Returns
    -------
        None

    Examples
    --------
        import seaborn as sns
        df = sns.load_dataset("titanic")
        quickly_check(df)

    """
    print(colored("##################### SHAPE #####################", "green", attrs=["bold"]))
    print(colored(dataframe.shape, "grey", attrs=["bold"]))

    print(colored("##################### TYPES #####################", "green", attrs=["bold"]))
    print(colored(dataframe.dtypes, "grey", attrs=["bold"]))

    print(colored("##################### NA #####################", "green", attrs=["bold"]))
    print(colored(dataframe.isnull().sum(), "grey", attrs=["bold"]))

    print(colored("##################### QUANTILES #####################", "green", attrs=["bold"]))
    # numeric_only=True is required on pandas >= 2.0, where quantile() no longer
    # silently skips object/string columns and raises instead. On older pandas
    # this matches the previous default, so the printed table is unchanged.
    quantile_table = dataframe.quantile([0, 0.05, 0.50, 0.95, 0.99, 1], numeric_only=True).T
    print(colored(quantile_table, "grey", attrs=["bold"]))
    print(colored("#################################################", "green", attrs=["bold"]))


In [1]:
def label_encoder(dataframe, binary_col):
    """
    Replace one column with integer labels produced by scikit-learn's LabelEncoder.

    Parameters
    ----------
        dataframe: dataframe
            Frame holding the column; it is modified in place.
        binary_col: str
            Name of the column to encode.

    Returns
    -------
        dataframe
            The same frame object that was passed in, with ``binary_col`` overwritten.
    """
    encoder = preprocessing.LabelEncoder()
    encoded_values = encoder.fit_transform(dataframe[binary_col])
    # Assigning back onto the argument mutates the caller's frame as well.
    dataframe[binary_col] = encoded_values
    return dataframe


def one_hot_encoder(dataframe, categorical_cols, drop_first=False):
    """
    One-hot encode the given columns with ``pd.get_dummies``.

    Parameters
    ----------
        dataframe: dataframe
            Frame to encode; it is not modified.
        categorical_cols: list
            Column names to expand into dummy columns.
        drop_first: bool, default False
            Passed through to ``pd.get_dummies``; when True the first level of
            each encoded column is dropped.

    Returns
    -------
        dataframe
            A new frame in which the listed columns are replaced by dummy columns.
    """
    return pd.get_dummies(dataframe, columns=categorical_cols, drop_first=drop_first)

### EDA

#### Data Import and Check Feature

In [6]:
# Load the churn dataset from a CSV in the working directory, then preview its first rows.
df = pd.read_csv("churn2.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# Column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [8]:
# Structural overview of df (shape, dtypes, missing values, quantiles) via quickly_check.
quickly_check(df)

##################### SHAPE #####################
(10000, 14)
##################### TYPES #####################
RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object
##################### NA #####################
RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64
##################### QUANTILES #####################
                        0.00          0.05          0.50          0.95  \
RowNumber     

In [9]:
# Number of unique customers (distinct CustomerId values)

banner = "#" * 100
customer_count = df["CustomerId"].nunique()
message = "✔ Veri setinde {} adet eşsiz müşteri vardır.".format(customer_count)
print(banner, "\n", message, "\n", banner)

#################################################################################################### 
 ✔ Veri setinde 10000 adet eşsiz müşteri vardır. 
 ####################################################################################################


In [10]:
# Number of unique Geography values

banner = "#" * 100
geography_count = df["Geography"].nunique()
message = "✔ Veri setinde {} adet eşsiz Geography vardır.".format(geography_count)
print(banner, "\n", message, "\n", banner)

#################################################################################################### 
 ✔ Veri setinde 3 adet eşsiz Geography vardır. 
 ####################################################################################################


In [11]:
# Row count per Geography value.
df["Geography"].value_counts()

France     5014
Germany    2509
Spain      2477
Name: Geography, dtype: int64

In [14]:
# Same four summary statistics for Age and EstimatedSalary, grouped by Geography.
summary_stats = ["sum", "mean", "min", "max"]
df.groupby("Geography").agg({column: summary_stats for column in ("Age", "EstimatedSalary")})

Unnamed: 0_level_0,Age,Age,Age,Age,EstimatedSalary,EstimatedSalary,EstimatedSalary,EstimatedSalary
Unnamed: 0_level_1,sum,mean,min,max,sum,mean,min,max
Geography,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
France,193098,38.511767,18,92,500894500.0,99899.180814,90.07,199929.17
Germany,99787,39.771622,18,84,253693600.0,101113.435102,11.58,199970.74
Spain,96333,38.890997,18,88,246314300.0,99440.572281,417.41,199992.48


#### Data Prep

In [15]:
# Remove columns that are not used downstream.
# Only columns still present are dropped, and the result is rebound instead of
# mutated with inplace=True, so re-running this cell on an already-trimmed
# frame no longer raises KeyError.
drop_cols = ["Surname", "RowNumber"]
df = df.drop(columns=[col for col in drop_cols if col in df.columns])

# Use CustomerId as the index (skipped when it already is the index).
if df.index.name != "CustomerId":
    df = df.set_index("CustomerId")

In [16]:
# Preview the frame after dropping columns and indexing by CustomerId.
df.head()

Unnamed: 0_level_0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
CustomerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
15634602,619,France,Female,42,2,0.0,1,1,1,101348.88,1
15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [17]:
# Encoding
# Gender becomes integer labels via label_encoder; Geography becomes dummy
# columns via one_hot_encoder, with the first level dropped (drop_first=True).

df = label_encoder(df, "Gender")
df = one_hot_encoder(df, ["Geography"], drop_first=True)

#### Base Model

In [18]:
# Features are every column except the target; the target is the Exited flag.
X = df.drop("Exited", axis=1)
y = df["Exited"]

# Hold out 20% of the rows for the explainer; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# random_state makes the forest itself reproducible. Without it every run
# fits a different model, so the dashboard numbers would change between runs
# even though the split above is seeded.
model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=1)
model.fit(X_train, y_train)

#### Explainerdashboard

In [19]:
# Human-readable (Turkish) descriptions keyed by column name.

desc = dict(
    CreditScore="Müşteri Kredi Skoru",
    Gender="Cinsiyet",
    Age="Müşteri Yaşı",
    Tenure="Müşteri Olma Yaşı",
    Balance="Balance",
    NumOfProducts="Müşterinin Ürün Sayısı",
    HasCrCard="Müşterinin Kredi Kartı Var mı?",
    IsActiveMember="Müşterinin Aktifliği",
    EstimatedSalary="Müşterinin Tahmini Maaşı",
    Exited="Churn Durumu",
)

In [20]:
# Build the explainer on the held-out split.
# `target` is the display name of the predicted variable and is expected to be
# a plain string; a one-element list would be rendered as "['Exited']".
explainer = ClassifierExplainer(model, X_test, y_test,
                                descriptions=desc,
                                target="Exited")

Note: shap=='guess' so guessing for RandomForestClassifier shap='tree'...
Detected RandomForestClassifier model: Changing class type to RandomForestClassifierExplainer...
Note: model_output=='probability', so assuming that raw shap output of RandomForestClassifier is in probability space...
Generating self.shap_explainer = shap.TreeExplainer(model)


In [21]:
# Validation df: explainer built without labels (kept commented out)

# explainer = ClassifierExplainer(model, X_test,
                                #descriptions=desc,
                                #target=["Exited"])

In [None]:
# NOTE(review): `serve` is not referenced in the visible cells; the import is
# kept in case the dashboard is meant to be served through waitress.
from waitress import serve


# Bind the dashboard object first and start it separately. Chaining .run()
# onto the constructor stored run()'s return value in `db` instead of the
# dashboard, leaving nothing to inspect or re-run afterwards.
db = ExplainerDashboard(explainer, title="--DASHBOARD NAME--", shap_interaction=False)
db.run()