<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#-Modelling-Customer-Churn:-Ensembling-of-models" data-toc-modified-id="-Modelling-Customer-Churn:-Ensembling-of-models-1"><span class="toc-item-num">1&nbsp;&nbsp;</span> Modelling Customer Churn: Ensembling of models</a></span><ul class="toc-item"><li><span><a href="#Colab" data-toc-modified-id="Colab-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Colab</a></span></li></ul></li><li><span><a href="#-Useful-Scripts-" data-toc-modified-id="-Useful-Scripts--2"><span class="toc-item-num">2&nbsp;&nbsp;</span> Useful Scripts </a></span></li><li><span><a href="#-Load-the-Data-" data-toc-modified-id="-Load-the-Data--3"><span class="toc-item-num">3&nbsp;&nbsp;</span> Load the Data </a></span></li><li><span><a href="#Combining-All-predictions" data-toc-modified-id="Combining-All-predictions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Combining All predictions</a></span></li></ul></div>

<div class="alert alert-block alert-success">
<b>Kernel Author:</b>  <br>
<a href="https://bhishanpdl.github.io/" , target="_blank">Bhishan Poudel, Ph.D Astrophysics</a> .
</div>

<a id="data-desc"></a>

<h1> Modelling Customer Churn: Ensembling of models</h1>

<a href="#top" class="btn btn-primary btn-sm" role="button" aria-pressed="true" style="color:white" data-toggle="popover" title="go to TOC">Go to Top</a>

References


In [1]:
import time

time_start_notebook = time.time()

## Colab

In [2]:
%%capture
import sys
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # usual imports
    !pip install watermark
    !pip install scikit-plot


    print('Environment: Google Colab')

In [9]:
import numpy as np
import pandas as pd
import os,sys,time,glob
import joblib
from tqdm import tqdm_notebook as tqdm

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly_express as px

# machine learning
import sklearn.metrics as skmetrics
import scikitplot.metrics as skpmetrics


# settings
sns.set()
SEED = 100
pd.set_option('max_columns',100)
pd.set_option('max_colwidth',200)
pd.set_option('plotting.backend','matplotlib') # matplotlib, bokeh, altair, plotly

%matplotlib inline
%load_ext watermark
%watermark -iv

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
autopep8       1.5.2
numpy          1.19.4
imblearn       0.7.0
joblib         0.17.0
seaborn        0.11.0
json           2.0.9
plotly_express 0.4.1
pandas         1.1.4



<a id="useful" ></a>

<h1> Useful Scripts </h1> 

<a href="#top" class="btn btn-primary btn-sm" role="button" aria-pressed="true" style="color:white" data-toggle="popover" title="go to TOC">Go to Top</a>

In [4]:
def show_methods(obj, ncols=4,contains=None):
    lst = [i for i in dir(obj) if i[0]!='_' ]
    if contains is not None:
        lst = [i for i in lst if contains in i]
    df = pd.DataFrame(np.array_split(lst,ncols)).T.fillna('')
    return df

In [6]:
def custom_loss(y_true, y_pred):
    tn, fp, fn, tp = skmetrics.confusion_matrix(y_true,y_pred).ravel()
    loss = 400*tp - 200*fn - 100*fp
    return loss

<a id="load-data" ></a>

<h1> Load the Data </h1> 

<a href="#top" class="btn btn-primary btn-sm" role="button" aria-pressed="true" style="color:white" data-toggle="popover" title="go to TOC">Go to Top</a>

In [27]:
path_data_train = '../data/raw/train.csv'
path_data_test = '../data/raw/test.csv'

if ENV_COLAB:
    path_data_train = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/train.csv'
    path_data_test = 'https://raw.githubusercontent.com/bhishanpdl/Datasets/master/Projects/Telco_Customer_Churn/raw/test.csv'

In [29]:
df_ytest = pd.read_csv(path_data_test,usecols=['Churn'])
df_ytest.head()


Unnamed: 0,Churn
0,Yes
1,No
2,Yes
3,No
4,No


In [31]:
ytest = df_ytest['Churn'].map({'Yes':1, 'No':0}).values
ytest[:5]

array([1, 0, 1, 0, 0])

In [8]:
!ls ../predictions

featuretools_lr.csv lrcv.csv            pycaret_lr.csv      pycaret_xgboost.csv
lr.csv              pycaret_lda.csv     pycaret_nb.csv


In [10]:
path_preds = glob.glob('../predictions/*.csv')
path_preds

['../predictions/lrcv.csv',
 '../predictions/featuretools_lr.csv',
 '../predictions/pycaret_nb.csv',
 '../predictions/pycaret_lr.csv',
 '../predictions/pycaret_lda.csv',
 '../predictions/pycaret_xgboost.csv',
 '../predictions/lr.csv']

In [12]:
path_names = [i.split('/')[-1].rstrip('.csv') for i in path_preds]
path_names

['lr',
 'featuretools_lr',
 'pycaret_nb',
 'pycaret_lr',
 'pycaret_lda',
 'pycaret_xgboost',
 'lr']

In [15]:
dfs = [pd.read_csv(f) for f in path_preds]
dfs[0].head()

Unnamed: 0,customerID,ypreds_lrcv,yprobs_lrcv
0,1794-HBQTJ,1,0.713449
1,0356-OBMAC,0,0.210394
2,4077-CROMM,1,0.757991
3,5442-PPTJY,0,0.074839
4,2333-KWEWW,0,0.08767


In [16]:
df = dfs[0]

In [17]:
for dfx in dfs[1:]:
    df = pd.merge(df,dfx,on='customerID')

In [18]:
df.head()

Unnamed: 0,customerID,ypreds_lrcv,yprobs_lrcv,ypreds_featuretools_lr,yprobs_featuretools_lr,ypreds_pycaret_nb,yprobs_pycaret_nb,ypreds_pycaret_lr,yprobs_pycaret_lr,ypreds_pycaret_lda,yprobs_pycaret_lda,ypreds_pycaret_xgboost,yprobs_pycaret_xgboost,ypreds_lr,yprobs_lr
0,1794-HBQTJ,1,0.713449,0,0.445886,1,0.9967,1,0.5445,0,0.6444,1,0.5433,1,1.0
1,0356-OBMAC,0,0.210394,0,0.078101,0,1.0,0,0.7947,0,0.9264,0,0.7239,1,0.956693
2,4077-CROMM,1,0.757991,1,0.508567,1,0.7255,1,0.7195,1,0.6536,1,0.5501,1,1.0
3,5442-PPTJY,0,0.074839,0,0.022152,0,1.0,0,0.9508,0,0.9534,0,0.8111,1,1.0
4,2333-KWEWW,0,0.08767,0,0.019473,0,1.0,0,0.9625,0,0.9574,0,0.8104,1,1.0


# Combining All predictions

In [20]:
df.filter(regex='ypreds_').head(2)

Unnamed: 0,ypreds_lrcv,ypreds_featuretools_lr,ypreds_pycaret_nb,ypreds_pycaret_lr,ypreds_pycaret_lda,ypreds_pycaret_xgboost,ypreds_lr
0,1,0,1,1,0,1,1
1,0,0,0,0,0,0,1


In [25]:
ypreds = df.filter(regex='ypreds_').mean(axis=1).gt(0.5).astype(np.int8)
ypreds[:5]

0    1
1    0
2    1
3    0
4    0
dtype: int8

In [32]:
skmetrics.confusion_matrix(ytest,ypreds)

array([[785, 250],
       [ 88, 286]])

In [33]:
custom_loss(ytest, ypreds)

71800