# Telco Churn Model Using Naïve Bayes Classifier

Naive Bayes Classification: Dropped call, Payment Method, Bill Type (Local,Long Distance), and Estimated Income to predict Churn.

In [1]:
#1) Import necessary library 
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")

In [2]:
#2) Import dataset (telco data)
ds=pd.read_excel('telco-churn-ASSESSMENT.xlsx')
ds.head()

Unnamed: 0,ID,LONGDIST,International,LOCAL,DROPPED,PAY_MTHD,LocalBillType,LongDistanceBillType,AGE,SEX,STATUS,CHILDREN,Est_Income,Car_Owner,CHURNED
0,0,5.246,7.515,86.328,0,CH,FreeLocal,Standard,57,F,M,2,27535.3,Y,Vol
1,3,0.0,0.0,3.942,0,CC,Budget,Intnl_discount,50,F,S,2,64632.3,N,InVol
2,4,5.556,0.0,9.363,1,CC,Budget,Intnl_discount,68,F,M,2,81000.9,N,Vol
3,8,14.019,5.68,29.806,0,CC,Budget,Standard,34,M,S,0,87467.1,Y,Current
4,10,13.664,2.956,32.638,0,CC,FreeLocal,Intnl_discount,60,M,M,2,83220.6,N,Vol


In [3]:
#check for missing/null data

ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1477 entries, 0 to 1476
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   ID                    1477 non-null   int64  
 1   LONGDIST              1477 non-null   float64
 2   International         1477 non-null   float64
 3   LOCAL                 1477 non-null   float64
 4   DROPPED               1477 non-null   int64  
 5   PAY_MTHD              1477 non-null   object 
 6   LocalBillType         1477 non-null   object 
 7   LongDistanceBillType  1477 non-null   object 
 8   AGE                   1477 non-null   int64  
 9   SEX                   1477 non-null   object 
 10  STATUS                1477 non-null   object 
 11  CHILDREN              1477 non-null   int64  
 12  Est_Income            1477 non-null   float64
 13  Car_Owner             1477 non-null   object 
 14  CHURNED               1477 non-null   object 
dtypes: float64(4), int64(

In [4]:
#Some columns in the dataset I don’t think will be needed for training the machine learning model. 
#So we will be continue focusing on these five attributes:
#(Dropped calls, Payment Method, Local bill type, Long distance bill type, Estimate Income)

In [5]:
#3) Allocate the relevant attributes as input and output

x = ds[['DROPPED', 'PAY_MTHD', 'LocalBillType', 'LongDistanceBillType', 'Est_Income']]
y = ds[['CHURNED']]

In [6]:
#4) Use LabelEncoder to encode categorical data
encoder = LabelEncoder()
x['PAY_MTHD'] = encoder.fit_transform(x['PAY_MTHD'])
x['LocalBillType'] = encoder.fit_transform(x['LocalBillType'])
x['LongDistanceBillType'] = encoder.fit_transform(x['LongDistanceBillType'])

In [7]:
# Encoding the Dependent Variable
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)

In [8]:
#5) Split data into training and test sets with the appropriate proportions
from sklearn.model_selection import train_test_split
X_train, X_test , y_train , y_test = train_test_split (x, y, test_size = 0.2, random_state = 0)

In [9]:
#6) Normalized data using StandardScaler
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X = sc_X.fit_transform(x)

In [10]:
#7) Fit and predict results using the Classifier
from sklearn.naive_bayes import GaussianNB
classifier= GaussianNB()
classifier.fit(X_train,y_train)

GaussianNB()

In [11]:
#8) Evaluate the results
y_pred=classifier.predict(X_test)
y_test,y_pred

(array([0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 2,
        0, 0, 0, 2, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 1, 0, 0,
        2, 0, 2, 2, 2, 1, 0, 0, 0, 0, 2, 0, 0, 2, 1, 0, 0, 2, 0, 1, 2, 0,
        0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 0, 2, 2, 2, 0, 0, 0, 1,
        2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 1, 0, 0, 1, 2, 0, 0, 1, 2,
        2, 2, 0, 1, 0, 0, 0, 1, 0, 2, 0, 2, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0,
        0, 0, 0, 2, 2, 0, 0, 2, 0, 1, 0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2,
        0, 0, 0, 0, 0, 2, 1, 0, 2, 1, 2, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0,
        2, 0, 0, 2, 0, 2, 0, 2, 0, 2, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 2, 2,
        1, 0, 2, 0, 0, 1, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 2, 0, 0, 2,
        2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 0, 0,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 2, 0,
        0, 0, 1, 2, 2, 2, 1, 2, 1, 2])

In [12]:
#Performance Evaluation: using confusion matrix
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_pred)
cm

array([[178,   0,   0],
       [ 26,   0,   0],
       [ 92,   0,   0]], dtype=int64)

In [13]:
#calculate the prediction accuracy
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.6013513513513513

                                                                                                                           A.M