In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Import dataset

In [2]:
# Read churn.csv (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv('churn.csv')

# Data Transformation

### Transform phone and binary attributes

In [3]:
# Phone: drop the character at index 3 and parse the remaining characters as an integer.
# NOTE(review): assumes values look like 'ddd-dddd' with a separator at index 3 — confirm
# against churn.csv.
df['Phone'] = df['Phone'].map(lambda number: int(number[:3] + number[4:]))

# Plan flags: "no" becomes 0, every other value becomes 1.
for plan_col in ("Int'l Plan", 'VMail Plan'):
    df[plan_col] = df[plan_col].ne("no").astype('int64')

# Target: "False." becomes 0, every other value becomes 1.
df['Churn?'] = df['Churn?'].ne("False.").astype('int64')

### Divide into attributes and class

In [4]:
# Features are every column except the last one; the label is the last column.
# NOTE(review): this relies on 'Churn?' being the final column of the CSV — confirm.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

### Encoding State column

In [5]:
# One-hot encode the categorical 'State' column; all other columns pass through unchanged.
# The cell consumes 'State', so re-running it needs X to be rebuilt by the cell above first.
X = pd.get_dummies(data=X, columns=['State'])

### Splitting the dataset into the Training set and Test set

In [6]:
# Hold out 25% of the rows as the test set. random_state pins the shuffle so the
# split (and every metric printed below) is reproducible across runs.
# train_test_split is imported in the notebook's top import cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

### Feature Scaling

In [7]:
# Standardize the features. The scaler is fitted on the training split only and the
# same fitted statistics are then applied to the test split, so nothing from the
# test set influences preprocessing.
# StandardScaler is imported in the notebook's top import cell.
# NOTE: X_train / X_test are overwritten with the scaler's output here
# (by default NumPy arrays, so the DataFrame column names are no longer attached).
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Building model using Decision Tree

### Training the Decision Tree Classification model on the Training set

In [8]:
def decision_tree_class():
    """Fit and return a decision tree on the notebook-level training split.

    The tree uses the entropy splitting criterion and a fixed random_state.
    Reads the global ``X_train`` and ``y_train``.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    # fit() returns the estimator itself, so this hands back the fitted tree.
    return tree.fit(X_train, y_train)

In [9]:
# Train the decision tree once and keep the fitted model for evaluation below.
dt_class = decision_tree_class()

### Making the Confusion Matrix for Decision Tree

In [10]:
def cm_scores(classifier, X=None, y=None):
    """Print the confusion matrix and headline metrics for a fitted binary classifier.

    Args:
        classifier: Fitted estimator exposing ``predict``.
        X: Feature matrix to score. Defaults to the notebook-level ``X_test``.
        y: True 0/1 labels aligned with ``X``. Defaults to the notebook-level ``y_test``.

    Prints the 2x2 confusion matrix, the TP/TN/FP/FN counts, and accuracy, recall
    and precision rounded to two decimals. Returns None.
    """
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    y_pred = classifier.predict(X)

    # Compute the matrix once and reuse it. labels=[0, 1] pins the shape to 2x2 even
    # when only one class occurs in y / y_pred; otherwise the matrix collapses to 1x1
    # and the four-way unpack below raises ValueError.
    cm = confusion_matrix(y, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print(cm)
    print(' ')
    print('True Positive is {}; True Negative is {}; False Positive is {}; False Negative is {}.\n'.format(tp, tn, fp, fn))

    accuracy = round(accuracy_score(y, y_pred), 2)
    recall = round(recall_score(y, y_pred), 2)
    precision = round(precision_score(y, y_pred), 2)
    print('Accuracy is {}; recall is {}; precision is {}.'.format(accuracy, recall, precision))

In [11]:
# Report the confusion matrix and accuracy/recall/precision of the decision tree on the test split.
cm_scores(dt_class)

[[669  50]
 [ 21  94]]
 
True Positive is 94; True Negative is 669; False Positive is 50; False Negative is 21.

Accuracy is 0.91; recall is 0.82; precision is 0.65.


# Building model using Naive Bayes

### Training the Naive Bayes Classification model on the Training set

In [12]:
def naive_bayes_class():
    """Fit and return a Gaussian Naive Bayes model on the notebook-level training split.

    Reads the global ``X_train`` and ``y_train``.

    Returns:
        The fitted ``GaussianNB`` estimator.
    """
    model = GaussianNB()
    # fit() returns the estimator itself, so this hands back the fitted model.
    return model.fit(X_train, y_train)

In [13]:
# Train the Naive Bayes model once and keep the fitted model for evaluation below.
nb_class = naive_bayes_class()

### Making the Confusion Matrix for Naive Bayes

In [14]:
# Report the confusion matrix and accuracy/recall/precision of the Naive Bayes model on the test split.
cm_scores(nb_class)

[[424 295]
 [ 49  66]]
 
True Positive is 66; True Negative is 424; False Positive is 295; False Negative is 49.

Accuracy is 0.59; recall is 0.57; precision is 0.18.
