In [13]:
# initial imports
import sqlite3
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Load the 'customer_churn_complete' table into a pandas DataFrame.
# The path is built with pathlib so it resolves on every OS; the previous
# backslash literal was Windows-only and relied on the invalid escape
# sequences "\R" and "\c", which newer Python versions warn about.
db_path = Path("..") / "Resources" / "customer_churn_data.db"
query = "SELECT * FROM customer_churn_complete"

conn = sqlite3.connect(db_path)
try:
    customer_churn_complete = pd.read_sql(query, conn)
finally:
    # Release the connection even if the query fails.
    conn.close()

# Check that 'customer_churn_complete' loaded successfully
customer_churn_complete.head()

Unnamed: 0,age,female,male,tenure,basic_subscription,standard_subscription,premium_subscription,monthly_contract,quarterly_contract,annual_contract,total_spend,payment_delay,usage_frequency,last_interaction,support_calls,churn
0,30.0,1.0,0.0,39.0,0.0,1.0,0.0,0.0,0.0,1.0,932.0,18.0,14.0,17.0,5.0,1.0
1,65.0,1.0,0.0,49.0,1.0,0.0,0.0,1.0,0.0,0.0,557.0,8.0,1.0,6.0,10.0,1.0
2,55.0,1.0,0.0,14.0,1.0,0.0,0.0,0.0,1.0,0.0,185.0,18.0,4.0,3.0,6.0,1.0
3,58.0,0.0,1.0,38.0,0.0,1.0,0.0,1.0,0.0,0.0,396.0,7.0,21.0,29.0,7.0,1.0
4,23.0,0.0,1.0,32.0,1.0,0.0,0.0,1.0,0.0,0.0,617.0,8.0,20.0,20.0,5.0,1.0


In [3]:
# Feature set: every column except the "churn" target.
# drop() returns a new frame, so the loaded DataFrame is left untouched.
X = customer_churn_complete.drop(columns=["churn"])
X.head()

Unnamed: 0,age,female,male,tenure,basic_subscription,standard_subscription,premium_subscription,monthly_contract,quarterly_contract,annual_contract,total_spend,payment_delay,usage_frequency,last_interaction,support_calls
0,30.0,1.0,0.0,39.0,0.0,1.0,0.0,0.0,0.0,1.0,932.0,18.0,14.0,17.0,5.0
1,65.0,1.0,0.0,49.0,1.0,0.0,0.0,1.0,0.0,0.0,557.0,8.0,1.0,6.0,10.0
2,55.0,1.0,0.0,14.0,1.0,0.0,0.0,0.0,1.0,0.0,185.0,18.0,4.0,3.0,6.0
3,58.0,0.0,1.0,38.0,0.0,1.0,0.0,1.0,0.0,0.0,396.0,7.0,21.0,29.0,7.0
4,23.0,0.0,1.0,32.0,1.0,0.0,0.0,1.0,0.0,0.0,617.0,8.0,20.0,20.0,5.0


In [4]:
# Target vector, reshaped to a single-column 2-D array
y = customer_churn_complete["churn"].to_numpy().reshape(-1, 1)

In [5]:
# Seeded split, stratified on the target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=5,
)

In [6]:
# Create StandardScaler instance.
# StandardScaler is deterministic and only accepts `copy`, `with_mean` and
# `with_std`; passing `random_state` raises a TypeError, so it is omitted.
scaler = StandardScaler()

# Fit the scaler on the training split only
X_scaler = scaler.fit(X_train)

# Scale training and testing data with the statistics learned from X_train
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Preview the scaled training features
X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-1.003381,1.115437,-1.115437,-0.94749,1.440753,-0.714332,-0.712967,-0.525262,1.2484,-0.804484,-0.315676,-1.478292,0.033855,-0.883825,-0.585243
1,-1.319131,1.115437,-1.115437,-0.367163,1.440753,-0.714332,-0.712967,-0.525262,-0.801025,1.243033,1.54156,0.532709,1.65872,-0.302919,-0.585243
2,0.654306,-0.89651,0.89651,-0.193065,1.440753,-0.714332,-0.712967,1.903813,-0.801025,-0.804484,0.745311,1.833945,-0.198268,-0.302919,1.647099
3,0.338556,-0.89651,0.89651,0.735458,-0.694082,1.39991,-0.712967,1.903813,-0.801025,-0.804484,0.194626,1.360768,0.614164,-0.651463,0.371475
4,0.417493,-0.89651,0.89651,0.039066,-0.694082,1.39991,-0.712967,-0.525262,-0.801025,1.243033,0.717613,0.059532,-0.778577,1.091258,-0.266337


In [7]:
# Decision tree classifier. The seed makes repeated runs build the same tree
# (features are randomly permuted at each split); 5 matches the seed used for
# the train/test split.
model = tree.DecisionTreeClassifier(random_state=5)

In [8]:
# Train the classifier on the scaled training split
model = model.fit(X=X_train_scaled, y=y_train)

In [9]:
# Predict labels for the scaled test split
predictions = model.predict(X=X_test_scaled)

In [10]:
# Confusion matrix of test labels vs. predictions, wrapped in a labelled
# DataFrame for display (rows are actual classes, columns are predicted).
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Overall accuracy on the test split
acc_score = accuracy_score(y_test, predictions)

In [11]:
# Show the confusion matrix, accuracy and per-class metrics
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score: {acc_score}")
print(
    "Classification Report",
    classification_report(y_test, predictions),
    sep="\n",
)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actural 0,49598,6581
Actual 1,8192,61931


Accuracy Score: 0.8830343145793416
Classification Report
              precision    recall  f1-score   support

         0.0       0.86      0.88      0.87     56179
         1.0       0.90      0.88      0.89     70123

    accuracy                           0.88    126302
   macro avg       0.88      0.88      0.88    126302
weighted avg       0.88      0.88      0.88    126302

