## Decision Tree & GradientBoost Classification Approach ##

In [5]:
# Import Dependencies
import sqlite3
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

### Load/Preprocess Customer Churn Dataset ###

In [6]:
# Load 'customer_churn_complete' Into Pandas DataFrame
conn=sqlite3.connect('../Resources/customer_churn_data.db')
query = "SELECT * FROM customer_churn_complete"
customer_churn_complete = pd.read_sql(query, conn)
conn.close()

# Check That 'customer_churn_complete' Loaded Succesfully
customer_churn_complete.head()

Unnamed: 0,age,female,male,tenure,basic_subscription,standard_subscription,premium_subscription,monthly_contract,quarterly_contract,annual_contract,total_spend,payment_delay,usage_frequency,last_interaction,support_calls,churn
0,30.0,1.0,0.0,39.0,0.0,1.0,0.0,0.0,0.0,1.0,932.0,18.0,14.0,17.0,5.0,1.0
1,65.0,1.0,0.0,49.0,1.0,0.0,0.0,1.0,0.0,0.0,557.0,8.0,1.0,6.0,10.0,1.0
2,55.0,1.0,0.0,14.0,1.0,0.0,0.0,0.0,1.0,0.0,185.0,18.0,4.0,3.0,6.0,1.0
3,58.0,0.0,1.0,38.0,0.0,1.0,0.0,1.0,0.0,0.0,396.0,7.0,21.0,29.0,7.0,1.0
4,23.0,0.0,1.0,32.0,1.0,0.0,0.0,1.0,0.0,0.0,617.0,8.0,20.0,20.0,5.0,1.0


In [7]:
# Define Features Set
X = customer_churn_complete.drop(columns=['churn'])

# Define Target
y = customer_churn_complete['churn']

# Split Into Train And Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=35, stratify=y)

In [8]:
# Create StandardScaler Instance
scaler = StandardScaler()

# Fit Standard Scaler
X_scaler = scaler.fit(X_train)

# Scale Training And Testing Data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-1.079918,-0.896577,0.896577,-0.368805,1.437691,-0.71402,-0.711788,1.903101,-0.799614,-0.805686,0.65735,1.12432,0.844689,-1.232789,1.012046
1,-0.606751,1.115354,-1.115354,1.545149,1.437691,-0.71402,-0.711788,-0.525458,1.250603,-0.805686,-0.035186,-1.36024,-0.778767,-1.232789,-0.583931
2,1.601358,1.115354,-1.115354,0.26918,-0.69556,-0.71402,1.404912,-0.525458,1.250603,-0.805686,0.347326,0.296134,1.076611,-0.652039,1.969632
3,-0.606751,1.115354,-1.115354,-0.948791,-0.69556,1.400521,-0.711788,1.903101,-0.799614,-0.805686,0.25795,0.414446,-1.474534,0.625613,1.012046
4,1.285914,1.115354,-1.115354,0.67517,-0.69556,1.400521,-0.711788,-0.525458,1.250603,-0.805686,0.412595,1.006008,1.656417,-0.071288,-0.903126


### Fit Decision Tree Classifier Model ###

In [9]:
# Create Decision Tree Classifier
dtc_model = tree.DecisionTreeClassifier(random_state=35)

# Fit The Model
dtc_model = dtc_model.fit(X_train_scaled, y_train)

### Fit Gradient Boost Classifier Model ###

In [10]:
# Create Gradient Boost Classifier
gbc_model = GradientBoostingClassifier(random_state=35)

# Fit The Model
gbc_model = gbc_model.fit(X_train_scaled, y_train)

### Make Predictions Using Decision Tree Model and Gradient Boosting Model ###

In [11]:
# Make Predictions Using The Testing Data
predictions_dtc = dtc_model.predict(X_test_scaled)
predictions_gbc = gbc_model.predict(X_test_scaled)

### Model Evaluation: Confusion Matrix and Classification Report ###

In [12]:
# Calculate The Confusion Matrix For Decision Tree Model
confusion_matrix_dtc = confusion_matrix(y_test, predictions_dtc)
confusion_matrix_dtc_df = pd.DataFrame(confusion_matrix_dtc,
                                      index=['Actual 0', 'Actual 1'],
                                      columns=['Predicted 0', 'Predicted 1'])

# Calculate The Accuracy Score For Decision Tree Model
accuracy_score_confusion_matrix_dtc = accuracy_score(y_test, predictions_dtc)

# Display Results For Decision Tree Model
print('Confusion Matrix')
display(confusion_matrix_dtc_df)
print(f'Accuracy Score: {accuracy_score_confusion_matrix_dtc}')
print('\nClassification Report')
print(classification_report(y_test, predictions_dtc))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,49639,6540
Actual 1,8216,61907


Accuracy Score: 0.8831689126062928

Classification Report
              precision    recall  f1-score   support

         0.0       0.86      0.88      0.87     56179
         1.0       0.90      0.88      0.89     70123

    accuracy                           0.88    126302
   macro avg       0.88      0.88      0.88    126302
weighted avg       0.88      0.88      0.88    126302



In [13]:
# Calculate The Confusion Matrix For Gradient Boost Model
confusion_matrix_gbc = confusion_matrix(y_test, predictions_gbc)
confusion_matrix_gbc_df = pd.DataFrame(confusion_matrix_gbc,
                                       index=['Actual 0', 'Actual 1'],
                                       columns=['Predicted 0', 'Predicted 1'])

# Calculate The Accuracy Score For Gradient Boost Model
accuracy_score_confusion_matrix_gbc = accuracy_score(y_test, predictions_gbc)

# Display Results For Gradient Boost Model
print('Confusion Matrix')
display(confusion_matrix_gbc_df)
print(f'Accuracy Score: {accuracy_score_confusion_matrix_gbc}')
print('\nClassification Report')
print(classification_report(y_test, predictions_gbc))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,48473,7706
Actual 1,2119,68004


Accuracy Score: 0.9222102579531599

Classification Report
              precision    recall  f1-score   support

         0.0       0.96      0.86      0.91     56179
         1.0       0.90      0.97      0.93     70123

    accuracy                           0.92    126302
   macro avg       0.93      0.92      0.92    126302
weighted avg       0.92      0.92      0.92    126302

