## Logistic Regression Classification Approach ##

In [1]:
# Import Dependencies
import pandas as pd
import sqlite3
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

### Load/Preprocess Customer Churn Dataset ###

In [2]:
# Load The 'customer_churn_complete' Table Into A Pandas DataFrame
# The try/finally guarantees the SQLite connection is closed even when the
# query raises (for example, if the table is missing), so a failed run does
# not leave the database handle open in the kernel.
conn = sqlite3.connect('../Resources/customer_churn_data.db')
try:
    query = "SELECT * FROM customer_churn_complete"
    customer_churn_complete = pd.read_sql(query, conn)
finally:
    conn.close()

# Check That 'customer_churn_complete' Loaded Successfully
customer_churn_complete.head()

Unnamed: 0,age,female,male,tenure,basic_subscription,standard_subscription,premium_subscription,monthly_contract,quarterly_contract,annual_contract,total_spend,payment_delay,usage_frequency,last_interaction,support_calls,churn
0,30.0,1.0,0.0,39.0,0.0,1.0,0.0,0.0,0.0,1.0,932.0,18.0,14.0,17.0,5.0,1.0
1,65.0,1.0,0.0,49.0,1.0,0.0,0.0,1.0,0.0,0.0,557.0,8.0,1.0,6.0,10.0,1.0
2,55.0,1.0,0.0,14.0,1.0,0.0,0.0,0.0,1.0,0.0,185.0,18.0,4.0,3.0,6.0,1.0
3,58.0,0.0,1.0,38.0,0.0,1.0,0.0,1.0,0.0,0.0,396.0,7.0,21.0,29.0,7.0,1.0
4,23.0,0.0,1.0,32.0,1.0,0.0,0.0,1.0,0.0,0.0,617.0,8.0,20.0,20.0,5.0,1.0


In [3]:
# Target: The 'churn' Column
y = customer_churn_complete.loc[:, 'churn']

# Features: Every Remaining Column
X = customer_churn_complete.drop(labels='churn', axis=1)

# Partition Into Train And Test Sets (stratified on the target, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=2,
)

In [4]:
# Build The StandardScaler And Fit It On The Training Split Only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply The Fitted Scaler To Both Splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split_features) for split_features in (X_train, X_test)
)

# Preview The Scaled Training Data
X_train_scaled_df = pd.DataFrame(data=X_train_scaled)
X_train_scaled_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-1.161497,1.116039,-1.116039,-0.890064,-0.695393,-0.713923,1.404389,1.901208,-0.801221,-0.803505,0.294685,1.361856,-1.359626,0.277177,-1.223382
1,-1.082556,-0.896026,0.896026,-1.702087,-0.695393,1.400711,-0.712053,-0.525981,1.248096,-0.803505,0.17655,1.835064,-1.707968,-0.7678,1.329523
2,-1.477257,-0.896026,0.896026,-0.658058,-0.695393,-0.713923,1.404389,-0.525981,1.248096,-0.803505,-1.196253,-0.76758,-1.707968,-0.7678,-0.904269
3,0.812009,-0.896026,0.896026,0.559975,1.438037,-0.713923,-0.712053,1.901208,-0.801221,-0.803505,0.241606,0.060534,-1.359626,0.161068,1.96775
4,-1.398317,-0.896026,0.896026,-1.412079,-0.695393,-0.713923,1.404389,-0.525981,-0.801221,1.244548,1.380991,0.297138,-1.707968,-1.000017,-1.223382


### Fit Logistic Regression Model ###

In [5]:
# Instantiate The Logistic Regression Classifier With A Fixed Seed
logistic_regression_model = LogisticRegression(random_state=2)

# Train On The Scaled Training Features And Their Labels
logistic_regression_model.fit(X=X_train_scaled, y=y_train)

### Make Predictions Using the Logistic Regression Model ###

In [6]:
# Predict Labels For The Scaled Testing Features
predictions = logistic_regression_model.predict(X=X_test_scaled)

### Score Model, Calculate Accuracy Score ###

In [7]:
# Compute The Model Score On Each Split
training_score = logistic_regression_model.score(X_train_scaled, y_train)
testing_score = logistic_regression_model.score(X_test_scaled, y_test)

# Report Both Scores
print(f"Training Data Score: {training_score}")
print(f"Testing Data Score: {testing_score}")

Training Data Score: 0.8487717205413509
Testing Data Score: 0.8478408892970816


### Model Evaluation: Confusion Matrix and Classification Report ###

In [8]:
# Calculate The Confusion Matrix
# The result is stored under its own name so it does not rebind the imported
# `confusion_matrix` function. Assigning back to `confusion_matrix` would make
# the name refer to the result instead of the function, so re-running this
# cell (or calling the function anywhere later) would raise a TypeError.
churn_confusion_matrix = confusion_matrix(y_test, predictions)
confusion_matrix_df = pd.DataFrame(churn_confusion_matrix,
                                   index=['Actual 0', 'Actual 1'],
                                   columns=['Predicted 0', 'Predicted 1'])

# Display Results
print('Confusion Matrix')
display(confusion_matrix_df)
print('\nClassification Report')
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,47629,8550
Actual 1,10668,59455



Classification Report
              precision    recall  f1-score   support

         0.0       0.82      0.85      0.83     56179
         1.0       0.87      0.85      0.86     70123

    accuracy                           0.85    126302
   macro avg       0.85      0.85      0.85    126302
weighted avg       0.85      0.85      0.85    126302

