In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score

In [2]:
# Read the churn dataset from the working directory.
customer_df = pd.read_csv('customer_churn.csv')

# Per-column count of missing values (displayed as the cell output).
customer_df.isna().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [3]:
# Target: the 'Churn' column of the loaded frame.
y = customer_df['Churn']

# Predictors: the three columns used by every model in this notebook.
X = customer_df[
    [
        'tenure',
        'SeniorCitizen',
        'MonthlyCharges',
    ]
]

In [4]:
# Split features and labels into train/test partitions. test_size=0.25 spells
# out the value scikit-learn uses when neither test_size nor train_size is
# given; the fixed seed makes the partition repeatable.
# NOTE(review): the split is not stratified on y — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)


In [5]:
# Fit the scaler on the training split only, then reuse the same fitted
# transformer for both splits so the test data never influences the scaling.
transformer = StandardScaler().fit(X_train)

# Scaled values exactly as returned by the transformer.
X_train_scaled_np = transformer.transform(X_train)
X_test_scaled_np = transformer.transform(X_test)

# Wrap the scaled values back into DataFrames, restoring each split's column
# labels and original row index.
X_train_scaled = pd.DataFrame(
    X_train_scaled_np,
    index=X_train.index,
    columns=X_train.columns,
)
X_test_scaled = pd.DataFrame(
    X_test_scaled_np,
    index=X_test.index,
    columns=X_test.columns,
)

In [6]:
# Replace the label Series' index with a fresh 0..n-1 range, discarding the
# old index values.
# NOTE(review): X_train_scaled / X_test_scaled are built with the original row
# index, so after this step labels and features correspond by position only,
# not by index label.
y_train, y_test = (
    labels.reset_index(drop=True) for labels in (y_train, y_test)
)

In [10]:
# Baseline: logistic regression trained on the scaled training split.
lr = LogisticRegression(random_state=0, solver='lbfgs')
lr.fit(X_train_scaled, y_train)

# Predicted labels for the held-out split.
y_pred_test = lr.predict(X_test_scaled)

# Report each metric in a fixed order; every entry pairs the message template
# with its scoring function and any extra keyword arguments it needs.
# NOTE(review): precision, recall and F1 are scored with "No" as the positive
# label — confirm that this, rather than "Yes", is the class of interest.
for template, scorer, scorer_kwargs in (
    ("The accuracy in the TEST set is: {:.2f}", accuracy_score, {}),
    ("The precision in the TEST set is: {:.2f}", precision_score, {"pos_label": "No"}),
    ("The recall in the TEST set is: {:.2f}", recall_score, {"pos_label": "No"}),
    ("The F1 in the TEST set is: {:.2f}", f1_score, {"pos_label": "No"}),
    ("The Kappa in the TEST set is: {:.2f}", cohen_kappa_score, {}),
):
    print(template.format(scorer(y_test, y_pred_test, **scorer_kwargs)))

The accuracy in the TEST set is: 0.78
The precision in the TEST set is: 0.82
The recall in the TEST set is: 0.90
The F1 in the TEST set is: 0.86
The Kappa in the TEST set is: 0.38


In [11]:
from imblearn.over_sampling import SMOTE

In [12]:
# Configure SMOTE with a fixed seed and 3 nearest neighbours.
sm = SMOTE(
    random_state=100,
    k_neighbors=3,
)

# Resample the scaled training split only; the test split is left untouched.
X_train_SMOTE, y_train_SMOTE = sm.fit_resample(X_train_scaled, y_train)

In [14]:
# Refit logistic regression on the SMOTE-resampled training data.
# Note: `lr` and `y_pred_test` overwrite the objects from the baseline cell.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_SMOTE, y_train_SMOTE)

# Predict on the same scaled (non-resampled) test split as the baseline.
y_pred_test = lr.predict(X_test_scaled)

# Report each metric in a fixed order; every entry pairs the message template
# with its scoring function and any extra keyword arguments it needs.
# NOTE(review): precision, recall and F1 are scored with "No" as the positive
# label — confirm that this, rather than "Yes", is the class of interest.
for template, scorer, scorer_kwargs in (
    ("The accuracy in the TEST set is: {:.2f}", accuracy_score, {}),
    ("The precision in the TEST set is: {:.2f}", precision_score, {"pos_label": "No"}),
    ("The recall in the TEST set is: {:.2f}", recall_score, {"pos_label": "No"}),
    ("The F1 in the TEST set is: {:.2f}", f1_score, {"pos_label": "No"}),
    ("The Kappa in the TEST set is: {:.2f}", cohen_kappa_score, {}),
):
    print(template.format(scorer(y_test, y_pred_test, **scorer_kwargs)))

The accuracy in the TEST set is: 0.72
The precision in the TEST set is: 0.88
The recall in the TEST set is: 0.72
The F1 in the TEST set is: 0.79
The Kappa in the TEST set is: 0.38


In [None]:
# In this case SMOTE only improves precision on the test set (0.82 -> 0.88);
# accuracy, recall and F1 all drop, and Kappa stays the same at 0.38.