# Telco Customer Churn - Model Evaluation

## Import packages

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from joblib import load

## Load data

In [2]:
# Location of the modelling dataset, relative to this notebook.
DATASET_PATH = '../dataset/dataset_model.csv'
df = pd.read_csv(DATASET_PATH)

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the loaded dataset's columns.
df.describe()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,...,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_scaled,MonthlyCharges_scaled,TotalCharges_scaled
count,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,...,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0,7032.0
mean,0.504693,0.1624,0.482509,0.298493,32.421786,0.903299,0.592719,64.798208,2283.300441,0.265785,...,0.551052,0.209329,0.239619,0.219283,0.216297,0.33632,0.2281,-1.38841e-16,8.828736000000001e-17,-1.099487e-16
std,0.500014,0.368844,0.499729,0.457629,24.54526,0.295571,0.491363,30.085974,2266.771362,0.441782,...,0.497422,0.406858,0.426881,0.41379,0.411748,0.472483,0.419637,1.000071,1.000071,1.000071
min,0.0,0.0,0.0,0.0,1.0,0.0,0.0,18.25,18.8,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.280248,-1.547283,-0.9990692
25%,0.0,0.0,0.0,0.0,9.0,1.0,0.0,35.5875,401.45,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.9542963,-0.9709769,-0.8302488
50%,1.0,0.0,0.0,0.0,29.0,1.0,1.0,70.35,1397.475,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.1394171,0.184544,-0.3908151
75%,1.0,0.0,1.0,1.0,55.0,1.0,1.0,89.8625,3794.7375,1.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.9199259,0.8331482,0.6668271
max,1.0,1.0,1.0,1.0,72.0,1.0,1.0,118.75,8684.8,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.612573,1.793381,2.824261


## Data splitting - training, testing
For consistency, the data is split using the same random seed and partition sizes as in the model training step, so that the test set evaluated here matches the hold-out set produced during training.

In [4]:
# Separate the target column ('Churn') from the feature matrix.
y = df['Churn']
X = df.drop(columns=['Churn'])

# 80/20 split, stratified on the target, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=4321,
    stratify=y,
)

In [5]:
# Drop the raw numerical features from the test set, mirroring the training step;
# their *_scaled counterparts remain in X_test. Only X_test is transformed here.
raw_numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
X_test = X_test.drop(columns=raw_numeric_cols)

## Load model

In [6]:
# Location of the persisted churn model, relative to this notebook.
MODEL_PATH = '../models/customer_churn_model.joblib'
# NOTE: joblib files are pickle-based, so only load model files from a trusted source.
model = load(MODEL_PATH)

## Model evaluation on the test dataset

In [7]:
# Predict churn labels for the held-out test set.
y_pred = model.predict(X_test)

# Confusion matrix first, then the per-class precision/recall/F1 report.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(test_confusion, test_report, sep='\n')

[[763 270]
 [ 82 292]]
              precision    recall  f1-score   support

           0       0.90      0.74      0.81      1033
           1       0.52      0.78      0.62       374

    accuracy                           0.75      1407
   macro avg       0.71      0.76      0.72      1407
weighted avg       0.80      0.75      0.76      1407



The model achieved a recall of 0.78 and an F1-score of 0.62 on the positive (churn) class, and an overall weighted-average F1-score of 0.76.