In [56]:
import joblib
import os

# Build the path to the serialized model file, one path component per line.
model_path = os.path.join(
    "./artifact_downloads/outputs",
    "mlflow-model",
    "model.pkl",
)

# NOTE(review): a .pkl file is presumably unpickled here, which can execute
# arbitrary code — only load artifacts from a trusted source.
model = joblib.load(model_path)

In [57]:
import pandas as pd

# Read the test CSV. This frame is kept unmodified (including the "Churn"
# column) so the true labels remain available for later comparison.
test_data_orig = pd.read_csv("./wa_telco_customer_churn_test_data/WA_Fn-UseC_-Telco-Customer-Churn_Test.csv")

# Model input: every column except the "Churn" target.
test_data = test_data_orig.drop(columns="Churn")

# Convert the "Yes"/"No" flag columns to booleans. Any value other than
# "Yes" or "No" becomes NaN, because Series.map leaves unmatched keys unmapped.
yes_no_to_bool = {"Yes": True, "No": False}
for flag_column in ("Partner", "Dependents", "PhoneService", "PaperlessBilling"):
    test_data[flag_column] = test_data[flag_column].map(yes_no_to_bool)
 


In [58]:

# Run the loaded model on the preprocessed feature frame ("Churn" dropped,
# Yes/No flag columns mapped to booleans); the predictions are stored in `result`.
# NOTE(review): the exact type/shape returned by predict() depends on the
# pickled model — presumably one boolean-like prediction per row; confirm.
result = model.predict(test_data)

In [59]:
# Store the raw model output next to the true labels in the original frame.
test_data_orig["Churn_Prediction"] = result

# Translate the predictions into the same "Yes"/"No" vocabulary used by the
# "Churn" column; values that are neither True nor False become NaN.
prediction_to_label = {True: "Yes", False: "No"}
test_data_orig["Churn_Prediction"] = test_data_orig["Churn_Prediction"].map(prediction_to_label)


In [60]:
# Display the column index of the original frame to confirm that the
# "Churn_Prediction" column has been added alongside "Churn".
test_data_orig.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn',
       'Churn_Prediction'],
      dtype='object')

In [61]:
# Keep only the customer identifier plus the actual and predicted churn
# labels so the two can be compared row by row.
comparison_columns = ['customerID', 'Churn', 'Churn_Prediction']
selected_columns = test_data_orig[comparison_columns]

# Print the comparison table (plain-text rendering).
print(selected_columns)

      customerID Churn Churn_Prediction
0     4376-KFVRS    No               No
1     2754-SDJRD    No              Yes
2     9917-KWRBE    No               No
3     0365-GXEZS    No               No
4     9385-NXKDA    No               No
...          ...   ...              ...
1404  5204-HMGYF    No               No
1405  9950-MTGYX    No               No
1406  3675-EQOZA    No               No
1407  3646-ITDGM    No               No
1408  3913-FCUUW    No               No

[1409 rows x 3 columns]


In [62]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Inputs: `test_data_orig` with the true labels in 'Churn' and the model's
# labels in 'Churn_Prediction', both expressed as 'Yes' / 'No'.
# Encode both columns as integers: 'Yes' -> 1, anything else -> 0.
actual_labels = test_data_orig['Churn'].eq('Yes').astype(int)
predicted_labels = test_data_orig['Churn_Prediction'].eq('Yes').astype(int)

# Score the predictions against the true labels (positive class = 1, i.e. churn).
accuracy = accuracy_score(actual_labels, predicted_labels)
precision = precision_score(actual_labels, predicted_labels)
recall = recall_score(actual_labels, predicted_labels)
f1 = f1_score(actual_labels, predicted_labels)

# Report each metric rounded to two decimal places.
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Accuracy: 0.80
Precision: 0.65
Recall: 0.51
F1-Score: 0.57


Accuracy (0.80): An accuracy of 80% indicates that your model correctly predicts whether a customer will churn or not in 80% of the cases. This is a reasonably good starting point, but it's important to consider other metrics due to the potential class imbalance in customer churn problems. For example, if only a small percentage of customers actually churn, a model that predicts "no churn" for all customers could still achieve a high accuracy, but it would not be useful.

Precision (0.65): Precision of 65% means that when your model predicts a customer will churn, it is correct 65% of the time. This metric is important because you want to avoid incorrectly flagging customers who don't intend to churn as potential churners. A precision of 65% suggests that the model is decent at identifying actual churners without too many false positives.

Recall (0.51): Recall of 51% means that your model captures 51% of the actual churn cases — roughly half — and misses the remaining 49% (false negatives). While this isn't high, it is still meaningful, because the model does identify about half of the customers who actually churn. Recall is important when you want to minimize false negatives and ensure you don't miss potential churners.

F1-Score (0.57): The F1-Score is the harmonic mean of precision and recall, so it takes both into account: 2 × (0.65 × 0.51) / (0.65 + 0.51) ≈ 0.57. A value of 0.57 indicates moderate overall performance in balancing the identification of actual churners against false alarms, held back mainly by the lower recall.

In customer churn prediction, it's often more important to have a higher recall (to capture as many potential churners as possible) while maintaining a reasonable precision (to avoid unnecessarily alarming non-churners). However, the balance between precision and recall depends on the specific business goals and costs associated with churn.

Consider monitoring these metrics over time, and if possible, compare your model's performance to previous methods or industry benchmarks to assess whether further improvements are needed. Additionally, you might want to perform more advanced analyses, such as feature importance assessment or model tuning, to enhance your churn prediction model.

In [25]:
# Show the dtype of every column in the model-input frame `test_data`.
# NOTE(review): this cell's execution count (25) is lower than that of the
# cells before it (56-62), so its saved output likely reflects an earlier
# kernel state — e.g. it lists Dependents / PhoneService / PaperlessBilling
# as object even though the preprocessing cell maps them to booleans.
# Re-run with Restart & Run All to confirm.
column_types = test_data.dtypes
print(column_types)

customerID           object
gender               object
SeniorCitizen         int64
Partner                bool
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
dtype: object
