### Import Pacakages and Artifacts

In [1]:
import pandas as pd
import joblib

scaler = joblib.load("Artifacts/standard_scaler.pkl")
df_test = pd.read_csv("Datasets/preprocessed_test.csv")

imp_feats = joblib.load("Artifacts/important_features.pkl")
important_features = imp_feats["Important Features"]

label_encoders = joblib.load("Artifacts/label_encoders.pkl")
manual_mapping = joblib.load("Artifacts/manual_mapping.pkl")

target_column = "Churn"

In [2]:
df_test["Gender"]

0        0
1        0
2        1
3        1
4        0
        ..
64369    0
64370    1
64371    1
64372    0
64373    0
Name: Gender, Length: 64374, dtype: int64

In [3]:
important_features

['Age', 'Support Calls', 'Payment Delay', 'Total Spend']

### 2. Standard-scaling Test Dataset

In [4]:
df_test.loc[:, important_features] = (
    df_test[important_features].astype("float64")
)

df_test.loc[:, important_features] = scaler.transform(df_test[important_features])

  1.01482767]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_test.loc[:, important_features] = scaler.transform(df_test[important_features])
  1.75738978]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_test.loc[:, important_features] = scaler.transform(df_test[important_features])
  df_test.loc[:, important_features] = scaler.transform(df_test[important_features])
  0.26737146]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_test.loc[:, important_features] = scaler.transform(df_test[important_features])


### 3. Deploy Models for Inference

In [5]:
from sklearn import metrics

model_names = ["GNB", "ADA", "CAT", "DT"]
pred_results = {}
for model_name in model_names:
    print(f"model performance ({model_name}):\n")

    X_test= df_test[important_features]
    y_test = df_test[target_column]

    model = joblib.load("Saved Models/" + model_name + ".pkl")

    y_pred = model.predict(X_test)
    y_pred = y_pred.astype(int)
    pred_results[model_name] = y_pred

    # performance metrics
    accuracy = metrics.accuracy_score(y_test, y_pred)
    recall = metrics.recall_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    F1_score = metrics.f1_score(y_test, y_pred)
    print("Test accuracy: ", accuracy)
    print("Test recall: ", recall)
    print("Test precision: ", precision)
    print("Test F1 score: ", F1_score)
    print("+"*100)

for model_name in model_names:
    df_test_results = df_test.copy()
    y_pred = pred_results[model_name]

    # inverse map label encoded and manual mapped columns:
    df_test_results["Churn Prediction"] = y_pred
    df_test_results.loc[:, important_features] = scaler.inverse_transform(df_test_results[important_features])

    for encoded_column in list(label_encoders.keys()):
        le = label_encoders[encoded_column]
        df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])

    mapping_column = (list(manual_mapping.keys()))[0]
    mapping = manual_mapping[mapping_column]
    inverse_mapping = {v: k for k, v in mapping.items()}

    df_test_results.loc[:, mapping_column] = df_test_results[mapping_column].map(inverse_mapping)


    df_test_results.to_csv("Datasets/test_pred_results" + "_" + model_name +".csv", index=False)


model performance (GNB):

Test accuracy:  0.57414484108491
Test recall:  0.9854064867346604
Test precision:  0.527000719083782
Test F1 score:  0.6867329448063079
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
model performance (ADA):

Test accuracy:  0.5541056948457451
Test recall:  0.9973108582297576
Test precision:  0.515152541798655
Test F1 score:  0.6793780577709269
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
model performance (CAT):

Test accuracy:  0.5377792276384876
Test recall:  0.9975732135244154
Test precision:  0.5061397670549085
Test F1 score:  0.6715529897453446
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
model performance (DT):

Test accuracy:  0.5374064063131078
Test recall:  0.9976060079362477
Test precision:  0.5059375311845126
Test F1 score:  0.67138238117834
++++++++++++++++++++++++++++++++++++++++++++

  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, mapping_column] = df_test_results[mapping_column].map(inverse_mapping)
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, mapping_column] = df_test_results[mapping_column].map(inverse_mapping)
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, mapping_column] = df_test_results[mapping_column].map(inverse_mapping)
  df_test_results.loc[:, encoded_column] = le.inverse_transform(df_test_results[encoded_column])
  df_test_results.loc[:, encoded_