In [3]:
# imports

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

import pickle

import tensorflow as tf
from tensorflow.keras.models import load_model

import warnings
warnings.filterwarnings("ignore")

In [4]:
# load the model
model = load_model(r"../models/churn_model.h5")

# load the scaler
with open(r"../models/scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# load the gender encoder
with open(r"../models/gender_encoder.pkl", "rb") as f:
    gender_encoder = pickle.load(f)

# load the geography encoder
with open(r"../models/geo_encoder.pkl", "rb") as f:
    geo_encoder = pickle.load(f)



In [5]:
# example data for prediction
input_example = {
    "CreditScore":600,
    "Geography":"France",
    "Gender":"Male",
    "Age":40,
    "Tenure":5,
    "Balance":10000,
    "NumOfProducts":2,
    "HasCrCard":1,
    "IsActiveMember":1,
    "EstimatedSalary":50000
}

In [10]:
# onehot encode geography
geo_encoded = np.array(geo_encoder.transform([[input_example["Geography"]]]))

# geography dataframe
geo_encoded_df = pd.DataFrame(geo_encoded, columns=geo_encoder.get_feature_names_out(["Geography"]))

geo_encoded_df




Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [11]:
# concatenate the input example with the encoded geography
input_df = pd.DataFrame([input_example])
input_df = pd.concat([input_df, geo_encoded_df], axis=1)
# drop the original Geography column
input_df = input_df.drop(columns=["Geography"])

input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,Male,40,5,10000,2,1,1,50000,1.0,0.0,0.0


In [16]:
gender_encoder = gender_encoder.get("Gender")

In [18]:
# convert gender to categorical numeric
input_df["Gender"] = gender_encoder.transform(input_df["Gender"])

In [19]:
input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,5,10000,2,1,1,50000,1.0,0.0,0.0


In [20]:
# scale the input data
input_scaled = scaler.transform(input_df)

In [21]:
input_scaled

array([[-0.53598516,  0.91324755,  0.10479359, -0.00134472, -1.05836066,
         0.80843615,  0.64920267,  0.97481699, -0.87683221,  1.00150113,
        -0.57946723, -0.57638802]])

In [22]:
# predict the churn probability
churn_probability = model.predict(input_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step


In [24]:
churn_probability[0][0]  # Get the churn probability for the first (and only) input example

np.float32(0.0048299697)

In [25]:
if churn_probability[0][0] > 0.5:
    print("The customer is likely to churn.")
else:
    print("The customer is likely to stay.")

The customer is likely to stay.
