In [39]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import numpy as np
import pandas as pd

LOAD THE TRAINED ANN MODEL, THE SCALER, AND THE PICKLED ENCODERS (LABEL ENCODER AND ONE-HOT ENCODER)

In [45]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: pickle can execute arbitrary code while loading; only use trusted artifact files.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Trained Keras model plus the preprocessing objects saved alongside it.
model = load_model('churn_model.h5')
scaler = _load_pickle('scaler.pkl')
label_encoded_geo = _load_pickle('encode_geo.pkl')  # Contains OneHotEncoder for Geography
label_encoder_gender = _load_pickle('label_encoder_gender.pkl')  # Contains LabelEncoder for Gender




In [62]:
# Raw feature values for the single customer to score.
# Keys are inserted in the same order as the original literal.
input_data = dict(
    CreditScore=650,
    Geography='France',
    Gender='Female',
    Age=34,
    Tenure=3,
    Balance=60000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=50000,
)

In [64]:
# Build a one-row frame from the raw input. Rebuilding it from `input_data` each time
# keeps this cell safe to re-run.
input_data_df = pd.DataFrame([input_data])

# 1. Gender (using label_encoder_gender - LabelEncoder)
#    LabelEncoder works on 1-D input, so pass the column as a Series. Passing a 2-D
#    frame ([['Gender']]) makes sklearn emit a column_or_1d conversion warning.
input_data_df['Gender'] = label_encoder_gender.transform(input_data_df['Gender'])

# 2. Geography (using label_encoded_geo - OneHotEncoder)
#    OneHotEncoder expects 2-D input, hence the double brackets.
geo_encoded = label_encoded_geo.transform(input_data_df[['Geography']])
if hasattr(geo_encoded, 'toarray'):
    # The encoder may return a sparse matrix depending on how it was configured.
    geo_encoded = geo_encoded.toarray()
geo_encoded_df = pd.DataFrame(
    geo_encoded,
    columns=label_encoded_geo.get_feature_names_out(['Geography']),
    index=input_data_df.index,  # keep rows aligned for the concat below
)

# Drop original categorical columns and combine with encoded features
input_data_df = input_data_df.drop('Geography', axis=1)
input_data_df = pd.concat([input_data_df, geo_encoded_df], axis=1)

# Check the actual column names
print("Current columns:", input_data_df.columns.tolist())
print("\nDataframe before scaling:")
print(input_data_df)


Current columns: ['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_France', 'Geography_Germany', 'Geography_Spain']

Dataframe before scaling:
   CreditScore  Gender  Age  Tenure  Balance  NumOfProducts  HasCrCard  \
0          650       0   34       3    60000              2          1   

   IsActiveMember  EstimatedSalary  Geography_France  Geography_Germany  \
0               1            50000               1.0                0.0   

   Geography_Spain  
0              0.0  


  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [65]:
# Show the encoded single-row frame (rich display) before it is handed to the scaler.
input_data_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,650,0,34,3,60000,2,1,1,50000,1.0,0.0,0.0


    NOTE:

    THE INPUT DATA COLUMNS MUST ALWAYS BE IN THE SAME ORDER AS THE COLUMNS IN THE CSV FILE

In [None]:
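# # NOTE(review): this cell is fully commented out and has no execution count (In [None]);
# # the output shown beneath it appears to be left over from an earlier run.
# # Scale features with correct column matching
# # Get the feature names that the scaler expects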
# expected_feature_names = scaler.get_feature_names_out()
# print("Expected feature names from scaler:", expected_feature_names.tolist())
# print("Current columns:", input_data_df.columns.tolist())

# # Reorder dataframe to match the scaler's expected column order
# input_data_df = input_data_df[expected_feature_names]

# # Scale all features (they should all be numerical at this point)
# input_data_df[expected_feature_names] = scaler.transform(input_data_df[expected_feature_names])

# print("\nDataframe after scaling:")
# print(input_data_df)


Expected feature names from scaler: ['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geography_France', 'Geography_Germany', 'Geography_Spain']
Current columns: ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Gender', 'Geography_France', 'Geography_Germany', 'Geography_Spain']

Dataframe after scaling:
   CreditScore    Gender      Age    Tenure  Balance  NumOfProducts  \
0    -0.022435 -1.098232 -0.46638  0.004607  -0.2623       0.819663   

   HasCrCard  IsActiveMember  EstimatedSalary  Geography_France  \
0   0.645981        0.970714        -0.881539          0.998287   

   Geography_Germany  Geography_Spain  
0          -0.575591         -0.57779  


In [66]:
# Select the columns by name in the order the scaler reports, so the result does not
# depend on the key order of `input_data`. Selecting by name also raises a KeyError
# if an expected column is missing.
expected_feature_names = scaler.get_feature_names_out()
input_scaled = scaler.transform(input_data_df[expected_feature_names])
input_scaled

array([[-0.02243537, -1.09823226, -0.46637979, -0.68894811, -0.26230046,
         0.81966266,  0.64598061,  0.97071435, -0.88153859,  0.99828718,
        -0.57559072, -0.57779016]])

In [67]:
# Run the network on the scaled row; `prediction` holds the model's output array.
prediction = model.predict(input_scaled)

# Outputs above 0.5 are reported as churn, everything else as staying.
message = "The customer is likely to churn." if prediction > 0.5 else "The customer is likely to stay."
print(message)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
The customer is likely to stay.
