In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import pandas as pd
import numpy as np




In [2]:
# Restore the trained network from 'model.h5' via Keras' load_model.
model = load_model('model.h5')

# Restore the preprocessing objects (label encoders, one-hot encoder, scaler)
# from their pickle files.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must come from a trusted source.
with open('label_encoders.pkl', 'rb') as label_encoders_fh:
    label_encoders = pickle.load(label_encoders_fh)

with open('onehot_encoder.pkl', 'rb') as onehot_encoder_fh:
    onehot_encoder = pickle.load(onehot_encoder_fh)

with open('scaler.pkl', 'rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)




In [3]:
# Example customer record to score.
#
# Every value is wrapped in a one-element list so that pd.DataFrame(input_data)
# yields a single-row frame.
# Numeric features (tenure, MonthlyCharges, TotalCharges) are written as numeric
# literals instead of strings, so the resulting columns get a numeric dtype
# rather than relying on an implicit string-to-float conversion later on.
input_data = {
    'gender': ['Male'],
    'SeniorCitizen': ['No'],
    'Partner': ['Yes'],
    'Dependents': ['No'],
    'tenure': [12],
    'PhoneService': ['Yes'],
    'MultipleLines': ['No'],
    'InternetService': ['DSL'],
    'OnlineSecurity': ['Yes'],
    'OnlineBackup': ['No'],
    'DeviceProtection': ['No'],
    'TechSupport': ['No'],
    'StreamingTV': ['No'],
    'StreamingMovies': ['No'],
    'Contract': ['Month-to-month'],
    'PaperlessBilling': ['Yes'],
    'PaymentMethod': ['Electronic check'],
    'MonthlyCharges': [29.85],
    'TotalCharges': [298.50],
}

In [4]:
# One-hot encode the multi-category features

# Columns handled by the loaded one-hot encoder (same selection as in training)
categorical_columns = ['InternetService', 'Contract', 'PaymentMethod']

# Build a frame from the raw input dictionary
input_df = pd.DataFrame(input_data)

# Run the encoder on the selected columns and label the result with the
# feature names reported by the encoder itself
encoded_features = onehot_encoder.transform(input_df[categorical_columns])
encoded_df = pd.DataFrame(
    encoded_features,
    columns=onehot_encoder.get_feature_names_out(),
)

# Swap the original categorical columns for their encoded counterparts:
# drop them, then append the encoded columns on the right
input_df = input_df.drop(categorical_columns, axis=1)
final_input_df = pd.concat([input_df, encoded_df], axis=1)

# Show the first row of the combined frame
print(final_input_df.head(1))


  gender SeniorCitizen Partner Dependents tenure PhoneService MultipleLines  \
0   Male            No     Yes         No     12          Yes            No   

  OnlineSecurity OnlineBackup DeviceProtection  ... InternetService_DSL  \
0            Yes           No               No  ...                 1.0   

  InternetService_Fiber optic InternetService_No Contract_Month-to-month  \
0                         0.0                0.0                     1.0   

  Contract_One year Contract_Two year  \
0               0.0               0.0   

   PaymentMethod_Bank transfer (automatic)  \
0                                      0.0   

   PaymentMethod_Credit card (automatic)  PaymentMethod_Electronic check  \
0                                    0.0                             1.0   

   PaymentMethod_Mailed check  
0                         0.0  

[1 rows x 26 columns]


In [5]:
# Label-encode the remaining categorical features with the loaded encoders.
#
# NOTE: this cell overwrites columns of final_input_df in place. Running it a
# second time would stringify the already-encoded integers and push them through
# the unseen-value fallback below, so re-run the one-hot encoding cell first.

# Candidate columns for label encoding. Names that are missing from the frame
# or have no entry in label_encoders (e.g. 'Churn' for this input) are skipped.
label_encoded_columns = [
    'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
    'StreamingMovies', 'PaperlessBilling', 'Churn'
]

for col in label_encoded_columns:
    if col not in final_input_df.columns or col not in label_encoders:
        continue

    encoder = label_encoders[col]
    known_classes = set(encoder.classes_)

    # Fallback for values the encoder has never seen: the FIRST entry of the
    # encoder's own class sequence. Taking it from the ordered sequence (not
    # from the set) keeps the choice identical across runs; iterating a set of
    # strings depends on hash randomisation. This is an arbitrary-but-stable
    # substitute, not the most frequent class.
    fallback_class = list(encoder.classes_)[0]

    # Compare on the string form of the input values
    values = final_input_df[col].astype(str)
    is_known = [value in known_classes for value in values]

    if not all(is_known):
        unseen = sorted({value for value, ok in zip(values, is_known) if not ok})
        # Make the substitution visible instead of silently altering the input
        print(f"Warning: unseen value(s) {unseen} in column '{col}' replaced with {fallback_class!r}")
        values = values.where(is_known, fallback_class)

    # Apply transformation
    final_input_df[col] = encoder.transform(values)

# Print the final transformed input data
print(final_input_df)

   gender  SeniorCitizen  Partner  Dependents tenure  PhoneService  \
0       1              0        1           0     12             1   

   MultipleLines  OnlineSecurity  OnlineBackup  DeviceProtection  ...  \
0              0               1             0                 0  ...   

   InternetService_DSL  InternetService_Fiber optic  InternetService_No  \
0                  1.0                          0.0                 0.0   

   Contract_Month-to-month Contract_One year Contract_Two year  \
0                      1.0               0.0               0.0   

   PaymentMethod_Bank transfer (automatic)  \
0                                      0.0   

   PaymentMethod_Credit card (automatic)  PaymentMethod_Electronic check  \
0                                    0.0                             1.0   

   PaymentMethod_Mailed check  
0                         0.0  

[1 rows x 26 columns]


In [6]:
# Plain-text dump of the feature frame that is later passed to the scaler.
print(final_input_df)

   gender  SeniorCitizen  Partner  Dependents tenure  PhoneService  \
0       1              0        1           0     12             1   

   MultipleLines  OnlineSecurity  OnlineBackup  DeviceProtection  ...  \
0              0               1             0                 0  ...   

   InternetService_DSL  InternetService_Fiber optic  InternetService_No  \
0                  1.0                          0.0                 0.0   

   Contract_Month-to-month Contract_One year Contract_Two year  \
0                      1.0               0.0               0.0   

   PaymentMethod_Bank transfer (automatic)  \
0                                      0.0   

   PaymentMethod_Credit card (automatic)  PaymentMethod_Electronic check  \
0                                    0.0                             1.0   

   PaymentMethod_Mailed check  
0                         0.0  

[1 rows x 26 columns]


In [7]:
# Rich display of the feature frame (last expression of the cell).
final_input_df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,OnlineSecurity,OnlineBackup,DeviceProtection,...,InternetService_DSL,InternetService_Fiber optic,InternetService_No,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,1,0,1,0,12,1,0,1,0,0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [8]:
# Scale the encoded feature frame with the scaler loaded from 'scaler.pkl'.
# NOTE(review): presumably the columns must be in the same order the scaler was
# fitted on — confirm against the training notebook.
input_scaled = scaler.transform(final_input_df)
# Display the scaled array (last expression of the cell).
input_scaled

array([[ 0.97545208, -0.4377492 ,  1.03137591, -0.65266232, -0.83419795,
         0.32957344, -0.86052337,  1.5809424 , -0.73382801, -0.72212816,
        -0.63776294, -0.79474818, -0.79951251,  0.83332832, -1.16378023,
        -0.88051512,  1.38044248, -0.88705673, -0.52408075,  0.90963773,
        -0.52353044, -0.55835281, -0.53067848, -0.52765585,  1.40914569,
        -0.54274967]])

In [9]:
# Predict churn: run the loaded model on the scaled input row.
# The displayed result is a 1x1 float32 array holding the model's output score.
prediction = model.predict(input_scaled)
prediction



array([[0.13607153]], dtype=float32)

In [10]:
# Pull the single score out of the 2-D prediction array (row 0, column 0).
prediction_probability = prediction[0, 0]

In [11]:
# Display the extracted scalar score.
prediction_probability

0.13607153

In [12]:
# Turn the score into a verdict: strictly above 0.5 counts as "likely to churn",
# anything else (including exactly 0.5) as "not likely".
churn_message = (
    "The customer is likely to churn"
    if prediction_probability > 0.5
    else "The customer is not likely to churn"
)
print(churn_message)

The customer is not likely to churn
