In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [None]:
# Load the databse
data =pd.read_csv('Churn_Modelling.csv')
data.head()

In [None]:
#Preporessing the data
### Drop irrelevant columns
data=data.drop(['RowNumber', "CustomerId", "Surname"],axis=1)

In [None]:
data

In [None]:
## Encode categorical variables
label_encoder_gender=LabelEncoder()
data['Gender']=label_encoder_gender.fit_transform(data['Gender'])
data

In [None]:
### Onehot Encode 'Geography'
from sklearn.preprocessing import OneHotEncoder
onehot_encoder_geo=OneHotEncoder()
geo_encoder=onehot_encoder_geo.fit_transform(data[['Geography']])


In [None]:
geo_encoder.toarray()

In [None]:
onehot_encoder_geo.get_feature_names_out(['Geography'])

In [None]:
geo_encoder_df=pd.DataFrame(geo_encoder.toarray(), columns=onehot_encoder_geo.get_feature_names_out(['Geography']))
geo_encoder_df


In [None]:
## combile one hot encoder columns with the orginal data
data = pd.concat([data.drop('Geography', axis=1), geo_encoder_df], axis=1)
data.head()

In [None]:
## save the encoders and sscaler
with open('label_encoder_gender.pkl', 'wb') as file:
    pickle.dump(label_encoder_gender, file)
with open('onehot_encoder_geo.pkl','wb') as file:
    pickle.dump(onehot_encoder_geo,file)
    
    

In [None]:
## Divide the dataset into independent and dependent features

x=data.drop('Exited', axis=1)
y=data['Exited']

## Split the data in training and testing set
x_train,x_test, y_train, y_test=train_test_split(x,y,test_size=0.2, random_state=42)

## Scale these features
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.fit_transform(x_test)



In [None]:
x_train

In [None]:
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler,file)
    

## ANN Implementation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime


In [None]:
## Build our ANN Model
model=Sequential(
    [
        Dense(64,activation='relu', input_shape=(x_train.shape[1],)), ## first hidden layer
        Dense(32, activation='relu'), # hidden layer 2
        Dense(1, activation='relu') ## output layer
        
    ]
)

In [None]:
model.summary()

In [None]:
import tensorflow
opt=tensorflow.keras.optimizers.Adam(learning_rate=0.01)
loss= tensorflow.keras.losses.BinaryCrossentropy()
loss

In [None]:
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
## Set up the TensorBoard
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

log_dir="logs/fit"+datetime.datetime.now().strftime("%y%m%d-%H%M%S")
tensorflow_callbacks=TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
## Setup Early Stopping and TensorBoard
early_stopping_callbacks=EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)



In [None]:
## Train the model
history=model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test,),
    epochs=100,
    callbacks=[tensorflow_callbacks, early_stopping_callbacks,]
                  
                  )

In [None]:
model.save('model.h5')

In [None]:
## Load Tensorboard Extensions
%load_ext tensorboard


In [58]:
%tensorboard --logdir  logs/fit241014-024110

Reusing TensorBoard on port 6007 (pid 25668), started 0:01:07 ago. (Use '!kill 25668' to kill it.)

In [119]:
from tensorflow.keras.models import  load_model
import pickle, pandas as pd, numpy as np
h5=load_model('model.h5')

## load the encoder and scaller
with open('onehot_encoder_geo.pkl', 'rb') as file:
    label_encoder_geo=pickle.load(file)

with open('scaler.pkl', 'rb') as file:
    scaler= pickle.load(file)



In [126]:
input_data={
    
    'CreditScore':800,
    'Geography':'France',
    'Gender':'Male',
    'Age':40,
    'Tenure':3,
    'Balance':60000,
    'NumOfProducts':2,
    'HasCrCard':1,
    'IsActiveMember':1,
    'EstimatedSalary':50000
}

In [127]:
geo_encoded=label_encoder_geo.transform([[input_data['Geography'],]]).toarray()
geo_encoder_df=pd.DataFrame(geo_encoded, columns=label_encoder_geo.get_feature_names_out(['Geography']))
geo_encoder_df




Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0


In [128]:
input_data=pd.DataFrame([input_data])



In [129]:
input_data
#geo_encoder_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,800,France,Male,40,3,60000,2,1,1,50000


In [130]:
# Combine one-hot encoded columns with input data

input_data=pd.concat([input_data.reset_index(drop=True),geo_encoder_df], axis=1)



In [132]:
input_df=input_data
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,800,France,Male,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [133]:
## Encode Categorical variables
input_df['Gender']=label_encoder_gender.transform(input_df['Gender'])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,800,France,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


In [134]:
input_df=input_df.drop('Geography', axis=1)

In [107]:
input_df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,60000,2,1,1,50000,1.0,0.0,0.0


input_df

In [135]:
## Scalling the input data
input_scaled=scaler.transform(input_df)
input_scaled

array([[ 1.57627031,  0.90911166,  0.09477172, -0.69844549, -0.29010416,
         0.80510537,  0.63367318,  0.95214374, -0.84805047,  0.98019606,
        -0.57581067, -0.56349184]])

In [139]:
## Predict churn
prediction=model.predict(input_scaled)
prediction



array([[0.]], dtype=float32)

In [137]:
input_scaled

array([[ 1.57627031,  0.90911166,  0.09477172, -0.69844549, -0.29010416,
         0.80510537,  0.63367318,  0.95214374, -0.84805047,  0.98019606,
        -0.57581067, -0.56349184]])

In [138]:
prediction_probabillity=prediction[0][0]

In [118]:
prediction_probabillity=prediction[0][0]
if prediction_probabillity>0.5:
    print("The Cutomer is likely to churn")
else:
    print("The customer is not likely to churn")
    

The customer is not likely to churn


This is jitendra, kohar
