In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import os
import pickle


In [3]:
# Load the raw churn dataset from the working directory, print its
# (rows, columns) shape and preview the first five records.
df = pd.read_csv(r"Churn_Modelling.csv")
print(df.shape)
df.head()


(10000, 14)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Wrangle Function

In [4]:
# Split the data into the feature frame (X) and the target column (y).
y = df["Exited"]
X = df.drop(columns="Exited")

In [5]:
def wrangle(dataframe, encoder_path="ohe_preprocessor.pkl"):
    """Clean and encode churn records so they can be fed to the model.

    Steps, in order:

    1. Drop the identifier columns ``RowNumber``, ``CustomerId`` and
       ``Surname`` when they are present.
    2. Encode ``Gender`` as ``1`` for ``"Male"`` and ``0`` for ``"Female"``
       when the column exists and is not already numeric.
    3. One-hot encode ``Geography`` with a ``ColumnTransformer``. The fitted
       transformer is pickled to ``encoder_path`` the first time; on later
       calls the pickled transformer is loaded and only ``transform`` is run.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input records. The caller's frame is not modified.
    encoder_path : str, default "ohe_preprocessor.pkl"
        Location of the pickled ``ColumnTransformer``.

    Returns
    -------
    pandas.DataFrame
        The processed frame. When ``Geography`` is present the one-hot
        columns come first, followed by the remaining columns, and the
        input's index is preserved.
    """
    # `drop` returns a new frame, so the assignments below never touch the
    # caller's object.
    dataframe = dataframe.drop(
        columns=["RowNumber", "CustomerId", "Surname"],
        errors="ignore",
    )

    # Label-encode Gender. Checking "not numeric" (rather than == "object")
    # also covers pandas string dtypes, and the membership test keeps Gender
    # optional like every other column handled here.
    if "Gender" in dataframe.columns and not pd.api.types.is_numeric_dtype(
        dataframe["Gender"]
    ):
        dataframe["Gender"] = dataframe["Gender"].map({
            "Male": 1,
            "Female": 0
        })

    # One-hot encode Geography through a ColumnTransformer.
    if "Geography" in dataframe.columns:

        if os.path.exists(encoder_path):
            # Reuse the previously fitted encoder so that the output columns
            # stay consistent between calls.
            # NOTE: pickle.load can execute arbitrary code; only point
            # encoder_path at files produced by this function.
            with open(encoder_path, "rb") as file:
                preprocessor = pickle.load(file)
            X_transformed = preprocessor.transform(dataframe)
        else:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("geo",
                     OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
                     ["Geography"])
                ],
                remainder="passthrough"
            )
            X_transformed = preprocessor.fit_transform(dataframe)
            with open(encoder_path, "wb") as file:
                pickle.dump(preprocessor, file)

        geo_feature_names = (
            preprocessor.named_transformers_["geo"]
            .get_feature_names_out(["Geography"])
        )

        remaining_cols = [col for col in dataframe.columns if col != "Geography"]
        final_columns = list(geo_feature_names) + remaining_cols

        # Use the index of the frame being processed, not a notebook-level
        # global, so the function works for any input (e.g. a single row).
        dataframe = pd.DataFrame(
            X_transformed,
            columns=final_columns,
            index=dataframe.index,
        )

    return dataframe

In [6]:
#df['Gender'].value_counts()

In [7]:
#df.isnull().sum()

In [8]:
#df.describe().T

In [9]:
# Run the feature frame through wrangle() and rebind X to the result,
# then preview the first rows of the processed features.
X=wrangle(X)
X.head()


Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,1.0,0.0,0.0,619.0,0.0,42.0,2.0,0.0,1.0,1.0,1.0,101348.88
1,0.0,0.0,1.0,608.0,0.0,41.0,1.0,83807.86,1.0,0.0,1.0,112542.58
2,1.0,0.0,0.0,502.0,0.0,42.0,8.0,159660.8,3.0,1.0,0.0,113931.57
3,1.0,0.0,0.0,699.0,0.0,39.0,1.0,0.0,2.0,0.0,0.0,93826.63
4,0.0,0.0,1.0,850.0,0.0,43.0,2.0,125510.82,1.0,1.0,1.0,79084.1


In [10]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [11]:
# Standardise the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Display the scaled training features.
X_train

array([[ 1.00053348, -0.57776083, -0.57735027, ..., -1.53973586,
         0.96978946, -1.38284424],
       [-0.99946681,  1.73082   , -0.57735027, ...,  0.64946204,
        -1.03115165, -1.19589044],
       [ 1.00053348, -0.57776083, -0.57735027, ..., -1.53973586,
        -1.03115165, -0.68342242],
       ...,
       [ 1.00053348, -0.57776083, -0.57735027, ..., -1.53973586,
        -1.03115165, -0.14675423],
       [ 1.00053348, -0.57776083, -0.57735027, ...,  0.64946204,
        -1.03115165, -0.0548954 ],
       [-0.99946681,  1.73082   , -0.57735027, ...,  0.64946204,
         0.96978946, -0.81796934]])

In [13]:
# Persist the fitted scaler to disk.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [14]:
import tensorflow 
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard # type: ignore
import datetime




In [15]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# the initialisation (and therefore training) is repeatable after a kernel
# restart.
tensorflow.keras.utils.set_random_seed(42)

# Fully connected binary classifier: three ReLU hidden layers (64 -> 32 -> 16)
# followed by a single sigmoid output unit. The input width is taken from the
# number of columns in the scaled training matrix.
model = Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)),
    Dense(32,activation='relu'),
    Dense(16,activation='relu'),
    Dense(1,activation='sigmoid') #O/P layer
])




In [16]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 3457 (13.50 KB)
Trainable params: 3457 (13.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [17]:
# Adam optimizer with an explicit learning rate of 0.01.
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)


In [18]:
# Configure training: binary cross-entropy loss, accuracy reported per epoch.
model.compile(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [19]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

# Each run logs to its own timestamped directory under logs/fit/.
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"
tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [20]:
# Stop training once val_loss has not improved for 10 epochs and roll the
# model back to its best weights.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

In [21]:
# Train for up to 100 epochs with TensorBoard logging and early stopping;
# the returned History object is kept in `history`.
# NOTE(review): the test split is passed as validation_data, and the
# early-stopping callback monitors val_loss, so the test set influences which
# weights are kept. Consider a separate validation split if an unbiased test
# score is needed.
history=model.fit(
    X_train,y_train,validation_data=(X_test,y_test),epochs=100,
    callbacks=[tensorflow_callback,early_stopping_callback]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


In [22]:
# Save the trained model to 'model.h5' in the working directory.
# NOTE(review): the recorded cell output shows a warning raised from
# saving_api.save_model - presumably about the legacy HDF5 format; confirm
# before changing the file name, since other code may load 'model.h5'.
model.save('model.h5')

  saving_api.save_model(


In [23]:
#%load_ext tensorboard

In [24]:
#%tensorboard --logdir logs --port 6007
