# Project Development


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw churn dataset from disk and preview its first rows.
data = pd.read_csv(filepath_or_buffer='Saved_Model/Dataset/Churn_Modelling.csv')
data.head(n=5)

#### **1. Handle missing values**

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# Print pandas' summary of the frame (columns, dtypes, non-null counts).
data.info()

#### **2. Remove Duplicate Values**

In [None]:
# Count the rows that repeat an earlier row exactly.
data.duplicated().sum()

In [None]:
# Keep only the first occurrence of each fully duplicated row.
data = data.drop_duplicates()

#### **3. Remove Unwanted Columns**

In [None]:
# Drop the columns this notebook treats as unwanted, then preview the result.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data.head(n=5)

In [None]:
# Names of the columns stored with the object dtype.
categorical_columns = data.select_dtypes(include=['object']).columns
print(categorical_columns)

In [None]:
# Names of every column that is not stored with the object dtype.
numeric_columns = data.select_dtypes(exclude=['object']).columns
print(numeric_columns)

### 4. Check Value Counts of Categorical Columns

In [None]:
# Distinct values present in the Geography column.
data.Geography.unique()

In [None]:
# Number of rows for each Geography value.
data["Geography"].value_counts()

In [None]:
# Number of rows for each Gender value.
data["Gender"].value_counts()

#### **5. Correlation Heatmap**


In [None]:
# Pairwise correlations between the non-object columns, drawn as an
# annotated heatmap on a fresh 14x10 figure.
corr_matrix = data[numeric_columns].corr()
plt.figure(figsize=(14, 10))
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f', linewidths=0.5)
plt.title(label='Correlation Heatmap', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Preview the cleaned frame before splitting features from the target.
data.head()

## Split

 + Dependent and Independent Variables

In [None]:
# Features are every column except the target; the target is `Exited`.
x = data.drop(columns='Exited')
y = data.Exited

In [None]:
# Preview the feature frame.
x.head()

In [None]:
# Split the feature names by dtype so each group can get its own
# preprocessing pipeline.
categorical_cols = x.select_dtypes(include=['object']).columns
numerical_cols = x.select_dtypes(exclude=['object']).columns

print("categorical column:", categorical_cols)
print("numerical column:", numerical_cols)

### 6. Encoding of Categorical Features

In [None]:
from sklearn.preprocessing import OrdinalEncoder , StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Explicit category orderings handed to the ordinal encoder; each value's
# position in its list is the order the encoder is given.
geo_cat = ["France", "Spain", "Germany"]
gender_cat = ["Female", "Male"]

In [None]:
# Display the gender category order.
gender_cat

In [None]:
# Display the geography category order.
geo_cat

In [None]:
# Numeric features: standardise with StandardScaler.
num_pipeline = Pipeline(steps=[('scaler', StandardScaler())])

# OrdinalEncoder pairs each entry of `categories` with an input column by
# position. Building the list from `categorical_cols` keeps the two aligned
# even if the column order changes, and a categorical column without a
# declared ordering fails here with a KeyError naming that column.
# NOTE(review): Geography looks nominal, so ordinal codes impose an arbitrary
# order on it; one-hot encoding may suit the network better. Confirm with the
# consumers of the saved preprocessor before changing the encoding.
category_orders = {'Geography': geo_cat, 'Gender': gender_cat}
cat_pipeline = Pipeline(steps=[('ordinalencoder', OrdinalEncoder(
    categories=[category_orders[col] for col in categorical_cols]))])

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer(
    [
        ("num_pipeline", num_pipeline, numerical_cols),
        ("cat_pipeline", cat_pipeline, categorical_cols),
    ]
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
# Fit the preprocessor on the training rows only, then apply the same fitted
# transform to the test rows. The original row index is passed through so the
# transformed features keep the same labels as y_train / y_test; without it the
# new frames would be renumbered from 0 and no longer align with the targets.
x_train = pd.DataFrame(
    preprocessor.fit_transform(x_train),
    columns=preprocessor.get_feature_names_out(),
    index=x_train.index,
)
x_test = pd.DataFrame(
    preprocessor.transform(x_test),
    columns=preprocessor.get_feature_names_out(),
    index=x_test.index,
)

In [None]:
# Preview the transformed training features.
x_train.head()

In [None]:
# Preview the training targets.
y_train.head()

## Feature Engineering

In [None]:
import pickle

# Persist the fitted preprocessor so the same transform can be reloaded later.
with open("Saved_Model/preprocessor.pkl", mode="wb") as preprocessor_file:
    pickle.dump(preprocessor, preprocessor_file)

# ANN Implementation

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense   
from tensorflow.keras.callbacks import EarlyStopping , TensorBoard
import datetime     

In [None]:
# (rows, columns) of the transformed training features.
x_train.shape

In [None]:
# Number of feature columns; used as the network's input width below.
x_train.shape[1]

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (64 and 32 units)
# and a single sigmoid output unit.
# The input width is declared with an explicit Input object rather than the
# `input_shape=` argument on the first Dense layer, which newer Keras
# releases warn against.
model = Sequential([
    tf.keras.Input(shape=(x_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
import tensorflow

# Adam optimizer with an explicit learning rate of 1e-3.
opt = tensorflow.keras.optimizers.Adam(learning_rate=1e-3)

In [None]:
# Binary cross-entropy loss for the sigmoid output; accuracy is tracked as
# the reported metric.
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

## Model Training

In [None]:
# Each run logs into its own timestamped sub-directory of logs/.
log_dir = f"logs/{datetime.datetime.now():%Y%m%d-%H%M%S}"
tensorflow_callbacks = TensorBoard(histogram_freq=1, log_dir=log_dir)

In [None]:
# Stop once val_loss has not improved for 5 epochs and restore the weights
# from the best epoch.
early_stopping_callback = EarlyStopping(restore_best_weights=True, patience=5, monitor="val_loss")

In [None]:
# Train for up to 100 epochs with TensorBoard logging and early stopping.
# Early stopping picks the best epoch from the validation loss, so the
# validation data must not be the test set: that would leak test information
# into model selection and make any later test score optimistic. The last 20%
# of the training rows is held out for validation instead, leaving
# x_test / y_test untouched for a final evaluation.
history = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=100,
    callbacks=[tensorflow_callbacks, early_stopping_callback],
)

# Save Model

In [None]:
# Write the trained network to disk next to the pickled preprocessor.
model.save(filepath="Saved_Model/churn_model.h5")

In [None]:
# Pull the per-epoch training and validation curves out of the fit history.
loss, acc, val_loss, val_acc = (
    history.history[metric_key]
    for metric_key in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
)

In [None]:
# Plot training vs. validation loss and accuracy side by side.
# `epochs` is the number of epochs actually run (early stopping may end
# training before the configured maximum); it drives a 1-based x-axis so the
# tick labels match the epoch numbers.
epochs = len(loss)
epoch_axis = range(1, epochs + 1)

plt.figure(figsize=(14, 5))
panels = [
    ('Loss', loss, val_loss),
    ('Accuracy', acc, val_acc),
]
for position, (metric_name, train_values, val_values) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epoch_axis, train_values, label=f'Training {metric_name}')
    plt.plot(epoch_axis, val_values, label=f'Validation {metric_name}')
    plt.title(f'{metric_name} over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()

# Match the heatmap cell: avoid overlapping labels and render explicitly.
plt.tight_layout()
plt.show()

### Load TensorBoard

In [None]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard

In [None]:
# Open TensorBoard on the logs/ directory that the training callback writes to.
%tensorboard --logdir logs/