## End to End Deep Learning Project - ANN (Artificial Neural Network)

1. Churn Modeling Dataset => Classification

2. Basic FE (Feature Engineering)

    a. Convert categorical variables into numerical ones

    b. Standardization
3. Model Creation (ANN)
4. Model Training
5. Save the trained model (`.h5` file) and the fitted encoders/scaler (pickle files)
6. Streamlit app
7. Web App
8. Deployment

In [1]:
## Import the Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [2]:
## Read the churn dataset from the working directory and preview the first rows
csv_path = "Churn_Modelling.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
## Process the Data
### Remove the columns that are not used as model features
df = df.drop(columns = ['RowNumber', 'CustomerId', 'Surname'])

In [4]:
## Encode categorical variables
### Gender -> integer codes; the fitted encoder object is kept because it is
### pickled in a later cell
label_encoder_gender = LabelEncoder()
gender_codes = label_encoder_gender.fit_transform(df['Gender'])
df['Gender'] = gender_codes
df.head(2)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0


In [None]:
## One-hot encode (OHE) the Geography column
from sklearn.preprocessing import OneHotEncoder

# sparse_output=False makes fit_transform return a dense array that can be
# wrapped directly in a DataFrame.
ohe = OneHotEncoder(sparse_output = False)
geo_ohe = ohe.fit_transform(df[['Geography']])

print(ohe.get_feature_names_out())

# Reuse df's index so the concat below lines rows up by label even if df does
# not have a default 0..n-1 index (e.g. after rows were filtered out). Without
# this, pd.concat(axis=1) would align on mismatched labels and introduce NaNs.
geo_df = pd.DataFrame(geo_ohe, columns = ohe.get_feature_names_out(), index = df.index)

## Merge: encoded Geography columns first, then the remaining columns of df
df = pd.concat([geo_df, df.drop(columns = ['Geography'])], axis = 1)
df.head(2)

In [10]:
### Save the fitted encoders as pickle files
### (the scaler is pickled in a later cell, once it has been fitted)
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('ohe_geography.pkl', ohe),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)

In [12]:
## Preview the first two rows of the frame after the encoding steps
df.head(2)

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1.0,0.0,0.0,619,0,42,2,0.0,1,1,1,101348.88,1
1,0.0,0.0,1.0,608,0,41,1,83807.86,1,0,1,112542.58,0


In [14]:
## Separate the target column ('Exited') from the input features
y = df['Exited']
X = df.drop(columns = ['Exited'])

## Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)

### Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
## Persist the fitted scaler as a pickle file
with open('scaler.pkl', mode = 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

## ANN Implementation

In [18]:
'''
1. Sequential Network
2. Dense --> 64 (Nodes)
3. Activation Function ---> Sigmoid, tanh, Relu, Leaky Relu
4. Optimizer ---> Back Propagation --> Updating the weights
5. Loss function --> Try to decrease
6. Metrics ---> [Accuracy] [mse, mae]
7. Training ---> Logs ---> Folder ---> Tensorboard --->Visualization
'''

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime




In [24]:
### Model Creation - ANN (Artificial Neural Network), assembled layer by layer
n_features = X_train.shape[1]  # number of input columns after preprocessing

ann_model = Sequential()
ann_model.add(Dense(64, activation = 'relu', input_shape = (n_features,)))  ## HL1, connected to the input layer
ann_model.add(Dense(32, activation = 'relu'))  ## HL2
ann_model.add(Dense(1, activation = 'sigmoid'))  ## output layer

In [25]:
## Print the layer-by-layer summary (output shapes and parameter counts)
ann_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                832       
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [29]:
## Adam optimizer from tf.keras (tf -> tensorflow) with an explicitly chosen learning rate
opt = tf.keras.optimizers.Adam(learning_rate = 1e-2)

In [31]:
## Compile the model
### Passing the optimizer by name, as in the commented line below, would not
### use the learning rate chosen above:
# model.compile( optimizer = "adam", loss = "binary_crossentropy", metrics = ['accuracy'])
### so the optimizer object `opt` is passed instead.
ann_model.compile(
    loss = "binary_crossentropy",
    optimizer = opt,
    metrics = ['accuracy'],
)

In [46]:
## Set up TensorBoard logging
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

### Each run logs into its own timestamped folder under logs/fit/
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorflow_callback = TensorBoard(histogram_freq = 1, log_dir = log_dir)

In [47]:
## Set up early stopping
### Watches val_loss with a patience of 10 epochs and restores the best weights
early_stopping_callbacks = EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    restore_best_weights = True,
)

In [48]:
### Training the model
history = ann_model.fit(
    X_train,y_train, 
    validation_data = (X_test, y_test),
    epochs = 100,
    callbacks = [tensorflow_callback, early_stopping_callbacks]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [49]:
## Save the trained model to disk as an HDF5 (.h5) file
ann_model.save('ann_model.h5') ## h5 extension is compatible with keras

  saving_api.save_model(


In [50]:
## Load the TensorBoard notebook extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [52]:
## Open TensorBoard inline, pointed at the folder that holds the run logs
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 25604), started 0:07:48 ago. (Use '!kill 25604' to kill it.)

In [None]:
### Load the pickle file
