In [48]:
import pandas as pd 

In [49]:
data = pd.read_csv('Churn_Modelling.csv')
data;

In [50]:
data['Tenure'].max()

np.int64(10)

In [51]:
data.isnull().sum();

In [52]:
data.drop(columns = {'RowNumber','CustomerId','Surname'},inplace = True)

In [53]:
print(data['Gender'].unique())
print(data['Geography'].unique())

['Female' 'Male']
['France' 'Spain' 'Germany']


In [54]:
# Gender column -> we can solve by LabelEncoder 
# Geography column-> we can solve by OneHotEncoder

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])

onehot_encoder = OneHotEncoder(sparse_output=False)
encoded = onehot_encoder.fit_transform(data[['Geography']])
encoded = pd.DataFrame(encoded,columns = onehot_encoder.get_feature_names_out(['Geography']))
encoded.rename(columns = {'Geography_France':'France', 'Geography_Germany':'Germany', 'Geography_Spain':'Spain'},inplace = True)
data = pd.concat([data,encoded],axis = 1)

In [55]:
import pickle
with open('gender.pkl','wb') as file:
    pickle.dump(label_encoder,file)
with open('geography.pkl','wb') as file:
    pickle.dump(onehot_encoder,file)

In [56]:
feature = data.drop(columns = ['Exited','Geography'],axis = 1)
target = data['Exited']

In [57]:
# Splitting of the data 
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(feature,target,train_size=0.8,random_state=42)

In [58]:
# We will use the scalar for giving the value equal weightage
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [59]:
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

In [60]:
#Steps to go in ANN Classification 
# 1 - Feature Engineering 
# 2 - ANN Model Creation/ Sequential Diagram (Hidden layer neuron + activation function + input)
# 3 - Compile the Model/backpropogation (optimizer + loss function + metrics)
# 4 - Callbacks to reduce the duration of the model running
# 5 - test the model
# 6 - logs visualization through tensorboard

In [61]:
## ANN ka skeleton tyyar hua h -> kitne neuron + activation + kitne input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(64,activation = 'relu',input_shape = (X_train.shape[1],)),
    Dense(34,activation = 'relu'),
    Dense(1,activation = 'sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [62]:
## Back propagation ki tyyari -> loss function konsa + metrics me kya + optimizer konsa rhega
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

opt = Adam(learning_rate = 0.01)
loss = BinaryCrossentropy()

model.compile(loss = loss, optimizer = opt,metrics = ['accuracy'])

model.summary()

In [63]:
## model ke training Params ko log me collect krte h 
from datetime import datetime
from tensorflow.keras.callbacks import TensorBoard

log_dir = "log/fit/" + datetime.now().strftime("%Y%m%D-%H%M%S")
log_visual = TensorBoard(log_dir = log_dir, histogram_freq =1 )


In [64]:
#Model jha pe convergance pe aa jaye wha se hi model training exit kr dega aur best params store kr lega
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(patience = 10,restore_best_weights = True)

In [65]:
## Ab model ko fit krte h apne data ke saath 
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs = 50,callbacks = [log_visual,early_stop])

Epoch 1/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.6781 - loss: 363.6536 - val_accuracy: 0.8035 - val_loss: 97.5683
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6871 - loss: 36.0350 - val_accuracy: 0.7210 - val_loss: 17.9129
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6865 - loss: 16.1964 - val_accuracy: 0.6055 - val_loss: 13.5830
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6827 - loss: 8.1603 - val_accuracy: 0.6595 - val_loss: 2.1072
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.6775 - loss: 4.2113 - val_accuracy: 0.5570 - val_loss: 2.1118
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6951 - loss: 1.9868 - val_accuracy: 0.8000 - val_loss: 1.1263
Epoch 7/50
[1m250/2

<keras.src.callbacks.history.History at 0x2441d8d8f50>

In [66]:
#saving the whole model 
model.save('ANNModel.h5')



In [67]:
model.summary()

In [68]:
##To visualize the logs 
%reload_ext tensorboard

In [69]:
%tensorboard --logdir log/fit/20251010

Reusing TensorBoard on port 6006 (pid 5248), started 1 day, 6:58:12 ago. (Use '!kill 5248' to kill it.)