In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder,OneHotEncoder
import warnings
warnings.filterwarnings("ignore")
import pickle

In [4]:
# Load the raw customer-churn table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [5]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Remove the row counter, customer id and surname columns before modelling.
# The drop is in place, so re-running this cell on the same frame raises KeyError.
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

In [7]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [8]:
# Label encoder for the Gender column; it is fitted in the next cell and reused
# later to encode new input rows.
gender_encoder = LabelEncoder()

In [9]:
# Fit the encoder on the Gender strings and overwrite the column with integer codes.
gender_codes = gender_encoder.fit_transform(df['Gender'])
df['Gender'] = gender_codes

In [10]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [11]:
# List the distinct Geography values before one-hot encoding them.
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [12]:
# Dense, integer-valued one-hot encoder for the Geography column.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so prefer the new name and fall back only on older releases.
try:
    geography_encoder = OneHotEncoder(sparse_output=False, dtype=int)
except TypeError:
    geography_encoder = OneHotEncoder(sparse=False, dtype=int)

In [13]:
# Fit the encoder on the Geography column and expand it into 0/1 indicator columns.
geography_encoded = geography_encoder.fit_transform(df[['Geography']])
# Reuse df's index so the column-wise concat that follows lines up row-for-row
# even when df does not carry a default 0..n-1 index.
geography_encoded_df = pd.DataFrame(
    geography_encoded,
    columns=geography_encoder.get_feature_names_out(['Geography']),
    index=df.index,
)

In [14]:
# Swap the raw Geography column for its one-hot indicator columns,
# which are appended at the right-hand side of the frame.
df = pd.concat([df.drop(columns=['Geography']), geography_encoded_df], axis=1)

In [15]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,0,1


In [16]:
# Target: the Exited flag. Features: every remaining column.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [17]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Report the (rows, columns) shape of each split.
train_shape, test_shape = X_train.shape, X_test.shape
print(f"Training data size: {train_shape}")
print(f"Testing data size: {test_shape}")

Training data size: (8000, 12)
Testing data size: (2000, 12)


In [19]:
# Feature scaler for the churn classifier; it is fitted on the training split in the next cell.
scaler=StandardScaler()

In [20]:
# Learn the scaling statistics from the training split only, then apply the same
# transform to both splits. Both names are rebound to the scaler's output.
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [21]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime




In [22]:
X_train.shape

(8000, 12)

In [23]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling repeat across Restart & Run All (42 matches the train/test split seed).
tf.keras.utils.set_random_seed(42)

# Empty sequential container; the layers are added in the following cell.
model = Sequential()




In [24]:
# Two ReLU hidden layers (64 and 32 units) followed by one sigmoid output unit.
n_features = X_train.shape[1]
for layer in (
    Dense(64, activation='relu', input_shape=(n_features,)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
):
    model.add(layer)

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                832       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
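# Superseded draft, kept for reference only: this version compiled with the
# string shortcut 'adam'; the active compile call further down passes an Adam
# instance with an explicit learning rate instead.
# model.compile(
#     optimizer='adam',              # Optimizer (Adam is commonly used)
#     loss='binary_crossentropy',     # Loss function for binary classification
#     metrics=['accuracy']            # Metrics to track during training
# )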

In [27]:
# Adam optimizer with an explicit learning rate of 0.01.
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)

In [28]:
# Configure training for binary classification: cross-entropy loss, accuracy
# as the tracked metric, and the Adam instance created above.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=opt,
)

In [29]:
# Timestamped TensorBoard directory so every run gets its own folder under logs/fit/.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

In [30]:
# Show the run-specific TensorBoard directory; it changes on every execution.
print("Logging directory:", log_dir)

Logging directory: logs/fit/20240919-182946


In [31]:
# TensorBoard logger that writes to this run's log_dir, with histograms every epoch.
tensorflow_callback = TensorBoard(
    histogram_freq=1,
    log_dir=log_dir,
)

In [32]:
# Stop once val_loss has failed to improve for 10 consecutive epochs, then
# roll the model back to the weights from its best validation epoch.
early_stopping_callback = EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

In [33]:
# Train the churn classifier for at most 100 epochs; early stopping may end it sooner.
# NOTE(review): the hold-out test split doubles as validation data here, so early
# stopping is tuned on the same rows that would later be used to judge the model.
fit_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=fit_callbacks,
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


In [34]:
# Persist the trained churn classifier in HDF5 format.
model.save('model.h5')

# The saved network only accepts rows prepared by the fitted encoders and scaler,
# and `scaler` is rebound to a new StandardScaler later in this notebook, so store
# the classifier's preprocessing objects next to the model file.
for artifact_path, artifact in (
    ('gender_encoder.pkl', gender_encoder),
    ('geography_encoder.pkl', geography_encoder),
    ('scaler.pkl', scaler),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

In [35]:
%load_ext tensorboard

In [63]:
%tensorboard --logdir logs/fit/20240919-182946

In [37]:
# One hand-written customer record used to smoke-test the churn classifier.
# Keys follow the raw feature columns (no Exited target).
input_data = dict(
    CreditScore=600,
    Geography="France",
    Gender="Male",
    Age=40,
    Tenure=3,
    Balance=80000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    EstimatedSalary=80000,
)

In [38]:
# Wrap the single record in a one-row DataFrame so the fitted transformers can consume it.
df_input = pd.DataFrame(data=[input_data])

In [39]:
df_input

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,600,France,Male,40,3,80000,2,1,1,80000


In [40]:
# Encode Gender with the already-fitted encoder (transform only, no refit).
gender_codes = gender_encoder.transform(df_input['Gender'])
df_input['Gender'] = gender_codes

In [41]:
# One-hot encode Geography with the fitted encoder, then replace the raw column
# with the indicator columns appended on the right.
geography_columns = geography_encoder.get_feature_names_out(['Geography'])
geography_encoded = geography_encoder.transform(df_input[['Geography']])
geography_encoded_df = pd.DataFrame(geography_encoded, columns=geography_columns)
df_input = pd.concat([df_input.drop(columns=['Geography']), geography_encoded_df], axis=1)

In [42]:
df_input

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,80000,2,1,1,80000,1,0,0


In [43]:
# Apply the scaler fitted on the training split. From here on `df_input` holds
# the scaler's array output rather than a DataFrame, so this cell is not re-runnable.
df_input=scaler.transform(df_input)

In [44]:
df_input

array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349,  0.0624086 ,
         0.80843615,  0.64920267,  0.97481699, -0.3552321 ,  1.00150113,
        -0.57946723, -0.57638802]])

In [45]:
# Sigmoid output for the single input row, thresholded at 0.5 to give a churn flag.
churn_probability = model.predict(df_input)[0][0]
churn_probability > 0.5



False

In [46]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1,0,0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1,0,0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1,0,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0,0,1


In [47]:
# Second task: predict EstimatedSalary from every other column (Exited included).
# This rebinds X and y, replacing the classification features and target.
y = df['EstimatedSalary']
X = df.drop(columns=['EstimatedSalary'])

In [48]:
# Same 80/20 split and seed as the classification task, now on the regression X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [49]:
# Fresh scaler for the regression feature set. This rebinds `scaler`, so the
# scaler fitted for the churn classifier is no longer reachable by name.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [50]:
# Seed the Python, NumPy and TensorFlow RNGs so the regression network's weight
# initialisation and batch shuffling repeat across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Regression network: two ReLU hidden layers and a single linear output unit.
# This rebinds `model`, replacing the in-memory churn classifier.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # no activation: unbounded real-valued prediction

In [51]:
# New Adam instance for the regression model, again with learning rate 0.01.
opt = tf.keras.optimizers.Adam(learning_rate=1e-2)

In [52]:
# Optimise mean absolute error and additionally report mean squared error.
model.compile(
    loss='mean_absolute_error',
    metrics=['mse'],
    optimizer=opt,
)

In [53]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                832       
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2945 (11.50 KB)
Trainable params: 2945 (11.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [54]:
# Give the regression run its own TensorBoard directory and callbacks. Reusing
# `tensorflow_callback` would write these curves into the churn classifier's
# log_dir, mixing two unrelated models in a single TensorBoard run.
regression_log_dir = "logs/regression/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
regression_tensorboard_callback = TensorBoard(log_dir=regression_log_dir, histogram_freq=1)
# Same early-stopping settings as the classifier, on a separate instance so the
# two training runs share no callback objects.
regression_early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for at most 100 epochs; early stopping may end the run sooner.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[regression_tensorboard_callback, regression_early_stopping_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


In [55]:
# One hand-written customer record for the salary regressor: the same fields as
# before, with Exited supplied as a feature and EstimatedSalary left out.
input_data = dict(
    CreditScore=600,
    Geography="France",
    Gender="Male",
    Age=40,
    Tenure=3,
    Balance=80000,
    NumOfProducts=2,
    HasCrCard=1,
    IsActiveMember=1,
    Exited=0,
)

In [56]:
# Wrap the single record in a one-row DataFrame for the fitted transformers.
df_input = pd.DataFrame(data=[input_data])

In [57]:
df_input 

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,600,France,Male,40,3,80000,2,1,1,0


In [58]:
# Encode Gender and Geography with the encoders fitted earlier (transform only),
# then replace the raw Geography column with its indicator columns on the right.
df_input['Gender'] = gender_encoder.transform(df_input['Gender'])
geography_columns = geography_encoder.get_feature_names_out(['Geography'])
geography_encoded = geography_encoder.transform(df_input[['Geography']])
geography_encoded_df = pd.DataFrame(geography_encoded, columns=geography_columns)
df_input = pd.concat([df_input.drop(columns=['Geography']), geography_encoded_df], axis=1)

In [59]:
df_input

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited,Geography_France,Geography_Germany,Geography_Spain
0,600,1,40,3,80000,2,1,1,0,1,0,0


In [60]:
# Apply the regression scaler (the one refitted on the salary feature set).
# `df_input` is rebound to the scaler's array output, so this cell is not re-runnable.
df_input=scaler.transform(df_input)

In [61]:
df_input

array([[-0.53598516,  0.91324755,  0.10479359, -0.69539349,  0.0624086 ,
         0.80843615,  0.64920267,  0.97481699, -0.50857963,  1.00150113,
        -0.57946723, -0.57638802]])

In [62]:
# Predicted EstimatedSalary for the single prepared row.
salary_prediction = model.predict(df_input)
salary_prediction[0][0]



89374.305