In [1]:
import pandas as pd
df=pd.read_csv("Churn_Modelling.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [2]:
df=df.drop(["RowNumber","CustomerId","Surname"],axis=1)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder

label_encoder_gender=LabelEncoder()
df["Gender"]=label_encoder_gender.fit_transform(df["Gender"])
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [4]:
#one hot encoding "geography"

onehot_encoder_geo=OneHotEncoder(handle_unknown="ignore")
geo_encoded=onehot_encoder_geo.fit_transform(df[["Geography"]]).toarray()
encoded_dataframe=pd.DataFrame(geo_encoded,columns=onehot_encoder_geo.get_feature_names_out(["Geography"]))
encoded_dataframe

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [5]:
df=pd.concat([df.drop("Geography",axis=1),encoded_dataframe],axis=1)
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [6]:
x=df.drop("EstimatedSalary",axis=1)
y=df["EstimatedSalary"]

In [7]:
## train and testing
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [8]:
# standardscaler
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
x_train_scaled=scaler.fit_transform(x_train)
x_test_scaled=scaler.transform(x_test)

In [9]:
##save models
import pickle
with open("label_encoder_gender.pkl","wb") as file:
    pickle.dump(label_encoder_gender,file)
    
with open("onehot_encoder_geo.pkl","wb") as file:
    pickle.dump(onehot_encoder_geo,file)

with open("scaler.pkl","wb") as file:
    pickle.dump(scaler,file)

In [10]:
#first method

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

#build the model
model=Sequential(
    [
        Dense(64,activation="relu",input_dim=(x_train_scaled.shape[1])),
        Dense(32,activation="relu"),
        Dense(1)
    ]
)
model.compile(loss="mean_absolute_error",optimizer="adam",metrics=["mae"])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
#second method


# from tensorflow.keras.models import Sequential 
# from tensorflow.keras.layers import Dense,Dropout

# #defining the ann model
# model=Sequential()

# #input layer
# model.add(Dense(68,activation="relu",input_dim=x_train_scaled.shape[1]))

# #hidden layer
# model.add(Dense(32,activation="relu"))
# model.add(Dense(24,activation="relu"))
# model.add(Dense(22,activation="relu"))

# #output layer
# model.add(Dense(1))

# #compile our model
# model.compile(loss="mean_absolute_error",optimizer="adam",metrics=["mae"])
# model.summary()

In [12]:
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

log_dir=r"regressionlogs"+ datetime.datetime.now().strftime("%y%m%d-%H%M%S")

tensorboard_callback=TensorBoard(log_dir=log_dir,histogram_freq=1)
#set up Earlystopping
early_stopping=EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

history = model.fit(
    x_train_scaled,y_train,
    validation_data=(x_test_scaled,y_test),
    epochs=50,
    callbacks=[early_stopping,tensorboard_callback]
    )

Epoch 1/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 99473.4141 - mae: 99473.4141 - val_loss: 98491.3750 - val_mae: 98491.3750
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 100775.1875 - mae: 100775.1875 - val_loss: 96823.9219 - val_mae: 96823.9219
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 97242.6484 - mae: 97242.6484 - val_loss: 92603.9844 - val_mae: 92603.9844
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 92825.9062 - mae: 92825.9062 - val_loss: 85616.0703 - val_mae: 85616.0703
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 84523.0078 - mae: 84523.0078 - val_loss: 76807.5234 - val_mae: 76807.5234
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 76247.1562 - mae: 76247.1562 - val_loss: 67908.8594 - val_mae: 6

In [13]:
#evaluate model on test data
test_loss,test_mae=model.evaluate(x_test,y_test)
print(f"Test MAE : {test_mae}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1551679616.0000 - mae: 1551679616.0000
Test MAE : 1516236416.0


In [14]:
model.save("regression_model.h5")

