In [31]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import pickle
import datetime

In [11]:
df = pd.read_csv("Churn_Modelling.csv")
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [12]:
# Preprocess the data
df = df.drop(['RowNumber','CustomerId','Surname'], axis=1)

In [13]:
# ENcode categorical variables
label_encoder = LabelEncoder()
oh_encoder = OneHotEncoder(sparse_output=False, drop='first')

In [14]:
df['Gender'] = label_encoder.fit_transform(df['Gender'])
df[oh_encoder.get_feature_names_out()] = oh_encoder.fit_transform(df[['Geography']])
df = df.drop('Geography', axis=1)
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,1.0


In [17]:
# Create X and y
X = df.drop('EstimatedSalary', axis=1)
y = df['EstimatedSalary']
X.shape, y.shape

((10000, 11), (10000,))

In [18]:
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8000, 11), (2000, 11), (8000,), (2000,))

In [36]:
# Scale  the inputs
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [134]:
# Save the scaler and encoders
with open('scaler_regression.pkl','wb') as file:
    pickle.dump(scaler, file)
with open('oh_encoder_regression.pkl','wb') as file:
    pickle.dump(oh_encoder, file)
with open('label_encoder_regression.pkl','wb') as file:
    pickle.dump(label_encoder, file)

# ANN Regression

In [105]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsolutePercentageError
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras import backend as K

In [119]:
X_train.shape

(8000, 11)

In [120]:
# Create the model
model = Sequential(
    [
        Input((11,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1)
    ]
)
model.summary()

In [121]:
# Create optimiser and Loss
opt = Adam(learning_rate=0.001)
loss = MeanAbsolutePercentageError()

In [122]:
# Compile the model
model.compile(optimizer=opt, loss="mean_absolute_error", metrics=['mae'])

In [123]:
# Create Torchboard Callback
log_dir = "logs/regression_fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tf_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [124]:
# Create Early Stopping Callback
es_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [125]:
X_train_scaled

array([[-0.39361362,  0.9056767 ,  0.19594523, ..., -0.50760604,
        -0.57523331, -0.57311631],
       [-1.99135389, -1.10414677, -0.0910664 , ..., -0.50760604,
        -0.57523331, -0.57311631],
       [-1.38318179,  0.9056767 ,  0.67429793, ..., -0.50760604,
        -0.57523331, -0.57311631],
       ...,
       [ 0.10117046, -1.10414677,  2.10935604, ..., -0.50760604,
        -0.57523331,  1.74484652],
       [-3.08400208, -1.10414677,  1.72667388, ...,  1.97003173,
         1.73842505, -0.57311631],
       [-0.14622158,  0.9056767 ,  0.29161577, ..., -0.50760604,
        -0.57523331, -0.57311631]])

In [126]:
# Fit the model
history = model.fit(
    X_train_scaled, y_train, validation_data=(X_test_scaled, y_test),
    callbacks=[tf_callback, es_callback], epochs=100
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 100105.0312 - mae: 100105.0312 - val_loss: 98507.7188 - val_mae: 98507.7188
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 92889.7734 - mae: 92889.7734 - val_loss: 68746.6250 - val_mae: 68746.6250
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 60086.5391 - mae: 60086.5391 - val_loss: 51128.3477 - val_mae: 51128.3477
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 49963.2305 - mae: 49963.2305 - val_loss: 50968.2656 - val_mae: 50968.2656
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 50371.1016 - mae: 50371.1016 - val_loss: 50906.0273 - val_mae: 50906.0273
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 50216.2500 - mae: 50216.2500 - val_loss: 50863.2930 - val_

In [127]:
# Save the model
model.save('regression_model.h5')



In [128]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [129]:
%tensorboard --logdir logs/regression_fit/ 

Reusing TensorBoard on port 6007 (pid 60200), started 0:07:32 ago. (Use '!kill 60200' to kill it.)

In [130]:
# load the model
from tensorflow.keras.models import load_model
loaded_model = load_model('regression_model.h5')



In [139]:
# Predict the output
# Example Input Data
input_data = {
    'CreditScore': 600,
    'Geography': 'France',
    'Gender': 'Male',
    'Age': 40,
    'Tenure': 3,
    'Balance': 60000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'Exited': 0,
}

In [140]:
input_df = pd.DataFrame([input_data])
input_df

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,Exited
0,600,France,Male,40,3,60000,2,1,1,0


In [141]:
# load the scalers and encoders
with open("scaler_regression.pkl",'rb') as file:
    scaler = pickle.load(file)
with open("label_encoder_regression.pkl",'rb') as file:
    label_encoder = pickle.load(file)
with open("oh_encoder_regression.pkl",'rb') as file:
    oh_encoder = pickle.load(file)

In [142]:
input_df['Gender'] = label_encoder.transform(input_df['Gender'])
input_df[oh_encoder.get_feature_names_out()] = oh_encoder.transform(input_df[['Geography']])
input_df = input_df.drop("Geography", axis=1)
input_df = scaler.transform(input_df.values)
input_df



array([[-0.54538295,  0.93980805,  0.11280776, -0.69540934, -0.24673561,
         0.76624957,  0.62902576,  0.96655883, -0.49843701, -0.59274898,
        -0.57658047]])

In [143]:
model.predict(input_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


array([[88819.016]], dtype=float32)