## Regression using ANN


1. Importing required Library
2. Loading the Dataset
3. Data Preprocessing
4. Dividing the data into Independent and Dependent
5. ANN Implementation (Regression)


___

### 1. Importing required Library

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

### 2. Loading the Dataset

In [3]:
# Read the churn dataset from the CSV file in the working directory
data = pd.read_csv('Churn_Modelling.csv')
# Preview the first rows to check which columns were loaded
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### 3. Data Preprocessing

In [4]:
# Drop the columns that won't affect the model (row number, customer id, surname)

data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [12]:
# Encode the categorical Gender column as integer codes

label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(data['Gender'])
data['Gender'] = label_encoder_gender.transform(data['Gender'])
data


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,1.0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,1.0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,1.0,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,0.0,1.0,0.0


In [5]:
# One-hot encode the Geography column

from sklearn.preprocessing import OneHotEncoder

onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(data[['Geography']])
# The transform result is a sparse matrix with one column per Geography value
geo_encoder = onehot_encoder_geo.transform(data[['Geography']])
geo_encoder

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [6]:
# Convert the sparse one-hot matrix to a dense array to inspect the 0/1 values
geo_encoder.toarray()

array([[1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [7]:
# Column names produced by the fitted encoder for the Geography feature
cl = onehot_encoder_geo.get_feature_names_out(['Geography'])

In [8]:
# Wrap the dense one-hot matrix in a DataFrame labelled with the encoder's column names.
# The frame reuses data's index so that the later pd.concat(axis=1), which aligns on
# index labels rather than on position, pairs every encoded row with its source row.
geo_encoder = pd.DataFrame(geo_encoder.toarray(), columns=cl, index=data.index)
geo_encoder

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [9]:
# Inspect data before the merge: it still contains the raw Geography column
data

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [10]:
# Replace the raw Geography column with its one-hot encoded columns

data = pd.concat(
    [data.drop(columns=['Geography']), geo_encoder],  # Geography removed, encoded columns appended
    axis=1,
)
data.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,Female,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,Female,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,Female,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,Female,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,Female,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [13]:
# Save the fitted encoders as pickle files

encoders_to_save = {
    'label_encoder_gender.pkl': label_encoder_gender,
    'onehot_encoder_geo.pkl': onehot_encoder_geo,
}
for pkl_path, fitted_encoder in encoders_to_save.items():
    with open(pkl_path, 'wb') as f:
        pickle.dump(fitted_encoder, f)

### 4. Dividing the data into Independent and Dependent Variables
- And splitting them into Train & Test sets

In [14]:
# Separate the independent features (X) from the dependent regression target (y)

X = data.drop('EstimatedSalary', axis=1)
y = data['EstimatedSalary']


# Split the data into Train and Test

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47)

# Scale the features.
# The scaler is fitted on the training split only, and the test split is transformed
# with those same training statistics. Calling fit_transform on X_test would re-fit the
# scaler on the test data: train and test would then be scaled differently, and the
# scaler object written to scaler.pkl would hold test-set statistics.

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
# Write the fitted scaler to disk as a pickle file

with open('scaler.pkl', mode='wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

### 5. ANN Implementation (Regression)

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [18]:
# Show the (rows, features) shape of the scaled training matrix
print(X_train.shape)
# The trailing comma makes this a 1-tuple, the same form passed to Input(shape=...)
X_train.shape[1],

(8000, 12)


(12,)

In [19]:
# Build the ANN model layer by layer

model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # one input per feature column
model.add(Dense(64, activation='relu'))      # HL1, connected to the input layer
model.add(Dense(32, activation='relu'))      # HL2
model.add(Dense(1))                          # output layer for regression

In [22]:
# Optimizer
# Adam with an explicit learning rate. This uses the `tf` alias that is already
# imported with the other TensorFlow imports, rather than importing the package
# a second time under a different name in the middle of the notebook.

opt = tf.keras.optimizers.Adam(learning_rate=0.01)


In [None]:
# Compile the model: mean absolute error is both the training loss and the reported metric

model.compile(
    loss='mean_absolute_error',
    optimizer=opt,
    metrics=['mae'],
)

In [24]:
# Display the summary of the compiled model
model.summary()

In [25]:
# Set up TensorBoard
# (EarlyStopping and TensorBoard are already imported together with the other Keras
# imports, so they are not imported again here.)

# Every run logs into its own timestamped sub-directory. The day directive has to be
# the lowercase %d (day of month): %D is not a documented Python strftime directive
# and, where the platform expands it, produces mm/dd/yy, putting slashes in the path.
log_dir = 'regression_logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorflow_callbacks = TensorBoard(log_dir=log_dir, histogram_freq=1)


In [26]:
# Set up early stopping driven by the validation loss

early_stopping_callbacks = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)


In [27]:
# Train the model
# NOTE(review): the test split is also used as validation data here, so early stopping
# is driven by the same data that is later used for the final evaluation.

history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping_callbacks, tensorflow_callbacks],
)



Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 93612.7500 - mae: 93612.7500 - val_loss: 50811.4648 - val_mae: 50811.4648
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 50641.1758 - mae: 50641.1758 - val_loss: 50352.8125 - val_mae: 50352.8125
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 49831.3555 - mae: 49831.3555 - val_loss: 50414.5508 - val_mae: 50414.5508
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 49970.3984 - mae: 49970.3984 - val_loss: 50349.8672 - val_mae: 50349.8672
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 50032.8906 - mae: 50032.8906 - val_loss: 50394.0508 - val_mae: 50394.0508
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 49627.1758 - mae: 49627.1758 - val_loss: 50429.1445 - val_ma

In [28]:
# Load the TensorBoard notebook extension so the %tensorboard magic can be used

%load_ext tensorboard

In [30]:
%tensorboard
%tensorboard --logdir regression_logs/fit/

ERROR: Failed to launch TensorBoard (exited with 1).
Contents of stderr:
2025-06-27 19:03:38.251227: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-27 19:03:39.944483: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
Error: A logdir or db must be specified. For example `tensorboard --logdir mylogdir` or `tensorboard --db sqlite:~/.tensorboard.db`. Run `tensorboard --helpfull` for details and examples.

Reusing TensorBoard on port 6007 (pid 8164), started 0:00:30 ago. (Use '!kill 8164' to kill it.)

In [32]:
# Evaluate the trained model on the test split and report its mean absolute error

test_loss, test_mae = model.evaluate(x=X_test, y=y_test)
print(f'Test Mean Absolute Error : {test_mae}')


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 50676.3320 - mae: 50676.3320
Test Mean Absolute Error : 50084.35546875


In [33]:
# Save the trained model to a .keras file

# Alternative .h5 export, left disabled:
# model.save('regression_model.h5')
model.save('regression_model.keras')