# ANN (Artificial Neural Network)

## ANN for Classification

### Data Preprocessing

In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import tensorflow as tf

print(f"Tensorflow Version: {tf.__version__}")


Tensorflow Version: 2.9.1


In [2]:
# Importing Dataset
dataset = pd.read_csv("../deeplearning/dataset_dl/Churn_Modelling.csv")

In [3]:
# Top data Preview
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Information about Dataset Columns
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [5]:
# Dropping Useless Columns
useless_col = ["RowNumber","CustomerId","Surname"]
dataset = dataset.drop(labels=useless_col, axis=1)
dataset.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Statistical Info of Dataset
dataset.describe()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [7]:
# Splitting Data
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values

In [8]:
print(f"X:-\n {X}\n{'*'*80}\ny:-\n {y}")


X:-
 [[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
********************************************************************************
y:-
 [1 0 1 ... 1 1 0]


#### Encoding Categorical Data

##### LabelEncoding the Gender

In [9]:
print("Before LabelEnc:\n",X[0:5])

Before LabelEnc:
 [[619 'France' 'Female' 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 'Female' 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 'Female' 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 'Female' 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 'Female' 43 2 125510.82 1 1 1 79084.1]]


In [10]:
# Labelencoding Gender Data
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:,2] = le.fit_transform(X[:,2])

In [11]:
print("After LabelEnc:\n",X[0:5])

After LabelEnc:
 [[619 'France' 0 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 0 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 0 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 0 43 2 125510.82 1 1 1 79084.1]]


##### OneHotEncoding the Geography


In [12]:
# OneHotEncoding Geography Column with ColumnTransformer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = ct.fit_transform(X)

In [13]:
print("After LabelEnc & OneHotEnc:\n",X[0:5])

After LabelEnc & OneHotEnc:
 [[1.0 0.0 0.0 619 0 42 2 0.0 1 1 1 101348.88]
 [0.0 0.0 1.0 608 0 41 1 83807.86 1 0 1 112542.58]
 [1.0 0.0 0.0 502 0 42 8 159660.8 3 1 0 113931.57]
 [1.0 0.0 0.0 699 0 39 1 0.0 2 0 0 93826.63]
 [0.0 0.0 1.0 850 0 43 2 125510.82 1 1 1 79084.1]]


#### Splitting Data

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.2, random_state=0)

In [15]:
print("Train:\n",X_test[1],"*"*80,"\n\nTest:\n",X_test[1])

Train:
 [1.0 0.0 0.0 523 0 40 2 102967.41 1 1 0 128702.1] ******************************************************************************** 

Test:
 [1.0 0.0 0.0 523 0 40 2 102967.41 1 1 0 128702.1]


#### Feature Scalling (Necessary for ANN)

In [16]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Building ANN

#### Defining ANN

In [17]:
ann = tf.keras.Sequential()

In [18]:
# First Input Layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Second Hideen Layer
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

# Output Layer
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

#### Compiling ANN

In [19]:
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#### Training ANN

In [24]:
r = ann.fit(X_train, y_train, batch_size=32, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


### Predicting Results

__Given:__ 
* Geography : France,
* Credit Score: 600,
* Gender: Male,
* Age: 40yrs,
* Tenure: 3yrs,
* Balance: 60000,
* No of Products: 2,
* Does this Customer have a credit card: Yes,
* is this customer an active member: Yes,
* Estimated Salary:  50000,

__To predict:__ Will customer Leave (Yes/No)


In [36]:
country = [1,0,0] # france, Germany, Spain
credit = 600
gender = 1 # male=1, female=0
age = 40
tenure = 3
balance = 60000
products = 2
isCreditCardOwner = 1 #yes
isActiveMember = 1 #yes
salary =  50000

input_data = [*country, credit, gender, age, tenure, balance, products, isCreditCardOwner, isActiveMember, salary]
print("Input Before Scalling:", input_data)
print("No of Input Features Given:", len(input_data))

Input Before Scalling: [1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]
No of Input Features Given: 12


#### Scalling Inputs Before Passing to predict()

In [37]:
input_data= sc.transform([input_data])
print("Input Before Scalling:\n", input_data)

Input Before Scalling:
 [[ 0.98560362 -0.5698444  -0.57369368 -0.52111599  0.91601335  0.10961719
  -0.68538967 -0.2569057   0.8095029   0.64259497  0.9687384  -0.87203322]]


In [47]:
res = ann.predict(input_data, verbose=False)[0][0]
print(f"Probability of Customer Leaving: {round(res*100,4)}%")
print("Will Customer Leave Bank:",res>0.5)

Probability of Customer Leaving: 2.3882%
Will Customer Leave Bank: False


## ANN for Regression