# Artificial Neural Network

### Importing the libraries

In [15]:
import numpy as np
import pandas as pd
import torch

In [16]:
torch.__version__

'2.9.1+cpu'

## Part 1 - Data Preprocessing

### Importing the dataset

In [17]:
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:-1].values # write down here why we are using 3:-1 why these columns
y = dataset.iloc[:, -1].values

"""
So, the first 3 columns: row number, customer id and username are irrelevant to the prediction model as they have no inherit structure
that the model can learn. 

The first colon (:) indicates that keep all rows - we don't need to delete any rows.
The 3:-1 indicates keep all columns except the columns 0, 1, 2.
"""

print ( dataset.columns )

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')


In [18]:
print(X)
print ( X.shape )

print ( "Geography unique:", np.unique ( X[:, 1] ) )


[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
(10000, 10)
Geography unique: ['France' 'Germany' 'Spain']


In [19]:
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical data

Label Encoding the "Gender" column

In [20]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

X[:, 2] = le.fit_transform(X[:, 2])

In [21]:
print(X)
print ( X.shape ) # Keeping track of shape after encodings!!

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]
(10000, 10)


One Hot Encoding the "Geography" column

In [22]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer ( transformers = [ ( 'encoder', OneHotEncoder ( drop = 'first' ),  [ 1 ] ) ], remainder = 'passthrough' )

X = ct.fit_transform(X)

In [23]:
print(X)
print ( X.shape )

[[0.0 0.0 619 ... 1 1 101348.88]
 [0.0 1.0 608 ... 0 1 112542.58]
 [0.0 0.0 502 ... 1 0 113931.57]
 ...
 [0.0 0.0 709 ... 0 1 42085.58]
 [1.0 0.0 772 ... 1 0 92888.52]
 [0.0 0.0 792 ... 1 0 38190.78]]
(10000, 11)


### Splitting the dataset into the Training set and Test set

In [24]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split ( X, y, test_size = 0.2, random_state  = 42 )

### Feature Scaling

In [25]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

x_train = sc.fit_transform ( x_train )
x_test = sc.fit_transform ( x_test )

print ( x_train.shape )

(8000, 11)


## Part 2 - Building the ANN

### Initializing the ANN

In [26]:
import torch.nn as nn
import torch.nn.functional as F

class ANN ( nn.Module ):
    def __init__ ( self ):
        super().__init__()
        
        self.fc1 = nn.Linear ( 11, 20 )
        self.fc2 = nn.Linear ( 20, 20 )
        self.fc3 = nn.Linear ( 20, 1 )

        nn.init.xavier_uniform_ ( self.fc1.weight )
        nn.init.xavier_uniform_ ( self.fc2.weight )
        nn.init.xavier_uniform_ ( self.fc3.weight )

    def forward ( self, x ):
        x = F.relu ( self.fc1 ( x ) )
        x = F.relu ( self.fc2 ( x ) )
        x = torch.sigmoid ( self.fc3 ( x ) )
        return x

ann = ANN()
print ( ann )


ANN(
  (fc1): Linear(in_features=11, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=1, bias=True)
)


## Part 3 - Training the ANN

### Compiling the ANN

In [27]:
import torch.optim as optim

optimizer = optim.Adam ( ann.parameters() )
loss = nn.BCELoss()

### Training the ANN on the Training set

In [34]:
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

X = torch.tensor ( x_train, dtype = torch.float32 )
y = torch.tensor ( y_train, dtype = torch.float32 ).view ( -1, 1 )

dataset = TensorDataset ( X, y )

val_size = int ( 0.2 * len ( dataset ) )
train_size = len ( dataset ) - val_size

train_dataset, val_dataset = random_split ( dataset, [ train_size, val_size ] )

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

for epoch in range ( 100 ):
    ann.train()
    train_loss = 0.0

    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = ann ( xb )
        loss_back = loss ( preds, yb )
        loss_back.backward()
        optimizer.step()

        train_loss += loss_back.item() * xb.size(0)

    train_loss /= train_size

    ann.eval()
    val_loss = 0.0

    with torch.no_grad():
        for xb, yb in val_loader:
            preds = ann ( xb )
            loss_back = loss ( preds, yb )
            val_loss += loss_back.item() * xb.size(0)

    val_loss /= val_size

    print ( "Epoch: {}; Training loss: {}; Val loss: {}".format ( str ( epoch + 1 ), str ( train_loss ), str ( val_loss ) ) )

Epoch: 1; Training loss: 0.3078401216864586; Val loss: 0.31403514444828035
Epoch: 2; Training loss: 0.30567233383655545; Val loss: 0.317722747027874
Epoch: 3; Training loss: 0.30452233731746675; Val loss: 0.323212109208107
Epoch: 4; Training loss: 0.30389862865209577; Val loss: 0.32183813661336896
Epoch: 5; Training loss: 0.3025967674329877; Val loss: 0.32247563093900683
Epoch: 6; Training loss: 0.3010979598760605; Val loss: 0.32563634842634204
Epoch: 7; Training loss: 0.3005891751125455; Val loss: 0.3280494573712349
Epoch: 8; Training loss: 0.30017781458795073; Val loss: 0.32958600282669065
Epoch: 9; Training loss: 0.29955273926258086; Val loss: 0.32940798103809354
Epoch: 10; Training loss: 0.2985866117477417; Val loss: 0.33127622187137606
Epoch: 11; Training loss: 0.29879594188183545; Val loss: 0.33080740660429003
Epoch: 12; Training loss: 0.2976669857278466; Val loss: 0.33331413358449935
Epoch: 13; Training loss: 0.29687682550400496; Val loss: 0.3321657946705818
Epoch: 14; Training 

## Part 4 - Making the predictions and evaluating the model

### Predicting the result of a single observation

**Extra**

Use our ANN model to predict if the customer with the following informations will leave the bank: 

Geography: France

Credit Score: 600

Gender: Male

Age: 40 years old

Tenure: 3 years

Balance: \$ 60000

Number of Products: 2

Does this customer have a credit card ? Yes

Is this customer an Active Member: Yes

Estimated Salary: \$ 50000

So, should we say goodbye to that customer ?

**Solution**

In [37]:
import numpy as np

row_in = np.array ( [[600,'France','Male',40,3,60000,2,1,1,50000]] )

row_in[:, 2] = le.fit_transform ( row_in[:, 2] )
row_in = ct.transform ( row_in )
row_in = sc.transform ( row_in )

row_tensor = torch.tensor ( row_in, dtype = torch.float32 )

ann.eval()
with torch.no_grad():
    prob = ann ( row_tensor ).item()

print( "So, should we say goodbye to that customer? : ", prob <= 0.5 )


So, should we say goodbye to that customer? :  True



### Predicting the Test set results

### Making the Confusion Matrix

In [173]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

y_pred = ann.predict ( x_test )
y_pred = ( y_pred > 0.5 )

print ( "Test Accuracy:", accuracy_score ( y_test, y_pred ) )

con_matr = confusion_matrix ( y_test, y_pred )
print ( "Confusion matrix:\n", con_matr )

print ( "Classification Report:\n", classification_report ( y_test, y_pred ) )

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Test Accuracy: 0.852
Confusion matrix:
 [[1525   82]
 [ 214  179]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.95      0.91      1607
           1       0.69      0.46      0.55       393

    accuracy                           0.85      2000
   macro avg       0.78      0.70      0.73      2000
weighted avg       0.84      0.85      0.84      2000



write down about precision recall f1-score, why is it better than just accuracy, what are some other interesting metrics u can find

Precision = TP / ( TP + FP )
TP - True positive, FP - False positive
Precision basically tells us out of all +ves predicted, how many were actually positive. So a high precision means less false positives.

Recall = TP / ( TP + FN )
FN - False negative
Recall tells us, out of all actual positive test cases how many the model correctly identified. High recall => Less false negatives.

F1 score = harmonic mean ( Precision, Recall ). Its like a balance for pr:ecision and recall. A model cannot cheat by having just high recall/precision.

The problem with accuracy is that if the test cases are highly imbalanced towards one class, say 80% is zero class. Then a model that predicts everything as zero will have 80% accurac, which is not a good metric.

There is also a metric called specificity - how well a model can identify negatives.
specificity = TN / ( TN + FP )
TN - true negative