# Use-case: We will create a model that predicts whether a customer is a good or bad customer (the `Purchased` label) on the basis of the customer's age and estimated salary

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Load the Social Network Ads dataset from the working directory.
csv_path = 'Social_Network_Ads.csv'
data = pd.read_csv(csv_path)

In [3]:
# Print column names, dtypes, and non-null counts to check the data is complete.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [4]:
# Count samples per class to see whether the classification dataset is balanced.
data["Purchased"].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [5]:
# Unbalanced dataset: 257 samples of class 0 vs. 143 samples of class 1

In [6]:
# Features : Age , EstimatedSalary
# Label : Purchased

In [7]:
# Rules for Classification in DL
# 1. Data must be complete (no missing values)
# 2. Data must be strictly numeric
# 3. Features & label must be in the form of 2D NumPy arrays
# 4. Standardizing features is mandatory (StandardScaler / RobustScaler)
# 5. For Binary Classification, label must be represented as 0 = False or 1 = True respectively (e.g. via pandas `replace`)
# 6. For Multi-Class Classification, label must be discrete numerical (Setosa = 0, Versicolor = 1, Virginica = 2)

In [8]:
# Select features and label by column name rather than by position, so the
# selection does not silently change if the CSV's column order changes.
# Double brackets keep both results as 2D arrays (n_samples, n_columns).
features = data[["Age", "EstimatedSalary"]].values
label = data[["Purchased"]].values

In [9]:
# Scale the features with RobustScaler.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics influence the scaling — consider fitting on the
# training split only.

from sklearn.preprocessing import RobustScaler

rbFeatures = RobustScaler()
# fit() returns the fitted scaler itself, so it can be chained with transform().
features = rbFeatures.fit(features).transform(features)

In [10]:
# Work with labels as per the rules
# No label conversion is needed for this dataset: the label is already in binary (0/1) form

In [11]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    features,
    label,
    test_size=0.2,
    random_state=10,
)

In [12]:
# Architect model
#
# Suggested number of units per hidden layer for classification:
#   units = 3 * number of columns in the feature set
#   units = number of columns in the feature set
#   units = 1/3 * number of columns in the feature set
# Here: 2 feature columns -> 6 units per hidden layer (the 3x rule).

model = tf.keras.models.Sequential([
    # Three hidden ReLU layers; the first one declares the 2-feature input.
    tf.keras.layers.Dense(6, activation="relu", input_shape=(2,)),
    tf.keras.layers.Dense(6, activation="relu"),
    tf.keras.layers.Dense(6, activation="relu"),
    # Single sigmoid unit for the binary output.
    tf.keras.layers.Dense(1, activation="sigmoid"),
])


In [25]:
# Compile model
#
# Loss functions for classification:
#   binary classification      -> binary_crossentropy
#   multi-class classification -> categorical_crossentropy
#                                 or sparse_categorical_crossentropy
#
# Optimizer preferences, in order: Adam, Nadam, RMSProp, SGD

adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [26]:
# Create Custom EarlyStopping Condition --- Callbacks
# I want my model to be generalized and my testScore >= CL
# Assume for this use-case : SL = 0.05
# CL = 1 - SL = 0.95


class MyThresholdCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy is good enough.

    Training is stopped at the end of the first epoch where the validation
    accuracy is strictly greater than the training accuracy AND is at least
    ``cl``.

    Args:
        cl: Minimum validation accuracy (e.g. 0.95) required to stop.
    """

    def __init__(self, cl):
        super(MyThresholdCallback, self).__init__()
        self.cl = cl

    def on_epoch_end(self, epoch, logs=None):
        """Check the stopping condition using this epoch's metrics.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict for the epoch; read for the keys
                ``'accuracy'`` and ``'val_accuracy'``.
        """
        # `logs` defaults to None, so never index it directly.
        logs = logs or {}
        testScore = logs.get('val_accuracy')
        trainScore = logs.get('accuracy')

        if testScore is None or trainScore is None:
            # Without both metrics the condition cannot be evaluated; warn
            # instead of raising KeyError and aborting the whole training run.
            import warnings
            warnings.warn(
                "MyThresholdCallback needs 'accuracy' and 'val_accuracy' in logs; "
                "compile with metrics=['accuracy'] and pass validation_data to fit().",
                RuntimeWarning,
            )
            return

        if testScore > trainScore and testScore >= self.cl:
            self.model.stop_training = True

In [27]:
# Fit model: train for up to 2000 epochs, validating on the test split each
# epoch; the callback stops training early once its 0.95 threshold is met.

stop_at_threshold = MyThresholdCallback(cl=0.95)
model.fit(
    X_train,
    y_train,
    epochs=2000,
    validation_data=(X_test, y_test),
    callbacks=[stop_at_threshold],
)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

<tensorflow.python.keras.callbacks.History at 0x7f9456a25bd0>

In [28]:
# 1. Check for generalization: compare accuracy on the training and test splits.
# evaluate() returns [loss, accuracy] here, so index 1 is the accuracy.

train_accuracy = model.evaluate(X_train, y_train)[1]
test_accuracy = model.evaluate(X_test, y_test)[1]
print("Training Score is {} and Testing score is {}".format(train_accuracy, test_accuracy))



Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7f94da5204d0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/iterator_ops.py", line 546, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1264, in delete_iterator
    _ctx, "DeleteIterator", name, handle, deleter)
KeyboardInterrupt: 


Training Score is 0.921875 and Testing score is 0.9624999761581421


In [29]:
# Persist the trained model under the "SocialNetworkAdsv1" path.
export_path = "SocialNetworkAdsv1"
model.save(export_path)

INFO:tensorflow:Assets written to: SocialNetworkAdsv1/assets


In [37]:
# Predict the class for one customer (age 23, estimated salary 345678).
# `Sequential.predict_classes` is deprecated and removed in newer TensorFlow
# releases; thresholding the sigmoid output at 0.5 is the documented replacement.
# The raw input is scaled with the same fitted scaler used for training.
(model.predict(rbFeatures.transform(np.array([[23,345678]]))) > 0.5).astype("int32")



array([[1]], dtype=int32)

In [36]:
 # Same prediction as a plain 0/1 integer: scale the sample, then threshold
 # the predicted probability at 0.5.
 sample = rbFeatures.transform(np.array([[23,345678]]))
 int(model.predict(sample)[0][0] > 0.5)

1

In [39]:
# Confusion matrix for the entire dataset (training and test samples together).

from sklearn.metrics import confusion_matrix
y_true = label
# `Sequential.predict_classes` is deprecated and removed in newer TensorFlow
# releases; threshold the sigmoid probabilities at 0.5 instead.
# `features` is already scaled at this point.
y_pred = (model.predict(features) > 0.5).astype("int32")
confusion_matrix(y_true,y_pred)



array([[244,  13],
       [ 15, 128]])

In [40]:
# Per-class precision / recall / F1 for the same whole-dataset predictions.
from sklearn.metrics import classification_report

report = classification_report(y_true, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.94      0.95      0.95       257
           1       0.91      0.90      0.90       143

    accuracy                           0.93       400
   macro avg       0.92      0.92      0.92       400
weighted avg       0.93      0.93      0.93       400

