In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
import pandas as pd
from sklearn.model_selection import train_test_split

%matplotlib inline

In [3]:
# Read the insurance dataset from a CSV file in the working directory,
# then preview the first five rows.
df = pd.read_csv('insurance_data.csv')
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [4]:
# (rows, columns) of the loaded frame
df.shape

(28, 3)

In [5]:
# Every column except the last is a feature; the last column is the label.
# 20% of the rows are held out for testing, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=0,
)

In [6]:
# (rows, columns) of the training features
X_train.shape

(22, 2)

We scale the data in order to improve the performance of the model. <br>
So, we divide the age by 100 to bring it into the range of 0 to 1.

In [7]:
# Divide age by 100 so it sits on a 0-1 scale. `assign` returns a new frame
# with the column replaced in place, so X_train / X_test stay untouched.
X_train_scaled = X_train.assign(age=X_train['age'] / 100)
X_test_scaled = X_test.assign(age=X_test['age'] / 100)

In [8]:
# Preview the scaled training features
X_train_scaled.head()

Unnamed: 0,age,affordibility
24,0.5,1
13,0.29,0
20,0.21,1
25,0.54,1
16,0.25,0


# With Tensorflow

In [9]:
# Single-neuron model:
#   Dense(1)                     - one output unit
#   input_shape=(2,)             - two input features per sample
#   activation='sigmoid'         - squashes the weighted sum into (0, 1)
#   kernel_initializer='ones'    - weights start at 1
#   bias_initializer='zeros'     - bias starts at 0
# Compiled with the Adam optimizer and binary cross-entropy (log loss),
# reporting accuracy as a metric.
model = keras.Sequential([
    keras.layers.Dense(
        1,
        input_shape=(2,),
        activation='sigmoid',
        kernel_initializer='ones',
        bias_initializer='zeros',
    ),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.fit(X_train_scaled, y_train, epochs=2000)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

<keras.callbacks.History at 0x17e978d6388>

In [10]:
# Evaluate on the held-out rows; the result is the loss followed by the
# compiled metric (accuracy).
model.evaluate(X_test_scaled, y_test)



[0.5762953758239746, 1.0]

In [11]:
# Sigmoid outputs (one value per test row) from the trained model
y_pred = model.predict(X_test_scaled)
y_pred

array([[0.55879223],
       [0.5293582 ],
       [0.56713575],
       [0.60414857],
       [0.5960103 ],
       [0.47853637]], dtype=float32)

In [12]:
# Turn the predicted probabilities into hard 0/1 labels with a 0.5 cut-off;
# these labels feed the confusion matrix.
y_pred_class = (y_pred > 0.5).astype(int)
y_pred_class


array([[1],
       [1],
       [1],
       [1],
       [1],
       [0]])

In [13]:
# Confusion matrix from TensorFlow (true labels first, predicted labels second)
tf.math.confusion_matrix(y_test, y_pred_class)


<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 0],
       [0, 5]])>

### weights and biases after the last epoch

In [15]:
# For the single Dense layer, get_weights() yields the kernel (one row per
# input feature) followed by the bias vector.
coef, intercept = model.get_weights()

print(f"Weight 1: {coef[0]!s}")
print(f"Weight 2: {coef[1]!s}")
print(f"Bias: {intercept!s}")

Weight 1: [1.6956285]
Weight 2: [0.63263583]
Bias: [-1.1933192]


# Without Tensorflow - (From Scratch)

In [16]:
def sigmoid(x):
    """Logistic function 1 / (1 + e**-x), evaluated without overflow.

    Parameters
    ----------
    x : scalar, numpy array or pandas Series
        Input value(s); anything that supports unary minus and NumPy ufuncs.

    Returns
    -------
    Same kind of object as ``x`` with every element mapped into [0, 1].
    """
    # 1 / (1 + e**-x) == exp(-log(e**0 + e**-x)). logaddexp evaluates the inner
    # log stably, so large negative x yields 0.0 instead of overflowing np.exp
    # and emitting a RuntimeWarning.
    return np.exp(-np.logaddexp(0, -x))

In [17]:
def prediction_function(age, affordance):
    """Reproduce the trained neuron's output by hand.

    Combines the two inputs with the notebook-level ``coef`` and ``intercept``
    (taken from ``model.get_weights()``) and passes the result through
    ``sigmoid``.

    Parameters
    ----------
    age : the age feature, on the same scale the model was trained with.
    affordance : the affordability feature.

    Returns
    -------
    The sigmoid of the weighted sum.
    """
    weighted_sum = coef[0] * age + coef[1] * affordance + intercept
    return sigmoid(weighted_sum)

In [24]:
# Manual prediction for scaled age 0.47 and affordability 1
prediction_function(.47, 1)

array([0.5587923], dtype=float32)

In [31]:
# First test row next to the Keras prediction for that same row
X_test_scaled.iloc[0, :], y_pred[0]

(age              0.47
 affordibility    1.00
 Name: 2, dtype: float64,
 array([0.55879223], dtype=float32))

As we can see here, `prediction_function` and the TensorFlow model give the same output (up to float32 rounding).

In [43]:
# function to calculate the logarithmic loss

def log_loss(y_true, y_predicted):
    """Mean binary cross-entropy (log loss) between labels and probabilities.

    Parameters
    ----------
    y_true : array-like
        Binary labels (0 or 1). Lists, numpy arrays and pandas Series are
        all accepted.
    y_predicted : array-like
        Predicted probabilities, one per label.

    Returns
    -------
    float
        ``-mean(y * log(p) + (1 - y) * log(1 - p))``.
    """
    epsilon = 1e-15
    # Coerce to float arrays so plain lists work and the arithmetic is positional.
    y_true = np.asarray(y_true, dtype=float)
    # Keep probabilities strictly inside (0, 1) so neither log() sees 0.
    y_predicted = np.clip(np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon)
    return -np.mean(
        y_true * np.log(y_predicted) + (1 - y_true) * np.log(1 - y_predicted)
    )


![image](GD_partial_derivatives.png)

In [46]:
# gradient descent function
def gradient_descent(age, affordability, y_true, epochs, loss_threshold,
                     learning_rate=0.01, verbose=True):
    """Fit a two-feature, single-neuron logistic model with batch gradient descent.

    Training starts from ``w1 = w2 = 1`` and ``bias = 0``. Each epoch computes
    the sigmoid of the weighted sum for every sample, measures the log loss,
    and moves the weights and bias against the gradient of that loss.

    Parameters
    ----------
    age, affordability : 1-D array-like
        The two input features, one value per sample.
    y_true : 1-D array-like
        Binary labels (0 or 1), one per sample.
    epochs : int
        Maximum number of passes over the data.
    loss_threshold : float
        Training stops after the first epoch whose loss is at or below this value.
    learning_rate : float, default 0.01
        Step size applied to every gradient.
    verbose : bool, default True
        When true, print the loss and parameters after every epoch.

    Returns
    -------
    tuple
        ``(w1, w2, bias)``. The loss is measured before each epoch's update,
        so the returned parameters already include the update made in the
        epoch that triggered the stop.

    Raises
    ------
    ValueError
        If the inputs contain no samples.
    """
    # Plain float arrays keep the arithmetic positional; pandas Series with
    # different indexes would otherwise be aligned by label.
    age = np.asarray(age, dtype=float)
    affordability = np.asarray(affordability, dtype=float)
    y_true = np.asarray(y_true, dtype=float)

    n = len(age)
    if n == 0:
        raise ValueError("gradient_descent needs at least one sample")

    w1 = w2 = 1
    bias = 0
    for epoch in range(epochs):
        weighted_sum = w1 * age + w2 * affordability + bias
        y_pred = sigmoid(weighted_sum)

        loss = log_loss(y_true, y_pred)

        # Partial derivatives of the mean log loss with respect to each parameter.
        residual = y_true - y_pred
        w1_grad = -(1 / n) * np.dot(age, residual)
        w2_grad = -(1 / n) * np.dot(affordability, residual)
        bias_grad = np.mean(y_pred - y_true)

        w1 = w1 - learning_rate * w1_grad
        w2 = w2 - learning_rate * w2_grad
        bias = bias - learning_rate * bias_grad

        if verbose:
            print(
                f"Epoch: {epoch + 1} Loss: {loss!s} Weight 1: {w1!s} "
                f"Weight 2: {w2!s} Bias: {bias!s}"
            )

        if loss <= loss_threshold:
            break

    return w1, w2, bias


In [48]:
# Train the from-scratch model for at most 1000 epochs, stopping early once
# the log loss is at or below 0.5681.
w1, w2, bias = gradient_descent(
    age=X_train_scaled['age'],
    affordability=X_train_scaled['affordibility'],
    y_true=y_train,
    epochs=1000,
    loss_threshold=0.5681,
)


Epoch: 1 Loss: 0.7960362503355486Weight 1: 0.9992898313363374 Weight 2: 0.9984485000394037 Bias: -0.00304348887567105
Epoch: 2 Loss: 0.7948204667256131Weight 1: 0.9985825883662556 Weight 2: 0.9969015718319503 Bias: -0.006079157358744767
Epoch: 3 Loss: 0.7936112629710453Weight 1: 0.9978782692102469 Weight 2: 0.9953592159610566 Bias: -0.009107011729941526
Epoch: 4 Loss: 0.7924086170393032Weight 1: 0.9971768719398659 Weight 2: 0.9938214329097408 Bias: -0.012127058394218626
Epoch: 5 Loss: 0.7912125068166399Weight 1: 0.9964783945779571 Weight 2: 0.992288223060877 Bias: -0.015139303880105436
Epoch: 6 Loss: 0.790022910109845Weight 1: 0.9957828350988852 Weight 2: 0.9907595866974589 Bias: -0.018143754839031235
Epoch: 7 Loss: 0.788839804647986Weight 1: 0.9950901914287696 Weight 2: 0.9892355240028714 Bias: -0.021140418044645875
Epoch: 8 Loss: 0.7876631680841442Weight 1: 0.9944004614457213 Weight 2: 0.9877160350611711 Bias: -0.024129300392133388
Epoch: 9 Loss: 0.7864929779971526Weight 1: 0.9937136

In [49]:
# Parameters learned by the from-scratch gradient descent (without TensorFlow)
w1, w2, bias


(1.043888955219281, 0.697520256682321, -1.0525764845945411)

In [51]:
# Parameters learned by the Keras model (with TensorFlow), for comparison
coef[0], coef[1], intercept

(array([1.6956285], dtype=float32),
 array([0.63263583], dtype=float32),
 array([-1.1933192], dtype=float32))