In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
path = "insurance_data.csv"

In [3]:
data = pd.read_csv(path)
data.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [4]:
y = data["bought_insurance"]
X = data.drop(columns=["bought_insurance"])

In [5]:
#feature scaling
X["age"] = X["age"]/100

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=25)

In [7]:
def sigmoid(x):
    return 1 / (1 +  np.exp(-x))

In [8]:
def prediction(X, W, b):
    weighted_sum = np.dot(X,W)+b
    return sigmoid(weighted_sum)

In [9]:
def prediction_0_1(y):
    return [1 if i > 0.5 else 0 for i in y]

In [10]:
# Binary cross entropy
def log_loss(y,y_):
    epsilon = 1e-15
    y_ = [epsilon if ele == 0 else ele for ele in y_]
    y_ = [1-epsilon if ele == 1 else ele for ele in y_]
    #print(y_)
    y_ = np.array(y_)
    return -np.mean(y*np.log(y_)+(1-y)*np.log(1-y_))    

In [11]:
# accuracy function from scratch
def accuracy(y,y_):
    n = len(y)
    correct = 0
    for i,j in zip(y,y_):
        if(i==j):
            correct+=1
    return correct/n

In [12]:
class NN:
    def __init__(self):
        self.W = 0
        self.b = 0
    def fit(self,X,y,epochs =5,rate = 0.5):
        self.W , self.b = self.gradient_descent(X,y,epochs,rate)
        pass
    def predict(self,X):
        return prediction_0_1(prediction(X,self.W,self.b))
    def evaluate(self,X,y):
        y_predicted = self.predict(X)
        loss = log_loss(y,y_predicted)
        accuracy_value = accuracy(y,y_predicted)
        print(f'W : {self.W}, bais : {self.b:.4f}, loss : {loss:.4f}, accuracy : {accuracy_value:.4f}')
        pass
    def gradient_descent(self, X,y,epochs =5,rate = 0.5):
        # initialize w
        W = np.ones(len(X.columns))
        #print(W.shape)
        b = 0
        n = len(X)
        for i in range(epochs):
            y_predicted = prediction(X,W,b)
            #print(y_predicted)

            b_derivative = np.mean(y_predicted-y)
            w_derivative = (1/n)*np.dot(np.transpose(X),(y_predicted-y))
            #print(w_derivative)
            W = W -rate * w_derivative
            #print(W)
            b = b - rate * b_derivative
            loss = log_loss(y,y_predicted)

            accuracy_value = accuracy(y,prediction_0_1(y_predicted)) 
            if i%50==0:
                print(f'epoch : {i} , W : {W}, bais : {b:.4f}, loss : {loss:.4f}, accuracy : {accuracy_value:.4f}')

        return W,b

In [13]:
model = NN()

In [14]:
model.fit(X_train,y_train,epochs =500)

epoch : 0 , W : [0.97490763 0.94834813], bais : -0.1134, loss : 0.7113, accuracy : 0.5000
epoch : 50 , W : [1.50331955 1.10838479], bais : -1.2319, loss : 0.5676, accuracy : 0.6818
epoch : 100 , W : [2.20071313 1.2941584 ], bais : -1.6607, loss : 0.5391, accuracy : 0.6818
epoch : 150 , W : [2.84957278 1.36968955], bais : -1.9861, loss : 0.5176, accuracy : 0.7727
epoch : 200 , W : [3.44301697 1.40422186], bais : -2.2571, loss : 0.5005, accuracy : 0.8636
epoch : 250 , W : [3.98245049 1.42391273], bais : -2.4944, loss : 0.4865, accuracy : 0.8636
epoch : 300 , W : [4.47217952 1.43878799], bais : -2.7074, loss : 0.4751, accuracy : 0.8636
epoch : 350 , W : [4.91724587 1.45256608], bais : -2.9012, loss : 0.4656, accuracy : 0.9091
epoch : 400 , W : [5.32256217 1.46648101], bais : -3.0788, loss : 0.4577, accuracy : 0.9091
epoch : 450 , W : [5.69260053 1.48081125], bais : -3.2422, loss : 0.4512, accuracy : 0.9091


In [15]:
model.predict(X_train)

[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1]

In [16]:
model.evaluate(X_test,y_test)

W : [6.02482454 1.49519776], bais : -3.3902, loss : 0.0000, accuracy : 1.0000
