# GRADIENT DESCENT

In [None]:
# Important for interviews (core of the ML domain).
# Suppose y = m1*a + m2*b + m3*c + ... + d,
# where m1, m2, m3, ... are the input variables we supply,
# and a, b, c, ..., d are unknown constants; gradient descent is used to find their values.



In [None]:
# steps 

# w1 = w1 - something
# w1 = w1 - learning_rate * d(loss)/d(w1)
# d(loss)/d(w1) --> derivative of the loss w.r.t. w1.

# Look for a convex loss function --> https://am207.github.io/2017/wiki/gradientdescent.html
# We then find the weight values that correspond to the minimum loss.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the insurance dataset; the relative path means insurance_data.csv must
# be in the current working directory.
df = pd.read_csv("insurance_data.csv")
# Preview the first rows of the loaded frame.
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Features are the 'age' and 'affordibility' columns; the target is
# 'bought_insurance'. 20% of the rows are held out for testing, and the fixed
# random_state makes the split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    df[['age', 'affordibility']],
    df.bought_insurance,
    test_size=0.2,
    random_state=25,
)

In [None]:
# Number of rows that ended up in the training split.
len(x_train)

In [None]:
# (rows, columns) of the full dataset, to compare against the training split size.
df.shape

In [None]:
# Divide age by 100 so both features are on a comparable scale.
# assign() returns a new frame with the 'age' column replaced, so the
# original x_train / x_test frames are left unmodified.
x_train_scaled = x_train.assign(age=x_train['age'] / 100)
x_test_scaled = x_test.assign(age=x_test['age'] / 100)


In [None]:
# Display the training features after age has been divided by 100.
x_train_scaled

In [None]:
# scaling helps to bring age and affordability on the same scale.


In [None]:
# A single Dense unit with a sigmoid activation over the two input features.
# Every weight starts at one and the bias at zero; note that the initializer
# name is spelled 'zeros', not 'zeroes'.
model = keras.Sequential([
    keras.layers.Dense(
        1,
        input_shape=(2,),
        activation='sigmoid',
        kernel_initializer='ones',
        bias_initializer='zeros',
    ),
])

# Binary cross-entropy is the same quantity as log loss.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(x_train_scaled, y_train, epochs=5000)

In [None]:
# Compute the loss and the accuracy metric configured in compile() on the held-out test split.
model.evaluate(x_test_scaled,y_test)

In [None]:
# Sigmoid output of the model for each test row.
model.predict(x_test_scaled)
# A value greater than 0.5 is read as "will buy the insurance".

In [None]:
# Display the scaled test features that were passed to predict().
x_test_scaled

In [None]:
# Display the actual test labels, to compare against the predictions.
y_test

In [None]:
# The model has one Dense layer, so get_weights() yields two arrays:
# the layer's kernel (one weight per input feature) and its bias.
coef , intercept = model.get_weights()
print("Coeffecients",coef,"\n Bias Intercept:",intercept)

In [None]:
import math
# takes one value
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-x) of a single number.

    Args:
        x: A scalar value (anything ``math.exp`` accepts).

    Returns:
        A float between 0 and 1.
    """
    # math.exp raises OverflowError once its argument exceeds roughly 709,
    # so never exponentiate a positive number: use whichever algebraically
    # equal form keeps the exponent <= 0.
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


sigmoid(10)

In [None]:
# Reproduce the trained model's prediction by hand, using the weights read
# back from the TensorFlow model.
def prediction_function(age,affordability):
    """Return the sigmoid of the weighted sum of one (age, affordability) pair.

    Reads the module-level ``coef`` and ``intercept`` and passes the result
    through ``sigmoid``.

    Args:
        age: Raw age; divided by 100 here before it is weighted.
        affordability: Value of the second feature.

    Returns:
        Whatever ``sigmoid`` returns for the weighted sum.
    """
    scaled_age = age / 100
    z = coef[0] * scaled_age + coef[1] * affordability + intercept
    return sigmoid(z)


prediction_function(47, 1)

In [None]:
def log_loss(y_true, y_predicted):
    """Return the mean binary cross-entropy (log loss).

    Args:
        y_true: Array-like of target labels.
        y_predicted: Array-like of predicted probabilities, same length
            as ``y_true``.

    Returns:
        The mean over all samples of
        ``-(y * log(p) + (1 - y) * log(1 - p))``.
    """
    epsilon = 1e-15
    # Coerce to arrays so plain Python lists work in the arithmetic below.
    y_true = np.asarray(y_true)
    # Clamp probabilities into [epsilon, 1 - epsilon] so neither log() call
    # ever receives 0.
    y_predicted_new = np.clip(
        np.asarray(y_predicted, dtype=float), epsilon, 1 - epsilon
    )
    return -np.mean(
        y_true * np.log(y_predicted_new)
        + (1 - y_true) * np.log(1 - y_predicted_new)
    )
def sigmoid_numpy(x):
    """Apply the logistic sigmoid 1 / (1 + e**-x) element-wise using NumPy.

    Args:
        x: A NumPy array (or scalar) of inputs.

    Returns:
        The sigmoid of each element, with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


sigmoid_numpy(np.array([12, 0, 1]))

In [None]:
# A loss threshold is passed in: training stops as soon as the loss reaches it.
def gradient_descent(age, affordibility, y_true, epochs, loss_thresold, rate=0.5):
    """Fit w1, w2 and bias of a single sigmoid neuron by batch gradient descent.

    The model is ``sigmoid_numpy(w1 * age / 100 + w2 * affordibility + bias)``
    and the loss is computed with ``log_loss``. One line is printed per epoch.

    Args:
        age: Array-like of raw ages; divided by 100 here before use.
        affordibility: Array-like of the second feature, same length as age.
        y_true: Array-like of target labels, same length as age.
        epochs: Maximum number of update steps.
        loss_thresold: Stop once the loss is at or below this value.
        rate: Learning rate applied to every update step (default 0.5).

    Returns:
        Tuple ``(w1, w2, bias)`` after the last update that was applied.
    """
    age = np.array(age) / 100  # scaling of age
    affordibility = np.array(affordibility)
    y_true = np.array(y_true)
    w1 = w2 = 1
    bias = 0

    for i in range(epochs):
        weighted_sum = w1 * age + w2 * affordibility + bias
        y_predicted = sigmoid_numpy(weighted_sum)
        # Loss of the weights used for this prediction (i.e. before the update).
        loss = log_loss(y_true, y_predicted)

        # d(loss)/d(w) = (1/n) * sum(x_i * (y_hat_i - y_i)).
        # np.dot(x, error) alone is only the sum, and taking np.mean of that
        # scalar does not divide by n, so average the per-sample products
        # instead; otherwise each step is n times too large.
        error = y_predicted - y_true
        deriv_w1 = np.mean(age * error)
        deriv_w2 = np.mean(affordibility * error)
        deriv_bias = np.mean(error)

        w1 = w1 - rate * deriv_w1
        w2 = w2 - rate * deriv_w2
        bias = bias - rate * deriv_bias

        print(f'Epoch:{i},w1:{w1},w2:{w2},bias:{bias},loss:{loss}')
        if loss <= loss_thresold:
            break
    return w1, w2, bias

In [None]:
# Run the hand-written gradient descent on the raw age column (the function
# divides age by 100 itself) for at most 5000 epochs, stopping once the loss
# is <= 0.4631.
# NOTE(review): 0.4631 is presumably the loss the Keras model reached - confirm.
gradient_descent(x_train['age'],x_train['affordibility'],y_train,5000,0.4631)

In [None]:
# Display the weights read back from the Keras model
# (presumably to compare with the values gradient_descent returned).
coef , intercept


In [None]:
# customModel = myNN()
# customModel.fit(x_train_scaled,y_train,epochs=8000,loss_thresold=0.4631)
# customModel.predict(x_test_scaled)

In [None]:
class myNN:
    """Single sigmoid neuron over 'age' and 'affordibility', trained by gradient descent.

    ``fit`` and ``predict`` take a mapping/DataFrame with 'age' and
    'affordibility' columns and use the values as given: callers pass
    already-scaled features, as is done for the Keras model in this notebook.
    """

    def __init__(self):
        # Same starting point as the Keras layer: weights of one, bias of zero.
        self.w1 = 1
        self.w2 = 1
        self.bias = 0

    def fit(self, x, y, epochs, loss_thresold):
        """Train on ``x`` / ``y`` and store the learned w1, w2 and bias.

        Args:
            x: Mapping or DataFrame with 'age' and 'affordibility' columns.
            y: Array-like of target labels, one per row of ``x``.
            epochs: Maximum number of update steps.
            loss_thresold: Stop once the loss is at or below this value.
        """
        self.w1, self.w2, self.bias = self.gradient_descent(
            x['age'], x['affordibility'], y, epochs, loss_thresold
        )

    def predict(self, x):
        """Return the sigmoid output for each row of ``x`` using the stored weights.

        Args:
            x: Mapping or DataFrame with 'age' and 'affordibility' columns,
                scaled the same way as the data passed to ``fit``.

        Returns:
            NumPy array with one value per row.
        """
        weighted_sum = (
            self.w1 * np.asarray(x['age'])
            + self.w2 * np.asarray(x['affordibility'])
            + self.bias
        )
        return sigmoid_numpy(weighted_sum)

    # A loss threshold is passed in: training stops as soon as the loss reaches it.
    # Declared static because it uses no instance state; this lets both
    # self.gradient_descent(...) and myNN.gradient_descent(...) be called with
    # the same positional arguments.
    @staticmethod
    def gradient_descent(age, affordibility, y_true, epochs, loss_thresold, rate=0.5):
        """Run batch gradient descent and return the fitted ``(w1, w2, bias)``.

        Args:
            age: Array-like of the first feature, used as given. fit() is
                called with pre-scaled data, so age is NOT divided by 100
                again here.
            affordibility: Array-like of the second feature, same length.
            y_true: Array-like of target labels, same length.
            epochs: Maximum number of update steps.
            loss_thresold: Stop once the loss is at or below this value.
            rate: Learning rate applied to every update step (default 0.5).

        Returns:
            Tuple ``(w1, w2, bias)`` after the last update that was applied.
        """
        age = np.array(age)
        affordibility = np.array(affordibility)
        y_true = np.array(y_true)
        w1 = w2 = 1
        bias = 0
        for i in range(epochs):
            weighted_sum = w1 * age + w2 * affordibility + bias
            y_predicted = sigmoid_numpy(weighted_sum)
            # Loss of the weights used for this prediction (before the update).
            loss = log_loss(y_true, y_predicted)
            # d(loss)/d(w) = (1/n) * sum(x_i * (y_hat_i - y_i)); averaging the
            # per-sample products supplies the 1/n factor that a bare np.dot
            # (a plain sum) lacks.
            error = y_predicted - y_true
            deriv_w1 = np.mean(age * error)
            deriv_w2 = np.mean(affordibility * error)
            deriv_bias = np.mean(error)
            w1 = w1 - rate * deriv_w1
            w2 = w2 - rate * deriv_w2
            bias = bias - rate * deriv_bias
            print(f'Epoch:{i},w1:{w1},w2:{w2},bias:{bias},loss:{loss}')
            if loss <= loss_thresold:
                break
        return w1, w2, bias

In [None]:
# Instantiate the hand-written model and train it on the scaled training
# features for at most 8000 epochs, with a loss threshold of 0.4631 for
# early stopping.
customModel = myNN()
customModel.fit(x_train_scaled,y_train,epochs=8000,loss_thresold=0.4631)