# Labwork 3: Logistic Regression from scratch

-------------------------------------------------------------------------

In this notebook, we implement gradient descent for logistic regression from scratch, without any machine-learning libraries. The only imports (for Python 3.12) are:
```
 import math 
 import matplotlib as plt
```

In [139]:
import math

# Euler's number at full double precision. The sigmoid derivative formulas in
# this notebook assume the base is the true e, so a truncated value such as
# 2.71828 makes the gradients slightly inconsistent with the sigmoid itself.
e = math.e

In [140]:
import math
import matplotlib as plt

## The model and its derivatives

\begin{equation*}
y_p = w_1\times x_1 + w_2\times x_2 + w_0
\end{equation*}

In [141]:
def yp(w0, w1, w2, x1, x2):
    """Evaluate the linear model ``w1 * x1 + w2 * x2 + w0``.

    ``w0`` is the bias term; ``w1`` and ``w2`` are the weights applied to
    the features ``x1`` and ``x2``.
    """
    weighted_features = w1 * x1 + w2 * x2
    return weighted_features + w0

\begin{equation*}
\frac{\partial y_p}{\partial w_1} = x_1
\end{equation*}

\begin{equation*}
\frac{\partial y_p}{\partial w_2} = x_2
\end{equation*}

\begin{equation*}
\frac{\partial y_p}{\partial w_0} = 1
\end{equation*}

In [142]:
def dyp_dw1(w0, w1, w2, x1, x2):
    """Partial derivative of the linear model with respect to ``w1``.

    The result is the feature ``x1`` itself; the other arguments are unused.
    """
    slope = x1
    return slope

In [143]:
def dyp_dw2(w0, w1, w2, x1, x2):
    """Partial derivative of the linear model with respect to ``w2``.

    The result is the feature ``x2`` itself; the other arguments are unused.
    """
    slope = x2
    return slope

## Sigmoid function, and its derivative

\begin{equation*}
\sigma(z) = \frac{1}{1+e^{-z}}
\end{equation*}

In [144]:
def sigmoid(z):
    """Return the logistic function ``1 / (1 + e**(-z))`` of ``z``.

    ``e`` is the module-level constant defined at the top of the notebook.

    The textbook formula raises ``OverflowError`` for very negative ``z``
    because ``e ** (-z)`` exceeds the float range. To avoid that, the power
    is always taken with a non-positive exponent, which can only underflow
    harmlessly to ``0.0``:

    * ``z >= 0``: ``1 / (1 + e**(-z))``
    * ``z < 0``:  ``e**z / (1 + e**z)`` (the same value, rearranged)
    """
    if z >= 0:
        return 1 / (1 + e ** (-z))
    growth = e ** z
    return growth / (1 + growth)

\begin{equation*}
\frac{\partial\sigma(z)}{\partial w_j} = \frac{\frac{\partial z}{\partial w_j}\times e^{-z}}{(1+e^{-z})^{2}}
\end{equation*}

In [145]:
def dsigmoid(z, dz):
    """Chain-rule derivative of ``sigmoid(z)``.

    Computes ``dz * e**(-z) / (1 + e**(-z))**2``, where ``dz`` is the
    derivative of ``z`` with respect to the variable of interest and ``e``
    is the module-level constant.

    The factor ``e**(-z) / (1 + e**(-z))**2`` is symmetric in ``z``, so it is
    evaluated with ``-abs(z)`` as the exponent. For ``z >= 0`` this is the
    original expression unchanged; for very negative ``z`` it avoids the
    ``OverflowError`` that ``e ** (-z)`` would raise.
    """
    decay = e ** (-abs(z))
    return (dz * decay) / ((1 + decay) ** 2)

In [146]:
def dsigmoid_dw0(z):
    """Derivative of ``sigmoid(z)`` with respect to the bias ``w0``.

    Because the derivative of ``z`` with respect to ``w0`` is 1, this is just
    ``e**(-z) / (1 + e**(-z))**2`` with ``e`` the module-level constant.

    The expression is symmetric in ``z``, so the exponent used is
    ``-abs(z)``: identical to the original for ``z >= 0`` and free of the
    ``OverflowError`` that ``e ** (-z)`` raises for very negative ``z``.
    """
    decay = e ** (-abs(z))
    return decay / ((1 + decay) ** 2)

## Loss function for one selected data point

\begin{equation*}
L_i = -(y_i\times \log(\sigma(y_p)) + (1-y_i)\times \log(1-\sigma(y_p)))
\end{equation*}

In [147]:
def loss(w0, w1, w2, x1, x2, yi):
    """Binary cross-entropy loss of one data point.

    Evaluates ``-(yi * log(p) + (1 - yi) * log(1 - p))`` with
    ``p = sigmoid(yp(w0, w1, w2, x1, x2))`` and ``log`` the natural logarithm.

    Args:
        w0, w1, w2: bias and weights of the linear model.
        x1, x2: features of the data point.
        yi: target label of the data point.

    Returns:
        The loss as a float.
    """
    p = sigmoid(yp(w0, w1, w2, x1, x2))
    # A saturated sigmoid returns exactly 0.0 or 1.0, and log(0) raises
    # ValueError; keep p strictly inside (0, 1).
    tiny = 1e-15
    p = min(max(p, tiny), 1 - tiny)
    # The previous version closed a parenthesis too early, which turned the
    # expression into the negation of a tuple and raised TypeError.
    return -(yi * math.log(p) + (1 - yi) * math.log(1 - p))

The derivative for the loss function:

\begin{equation*}
\frac{\partial L_i}{\partial w_j} = -(y_i\times \frac{\frac{\partial\sigma(y_p)}{\partial w_j}}{\sigma(y_p)} + (y_i-1)\frac{\frac{\partial\sigma(y_p)}{\partial w_j}}{1-\sigma(y_p)})
\end{equation*}

In [148]:
def dloss_dw1(w0, w1, w2, x1, x2, yi):
    """Partial derivative of the single-point loss with respect to ``w1``.

    The chain-rule expression
    ``-(yi * s' / s + (1 - yi) * (-s') / (1 - s))`` with
    ``s = sigmoid(z)`` and ``s' = s * (1 - s) * dz/dw1`` simplifies
    algebraically to ``(s - yi) * dz/dw1``. The simplified form is used
    because the unsimplified one divides by ``s`` and ``1 - s``, which become
    exactly zero once the sigmoid saturates and then raise
    ``ZeroDivisionError``.
    """
    z = yp(w0, w1, w2, x1, x2)
    return (sigmoid(z) - yi) * dyp_dw1(w0, w1, w2, x1, x2)

In [149]:
def dloss_dw2(w0, w1, w2, x1, x2, yi):
    """Partial derivative of the single-point loss with respect to ``w2``.

    The chain-rule expression
    ``-(yi * s' / s + (1 - yi) * (-s') / (1 - s))`` with
    ``s = sigmoid(z)`` and ``s' = s * (1 - s) * dz/dw2`` simplifies
    algebraically to ``(s - yi) * dz/dw2``. The simplified form is used
    because the unsimplified one divides by ``s`` and ``1 - s``, which become
    exactly zero once the sigmoid saturates and then raise
    ``ZeroDivisionError``.
    """
    z = yp(w0, w1, w2, x1, x2)
    return (sigmoid(z) - yi) * dyp_dw2(w0, w1, w2, x1, x2)

In [150]:
def dloss_dw0(w0, w1, w2, x1, x2, yi):
    """Partial derivative of the single-point loss with respect to ``w0``.

    The derivative of the linear model with respect to the bias is 1, so the
    chain-rule expression reduces to ``sigmoid(z) - yi``.

    The earlier implementation only handled ``yi == 0`` and ``yi == 1`` and
    implicitly returned ``None`` for any other label. It also divided by
    ``sigmoid(z)`` or ``1 - sigmoid(z)``, which are exactly zero for a
    saturated sigmoid. The closed form covers every label value and performs
    no division.
    """
    z = yp(w0, w1, w2, x1, x2)
    return sigmoid(z) - yi

Cost function: the mean of the per-point losses over all data points.

In [151]:
def nloss(w0, w1, w2, x1, x2, yi):
    """Average the single-point ``loss`` over the data set.

    ``x1``, ``x2`` and ``yi`` are indexed in lockstep; the number of points
    is taken from ``len(x1)``.
    """
    count = len(x1)
    total = 0
    for idx in range(count):
        point_loss = loss(w0, w1, w2, x1[idx], x2[idx], yi[idx])
        total = total + point_loss
    return total / count

Differentiate the cost function term by term: the derivative of the mean is the mean of the per-point derivatives.

In [152]:
def dnloss_dw1(w0, w1, w2, x1, x2, yi):
    """Average ``dloss_dw1`` over the data set.

    ``x1``, ``x2`` and ``yi`` are indexed in lockstep; the number of points
    is taken from ``len(x1)``.
    """
    count = len(x1)
    total = 0
    for idx in range(count):
        point_grad = dloss_dw1(w0, w1, w2, x1[idx], x2[idx], yi[idx])
        total = total + point_grad
    return total / count

In [153]:
def dnloss_dw2(w0, w1, w2, x1, x2, yi):
    """Average ``dloss_dw2`` over the data set.

    ``x1``, ``x2`` and ``yi`` are indexed in lockstep; the number of points
    is taken from ``len(x1)``.
    """
    count = len(x1)
    total = 0
    for idx in range(count):
        point_grad = dloss_dw2(w0, w1, w2, x1[idx], x2[idx], yi[idx])
        total = total + point_grad
    return total / count

In [154]:
def dnloss_dw0(w0, w1, w2, x1, x2, yi):
    """Average ``dloss_dw0`` over the data set.

    ``x1``, ``x2`` and ``yi`` are indexed in lockstep; the number of points
    is taken from ``len(x1)``.
    """
    count = len(x1)
    total = 0
    for idx in range(count):
        point_grad = dloss_dw0(w0, w1, w2, x1[idx], x2[idx], yi[idx])
        total = total + point_grad
    return total / count

Perform the gradient descent algorithm and, at each iteration, print the sigmoid outputs, the current weights, and the gradient of the cost with respect to each weight.

In [155]:
def gd(w0, w1, w2, x1, x2, yi, lr, t, max_steps=200):
    """Run batch gradient descent on the logistic-regression cost.

    At every step the function prints the sigmoid output of each data point,
    the current weights and the three mean gradients, then moves each weight
    against its gradient. Note that the lines printed as "loss w.r.t wN" show
    the gradient of the cost with respect to ``wN``, not a loss value.

    Args:
        w0, w1, w2: initial bias and weights.
        x1, x2: feature columns, indexed in lockstep.
        yi: target labels, indexed in lockstep with the features.
        lr: learning rate (step size).
        t: convergence threshold; the run succeeds once the absolute value
            of every gradient is below ``t``.
        max_steps: the run is reported as a failure once the step counter
            reaches this value. The default of 200 reproduces the limit that
            used to be hard-coded.

    Returns:
        The tuple ``(w0, w1, w2)`` of weights after the last update.
        Previously the trained weights were discarded.
    """
    i = 1
    while True:
        l0 = dnloss_dw0(w0, w1, w2, x1, x2, yi)
        l1 = dnloss_dw1(w0, w1, w2, x1, x2, yi)
        l2 = dnloss_dw2(w0, w1, w2, x1, x2, yi)
        lsigmoid = [sigmoid(yp(w0, w1, w2, a, b)) for a, b in zip(x1, x2)]

        print("------------------------------")
        print(f"Step {i}")
        print(lsigmoid)
        print("------------------------------")
        print(f"w0 = {w0}; w1 = {w1}; w2 = {w2}")
        print(f"loss w.r.t w0: {l0}")
        print(f"loss w.r.t w1: {l1}")
        print(f"loss w.r.t w2: {l2}")

        w0 = w0 - lr * l0
        w1 = w1 - lr * l1
        w2 = w2 - lr * l2
        i = i + 1

        # Converged when every gradient lies strictly inside (-t, t).
        if abs(l0) < t and abs(l1) < t and abs(l2) < t:
            print("")
            print("------------| GRADIENT DESCEND SUCCESS! |-----------")
            print(f"We have hit bottom after {i - 1} steps")
            return w0, w1, w2
        # ">=" rather than "==" so a small max_steps cannot loop forever.
        if i >= max_steps:
            print("")
            print("------------| GRADIENT DESCEND FAILURE! |-----------")
            print("We have not hit bottom, time to try again! ")
            return w0, w1, w2

## Load the csv file
Each line is split on commas and stored as a list of strings inside the `results` list, which is then printed.

In [156]:
# Read the data set: every line of the file becomes one list of
# comma-separated string fields (header row included, newlines kept).
with open('loan2.csv', 'r') as f:
    results = [line.split(',') for line in f]
    print(results)

[['Experience', ' Salary', ' Loan\n'], ['3', '4', '1\n'], ['2.5', '4', '1\n'], ['1', '4', '0\n'], ['2.5', '5', '1\n'], ['2', '5', '1\n'], ['1.5', '5', '0\n'], ['0.5', '5', '0\n'], ['1.75', '6', '1\n'], ['0.25', '6', '0\n'], ['1', '7', '1\n'], ['0.25', '7', '0\n'], ['0.20', '7', '0\n'], ['0.15', '7', '0\n'], ['2', '8', '1\n'], ['1', '8', '0\n'], ['0.15', '8', '0\n'], ['0.10', '8', '0\n'], ['0.5', '9', '1\n'], ['1', '10', '1']]


The experience is used as the $x_1$, salary as $x_2$, and loan as $y_i$.

In [157]:
# Split the parsed rows into feature and label columns, skipping the header
# row at index 0: column 0 -> x1, column 1 -> x2, column 2 -> yi.
x1, x2, yi = [], [], []
for row in results[1:]:
    x1.append(float(row[0]))
    x2.append(float(row[1]))
    yi.append(int(row[2]))

for column in (x1, x2, yi):
    print(column)

[3.0, 2.5, 1.0, 2.5, 2.0, 1.5, 0.5, 1.75, 0.25, 1.0, 0.25, 0.2, 0.15, 2.0, 1.0, 0.15, 0.1, 0.5, 1.0]
[4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 10.0]
[1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1]


## Testing gradient descent:

The sigmoid was too small, so I may need to change the learning rate. It is best not to make the weights too small, since that can cause the sigmoid function to go wrong. To get the lowest loss I could, I raised the threshold to 0.27, because most of the learning rates I tried never reached a lower threshold.

In [158]:
# Start from w0=0, w1=1, w2=2 with learning rate 0.007 and threshold 0.27.
gd(0, 1, 2, x1, x2, yi, lr=0.007, t=0.27)

------------------------------
Step 1
[0.9999832984545768, 0.9999724641144097, 0.9998766046770884, 0.9999962733293816, 0.9999938557758031, 0.999989869930659, 0.9999724641144097, 0.9999989322872547, 0.9999952148660759, 0.9999996940946866, 0.9999993523989945, 0.9999993191958264, 0.9999992842902989, 0.999999984769836, 0.999999958600151, 0.9999999031390346, 0.9999998981728707, 0.9999999907624354, 0.9999999992417332]
------------------------------
w0 = 0; w1 = 1; w2 = 2
loss w.r.t w0: 0.5263040189528949
loss w.r.t w1: 0.26840543218501844
loss w.r.t w2: 3.4210020035298947
------------------------------
Step 2
[0.9999814474354158, 0.9999694410907778, 0.9998634445169712, 0.9999957639512457, 0.9999930225156671, 0.9999885069308541, 0.9999688178763141, 0.9999987586833347, 0.999994452490586, 0.9999996362497481, 0.9999992310257579, 0.9999991916756036, 0.9999991503118143, 0.9999999814160981, 0.9999999495785757, 0.9999998822199953, 0.9999998761929214, 0.9999999884875991, 0.9999999990311903]
---------