In [18]:
import math, copy
import numpy as np

In [19]:
# Toy training set: six examples with two features each.
# The first three rows carry label 0 and the last three carry label 1.
X_train = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 0.5],
    [2.0, 2.0],
    [1.0, 2.5],
])
y_train = np.array([0] * 3 + [1] * 3)

In [20]:
def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + e^(-z)).

    Args:
        z: a scalar or NumPy array; the function is applied element-wise
           because it is built from ``np.exp`` and arithmetic operators.

    Returns:
        A value (or array of the same shape as ``z``) with the sigmoid of
        each input element.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

In [21]:
def compute_cost_logistic(X, y, w, b):
    """Compute the mean logistic (cross-entropy) cost over all examples.

    For each example i, with z_i = X[i] . w + b, the per-example loss
    -y_i*log(g(z_i)) - (1 - y_i)*log(1 - g(z_i)) (g being the sigmoid) is
    evaluated in the algebraically equivalent form

        log(1 + e^{z_i}) - y_i * z_i

    The direct form takes log(0) once the sigmoid saturates to exactly 0 or 1,
    which yields ``nan`` (from 0 * -inf) or ``inf``. The rewritten form stays
    finite for large |z_i|.

    Args:
        X: 2-D array; ``X.shape[0]`` is the number of examples m and each
           row ``X[i]`` is dotted with ``w``.
        y: indexable of targets, one per row of ``X``.
        w: weight vector dotted with each row of ``X``.
        b: scalar bias added to each dot product.

    Returns:
        The sum of the per-example losses divided by m.
    """
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i], w) + b
        # np.logaddexp(0, z) == log(e^0 + e^z) == log(1 + e^z), computed
        # without overflowing for large z.
        cost += np.logaddexp(0.0, z_i) - y[i] * z_i
    cost /= m
    return cost

### Logistic Gradient Descent

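Recall that gradient descent repeats the following simultaneous updates until convergence, where $\alpha$ is the learning rate: $$ w_{j} = w_{j} - \alpha \frac {\partial J(w,b)}{\partial w_{j}} \quad \text{for } j = 0, \dots, n-1 $$ $$ b = b - \alpha \frac {\partial J(w,b)}{\partial b} $$ The partial derivatives used in these updates are: $$ \frac {\partial J(w,b)}{\partial w_{j}} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x_{j}^{(i)} $$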
$$ \frac {\partial J(w,b)}{\partial b} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) $$
* m is the number of training examples
* $ f_{w,b}(x^{(i)}) $ is the model's prediction, while $ y^{(i)} $ is the target
* For a logistic regression model <br> $ z = w \cdot x + b $ <br> $ f_{w,b}(x) = g(z) $ <br> where g(z) is the sigmoid function: <br> $ g(z) = \frac {1}{1+e^{(-z)}} $

In [22]:
def compute_gradient_logistic(X, y, w, b):
    """Compute the gradient of the logistic cost with respect to w and b.

    For every example the prediction error ``sigmoid(X[i] . w + b) - y[i]``
    is accumulated into the bias gradient and, scaled by the example's
    features, into the weight gradient. Both sums are then divided by the
    number of examples.

    Args:
        X: 2-D array of shape (m, n); one example per row.
        y: indexable of targets, one per row of ``X``.
        w: weight vector dotted with each row of ``X``.
        b: scalar bias added to each dot product.

    Returns:
        Tuple ``(dj_db, dj_dw)``: the bias gradient, and the weight gradient
        as an array of shape (n,). Note the bias gradient comes first.
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0.

    for idx in range(num_examples):
        features = X[idx]
        residual = sigmoid(np.dot(features, w) + b) - y[idx]
        # Whole-row update; element j receives residual * X[idx, j].
        grad_w += residual * features
        grad_b += residual

    grad_w /= num_examples
    grad_b /= num_examples

    return grad_b, grad_w

In [23]:
# Spot-check compute_gradient_logistic on a small fixed data set with
# hand-picked (non-zero) parameters, then print both gradient components.
X_tmp = np.array([
    [0.5, 1.5],
    [1.0, 1.0],
    [1.5, 0.5],
    [3.0, 0.5],
    [2.0, 2.0],
    [1.0, 2.5],
])
y_tmp = np.array([0] * 3 + [1] * 3)
w_tmp = np.array([2.0, 3.0])
b_tmp = 1.0
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic(X=X_tmp, y=y_tmp, w=w_tmp, b=b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}" )

dj_db: 0.49861806546328574
dj_dw: [0.498333393278696, 0.49883942983996693]


In [24]:
def gradient_descent(X, y, w_in, b_in, alpha, num_iters, max_history=100000):
    """Run batch gradient descent for logistic regression.

    Each iteration calls ``compute_gradient_logistic`` and moves ``w`` and
    ``b`` against the gradient by a step of ``alpha``. A progress line is
    printed roughly ten times over the run (every ``ceil(num_iters / 10)``
    iterations, starting at iteration 0).

    Args:
        X: training examples, passed through to the gradient and cost helpers.
        y: training targets, passed through to the gradient and cost helpers.
        w_in: initial weights; deep-copied so the caller's object is not
              modified.
        b_in: initial bias.
        alpha: step size multiplied into each gradient.
        num_iters: number of update steps to perform.
        max_history: the cost is appended to the returned history only for
                     iterations with index below this value, which bounds
                     the list's length on very long runs. Defaults to 100000.

    Returns:
        Tuple ``(w, b, J_history)``: the final weights, the final bias, and
        the list of costs recorded after each of the first ``max_history``
        updates.
    """
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in

    # Loop-invariant logging interval. It is only used inside the loop, so
    # a value of 0 (when num_iters <= 0) never reaches the modulo below.
    log_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db, dj_dw = compute_gradient_logistic(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % log_every == 0

        if record or report:
            # Evaluate the cost for the *current* parameters. Previously the
            # progress line read J_history[-1], which is stale once the
            # history cap has been reached.
            cost = compute_cost_logistic(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost}")

    return w, b, J_history


In [25]:
# Train on X_train / y_train starting from all-zero parameters. The initial
# weight vector takes its shape and dtype from one training row.
w_tmp = np.zeros_like(X_train[0])
b_tmp = 0.0
alph = 0.1
iters = 10000

w_out, b_out, _ = gradient_descent(
    X_train, y_train, w_in=w_tmp, b_in=b_tmp, alpha=alph, num_iters=iters
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

Iteration    0: Cost 0.684610468560574
Iteration 1000: Cost 0.1590977666870457
Iteration 2000: Cost 0.08460064176930078
Iteration 3000: Cost 0.05705327279402531
Iteration 4000: Cost 0.04290759421682
Iteration 5000: Cost 0.03433847729884557
Iteration 6000: Cost 0.02860379802212006
Iteration 7000: Cost 0.02450156960879306
Iteration 8000: Cost 0.02142370332569295
Iteration 9000: Cost 0.019030137124109114

updated parameters: w:[5.28123029 5.07815608], b:-14.222409982019837
