# Logistic Regression implementation using only numpy

## Cost function for logistic regression
    loss = L(z) = 1/1+e^(-z), where z is a single data point.
    cost fn = 1/m*summation(-y*log(L(z)) - (1-y)*log(1-L(z)))

In [66]:
import numpy as np

In [67]:
def sigmoid(z):
    sig = 1/(1+np.exp((-1)*z))
    return sig

In [68]:
sigmoid(10)

0.9999546021312976

In [69]:
X = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y = np.array([0,0,0,1,1,1])

In [70]:
def compute_logistic_cost(X,y,w,b):
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        z_i = np.dot(w,X[i]) + b
        f_z = sigmoid(z_i)
        cost += -y[i] * np.log(f_z) - (1-y[i]) * np.log(1 - f_z)
    cost = cost / m
    return cost

In [71]:
w_temp = np.array([0,0])
b_temp = 0

In [72]:
compute_logistic_cost(X,y,w_temp,b_temp)

0.6931471805599453

## Gradient descent implementation for logistic regression
    w_new = w_old - lr*(dj/dw)
    b_new = b_old - lr*(dj/db), where lr is the learning rate, w and b are the parameters to be adjusted and j is the logistic regression function

In [73]:
def compute_gradient_logistic(X,y,w,b):
    m,n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        f_wb_i = sigmoid(np.dot(X[i],w) + b)
        err_i = f_wb_i- y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err_i * X[i,j]
        dj_db = dj_db + err_i
    dj_dw = dj_dw/m
    dj_db = dj_db/m

    return dj_db, dj_dw

In [74]:
compute_gradient_logistic(X,y,w_temp,b_temp)

(0.0, array([-0.25      , -0.16666667]))

In [100]:
import copy,math
def gradient_descent(X, y, w_in, b_in, alpha, num_iters): 

    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    for i in range(num_iters):
        dj_db, dj_dw = compute_gradient_logistic(X, y, w, b)   
        w = w - alpha * dj_dw               
        b = b - alpha * dj_db               
        if i<100000:
            J_history.append( compute_logistic_cost(X, y, w, b) )
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
    return w, b

In [97]:
gradient_descent(X,y,w_temp,b_temp,0.1,100000)

Iteration    0: Cost 0.684610468560574   
Iteration 10000: Cost 0.01711604647887364   
Iteration 20000: Cost 0.008523403979166485   
Iteration 30000: Cost 0.005672197191107651   
Iteration 40000: Cost 0.004250161053834308   
Iteration 50000: Cost 0.003398230224179212   
Iteration 60000: Cost 0.00283084256010047   
Iteration 70000: Cost 0.002425848306579758   
Iteration 80000: Cost 0.0021222573122028584   
Iteration 90000: Cost 0.0018862216652143864   


(array([8.35313087, 8.15226727]), -22.690605796630248)

In [98]:
X_tmp = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.,3.])
b_tmp = 1.
dj_db_tmp, dj_dw_tmp = compute_gradient_logistic(X_tmp, y_tmp, w_tmp, b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}" )

dj_db: 0.49861806546328574
dj_dw: [0.498333393278696, 0.49883942983996693]


In [99]:
gradient_descent(X_tmp,y_tmp,w_tmp,b_tmp,0.1,1000)

Iteration    0: Cost 2.9268211860885565   
Iteration  100: Cost 0.47207014490600896   
Iteration  200: Cost 0.38715163873342845   
Iteration  300: Cost 0.3286394164896976   
Iteration  400: Cost 0.28398075952100293   
Iteration  500: Cost 0.24904645247162985   
Iteration  600: Cost 0.22116215253509477   
Iteration  700: Cost 0.19850101105667262   
Iteration  800: Cost 0.17978868340702284   
Iteration  900: Cost 0.16411810425810738   


(array([2.37684375, 2.15000639]), -6.113586242730276)

# logistic regression using scikit-learn

In [112]:
X = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y = np.array([0, 0, 0, 1, 1, 1])

In [113]:
from sklearn.linear_model import LogisticRegression

In [114]:
model = LogisticRegression()

In [115]:
model.fit(X,y)

In [116]:
y_preds = model.predict(X)

In [117]:
y

array([0, 0, 0, 1, 1, 1])

In [118]:
y_preds

array([0, 0, 0, 1, 1, 1])

In [120]:
model.predict([[1.5,2]])

array([1])

## logistic regression using regularization to fix the problem of overfitting (only using numpy)

In [121]:
def compute_cost_regularized(X,y,w,b,lambda_ = 1):
    cost = 0.0
    m,n = X.shape

    for i in range(m):
        z_i = np.dot(X[i],w) + b
        f_z = sigmoid(z_i)
        cost += -y[i]*np.log(f_z) - (1-y[i])*np.log(1-f_z)
    cost = cost/m

    reg_cost = 0
    for j in range(n):
        reg_cost += (w[j]**2)
    reg_cost = (lambda_/(2*m)) * reg_cost
    tot_cost = cost + reg_cost
    return reg_cost

In [122]:
X_tmp = np.random.rand(5,6)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1]).reshape(-1,)-0.5
b_tmp = 0.5
lambda_tmp = 0.7
cost_tmp = compute_cost_regularized(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print("Regularized cost:", cost_tmp)

Regularized cost: 0.02759763650177679


In [124]:
compute_logistic_cost(X_tmp, y_tmp, w_tmp, b_tmp)

0.7137962067774293

In [1]:
def compute_gradient_regularized(X,y,w,b,lambda_):
    m,n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.0

    for i in range(m):
        f_z = sigmoid(np.dot(X[i], w) + b)
        err_i = f_z - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err_i * X[i,j]
        dj_db = dj_db + err_i
    dj_dw = dj_dw/m
    dj_db = dj_db/m

    for i in range(n):
        dj_dw[j] = dj_dw[j] + (lambda_/m) * w[j]
    
    return dj_db,dj_dw

In [129]:
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp =  compute_gradient_regularized(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: 0.341872525026184
Regularized dj_dw:
 [0.20795321668295585, 0.09691259129884092, 0.36432189201683896]


In [133]:
def gradient_descent_regularized(X, y, w_in, b_in, lambda_, alpha, num_iters): 

    w = w_in
    b = b_in
    for i in range(num_iters):
        dj_db, dj_dw = compute_gradient_regularized(X, y, w, b, lambda_)   
        w = w - alpha * dj_dw               
        b = b - alpha * dj_db
    return w,b

In [134]:
gradient_descent_regularized(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp, 0.01, 100)

(array([ 0.0523142 ,  0.60944219, -0.02772624]), 0.22509820087761287)