# 004: Implement logistic regression

needs data loading, train/test split, training, evaluation

In [1]:
import sys

import numpy as np

sys.path.append("../")
import helpers
from implementations import sigmoid

%load_ext autoreload
%autoreload 2

In [2]:
train = np.load("../data/dataset_prep/train.npz")
x_train = train["x_train"]
y_train = train["y_train"]

In [3]:
def calculate_loss(y, tx, w):
    """compute the cost by negative log likelihood.

    Args:
        y:  shape=(N, 1)
        tx: shape=(N, D)
        w:  shape=(D, 1)

    Returns:
        a non-negative loss
    """
    assert y.shape[0] == tx.shape[0]
    assert tx.shape[1] == w.shape[0]

    pred = sigmoid(tx @ w)
    # clipping for stability
    #eps = 1e-15
    #pred = np.clip(pred, eps, 1 - eps)
    loss = (y * np.log(pred) + (1 - y) * np.log(1 - pred))
    return - 1 / pred.shape[0] * np.sum(loss).item()


def calculate_gradient(y, tx, w, sample_weights=None):
    """compute the gradient of loss.

    Args:
        y:  shape=(N, 1)
        tx: shape=(N, D)
        w:  shape=(D, 1)

    Returns:
        a vector of shape (D, 1)
    """
    assert y.shape[0] == tx.shape[0]
    assert tx.shape[1] == w.shape[0]

    pred = sigmoid(tx @ w)

    if sample_weights is None:
        sample_weights = np.ones_like(y)

    gradient = tx.T @ (sample_weights * (pred - y)) / np.sum(sample_weights)

    assert gradient.shape == w.shape
    return gradient

def learning_by_gradient_descent(y, tx, w, gamma, sample_weights=None):
    """
    Do one step of gradient descent using logistic regression. Return the loss and the updated w.

    Args:
        y:  shape=(N, 1)
        tx: shape=(N, D)
        w:  shape=(D, 1)
        gamma: float

    Returns:
        loss: scalar number
        w: shape=(D, 1)
    """
    loss = calculate_loss(y, tx, w)
    w_new = w - gamma * calculate_gradient(y, tx, w, sample_weights)
    return loss, w_new

In [9]:
def logistic_regression_gradient_descent_demo(y, x):
    # init parameters
    max_iter = 1000
    threshold = 1e-8
    gamma = 1e-2
    losses = []

    # build tx
    tx = np.c_[np.ones((y.shape[0], 1)), x]
    w = np.zeros((tx.shape[1], 1))

    pos_weight = y.shape[0] / (2 * np.sum(y))
    print(pos_weight)
    neg_weight = y.shape[0] / (2 * np.sum(1 - y))
    print(neg_weight)
    sample_weights = np.where(y == 1, pos_weight, neg_weight).reshape(-1, 1)

    # start the logistic regression
    for iter in range(max_iter):
        # get loss and update w.
        loss, w = learning_by_gradient_descent(y, tx, w, gamma, sample_weights)
        # log info
        if iter % 1 == 0:
            print("Current iteration={i}, loss={l}".format(i=iter, l=loss))
        # converge criterion
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break
        print("L1 norm of w:", np.sum(np.abs(w)))
        print(np.mean(tx @ w))
        
        print(np.mean(np.sign(sigmoid(tx @ w))==y))
    
    
    print("loss={l}".format(l=calculate_loss(y, tx, w)))

In [10]:
logistic_regression_gradient_descent_demo(y_train.reshape(-1, 1), x_train)

-0.6072413346410792
0.27421363150153766
Current iteration=0, loss=0.6931471805599446
L1 norm of w: 0.022216470901901573
-0.020460927745187076
0.08830207079403295
Current iteration=1, loss=0.6659929013320601
L1 norm of w: 0.04429424914370591
-0.04086998227615552
0.08830207079403295
Current iteration=2, loss=0.6390178498529868
L1 norm of w: 0.0662343983184137
-0.06122733948494986
0.08830207079403295
Current iteration=3, loss=0.6122208779454954
L1 norm of w: 0.08803800638104849
-0.08153318543888916
0.08830207079403295
Current iteration=4, loss=0.5856008098341852
L1 norm of w: 0.10970618399090082
-0.10178771611066716
0.08830207079403295
Current iteration=5, loss=0.5591564434051673
L1 norm of w: 0.1312400628831538
-0.12199113710558547
0.08830207079403295
Current iteration=6, loss=0.532886551453647
L1 norm of w: 0.1526407942724315
-0.1421436633864304
0.08830207079403295
Current iteration=7, loss=0.5067898829179607
L1 norm of w: 0.17390954729056068
-0.16224551899648876
0.08830207079403295
Cur