In [33]:
import numpy as np
import matplotlib as plt

In [34]:
# Read the comma-separated training file, skipping its first (header) line.
data = np.genfromtxt("data/train.csv", delimiter=",", skip_header=1)

In [35]:
def compute_loss_MSE(y, tx, w):
    """Return half of the mean squared error of the linear model tx.dot(w).

    Args:
        y: numpy array of targets, shape=(N, )
        tx: numpy array of features, shape=(N, D)
        w: numpy array of model parameters, shape=(D, )

    Returns:
        The scalar 1/2 * mean((y - tx.dot(w))**2).
    """
    residual = y - tx.dot(w)
    mean_squared_residual = np.mean(np.square(residual))
    return 0.5 * mean_squared_residual

In [36]:
def compute_gradient(y, tx, w):
    """Gradient of the half mean-squared-error loss of a linear model, evaluated at w.

    Args:
        y: numpy array of targets, shape=(N, )
        tx: numpy array of features, shape=(N, D)
        w: numpy array of model parameters, shape=(D, )

    Returns:
        A numpy array with the same shape as w: -(1/N) * tx.T.dot(y - tx.dot(w)).
    """
    residual = y - tx.dot(w)
    # The minus sign comes from differentiating (y - tx.dot(w)) with respect to w.
    scale = -(1 / len(y))
    return scale * tx.T.dot(residual)

In [37]:
def gradient_descent(y, tx, initial_w, max_iters, gamma):
    """The Gradient Descent (GD) algorithm for the MSE loss of a linear model.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N, D)
        initial_w: numpy array of shape=(D, ). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of GD
        gamma: a scalar denoting the stepsize

    Returns:
        losses: a list of length max_iters; losses[i] is the loss at the parameters *before* the i-th update
        ws: a list of length max_iters + 1; ws[0] is initial_w and ws[i + 1] is the parameter vector after the i-th update
    """
    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # Gradient and loss are both evaluated at the current (pre-update) w.
        gradient = compute_gradient(y, tx, w)
        # Use the file's MSE helper; a bare `compute_loss` is not defined here.
        loss = compute_loss_MSE(y, tx, w)

        # Take one step against the gradient.
        w = w - (gamma * gradient)

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("GD iter. {bi}/{ti}: loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws

In [38]:
def compute_stoch_gradient(y, tx, w):
    """Gradient of the half mean-squared-error loss at w, computed on the given (mini-batch) samples.

    Args:
        y: numpy array of shape=(B, ), the targets of the samples in the batch
        tx: numpy array of shape=(B, D), the features of the samples in the batch
        w: numpy array of shape=(D, ). The vector of model parameters.

    Returns:
        A numpy array with the same shape as w, containing the gradient estimate.
    """
    batch_len = len(y)
    batch_residual = y - tx.dot(w)
    projected = tx.T.dot(batch_residual)
    return -(1 / batch_len) * projected

In [39]:
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma):
    """The Stochastic Gradient Descent algorithm (SGD) for the MSE loss of a linear model.

    Relies on a `batch_iter(y, tx, batch_size=...)` helper that is not defined in
    this part of the file; it is assumed to yield (minibatch_y, minibatch_tx)
    pairs -- confirm against its definition.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N, D)
        initial_w: numpy array of shape=(D, ). The initial guess (or the initialization) for the model parameters
        batch_size: a scalar denoting the number of data points in a mini-batch used for computing the stochastic gradient
        max_iters: a scalar denoting the total number of iterations of SGD
        gamma: a scalar denoting the stepsize

    Returns:
        losses: a list with one entry per mini-batch update; each entry is the loss on the
            full data set evaluated at the parameters *before* that update
        ws: a list holding initial_w followed by the parameter vector after each mini-batch update
    """

    # Define parameters to store w and loss
    ws = [initial_w]
    losses = []
    w = initial_w

    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size=batch_size):
            # The gradient only looks at the mini-batch ...
            stoch_gradient = compute_stoch_gradient(minibatch_y, minibatch_tx, w)

            # ... while the reported loss is measured on the whole data set.
            # Use the file's MSE helper; a bare `compute_loss` is not defined here.
            loss = compute_loss_MSE(y, tx, w)

            # update w by gradient
            w = w - gamma * stoch_gradient

            # store w and loss
            ws.append(w)
            losses.append(loss)
        # Progress line reports the loss of the last mini-batch of this iteration.
        print("SGD iter. {bi}/{ti}: loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws

In [40]:
def least_squares(y, tx):
    """Fit a linear model by solving the normal equations (tx.T tx) w = tx.T y.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N, D)

    Returns:
        A tuple (w, e): the solution vector of shape (D, ) and its loss as
        computed by compute_loss_MSE.
    """
    gram = tx.T.dot(tx)
    moment = tx.T.dot(y)
    weights = np.linalg.solve(gram, moment)
    return weights, compute_loss_MSE(y, tx, weights)

In [41]:
def ridge_regression(y, tx, lambda_):
    """Fit a ridge-regularised linear model via its normal equations.

    Solves (tx.T tx + 2 * N * lambda_ * I) w = tx.T y, where N is the number
    of rows of tx and I is the identity matrix of size D.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N, D)
        lambda_: a scalar denoting the regularization strength

    Returns:
        A tuple (w, e): the solution vector of shape (D, ) and its
        (unregularised) loss as computed by compute_loss_MSE.
    """
    n_samples, n_features = tx.shape
    aI = 2 * n_samples * lambda_ * np.identity(n_features)
    a = tx.T.dot(tx) + aI
    b = tx.T.dot(y)
    # w has to be solved for *before* the loss can be evaluated at it.
    w = np.linalg.solve(a, b)
    e = compute_loss_MSE(y, tx, w)
    return w, e

In [42]:
def sigmoid(t):
    """Logistic function 1 / (1 + exp(-t)), evaluated without overflow.

    Args:
        t: a scalar or array-like of real numbers

    Returns:
        The element-wise sigmoid of t: a numpy scalar for scalar input,
        otherwise a numpy array with the same shape as t.
    """
    t = np.asarray(t)
    # exp(-|t|) lies in (0, 1], so it cannot overflow, unlike exp(-t) for very negative t.
    decay = np.exp(-np.abs(t))
    # For t >= 0 use 1 / (1 + e^-t); for t < 0 use the equivalent e^t / (1 + e^t).
    result = np.where(t >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves arrays unchanged.
    return result[()]

In [43]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression trained with gradient descent.

    The loss is the mean negative log-likelihood
        mean(log(1 + exp(z)) - y * z),  with z = tx.dot(w),
    which equals -mean(y*log(p) + (1-y)*log(1-p)) for p = sigmoid(z), and the
    update uses the exact gradient of that loss, tx.T.dot(sigmoid(z) - y) / N.

    Args:
        y: numpy array of shape=(N, ); the cross-entropy loss expects labels in {0, 1}
        tx: numpy array of shape=(N, D)
        initial_w: numpy array of shape=(D, ). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of logistic regression
        gamma: a scalar denoting the stepsize, aka learning rate

    Returns:
        losses: a list of length max_iters; losses[i] is the loss at the parameters *after* the i-th update
        ws: a list of length max_iters + 1; ws[0] is initial_w and ws[i + 1] is the parameter vector after the i-th update
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    N = len(y)

    for n_iter in range(max_iters):
        # Gradient of the mean negative log-likelihood at the current w.
        gradient = tx.T.dot(sigmoid(tx.dot(w)) - y) / N

        # update w by gradient
        w = w - (gamma * gradient)

        # Loss at the updated w. logaddexp(0, z) = log(1 + exp(z)) stays finite
        # even when the predictions saturate at 0 or 1 (no log(0)).
        z = tx.dot(w)
        loss = np.mean(np.logaddexp(0, z) - y * z)

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Logistic regression iter. {bi}/{ti}: loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws

In [None]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """L2-regularised logistic regression trained with gradient descent.

    The loss is the mean negative log-likelihood plus an L2 penalty:
        mean(log(1 + exp(z)) - y * z) + lambda_ / (2 * N) * w.dot(w),  with z = tx.dot(w),
    and the update uses the exact gradient of that loss,
        tx.T.dot(sigmoid(z) - y) / N + (lambda_ / N) * w.

    Args:
        y: numpy array of shape=(N, ); the cross-entropy loss expects labels in {0, 1}
        tx: numpy array of shape=(N, D)
        lambda_: regularization factor
        initial_w: numpy array of shape=(D, ). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of reg logistic regression
        gamma: a scalar denoting the stepsize, aka learning rate

    Returns:
        losses: a list of length max_iters; losses[i] is the regularised loss at the parameters *after* the i-th update
        ws: a list of length max_iters + 1; ws[0] is initial_w and ws[i + 1] is the parameter vector after the i-th update
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    N = len(y)

    for n_iter in range(max_iters):
        # Gradient of the data term plus gradient of the L2 penalty.
        gradient = tx.T.dot(sigmoid(tx.dot(w)) - y) / N + (lambda_ / N) * w

        # update w by gradient
        w = w - (gamma * gradient)

        # Loss at the updated w. logaddexp(0, z) = log(1 + exp(z)) stays finite
        # even when the predictions saturate at 0 or 1 (no log(0)).
        z = tx.dot(w)
        data_loss = np.mean(np.logaddexp(0, z) - y * z)
        penalty = (lambda_ / (2 * N)) * (w.T.dot(w))
        loss = data_loss + penalty

        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Reg Logistic regression iter. {bi}/{ti}: loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws