In [32]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
from algorithms import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [79]:
#tested
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Fit a linear model by full-batch gradient descent on the MSE cost.

    The cost is ``1 / (2N) * ||y - tx.dot(w)||^2`` where N is the number of
    rows of ``tx``.

    Args:
        y: target values, one per row of ``tx`` (flat vector or column).
        tx: 2-D feature matrix (numpy array).
        initial_w: starting weights, as a list or array, flat or column.
        max_iters: number of gradient steps to perform.
        gamma: step size.

    Returns:
        (w, loss): the final weights as an ndarray with the same shape as
        ``initial_w``, and the MSE cost evaluated at those weights.

    Side effects:
        Prints a one-line summary with the final loss and weights.
    """
    w = np.asarray(initial_w)
    # Give y the shape of the predictions. Without this, a column y combined
    # with flat weights (or the reverse) broadcasts the residual to (N, N)
    # and silently produces meaningless gradients.
    y = np.reshape(y, tx.dot(w).shape)
    n_samples = tx.shape[0]

    for _ in range(max_iters):
        grad = -(1 / n_samples) * tx.T.dot(y - tx.dot(w))
        w = w - gamma * grad

    # Only the final loss is reported, so it is evaluated once after the loop
    # rather than on every iteration.
    loss = (1 / (2 * n_samples)) * np.linalg.norm(y - tx.dot(w)) ** 2

    # Column weights are flattened for display only; the returned shape is kept.
    shown_w = w[:, 0] if w.ndim > 1 else w
    print("least_squares_GD({bi}/{ti}): loss={l}, w = {w}".format(
        bi=max_iters - 1, ti=max_iters - 1, l=loss, w=shown_w))
    return w, loss

In [80]:
#tested
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Fit a linear model by stochastic gradient descent on the MSE cost.

    Each step draws a mini-batch of size 1 through ``batch_iter`` and moves
    the weights along the negative gradient of that sample's squared error.

    Args:
        y: target values, one per row of ``tx`` (flat vector or column).
        tx: 2-D feature matrix (numpy array).
        initial_w: starting weights, as a list or array, flat or column.
        max_iters: number of stochastic steps to perform.
        gamma: step size.

    Returns:
        (w, loss): the final weights as an ndarray with the same shape as
        ``initial_w``, and the full-data MSE cost
        ``1 / (2N) * ||y - tx.dot(w)||^2`` evaluated at those weights.

    Side effects:
        Prints a one-line summary with the final loss and weights.
    """
    w = np.asarray(initial_w)
    # Give y the shape of the predictions so that a column y with flat weights
    # (or the reverse) cannot broadcast the residual to a square matrix.
    y = np.reshape(y, tx.dot(w).shape)
    n_samples = tx.shape[0]

    for _ in range(max_iters):
        # batch_iter is not defined in this notebook (presumably provided by
        # the star import); it is used here as an iterable of
        # (y_batch, tx_batch) pairs. If it yields several batches, the last
        # one determines the step, as before.
        for y_b, tx_b in batch_iter(y, tx, 1):
            g = -tx_b.T.dot(y_b - tx_b.dot(w))
        w = w - gamma * g

    # The full-data loss costs O(N); evaluating it once after the loop instead
    # of on every step keeps each stochastic step cheap and returns the same
    # value.
    loss = (1 / (2 * n_samples)) * np.linalg.norm(y - tx.dot(w)) ** 2

    # Column weights are flattened for display only; the returned shape is kept.
    shown_w = w[:, 0] if w.ndim > 1 else w
    print("least_squares_SGD({bi}/{ti}): loss={l}, w = {w}".format(
        bi=max_iters - 1, ti=max_iters - 1, l=loss, w=shown_w))
    return w, loss

In [81]:
# tested
def least_squares(y, tx):
    """Solve linear least squares in closed form via the normal equations.

    Args:
        y: target values, one per row of ``tx``.
        tx: 2-D feature matrix (numpy array).

    Returns:
        (w, loss): the weights solving ``tx.T tx w = tx.T y`` and the MSE cost
        ``1 / (2N) * ||y - tx.dot(w)||^2`` at those weights. When the Gram
        matrix is singular, ``w`` is the minimum-norm least-squares solution.
    """
    n_samples = tx.shape[0]
    gram = tx.T.dot(tx)
    moment = tx.T.dot(y)
    try:
        w = np.linalg.solve(gram, moment)
    except np.linalg.LinAlgError:
        # Singular Gram matrix (e.g. collinear or duplicated columns): the
        # normal equations have no unique solution, so return the
        # minimum-norm least-squares solution instead of failing.
        w = np.linalg.pinv(tx).dot(y)
    loss = (1 / (2 * n_samples)) * np.linalg.norm(y - tx.dot(w)) ** 2

    return w, loss

In [82]:
# tested
def ridge_regression(y, tx, lambda_):
    """Closed-form ridge regression.

    Solves ``(tx.T tx + 2 N lambda_ I) w = tx.T y`` where N is the number of
    rows of ``tx``.

    Args:
        y: target values, one per row of ``tx``.
        tx: 2-D feature matrix (numpy array).
        lambda_: regularisation strength.

    Returns:
        (w, loss): the ridge weights and the penalised cost
        ``1 / (2N) * ||y - tx.dot(w)||^2 + lambda_ * ||w||^2``.
    """
    n_samples, n_features = tx.shape[0], tx.shape[1]

    # The 2N factor matches the 1/(2N) scaling of the data term in the cost.
    scaled_penalty = lambda_ * 2 * n_samples
    lhs = tx.T.dot(tx) + scaled_penalty * np.eye(n_features)
    rhs = tx.T.dot(y)
    w = np.linalg.solve(lhs, rhs)

    residual = y - tx.dot(w)
    data_term = (1 / (2 * n_samples)) * np.linalg.norm(residual) ** 2
    penalty_term = lambda_ * np.linalg.norm(w) ** 2
    return w, data_term + penalty_term

In [113]:
def sigmoid(t):
    """Apply the logistic sigmoid ``1 / (1 + exp(-t))`` element-wise to t.

    Evaluated as ``exp(-log(1 + exp(-t)))`` through ``np.logaddexp`` so that
    large positive inputs return 1.0 instead of overflowing to inf / inf = nan,
    and large negative inputs return 0.0.

    Args:
        t: scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``t`` (numpy scalar for scalar input, array otherwise).
    """
    return np.exp(-np.logaddexp(0, np.negative(t)))

In [83]:
# tested
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Fit logistic regression by full-batch gradient descent.

    Minimises the negative log-likelihood
    ``sum(log(1 + exp(tx.dot(w))) - y * tx.dot(w))``.

    Args:
        y: labels, one per row of ``tx`` (flat vector or column). The loss
            formula corresponds to labels in {0, 1}.
        tx: 2-D feature matrix (numpy array).
        initial_w: starting weights, as a list or array; flat input is
            promoted to a column.
        max_iters: number of gradient steps to perform.
        gamma: step size.

    Returns:
        (w, loss): the final weights as a column array and the negative
        log-likelihood evaluated at those same weights.

    Side effects:
        Prints a one-line summary with the final loss and weights.
    """
    # Accept lists as well as arrays, and always work with column weights.
    w = np.asarray(initial_w, dtype=float)
    if w.ndim < 2:
        w = w.reshape(-1, 1)
    # Align y with the predictions; a flat y against column predictions would
    # otherwise broadcast to an (N, N) matrix.
    y = np.reshape(y, tx.dot(w).shape)

    for _ in range(max_iters):
        z = tx.dot(w)
        # Sigmoid written as exp(-log(1 + exp(-z))) so large |z| cannot overflow.
        prob = np.exp(-np.logaddexp(0, -z))
        grad = tx.T.dot(prob - y)
        w = w - gamma * grad

    # The loss is evaluated at the returned weights. It used to be computed
    # from the weights before the last update.
    # logaddexp(0, z) is the overflow-safe form of log(1 + exp(z)).
    z = tx.dot(w)
    loss = np.sum(np.logaddexp(0, z) - y * z)

    print("logistic_GD({bi}/{ti}): loss={l}, w = {w}".format(
        bi=max_iters - 1, ti=max_iters - 1, l=loss, w=w[:, 0]))
    return w, loss

In [126]:
# tested
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """Fit L2-regularised logistic regression by full-batch gradient descent.

    Minimises
    ``sum(log(1 + exp(tx.dot(w))) - y * tx.dot(w)) + (lambda_ / 2) * ||w||^2``.

    Args:
        y: labels, one per row of ``tx`` (flat vector or column). The loss
            formula corresponds to labels in {0, 1}.
        tx: 2-D feature matrix (numpy array).
        lambda_: regularisation strength.
        initial_w: starting weights, as a list or array; flat input is
            promoted to a column.
        max_iters: number of gradient steps to perform.
        gamma: step size.

    Returns:
        (w, loss): the final weights as a column array and the penalised
        negative log-likelihood evaluated at those same weights.

    Side effects:
        Prints a one-line summary with the final loss and weights.
    """
    # Accept lists as well as arrays, and always work with column weights.
    w = np.asarray(initial_w, dtype=float)
    if w.ndim < 2:
        w = w.reshape(-1, 1)
    # Align y with the predictions; a flat y against column predictions would
    # otherwise broadcast to an (N, N) matrix.
    y = np.reshape(y, tx.dot(w).shape)

    for _ in range(max_iters):
        z = tx.dot(w)
        # Sigmoid written as exp(-log(1 + exp(-z))) so large |z| cannot overflow.
        prob = np.exp(-np.logaddexp(0, -z))
        grad = tx.T.dot(prob - y) + lambda_ * w
        w = w - gamma * grad

    # The loss is evaluated at the returned weights. It used to be computed
    # from the weights before the last update.
    # logaddexp(0, z) is the overflow-safe form of log(1 + exp(z)).
    z = tx.dot(w)
    loss = np.sum(np.logaddexp(0, z) - z * y) + (lambda_ / 2) * np.linalg.norm(w) ** 2

    print("reg_logistic_GD({bi}/{ti}): loss={l}, w = {w}".format(
        bi=max_iters - 1, ti=max_iters - 1, l=loss, w=w[:, 0]))
    return w, loss

In [127]:
def testing_ls_ridge():
    """Run the least-squares and ridge solvers side by side and print their weights.

    Loads the height/weight data through ``load_data_from_ex02``, standardises
    the height, builds ``(y, tx)`` with ``build_model_data`` and then fits, in
    this order: a grid search, the generic ``gradient_descent`` (mse, mae),
    the generic ``stochastic_gradient_descent`` (mse, mae), ridge through
    ``gradient_descent``, and finally the four closed-form / iterative solvers
    defined in this notebook. The fitted weights are printed at the end.

    Returns:
        0, always.
    """
    height, weight, _gender = load_data_from_ex02(sub_sample=False, add_outlier=False)
    x_std, _mean_x, _std_x = standardize(height)
    y, tx = build_model_data(x_std, weight)

    # Reference solution from an exhaustive grid over both weights.
    w0_axis = np.linspace(-100, 100, 100)
    w1_axis = np.linspace(-100, 100, 100)
    _grid_loss, grid_w = grid_search(y, tx, w0_axis, w1_axis)

    # The same starting point object is handed to every solver.
    w_start = [0, 0]

    # Generic gradient descent, 500 iterations.
    GD_w, _ = gradient_descent(
        y, tx, w_start, 500, 0.7, cost='mse', tol=1e-2, thresh_test_div=10)
    GD_w_mae, _ = gradient_descent(
        y, tx, w_start, 500, 10, cost='mae', tol=1e-2, thresh_test_div=10)

    # Generic stochastic gradient descent, batch size 1, 1000 iterations.
    SGD_w, _ = stochastic_gradient_descent(
        y, tx, w_start, 1, 1000, 0.01, cost='mse', tol=1e-4, thresh_test_div=100)
    SGD_w_mae, _ = stochastic_gradient_descent(
        y, tx, w_start, 1, 1000, 0.1, cost='mae', tol=1e-2, thresh_test_div=100)

    # Ridge through the generic gradient descent, 10000 iterations.
    ridge_strength = 0.3
    ridge_w, _ = gradient_descent(
        y, tx, w_start, 10000, 0.01, cost='ridge', lambda_=ridge_strength,
        tol=1e-8, thresh_test_div=10)

    # Solvers defined in this notebook.
    n_steps, step = 1000, 0.01
    test_w_GD, _ = least_squares_GD(y, tx, w_start, n_steps, step)
    test_w_SGD, _ = least_squares_SGD(y, tx, w_start, n_steps, step)
    test_w_ls, _ = least_squares(y, tx)
    test_w_ridge, _ = ridge_regression(y, tx, ridge_strength)

    report = [
        ("grid_w:", grid_w),
        ("GD_w:", GD_w),
        ("GD_w_mae:", GD_w_mae),
        ("SGD_w:", SGD_w),
        ("SGD_w_mae", SGD_w_mae),
        ("ridge_w:", ridge_w),
        ("test_w_GD:", test_w_GD),
        ("test_w_SGD:", test_w_SGD),
        ("test_w_ls:", test_w_ls),
        ("test_w_ridge:", test_w_ridge),
    ]
    for label, weights in report:
        print(label, weights)

    return 0

In [128]:
# Run the least-squares / ridge comparison. The function in this notebook is
# named testing_ls_ridge; no `testing` is defined here, so the old call fails
# on a fresh kernel unless the star import happens to provide one.
testing_ls_ridge()

GD(6/499), cost: mse: loss=15.386020684743531, w=[ 73.27789262  13.47676442]
GD(11/499), cost: mae: loss=4.4280692313059635, w=[ 73.172       13.33995363]
SGD(389/999), cost: mse: loss=16.31177891921242, w=[ 72.02696226  12.983129  ]
SGD(676/999), cost: mae: loss=9.983030937042374, w=[ 64.7          6.74438094]
GD(696/9999), cost: ridge: loss=1056.7049475009694, w=[ 45.80810073   8.42470983]
least_squares_GD(999/999): loss=15.38589304420346, w = [ 73.29075781  13.4791305 ]
least_squares_SGD(999/999): loss=15.49966273645759, w = [ 72.83520484  13.34883729]
grid_w: [73.737373737373758, 13.131313131313135]
GD_w: [ 73.27789262  13.47676442]
GD_w_mae: [ 73.172       13.33995363]
SGD_w: [ 72.02696226  12.983129  ]
SGD_w_mae [ 64.7          6.74438094]
ridge_w: [ 45.80810073   8.42470983]
test_w_GD: [ 73.29075781  13.4791305 ]
test_w_SGD: [ 72.83520484  13.34883729]
test_w_ls: [ 73.293922    13.47971243]
test_w_ridge: [ 45.80870125   8.42482027]


0

In [129]:
# Scratch check: the object returned by np.array is exactly an np.ndarray.
# The list `w` is only defined here and takes no part in the comparison.
numar = np.array([0, 3])
w = [0, 0]
type(numar) is np.ndarray

True

In [131]:
def testing_logistic():
    """Compare the notebook's logistic solvers with the generic gradient descent.

    Loads the data through ``load_data_from_ex02``, draws a sample of 200 rows
    with ``sample_data`` (seed 1), standardises the two features and prepends
    a column of ones. It then runs, in this order: ``gradient_descent`` with
    the 'logistic' cost, ``logistic_regression``, ``gradient_descent`` with
    the 'reg_logistic' cost, and ``reg_logistic_regression``. Only the two
    unregularised weight vectors are printed.
    """
    height, weight, gender = load_data_from_ex02()

    # Labels as a column, features as an (n, 2) matrix, then sub-sample.
    labels = np.expand_dims(gender, axis=1)
    features = np.c_[height.reshape(-1), weight.reshape(-1)]
    labels, features = sample_data(labels, features, 1, size_samples=200)
    features_std, _mean_x, _std_x = standardize(features)

    # Design matrix with a leading column of ones.
    tx = np.c_[np.ones((labels.shape[0], 1)), features_std]
    w_start = np.zeros((tx.shape[1], 1))

    n_steps = 100000
    step = 0.001
    reg_strength = 0.3
    gd_options = dict(tol=1e-15, thresh_test_div=10, update_gamma=False)

    w_logistic, _ = gradient_descent(
        labels, tx, w_start, n_steps, step, cost='logistic', lambda_=0, **gd_options)

    w_test_log, _ = logistic_regression(labels, tx, w_start, n_steps, step)

    # The regularised runs are executed for their printed summaries; their
    # return values are not used afterwards.
    gradient_descent(
        labels, tx, w_start, n_steps, step, cost='reg_logistic',
        lambda_=reg_strength, **gd_options)

    reg_logistic_regression(labels, tx, reg_strength, w_start, n_steps, step)

    for label, weights in (("w_logistic:", w_logistic), ("w_test_log:", w_test_log)):
        print(label, weights[:, 0])

In [132]:
# Run the logistic-regression comparison; each solver prints its own summary
# line, followed by the two unregularised weight vectors.
testing_logistic()

GD(27164/99999), cost: logistic: loss=40.894825609907, w=[ 0.23840571  2.46366568 -6.99516202]
logistic_GD(99999/99999): loss=40.894825609890574, w = [ 0.23840601  2.46367002 -6.9951691 ]
GD(11980/99999), cost: reg_logistic: loss=46.47750249347819, w=[ 0.15959974  1.26814654 -5.03057715]
logistic_GD(99999/99999): loss=46.47750249347092, w = [ 0.15959985  1.26814854 -5.0305801 ]
w_logistic: [ 0.23840571  2.46366568 -6.99516202]
w_test_log: [ 0.23840601  2.46367002 -6.9951691 ]
