In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

# Regression

### Global constants

In [2]:
# Seed NumPy's global RNG up front so the random draws below are repeatable.
seed = 1
np.random.seed(seed)

# Size of the synthetic data set: N samples with D features each.
N, D = 10000, 17

# Features are drawn from [-x_lim, x_lim) and weights from [-w_lim, w_lim).
x_lim, w_lim = 20, 10

# Standard deviation of the Gaussian noise added to the targets.
noise_std = 3.0

# Ratio handed to split_data when separating training from test data.
train_ratio = 0.6

### Generate data

In [3]:
from manipulate_data import split_data

def gen_bogus_regression_data(N, D, x_lim, w_lim, noise_std):
    """Draw a random linear-regression problem from NumPy's global RNG.

    Args:
        N: number of samples (rows of the feature matrix).
        D: number of features (columns of the feature matrix).
        x_lim: features are uniform in [-x_lim, x_lim).
        w_lim: ground-truth weights are uniform in [-w_lim, w_lim).
        noise_std: standard deviation of the zero-mean Gaussian noise
            added to the targets.

    Returns:
        Tuple ``(y, tx, true_w)``: targets of shape (N,), features of
        shape (N, D) and the generating weights of shape (D,).
    """
    # The draws happen in this fixed order (features, weights, noise) so a
    # given seed always produces the same data set.
    unit_features = 2 * np.random.rand(N, D) - 1
    tx = x_lim * unit_features

    unit_weights = 2 * np.random.rand(D) - 1
    true_w = w_lim * unit_weights

    clean_targets = np.dot(tx, true_w)
    y = clean_targets + np.random.normal(0.0, noise_std, N)

    return y, tx, true_w


# Draw the synthetic data set from the global constants, then split it into
# training and test parts according to train_ratio.
y, tx, true_w = gen_bogus_regression_data(
    N=N, D=D, x_lim=x_lim, w_lim=w_lim, noise_std=noise_std
)
y_train, tx_train, y_test, tx_test = split_data(y, tx, train_ratio)

# Sanity check: show the shape of each piece of the split.
print(*(part.shape for part in (y_train, tx_train, y_test, tx_test)))

(6000,) (6000, 17) (4000,) (4000, 17)


### Try linear regression with regularised/unregularised MSE using gradient descent (GD), stochastic gradient descent (SGD) and the normal equations (NE)

In [20]:
from training import *
from implementations import compute_reg_mse_loss

# Iterative trainers, keyed by the heading printed above their results.
reg_trainers = {
    "Regularised with GD": train_reg_mse_GD,
    "Regularised with SGD": train_reg_mse_SGD,
}
unreg_trainers = {
    "Unregularised with GD": train_unreg_mse_GD,
    "Unregularised with SGD": train_unreg_mse_SGD,
}

# Ten regularisation strengths, log-spaced from 1e-5 to 1e1.
lambdas = np.logspace(-5, 1, num=10)

# Random starting weights, uniform in [-w_lim, w_lim). This consumes the
# global RNG, so the values depend on what was drawn before this cell ran.
initial_w = w_lim * (2 * np.random.rand(D) - 1)

# Iteration budget and step size handed to the GD/SGD trainers.
max_iters = 5000
gamma = 1e-4


def try_regressors(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, 
                   lambdas, initial_w, max_iters, gamma):
    """Train each configured linear regressor and print one result line per fit.

    Four families are tried in order: the regularised iterative trainers
    (once per value in ``lambdas``), the unregularised iterative trainers,
    ``train_reg_mse_NE`` (once per value in ``lambdas``) and
    ``train_unreg_mse_NE``. Each line reports the training loss returned by
    the trainer, the loss on the test data and the relative weight error
    ``||w - true_w|| / ||true_w||``.

    Args:
        y_train: training targets.
        x_train: training features; forwarded to every trainer.
        y_test: test targets.
        x_test: test features; used for every test loss.
        true_w: reference weights the fitted weights are compared against.
        reg_trainers: dict mapping a printed heading to a callable invoked as
            ``trainer(y, x, lambda_, initial_w, max_iters, gamma, 0)``.
        unreg_trainers: dict mapping a printed heading to a callable invoked
            as ``trainer(y, x, initial_w, max_iters, gamma, 0)``.
        lambdas: iterable of regularisation strengths to sweep.
        initial_w: starting weights for the iterative trainers.
        max_iters: iteration budget for the iterative trainers.
        gamma: step size for the iterative trainers.

    Returns:
        None. The results are only printed.
    """
    # NOTE(review): the trailing 0 passed to every trainer is an option whose
    # meaning is defined in the training module -- confirm there.

    def report(w, train_loss, lambda_=None):
        # The test loss is computed with the last argument set to 0
        # (presumably lambda = 0, i.e. no penalty term -- confirm against
        # compute_reg_mse_loss).
        test_loss = compute_reg_mse_loss(y_test, x_test, w, 0)
        err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

        prefix = "" if lambda_ is None else "lam = {lam:.3f}: ".format(lam=lambda_)
        print(prefix + "train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
            train_loss=train_loss, test_loss=test_loss, err_w=err_w))

    # Regularised with GD/SGD
    for name, trainer in reg_trainers.items():
        print("\n", name)
        for lambda_ in lambdas:
            # Trainers return (w, train_loss, regressor, classifier); only the
            # first two are needed here.
            w, train_loss, _regressor, _classifier = trainer(
                y_train, x_train, lambda_, initial_w, max_iters, gamma, 0)
            report(w, train_loss, lambda_)

    # Unregularised with GD/SGD
    for name, trainer in unreg_trainers.items():
        print("\n", name)
        w, train_loss, _regressor, _classifier = trainer(
            y_train, x_train, initial_w, max_iters, gamma, 0)
        report(w, train_loss)

    # Regularised with NE
    print("\nRegularised with NE")
    for lambda_ in lambdas:
        w, train_loss, _regressor, _classifier = train_reg_mse_NE(y_train, x_train, lambda_, 0)
        report(w, train_loss, lambda_)

    # Unregularised with NE
    print("\nUnregularised with NE")
    w, train_loss, _regressor, _classifier = train_unreg_mse_NE(y_train, x_train, 0)
    report(w, train_loss)

    return None


# Run the full comparison on the synthetic split generated above.
try_regressors(
    y_train, tx_train, y_test, tx_test, true_w,
    reg_trainers=reg_trainers,
    unreg_trainers=unreg_trainers,
    lambdas=lambdas,
    initial_w=initial_w,
    max_iters=max_iters,
    gamma=gamma,
)


 Regularised with GD
lam = 0.000: train_loss = 4.396, test_loss = 4.623, err_w = 0.001
lam = 0.000: train_loss = 4.416, test_loss = 4.623, err_w = 0.001
lam = 0.000: train_loss = 4.513, test_loss = 4.623, err_w = 0.001
lam = 0.001: train_loss = 4.962, test_loss = 4.624, err_w = 0.001
lam = 0.005: train_loss = 7.047, test_loss = 4.625, err_w = 0.001
lam = 0.022: train_loss = 16.720, test_loss = 4.636, err_w = 0.001
lam = 0.100: train_loss = 61.551, test_loss = 4.753, err_w = 0.002
lam = 0.464: train_loss = 268.217, test_loss = 6.743, err_w = 0.007
lam = 2.154: train_loss = 1197.807, test_loss = 45.152, err_w = 0.033
lam = 10.000: train_loss = 4958.767, test_loss = 692.135, err_w = 0.135

 Regularised with SGD
lam = 0.000: train_loss = 5.070, test_loss = 5.257, err_w = 0.004
lam = 0.000: train_loss = 4.826, test_loss = 4.967, err_w = 0.003
lam = 0.000: train_loss = 4.926, test_loss = 5.072, err_w = 0.004
lam = 0.001: train_loss = 5.519, test_loss = 5.083, err_w = 0.004
lam = 0.005: trai