In [77]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Regression

### Global constants

In [119]:
# Seed NumPy's global random generator so the synthetic data is reproducible.
seed = 1
np.random.seed(seed)

# Number of samples to generate.
N = 10000
# Number of features per sample (also the length of the weight vector).
D = 17
# Raw features are drawn uniformly from [-x_lim, x_lim) before standardisation.
x_lim = 20
# True weights and the optimiser's starting weights are drawn uniformly
# from [-w_lim, w_lim).
w_lim = 20
# Standard deviation of the zero-mean Gaussian noise added to the targets.
noise_std = 3.0
# Passed to split_data; with 0.6 the printed shapes show 6000 of the 10000
# samples landing in the training split.
train_ratio = 0.6

### Generate data

In [120]:
from manipulate_data import standardise, split_data

def gen_bogus_regression_data(N, D, x_lim, w_lim, noise_std, randomised=True):
    """Build a synthetic linear-regression data set with known weights.

    Features are sampled uniformly from [-x_lim, x_lim) and passed through
    ``standardise``; the ground-truth weights are sampled uniformly from
    [-w_lim, w_lim).  Targets are the linear response ``x . true_w``,
    optionally perturbed by zero-mean Gaussian noise.

    All sampling goes through NumPy's global random state, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        N: number of samples (rows of ``x``).
        D: number of features (columns of ``x``, length of ``true_w``).
        x_lim: half-width of the uniform interval for the raw features.
        w_lim: half-width of the uniform interval for the true weights.
        noise_std: standard deviation of the noise added to the targets.
        randomised: when False, no noise is drawn and the targets are the
            exact linear response.

    Returns:
        Tuple ``(y, x, true_w)``: targets, transformed features, true weights.
    """
    raw_features = x_lim * (2 * np.random.rand(N, D) - 1)
    # standardise returns three values; only the transformed features are
    # used (the other two are presumably the per-feature mean and standard
    # deviation -- confirm in manipulate_data).
    x, _, _ = standardise(raw_features)
    true_w = w_lim * (2 * np.random.rand(D) - 1)

    noiseless_targets = np.dot(x, true_w)
    if not randomised:
        return noiseless_targets, x, true_w

    noise = np.random.normal(0.0, noise_std, N)
    return noiseless_targets + noise, x, true_w


# Generate the noisy (randomised=True) regression data set from the global constants.
y, x, true_w = gen_bogus_regression_data(N, D, x_lim, w_lim, noise_std, True)
# Partition targets and features into train/test parts according to train_ratio.
y_train, x_train, y_test, x_test = split_data(y, x, train_ratio)

# Sanity check: show how many samples ended up in each split.
print(y_train.shape, x_train.shape, y_test.shape, x_test.shape)

(6000,) (6000, 17) (4000,) (4000, 17)


### Try regression with regularised/unregularised MSE using gradient descent (GD), stochastic gradient descent (SGD) and the normal equations (NE)

In [121]:
from training import *
from testing import assess_regressor_mse

# Display label -> trainer. try_regressors calls these as
# trainer(y, x, lambda_, initial_w, max_iters, gamma, 0).
reg_trainers = {"Regularised with GD" : train_reg_mse_GD,
                "Regularised with SGD" : train_reg_mse_SGD}
# Display label -> trainer. try_regressors calls these as
# trainer(y, x, initial_w, max_iters, gamma, 0).
unreg_trainers = {"Unregularised with GD" : train_unreg_mse_GD,
                  "Unregularised with SGD" : train_unreg_mse_SGD}
# Ten regularisation strengths, logarithmically spaced from 1e-7 to 1e1.
lambdas = np.logspace(-7, 1, 10)
# Starting weights for the iterative trainers, uniform in [-w_lim, w_lim).
initial_w = w_lim * (2 * np.random.rand(D) - 1)
# Forwarded to the GD/SGD trainers; presumably the iteration budget and the
# step size respectively -- confirm in training.py.
max_iters = 50000
gamma = 0.05


def try_regressors(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers,
                   lambdas, initial_w, max_iters, gamma):
    """Fit every configured regressor and print train loss, test loss and weight error.

    Four families are run in order: the regularised iterative trainers (once
    per value in ``lambdas``), the unregularised iterative trainers, then
    ``train_reg_mse_NE`` (once per lambda) and ``train_unreg_mse_NE``.

    Args:
        y_train, x_train: training targets and features, handed to each trainer.
        y_test, x_test: held-out targets and features, handed to
            ``assess_regressor_mse`` together with the fitted regressor.
        true_w: ground-truth weights used to compute the relative weight error
            ``||w - true_w|| / ||true_w||``.
        reg_trainers: mapping of display label -> callable invoked as
            ``trainer(y, x, lambda_, initial_w, max_iters, gamma, 0)``.
        unreg_trainers: mapping of display label -> callable invoked as
            ``trainer(y, x, initial_w, max_iters, gamma, 0)``.
        lambdas: iterable of regularisation strengths to sweep.
        initial_w, max_iters, gamma: forwarded unchanged to the iterative trainers.

    Every trainer must return ``(w, train_loss, regressor, classifier)``; the
    classifier is ignored here.  The trailing ``0`` passed to each trainer is
    presumably the decision threshold of that classifier -- TODO confirm in
    training.py.

    Returns:
        None; results are only printed.
    """
    # Invariant across all fits, so compute it once.
    true_w_norm = np.linalg.norm(true_w)

    def report(w, train_loss, regressor, lambda_=None):
        """Evaluate one fitted model on the test split and print a result row."""
        test_loss = assess_regressor_mse(y_test, x_test, regressor)
        err_w = np.linalg.norm(w - true_w) / true_w_norm

        # Scientific notation keeps the small lambdas of a log-spaced sweep
        # distinguishable; with '.3f' everything below 5e-4 printed as 0.000.
        prefix = "" if lambda_ is None else "lam = {lam:.3e}: ".format(lam=lambda_)
        print(prefix + "train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
            train_loss=train_loss, test_loss=test_loss, err_w=err_w))

    # Regularised with GD/SGD
    for name, trainer in reg_trainers.items():
        print("\n", name)
        for lambda_ in lambdas:
            w, train_loss, regressor, _ = trainer(y_train, x_train, lambda_, initial_w, max_iters, gamma, 0)
            report(w, train_loss, regressor, lambda_)

    # Unregularised with GD/SGD
    for name, trainer in unreg_trainers.items():
        print("\n", name)
        w, train_loss, regressor, _ = trainer(y_train, x_train, initial_w, max_iters, gamma, 0)
        report(w, train_loss, regressor)

    # Regularised with NE
    print("\nRegularised with NE")
    for lambda_ in lambdas:
        w, train_loss, regressor, _ = train_reg_mse_NE(y_train, x_train, lambda_, 0)
        report(w, train_loss, regressor, lambda_)

    # Unregularised with NE
    print("\nUnregularised with NE")
    w, train_loss, regressor, _ = train_unreg_mse_NE(y_train, x_train, 0)
    report(w, train_loss, regressor)


# Run the full regression comparison; every result row is printed to stdout.
try_regressors(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, lambdas, initial_w, max_iters, gamma)


 Regularised with GD
lam = 0.000: train_loss = 4.390, test_loss = 9.246, err_w = 0.004
lam = 0.000: train_loss = 4.392, test_loss = 9.246, err_w = 0.004
lam = 0.000: train_loss = 4.404, test_loss = 9.246, err_w = 0.004
lam = 0.000: train_loss = 4.496, test_loss = 9.247, err_w = 0.004
lam = 0.000: train_loss = 5.211, test_loss = 9.254, err_w = 0.005
lam = 0.003: train_loss = 10.714, test_loss = 9.370, err_w = 0.008
lam = 0.022: train_loss = 51.532, test_loss = 13.757, err_w = 0.044
lam = 0.167: train_loss = 287.620, test_loss = 160.903, err_w = 0.258
lam = 1.292: train_loss = 806.838, test_loss = 1212.484, err_w = 0.729
lam = 10.000: train_loss = 1056.113, test_loss = 2071.914, err_w = 0.954

 Regularised with SGD
lam = 0.000: train_loss = 7.656, test_loss = 15.225, err_w = 0.053
lam = 0.000: train_loss = 6.366, test_loss = 12.791, err_w = 0.040
lam = 0.000: train_loss = 7.719, test_loss = 15.937, err_w = 0.055
lam = 0.000: train_loss = 9.237, test_loss = 18.509, err_w = 0.064
lam = 0.

# Classification

### Global constants

In [122]:
# Re-seed NumPy's global random generator so the classification data is
# reproducible independently of the draws made in the regression section.
seed = 1
np.random.seed(seed)

# NOTE: these rebind the constants of the same names from the regression section.
# Number of samples to generate.
N = 10000
# Number of features per sample (also the length of the weight vector).
D = 17
# Raw features are drawn uniformly from [-x_lim, x_lim) before standardisation.
x_lim = 20
# True weights and the optimiser's starting weights are drawn uniformly
# from [-w_lim, w_lim).
w_lim = 2
# Passed to split_data; with 0.6 the printed shapes show 6000 of the 10000
# samples landing in the training split.
train_ratio = 0.6

### Generate data

In [125]:
from manipulate_data import standardise, split_data
from implementations import sigmoid

def gen_bogus_classification_data(N, D, x_lim, w_lim, randomised=True):
    """Build a synthetic binary-classification data set with known weights.

    Features are sampled uniformly from [-x_lim, x_lim) and passed through
    ``standardise``; the ground-truth weights are sampled uniformly from
    [-w_lim, w_lim).  Each sample gets the score ``p = sigmoid(x . true_w)``,
    which is then turned into a 0/1 label.

    All sampling goes through NumPy's global random state, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        N: number of samples (rows of ``x``).
        D: number of features (columns of ``x``, length of ``true_w``).
        x_lim: half-width of the uniform interval for the raw features.
        w_lim: half-width of the uniform interval for the true weights.
        randomised: when True, a label is 1 where a fresh uniform draw falls
            below ``p``; when False, a label is 1 exactly where ``p > 0.5``.

    Returns:
        Tuple ``(y, x, true_w)``: 0/1 labels, transformed features, true weights.
    """
    raw_features = x_lim * (2 * np.random.rand(N, D) - 1)
    # standardise returns three values; only the transformed features are
    # used (the other two are presumably the per-feature mean and standard
    # deviation -- confirm in manipulate_data).
    x, _, _ = standardise(raw_features)
    true_w = w_lim * (2 * np.random.rand(D) - 1)
    p = sigmoid(np.dot(x, true_w))

    # Per-sample uniform draws give stochastic labels; the constant 0.5
    # gives a deterministic cut on p.
    cutoff = np.random.rand(N) if randomised else 0.5
    y = np.where(cutoff < p, 1, 0)

    return y, x, true_w


# Generate the stochastic-label (randomised=True) classification data set.
# NOTE: this rebinds y, x, true_w and the train/test splits created in the
# regression section.
y, x, true_w = gen_bogus_classification_data(N, D, x_lim, w_lim, True)
# Partition labels and features into train/test parts according to train_ratio.
y_train, x_train, y_test, x_test = split_data(y, x, train_ratio)

# Sanity checks: split sizes, then every 200th label to eyeball the class mix.
print(y_train.shape, x_train.shape, y_test.shape, x_test.shape)
print(y[::200])

(6000,) (6000, 17) (4000,) (4000, 17)
[1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 0
 0 1 0 1 1 0 1 1 1 1 0 1 0]


### Try classification with regularised/unregularised MSE/logistic loss using gradient descent (GD), stochastic gradient descent (SGD) and the normal equations (NE)

In [126]:
from training import *
from testing import assess_classifier_nhd

# Display label -> trainer. try_classifiers calls these as
# trainer(y, x, lambda_, initial_w, max_iters, gamma, 0.5).
reg_trainers = {"Regularised Least-Squares with GD" : train_reg_mse_GD,
                "Regularised Least-Squares with SGD" : train_reg_mse_SGD,
                "Regularised Logistic with GD" : train_reg_log_GD,
                "Regularised Logistic with SGD" : train_reg_log_SGD,
               }
# Display label -> trainer. try_classifiers calls these as
# trainer(y, x, initial_w, max_iters, gamma, 0.5).
unreg_trainers = {"Unregularised Least-Squares with GD" : train_unreg_mse_GD,
                  "Unregularised Least-Squares with SGD" : train_unreg_mse_SGD,
                  "Unregularised Logistic with GD" : train_unreg_log_GD,
                  "Unregularised Logistic with SGD" : train_unreg_log_SGD,}
# Ten regularisation strengths, logarithmically spaced from 1e-7 to 1e1.
lambdas = np.logspace(-7, 1, 10)
# Starting weights for the iterative trainers, uniform in [-w_lim, w_lim).
initial_w = w_lim * (2 * np.random.rand(D) - 1)
# Forwarded to the GD/SGD trainers; presumably the iteration budget and the
# step size respectively -- confirm in training.py.
max_iters = 5000
gamma = 0.001


def try_classifiers(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers,
                   lambdas, initial_w, max_iters, gamma):
    """Fit every configured classifier and print train loss, test loss and weight error.

    Four families are run in order: the regularised iterative trainers (once
    per value in ``lambdas``), the unregularised iterative trainers, then
    ``train_reg_mse_NE`` (once per lambda) and ``train_unreg_mse_NE``.

    Args:
        y_train, x_train: training labels and features, handed to each trainer.
        y_test, x_test: held-out labels and features, handed to
            ``assess_classifier_nhd`` together with the fitted classifier.
        true_w: ground-truth weights used to compute the relative weight error
            ``||w - true_w|| / ||true_w||``.
        reg_trainers: mapping of display label -> callable invoked as
            ``trainer(y, x, lambda_, initial_w, max_iters, gamma, 0.5)``.
        unreg_trainers: mapping of display label -> callable invoked as
            ``trainer(y, x, initial_w, max_iters, gamma, 0.5)``.
        lambdas: iterable of regularisation strengths to sweep.
        initial_w, max_iters, gamma: forwarded unchanged to the iterative trainers.

    Every trainer must return ``(w, train_loss, regressor, classifier)``; the
    regressor is ignored here.  The trailing ``0.5`` passed to each trainer is
    presumably the decision threshold of the returned classifier -- TODO
    confirm in training.py.

    NOTE(review): ``train_loss`` is whatever the trainer reports, while
    ``test_loss`` comes from ``assess_classifier_nhd``; they may not be the
    same metric -- confirm before comparing the two columns.

    Returns:
        None; results are only printed.
    """
    # Invariant across all fits, so compute it once.
    true_w_norm = np.linalg.norm(true_w)

    def report(w, train_loss, classifier, lambda_=None):
        """Evaluate one fitted model on the test split and print a result row."""
        test_loss = assess_classifier_nhd(y_test, x_test, classifier)
        err_w = np.linalg.norm(w - true_w) / true_w_norm

        # Scientific notation keeps the small lambdas of a log-spaced sweep
        # distinguishable; with '.3f' everything below 5e-4 printed as 0.000.
        prefix = "" if lambda_ is None else "lam = {lam:.3e}: ".format(lam=lambda_)
        print(prefix + "train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
            train_loss=train_loss, test_loss=test_loss, err_w=err_w))

    # Regularised Least-Squares/Logistic with GD/SGD
    for name, trainer in reg_trainers.items():
        print("\n", name)
        for lambda_ in lambdas:
            w, train_loss, _, classifier = trainer(y_train, x_train, lambda_, initial_w, max_iters, gamma, 0.5)
            report(w, train_loss, classifier, lambda_)

    # Unregularised Least-Squares/Logistic with GD/SGD
    for name, trainer in unreg_trainers.items():
        print("\n", name)
        w, train_loss, _, classifier = trainer(y_train, x_train, initial_w, max_iters, gamma, 0.5)
        report(w, train_loss, classifier)

    # Regularised with NE
    print("\nRegularised Least-Squares with NE")
    for lambda_ in lambdas:
        w, train_loss, _, classifier = train_reg_mse_NE(y_train, x_train, lambda_, 0.5)
        report(w, train_loss, classifier, lambda_)

    # Unregularised with NE
    print("\nUnregularised Least-Squares with NE")
    w, train_loss, _, classifier = train_unreg_mse_NE(y_train, x_train, 0.5)
    report(w, train_loss, classifier)


# Run the full classification comparison; every result row is printed to stdout.
try_classifiers(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, lambdas, initial_w, max_iters, gamma)


 Regularised Least-Squares with GD
lam = 0.000: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.000: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.000: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.000: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.000: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.003: train_loss = 0.186, test_loss = 0.424, err_w = 0.901
lam = 0.022: train_loss = 0.188, test_loss = 0.430, err_w = 0.905
lam = 0.167: train_loss = 0.202, test_loss = 0.471, err_w = 0.924
lam = 1.292: train_loss = 0.232, test_loss = 0.502, err_w = 0.972
lam = 10.000: train_loss = 0.247, test_loss = 0.502, err_w = 0.995

 Regularised Least-Squares with SGD
lam = 0.000: train_loss = 0.187, test_loss = 0.419, err_w = 0.900
lam = 0.000: train_loss = 0.186, test_loss = 0.419, err_w = 0.900
lam = 0.000: train_loss = 0.187, test_loss = 0.406, err_w = 0.893
lam = 0.000: train_loss = 0.186, test_loss = 0.417, err_w = 0.899
la