In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

# Regression

### Constants

In [2]:
seed = 1
np.random.seed(seed)

N = 10000
D = 17
x_lim = 20
w_lim = 20
noise_std = 3.0
train_ratio = 0.6

### Generate data

In [3]:
from manipulate_data import standardise, split_data

def gen_bogus_regression_data(N, D, x_lim, w_lim, noise_std, randomised=True):
    x = x_lim * (2 * np.random.rand(N, D) - 1)
    x, mean_x, std_x = standardise(x)
    true_w = w_lim * (2 * np.random.rand(D) - 1)
    y = np.dot(x, true_w)
    if randomised:
        y += np.random.normal(0.0, noise_std, N)
    
    return y, x, true_w


y, x, true_w = gen_bogus_regression_data(N, D, x_lim, w_lim, noise_std, True)
y_train, x_train, y_test, x_test = split_data(y, x, train_ratio)

print(y_train.shape, x_train.shape, y_test.shape, x_test.shape)

(6000,) (6000, 17) (4000,) (4000, 17)


### Try regression with regularised/unregularised MSE using GD/SGD/NE

In [9]:
from training import *
from testing import assess_regressor_mse, cross_validation

lambdas = np.logspace(-7, 1, 1)
initial_w = w_lim * (2 * np.random.rand(D) - 1)
max_iters = 5000
batch_size = 32
gamma = 0.05
reg_trainers = {"Regularised Least-Squares with GD" : (train_reg_ls_GD, (initial_w, max_iters, gamma)),
                "Regularised Least-Squares with SGD" : (train_reg_ls_SGD, (initial_w, max_iters, batch_size, gamma))}
unreg_trainers = {"Unregularised Least-Squares with GD" : (train_unreg_ls_GD, (initial_w, max_iters, gamma)),
                  "Unregularised Least-Squares with SGD" : (train_unreg_ls_SGD, (initial_w, max_iters, batch_size, gamma))}


def try_regressors(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, 
                   lambdas, initial_w, max_iters, gamma):    
    # Regularised with GD/SGD
    for name in reg_trainers:
        print("\n", name)
        trainer, extra_params = reg_trainers[name]
        for lambda_ in lambdas:
            w, train_loss, regressor, classifier = trainer(y_train, x_train, lambda_, *extra_params, 0)
            test_loss = assess_regressor_mse(y_test, x_test, regressor)
            err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)
            
            print("lam = {lam:.3f}: train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
            lam=lambda_, train_loss=train_loss, test_loss=test_loss, err_w=err_w))
    
    # Unregularised with GD/SGD
    for name in unreg_trainers:
        print("\n", name)
        trainer, extra_params = unreg_trainers[name]
        w, train_loss, regressor, classifier = trainer(y_train, x_train, *extra_params, 0)
        test_loss = assess_regressor_mse(y_test, x_test, regressor)
        err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

        print("train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
        train_loss=train_loss, test_loss=test_loss, err_w=err_w))
    
    # Regularised with NE
    print("\nRegularised Least-Squares with NE")
    for lambda_ in lambdas:
        w, train_loss, regressor, classifier = train_reg_ls_NE(y_train, x_train, lambda_, 0)
        test_loss = assess_regressor_mse(y_test, x_test, regressor)
        err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

        print("lam = {lam:.3f}: train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
        lam=lambda_, train_loss=train_loss, test_loss=test_loss, err_w=err_w))

    # Unregularised with NE
    print("\nUnregularised Least-Squares with NE")
    w, train_loss, regressor, classifier = train_unreg_ls_NE(y_train, x_train, 0)
    test_loss = assess_regressor_mse(y_test, x_test, regressor)
    err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

    print("train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
    train_loss=train_loss, test_loss=test_loss, err_w=err_w))
    
    return None


try_regressors(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, lambdas, initial_w, max_iters, gamma)
print("CV:", cross_validation(y, x, train_reg_ls_NE, (1e-5,), 0.5, 5, "regressor"))


 Regularised Least-Squares with GD
lam = 0.000: train_loss = 4.390, test_loss = 9.246, err_w = 0.004

 Regularised Least-Squares with SGD
lam = 0.000: train_loss = 4.425, test_loss = 9.327, err_w = 0.008

 Unregularised Least-Squares with GD
train_loss = 4.390, test_loss = 9.246, err_w = 0.004

 Unregularised Least-Squares with SGD
train_loss = 4.416, test_loss = 9.257, err_w = 0.005

Regularised Least-Squares with NE
lam = 0.000: train_loss = 4.390, test_loss = 9.246, err_w = 0.004

Unregularised Least-Squares with NE
train_loss = 4.390, test_loss = 9.246, err_w = 0.004
CV: (8.98926464283262, 4.496750516737043)


# Classification

### Constants

In [10]:
seed = 1
np.random.seed(seed)

N = 10000
D = 17
x_lim = 20
w_lim = 2
train_ratio = 0.6

### Generate data

In [11]:
from manipulate_data import standardise, split_data
from implementations import sigmoid

def gen_bogus_classification_data(N, D, x_lim, w_lim, randomised=True):
    x = x_lim * (2 * np.random.rand(N, D) - 1)
    x, mean_x, std_x = standardise(x)
    true_w = w_lim * (2 * np.random.rand(D) - 1)
    p = sigmoid(np.dot(x, true_w))
    if randomised:
        y = np.where(np.random.rand(N) < p, 1, 0)
    else:
        y = np.where(0.5 < p, 1, 0)
    
    return y, x, true_w


y, x, true_w = gen_bogus_classification_data(N, D, x_lim, w_lim, True)
y_train, x_train, y_test, x_test = split_data(y, x, train_ratio)

print(y_train.shape, x_train.shape, y_test.shape, x_test.shape)
print(y[::200])

(6000,) (6000, 17) (4000,) (4000, 17)
[1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 0 1
 1 0 0 0 0 0 1 1 1 0 1 1 0]


### Try classification with regularised/unregularised MSE/logistic using GD/SGD/NE

In [14]:
from training import *
from testing import assess_classifier_nhd

reg_trainers = {"Regularised Least-Squares with GD" : train_reg_ls_GD,
                "Regularised Least-Squares with SGD" : train_reg_ls_SGD,
                "Regularised Logistic with GD" : train_reg_log_GD,
                "Regularised Logistic with SGD" : train_reg_log_SGD,
               }
unreg_trainers = {"Unregularised Least-Squares with GD" : train_unreg_ls_GD,
                  "Unregularised Least-Squares with SGD" : train_unreg_ls_SGD,
                  "Unregularised Logistic with GD" : train_unreg_log_GD,
                  "Unregularised Logistic with SGD" : train_unreg_log_SGD,}
lambdas = np.logspace(-7, 1, 1)
initial_w = w_lim * (2 * np.random.rand(D) - 1)
max_iters = 5000
gamma = 0.001


def try_classifiers(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, 
                   lambdas, initial_w, max_iters, gamma):    
    # Regularised Least-Squares/Logistic with GD/SGD
    for name in reg_trainers:
        print("\n", name)
        trainer = reg_trainers[name]
        for lambda_ in lambdas:
            w, train_loss, regressor, classifier = trainer(y_train, x_train, lambda_, initial_w, max_iters, gamma, 0.5)
            test_loss = assess_classifier_nhd(y_test, x_test, classifier)
            err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)
                     
            print("lam = {lam:.3f}: train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
            lam=lambda_, train_loss=train_loss, test_loss=test_loss, err_w=err_w))
                
    # Unregularised Least-Squares/Logistic with GD/SGD
    for name in unreg_trainers:
        print("\n", name)
        trainer = unreg_trainers[name]
        w, train_loss, regressor, classifier = trainer(y_train, x_train, initial_w, max_iters, gamma, 0.5)
        test_loss = assess_classifier_nhd(y_test, x_test, classifier)
        err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

        print("train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
        train_loss=train_loss, test_loss=test_loss, err_w=err_w))
    
    # Regularised with NE
    print("\nRegularised Least-Squares with NE")
    for lambda_ in lambdas:
        w, train_loss, regressor, classifier = train_reg_mse_NE(y_train, x_train, lambda_, 0.5)
        test_loss = assess_classifier_nhd(y_test, x_test, classifier)
        err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

        print("lam = {lam:.3f}: train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
        lam=lambda_, train_loss=train_loss, test_loss=test_loss, err_w=err_w))

    # Unregularised with NE
    print("\nUnregularised Least-Squares with NE")
    w, train_loss, regressor, classifier = train_unreg_mse_NE(y_train, x_train, 0.5)
    test_loss = assess_classifier_nhd(y_test, x_test, classifier)
    err_w = np.linalg.norm(w - true_w) / np.linalg.norm(true_w)

    print("train_loss = {train_loss:.3f}, test_loss = {test_loss:.3f}, err_w = {err_w:.3f}".format(
    train_loss=train_loss, test_loss=test_loss, err_w=err_w))
    
    return None


try_classifiers(y_train, x_train, y_test, x_test, true_w, reg_trainers, unreg_trainers, lambdas, initial_w, max_iters, gamma)


 Regularised Least-Squares with GD
lam = 0.000: train_loss = 0.178, test_loss = 0.412, err_w = 0.924

 Regularised Least-Squares with SGD


TypeError: train_reg_ls_SGD() missing 1 required positional argument: 'threshold'

## Cross validation

In [17]:
from training import *
from testing import cross_validation

lambdas = np.logspace(-7, 1, 15)
initial_w = w_lim * (2 * np.random.rand(D) - 1)
max_iters = 500
gamma = 1e-3


def cv_classifiers(y, x, lambdas, initial_w, max_iters, gamma):    
    # Regularised Least-Squares with NE
    print("\nRegularised Least-Squares with NE")
    for lambda_ in lambdas:
        hyper_params = (lambda_, )
        avg_test_loss, avg_train_loss = cross_validation(y, x, train_reg_ls_NE, hyper_params, 0.5, 5, "classifier")

        print("lam = {lam:.3f}: avg_test_loss = {avg_test_loss:.3f}, avg_train_loss = {avg_train_loss:.3f}".format(
        lam=lambda_, avg_test_loss=avg_test_loss, avg_train_loss=avg_train_loss))

    # Regularised Logistic with GD
    print("\nRegularised Logistic with GD")
    for lambda_ in lambdas:
        hyper_params = (lambda_, initial_w, max_iters, gamma)
        avg_test_loss, avg_train_loss = cross_validation(y, x, train_reg_log_GD, hyper_params, 0.5, 5, "classifier")

        print("lam = {lam:.3f}: avg_test_loss = {avg_test_loss:.3f}, avg_train_loss = {avg_train_loss:.3f}".format(
        lam=lambda_, avg_test_loss=avg_test_loss, avg_train_loss=avg_train_loss))
    
    return None


cv_classifiers(y, x, lambdas, initial_w, max_iters, gamma)

[autoreload of training failed: Traceback (most recent call last):
  File "C:\anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 394, in superreload
    module = reload(module)
  File "C:\anaconda3\lib\imp.py", line 314, in reload
    return importlib.reload(module)
  File "C:\anaconda3\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 783, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "C:\Users\Anzuoni Elia\Documents\EPFL\Machine Learning\Projects\project1\scripts\toolbox\training.py", line 10, in <module>
    from .implementations import *
ImportError: attempted relative import with no known parent package
]



Regularised Least-Squares with NE
lam = 0.000: avg_test_loss = 0.402, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.401, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.402, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.402, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.402, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.403, avg_train_loss = 0.177
lam = 0.000: avg_test_loss = 0.402, avg_train_loss = 0.177
lam = 0.001: avg_test_loss = 0.404, avg_train_loss = 0.177
lam = 0.004: avg_test_loss = 0.404, avg_train_loss = 0.178
lam = 0.014: avg_test_loss = 0.408, avg_train_loss = 0.179
lam = 0.052: avg_test_loss = 0.426, avg_train_loss = 0.184
lam = 0.193: avg_test_loss = 0.466, avg_train_loss = 0.197
lam = 0.720: avg_test_loss = 0.496, avg_train_loss = 0.220
lam = 2.683: avg_test_loss = 0.497, avg_train_loss = 0.237
lam = 10.000: avg_test_loss = 0.497, avg_train_loss = 0.245

Regularised Logistic with GD
lam = 0.000: avg_test_loss = 0.498, avg_train_los

KeyboardInterrupt: 