# Regularisation with weight decay

In [1]:
import numpy as np

#### Loading the data
Each line of the files corresponds to a two-dimensional input x = (x1, x2), so that X = R^2, followed by the corresponding
label from Y = {-1,1}. We are going to apply Linear Regression with a non-linear transformation for classifcation. 

In [2]:
train = np.loadtxt("in.dta")
test = np.loadtxt("out.dta")

#### Defining plain linear regression functions

In [3]:
def nonlinear_transform(data):
    """Perform nonlinear transformation as per q2 spec"""
    result = []
    for row in data:
        x1 = row[0]
        x2 = row[1]

        result.append([1, x1, x2, np.multiply(x1, x1), np.multiply(x2, x2),
                       np.multiply(x1, x2), np.abs(x1 - x2), np.abs(x1 + x2)])

    return np.array(result)

def extract_labels(dataset):
    """Return correct classifications from dataset"""
    return dataset[:, 2]

def linreg(dataset, y):
    """Return weights from linear regression"""
    pseudo_inverse = np.linalg.pinv(dataset)
    w = pseudo_inverse.dot(y)

    return w

def evaluate_points(dataset, line):
    """Return list classifying points in dataset as above or below line"""

    return np.sign(dataset.dot(line))

def calculate_error(dataset, weights, y):
    """Calculate error in weights"""
    output = evaluate_points(dataset, weights)
    comparison = np.equal(output, y)

    number_false = 0
    for c in comparison:
        if c == False:
            number_false += 1

    return number_false / len(y)

## Question 2

In [4]:
data = nonlinear_transform(train)
labels = extract_labels(train)
lr = linreg(data, labels)
er = calculate_error(data, lr, labels)

out = nonlinear_transform(test)
out_labels = extract_labels(test)
out_er = calculate_error(out, lr, out_labels)

print("In sample error: ", er)
print("Out of sample error: ", out_er)

In sample error:  0.02857142857142857
Out of sample error:  0.084


## Question 3

#### Linear regression with weight decay

In [5]:
def linreg_weight_decay(dataset, y, reg_factor):
    """Return weights from linear regression with weight decay"""

    a = dataset.T.dot(dataset) + (np.identity(dataset.shape[1]) * reg_factor)
    b = np.linalg.inv(a)
    c = b.dot(dataset.T)
    w = c.dot(y)

    return w

#### Running the test
lambda = 10^k  
k = -3

In [6]:
data = nonlinear_transform(train)
labels = extract_labels(train)
lr = linreg_weight_decay(data, labels, np.power(10, -3))
er = calculate_error(data, lr, labels)

out = nonlinear_transform(test)
out_labels = extract_labels(test)
out_er = calculate_error(out, lr, out_labels)

print("In sample error: ", er)
print("Out of sample error: ", out_er)

In sample error:  0.02857142857142857
Out of sample error:  0.084


## Question 4
k = 3

In [7]:
data = nonlinear_transform(train)
labels = extract_labels(train)
lr = linreg_weight_decay(data, labels, np.power(10, 3))
er = calculate_error(data, lr, labels)

out = nonlinear_transform(test)
out_labels = extract_labels(test)
out_er = calculate_error(out, lr, out_labels)


print("In sample error: ", er)
print("Out of sample error: ", out_er)

In sample error:  0.37142857142857144
Out of sample error:  0.436


## Question 5
Comparing Eout for k = [2, 1, 0, -1, -2]

In [8]:
for i in [2, 1, 0, -1, -2]:
    data = nonlinear_transform(train)
    labels = extract_labels(train)
    lr = linreg_weight_decay(data, labels, np.power(10, i))
    er = calculate_error(data, lr, labels)

    out = nonlinear_transform(test)
    out_labels = extract_labels(test)
    out_er = calculate_error(out, lr, out_labels)

    print(str(i) + ":  " + " Ein: " + str(er) + " Eout: " + str(out_er))

2:   Ein: 0.2 Eout: 0.228
1:   Ein: 0.05714285714285714 Eout: 0.124
0:   Ein: 0.0 Eout: 0.092
-1:   Ein: 0.02857142857142857 Eout: 0.084
-2:   Ein: 0.02857142857142857 Eout: 0.084
