## Linear Regression Error
### Question 1

In [1]:
o = 0.1
d = 8
N = 100

e_in = (o ** 2)* (1 - ((d + 1) / N))

print(e_in)

0.009100000000000002


## Gradient descent
Minimise error on nonlinear error surface using gradient descent. Learning rate = 0.1.
### Question 4
Finding the partial derivative.

In [2]:
import sympy as sp
import mpmath as mp

u = sp.Symbol('u')
v = sp.Symbol('v')


print(sp.diff(((u*mp.e**v)-2*v*mp.e**-u)**2, u))


(2*2.71828182845905**v + 4.0*2.71828182845905**(-u)*v)*(2.71828182845905**v*u - 2*2.71828182845905**(-u)*v)


## Question 5
How many iterations does it take for the error to fall below 10^-14 for the first time?

In [3]:
import numpy as np

def in_sample_error(x, y):
    """Return error from nonlinear error surface E(x, y)"""
    return ((x * (np.e ** y)) - (2 * y * (np.e ** -x))) ** 2


def partial_x(x, y):
    """Return partial derivative E(x, y) with respect to x"""
    return (2*np.e**y + 4.0*np.e**(-x)*y)*(np.e**y*x - 2*np.e**(-x)*y)


def partial_y(x, y):
    """Return partial derivative E(x, y) with respect to y"""
    return (np.e**y*x - 2*np.e**(-x)*y)*(2.0*np.e**y*x - 4*np.e**(-x))


def gradient_descent(x, y, learning_rate, target_error):
    """Run gradient descent according q5 spec"""
    count = 0
    error = in_sample_error(x, y)

    while error > target_error:
        x_temp = x
        x = x - learning_rate * partial_x(x, y)
        y = y - learning_rate * partial_y(x_temp, y)
        error = in_sample_error(x, y)
        count += 1

    return (count, x, y, error)

In [4]:
print(gradient_descent(1, 1, 0.1, 10 ** -14))

(10, 0.04473629039778207, 0.023958714099141746, 1.2086833944220747e-15)


10 iterations

## 'Coordinate descent'
### Question 7
In each iteration, we have two steps along the 2 coordinates. Step 1 is to move only along
the u coordinate to reduce the error (assume first-order approximation holds like in gradient descent), and step 2 is to reevaluate and move only along the v coordinate to reduce the error (again, assume first-order approximation holds).
Use the same learning rate of 0.1 as we did in gradient descent.  
  
What will the error E(u; v) be closest to after 15 full iterations (30 steps)?

In [5]:
def coordinate_descent(x, y, learning_rate, iterations):
    """Run 'coordinate descent' according q7 spec"""

    error = in_sample_error(x, y)

    for x in range(iterations):
        x = x - learning_rate * partial_x(x, y)
        error = in_sample_error(x, y)
        y = y - learning_rate * partial_y(x, y)
        error = in_sample_error(x, y)

    return (x, y, error)

In [6]:
print(coordinate_descent(1, 1, 0.1, 15))

(13.998368453740682, -3.7466462092782167, 0.10911280071980022)


## Logistic Regression
### Question 8
#### Setting up the experiment
In this problem you will create your own target function f (probability in this case) and data set D to see how Logistic Regression works. For simplicity, we will take f to be a 0/1 probability so y is a deterministic function of x.  
  
Take d = 2 so you can visualize the problem, and let X = [-1,1] X [-1,1] with uniform probability of picking each x in X. Choose a line in the plane as the boundary between f(x) = 1 (where y has to be +1) and f(x) = 0 (where y has to be -1) by taking two random, uniformly distributed points from X and taking the line passing through them as the boundary between y = +-1. Pick N = 100 training points at random from X, and evaluate the outputs yn for each of these points xn.

In [7]:
def create_dataset(number_of_points):
    """Return dataset of random points in form x0=1, x1, x2"""
    ones = np.ones((number_of_points, 1))
    points = np.random.uniform(-1.0, 1.0, size=(number_of_points, 2))
    return np.concatenate((ones, points), axis=1)


def create_f(number_of_points):
    """Return coeficients of random straight line x0=1, m, c"""
    points = np.random.uniform(-1.0, 1.0, size=(number_of_points, 2))
    w0 = 1.0
    b = [-w0, -w0]
    w1, w2 = np.linalg.solve(points, b)
    return np.array([w0, w1, w2])


def evaluate_points(dataset, line):
    """Return list classifying points in dataset as above or below line"""

    return np.sign(dataset.dot(line))

#### Defining functions for logistic regression with stochastic gradient descent

In [8]:
def create_weights(dataset):
    """Return empty weight vector of appropriate size for dataset"""
    length = len(dataset[0])
    return np.zeros(length, int)


def error(point, weights, output):
    """Return gradient delta Ein for stochastic gradient descent"""
    return (-point * output) / (1 + np.e**(output * weights.dot(point)))


def epoch(dataset, output, weights, learning_rate):
    """Return weights after one epoch"""
    random_order = np.arange(100)
    np.random.shuffle(random_order)

    for point in random_order:
        point_error = error(dataset[point], weights, output[point])
        weights = weights - learning_rate * point_error

    return weights


def SGD(dataset, outputs, weights, learning_rate, stop):
    """Return number of epochs and final weights from SGD"""
    count = 0

    while True:
        old_weights = weights
        weights = epoch(dataset, outputs, weights, learning_rate)
        count += 1
        if np.linalg.norm(old_weights - weights) < stop:
            break

    return (count, weights)


def cross_entropy_error(point, weights, output):
    return np.log(1 + np.exp(-output * weights.dot(point)))


def out_of_sample_error(weights, target_function):
    dataset = create_dataset(1000)
    outputs = evaluate_points(dataset, target_function)
    errors = []
    for point in range(len(dataset)):
        error = cross_entropy_error(dataset[point], weights, outputs[point])
        errors.append(error)

    return errors

#### Run the experiment

In [9]:
def run_SGD_experiment(runs):
    """
    Return number of epochs and out of sample error after
    runnning SGD according to q8 spec.
    
    Creates dataset of 100 points, creates line, evaluate points.
    """

    iterations_needed = []
    out_of_sample_errors = []

    for run in range(runs):
        #Status
        print("Run" + str(run))
        
        # Initialisations
        dataset = create_dataset(100)
        target_function = create_f(2)
        outputs = evaluate_points(dataset, target_function)
        weights = create_weights(dataset)

        # Run SGD
        result = SGD(dataset, outputs, weights, 0.01, 0.01)

        # Remember number of iterations require to complete SGD for this run
        iterations_needed.append(result[0])

        # Calculate and remember out of sample error for this run
        out_of_sample_errors.append(out_of_sample_error(result[1], target_function))

    return (sum(iterations_needed) / runs, np.mean(out_of_sample_errors))

In [10]:
print(run_SGD_experiment(10))

Run0
Run1
Run2
Run3
Run4
Run5
Run6
Run7
Run8
Run9
(333.5, 0.10064068740898795)
