In [4]:
import numpy as np

In [2]:
def error_func(u, v):
    '''Error surface E(u, v) = (u*e^v - 2*v*e^(-u))^2 used in Problems 4 - 7.'''
    residual = u * np.exp(v) - 2 * v * np.exp(-u)
    return residual ** 2

def error_func_du(u, v):
    '''Partial derivative of the error function with respect to u.

    Chain rule: 2 * inner * d(inner)/du, where
    inner = u*e^v - 2*v*e^(-u).
    '''
    inner = u * np.exp(v) - 2 * v * np.exp(-u)
    d_inner_du = np.exp(v) + 2 * v * np.exp(-u)
    return 2 * inner * d_inner_du

def error_func_dv(u, v):
    '''Partial derivative of the error function with respect to v.

    Chain rule: 2 * inner * d(inner)/dv, where
    inner = u*e^v - 2*v*e^(-u).
    '''
    inner = u * np.exp(v) - 2 * v * np.exp(-u)
    d_inner_dv = u * np.exp(v) - 2 * np.exp(-u)
    return 2 * inner * d_inner_dv

def error_func_grad(u, v):
    '''Gradient of the error function at (u, v), as the array [dE/du, dE/dv].'''
    partial_u = error_func_du(u, v)
    partial_v = error_func_dv(u, v)
    return np.array([partial_u, partial_v])

# Problems 5 and 6

In [52]:
# Plain gradient descent on error_func, starting from (u, v) = (1, 1).
learning_rate = 0.1
u, v = 1, 1
iters = 0

# Keep stepping until the error falls below 10**-14; `iters` counts the steps taken.
while error_func(u, v) >= 10**-14:
    grad_u, grad_v = error_func_grad(u, v)
    # Both coordinates move together, using the gradient taken at the same point.
    u -= learning_rate * grad_u
    v -= learning_rate * grad_v
    iters += 1

# Problem 7

In [54]:
# Coordinate descent on error_func, starting from (u, v) = (1, 1).
learning_rate = 0.1
u, v = 1, 1

for _ in range(15):
    # Step 1: move along u only, using the partial derivative at the current point.
    u -= learning_rate * error_func_du(u, v)
    # Step 2: move along v only, with the derivative re-evaluated at the updated u.
    v -= learning_rate * error_func_dv(u, v)

In [55]:
# Error at the (u, v) reached by the 15 coordinate-descent iterations above
error_func(u, v)

0.13981379199615315

# Problem 8

In [1]:
import random

In [43]:
#### Helper functions ####

def generate_point():
    '''Draw a point uniformly at random from the square [-1, 1] x [-1, 1].

    Returns the point as a two-element list [x, y].
    '''
    x_coord = random.uniform(-1, 1)
    y_coord = random.uniform(-1, 1)
    return [x_coord, y_coord]

def define_f():
    '''Pick a random target line f(x) = slope * x + y_int.

    The line passes through two points drawn from [-1, 1] x [-1, 1].
    If both points share the same x-coordinate the line would be vertical
    and its slope undefined, so the second point is redrawn until the
    x-coordinates differ instead of raising ZeroDivisionError.

    Returns:
        (slope, y_int) as a tuple of floats.
    '''
    p1 = generate_point()
    p2 = generate_point()
    # A vertical line is extremely unlikely, but one bad draw would otherwise
    # abort a whole batch of simulations.
    while p2[0] == p1[0]:
        p2 = generate_point()
    slope = (p2[1] - p1[1]) / (p2[0] - p1[0])
    y_int = p1[1] - slope * p1[0]  # From point-slope form
    return slope, y_int

def evaluate_point(pt, slope, y_int):
    '''Classify pt against the line f(x) = slope * x + y_int.

    Returns 1 when the point lies on or above the line, -1 when it lies
    strictly below. pt[0] is the x coordinate, pt[1] the y coordinate.
    '''
    x_coord, y_coord = pt[0], pt[1]
    line_height = slope * x_coord + y_int
    return 1 if y_coord >= line_height else -1

def build_data_set(N, slope, y_int):
    '''Create N labelled examples for the target f(x) = slope * x + y_int.

    Each example is a tuple (x, y): x is the array [1.0, x1, x2], where the
    leading 1.0 is the artificial coordinate that absorbs the bias term and
    (x1, x2) is drawn from [-1, 1] x [-1, 1]; y is the +1 / -1 label that
    evaluate_point assigns to (x1, x2) relative to f.

    Returns a list of N such tuples.
    '''
    def labelled_example():
        point = generate_point()
        label = evaluate_point(point, slope, y_int)
        # Prepend the artificial coordinate before converting to an array
        return (np.array([1.] + point), label)

    return [labelled_example() for _ in range(N)]
    
def logit_grad(xn, yn, weights):
    '''Gradient of the logistic loss ln(1 + exp(-yn * w.xn)) at one example.

    xn is the input vector, yn its +1 / -1 label and weights the current
    weight vector w. The result has the same shape as xn.
    '''
    margin = yn * np.dot(weights.T, xn)
    return -1 * yn * xn / (1 + np.exp(margin))

In [59]:
# Experiment configuration
learning_rate = 0.01    # SGD step size
N = 100                 # training points per simulation
test_set_size = 1000    # fresh points used to estimate the out-of-sample error
threshold = 0.01        # stop once an epoch moves the weights by less than this
num_simulations = 100   # independent runs that the results are averaged over
seed = 0                # change to draw a different (but still repeatable) set of runs

# Points come from `random` and epoch shuffling from `np.random`; seed both so
# that re-running this cell reproduces the same averages.
random.seed(seed)
np.random.seed(seed)

avg_num_epochs = 0
avg_cross_ent_error = 0

for _ in range(num_simulations):
    # Generate the target function and the training set
    slope, y_int = define_f()
    training_set = build_data_set(N, slope, y_int)
    weights = np.zeros(3)
    old_weights = weights

    # Train a logistic regression model with stochastic gradient descent
    num_epochs = 0
    while True:
        # One epoch = one pass over the training points in a fresh random order
        np.random.shuffle(training_set)
        for xn, yn in training_set:
            weights = weights - learning_rate * logit_grad(xn, yn, weights)
        num_epochs += 1
        # Compare the weights at the end of this epoch with those at the end
        # of the previous one
        if np.linalg.norm(old_weights - weights) < threshold:
            break
        old_weights = weights
    avg_num_epochs += num_epochs / num_simulations

    # Estimate the cross-entropy error on a fresh test set
    test_set = build_data_set(test_set_size, slope, y_int)
    test_x = np.array([pt[0] for pt in test_set])
    test_y = np.array([pt[1] for pt in test_set])
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow for large z
    cross_ent_error = np.mean(np.logaddexp(0, -test_y * test_x.dot(weights)))
    avg_cross_ent_error += cross_ent_error / num_simulations

In [60]:
# Test-set cross-entropy error, averaged over all simulation runs
avg_cross_ent_error

0.10322702878421613

In [61]:
# Number of epochs until the stopping criterion was met, averaged over all simulation runs
avg_num_epochs

334.42