# Gradient Descent

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import uniform,randint, random, shuffle, sample
from __future__ import division

In [2]:
def descend(E_in, u, v, eta):
    """
    Take one gradient-descent step on the error surface
    E(u, v) = (u*e^v - 2*v*e^-u)^2

    E_in -- previous error value; accepted for call compatibility but never
            read, the error is recomputed at the updated point
    u, v -- current coordinates
    eta  -- learning rate (the driver code passes 0.1)

    Returns the tuple (E_in, u, v) evaluated after the step.
    """
    # Both partial derivatives are evaluated at the *old* point and share
    # the factor 2 * (u*e^v - 2*v*e^-u).
    exp_v = np.exp(v)
    exp_neg_u = np.exp(-u)
    residual = u * exp_v - 2 * v * exp_neg_u

    # dE/du = 2 * residual * (e^v + 2*v*e^-u)
    grad_u = 2 * residual * (exp_v + 2 * v * exp_neg_u)
    # dE/dv = 2 * residual * (u*e^v - 2*e^-u)
    grad_v = 2 * residual * (u * exp_v - 2 * exp_neg_u)

    # Move both coordinates simultaneously against the gradient
    next_u = u - eta * grad_u
    next_v = v - eta * grad_v
    error = (next_u * np.exp(next_v) - 2 * next_v * np.exp(-next_u)) ** 2

    return (error, next_u, next_v)

# Start from (u, v) = (1, 1) and keep stepping until the error is at most 1e-14
u, v = 1, 1
E_in = (u * np.exp(v) - 2 * v * np.exp(-u)) ** 2
n = 0  # number of gradient-descent steps taken so far

while E_in > 1E-14:
    # descend returns (error, u, v) at the updated point
    new_values = descend(E_in, u, v, 0.1)
    E_in, u, v = new_values
    n += 1

# Report the iteration count and the final coordinates
print(n)
print(u)
print(v)

10
0.0447362903978
0.0239587140991


# Co-ordinate Descent

In [3]:
def co_descend(E_in, u, v, eta):
    """
    Take one coordinate-descent iteration on the error surface
    E(u, v) = (u*e^v - 2*v*e^-u)^2

    The iteration consists of two steps: first u moves along dE/du, then v
    moves along dE/dv evaluated at the already-updated u.

    E_in -- previous error value; accepted for call compatibility but never
            read, the error is recomputed at the updated point
    u, v -- current coordinates
    eta  -- learning rate (the driver code passes 0.1)

    Returns the tuple (E_in, u, v) evaluated after both steps.
    """
    # Step 1: move along u only.
    # dE/du = 2 * (u*e^v - 2*v*e^-u) * (e^v + 2*v*e^-u)
    base = u * np.exp(v) - 2 * v * np.exp(-u)
    slope_u = 2 * base * (np.exp(v) + 2 * v * np.exp(-u))
    new_u = u - eta * slope_u

    # Step 2: move along v only, using the u produced by step 1.
    # dE/dv = 2 * (u*e^v - 2*v*e^-u) * (u*e^v - 2*e^-u)
    base = new_u * np.exp(v) - 2 * v * np.exp(-new_u)
    slope_v = 2 * base * (new_u * np.exp(v) - 2 * np.exp(-new_u))
    new_v = v - eta * slope_v

    error = (new_u * np.exp(new_v) - 2 * new_v * np.exp(-new_u)) ** 2

    return (error, new_u, new_v)

# Start from (u, v) = (1, 1) and run exactly 15 coordinate-descent iterations
u, v = 1, 1
E_in = (u * np.exp(v) - 2 * v * np.exp(-u)) ** 2

# n counts the iterations performed; it is 15 once the loop finishes
for n in range(1, 16):
    # co_descend returns (error, u, v) at the updated point
    new_values = co_descend(E_in, u, v, 0.1)
    E_in, u, v = new_values

# Report the iteration count, the final coordinates and the final error
print(n)
print(u)
print(v)
print(E_in)

15
6.29707589931
-2.85230695408
0.139813791996


# Logistic Regression
### Using stochastic gradient descent

In [42]:
def f():
    """
    Build a random line through two points of the square [-1, 1] x [-1, 1].

    Both points are drawn with uniform(-1, 1) per coordinate, in the order
    x1, y1, x2, y2.

    Returns the tuple (m, c): gradient and intercept of the line y = m*x + c
    passing through the two points. Raises ZeroDivisionError if both points
    happen to share the same x coordinate.
    """
    x1 = uniform(-1, 1)
    y1 = uniform(-1, 1)
    x2 = uniform(-1, 1)
    y2 = uniform(-1, 1)

    slope = (y2 - y1) / (x2 - x1)
    # Intercept follows from requiring the line to pass through (x1, y1)
    intercept = y1 - slope * x1

    return (slope, intercept)

def arrays(n):
    """
    Generate n sample points, each coordinate drawn with uniform(-1, 1).

    n -- number of points to generate

    Returns an (n, 3) float array whose rows are [1, x1, x2]; the leading 1
    is the constant coordinate x_0. The shape is (n, 3) for every n,
    including n == 0, so callers can index columns without special-casing
    the empty result.
    """
    ar = np.ones((n, 3))
    # Rows are filled from the last one down to the first, so a seeded
    # random generator assigns the same draws to the same rows as before.
    for row in range(n - 1, -1, -1):
        ar[row] = [1, uniform(-1, 1), uniform(-1, 1)]
    return ar

def yarray(x, m, c, n):
    """
    Label every point of x by the side of the line y = m*x + c it lies on.

    x    -- points, read as x[i, 1] (horizontal) and x[i, 2] (vertical)
    m, c -- gradient and intercept of the line
    n    -- length of the returned array

    Returns a float array of length n. Entry i is +1 when point i lies
    strictly above the line and -1 otherwise (a point exactly on the line
    gets -1). Entries at positions >= len(x) are left at 0.
    """
    ys = np.zeros(n)
    for i in range(len(x)):
        line_height = m * x[i, 1] + c
        ys[i] = 1 if x[i, 2] > line_height else -1
    return ys

def e_out(w, x, y):
    """
    Return the mean cross-entropy error of the weights w on the given data:

        (1 / N) * sum_i ln(1 + exp(-y_i * dot(x_i, w)))

    w -- weight vector
    x -- points, one per row
    y -- labels, y[i] belonging to x[i]

    Passing freshly generated points yields the out-of-sample error E_out.
    Raises ZeroDivisionError when x is empty.
    """
    total = 0
    for i in range(len(x)):
        # logaddexp(0, z) equals ln(1 + e^z) but does not overflow to inf
        # when z is large, unlike np.log(1 + np.exp(z)).
        total += np.logaddexp(0, -y[i] * np.dot(x[i], w))

    return total / len(x)

def log_reg(w, x, y, eta=0.01):
    """
    Run one epoch of stochastic gradient descent for logistic regression.

    The points are visited once each, in the order given; after every point
    the weights move against the gradient of that point's cross-entropy
    error ln(1 + exp(-y_i * dot(x_i, w))).

    w   -- starting weight vector (not modified)
    x   -- points, one per row
    y   -- labels, y[i] belonging to x[i]
    eta -- learning rate, 0.01 by default

    Returns the weight vector after the epoch. When x is empty, w itself is
    returned.
    """
    for i in range(len(x)):
        # Gradient of the single-point error with respect to w
        dw = -y[i] * x[i] / (1 + np.exp(y[i] * np.dot(x[i], w)))
        # Rebind instead of updating in place: callers keep a reference to
        # the previous weights to measure how far an epoch moved them.
        w = w - eta * dw
    return w

# Accumulators over all runs: N sums the epochs needed to converge,
# E_out sums the error measured on fresh points.
N = 0
E_out = 0
# Repeat the experiment 100 times and take average values
for j in range(100):

    # Random target line and 100 training points labelled by it
    (m, c) = f()
    x = arrays(100)
    y = yarray(x, m, c, 100)
    # Initial weight vector
    w = np.zeros(3)
    n = 0
    # Converged once the weights move by less than 0.01 over a whole epoch
    convergence = False

    while not convergence:
        n += 1
        # Visit the training points in a fresh random order every epoch.
        # list(...) is required because shuffle needs a mutable sequence.
        index = list(range(len(x)))
        shuffle(index)
        # Index x and y with the same permutation so labels stay paired
        # with their points.
        x_shuf = x[index]
        y_shuf = y[index]
        w_old = w
        w = log_reg(w, x_shuf, y_shuf)
        if np.linalg.norm(w - w_old) < 0.01:
            convergence = True

    # Estimate the error of the final weights on 1000 fresh points
    x_out = arrays(1000)
    y_out = yarray(x_out, m, c, 1000)
    error = e_out(w, x_out, y_out)
    N += n
    E_out += error

# Average number of epochs and average error over the 100 runs
print(N / 100)
print(E_out / 100)

340.09
0.103527927403


In [31]:
# Quick check of np.dot on two plain lists: 3*1 + 4*1
print(np.dot([3, 4], [1, 1]))

7
