Group: Austin Wang, Joe Higgins, Lawrence Moore

In [1]:
import numpy as np
e = np.exp(1)

## Data

In order to prevent our data from being linearly separable (and therefore leading the solution blowing up for logistic regression), we slightly alter the (0,0) data points so that the data is no longer linearly separable.

In [2]:
pos_data = np.matrix([
    [ -0.01, -0.01],
    [ 1, 0],
    [ 0, 1]
])
neg_data = np.matrix([
    [ 0.01, 0.01],
    [-1, 0],
    [ 0,-1]
])

# Add shift variable
pos_data = np.hstack((pos_data, np.ones((3,1))))
neg_data = np.hstack((neg_data, np.ones((3,1))))

# Define gradient function
def grad(x):
    
    pos_data_sum = \
        e**(-1*np.dot(pos_data[0,:], x))/(1 + e**(-1*np.dot(pos_data[0,:], x))) * \
        pos_data[0,:] + e**(-1*np.dot(pos_data[1,:], x))/ \
        (1 + e**(-1*np.dot(pos_data[1,:], x))) * pos_data[1,:] + \
        e**(-1*np.dot(pos_data[2,:], x))/(1 + e**(-1*np.dot(pos_data[2,:], x))) * \
        pos_data[2,:]

    neg_data_sum = \
        e**(np.dot(neg_data[0,:], x))/(1 + e**(np.dot(neg_data[0,:], x))) * \
        neg_data[0,:] + e**(np.dot(neg_data[1,:], x))/ \
        (1 + e**(np.dot(neg_data[1,:], x))) * neg_data[1,:] + \
        e**(np.dot(neg_data[2,:], x))/(1 + e**(np.dot(neg_data[2,:], x))) * \
        neg_data[2,:]
        
    return np.transpose(-1*pos_data_sum + neg_data_sum)

## Steepest Descent

In [3]:
# Initialize looping variables
x_0 = np.matrix([
    [0], 
    [0],
    [0]
])
check = 1000
max_iter = 10000
k = 0
x_k = x_0

# Perform algorithm
alpha = 0.5 # Learning rate
while check > 10**-8 and k < max_iter:

    x_k1 = x_k - alpha * grad(x_k)
    check = np.linalg.norm(x_k1 - x_k)
    x_k = x_k1
    k = k+1

In [4]:
print('Solution converged to: ')
print(x_k1)
print('Number of Iterations Necessary: ', k)

Solution converged to: 
[[5.24199363]
 [5.24199363]
 [0.        ]]
Number of Iterations Necessary:  2557


## Accelerated Steepest Descent

In [5]:
# Get next lambda
def next_lam(x):
    res = (1+np.sqrt(1+4*(x)**2))/2
    return res

# Initial values
x_k = x_0
x_k_til = np.matrix((np.random.rand(3,1)*0.01))
lam_k = 0
check = 1000
max_iter = 10000
k = 0
beta = 1

# Perform algorithm
while check > 10**-8 and k < max_iter:
    
    x_k_til1 = x_k-(1/beta)*grad(x_k)
    lam_k1 = next_lam(lam_k)
    alpha_k = (1-lam_k)/lam_k1
    x_k1 = (1-alpha_k)*x_k_til1 + alpha_k*x_k_til
    check = np.linalg.norm(x_k1 - x_k)
    x_k = x_k1
    x_k_til = x_k_til1
    lam_k = lam_k1
    k = k+1

In [6]:
print('Solution converged to: ')
print(x_k1)
print('Number of Iterations Necessary: ', k)

Solution converged to: 
[[ 5.24195789e+00]
 [ 5.24195789e+00]
 [-1.76615718e-16]]
Number of Iterations Necessary:  805


## Conjugate Direction

In [7]:
# Initial values
check = 1000
max_iter = 10000
k = 0
x_k = x_0
g_0 = grad(x_0)
g_k = g_0
d_0 = -1 * g_0
d_k = d_0
alpha = 0.2

# Perform algorithm
while check > 10**-8 and k < max_iter:

    # Next iterates
    x_k1 = x_k + alpha * d_k
    g_k1 = grad(x_k1)
    B_k = (np.linalg.norm(g_k1)**2)/(np.linalg.norm(g_k)**2)
    d_k1 = -1*g_k1 + B_k*d_k
    
    # Set next iterates as current iterates
    check = np.linalg.norm(x_k1 - x_k)
    x_k = x_k1
    d_k = d_k1
    g_k = g_k1
    k = k+1

In [8]:
print('Solution converged to: ')
print(x_k1)
print('Number of Iterations Necessary: ', k)

Solution converged to: 
[[5.24199482]
 [5.24199482]
 [0.        ]]
Number of Iterations Necessary:  371


## BB Method

In [9]:
# Get new alpha
def new_alpha(x_k, x_km1):
    delta_x = np.subtract(x_k, x_km1)
    delta_grad_x = np.subtract(grad(x_k), grad(x_km1))
    return np.asscalar(np.dot(np.transpose(delta_x), delta_grad_x) / 
                       np.dot(np.transpose(delta_grad_x), delta_grad_x))

# Initial values
max_iter = 10000
k = 0
alpha = 0.5
x_km1 = x_0
x_k = x_km1 - alpha * grad(x_km1)
check = np.linalg.norm(x_k - x_km1)

# Peform algorithm
while check > 10**-8 and k < max_iter:
    alpha = new_alpha(x_k, x_km1)
    x_kp1 = x_k - alpha * grad(x_k)
    check = np.linalg.norm(x_kp1 - x_k)

    x_km1 = x_k
    x_k = x_kp1
    k = k+1

In [10]:
print('Solution converged to: ')
print(x_k)
print('Number of Iterations Necessary: ', k)

Solution converged to: 
[[5.24199496]
 [5.24199496]
 [0.        ]]
Number of Iterations Necessary:  12


## Conclusion

Using a tolerance of 10^-8, and with the data slightly altered to make it linearly separable, we found that the algorithms from fastest to slowest are:

1. BB Method (12 iterations)
2. Conjugate Direction (371 iterations)
3. Accelerated Steepest Descent (805 iterations)
4. Steepest Descent (2557 iterations)