# Import packages

In [1]:
using Toms566
using PyPlot
using MathProgBase
using ReverseDiffSource

## Define: gradient descent method
* Uses eigenvalues of the Hessian to adjust step size

In [2]:
function gradDescentMethod(obj, x0; maxIts = 1000, optTol = 1.0e-8, verbose=false)
    its = 0
    (f,g,H) = obj(x0)
    g0 = g
    x = x0
    
    while (its < maxIts && norm(g,2) > optTol && norm(g,2) > (1.0e-4*norm(g0,2)) )
        (f,g,H) = obj(x)
        # Step size based on eigenvalue
        H_eigs = eig(H)[1]
        maxeig = maximum(H_eigs)
        alpha = 1/maxeig
        
        d = -g

        x = x + alpha * d
        
        its += 1
    end
    
    if verbose
        print("Done!\n")
        @printf "Optimal value: %f\n" f
        print("Location: \n")
        print(x)
        print("\n")
        @printf "Iterations: %d\n" its 
        print("\n\n")
    end
    
    return (x,f,norm(g,2),its) 
end

Qt: Untested Windows version 10.0 detected!


gradDescentMethod (generic function with 1 method)

## Define: Newton's method
* Fixes Hessian to be positive definite
* Uses backtracking linesearch

In [159]:
function newtonMethod(obj, x0; maxIts = 500, optTol = 1.0e-8, verbose=false)
    its = 0
    x = x0
    fvals = []
    gnormvals = []
    (f,g,H) = obj(x)
    g0 = g
    
    while (its < maxIts && norm(g,2) > optTol && norm(g,2) > (1.0e-4*norm(g0,2)))
        (f,g,H) = obj(x)
        
        # Modify Hessian if not positive definite
        E = eigfact(H);
        V = E[:vectors];
        lambda = diagm(max(E[:values],1e-2));
        d = -V*inv(lambda)*transpose(V)*g;
        
        # Backtracking linesearch
        alpha = 1;
        mu = 10^-2.0;
        (newf,newg,newH) = obj(x+alpha*d);
        while newf > f + (alpha*mu)*(dot(g,d))
            (newf, newg, newH) = obj(x + alpha*d);
            alpha = alpha/2;
        end
        
        x = x + alpha * d
        
        its += 1
        fvals = [fvals; f]
        gnormvals = [gnormvals; norm(g,2)]
    end
    
    if verbose
        print("Done!\n")
        @printf "Optimal value: %f\n" f
        print("Location: \n")
        print(x)
        print("\n")
        @printf "Iterations: %d\n" its
        print("\n\n")
    end
    
    return (x,f,norm(g,2),its)
end

newtonMethod (generic function with 1 method)

## Problem 1: Toms566 problems

In [160]:
print("Running Dmitry's Newton method on all problems!\n\n")
@printf "%5s %30s %20s %20s %20s \n" "i" "Problem name" "f(x)" "|grad(f(x))|" "its"

for i = 1:18
    p = Problem(i)
    
    function obj(x)
        return (p.obj(x), p.grd(x), p.hes(x))
    end
    
    (x,f,gnorm,its) = newtonMethod(obj,p.x0)
    @printf "%5d %30s %20f %20f %20d \n" i p.name f gnorm its
    
end

Running Dmitry's Newton method on all problems!

    i                   Problem name                 f(x)         |grad(f(x))|                  its 
    1                Hellical valley             0.000010             0.110821                   35 
    2                    Bigg's EXP6             0.255601             0.138184                  500 
    3                       Gaussian             0.000000             0.000000                    3 
    4                         Powell             0.000101             1.999961                   28 
    5                      Box 3-dim             0.000165             0.001989                   19 
    6           Variably dimensioned         31543.569717       1392845.689824                    4 
    7                         Watson             3.527223             5.749285                  500 
    8                      Penalty I          6396.051688          2865.310581                    9 
    9                     Penalty II      

### Comments:
*14 and 15 took a really long time. Probably has ugly Hessians.

# PROBLEM 2

## Extract the data

In [4]:
# Get the data
data = readdlm("binary.csv",',','\r');
# Separate the independent var
y = data[2:end-1 , 1];
# Separate the dependent vars
u = data[2:end-1 , 2:4];
# Pad with 1's for \beta
o = ones(size(u,1));
u = [u o];
m = size(y,1);
n = 4;

## Define likelihood

In [147]:
# Define the likelihood function
expr = :(valL = 0;
    for i=1:m
        valL = valL - (y[i]*(a[1]*u[i,1] + a[2]*u[i,2] + a[3]*u[i,3] + a[4]*u[i,4])) + log(1 + exp(a[1]*u[i,1]+a[2]*u[i,2]+a[3]*u[i,3]+a[4]*u[i,4]))
    end;
return valL
)

quote 
    valL = 0
    begin 
        for i = 1:m # In[147], line 4:
            valL = (valL - y[i] * (a[1] * u[i,1] + a[2] * u[i,2] + a[3] * u[i,3] + a[4] * u[i,4])) + log(1 + exp(a[1] * u[i,1] + a[2] * u[i,2] + a[3] * u[i,3] + a[4] * u[i,4]))
        end
        return valL
    end
end

## Setup rdiff

In [148]:
diffL = rdiff( expr, a=Float64[0.1,0.1,0.1,1], order = 2);
@eval obj(a) = $diffL

## Newton!

In [156]:
newtonMethod(obj,a,verbose=true)

Done!


([0.0022939594979677455,0.7770135443955085,-0.560031364269944,-3.4495483310868513],229.72088458579955,5.579578808331352,5)

Optimal value: 229.720885
Location: 
[0.0022939594979677455,0.7770135443955085,-0.560031364269944,-3.4495483310868513]
Iterations: 5


