In [1]:
using BenchmarkTools, Compat, DataFrames, ForwardDiff

In [2]:
# Basic trust region with conjugate gradient.

# Read the space-separated data file; it has no header row.
df = readtable("data/model_australia.txt", header = false, separator = ' ')

# Rename the columns to C1, C2, C3 and C4.
names!(df, [Symbol("C", i) for i in 1:4])

# Show the first rows of the table.
head(df)

Unnamed: 0,C1,C2,C3,C4
1,1,0,0,0
2,0,1,0,0
3,0,0,1,0
4,35,0,0,0
5,69,34,35,0
6,70,71,70,30


In [3]:
# Parameters of the basic trust-region method. All four fields share the same
# real type T. They are compared with / applied to the ratio ρ and the radius Δ
# stored in BTRState.
immutable BasicTrustRegion{T<:Real}
    η1::T  # acceptance threshold: a candidate is accepted when ρ >= η1
    η2::T  # expansion threshold: the radius may grow when ρ >= η2
    γ1::T  # factor multiplied into the radius when ρ < η1
    γ2::T  # factor multiplied into the radius when η1 <= ρ < η2
end

"""
    BTRDefaults()

Return a `BasicTrustRegion` with η1 = 0.01, η2 = 0.9, γ1 = 0.5 and γ2 = 0.5.
"""
BTRDefaults() = BasicTrustRegion(0.01, 0.9, 0.5, 0.5)

# Mutable working state of the trust-region iteration.
type BTRState
    iter::Int      # number of iterations performed so far
    β::Vector      # current iterate
    βcand::Vector  # candidate iterate (current iterate plus step)
    g::Vector      # gradient storage, filled in place by the gradient callback
    step::Vector   # most recently computed step
    Δ::Float64     # trust-region radius
    ρ::Float64     # ratio of the actual change in f to the change predicted by the model

    # Creates an instance without initialising any field; the caller must assign
    # a field before reading it.
    function BTRState()
        return new()
    end
end

In [4]:
"""
    acceptCandidate!(state::BTRState, b::BasicTrustRegion)

Return `true` when the ratio `state.ρ` is at least the acceptance threshold `b.η1`,
and `false` otherwise. Neither argument is modified.
"""
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    return state.ρ >= b.η1
end

"""
    updateRadius!(state::BTRState, b::BasicTrustRegion)

Update the radius `state.Δ` from the ratio `state.ρ`:

- `ρ >= η2`: the radius becomes `min(1e20, max(4*norm(state.step), Δ))`;
- `η1 <= ρ < η2`: the radius is multiplied by `γ2`;
- otherwise: the radius is multiplied by `γ1`.

Returns the value assigned to `state.Δ`.
"""
function updateRadius!(state::BTRState, b::BasicTrustRegion)
    current = state.Δ
    # The comparisons are kept in this order so that a NaN ratio falls through
    # to the last branch.
    updated = if state.ρ >= b.η2
        min(1e20, max(4*norm(state.step), current))
    elseif state.ρ >= b.η1
        current*b.γ2
    else
        current*b.γ1
    end
    state.Δ = updated
    return updated
end

updateRadius! (generic function with 1 method)

In [5]:
"""
    ConjugateGradient(A::Matrix, b::Vector, β0::Vector, δ::Float64 = 1e-6)

Solve `A*β = -b` with the conjugate gradient iteration, starting from `β0`.
(The method is intended for a symmetric positive definite `A`; this is not checked.)

The loop runs while the squared norm of the residual `b + A*β` exceeds `δ^2`. After
the loop, one further step is taken along the last search direction.

A search direction with exactly zero curvature `d'Ad` is never divided by: the current
iterate is returned instead. This covers an exactly converged residual, for which the
unguarded final step would otherwise turn the result into `NaN`s.

`β0` is not modified. Returns the final iterate.
"""
function ConjugateGradient(A::Matrix, b::Vector, β0::Vector, δ::Float64 = 1e-6)
    β = β0
    g = b + A*β
    d = -g
    tol = δ*δ
    while dot(g, g) > tol
        Ad = A*d
        curvature = dot(d, Ad)
        # A zero denominator would give an Inf/NaN step length; keep the current iterate.
        curvature == 0 && break
        α = -dot(d, g)/curvature
        β += α*d          # rebinds β to a new array, so β0 stays untouched
        g = b + A*β
        γ = dot(g, Ad)/curvature
        d = -g + γ*d
    end
    # Final step along the last direction, skipped when the direction has zero
    # curvature (e.g. d == 0 after exact convergence).
    curvature = dot(d, A*d)
    if curvature != 0
        α = -dot(d, g)/curvature
        β += α*d
    end
    return β
end

ConjugateGradient (generic function with 2 methods)

In [6]:
"""
    btr(f::Function, g!::Function, H!::Function, Step::Function, β0::Vector)

Minimise `f` with a basic trust-region iteration started at `β0`.

# Arguments
- `f`: objective, called as `f(β)`.
- `g!`: called as `g!(β, storage)`; must write the gradient at `β` into `storage`.
- `H!`: called as `H!(β, storage)`; must write the Hessian at `β` into `storage`.
- `Step`: called as `Step(H, g, s0)`; must return the step for the quadratic model with
  Hessian `H` and gradient `g`, using `s0` as its starting point.
- `β0`: starting point; it is not modified.

The loop stops when the squared gradient norm is at most `1e-12` or after 1000
iterations. A candidate is accepted according to `acceptCandidate!`, and the radius is
updated with `updateRadius!` after every iteration.

NOTE(review): the radius `state.Δ` is updated but never passed to `Step`, so the step
length is not restricted by it.

Returns the tuple `(β, iterations)`.
"""
function btr(f::Function, g!::Function, H!::Function, Step::Function, β0::Vector)
    b = BTRDefaults()
    state = BTRState()
    state.iter = 0
    state.Δ = 1.0
    state.β = β0
    n = length(β0)
    tol = 1e-6*1e-6     # threshold on the squared gradient norm
    nmax = 1000         # iteration cap
    state.g = zeros(n)
    H = zeros(n, n)
    fβ = f(β0)
    g!(β0, state.g)
    H!(β0, H)

    # Change in the quadratic model produced by the step s.
    function model(s::Vector, g::Vector, H::Matrix)
        return dot(s, g)+0.5*dot(s, H*s)
    end

    while dot(state.g, state.g) > tol && state.iter < nmax
        # The subproblem is solved for the step, not for the iterate, so it is
        # started from the zero step rather than from the starting point β0.
        state.step = Step(H, state.g, zeros(n))
        state.βcand = state.β+state.step
        fcand = f(state.βcand)
        # Actual change in f divided by the change predicted by the model.
        state.ρ = (fcand-fβ)/(model(state.step, state.g, H))
        if acceptCandidate!(state, b)
            state.β = copy(state.βcand)
            g!(state.β, state.g)
            H!(state.β, H)
            fβ = fcand
        end
        updateRadius!(state, b)
        state.iter += 1
    end
    return state.β, state.iter
end

btr (generic function with 1 method)

In [7]:
"""
    individual(β::Vector, i::Int64)

Build the probability function for observation `i` from the global table `df`.

Rows `7i-6` to `7i-1` of `df` are read as the data of the observation and row `7i` as
its indicator row: the first column holding `1` is the chosen one, and the columns
holding `0` are the alternatives. The argument `β` is not used while building the
function.

The returned function `probability(β, t = 0.0)` evaluates
`1/(1 + t + Σ exp(u_alt - u_choice))`, where `u_k` is the dot product of column `k` of
the data with `β` and the sum runs over the alternatives.
"""
function individual(β::Vector, i::Int64)
    r = 7i
    attributes = convert(Array, df[r-6:r-1, :])
    indicator = convert(Array, df[r:r, :])
    others = find(indicator .== 0)
    picked = find(indicator .== 1)[1]

    function probability(β::Vector, t::Float64 = 0.0)
        # Utility of the chosen column, used as the reference level.
        reference = dot(vec(attributes[:, picked]), β)
        for alt in others
            t += exp(dot(vec(attributes[:, alt]), β)-reference)
        end
        return 1/(1+t)
    end

    return probability
end

individual (generic function with 1 method)

In [8]:
"""
    f(β::Vector, model::Float64 = 0.0, n::Int64 = 210)

Return `-(model + Σ log(pᵢ(β)))/n`, where `pᵢ` is the probability function built by
`individual(β, i)` and the sum runs over `i = 1, …, n`. `model` is the starting value
of the accumulated sum.
"""
function f(β::Vector, model::Float64 = 0.0, n::Int64 = 210)
    for obs in 1:n
        model += log(individual(β, obs)(β))
    end
    return -model/n
end

f (generic function with 3 methods)

In [9]:
"""
    g(β::Vector, t::Float64 = 0.0, n::Int64 = 210)

Return `-(t + Σ ∇pᵢ(β)/pᵢ(β))/n` as a vector, where `pᵢ` is the probability function
built by `individual(β, i)`, the gradient is computed with `ForwardDiff.gradient`, and
the sum runs over `i = 1, …, n`. The scalar `t` is added to every component of the sum.

The sum is accumulated in a vector from the start, so the accumulator keeps one type
throughout and no scalar-plus-array addition is needed. The result is a vector of the
same length as `β`, also when `n == 0`.
"""
function g(β::Vector, t::Float64 = 0.0, n::Int64 = 210)
    # Seed every component with t; this matches adding the scalar t to the first term.
    total = fill(t, length(β))
    for i = 1:n
        probability = individual(β, i)
        total += (1/probability(β))*ForwardDiff.gradient(probability, β)
    end
    return -total/n
end

"""
    g!(β::Vector, storage::Vector)

Evaluate `g(β)` and write its entries into the leading positions of `storage`.
Returns a copy of the values that were written.
"""
function g!(β::Vector, storage::Vector)
    grad = g(β)
    span = 1:length(grad)
    values = grad[span]
    storage[span] = values
    return values
end

g! (generic function with 1 method)

In [10]:
"""
    H(β::Vector)

Return the Hessian of `f` at `β`, computed with `ForwardDiff.hessian`.
"""
H(β::Vector) = ForwardDiff.hessian(f, β)

"""
    H!(β::Vector, storage::Matrix)

Evaluate `H(β)` and write it into the leading `rows × cols` block of `storage`, where
`rows, cols = size(H(β))`. Returns the written values as a flat vector in column-major
order.
"""
function H!(β::Vector, storage::Matrix)
    hess = H(β)
    rows, cols = size(hess)
    flat = hess[:]
    storage[1:rows, 1:cols] = flat
    return flat
end

H! (generic function with 1 method)

In [11]:
# Time a complete run of btr with ConjugateGradient as the step solver, started from
# the zero vector of length 6.
@btime btr(f, g!, H!, ConjugateGradient, zeros(6))

  294.373 ms (764179 allocations: 81.25 MiB)


([5.20743, 3.86904, 3.16319, 0.0132871, -0.0961247, -0.0155015], 5)