In [3]:
using BenchmarkTools, Compat, DataFrames, Distributions, ForwardDiff

In [4]:
# Basic trust region with truncated conjugate gradient.

# Read the space-separated, header-less data file and label its four columns x1..x4.
df = readtable("model_australia.txt", separator = ' ', header = false)

names!(df, [Symbol("x", i) for i in 1:4])

# Show the first rows as a sanity check.
head(df)

Unnamed: 0,x1,x2,x3,x4
1,1,0,0,0
2,0,1,0,0
3,0,0,1,0
4,35,0,0,0
5,69,34,35,0
6,70,71,70,30


In [5]:
# Parameters of the basic trust-region method.
#   η1: smallest ratio ρ for which acceptCandidate! accepts the candidate
#   η2: ratio ρ from which updateRadius! is allowed to enlarge the radius
#   γ1: factor applied to Δ by updateRadius! when ρ < η1
#   γ2: factor applied to Δ by updateRadius! when η1 <= ρ < η2
immutable BasicTrustRegion{T<:Real}
    η1::T
    η2::T
    γ1::T
    γ2::T
end

# Default trust-region parameters: η1 = 0.01, η2 = 0.9, γ1 = 0.5, γ2 = 0.5.
BTRDefaults() = BasicTrustRegion(0.01, 0.9, 0.5, 0.5)

# Mutable state of a trust-region run. The zero-argument constructor leaves every
# field uninitialised; `btr` assigns them as the iteration proceeds.
type BTRState
    iter::Int        # iteration counter
    β::Vector        # current iterate
    βcand::Vector    # candidate iterate (β + step)
    g::Vector        # gradient stored for the current iterate
    step::Vector     # last step returned by the step solver
    Δ::Float64       # trust-region radius
    ρ::Float64       # ratio of actual change in f to the change predicted by the model

    BTRState() = new()
end

In [6]:
# Return `true` when the ratio stored in `state.ρ` reaches the acceptance
# threshold `b.η1`, `false` otherwise. Neither argument is modified.
function acceptCandidate!(state::BTRState, b::BasicTrustRegion)
    return state.ρ >= b.η1
end

# Update the radius `state.Δ` in place from the ratio `state.ρ`:
#   ρ >= η2       -> Δ = min(1e20, max(4‖step‖, Δ))
#   η1 <= ρ < η2  -> Δ *= γ2
#   otherwise     -> Δ *= γ1   (this branch is also taken when ρ is NaN)
# The value of the function is the newly assigned radius.
function updateRadius!(state::BTRState, b::BasicTrustRegion)
    ratio = state.ρ
    if ratio >= b.η2
        state.Δ = min(1e20, max(4*norm(state.step), state.Δ))
    else
        # Keep the `>=` test so that a NaN ratio falls through to γ1.
        factor = ratio >= b.η1 ? b.γ2 : b.γ1
        state.Δ *= factor
    end
end

updateRadius! (generic function with 1 method)

In [7]:
# Truncated conjugate-gradient step solver for the quadratic model built from the
# gradient `g` and the matrix `H`, restricted to steps of norm at most `Δ`.
#
# The iteration stops when `stopCG` says so, when a direction with non-positive
# curvature (d'Hd <= 0) is met, or when the next CG point would leave the region;
# in the last two cases the step is extended along the current direction by the
# root σ computed in `boundaryroot`. `g` is only rebound locally (`g += ...`
# allocates a new vector), so the caller's gradient is left untouched.
function TruncatedCG(g::Vector, H::Matrix, Δ::Float64)
    dim = length(g)
    s = zeros(dim)
    gnorm0 = norm(g)
    v = g
    d = -v
    gv = dot(g, v)
    dd = gv          # running value tracked as ‖d‖²
    ss = 0           # running value tracked as ‖s‖² of the next CG point
    sd = 0           # running value tracked as s'd
    radius2 = Δ*Δ

    # Positive root σ of  dd*σ² + 2*sd*σ + (‖s‖² - Δ²) = 0.
    boundaryroot(s, d, sd, dd) = (-sd+sqrt(sd*sd+dd*(radius2-dot(s, s))))/dd

    iter = 0
    while !stopCG(norm(g), gnorm0, iter, dim)
        Hd = H*d
        curvature = dot(d, Hd)
        if curvature <= 0
            return s+boundaryroot(s, d, sd, dd)*d
        end
        α = gv/curvature
        ss += α*(2*sd+α*dd)
        if ss >= radius2
            return s+boundaryroot(s, d, sd, dd)*d
        end
        s += α*d
        g += α*Hd
        v = g
        gvnew = dot(g, v)
        β = gvnew/gv
        gv = gvnew
        d = -v+β*d
        sd = β*(sd+α*dd)
        dd = gv+β*β*dd
        iter += 1
    end
    return s
end

TruncatedCG (generic function with 1 method)

In [8]:
# Stopping rule for the conjugate-gradient loop.
#
# Returns `true` when the iteration count `k` equals `kmax`, or when the current
# gradient norm `normg` has dropped to at most `normg0*min(χ, normg0^θ)` with
# χ = 0.1 and θ = 0.5; returns `false` otherwise.
function stopCG(normg::Float64, normg0::Float64, k::Int, kmax::Int)
    χ = 0.1
    θ = 0.5
    # Checked first so the norm test is not evaluated once the budget is spent.
    k == kmax && return true
    return normg <= normg0*min(χ, normg0^θ)
end

stopCG (generic function with 1 method)

In [9]:
# Basic trust-region minimisation of `f` starting from `β0`.
#
# Arguments
#   f       : objective, called as f(β)
#   g!      : gradient, called as g!(β, storage) and expected to fill `storage`
#   H!      : with ApproxH == false, called as H!(β, H) to fill the Hessian in place;
#             with ApproxH == true, called as H!(H, y, step) and its return value
#             becomes the new Hessian approximation
#   Step    : step solver, called as Step(g, H, Δ)
#   β0      : starting point
#   state   : BTRState that is overwritten with the progress of the run
#   ApproxH : selects between the two meanings of H! described above
#
# The loop runs while the squared gradient norm exceeds 1e-6*1e-6 and fewer than
# 1000 iterations have been performed. Returns the tuple (state.β, state.iter).
function btr(f::Function, g!::Function, H!::Function, Step::Function, β0::Vector,
        state::BTRState = BTRState(), ApproxH::Bool = false)
    params = BTRDefaults()
    dim = length(β0)
    gradtol2 = 1e-6*1e-6
    maxiter = 1000

    # Change in f predicted by the quadratic model for a step s.
    predicted(s::Vector, g::Vector, H::Matrix) = dot(s, g)+0.5*dot(s, H*s)

    state.iter = 0
    state.β = β0
    state.g = zeros(dim)
    H = eye(dim, dim)
    fcurrent = f(β0)
    g!(β0, state.g)
    state.Δ = 0.1*norm(state.g)
    if ApproxH
        # The quasi-Newton variant starts from the identity and needs work vectors.
        y = zeros(dim)
        gcand = zeros(dim)
    else
        H!(β0, H)
    end

    while dot(state.g, state.g) > gradtol2 && state.iter < maxiter
        state.step = Step(state.g, H, state.Δ)
        state.βcand = state.β+state.step
        fcand = f(state.βcand)
        state.ρ = (fcand-fcurrent)/predicted(state.step, state.g, H)
        if ApproxH
            # The approximation is updated on every trial step, accepted or not.
            g!(state.βcand, gcand)
            y = gcand-state.g
            H = H!(H, y, state.step)
        end
        if acceptCandidate!(state, params)
            state.β = copy(state.βcand)
            if ApproxH
                state.g = copy(gcand)
            else
                g!(state.β, state.g)
                H!(state.β, H)
            end
            fcurrent = fcand
        end
        updateRadius!(state, params)
        state.iter += 1
    end
    return state.β, state.iter
end

btr (generic function with 3 methods)

In [10]:
# Build the choice-probability function of individual `i` from the global `df`.
#
# Rows 7i-6 .. 7i-1 of `df` (six rows) are taken as the data matrix and row 7i as
# the row of 0/1 indicators: the column holding 1 is the chosen one, the columns
# holding 0 are the alternatives. The `β` argument of `individual` itself is not
# used; only the returned closure reads a parameter vector.
#
# The returned `probability(β, t = 0.0)` evaluates
#     1 / (1 + t + Σ_alt exp(u_alt(β) - u_choice(β)))
# where u_j(β) is the dot product of column j of the data matrix with β.
function individual(β::Vector, i::Int64)
    lastrow = 7*i
    data = convert(Array, df[lastrow-6:lastrow-1, :])
    choices = convert(Array, df[lastrow:lastrow, :])
    alternatives = find(choices .== 0)
    choice = find(choices .== 1)[1]

    utility(β::Vector, j::Int64) = dot(vec(data[:, j]), β)

    function probability(β::Vector, t::Float64 = 0.0)
        uchosen = utility(β, choice)
        for alternative in alternatives
            t += exp(utility(β, alternative)-uchosen)
        end
        return 1/(1+t)
    end

    return probability
end

individual (generic function with 1 method)

In [11]:
# Objective: minus the average over individuals 1..n of the log of the probability
# returned by `individual(β, i)` evaluated at β. `model` is the starting value of
# the running sum of logs (0.0 by default).
function f(β::Vector, model::Float64 = 0.0, n::Int64 = 210)
    for i in 1:n
        model += log(individual(β, i)(β))
    end
    return -model/n
end

f (generic function with 3 methods)

In [12]:
# Gradient of the objective built from individuals 1..n: for each individual the
# ForwardDiff gradient of its probability is divided by the probability itself,
# the terms are summed, and the sum is negated and divided by n.
function g(β::Vector, n::Int64 = 210)
    total = zeros(length(β))
    for i in 1:n
        p = individual(β, i)
        total = total+(1/p(β))*ForwardDiff.gradient(p, β)
    end
    return -total/n
end

# In-place gradient: evaluate g(β) and copy it into the leading entries of
# `storage`. The value of the function is a copy of the computed gradient.
function g!(β::Vector, storage::Vector)
    grad = g(β)
    storage[1:length(grad)] = grad[:]
end

g! (generic function with 1 method)

In [13]:
# Hessian of the objective `f` at β, obtained with ForwardDiff.
H(β::Vector) = ForwardDiff.hessian(f, β)

# In-place Hessian: evaluate H(β) and write it into the top-left block of
# `storage`. The entries are passed as a flattened (column-major) copy, which is
# also the value of the function.
function H!(β::Vector, storage::Matrix)
    hess = H(β)
    rows, cols = size(hess)
    flat = hess[:]
    storage[1:rows, 1:cols] = flat
end

H! (generic function with 1 method)

In [14]:
# BFGS update of the matrix `B` for the pair (y, s):
#     B - (Bs)(Bs)'/(s'Bs) + y y'/(s'y)
# Returns a new matrix; `B` is not modified.
function BFGS(B::Matrix, y::Vector, s::Vector)
    Bs = B*s
    removed = (Bs*Bs')/dot(s, Bs)
    added = (y*y')/dot(s, y)
    return B-removed+added
end

# In-place BFGS update: overwrite `B` with
#     B - (Bs)(Bs)'/(s'Bs) + y y'/(s'y)
# and return the freshly computed matrix.
function BFGS!(B::Matrix, y::Vector, s::Vector)
    Bs = B*s
    updated = B-(Bs*Bs')/dot(s, Bs)+(y*y')/dot(s, y)
    rows, cols = size(B)
    B[1:rows, 1:cols] = updated
    return updated
end

BFGS! (generic function with 1 method)

In [15]:
# Symmetric rank-one (SR1) update of the matrix `B` for the pair (y, s):
#     B + r r'/(r's)   with r = y - B*s.
#
# The update is skipped (a copy of `B` is returned) when the denominator r's is
# zero or negligible relative to ‖s‖‖r‖, including the case r == 0 where `B`
# already maps s to y. Without this safeguard the division yields Inf/NaN entries.
# Always returns a new matrix; `B` is not modified.
function SR1(B::Matrix, y::Vector, s::Vector)
    r = y-B*s
    denom = dot(r, s)
    # Written with `>` so that a NaN denominator also skips the update.
    if abs(denom) > 1e-8*norm(s)*norm(r)
        return B+(r*r')/denom
    end
    return copy(B)
end

# In-place symmetric rank-one (SR1) update: overwrite `B` with
#     B + r r'/(r's)   with r = y - B*s.
#
# The update is skipped (`B` is left untouched) when the denominator r's is zero
# or negligible relative to ‖s‖‖r‖, including the case r == 0; otherwise the
# division would fill `B` with Inf/NaN entries. Returns `B`, which holds the
# updated values.
function SR1!(B::Matrix, y::Vector, s::Vector)
    r = y-B*s
    denom = dot(r, s)
    # Written with `>` so that a NaN denominator also skips the update.
    if abs(denom) > 1e-8*norm(s)*norm(r)
        n, m = size(B)
        B[1:n, 1:m] = B+(r*r')/denom
    end
    return B
end

SR1! (generic function with 1 method)

In [16]:
# Benchmark btr from zeros(6) with ApproxH = false: H! fills the Hessian in place.
@benchmark btr(f, g!, H!, TruncatedCG, zeros(6), BTRState(), false)

BenchmarkTools.Trial: 
  memory estimate:  148.94 MiB
  allocs estimate:  1401197
  --------------
  minimum time:     560.187 ms (3.53% GC)
  median time:      621.097 ms (3.86% GC)
  mean time:        655.335 ms (3.66% GC)
  maximum time:     917.922 ms (3.00% GC)
  --------------
  samples:          8
  evals/sample:     1

In [17]:
# Benchmark btr from zeros(6) with ApproxH = true: BFGS! updates the approximation.
@benchmark btr(f, g!, BFGS!, TruncatedCG, zeros(6), BTRState(), true)

BenchmarkTools.Trial: 
  memory estimate:  370.42 MiB
  allocs estimate:  3968254
  --------------
  minimum time:     1.562 s (3.90% GC)
  median time:      1.641 s (4.10% GC)
  mean time:        1.650 s (4.05% GC)
  maximum time:     1.755 s (4.10% GC)
  --------------
  samples:          4
  evals/sample:     1

In [18]:
# Benchmark btr from zeros(6) with ApproxH = true: SR1! updates the approximation.
@benchmark btr(f, g!, SR1!, TruncatedCG, zeros(6), BTRState(), true)

BenchmarkTools.Trial: 
  memory estimate:  327.32 MiB
  allocs estimate:  3506797
  --------------
  minimum time:     1.422 s (3.77% GC)
  median time:      1.487 s (3.87% GC)
  mean time:        1.483 s (3.82% GC)
  maximum time:     1.537 s (3.74% GC)
  --------------
  samples:          4
  evals/sample:     1

In [19]:
# Table with one row per individual (210 rows); its single column is renamed Score
# and later overwritten by `prep`.
mixed_logit = DataFrame(P = 1.0:210.0)
names!(mixed_logit, [:Score])

# Five-row scratch table, also with a single Score column, that `sim` writes into.
storage = DataFrame(P = 1.0:5.0)
names!(storage, [:Score])

# Seed the global random number generator.
srand(100)

MersenneTwister(UInt32[0x00000064], Base.dSFMT.DSFMT_state(Int32[-2036630343, 1072818225, 1299231502, 1073154435, 1563612565, 1073206618, 176198161, 1073683625, 381415896, 1073699088  …  163992627, 1073241259, 385818456, 1072878963, 399273729, 595433664, 390891112, 1704156657, 382, 0]), [1.18482, 1.94701, 1.3957, 1.75219, 1.91874, 1.35688, 1.21766, 1.37772, 1.65955, 1.10977  …  1.86745, 1.12758, 1.24194, 1.72613, 1.16021, 1.85644, 1.02689, 1.02149, 1.17622, 1.6859], 382)

In [20]:
# Simulate the probability of individual `k` over `rows` random draws.
#
# For each draw a parameter vector of six zeros is built, its fifth entry is
# replaced by a Uniform() sample, and the probability returned by
# `individual(…, k)` is stored in row i of the global `storage` table.
# Returns the mean of the `rows` values just written.
#
# The mean is taken over the first `rows` entries only, so calling `sim` with
# fewer rows than `storage` holds no longer averages in values left over from
# earlier calls. With the default `rows` the result is unchanged.
function sim(k::Int64, rows::Int64 = size(storage)[1])
    # `individual` does not read its vector argument, so the closure is the same
    # for every draw and can be built once.
    probability = individual(zeros(6), k)
    for i = 1:rows
        β = zeros(6)
        β[5] = rand(Uniform(), 1)[1]
        storage[i, 1] = probability(β)
    end
    return mean(storage[1:rows, 1])
end

sim (generic function with 2 methods)

In [21]:
# Fill rows 1..n of the global `mixed_logit` table with the value `sim` returns
# for the corresponding individual.
function prep(n::Int64 = 210)
    for individualindex in 1:n
        mixed_logit[individualindex, 1] = sim(individualindex)
    end
end

# Populate the table for the default 210 individuals.
prep()

In [22]:
# Show the first rows of the simulated scores.
head(mixed_logit)

Unnamed: 0,Score
1,0.0016069479515177
2,5.865629124161623e-16
3,1.5400962956513888e-06
4,0.0004246919507657
5,0.0016559108160814
6,0.093653128151229


In [23]:
# Show the last rows of the simulated scores.
tail(mixed_logit)

Unnamed: 0,Score
1,0.0002450826824472
2,0.0076491348019567
3,0.966593918114844
4,0.0579366167266801
5,6.290531391917113e-06
6,2.079137633259921e-05


In [24]:
# Accumulate θ[2k-1] + θ[2k]*γ[k] over k = 1..length(γ), starting from `β`.
#
# `θ` is read as consecutive pairs: the odd entry 2k-1 is the additive term and
# the even entry 2k multiplies γ[k]. It must therefore hold at least
# 2*length(γ) entries (indexing fails with a BoundsError otherwise).
#
# The earlier indices θ[k] and θ[2k] overlapped across k (θ[2] served both as the
# multiplier for k = 1 and as the additive term for k = 2) and never read the odd
# entries beyond length(γ). The result for k = 1 is unchanged.
function beta(γ::Vector, θ::Vector, β::Float64 = 0.0)
    for k = 1:length(γ)
        β += θ[2*k-1]+θ[2*k]*γ[k]
    end
    return β
end

# NOTE(review): γ is passed as `[rand(Uniform(), 6)]`, i.e. the six draws wrapped in an
# outer vector; the echoed result is a 6-element array rather than a scalar, so the loop
# in `beta` presumably runs once on the whole draw vector — confirm this is intended.
beta([rand(Uniform(), 6)], [0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])

6-element Array{Float64,1}:
 0.448055 
 0.15229  
 0.510681 
 0.949908 
 0.417275 
 0.0175326