In [49]:
using BenchmarkTools, Compat, DataFrames, Distributions, ForwardDiff, PyCall

In [50]:
# Stochastic gradient descent, mixed logit.

df = readtable("../data/parsed_model_australia.txt", separator = ' ', header = false)

a, b = size(df)

const n_individuals = a
const n_alternatives = 4
const n_parameters = b
const n_simulations = 5
const simulated_b = 5

srand(123456)

rand_contdist(Dist::Distribution) = quantile(Dist, rand())

rand_contdist (generic function with 1 method)

In [51]:
head(df)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25
1,4,1,0,0,0,0,1,0,0,0,0,1,0,35,0,0,0,69,34,35,0,70,71,70,30
2,4,1,0,0,0,0,1,0,0,0,0,1,0,30,0,0,0,64,44,53,0,68,84,85,50
3,4,1,0,0,0,0,1,0,0,0,0,1,0,40,0,0,0,69,34,35,0,129,195,149,101
4,4,1,0,0,0,0,1,0,0,0,0,1,0,70,0,0,0,64,44,53,0,59,79,81,32
5,4,1,0,0,0,0,1,0,0,0,0,1,0,45,0,0,0,64,44,53,0,82,93,94,99
6,2,1,0,0,0,0,1,0,0,0,0,1,0,20,0,0,0,69,40,35,0,70,57,58,43


In [52]:
mixed_logit = DataFrame(β1 = 1.0:Float64(n_individuals),
                        β2 = 1.0:Float64(n_individuals),
                        β3 = 1.0:Float64(n_individuals),
                        β4 = 1.0:Float64(n_individuals),
                        β5 = 1.0:Float64(n_individuals))

function simulate()
    for i = 1:n_individuals, j = 1:n_simulations
        mixed_logit[i, j] = rand()
    end
end

simulate()

head(mixed_logit)

Unnamed: 0,β1,β2,β3,β4,β5
1,0.5152104671804307,0.7412898556237781,0.5821424283521601,0.2099687343318144,0.1806485932541042
2,0.5743318350142363,0.4733804534506725,0.6350533219682404,0.0039460230465964,0.9987169142802474
3,0.391493560981647,0.0110828226551173,0.728900346094737,0.5885694843677474,0.0955464156850605
4,0.3161831307020315,0.9868070201531224,0.3248312054217537,0.6275088202683465,0.9742043509623348
5,0.9133245457961848,0.3026406935881607,0.2403469292573343,0.6929933674978992,0.9948456891059492
6,0.7798750382138562,0.7603354924796097,0.9018641475932664,0.0922113973063749,0.400661483972383


In [53]:
function individual(θ::Vector, i::Int64)
    m, n = size(df)
    choice = df[i, 1][1]
    alternatives = collect(1:n_alternatives)
    splice!(alternatives, choice)
    
    function utility(β::Vector, k::Int64)
        temp = Float64[]
        k += 1
        while k <= n_parameters
            push!(temp, df[i, k])
            k += n_alternatives
        end
        return dot(temp, β)
    end
    
    function construct(γ::Vector, θ::Vector)
        return θ[1]+θ[2]*γ[1]
    end

    function probability(θ::Vector)
        logit = 0.0
        t = 0.0
        for k = 1:n_simulations
            β = []
            for j = 1:simulated_b-1
                push!(β, θ[j])
            end
            γ = [mixed_logit[i, k]]
            push!(β, construct(γ, θ[simulated_b:simulated_b+1]))
            for j = simulated_b+2:length(θ)
                push!(β, θ[j])
            end 
            c = utility(β, choice)
            for alternative in alternatives
                t += exp(utility(β, alternative)-c)
            end
            logit += 1/(1+t)
        end
        return logit/n_simulations
    end
    
    return probability
end

individual (generic function with 1 method)

In [54]:
function f(θ::Vector, i::Int64 = 1)
    probability = individual(θ, i)
    return log(probability(θ))
end

f (generic function with 2 methods)

In [55]:
function g(x::Vector, i::Int64)
    probability = individual(x, i)
    return ForwardDiff.gradient(probability, x)
end

function g!(x::Vector, i::Int64, storage::Vector)
    s = g(x, i)
    storage[1:length(s)] = s[1:length(s)]
end

g! (generic function with 1 method)

In [56]:
function shuffle!(df::DataFrame)
    for i = size(df, 1):-1:2
        j = rand(1:i)
        df[i, :], df[j, :] = df[j, :], df[i, :]
    end
end

shuffle! (generic function with 1 method)

In [68]:
function sgd(ω0::Vector, η::Float64 = 0.0001, tol::Float64 = 1e-6, kmax::Int64 = 10000)
    k = 1
    ω = ω0
    n = length(ω)
    tol *= tol
    dfω = ones(n)
    while norm(dfω) > tol && k < kmax
        for i = 1:n_individuals
            ω -= η*g!(ω, i, dfω)
        end
        k += 1
    end
    return ω, k
end

sgd (generic function with 4 methods)

In [69]:
sgd(zeros(7)) # pas exact

([0.0618202, -1.22894, 0.974303, -0.436661, -8.15488e-5, 0.286384, 0.11079], 10000)