In [1]:
using DataFrames, Distributions, DataFramesMeta

In [2]:
function gen_data(; n = 500000, mu = [0, 2], sigma = 1, pZ1 = 0.8)
    data = DataFrame(
        Obs_ID = 1:n,
        Z = (rand(Uniform(0,1), n) .> pZ1) .+ 1,
        )

    @transform!(data, :Y = rand(Normal(0, sigma), n) + mu[:Z])    
    @select!(data, :Obs_ID, :Y)
    @transform!(data, :p_Y_given_Z_1 = pdf.(Normal(mu[1], sigma), :Y))
    @transform!(data, :p_Y_given_Z_2 = pdf.(Normal(mu[2], sigma), :Y))

    return data
  end

gen_data (generic function with 1 method)

In [17]:
data = gen_data(n = 1000, pZ1 = 0.8);
first(data, 6)

Unnamed: 0_level_0,Obs_ID,Y,p_Y_given_Z_1,p_Y_given_Z_2
Unnamed: 0_level_1,Int64,Float64,Float64,Float64
1,1,1.45223,0.13898,0.343364
2,2,1.44106,0.141243,0.341249
3,3,0.11809,0.39617,0.0678992
4,4,2.95368,0.00508711,0.253171
5,5,-0.454903,0.359728,0.0196003
6,6,0.366154,0.373076,0.105014


In [25]:
function fit_model!(
    data; 
    pi_hat_0 = 0.5, 
    tolerance = 0.001,
    max_iterations = 1000,
    progress = DataFrame(iter = 1:(max_iterations+1), pi_hat = undef, ll = undef, ll_diff = undef))

    pi_hat = pi_hat_0
    E_step!(data, pi_hat)
    ll = loglik!(data)
    # progress = [(iter = 0, pi_hat, ll, ll_diff = NaN)]
    progress[1,:] = (iter = 0, pi_hat, ll, ll_diff = NaN)
    
    for i in 1:max_iterations
        pi_hat = M_step(data)
        E_step!(data, pi_hat)
        
        ll_old = ll
        ll = loglik!(data)
        ll_diff = ll - ll_old
        progress[i+1,:] = (i, pi_hat, ll, ll_diff)

        if ll_diff < tolerance
            break
        end
    end
    return progress
end

fit_model! (generic function with 1 method)

In [4]:
function E_step!(data, pi_hat)
    @transform!(data, :pY_Z1 = :p_Y_given_Z_1 .* pi_hat)
    @transform!(data, :pY_Z2 = :p_Y_given_Z_2 .* (1- pi_hat))
    @transform!(data, :pY = :pY_Z1 + :pY_Z2)
    @transform!(data, :pZ1_given_Y = :pY_Z1 ./ :pY)
end

E_step! (generic function with 1 method)

In [5]:
function M_step(data)
    mean(data[!, :pZ1_given_Y])
end

M_step (generic function with 1 method)

In [6]:
function loglik!(data)
    sum(log.(data[!, :pY]))
end

loglik! (generic function with 1 method)

In [26]:
@time progress = fit_model!(data)


LoadError: MethodError: [0mCannot `convert` an object of type [92mFloat64[39m[0m to an object of type [91mUndefInitializer[39m
[0mClosest candidates are:
[0m  convert(::Type{T}, [91m::T[39m) where T at Base.jl:61