# Random Number Decomposition
Idea: Decompose the sum of random numbers into its contributions

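For a given set of observations $x_i$ and indicators $a_{ijk}$ with
$$x_i = \sum_{j=0}^{n} \sum_{k=1}^{m_j} a_{ijk}\, y_{jk}$$
where
* $m_0 = 1$
* $m_j$ is the number of contributors in layer $j$
* $a_{ijk} \in \{0, 1\}$
* $\sum_{k=1}^{m_j} a_{ijk} = 1$ for every layer $j$, i.e. exactly one contributor per layer is active for each sample

estimate the distributions $y_{jk} \sim N(\mu_{jk}, \sigma_{jk})$ of the individual contributions.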

In [1]:
using Pkg
Pkg.activate(".")
#Pkg.add("Turing")

[32m[1m  Activating[22m[39m project at `d:\Code\ProbabilisticProgramming`


In [2]:
using Turing, Distributions, Statistics, Distributed, Test

In [10]:
# Layer configurations used to generate synthetic data.
# Each configuration is a vector of layers; a layer maps a contributor id to a
# `(mean, sigma)` tuple. The first layer always holds exactly one contributor.

# Base layer plus one layer with two contributors.
cc_12 = [
    Dict(1 => (0, 5)),
    Dict(1 => (1, 1), 2 => (2, 1)),
]

# Base layer plus one layer with four contributors.
cc_14 = [
    Dict(1 => (0, 5)),
    Dict(1 => (1, 1), 2 => (2, 1), 3 => (3, 2), 4 => (4, 1)),
]

# Base layer plus two further layers with four and five contributors.
cc_29 = [
    Dict(1 => (0, 5)),
    Dict(1 => (1, 1), 2 => (2, 1), 3 => (3, 2), 4 => (4, 1)),
    Dict(1 => (1, 1), 2 => (2, 1), 3 => (3, 2), 4 => (4, 1), 5 => (5, 1)),
]

3-element Vector{Dict{Int64, Tuple{Int64, Int64}}}:
 Dict(1 => (0, 5))
 Dict(4 => (4, 1), 2 => (2, 1), 3 => (3, 2), 1 => (1, 1))
 Dict(5 => (5, 1), 4 => (4, 1), 2 => (2, 1), 3 => (3, 2), 1 => (1, 1))

In [4]:
"""
    align_coefficients(coefficients)

Shift the means of every layer so that they average to zero within that layer.

# Arguments
- `coefficients`: indexable collection of layers. Each layer maps a contributor id
  to a `(mean, sigma)` tuple. The first layer must contain exactly one contributor.

# Returns
A vector with one `Dict` per layer. Keys and sigmas are unchanged; every mean is
replaced by `mean - layer_mean`, where `layer_mean` is the average of the means of
that layer.

# Throws
- `AssertionError` if the first layer does not contain exactly one contributor.
"""
function align_coefficients(coefficients)
    # Thrown explicitly (instead of via `@assert`) so the check is always executed,
    # while callers still see the same exception type.
    length(coefficients[1]) == 1 ||
        throw(AssertionError("first level is just allowed to have one contributer"))

    # Re-centre a single layer around the average of its means.
    function centered(layer)
        layer_mean = mean([first(mv) for mv in values(layer)])
        return Dict(k => (m - layer_mean, v) for (k, (m, v)) in layer)
    end

    # The comprehension yields a vector with a concrete element type (not `Vector{Any}`).
    return [centered(layer) for layer in coefficients]
end
# A first layer with more than one contributor must be rejected.
@test_throws AssertionError align_coefficients([Dict(1 => (0, 5), 2 => (3, 4))])

# Means are re-centred per layer (2 -> 0 and 1, 2, 3 -> -1, 0, 1); sigmas stay untouched.
@test begin
    layers = [
        Dict(1 => (2, 5)),
        Dict(1 => (1, 1), 2 => (2, 1), 3 => (3, 2)),
    ]
    expected = [
        Dict(1 => (0.0, 5)),
        Dict(1 => (-1.0, 1), 2 => (0.0, 1), 3 => (1.0, 2)),
    ]
    align_coefficients(layers) == expected
end

"""
    generate_data(coefficients, n_samples)

Create `n_samples` random numbers as the sum of one normally distributed
contribution per layer.

The coefficients are first re-centred with `align_coefficients`. For every layer a
contributor id is drawn uniformly from `1:maximum(keys(layer))` for each sample, and
a draw from `Normal(mu, sigma)` of that contributor is added to the sample. Ids in
that range that are not keys of the layer add nothing to the affected samples.

# Arguments
- `coefficients`: collection of layers, each mapping a contributor id to a
  `(mean, sigma)` tuple; the first layer must have exactly one contributor.
- `n_samples`: number of samples to generate, must be positive.

# Returns
A tuple `(data, contributers)`:
- `data`: `Vector{Float16}` with the summed contributions.
- `contributers`: one vector of contributor ids per layer, excluding the first layer.

# Throws
- `AssertionError` if `n_samples` is not positive or the first layer has more than
  one contributor.
"""
function generate_data(coefficients, n_samples)
    # Thrown explicitly so the check cannot be compiled out; same type as `@assert`.
    n_samples > 0 || throw(AssertionError("n_samples > 0"))
    layers = align_coefficients(coefficients)
    data = zeros(Float16, n_samples)

    contributers = Vector{Vector{Int}}()
    for layer in layers
        max_con = maximum(keys(layer))
        cons = rand(1:max_con, n_samples)
        for (con, (mu, sigma)) in layer
            rows = cons .== con
            # Add one draw per sample that was assigned to this contributor.
            data[rows] .+= rand(Normal(mu, sigma), count(rows))
        end
        push!(contributers, cons)
    end
    popfirst!(contributers) # first set has by default just one contributer
    return data, contributers
end

generate_data

In [5]:
# NOTE(review): `cc_01` is not defined in any cell visible here; this cell was
# presumably run against an earlier notebook state. The recorded output (a single
# contributor layer with ids 1-4) looks like the `cc_14` configuration - confirm.
data, contributers = generate_data(cc_01, 1000)

(Float16[3.023, -7.6, 4.52, 1.509, 2.234, -0.3215, -10.53, 1.698, -5.34, 1.77  …  3.629, 1.803, 0.1256, 5.312, -4.156, 5.883, -2.285, 4.9, 4.21, 1.487], Any[[4, 2, 1, 3, 4, 3, 4, 3, 2, 1  …  1, 3, 1, 3, 3, 2, 2, 4, 4, 2]])

In [6]:
# Model with a global mean `μ_a`, a shared observation noise `sigma` and one offset
# `cons[g]` per contributor. Observations are scored group-wise with a broadcasted `.~`.
@model function model_fun3(data, contributers)
    n_groups = maximum(contributers)
    spread = var(data) # sample variance of the data, reused as prior scale / bound
    μ_upper = 5 #Maximum expected value for contributers
    μ_a ~ Normal(0, 10)
    sigma ~ Uniform(0, spread)

    μ_cons = Vector{Float16}(undef, n_groups)
    cons = Vector{Float16}(undef, n_groups)

    for g in 1:n_groups
        μ_cons[g] ~ Uniform(0, μ_upper)
        # normalization: the last offset is centred on minus the sum of the other
        # prior means; its own `μ_cons` entry is sampled but not used as centre
        centre = g < n_groups ? μ_cons[g] : -sum(μ_cons[1:(n_groups - 1)])
        cons[g] ~ Normal(centre, spread)
    end

    for g in 1:n_groups
        mask = contributers .== g
        data[mask] .~ Normal(μ_a + cons[g], sigma)
    end
end # Works :-D

# Variant of the contributor model that first builds the per-observation mean vector
# `m` and then scores every observation against `Normal(m[i], sigma)`.
#
# The mean vector is derived directly from the sampled parameters. The earlier version
# filled a preallocated `Vector{Float16}` buffer in place, which cannot store the
# number types a gradient-based sampler passes through the model (and truncates to
# half precision otherwise).
# NOTE(review): this model was previously marked as not working - re-run to confirm.
@model function model_fun4(data, contributers)
    n_contributers = maximum(contributers)
    sigmax = var(data) # sample variance of the data, reused as prior scale / bound
    max_con = 5 #Maximum expected value for contributers
    μ_a ~ Normal(0, 10)
    sigma ~ Uniform(0, sigmax)

    μ_cons = Vector{Float16}(undef, n_contributers)

    cons = Vector{Float16}(undef, n_contributers)

    for i in 1:(n_contributers-1)
        μ_cons[i] ~ Uniform(0, max_con)
        cons[i] ~ Normal(μ_cons[i], sigmax)
    end
    μ_cons[n_contributers] ~ Uniform(0, max_con)
    cons[n_contributers] ~ Normal(-sum(μ_cons[1:(n_contributers-1)]), sigmax) #normalization

    # Mean of every observation: global mean plus the offset of its contributor.
    m = μ_a .+ cons[contributers]

    for i in eachindex(data)
        data[i] ~ Normal(m[i], sigma)
    end
end

# Same priors as `model_fun3`, but every observation is scored individually by
# looking up the offset of its contributor.
@model function model_fun5(data, contributers)
    n_groups = maximum(contributers)
    spread = var(data) # sample variance of the data, reused as prior scale / bound
    μ_upper = 5 #Maximum expected value for contributers
    μ_a ~ Normal(0, 10)
    sigma ~ Uniform(0, spread)

    μ_cons = Vector{Float16}(undef, n_groups)
    cons = Vector{Float16}(undef, n_groups)

    for g in 1:n_groups
        μ_cons[g] ~ Uniform(0, μ_upper)
        # normalization: the last offset is centred on minus the sum of the other
        # prior means; its own `μ_cons` entry is sampled but not used as centre
        centre = g < n_groups ? μ_cons[g] : -sum(μ_cons[1:(n_groups - 1)])
        cons[g] ~ Normal(centre, spread)
    end

    for obs in eachindex(data)
        offset = cons[contributers[obs]]
        data[obs] ~ Normal(μ_a + offset, sigma)
    end
end #works

# Simplest variant: every contributor offset `cons[i]` gets an independent
# zero-centred normal prior; there is no per-contributor prior mean and no
# normalization of the last offset.
@model function model_fun6(data, contributers)
    n_contributers = maximum(contributers)
    sigmax = var(data) # sample variance of the data, reused as prior scale / bound
    μ_a ~ Normal(0, 10)
    sigma ~ Uniform(0, sigmax)

    cons = Vector{Float16}(undef, n_contributers)

    for i in eachindex(cons)
        cons[i] ~ Normal(0, sigmax)
    end

    # Each observation is centred on the global mean plus its contributor's offset.
    for i in eachindex(data)
        data[i] ~ Normal(μ_a + cons[contributers[i]], sigma)
    end
end #works

# Like `model_fun5`, but every contributor additionally gets its own prior
# standard deviation `σ_cons[g]` instead of the shared data variance.
@model function model_fun7(data, contributers)
    n_groups = maximum(contributers)
    spread = var(data) # sample variance of the data, reused as prior bound
    μ_upper = 5 #Maximum expected value for contributers
    μ_a ~ Normal(0, 10)
    sigma ~ Uniform(0, spread)

    μ_cons = Vector{Float16}(undef, n_groups)
    σ_cons = Vector{Float16}(undef, n_groups)
    cons = Vector{Float16}(undef, n_groups)

    for g in 1:n_groups
        μ_cons[g] ~ Uniform(0, μ_upper)
        σ_cons[g] ~ Uniform(0, spread)
        # normalization: the last offset is centred on minus the sum of the other
        # prior means; its own `μ_cons` entry is sampled but not used as centre
        centre = g < n_groups ? μ_cons[g] : -sum(μ_cons[1:(n_groups - 1)])
        cons[g] ~ Normal(centre, σ_cons[g])
    end

    for obs in eachindex(data)
        offset = cons[contributers[obs]]
        data[obs] ~ Normal(μ_a + offset, sigma)
    end
end #works


model_fun7 (generic function with 2 methods)

In [13]:
# Draw 1000 synthetic samples from the two-layer configuration `cc_12`.
data_12, contributers_12 = generate_data(cc_12, 1000)
# `generate_data` drops the first layer from its second return value, so index 1 is
# the contributor assignment of the second layer.
model = model_fun7(data_12, contributers_12[1])
# Draw 10000 samples from the model using `NUTS()`.
chain = sample(model, NUTS(), 10000)

[32mSampling   0%|█                                         |  ETA: N/A[39m
┌ Info: Found initial step size
│   ϵ = 0.2
└ @ Turing.Inference C:\Users\fuerf\.julia\packages\Turing\QN7BL\src\mcmc\hmc.jl:212
[32mSampling   0%|█                                         |  ETA: 0:01:29[39m
[32mSampling   1%|█                                         |  ETA: 0:01:38[39m
[32mSampling   2%|█                                         |  ETA: 0:01:26[39m
[32mSampling   2%|█                                         |  ETA: 0:01:24[39m
[32mSampling   2%|██                                        |  ETA: 0:01:20[39m
[32mSampling   3%|██                                        |  ETA: 0:01:14[39m
[32mSampling   4%|██                                        |  ETA: 0:01:09[39m
[32mSampling   4%|██                                        |  ETA: 0:01:06[39m
[32mSampling   4%|██                                        |  ETA: 0:01:05[39m
[32mSampling   5%|███                                 

Chains MCMC chain (10000×20×1 Array{Float64, 3}):

Iterations        = 1001:1:11000
Number of chains  = 1
Samples per chain = 10000
Wall duration     = 60.3 seconds
Compute duration  = 60.3 seconds
parameters        = μ_a, sigma, μ_cons[1], σ_cons[1], cons[1], μ_cons[2], σ_cons[2], cons[2]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
 [1m parameters [0m [1m    mean [0m [1m     std [0m [1m    mcse [0m [1m  ess_bulk [0m [1m  ess_tail [0m [1m    rhat [0m [1m[0m ⋯
 [90m     Symbol [0m [90m Float64 [0m [90m Float64 [0m [90m Float64 [0m [90m   Float64 [0m [90m   Float64 [0m [90m Float64 [0m [90m[0m ⋯

         μ_a   -0.0180    5.1360    0.1014   2615.8442   2804.2183    1.0011   ⋯
       sigma    5.0648    0.1130    0.0014   6552.0203   5854.6646    1.0014   ⋯
   μ_cons[1]    2.2427  

In [17]:
# Pull the draws of all `cons[...]` parameters out of the chain; the result is a
# named tuple whose `cons` field holds one array of draws per contributor.
c = get(chain, :cons)

(cons = ([-1.9950906337878604; -5.347828021785617; … ; 13.048937930289512; 12.200154474861817;;], [-1.1938562709053198; -4.536622082188424; … ; 13.415795380109135; 13.091347655847091;;]),)

In [21]:
# Inspect the container type of the draws of the first `cons` entry.
typeof(c[1][1])

AxisMatrix{Float64, Matrix{Float64}, Tuple{Axis{:iter, StepRange{Int64, Int64}}, Axis{:chain, UnitRange{Int64}}}}[90m (alias for [39m[90mAxisArrays.AxisArray{Float64, 2, Array{Float64, 2}, Tuple{AxisArrays.Axis{:iter, StepRange{Int64, Int64}}, AxisArrays.Axis{:chain, UnitRange{Int64}}}}[39m[90m)[39m

In [24]:
# Inner `mean` averages the per-contributor draw arrays element-wise, the outer
# `mean` averages over all draws, giving the grand mean of the `cons` offsets.
# NOTE(review): presumably a check that the offsets are centred around zero - confirm.
mean(mean(c[1]))

-0.026749748878092817