# GSMM mixture modelling with synthetic data

## Import packages and functions

In [1]:
import Pkg; Pkg.activate("C:/Users/s151781/AppData/Local/Julia-1.3.1/GN/Project.toml")
using Revise
using FFTW
using Compat
using WAV
using DSP
using Base64
using ForneyLab
using LinearAlgebra
using ProgressMeter
using PyPlot
using Clustering

[32m[1mActivating[22m[39m environment at `C:\Users\s151781\AppData\Local\Julia-1.3.1\GN\Project.toml`


In [5]:
include("../extensions/ComplexNormal.jl")
include("../extensions/ComplexHGF.jl")
include("../extensions/ComplexToReal.jl")
include("../functions/auxiliary/workflow.jl") # for some workflow simplifications
;

## Generate data

In [6]:
# data generation parameters
nr_samples = 200
nr_freqs = 5 
nr_clusters = 7

# import sampling functions
import Distributions: Normal, MvNormal, MixtureModel, Dirichlet

# set means of ξ for the different clusters
μ_ξ = vcat([collect(k:k:k*nr_freqs) for k = 1:nr_clusters])
Σ_ξ = 1e-4*Ic(nr_freqs)
Σ_meas = 1e-10*Ic(nr_freqs)

# create arrays over samples
cluster_id = Array{Int64,1}(undef, nr_samples)
ξ_samples = Array{Array{Float64,1},1}(undef, nr_samples)
X_samples = Array{Array{Complex{Float64},1},1}(undef, nr_samples)
y_samples = Array{Array{Complex{Float64},1},1}(undef, nr_samples)

# generate samples independent from each other
for n = 1:nr_samples
     
    # create arrays over frequencies
    ξ_samples[n] = Array{Float64,1}(undef, nr_freqs)
    X_samples[n] = Array{Complex{Float64},1}(undef, nr_freqs)
    y_samples[n] = Array{Complex{Float64},1}(undef, nr_freqs)
    
    # randomly pick cluster to get means from 
    cluster_id[n] = rand(collect(1:nr_clusters))
    μ_ξi = μ[cluster_id[n]]
    
    # calculate samples fro frequencies
    for k = 1:nr_freqs
        
        # generate samples
        sample_ξ = rand(Normal(μ_ξi[k], sqrt(Σ_ξ[k,k])))
        sample_X = rand(Normal(0, sqrt(0.5*exp(sample_ξ)))) + 1im*rand(Normal(0, sqrt(0.5*exp(sample_ξ))))
        sample_y = rand(Normal(real(sample_X), sqrt(0.5*Σ_meas[k,k]))) + 1im*rand(Normal(imag(sample_X), sqrt(0.5*Σ_meas[k,k])))

        # save samples
        ξ_samples[n][k] = sample_ξ
        X_samples[n][k] = sample_X
        y_samples[n][k] = sample_y
        
    end
    
end

t = collect(1:nr_samples)
Y = y_samples;

UndefVarError: UndefVarError: μ_ξi2 not defined

# Building graph

In [4]:
# initialization:

# reshape Y
Yi = hcat(Y...)

# approximate with log-power
Yi = log.(abs2.(Yi))

# perform kmeans clustering
prior_m = kmeans(Yi,3).centers

5×3 Array{Float64,2}:
 0.392998   2.45901  1.21974
 1.71851    5.31288  3.42602
 2.65035    8.43273  5.52626
 3.50981   11.2986   7.30464
 4.08299   14.556    9.45059

In [5]:
#nr_freqs = dimension

nr_clusters = 3

fg = FactorGraph()
α = 1.0
# Specify generative model
@RV _pi ~ ForneyLab.Dirichlet(α*ones(nr_clusters))
@RV w_1 ~ Wishart(diagm(ones(nr_freqs))/(nr_freqs), nr_freqs)
@RV m_1 ~ GaussianMeanPrecision(prior_m[:,1], w_1)
@RV w_2 ~ Wishart(diagm(ones(nr_freqs))/(nr_freqs), nr_freqs)
@RV m_2 ~ GaussianMeanPrecision(prior_m[:,2], w_2)
@RV w_3 ~ Wishart(diagm(ones(nr_freqs))/(nr_freqs), nr_freqs)
@RV m_3 ~ GaussianMeanPrecision(prior_m[:,3], w_3)

z = Vector{Variable}(undef, nr_samples)
ξ = Vector{Variable}(undef, nr_samples)
X = Vector{Variable}(undef, nr_samples)
y = Vector{Variable}(undef, nr_samples)
for i in 1:nr_samples
    @RV z[i] ~ Categorical(_pi)
    @RV ξ[i] ~ GaussianMixture(z[i], m_1, w_1, m_2, w_2, m_3, w_3)
    # HGF
    @RV X[i] ~ HGF(ξ[i])

    # observation model
    @RV y[i] ~ ComplexNormal(X[i], 1e-10*diagm(ones(nr_freqs)).+0im, mat(0.0+0.0im))
    
    
    placeholder(y[i], :y, index=i, dims=(nr_freqs,))
end
# draw graph
# ForneyLab.draw(fg)

In [7]:
# Build the algorithm
q = PosteriorFactorization(_pi, m_1, w_1, m_2, w_2, m_3, w_3, z, X, ξ, ids=[:PI :M1 :W1 :M2 :W2 :M3 :W3 :Z :X :Ξ])
algo = variationalAlgorithm(q)

# Generate source code
source_code = algorithmSourceCode(algo);

# Load algorithm
eval(Meta.parse(source_code));

In [8]:
data = Dict(:y => Y)

# Prepare posterior factors
marginals = Dict(:_pi => vague(ForneyLab.Dirichlet, nr_clusters),
                 :m_1 => ProbabilityDistribution(Multivariate, GaussianMeanPrecision, m=prior_m[:,1], w=1e0*diagm(ones(nr_freqs))/nr_freqs),
                 :w_1 => ProbabilityDistribution(MatrixVariate, ForneyLab.Wishart, v=1e0*diagm(ones(nr_freqs))/nr_freqs, nu=nr_freqs),
                 :m_2 => ProbabilityDistribution(Multivariate, GaussianMeanPrecision, m=prior_m[:,2], w=1e0*diagm(ones(nr_freqs))/nr_freqs),
                 :w_2 => ProbabilityDistribution(MatrixVariate, ForneyLab.Wishart, v=1e0*diagm(ones(nr_freqs))/nr_freqs, nu=nr_freqs),
                 :m_3 => ProbabilityDistribution(Multivariate, GaussianMeanPrecision, m=prior_m[:,3], w=1e0*diagm(ones(nr_freqs))/nr_freqs),
                 :w_3 => ProbabilityDistribution(MatrixVariate, ForneyLab.Wishart, v=1e0*diagm(ones(nr_freqs))/nr_freqs, nu=nr_freqs))
for i in 1:nr_samples
    marginals[:z_*i] = vague(Categorical)
    marginals[:X_*i] = ProbabilityDistribution(Multivariate, ComplexNormal, μ=zeros(nr_freqs) .+ 0.0im, Γ=1e10*diagm(ones(nr_freqs)).+0im, C=mat(0.0+0.0im));
    marginals[:ξ_*i] = ProbabilityDistribution(ForneyLab.Multivariate, GaussianMeanVariance, m=zeros(nr_freqs), v=diagm(ones(nr_freqs)))
end



In [9]:
# Execute algorithm
nr_its = 10
@showprogress for i in 1:nr_its
    stepX!(data, marginals)
    stepΞ!(data, marginals)
    stepZ!(data, marginals)
    stepPI!(data, marginals) 
    stepM1!(data, marginals)
    stepW1!(data, marginals)
    stepM2!(data, marginals)
    stepW2!(data, marginals) 
    stepM3!(data, marginals)
    stepW3!(data, marginals)  
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:37[39m


In [21]:
function confusmat(k::Integer, gts::Array{Int64,1}, preds::Array{Int64,1})
    # borrowed from MLBase
    n = length(gts)
    length(preds) == n || throw(DimensionMismatch("Inconsistent lengths."))
    R = zeros(Int, k, k)
    for i = 1:n
        @inbounds g = gts[i]
        @inbounds p = preds[i]
        R[g, p] += 1
    end
    return R
end

confusmat(nr_clusters, cluster_id .- minimum(cluster_id) .+ 1, z_inferred .- minimum(z_inferred) .+ 1)

3×3 Array{Int64,2}:
 73   0   0
  1   0  63
  0  61   2