# Overparameterization in Probabilistic Circuits

Based on the article "[Optimisation of Overparametrized Sum-Product Networks][Trapp]" by Trapp, Peharz and Pernkopf.

[Trapp]: https://arxiv.org/abs/1905.08196

In [1]:
using Pkg
Pkg.activate("/home/jonasrlg/code/RPCircuits.jl/")
using RPCircuits, Random, DelimitedFiles, Plots
using Distributions: rand, truncated, Normal
using Statistics: mean, std

[32m[1m  Activating[22m[39m project at `~/code/RPCircuits.jl`
┌ Info: Precompiling RPCircuits [a494de23-34c1-4aeb-b541-d9435dced8c8]
└ @ Base loading.jl:1423


In [2]:
"""
    build_sum(dimension::Int; var::Float64 = 0.1)

Build the `Sum` node for the variable with index `dimension`. Its two children are
`Indicator(dimension, 1)` and `Indicator(dimension, 0)`, in that order.

The two weights are drawn from `Normal(0, var)` truncated to `[0, Inf)` and are passed
to `Sum` exactly as drawn. Note that `var` is given to `Normal` as its second argument,
so despite its name it acts as the scale (standard deviation) of the untruncated Gaussian.
"""
function build_sum(dimension::Int; var::Float64 = 0.1)
    indicators = [Indicator(dimension, 1), Indicator(dimension, 0)]
    # Non-negative weights concentrated near zero
    weightdist = truncated(Normal(0, var), 0, Inf)
    return Sum(indicators, rand(weightdist, 2))
end

"""
    build_component(ndim::Int; var::Float64 = 0.1)

Build one component: a `Product` node with `ndim` children, where child `i` is the
`Sum` node returned by `build_sum(i; var=var)` (a sum over the two indicators of
variable `i`).
"""
function build_component(ndim::Int; var::Float64 = 0.1)
    # One sum node per variable, in index order
    children = Node[build_sum(dim; var=var) for dim in 1:ndim]
    return Product(children)
end

"""
    build_pc(ncomponents::Int, depth::Int, ndim::Int; var::Float64 = 0.1)

Build a circuit over `ndim` variables as a binary tree of `Sum` nodes with `depth` levels.

For `depth == 1` the result is a single `Sum` over `ncomponents` components, each
created by `build_component(ndim; var=var)`. For larger depths the root is a `Sum` with
two children, each built recursively with `ncomponents ÷ 2` components and
`depth - 1` levels. Because of the integer division, an odd `ncomponents` loses one
component at that split.

All sum weights are drawn from `Normal(0, var)` truncated to `[0, Inf)`; `var` is passed
to `Normal` as its second argument (the scale of the untruncated Gaussian).

# Throws
- `ArgumentError` if `depth < 1` (the recursion would never reach its base case).
- `ArgumentError` if `ncomponents < 2^(depth - 1)` (repeated halving would leave some
  leaf sum without any component).
"""
function build_pc(ncomponents::Int, depth::Int, ndim::Int; var::Float64 = 0.1)
    depth >= 1 || throw(ArgumentError("depth must be at least 1, got $depth"))
    # Every level halves the component budget, so after depth - 1 halvings at least
    # one component must remain for each leaf sum.
    (ncomponents >> (depth - 1)) >= 1 || throw(ArgumentError(
        "ncomponents must be at least 2^(depth - 1); " *
        "got ncomponents = $ncomponents with depth = $depth"))

    # Non-negative weights concentrated near zero
    weightdist = truncated(Normal(0, var), 0, Inf)

    if depth == 1
        components = Node[build_component(ndim; var=var) for _ in 1:ncomponents]
        return Sum(components, rand(weightdist, ncomponents))
    end

    # Draw the root weights before recursing so the order of random draws is unchanged
    w = rand(weightdist, 2)
    # Recursively build the left and right sub-circuits
    l = build_pc(ncomponents ÷ 2, depth - 1, ndim; var=var)
    r = build_pc(ncomponents ÷ 2, depth - 1, ndim; var=var)
    return Sum([l, r], w)
end

build_pc (generic function with 1 method)

In [3]:
# Dataset identifier; it selects the input file and prefixes the output file names
name = "nltcs"

# Maximum depth of the circuit; the component budget is 2^k
k = 3
ncomps = 2^k

# Learning rate handed to each learner update
η = 1e-4

# Number of update iterations per trained circuit
maxiter = 500

# Number of independent re-runs of the whole experiment
runs = 3;

# Scale of the zero-mean Gaussian that initialises the random weights
# (forwarded as `var`, which the builders pass to `Normal(0, var)`)
init_var = 0.1

0.1

In [4]:
# Read the training split of the selected dataset as a matrix of comma-separated integers
path = string(
    "/home/jonasrlg/code/Density-Estimation-Datasets/datasets/",
    name, "/", name, ".train.data",
)
data = readdlm(path, ',', Int)
# n = number of rows, dim = number of columns (later used as the number of variables)
n, dim = size(data)

(16181, 16)

In [5]:
# Train LLH storage for circuits of depth 1, 2, ..., k:
# llh[d][run, iter] holds the value recorded for depth d at iteration `iter` of re-run `run`
llh = Matrix{Float64}[Matrix{Float64}(undef, runs, maxiter) for _ in 1:k];

In [6]:
# Running the experiments: in every re-run, train one freshly initialised circuit per depth
for run in 1:runs, depth in 1:k
    # New circuit with weights close to zero and the current depth
    circuit = build_pc(ncomps, depth, dim; var=init_var)
    learner = Gradient(circuit)
    for iter in 1:maxiter
        # One learner update on the full training data (one gradient-descent iteration)
        update(learner, data; learningrate=η)
        # The learner's score is negated before being stored as the train LLH
        llh[depth][run, iter] = -learner.score
    end
end

In [7]:
# Start an empty figure; the curves are added below with plot!
plot(title = "Overparameterization on " * name, xlabel = "Iteration",
    ylabel = "Train LLH", legend=:topleft)

# Mean train LLH per iteration over all re-runs, one vector per depth
llh_mean = Vector{Float64}[vec(mean(llh[depth], dims=1)) for depth in 1:k]

# One curve per depth, with the standard deviation across re-runs drawn as a ribbon
for depth in 1:k
    spread = vec(std(llh[depth], dims=1))
    plot!(llh_mean[depth], label="Depth = $depth", ribbon=spread)
end

In [8]:
# Save the current figure as "<name>_notebook_experiment.pdf"
savefig(string(name, "_notebook_experiment.pdf"))

In [9]:
# Dump the raw per-run curves as text (file is created or overwritten)
write(name * "_notebook_all_experiment.txt", string(llh))

# Dump the per-depth mean curves as text; the cell value is the number of bytes written
write(name * "_notebook_mean_experiment.txt", string(llh_mean))

31503