In [1]:
using Revise
using LogicCircuits, ProbabilisticCircuits

include("product.jl");
include("log.jl");
include("support_circuit.jl");
include("real_power.jl");
include("integrate.jl");
includet("information.jl");
include("quotient.jl");

Pipeline

In [2]:
function ent(pc::ProbCircuit)
    println("> Size of the input circuits: pc - $(num_edges(pc))")
    t = @elapsed log_pc = log_circuit(pc; log_prob = true)
    println("> Computing log(pc) took $(t) seconds; the result circuit has $(num_edges(log_pc)) edges")
    t = @elapsed pc_t_log_pc = product_circuit(pc, log_pc; m_log_prob = true, n_log_prob = false, compatible = true)
    println("> Computing pc * log(pc) took $(t) seconds; the result circuit has $(num_edges(pc_t_log_pc)) edges")
    t = @elapsed ent_val = -integrate_circuit(pc_t_log_pc; log_prob = false)
    println("> Computing int(pc * log(pc)) took $(t) seconds.")
    println("> The entropy of pc is $(ent_val).")
end

ent (generic function with 1 method)

In [3]:
function xent(pc1::ProbCircuit, pc2::ProbCircuit)
    println("> Size of the input circuits: pc1 - $(num_edges(pc1)), pc2 - $(num_edges(pc2))")
    t = @elapsed log_pc2 = log_circuit(pc2; log_prob = true)
    println("> Computing log(pc2) took $(t) seconds; the result circuit has $(num_edges(log_pc2)) edges")
    t = @elapsed pc1_t_log_pc2 = product_circuit(pc1, log_pc2; m_log_prob = true, n_log_prob = false, 
                                                 compatible = true)
    println("> Computing pc1 * log(pc2) took $(t) seconds; the result circuit has $(num_edges(pc1_t_log_pc2)) edges")
    t = @elapsed xent_val = -integrate_circuit(pc1_t_log_pc2; log_prob = false)
    println("> Computing int(pc1 * log(pc2)) took $(t) seconds.")
    println("> The cross entropy between pc1 and pc2 is $(xent_val).")
    num_edges(pc1), num_edges(pc2), num_edges(log_pc2), num_edges(pc1_t_log_pc2)
end

xent (generic function with 1 method)

In [4]:
function kld(pc1::ProbCircuit, pc2::ProbCircuit)
    println("> Size of the input circuits: pc1 - $(num_edges(pc1)), pc2 - $(num_edges(pc2))")
    t = @elapsed inv_pc2 = circuit_real_power(pc2, -1.0)
    println("> Computing 1/pc2 took $(t) seconds; the result circuit has $(num_edges(inv_pc2)) edges")
    t = @elapsed pc1_q_pc2 = product_circuit(pc1, inv_pc2; m_log_prob = true, n_log_prob = true, compatible = true)
    println("> Computing pc1/pc2 took $(t) seconds; the result circuit has $(num_edges(pc1_q_pc2)) edges")
    t = @elapsed log_pc1_q_pc2 = log_circuit(pc1_q_pc2; log_prob = false)
    println("> Computing log(pc1/pc2) took $(t) seconds; the result circuit has $(num_edges(log_pc1_q_pc2)) edges")
    t = @elapsed pc1_t_log_pc1_q_pc2 = product_circuit(pc1, log_pc1_q_pc2; m_log_prob = true, n_log_prob = false, compatible = true)
    println("> Computing pc1 * log(pc1/pc2) took $(t) seconds; the result circuit has $(num_edges(pc1_t_log_pc1_q_pc2)) edges")
    t = @elapsed kld_val = integrate_circuit(pc1_t_log_pc1_q_pc2; log_prob = false)
    println("> Computing int(pc1 * log(pc1/pc2)) took $(t) seconds.")
    println("> The KLD between pc1 and pc2 is $(kld_val).")
    num_edges(pc1_q_pc2), num_edges(log_pc1_q_pc2), num_edges(pc1_t_log_pc1_q_pc2)
end

kld (generic function with 1 method)

In [5]:
function reiny_alpha_div(pc1::ProbCircuit, pc2::ProbCircuit; alpha::Float64)
    println("> Size of the input circuits: pc1 - $(num_edges(pc1)), pc2 - $(num_edges(pc2))")
    t = @elapsed powpc1 = circuit_real_power(pc1, alpha)
    println("> Computing pc1^alpha took $(t) seconds; the result circuit has $(num_edges(powpc1)) edges")
    t = @elapsed powpc2 = circuit_real_power(pc2, 1.0 - alpha)
    println("> Computing pc2^(1-alpha) took $(t) seconds; the result circuit has $(num_edges(powpc2)) edges")
    t = @elapsed powpc1_t_powpc2 = product_circuit(powpc1, powpc2; m_log_prob = true, n_log_prob = true, 
                                                   compatible = true)
    println("> Computing pc1^alpha * pc2^(1-alpha) took $(t) seconds; the result circuit has " *
            "$(num_edges(powpc1_t_powpc2)) edges")
    t = @elapsed reiny_val = log(integrate_circuit(powpc1_t_powpc2; log_prob = false)) / (1.0 - alpha)
    println("> Computing int(pc1^alpha * pc2^(1-alpha)) took $(t) seconds.")
    println("> The Reiny's Alpha Divergence between pc1 and pc2 is $(reiny_val).")
    num_edges(powpc1), num_edges(powpc2), num_edges(powpc1_t_powpc2)
end

reiny_alpha_div (generic function with 1 method)

In [16]:
function renyi_entropy(pc::ProbCircuit; alpha::Float64)
    println("> Size of the input circuit: pc - $(num_edges(pc))")
    t = @elapsed powpc = circuit_real_power(pc, alpha)
    println("> Computing pc^alpha took $(t) seconds; the result circuit has $(num_edges(powpc)) edges")
    t = @elapsed reiny_val = log(integrate_circuit(powpc; log_prob = true)) / (1.0 - alpha)
    println("> Computing int(pc^alpha) took $(t) seconds.")
    reiny_val
end

renyi_entropy (generic function with 1 method)

In [18]:
function cs_div(pc1::ProbCircuit, pc2::ProbCircuit)
    println("> Size of the input circuits: pc1 - $(num_edges(pc1)), pc2 - $(num_edges(pc2))")
    t = @elapsed pc1_m_pc2 = product_circuit(pc1, pc2; m_log_prob = true, n_log_prob = true, compatible = true)
    println("> Computing pc1*pc2 took $(t) seconds; the result circuit has $(num_edges(pc1_m_pc2)) edges")
    t = @elapsed pc1_sq = circuit_real_power(pc1, 2.0)
    println("> Computing pc1^2 took $(t) seconds; the result circuit has $(num_edges(pc1_sq)) edges")
    t = @elapsed pc2_sq = circuit_real_power(pc2, 2.0)
    println("> Computing pc2^2 took $(t) seconds; the result circuit has $(num_edges(pc2_sq)) edges")
    t = @elapsed div = -log(integrate_circuit(pc1_m_pc2; log_prob = false) / sqrt(
        integrate_circuit(pc1_sq; log_prob = true) * integrate_circuit(pc2_sq; log_prob = true)
    ))
    println("> Computing cs div took $(t) seconds.")
    div
end

cs_div (generic function with 1 method)

In [6]:
for dataset_name in twenty_dataset_names
    pc1 = load_prob_circuit("pcs/$(dataset_name)_200.psdd");
    pc2 = load_prob_circuit("pcs/$(dataset_name)_500.psdd");
    
    println("=========================================")
    println(dataset_name)
    println("")
    pc1_pc2 = product_circuit(pc1, pc2; m_log_prob = true, n_log_prob = true, compatible = true)
    println("> PC1 * PC2 $(num_edges(pc1_pc2)) edges")
    println("")
    ent(pc1)
    println("")
    pc1_size, pc2_size, log_pc2, p_r = xent(pc1, pc2)
    println("")
    pc1_q_pc2, log_s, p_t = kld(pc1, pc2)
    println("")
    pc1_a, pc2_a, pc1a_pc2a = reiny_alpha_div(pc1, pc2; alpha = 1.5)
    
    println("$(pc1_size) & $(pc2_size) & $(pc1_a) & $(pc2_a) & $(log_pc2) & $(pc1_q_pc2) & $(log_s) & " * 
            "$(num_edges(pc1_pc2)) & $(p_r) & $(p_t) & $(pc1a_pc2a)")
end

accidents

> PC1 * PC2 8299 edges

> Size of the input circuits: pc - 3193
> Computing log(pc) took 1.758061784 seconds; the result circuit has 11564 edges
> Computing pc * log(pc) took 0.145833363 seconds; the result circuit has 11564 edges
> Computing int(pc * log(pc)) took 0.279742736 seconds.
> The entropy of pc is 32.40056439544114.

> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing log(pc2) took 0.252252482 seconds; the result circuit has 29891 edges
> Computing pc1 * log(pc2) took 0.115413004 seconds; the result circuit has 30007 edges
> Computing int(pc1 * log(pc2)) took 0.01103132 seconds.
> The cross entropy between pc1 and pc2 is 34.05829064870709.

> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing 1/pc2 took 0.68492835 seconds; the result circuit has 8183 edges
> Computing pc1/pc2 took 0.030132439 seconds; the result circuit has 8299 edges
> Computing log(pc1/pc2) took 0.022388278 seconds; the result circuit has 30239 edges
> Computing pc1 * lo

> Computing int(pc * log(pc)) took 0.115405112 seconds.
> The entropy of pc is 86.75201818365386.

> Size of the input circuits: pc1 - 73828, pc2 - 856955
> Computing log(pc2) took 2.814790303 seconds; the result circuit has 3141981 edges
> Computing pc1 * log(pc2) took 43.636572344 seconds; the result circuit has 3142055 edges
> Computing int(pc1 * log(pc2)) took 1.591675466 seconds.
> The cross entropy between pc1 and pc2 is 93.30240984842088.

> Size of the input circuits: pc1 - 73828, pc2 - 856955
> Computing 1/pc2 took 0.870741593 seconds; the result circuit has 856955 edges
> Computing pc1/pc2 took 10.522954407 seconds; the result circuit has 857029 edges
> Computing log(pc1/pc2) took 2.921172945 seconds; the result circuit has 3142203 edges
> Computing pc1 * log(pc1/pc2) took 42.866988941 seconds; the result circuit has 3142203 edges
> Computing int(pc1 * log(pc1/pc2)) took 1.566064493 seconds.
> The KLD between pc1 and pc2 is 6.550391664767006.

> Size of the input circuits: pc

2765 & 6614 & 2765 & 6614 & 24111 & 6634 & 24171 & 6634 & 24131 & 24171 & 6634
msweb

> PC1 * PC2 9175 edges

> Size of the input circuits: pc - 4859
> Computing log(pc) took 0.040767552 seconds; the result circuit has 17381 edges
> Computing pc * log(pc) took 0.104780781 seconds; the result circuit has 17381 edges
> Computing int(pc * log(pc)) took 0.005380013 seconds.
> The entropy of pc is 10.155486310119123.

> Size of the input circuits: pc1 - 4859, pc2 - 9025
> Computing log(pc2) took 0.052546843 seconds; the result circuit has 32675 edges
> Computing pc1 * log(pc2) took 0.150819793 seconds; the result circuit has 32825 edges
> Computing int(pc1 * log(pc2)) took 0.01111124 seconds.
> The cross entropy between pc1 and pc2 is 10.162931667964264.

> Size of the input circuits: pc1 - 4859, pc2 - 9025
> Computing 1/pc2 took 0.007689092 seconds; the result circuit has 9025 edges
> Computing pc1/pc2 took 0.04009734 seconds; the result circuit has 9175 edges
> Computing log(pc1/pc2) took

> Computing pc1 * log(pc1/pc2) took 0.257631701 seconds; the result circuit has 42956 edges
> Computing int(pc1 * log(pc1/pc2)) took 0.015013036 seconds.
> The KLD between pc1 and pc2 is 0.1467937716198886.

> Size of the input circuits: pc1 - 8309, pc2 - 11732
> Computing pc1^alpha took 0.007057336 seconds; the result circuit has 8309 edges
> Computing pc2^(1-alpha) took 0.009801265 seconds; the result circuit has 11732 edges
> Computing pc1^alpha * pc2^(1-alpha) took 0.06583342 seconds; the result circuit has 11926 edges
> Computing int(pc1^alpha * pc2^(1-alpha)) took 0.004057441 seconds.
> The Reiny's Alpha Divergence between pc1 and pc2 is -0.25634862557081556.
8309 & 11732 & 8309 & 11732 & 42374 & 11926 & 42956 & 11926 & 42568 & 42956 & 11926
tretail

> PC1 * PC2 14994 edges

> Size of the input circuits: pc - 4790
> Computing log(pc) took 0.013743269 seconds; the result circuit has 17383 edges
> Computing pc * log(pc) took 0.075945305 seconds; the result circuit has 17383 edges
>

LoadError: SystemError: opening file "pcs/binarized_mnist_200.psdd": No such file or directory

In [9]:
using Printf: @printf

In [12]:
for dataset_name in twenty_dataset_names[1:end-1]
    pc1 = ProbabilisticCircuits.load_prob_circuit("pcs/$(dataset_name)_200.psdd");
    pc2 = ProbabilisticCircuits.load_prob_circuit("pcs/$(dataset_name)_500.psdd");
    
    println("=========================================")
    println(dataset_name)
    println("")
    t1 = @elapsed ent(pc1);
    println("")
    t2 = @elapsed xent(pc1, pc2);
    println("")
    t3 = @elapsed kld(pc1, pc2);
    println("")
    
    t4 = @elapsed entropy(pc1);
    t5 = @elapsed myxent(pc1, pc2);
    t6 = @elapsed mykld(pc1, pc2)
    
    @printf("%.3f & %.3f & %.3f & %.3f & %.3f & %.3f\n", t1, t4, t2, t5, t3, t6)
    # println("$(t1) & $(t4) & $(t2) & $(t5) & $(t3) & $(t6)")
end

accidents

> Size of the input circuits: pc - 3193
> Computing log(pc) took 0.007937096 seconds; the result circuit has 11564 edges
> Computing pc * log(pc) took 0.048401756 seconds; the result circuit has 11564 edges
> Computing int(pc * log(pc)) took 0.003376639 seconds.
> The entropy of pc is 32.40056439544114.

> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing log(pc2) took 0.02115136 seconds; the result circuit has 29891 edges
> Computing pc1 * log(pc2) took 0.12994314 seconds; the result circuit has 30007 edges
> Computing int(pc1 * log(pc2)) took 0.010002092 seconds.
> The cross entropy between pc1 and pc2 is 34.05829064870709.

> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing 1/pc2 took 0.006517435 seconds; the result circuit has 8183 edges
> Computing pc1/pc2 took 0.029060427 seconds; the result circuit has 8299 edges
> Computing log(pc1/pc2) took 0.02282265 seconds; the result circuit has 30239 edges
> Computing pc1 * log(pc1/pc2) took 0.1289370

> Computing log(pc1/pc2) took 0.039564132 seconds; the result circuit has 52888 edges
> Computing pc1 * log(pc1/pc2) took 0.086505715 seconds; the result circuit has 52888 edges
> Computing int(pc1 * log(pc1/pc2)) took 0.018640823 seconds.
> The KLD between pc1 and pc2 is 0.28268421127200144.

0.117& 0.004 & 0.216 & 0.035 & 0.265 & 0.033
cr52

> Size of the input circuits: pc - 10912
> Computing log(pc) took 0.029341262 seconds; the result circuit has 38466 edges
> Computing pc * log(pc) took 0.056876954 seconds; the result circuit has 38466 edges
> Computing int(pc * log(pc)) took 0.014164388 seconds.
> The entropy of pc is 103.67129218526738.

> Size of the input circuits: pc1 - 10912, pc2 - 14348
> Computing log(pc2) took 0.039359164 seconds; the result circuit has 51094 edges
> Computing pc1 * log(pc2) took 0.12360274 seconds; the result circuit has 51292 edges
> Computing int(pc1 * log(pc2)) took 0.017718296 seconds.
> The cross entropy between pc1 and pc2 is 103.86047923886721.



> Computing pc1 * log(pc2) took 0.255031119 seconds; the result circuit has 24131 edges
> Computing int(pc1 * log(pc2)) took 0.007941234 seconds.
> The cross entropy between pc1 and pc2 is 6.394007219999606.

> Size of the input circuits: pc1 - 2765, pc2 - 6614
> Computing 1/pc2 took 0.005218286 seconds; the result circuit has 6614 edges
> Computing pc1/pc2 took 0.071104589 seconds; the result circuit has 6634 edges
> Computing log(pc1/pc2) took 0.017309708 seconds; the result circuit has 24171 edges
> Computing pc1 * log(pc1/pc2) took 0.275870078 seconds; the result circuit has 24171 edges
> Computing int(pc1 * log(pc1/pc2)) took 0.008153223 seconds.
> The KLD between pc1 and pc2 is 0.1110244701039657.

0.116& 0.001 & 0.297 & 0.195 & 0.396 & 0.181
msweb

> Size of the input circuits: pc - 4859
> Computing log(pc) took 0.021382276 seconds; the result circuit has 17381 edges
> Computing pc * log(pc) took 0.097538783 seconds; the result circuit has 17381 edges
> Computing int(pc * log(pc

In [20]:
for dataset_name in twenty_dataset_names[1:end-1]
    pc1 = load_prob_circuit("pcs/$(dataset_name)_200.psdd");
    pc2 = load_prob_circuit("pcs/$(dataset_name)_500.psdd");
    
    println("=========================================")
    println(dataset_name)
    println("")
    t1 = @elapsed reiny_alpha_div(pc1, pc2; alpha = 1.5)
    t2 = @elapsed renyi_entropy(pc1; alpha = 1.5)
    t3 = @elapsed cs_div(pc1, pc2)
    
    @printf("& %.3f & -\n", t1)
    @printf("& %.3f & - & %.3f & -\n\n", t2, t3)
end

accidents

> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing pc1^alpha took 0.007907935 seconds; the result circuit has 3193 edges
> Computing pc2^(1-alpha) took 0.012534101 seconds; the result circuit has 8183 edges
> Computing pc1^alpha * pc2^(1-alpha) took 0.037019082 seconds; the result circuit has 8299 edges
> Computing int(pc1^alpha * pc2^(1-alpha)) took 0.005026501 seconds.
> The Reiny's Alpha Divergence between pc1 and pc2 is -6.555987547340751.
> Size of the input circuit: pc - 3193
> Computing pc^alpha took 0.005521597 seconds; the result circuit has 3193 edges
> Computing int(pc^alpha) took 0.001815432 seconds.
> Size of the input circuits: pc1 - 3193, pc2 - 8183
> Computing pc1*pc2 took 0.085990308 seconds; the result circuit has 8299 edges
> Computing pc1^2 took 0.005486886 seconds; the result circuit has 3193 edges
> Computing pc2^2 took 0.01364998 seconds; the result circuit has 8183 edges
> Computing cs div took 0.011379333 seconds.
& 0.074 & -
& 0.009 & 

> Computing pc1*pc2 took 0.047194356 seconds; the result circuit has 14546 edges
> Computing pc1^2 took 0.019879476 seconds; the result circuit has 10912 edges
> Computing pc2^2 took 0.036581666 seconds; the result circuit has 14348 edges
> Computing cs div took 0.02382227 seconds.
& 0.125 & -
& 0.031 & - & 0.143 & -

cwebkb

> Size of the input circuits: pc1 - 10598, pc2 - 13397
> Computing pc1^alpha took 0.017674892 seconds; the result circuit has 10598 edges
> Computing pc2^(1-alpha) took 0.019031089 seconds; the result circuit has 13397 edges
> Computing pc1^alpha * pc2^(1-alpha) took 0.02810324 seconds; the result circuit has 13653 edges
> Computing int(pc1^alpha * pc2^(1-alpha)) took 0.00618988 seconds.
> The Reiny's Alpha Divergence between pc1 and pc2 is -0.48145521926605633.
> Size of the input circuit: pc - 10598
> Computing pc^alpha took 0.012551507 seconds; the result circuit has 10598 edges
> Computing int(pc^alpha) took 0.006037552 seconds.
> Size of the input circuits: p

> Computing pc1*pc2 took 0.264211048 seconds; the result circuit has 7202 edges
> Computing pc1^2 took 0.004554679 seconds; the result circuit has 2779 edges
> Computing pc2^2 took 0.01484655 seconds; the result circuit has 7174 edges
> Computing cs div took 0.009300891 seconds.
& 0.339 & -
& 0.013 & - & 0.300 & -

plants

> Size of the input circuits: pc1 - 12909, pc2 - 64018
> Computing pc1^alpha took 0.038952357 seconds; the result circuit has 12909 edges
> Computing pc2^(1-alpha) took 0.180755746 seconds; the result circuit has 64018 edges
> Computing pc1^alpha * pc2^(1-alpha) took 1.454366649 seconds; the result circuit has 64070 edges
> Computing int(pc1^alpha * pc2^(1-alpha)) took 0.046293475 seconds.
> The Reiny's Alpha Divergence between pc1 and pc2 is -3.8712657353901805.
> Size of the input circuit: pc - 12909
> Computing pc^alpha took 0.07478535 seconds; the result circuit has 12909 edges
> Computing int(pc^alpha) took 0.007590373 seconds.
> Size of the input circuits: pc1 