From d2fa8be171feb56ecc09330918865a3372e1cf7d Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Thu, 6 Aug 2020 03:06:32 -0700 Subject: [PATCH 01/18] revise queries --- src/LoadSave/circuit_line_compiler.jl | 2 +- src/Logistic/queries.jl | 41 +++++++++++++++++++++--- test/Logistic/logistic_tests.jl | 29 ----------------- test/Logistic/queries_tests.jl | 46 +++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 34 deletions(-) delete mode 100644 test/Logistic/logistic_tests.jl create mode 100644 test/Logistic/queries_tests.jl diff --git a/src/LoadSave/circuit_line_compiler.jl b/src/LoadSave/circuit_line_compiler.jl index a0d09d7e..02879f55 100644 --- a/src/LoadSave/circuit_line_compiler.jl +++ b/src/LoadSave/circuit_line_compiler.jl @@ -111,7 +111,7 @@ function decorate_logistic(lines::CircuitFormatLines, logic_circuit::LogicCircui function compile(ln::BiasLine) root = id2logisticnode(ln.node_id)::Logistic⋁Node - # @assert length(node.thetas) == 1 + # @assert length(root.thetas) == 1 root.thetas[1,:] .= ln.weights end diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index 7c80e625..ce9b09d5 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,10 +1,15 @@ -export class_conditional_likelihood_per_instance +export class_conditional_likelihood_per_instance, accuracy, predict_class + +using LogicCircuits: UpDownFlow1, flow_and -using ..Probabilistic: get_downflow, get_upflow """ Class Conditional Probability """ -function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data) +@inline function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data) + + @inline downflow(or_parent::Logistic⋁Node, c) = + (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data) + compute_flows(lc, data) likelihoods = zeros(num_examples(data), classes) foreach(lc) do ln @@ -12,11 +17,39 @@ function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes # For each class. orig.thetas is 2D so used eachcol for (idx, thetaC) in enumerate(eachcol(ln.thetas)) foreach(children(ln), thetaC) do c, theta - likelihoods[:, idx] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta + down_flow = Float64.(downflow(ln, c)) + @. likelihoods[:, idx] += down_flow * theta end end end end + + @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) likelihoods end +""" +Class Predictions +""" +@inline function predict_class(lc::LogisticCircuit, classes::Int, data) + class_likelihoods = class_conditional_likelihood_per_instance(lc, classes, data) + predict_class(class_likelihoods) +end + +@inline function predict_class(class_likelihoods::AbstractMatrix) + _, mxindex = findmax(class_likelihoods; dims=2) + dropdims(getindex.(mxindex, 2); dims=2) +end + +""" +Prediction accuracy +""" +@inline accuracy(predicted_class::Vector, labels) = + Float64(sum(@. predicted_class == labels)) / length(labels) + +@inline accuracy(lc::LogisticCircuit, classes::Int, data, labels) = + accuracy(predict_class(lc, classes, data), labels) + +@inline accuracy(class_likelihoods::AbstractMatrix, labels) = + accuracy(predict_class(class_likelihoods), labels) + diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl deleted file mode 100644 index 3461a064..00000000 --- a/test/Logistic/logistic_tests.jl +++ /dev/null @@ -1,29 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - -# This tests are supposed to test queries on the circuits -@testset "Logistic Circuit Class Conditional" begin - # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to - # match with python version. - - EPS = 1e-7; - logistic_circuit = zoo_lc("little_4var.circuit", 2); - @test logistic_circuit isa LogisticCircuit; - - # Step 1. Check Probabilities for 3 samples - data = Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1]); - - true_prob = [3.43147972 4.66740416; - 4.27595352 2.83503504; - 3.67415087 4.93793472] - - CLASSES = 2 - calc_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) - - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ calc_prob[i,j] atol= EPS; - end - end -end \ No newline at end of file diff --git a/test/Logistic/queries_tests.jl b/test/Logistic/queries_tests.jl new file mode 100644 index 00000000..76e1035f --- /dev/null +++ b/test/Logistic/queries_tests.jl @@ -0,0 +1,46 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +# This tests are supposed to test queries on the circuits +@testset "Logistic Circuit Class Conditional" begin + # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to + # match with python version. + + CLASSES = 2 + + logistic_circuit = zoo_lc("little_4var.circuit", CLASSES) + @test logistic_circuit isa LogisticCircuit + + # check probabilities for binary samples + data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1]) + # true_weight_func = [3.43147972 4.66740416; + # 4.27595352 2.83503504; + # 3.67415087 4.93793472] + true_prob = [0.96867400053 0.99069084464; + 0.98629173861 0.94453994990; + 0.97525681816 0.99288164437] + + class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:3 + for j = 1:2 + @test true_prob[i,j] ≈ class_prob[i,j] + end + end + + # check probabilities for float samples + class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:3 + for j = 1:2 + @test true_prob[i,j] ≈ class_prob[i,j] + end + end + + # check predicted_classes + true_labels = [2, 1, 2] + predicted_classes = predict_class(logistic_circuit, CLASSES, data) + @test all(@. predicted_classes == true_labels) + + # check accuracy + @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 +end \ No newline at end of file From a8ea225bd9ab350ce4048fe6afe1f050acf3f92b Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Thu, 6 Aug 2020 03:26:12 -0700 Subject: [PATCH 02/18] retain old functions to mitigate errors --- src/Logistic/queries.jl | 25 ++++++++++++++++++++++++- test/Reasoning/expectation_test.jl | 4 ++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index ce9b09d5..7420c3f3 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,6 +1,29 @@ -export class_conditional_likelihood_per_instance, accuracy, predict_class +export class_conditional_weights_per_instance, + class_conditional_likelihood_per_instance, + accuracy, predict_class using LogicCircuits: UpDownFlow1, flow_and +using ..Probabilistic: get_downflow, get_upflow + +""" +Class Conditional Weights +""" +# This is the old implementation. It is retained to pass the exptation tests. +function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::Int, data) + compute_flows(lc, data) + likelihoods = zeros(num_examples(data), classes) + foreach(lc) do ln + if ln isa Logistic⋁Node + # For each class. orig.thetas is 2D so used eachcol + for (idx, thetaC) in enumerate(eachcol(ln.thetas)) + foreach(children(ln), thetaC) do c, theta + likelihoods[:, idx] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta + end + end + end + end + likelihoods +end """ Class Conditional Probability diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl index c8aafc50..9ad55b98 100644 --- a/test/Reasoning/expectation_test.jl +++ b/test/Reasoning/expectation_test.jl @@ -14,7 +14,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_conditional_weights_per_instance(lc, CLASSES, cur_data_all) true_exp[i, :] = sum(calc_p .* calc_f, dims=1) true_exp[i, :] ./= sum(calc_p) #p_observed end @@ -47,7 +47,7 @@ function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLA calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_conditional_weights_per_instance(lc, CLASSES, cur_data_all) true_mom[i, :] = sum(calc_p .* (calc_f .^ moment), dims=1) true_mom[i, :] ./= sum(calc_p) #p_observed end From 4a974cbd6336bc20e7564900ac7c965de10cdf03 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Fri, 7 Aug 2020 05:44:56 -0700 Subject: [PATCH 03/18] [logistic circuits] parameter learning --- src/Logistic/Logistic.jl | 1 + src/Logistic/parameters.jl | 50 ++++++++++++++++++++ src/Logistic/queries.jl | 23 ++++++---- test/Logistic/logistic_tests.jl | 81 +++++++++++++++++++++++++++++++++ test/Logistic/queries_tests.jl | 46 ------------------- 5 files changed, 145 insertions(+), 56 deletions(-) create mode 100644 src/Logistic/parameters.jl create mode 100644 test/Logistic/logistic_tests.jl delete mode 100644 test/Logistic/queries_tests.jl diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl index c657449f..b09f76d0 100644 --- a/src/Logistic/Logistic.jl +++ b/src/Logistic/Logistic.jl @@ -5,6 +5,7 @@ using ..Utils include("logistic_nodes.jl") include("queries.jl") +include("parameters.jl") # TODO learning diff --git a/src/Logistic/parameters.jl b/src/Logistic/parameters.jl new file mode 100644 index 00000000..7756b8e1 --- /dev/null +++ b/src/Logistic/parameters.jl @@ -0,0 +1,50 @@ +export learn_parameters + +using LogicCircuits: compute_flows +""" +Maximum likilihood estimation of parameters given data through gradient descent +""" +function learn_parameters(lc::LogisticCircuit, classes::Int, data, labels; num_epochs=30, step_size=0.1, flows_computed=false) + + @inline function one_hot(labels::Vector, classes::Int) + one_hot_labels = zeros(length(labels), classes) + for (i, j) in enumerate(labels) + one_hot_labels[i, j] = 1.0 + end + one_hot_labels + end + + one_hot_labels = one_hot(labels, classes) + if !flows_computed + compute_flows(lc, data) + end + + for _ = 1:num_epochs + class_probs = class_conditional_likelihood_per_instance(lc, classes, data; flows_computed=true) + update_parameters(lc, class_probs, one_hot_labels) + end + + nothing +end + + +@inline function update_parameters(lc::LogisticCircuit, class_probs, one_hot_labels; step_size=0.1) + error = class_probs .- one_hot_labels + + foreach(lc) do ln + if ln isa Logistic⋁Node + #TODO; check whether einsum would speed up calculations here + # For each class. orig.thetas is 2D so used eachcol + for (class, thetaC) in enumerate(eachcol(ln.thetas)) + for (idx, c) in enumerate(children(ln)) + down_flow = Float64.(downflow(ln, c)) + thetaC[idx] -= step_size * sum(error[:, class] .* down_flow) / length(down_flow) + end + end + end + end + + nothing +end + + diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index 7420c3f3..7640a0fa 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,6 +1,6 @@ export class_conditional_weights_per_instance, class_conditional_likelihood_per_instance, - accuracy, predict_class + downflow, accuracy, predict_class using LogicCircuits: UpDownFlow1, flow_and using ..Probabilistic: get_downflow, get_upflow @@ -15,9 +15,9 @@ function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::I foreach(lc) do ln if ln isa Logistic⋁Node # For each class. orig.thetas is 2D so used eachcol - for (idx, thetaC) in enumerate(eachcol(ln.thetas)) + for (class, thetaC) in enumerate(eachcol(ln.thetas)) foreach(children(ln), thetaC) do c, theta - likelihoods[:, idx] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta + likelihoods[:, class] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta end end end @@ -28,20 +28,20 @@ end """ Class Conditional Probability """ -@inline function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data) - - @inline downflow(or_parent::Logistic⋁Node, c) = - (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data) +@inline function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) + if !flows_computed + compute_flows(lc, data) + end - compute_flows(lc, data) likelihoods = zeros(num_examples(data), classes) + #TODO; check whether einsum would speed up calculations here foreach(lc) do ln if ln isa Logistic⋁Node # For each class. orig.thetas is 2D so used eachcol - for (idx, thetaC) in enumerate(eachcol(ln.thetas)) + for (class, thetaC) in enumerate(eachcol(ln.thetas)) foreach(children(ln), thetaC) do c, theta down_flow = Float64.(downflow(ln, c)) - @. likelihoods[:, idx] += down_flow * theta + @. likelihoods[:, class] += down_flow * theta end end end @@ -51,6 +51,9 @@ Class Conditional Probability likelihoods end +@inline downflow(or_parent::Logistic⋁Node, c) = + (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data) + """ Class Predictions """ diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl new file mode 100644 index 00000000..5c83fb9a --- /dev/null +++ b/test/Logistic/logistic_tests.jl @@ -0,0 +1,81 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +# This tests are supposed to test queries on the circuits +@testset "Logistic Circuit Query and Parameter Tests" begin + # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to + # match with python version. + + CLASSES = 2 + + logistic_circuit = zoo_lc("little_4var.circuit", CLASSES) + @test logistic_circuit isa LogisticCircuit + + # check probabilities for binary samples + data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1]) + # true_weight_func = [3.43147972 4.66740416; + # 4.27595352 2.83503504; + # 3.67415087 4.93793472] + true_prob = [0.9686740004857587 0.9906908446064048; + 0.9862917386522077 0.9445399501015433; + 0.9752568180725943 0.9928816444023877] + + class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:size(true_prob)[1] + for j = 1:CLASSES + @test true_prob[i,j] ≈ class_prob[i,j] + end + end + + # check probabilities for float samples + data = Float64.(data) + class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:size(true_prob)[1] + for j = 1:CLASSES + @test true_prob[i,j] ≈ class_prob[i,j] + end + end + + # check predicted_classes + true_labels = [2, 1, 2] + predicted_classes = predict_class(logistic_circuit, CLASSES, data) + @test all(predicted_classes .== true_labels) + + # check accuracy + @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 + + # check parameter updates + original_literal_parameters = Dict{Int, Vector{Float64}}() + foreach(logistic_circuit) do ln + if ln isa Logistic⋁Node + foreach(ln.children, eachrow(ln.thetas)) do c, theta + if c isa LogisticLiteral + original_literal_parameters[c.literal] = copy(theta) + end + end + end + end + + one_hot_labels = [0.0 1.0; + 1.0 0.0; + 0.0 1.0] + true_error = true_prob .- one_hot_labels + step_size = 0.1 + learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) + + foreach(logistic_circuit) do ln + if ln isa Logistic⋁Node + foreach(ln.children, eachrow(ln.thetas)) do c, theta + if c isa LogisticLiteral + for class = 1:CLASSES + true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] + updated_amount = theta[class] - original_literal_parameters[c.literal][class] + @test updated_amount ≈ true_update_amount + end + end + end + end + end + +end \ No newline at end of file diff --git a/test/Logistic/queries_tests.jl b/test/Logistic/queries_tests.jl deleted file mode 100644 index 76e1035f..00000000 --- a/test/Logistic/queries_tests.jl +++ /dev/null @@ -1,46 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - -# This tests are supposed to test queries on the circuits -@testset "Logistic Circuit Class Conditional" begin - # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to - # match with python version. - - CLASSES = 2 - - logistic_circuit = zoo_lc("little_4var.circuit", CLASSES) - @test logistic_circuit isa LogisticCircuit - - # check probabilities for binary samples - data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1]) - # true_weight_func = [3.43147972 4.66740416; - # 4.27595352 2.83503504; - # 3.67415087 4.93793472] - true_prob = [0.96867400053 0.99069084464; - 0.98629173861 0.94453994990; - 0.97525681816 0.99288164437] - - class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ class_prob[i,j] - end - end - - # check probabilities for float samples - class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ class_prob[i,j] - end - end - - # check predicted_classes - true_labels = [2, 1, 2] - predicted_classes = predict_class(logistic_circuit, CLASSES, data) - @test all(@. predicted_classes == true_labels) - - # check accuracy - @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 -end \ No newline at end of file From 159b5d9c1d7274991e7009c16133900560fbd082 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Fri, 7 Aug 2020 18:06:45 -0700 Subject: [PATCH 04/18] [logistic circuits] speed up queries --- Project.toml | 4 ++++ src/Logistic/queries.jl | 40 ++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Project.toml b/Project.toml index cc639b0c..3e0ef01a 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,8 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" [compat] BlossomV = "0.4" @@ -32,4 +34,6 @@ Reexport = "0.2" SimpleWeightedGraphs = "1.1" StatsBase = "0.33" StatsFuns = "0.9" +LoopVectorization = "0.8.20" +MLDatasets = "0.4, 0.5" julia = "1.5" diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index 7640a0fa..aa4a7981 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -2,23 +2,25 @@ export class_conditional_weights_per_instance, class_conditional_likelihood_per_instance, downflow, accuracy, predict_class -using LogicCircuits: UpDownFlow1, flow_and +using LogicCircuits: UpDownFlow1, flow_and, or_nodes using ..Probabilistic: get_downflow, get_upflow +using LoopVectorization: @avx """ Class Conditional Weights """ # This is the old implementation. It is retained to pass the exptation tests. -function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::Int, data) - compute_flows(lc, data) +function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) + if !flows_computed + compute_flows(lc, data) + end + likelihoods = zeros(num_examples(data), classes) - foreach(lc) do ln - if ln isa Logistic⋁Node - # For each class. orig.thetas is 2D so used eachcol - for (class, thetaC) in enumerate(eachcol(ln.thetas)) - foreach(children(ln), thetaC) do c, theta - likelihoods[:, class] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta - end + foreach(or_nodes(lc)) do ln + # For each class. orig.thetas is 2D so used eachcol + for (class, thetaC) in enumerate(eachcol(ln.thetas)) + foreach(children(ln), thetaC) do c, theta + likelihoods[:, class] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta end end end @@ -34,20 +36,14 @@ Class Conditional Probability end likelihoods = zeros(num_examples(data), classes) - #TODO; check whether einsum would speed up calculations here - foreach(lc) do ln - if ln isa Logistic⋁Node - # For each class. orig.thetas is 2D so used eachcol - for (class, thetaC) in enumerate(eachcol(ln.thetas)) - foreach(children(ln), thetaC) do c, theta - down_flow = Float64.(downflow(ln, c)) - @. likelihoods[:, class] += down_flow * theta - end - end - end + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float64.(downflow(ln, c)) + @avx @. likelihoods += flow * theta' + end end - @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) + @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) likelihoods end From 3354c76ff85815515a18836481b6b88914d403bc Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Fri, 7 Aug 2020 18:12:27 -0700 Subject: [PATCH 05/18] [logistic circuits] speed up queries --- src/Logistic/queries.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index aa4a7981..1e60b16d 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -17,13 +17,12 @@ function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::I likelihoods = zeros(num_examples(data), classes) foreach(or_nodes(lc)) do ln - # For each class. orig.thetas is 2D so used eachcol - for (class, thetaC) in enumerate(eachcol(ln.thetas)) - foreach(children(ln), thetaC) do c, theta - likelihoods[:, class] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta - end + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float64.(get_downflow(ln) .& get_upflow(c)) + @avx @. likelihoods += flow * theta' end end + likelihoods end From 25e046c0dedc5208395e2468f18d8f691a70c4c6 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Fri, 7 Aug 2020 19:50:03 -0700 Subject: [PATCH 06/18] [logistic circuits] speed up parameter learning --- src/Logistic/parameters.jl | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Logistic/parameters.jl b/src/Logistic/parameters.jl index 7756b8e1..329b90f5 100644 --- a/src/Logistic/parameters.jl +++ b/src/Logistic/parameters.jl @@ -1,6 +1,8 @@ export learn_parameters -using LogicCircuits: compute_flows +using LogicCircuits: compute_flows, or_nodes +using LoopVectorization: @avx + """ Maximum likilihood estimation of parameters given data through gradient descent """ @@ -29,18 +31,15 @@ end @inline function update_parameters(lc::LogisticCircuit, class_probs, one_hot_labels; step_size=0.1) + num_samples = Float64(size(one_hot_labels)[1]) error = class_probs .- one_hot_labels - - foreach(lc) do ln - if ln isa Logistic⋁Node - #TODO; check whether einsum would speed up calculations here - # For each class. orig.thetas is 2D so used eachcol - for (class, thetaC) in enumerate(eachcol(ln.thetas)) - for (idx, c) in enumerate(children(ln)) - down_flow = Float64.(downflow(ln, c)) - thetaC[idx] -= step_size * sum(error[:, class] .* down_flow) / length(down_flow) - end - end + + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float64.(downflow(ln, c)) + @avx update_amount = flow' * error / num_samples * step_size + update_amount = dropdims(update_amount; dims=1) + @avx @. theta -= update_amount end end From 0e94776f8ed7656daf2e75bc237b8aadb2c5db06 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Wed, 12 Aug 2020 13:17:25 -0700 Subject: [PATCH 07/18] [wip] renaming; some speed benchmark test results --- src/Logistic/logistic_nodes.jl | 4 +- src/Logistic/parameters.jl | 2 +- src/Logistic/queries.jl | 94 ++++++++++++++++++++++-------- test/Logistic/logistic_tests.jl | 15 ++--- test/Reasoning/expectation_test.jl | 4 +- 5 files changed, 83 insertions(+), 36 deletions(-) diff --git a/src/Logistic/logistic_nodes.jl b/src/Logistic/logistic_nodes.jl index 950dd8c8..632867d8 100644 --- a/src/Logistic/logistic_nodes.jl +++ b/src/Logistic/logistic_nodes.jl @@ -54,11 +54,11 @@ A logistic disjunction node (Or node) """ mutable struct Logistic⋁Node <: LogisticInnerNode children::Vector{<:LogisticCircuit} - thetas::Array{Float64, 2} + thetas::Array{Float32, 2} data counter::UInt32 Logistic⋁Node(children, class::Int) = begin - new(convert(Vector{LogisticCircuit}, children), init_array(Float64, length(children), class), nothing, 0) + new(convert(Vector{LogisticCircuit}, children), init_array(Float32, length(children), class), nothing, 0) end end diff --git a/src/Logistic/parameters.jl b/src/Logistic/parameters.jl index 329b90f5..1bb8219c 100644 --- a/src/Logistic/parameters.jl +++ b/src/Logistic/parameters.jl @@ -22,7 +22,7 @@ function learn_parameters(lc::LogisticCircuit, classes::Int, data, labels; num_e end for _ = 1:num_epochs - class_probs = class_conditional_likelihood_per_instance(lc, classes, data; flows_computed=true) + class_probs = class_likelihood_per_instance(lc, classes, data; flows_computed=true) update_parameters(lc, class_probs, one_hot_labels) end diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index 1e60b16d..f4324637 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,59 +1,105 @@ -export class_conditional_weights_per_instance, - class_conditional_likelihood_per_instance, - downflow, accuracy, predict_class +export class_weights_per_instance, class_likelihood_per_instance, downflow, accuracy, predict_class, + do_nothing_test, access_flow_test, dummy_calculation_test, dummy_calculation_test2 -using LogicCircuits: UpDownFlow1, flow_and, or_nodes +using LogicCircuits: UpDownFlow1, UpDownFlow2, or_nodes using ..Probabilistic: get_downflow, get_upflow -using LoopVectorization: @avx +using LoopVectorization: @avx, vifelse + """ -Class Conditional Weights +Class Conditional Probability """ -# This is the old implementation. It is retained to pass the exptation tests. -function class_conditional_weights_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) +# with flows computed (2.03 s (80704 allocations: 3.32 MiB)) +# 5.136 s (275778 allocations: 6.99 GiB) on mnist.circuit +@inline function class_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) if !flows_computed compute_flows(lc, data) end + + weights = class_weights_per_instance(lc, classes, data; flows_computed=true) + @avx @. 1.0 / (1.0 + exp(-weights)) +end - likelihoods = zeros(num_examples(data), classes) +@inline function class_weights_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) + if !flows_computed + compute_flows(lc, data) + end + + weights = zeros(num_examples(data), classes) foreach(or_nodes(lc)) do ln foreach(eachrow(ln.thetas), children(ln)) do theta, c - flow = Float64.(get_downflow(ln) .& get_upflow(c)) - @avx @. likelihoods += flow * theta' - end + flow = Float32.(downflow(ln, c)) + @avx @. weights += flow * theta' + end end + weights +end + +# 5.795 ms (72350 allocations: 6.58 MiB) +@inline function do_nothing_test(lc::LogisticCircuit, classes::Int, data) + likelihoods = zeros(num_examples(data), classes) + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + nothing + end + end + @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) likelihoods end -""" -Class Conditional Probability -""" -@inline function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) - if !flows_computed - compute_flows(lc, data) +# 1.574 s (193840 allocations: 6.98 GiB) +@inline function access_flow_test(lc::LogisticCircuit, classes::Int, data) + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float32.(downflow(ln, c)) + end end - + nothing +end + +# 2.943 s (82272 allocations: 6.74 MiB) +@inline function dummy_calculation_test(lc::LogisticCircuit, classes::Int, data) likelihoods = zeros(num_examples(data), classes) foreach(or_nodes(lc)) do ln foreach(eachrow(ln.thetas), children(ln)) do theta, c - flow = Float64.(downflow(ln, c)) - @avx @. likelihoods += flow * theta' + @avx @. likelihoods += likelihoods + end + end + @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) + likelihoods +end + +# 4.790 s (193843 allocations: 6.99 GiB) +@inline function dummy_calculation_test2(lc::LogisticCircuit, classes::Int, data) + likelihoods = zeros(num_examples(data), classes) + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float32.(downflow(ln, c)) + @avx @. likelihoods += likelihoods end end - @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) likelihoods end @inline downflow(or_parent::Logistic⋁Node, c) = - (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data) + (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data, or_parent.data.upflow) + +@inline flow_and(downflow_n::BitVector, c_flow::UpDownFlow2, upflow_n::BitVector) = + @. downflow_n & c_flow.prime_flow & c_flow.sub_flow + +@inline flow_and(downflow_n::Vector{<:AbstractFloat}, c_flow::UpDownFlow2, upflow_n::Vector{<:AbstractFloat}) = + @avx @. downflow_n * c_flow.prime_flow * make_finite(c_flow.sub_flow/upflow_n) + +@inline make_finite(x::T) where T = vifelse(isfinite(x), x, zero(T)) + """ Class Predictions """ @inline function predict_class(lc::LogisticCircuit, classes::Int, data) - class_likelihoods = class_conditional_likelihood_per_instance(lc, classes, data) + class_likelihoods = class_likelihood_per_instance(lc, classes, data) predict_class(class_likelihoods) end diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl index 5c83fb9a..a58ddc76 100644 --- a/test/Logistic/logistic_tests.jl +++ b/test/Logistic/logistic_tests.jl @@ -17,11 +17,11 @@ using ProbabilisticCircuits # true_weight_func = [3.43147972 4.66740416; # 4.27595352 2.83503504; # 3.67415087 4.93793472] - true_prob = [0.9686740004857587 0.9906908446064048; - 0.9862917386522077 0.9445399501015433; - 0.9752568180725943 0.9928816444023877] + true_prob = [0.9686740008311808 0.9906908445371728; + 0.9862917392724188 0.9445399509069984; + 0.9752568185086389 0.9928816444223209] - class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) for i = 1:size(true_prob)[1] for j = 1:CLASSES @test true_prob[i,j] ≈ class_prob[i,j] @@ -29,8 +29,8 @@ using ProbabilisticCircuits end # check probabilities for float samples - data = Float64.(data) - class_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + data = Float32.(data) + class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) for i = 1:size(true_prob)[1] for j = 1:CLASSES @test true_prob[i,j] ≈ class_prob[i,j] @@ -60,6 +60,7 @@ using ProbabilisticCircuits one_hot_labels = [0.0 1.0; 1.0 0.0; 0.0 1.0] + one_hot_labels = Float32.(one_hot_labels) true_error = true_prob .- one_hot_labels step_size = 0.1 learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) @@ -71,7 +72,7 @@ using ProbabilisticCircuits for class = 1:CLASSES true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] updated_amount = theta[class] - original_literal_parameters[c.literal][class] - @test updated_amount ≈ true_update_amount + @test updated_amount ≈ true_update_amount atol=1e-7 end end end diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl index 9ad55b98..4f886b74 100644 --- a/test/Reasoning/expectation_test.jl +++ b/test/Reasoning/expectation_test.jl @@ -14,7 +14,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_weights_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all) true_exp[i, :] = sum(calc_p .* calc_f, dims=1) true_exp[i, :] ./= sum(calc_p) #p_observed end @@ -47,7 +47,7 @@ function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLA calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_weights_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all) true_mom[i, :] = sum(calc_p .* (calc_f .^ moment), dims=1) true_mom[i, :] ./= sum(calc_p) #p_observed end From 495ed9a9633163a5830223a096456b778093f7a8 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Sun, 16 Aug 2020 19:01:49 -0700 Subject: [PATCH 08/18] [test] expectation: bypass precision problem --- test/Reasoning/expectation_test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl index 4f886b74..97d1cc8a 100644 --- a/test/Reasoning/expectation_test.jl +++ b/test/Reasoning/expectation_test.jl @@ -3,7 +3,7 @@ using LogicCircuits using ProbabilisticCircuits function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int) - EPS = 1e-7; + EPS = 1e-5; COUNT = size(data)[1] # Compute True expectation brute force true_exp = zeros(COUNT, CLASSES) From cffa0f89b084daa3462cb283528582cd7b58cb42 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Sun, 16 Aug 2020 22:39:31 -0700 Subject: [PATCH 09/18] [feature] logistic circuit: gpu support --- Project.toml | 2 + src/Logistic/Logistic.jl | 5 +- .../{parameters.jl => learn_parameters.jl} | 0 src/Logistic/parameter_circuit.jl | 127 ++++++++++++++++++ 4 files changed, 132 insertions(+), 2 deletions(-) rename src/Logistic/{parameters.jl => learn_parameters.jl} (100%) create mode 100644 src/Logistic/parameter_circuit.jl diff --git a/Project.toml b/Project.toml index 3e0ef01a..368a92ad 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.1" [deps] BlossomV = "6c721016-9dae-5d90-abf6-67daaccb2332" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599" @@ -25,6 +26,7 @@ MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" [compat] BlossomV = "0.4" Clustering = "0.14" +CUDA = "1.2" DataStructures = "0.17" LightGraphs = "1.3" LogicCircuits = "0.1.1" diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl index b09f76d0..cd45019e 100644 --- a/src/Logistic/Logistic.jl +++ b/src/Logistic/Logistic.jl @@ -5,8 +5,9 @@ using ..Utils include("logistic_nodes.jl") include("queries.jl") -include("parameters.jl") +include("parameter_circuit.jl") +include("learn_parameters.jl") -# TODO learning +# TODO structure learning end \ No newline at end of file diff --git a/src/Logistic/parameters.jl b/src/Logistic/learn_parameters.jl similarity index 100% rename from src/Logistic/parameters.jl rename to src/Logistic/learn_parameters.jl diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl new file mode 100644 index 00000000..1a88c68a --- /dev/null +++ b/src/Logistic/parameter_circuit.jl @@ -0,0 +1,127 @@ +using CUDA +using LogicCircuits + +export ParameterCircuit, CuParameterCircuit +export class_likelihood_and_flow, class_weights_and_flow + +# In a parameter circuit, +# 1 is true, 2 is false +const TRUE_BITS = Int32(1) +const FALSE_BITS = Int32(2) +# 3:nf+2 are nf positive literals +# nf+3:2nf+2 are nf negative literals +# 2nf+2:end are inner decision nodes + +struct ParameterCircuit + decisions::Matrix{Int32} + elements::Matrix{Int32} + parameters::Matrix{Float32} +end + + +ParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin + @assert is⋁gate(circuit) + decisions::Vector{Int32} = Vector{Int32}() + elements::Vector{Int32} = Vector{Int32}() + parameters::Vector{Float32} = Vector{Float32}() + num_decisions::Int32 = 2 * num_features + 2 + num_elements::Int32 = 0 + # num_parameters always equals num_elements + + f_con(n) = istrue(n) ? TRUE_BITS : FALSE_BITS + f_lit(n) = ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n)) + f_and(n, cs) = begin + @assert length(cs) == 2 + Int32[cs[1], cs[2]] + end + f_or(n, cs) = begin + first_element = num_elements + 1 + foreach(cs, eachrow(n.thetas)) do c, theta + @assert size(theta)[1] == nc + append!(parameters, theta) + num_elements += 1 + if c isa Vector{Int32} + @assert length(c) == 2 + push!(elements, c[1], c[2]) + else + @assert c isa Int32 + push!(elements, c, TRUE_BITS) + end + end + num_decisions += 1 + push!(decisions, first_element, num_elements) + num_decisions + end + foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, Union{Int32, Vector{Int32}}) + decisions2 = reshape(decisions, 2, :) + elements2 = reshape(elements, 2, :) + parameters_nc = reshape(parameters, nc, :) + @assert size(elements2)[2] == size(parameters_nc)[2] + return ParameterCircuit(decisions2, elements2, parameters_nc) +end + +struct CuParameterCircuit + decisions::CuMatrix{Int32} + elements::CuMatrix{Int32} + parameters::CuMatrix{Float32} + CuParameterCircuit(parameter_circuit::ParameterCircuit) = new(CuMatrix(parameter_circuit.decisions), CuMatrix(parameter_circuit.elements), CuMatrix(parameter_circuit.parameters)) +end + +function class_likelihood_and_flow(parameter_circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) + cw, flow = class_weights_and_flow(parameter_circuit, nc, data, reuse_up, reuse_down) + return @. 1.0 / (1.0 + exp(-cw)), flow +end + +function class_weights_and_flow(parameter_circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) + ne = num_examples(data) + nf = num_features(data) + nd = size(parameter_circuit.decisions)[2] + nl = 2 + 2 * nf + + v = value_matrix(CuMatrix, ne, nl + nd, reuse_up) + flow = if reuse_down isa CuArray{Float32} && size(reuse_down) == (ne, nl + nd) + reuse_down + else + CUDA.zeros(Float32, ne, nl + nd) + end + cw = CUDA.zeros(Float32, ne, nc) + + set_leaf_layer(v, data) + num_threads_per_block = 256 + numblocks = ceil(Int, ne/num_threads_per_block) + CUDA.@sync begin + @cuda threads=num_threads_per_block blocks=numblocks class_weights_flow_kernel_cuda(cw, flow, v, parameter_circuit.decisions, parameter_circuit.elements, parameter_circuit.parameters, ne, nc, nd, nl) + end + return cw, flow +end + +function class_weights_flow_kernel_cuda(cw, flow, v, decisions, elements, parameters, ne, nc, nd, nl) + evaluate_kernel_cuda(v, decisions, elements, ne, nd, nl) + + index = (blockIdx().x - 1) * blockDim().x + threadIdx().x + stride = blockDim().x * gridDim().x + for j = index:stride:ne + flow[j, nl + nd] = v[j, nl + nd] + for i = nd:-1:1 + first_elem = decisions[1, i] + last_elem = decisions[2, i] + n_up = v[j, nl + i] + if n_up > zero(Float32) + n_down = flow[j, nl + i] + for k = first_elem:last_elem + e1 = elements[1, k] + e2 = elements[2, k] + c_up = v[j, e1] * v[j, e2] + additional_flow = c_up / n_up * n_down + flow[j, e1] += additional_flow + flow[j, e2] += additional_flow + for class = 1:nc + cw[j, class] += additional_flow * parameters[class, k] + end + end + end + end + end + + return nothing +end \ No newline at end of file From 973ff15bd51fa9358c4af5d94bb7f64ab1303702 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 01:08:09 -0700 Subject: [PATCH 10/18] [feature] logistic circuit: gpu w/ auto batch --- src/Logistic/parameter_circuit.jl | 193 ++++++++++++++++++++++++++++-- 1 file changed, 185 insertions(+), 8 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 1a88c68a..c0ca6d7c 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -4,6 +4,10 @@ using LogicCircuits export ParameterCircuit, CuParameterCircuit export class_likelihood_and_flow, class_weights_and_flow +export LayeredParameterCircuit, CuLayeredParameterCircuit +export class_likelihood_and_flow2, class_weights_and_flow2 + + # In a parameter circuit, # 1 is true, 2 is false const TRUE_BITS = Int32(1) @@ -12,13 +16,13 @@ const FALSE_BITS = Int32(2) # nf+3:2nf+2 are nf negative literals # 2nf+2:end are inner decision nodes + struct ParameterCircuit decisions::Matrix{Int32} elements::Matrix{Int32} parameters::Matrix{Float32} end - ParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin @assert is⋁gate(circuit) decisions::Vector{Int32} = Vector{Int32}() @@ -67,15 +71,15 @@ struct CuParameterCircuit CuParameterCircuit(parameter_circuit::ParameterCircuit) = new(CuMatrix(parameter_circuit.decisions), CuMatrix(parameter_circuit.elements), CuMatrix(parameter_circuit.parameters)) end -function class_likelihood_and_flow(parameter_circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) - cw, flow = class_weights_and_flow(parameter_circuit, nc, data, reuse_up, reuse_down) +function class_likelihood_and_flow(circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) + cw, flow = class_weights_and_flow(circuit, nc, data, reuse_up, reuse_down) return @. 1.0 / (1.0 + exp(-cw)), flow end -function class_weights_and_flow(parameter_circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) +function class_weights_and_flow(circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) ne = num_examples(data) nf = num_features(data) - nd = size(parameter_circuit.decisions)[2] + nd = size(circuit.decisions)[2] nl = 2 + 2 * nf v = value_matrix(CuMatrix, ne, nl + nd, reuse_up) @@ -90,12 +94,12 @@ function class_weights_and_flow(parameter_circuit::CuParameterCircuit, nc::Integ num_threads_per_block = 256 numblocks = ceil(Int, ne/num_threads_per_block) CUDA.@sync begin - @cuda threads=num_threads_per_block blocks=numblocks class_weights_flow_kernel_cuda(cw, flow, v, parameter_circuit.decisions, parameter_circuit.elements, parameter_circuit.parameters, ne, nc, nd, nl) + @cuda threads=num_threads_per_block blocks=numblocks pass_down_and_sum_cw_layer_kernel_cuda(cw, flow, v, circuit.decisions, circuit.elements, circuit.parameters, ne, nc, nd, nl) end return cw, flow end -function class_weights_flow_kernel_cuda(cw, flow, v, decisions, elements, parameters, ne, nc, nd, nl) +function pass_down_and_sum_cw_layer_kernel_cuda(cw, flow, v, decisions, elements, parameters, ne, nc, nd, nl) evaluate_kernel_cuda(v, decisions, elements, ne, nd, nl) index = (blockIdx().x - 1) * blockDim().x + threadIdx().x @@ -124,4 +128,177 @@ function class_weights_flow_kernel_cuda(cw, flow, v, decisions, elements, parame end return nothing -end \ No newline at end of file +end + + + +struct ParameterLayer + decisions::Matrix{Int32} + elements::Matrix{Int32} + parameters::Matrix{Float32} +end + +struct LayeredParameterCircuit + layers::Vector{ParameterLayer} +end + +LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin + @assert is⋁gate(circuit) + decisions::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + elements::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + parameters::Vector{Vector{Float32}} = Vector{Vector{Float32}}() + num_decisions::Int32 = 2 * num_features + 2 + num_elements::Vector{Int32} = Vector{Int32}() + # num_parameters always equals num_elements + + ensure_layer(i) = begin + if length(decisions) < i + # add a new layer + push!(decisions, Int32[]) + push!(elements, Int32[]) + push!(parameters, Float32[]) + push!(num_elements, 0) + end + end + + f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_BITS : FALSE_BITS) + f_lit(n) = LayeredDecisionId(0, + ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n))) + + f_and(n, cs) = begin + @assert length(cs) == 2 + LayeredDecisionId[cs[1], cs[2]] + end + f_or(n, cs) = begin + num_decisions += 1 + # determine layer + layer_id = zero(Int32) + for c in cs + if c isa Vector{LayeredDecisionId} + @assert length(c) == 2 + layer_id = max(layer_id, c[1].layer_id, c[2].layer_id) + else + @assert c isa LayeredDecisionId + layer_id = max(layer_id, c.layer_id) + end + end + layer_id += 1 + ensure_layer(layer_id) + first_element = num_elements[layer_id] + 1 + foreach(cs, eachrow(n.thetas)) do c, theta + @assert size(theta)[1] == nc + append!(parameters[layer_id], theta) + num_elements[layer_id] += 1 + if c isa Vector{LayeredDecisionId} + push!(elements[layer_id], c[1].decision_id, c[2].decision_id) + else + push!(elements[layer_id], c.decision_id, TRUE_BITS) + end + end + push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id]) + LayeredDecisionId(layer_id, num_decisions) + end + + foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, + Union{LayeredDecisionId,Vector{LayeredDecisionId}}) + + layers = map(decisions, elements, parameters) do d, e, p + ParameterLayer(reshape(d, 3, :), reshape(e, 2, :), reshape(p, nc, :)) + end + return LayeredParameterCircuit(layers) +end + +struct CuParameterLayer + decisions::CuMatrix{Int32} + elements::CuMatrix{Int32} + parameters::CuMatrix{Float32} + CuParameterLayer(l::ParameterLayer) = new(CuMatrix(l.decisions), CuMatrix(l.elements), CuMatrix(l.parameters)) +end + +struct CuLayeredParameterCircuit + layers::Vector{CuParameterLayer} + CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(map(CuParameterLayer, l.layers)) +end + +num_decisions(l::CuParameterLayer) = size(l.decisions)[2] +num_decisions(l::CuLayeredParameterCircuit) = sum(num_decisions, l.layers) + +function class_likelihood_and_flow2(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) + cw, flow = class_weights_and_flow2(circuit, nc, data, reuse_up, reuse_down) + return @. 1.0 / (1.0 + exp(-cw)), flow +end + +function class_weights_and_flow2(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) + ne = num_examples(data) + nf = num_features(data) + nd = num_decisions(circuit) + nl = 2 + 2 * nf + v = value_matrix(CuMatrix, ne, nl + nd, reuse_up) + set_leaf_layer(v, data) + + dec_per_thread = 8 + CUDA.@sync for layer in circuit.layers + ndl = num_decisions(layer) + num_threads = balance_threads(ne, ndl / dec_per_thread, 8) + num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) + @cuda threads=num_threads blocks=num_blocks evaluate_layer_kernel_cuda2(v, layer.decisions, layer.elements) + end + + flow = if reuse_down isa CuArray{Float32} && size(reuse_down) == (ne, nl + nd) + reuse_down .= zero(Float32) + reuse_down + else + CUDA.zeros(Float32, ne, nl + nd) + end + flow[:,end] .= v[:,end] # set flow at root + cw = CUDA.zeros(Float32, ne, nc) # initialize class_weights + + dec_per_thread = 4 + CUDA.@sync for layer in Iterators.reverse(circuit.layers) + ndl = num_decisions(layer) + num_threads = balance_threads(ne, ndl / dec_per_thread, 8) + num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) + @cuda threads=num_threads blocks=num_blocks pass_down_and_sum_cw_layer_kernel_cuda2(cw, flow, v, layer.decisions, layer.elements, layer.parameters) + end + + return cw, flow +end + +function pass_down_and_sum_cw_layer_kernel_cuda2(cw, flow, v, decisions, elements, parameters) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + ne, nc = size(cw) + _, num_decisions = size(decisions) + + for j = index_x:stride_x:ne + for i = index_y:stride_y:num_decisions + decision_id = @inbounds decisions[1, i] + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + n_up = @inbounds v[j, decision_id] + if n_up > zero(Float32) + n_down = @inbounds flow[j, decision_id] + for e = first_elem:last_elem + e1 = @inbounds elements[1, e] + e2 = @inbounds elements[2, e] + c_up = @inbounds (v[j, e1] * v[j, e2]) + additional_flow = c_up / n_up * n_down + # following needs to be memory safe + CUDA.@atomic flow[j, e1] += additional_flow #atomic is automatically inbounds + CUDA.@atomic flow[j, e2] += additional_flow #atomic is automatically inbounds + for class=1:nc + CUDA.@atomic cw[j, class] += additional_flow * parameters[class, e] + end + end + end + end + end + + return nothing +end + + + +# TODO; paramter learning \ No newline at end of file From d8822530fb34e30d95811880e53858529d184b66 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 01:19:19 -0700 Subject: [PATCH 11/18] [test] expectation: bypass precision problem --- test/Reasoning/expectation_test.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl index 97d1cc8a..8959c381 100644 --- a/test/Reasoning/expectation_test.jl +++ b/test/Reasoning/expectation_test.jl @@ -36,7 +36,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data end function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int, moment::Int) - EPS = 1e-7; + EPS = 1e-5; COUNT = size(data)[1] # Compute True moment brute force true_mom = zeros(COUNT, CLASSES) From 6eccc2f1a3fb393af0696e2695bef46748688237 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 04:32:41 -0700 Subject: [PATCH 12/18] [refactor] logistic circuits: reuse bit_circuit --- src/Logistic/parameter_circuit.jl | 241 ++++++------------------------ 1 file changed, 44 insertions(+), 197 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index c0ca6d7c..93959780 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -1,145 +1,17 @@ using CUDA using LogicCircuits -export ParameterCircuit, CuParameterCircuit -export class_likelihood_and_flow, class_weights_and_flow - export LayeredParameterCircuit, CuLayeredParameterCircuit -export class_likelihood_and_flow2, class_weights_and_flow2 - +export class_likelihood_and_flow, class_weights_and_flow -# In a parameter circuit, +# in a parameter circuit # 1 is true, 2 is false -const TRUE_BITS = Int32(1) -const FALSE_BITS = Int32(2) -# 3:nf+2 are nf positive literals -# nf+3:2nf+2 are nf negative literals -# 2nf+2:end are inner decision nodes - - -struct ParameterCircuit - decisions::Matrix{Int32} - elements::Matrix{Int32} - parameters::Matrix{Float32} -end - -ParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin - @assert is⋁gate(circuit) - decisions::Vector{Int32} = Vector{Int32}() - elements::Vector{Int32} = Vector{Int32}() - parameters::Vector{Float32} = Vector{Float32}() - num_decisions::Int32 = 2 * num_features + 2 - num_elements::Int32 = 0 - # num_parameters always equals num_elements - - f_con(n) = istrue(n) ? TRUE_BITS : FALSE_BITS - f_lit(n) = ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n)) - f_and(n, cs) = begin - @assert length(cs) == 2 - Int32[cs[1], cs[2]] - end - f_or(n, cs) = begin - first_element = num_elements + 1 - foreach(cs, eachrow(n.thetas)) do c, theta - @assert size(theta)[1] == nc - append!(parameters, theta) - num_elements += 1 - if c isa Vector{Int32} - @assert length(c) == 2 - push!(elements, c[1], c[2]) - else - @assert c isa Int32 - push!(elements, c, TRUE_BITS) - end - end - num_decisions += 1 - push!(decisions, first_element, num_elements) - num_decisions - end - foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, Union{Int32, Vector{Int32}}) - decisions2 = reshape(decisions, 2, :) - elements2 = reshape(elements, 2, :) - parameters_nc = reshape(parameters, nc, :) - @assert size(elements2)[2] == size(parameters_nc)[2] - return ParameterCircuit(decisions2, elements2, parameters_nc) -end - -struct CuParameterCircuit - decisions::CuMatrix{Int32} - elements::CuMatrix{Int32} - parameters::CuMatrix{Float32} - CuParameterCircuit(parameter_circuit::ParameterCircuit) = new(CuMatrix(parameter_circuit.decisions), CuMatrix(parameter_circuit.elements), CuMatrix(parameter_circuit.parameters)) -end - -function class_likelihood_and_flow(circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) - cw, flow = class_weights_and_flow(circuit, nc, data, reuse_up, reuse_down) - return @. 1.0 / (1.0 + exp(-cw)), flow -end - -function class_weights_and_flow(circuit::CuParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) - ne = num_examples(data) - nf = num_features(data) - nd = size(circuit.decisions)[2] - nl = 2 + 2 * nf - - v = value_matrix(CuMatrix, ne, nl + nd, reuse_up) - flow = if reuse_down isa CuArray{Float32} && size(reuse_down) == (ne, nl + nd) - reuse_down - else - CUDA.zeros(Float32, ne, nl + nd) - end - cw = CUDA.zeros(Float32, ne, nc) - - set_leaf_layer(v, data) - num_threads_per_block = 256 - numblocks = ceil(Int, ne/num_threads_per_block) - CUDA.@sync begin - @cuda threads=num_threads_per_block blocks=numblocks pass_down_and_sum_cw_layer_kernel_cuda(cw, flow, v, circuit.decisions, circuit.elements, circuit.parameters, ne, nc, nd, nl) - end - return cw, flow -end - -function pass_down_and_sum_cw_layer_kernel_cuda(cw, flow, v, decisions, elements, parameters, ne, nc, nd, nl) - evaluate_kernel_cuda(v, decisions, elements, ne, nd, nl) - - index = (blockIdx().x - 1) * blockDim().x + threadIdx().x - stride = blockDim().x * gridDim().x - for j = index:stride:ne - flow[j, nl + nd] = v[j, nl + nd] - for i = nd:-1:1 - first_elem = decisions[1, i] - last_elem = decisions[2, i] - n_up = v[j, nl + i] - if n_up > zero(Float32) - n_down = flow[j, nl + i] - for k = first_elem:last_elem - e1 = elements[1, k] - e2 = elements[2, k] - c_up = v[j, e1] * v[j, e2] - additional_flow = c_up / n_up * n_down - flow[j, e1] += additional_flow - flow[j, e2] += additional_flow - for class = 1:nc - cw[j, class] += additional_flow * parameters[class, k] - end - end - end - end - end - - return nothing -end - - - -struct ParameterLayer - decisions::Matrix{Int32} - elements::Matrix{Int32} - parameters::Matrix{Float32} -end +const TRUE_ID = Int32(1) +const FALSE_ID = Int32(2) struct LayeredParameterCircuit - layers::Vector{ParameterLayer} + layered_circuit::LayeredBitCircuit + layered_parameters::Vector{Matrix{Float32}} end LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin @@ -161,7 +33,7 @@ LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Int end end - f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_BITS : FALSE_BITS) + f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_ID : FALSE_ID) f_lit(n) = LayeredDecisionId(0, ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n))) @@ -192,7 +64,7 @@ LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Int if c isa Vector{LayeredDecisionId} push!(elements[layer_id], c[1].decision_id, c[2].decision_id) else - push!(elements[layer_id], c.decision_id, TRUE_BITS) + push!(elements[layer_id], c.decision_id, TRUE_ID) end end push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id]) @@ -202,69 +74,55 @@ LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Int foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, Union{LayeredDecisionId,Vector{LayeredDecisionId}}) - layers = map(decisions, elements, parameters) do d, e, p - ParameterLayer(reshape(d, 3, :), reshape(e, 2, :), reshape(p, nc, :)) + circuit_layers = map(decisions, elements) do d, e + Layer(reshape(d, 3, :), reshape(e, 2, :)) end - return LayeredParameterCircuit(layers) -end - -struct CuParameterLayer - decisions::CuMatrix{Int32} - elements::CuMatrix{Int32} - parameters::CuMatrix{Float32} - CuParameterLayer(l::ParameterLayer) = new(CuMatrix(l.decisions), CuMatrix(l.elements), CuMatrix(l.parameters)) + parameter_layers = map(parameters) do p + reshape(p, nc, :) + end + return LayeredParameterCircuit(LayeredBitCircuit(circuit_layers), parameter_layers) end struct CuLayeredParameterCircuit - layers::Vector{CuParameterLayer} - CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(map(CuParameterLayer, l.layers)) + layered_circuit::CuLayeredBitCircuit + layered_parameters::Vector{CuMatrix{Float32}} + CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) end -num_decisions(l::CuParameterLayer) = size(l.decisions)[2] -num_decisions(l::CuLayeredParameterCircuit) = sum(num_decisions, l.layers) - -function class_likelihood_and_flow2(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) - cw, flow = class_weights_and_flow2(circuit, nc, data, reuse_up, reuse_down) +function class_likelihood_and_flow(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) + cw, flow = class_weights_and_flow(circuit, nc, data, reuse_up, reuse_down, reuse_cw) return @. 1.0 / (1.0 + exp(-cw)), flow end -function class_weights_and_flow2(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing) - ne = num_examples(data) - nf = num_features(data) - nd = num_decisions(circuit) - nl = 2 + 2 * nf - v = value_matrix(CuMatrix, ne, nl + nd, reuse_up) - set_leaf_layer(v, data) - - dec_per_thread = 8 - CUDA.@sync for layer in circuit.layers - ndl = num_decisions(layer) - num_threads = balance_threads(ne, ndl / dec_per_thread, 8) - num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) - @cuda threads=num_threads blocks=num_blocks evaluate_layer_kernel_cuda2(v, layer.decisions, layer.elements) - end +function class_weights_and_flow(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) + _, edge_flow, _ = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) + cw = calculate_class_weights(circuit, nc, data, edge_flow, reuse_cw) + return cw, edge_flow +end - flow = if reuse_down isa CuArray{Float32} && size(reuse_down) == (ne, nl + nd) - reuse_down .= zero(Float32) - reuse_down +function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, flow, reuse_cw=nothing) + ne = num_examples(data) + cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc) + reuse_cw .= zero(Float32) + reuse_cw else - CUDA.zeros(Float32, ne, nl + nd) + CUDA.zeros(Float32, ne, nc) end - flow[:,end] .= v[:,end] # set flow at root - cw = CUDA.zeros(Float32, ne, nc) # initialize class_weights - - dec_per_thread = 4 - CUDA.@sync for layer in Iterators.reverse(circuit.layers) - ndl = num_decisions(layer) + + dec_per_thread = 8 + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) num_threads = balance_threads(ne, ndl / dec_per_thread, 8) num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) - @cuda threads=num_threads blocks=num_blocks pass_down_and_sum_cw_layer_kernel_cuda2(cw, flow, v, layer.decisions, layer.elements, layer.parameters) + @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, flow, circuit_layer.decisions, parameter_layer) end - return cw, flow + return cw end -function pass_down_and_sum_cw_layer_kernel_cuda2(cw, flow, v, decisions, elements, parameters) +function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, parameters) index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y stride_x = blockDim().x * gridDim().x @@ -274,23 +132,12 @@ function pass_down_and_sum_cw_layer_kernel_cuda2(cw, flow, v, decisions, element for j = index_x:stride_x:ne for i = index_y:stride_y:num_decisions - decision_id = @inbounds decisions[1, i] first_elem = @inbounds decisions[2, i] last_elem = @inbounds decisions[3, i] - n_up = @inbounds v[j, decision_id] - if n_up > zero(Float32) - n_down = @inbounds flow[j, decision_id] - for e = first_elem:last_elem - e1 = @inbounds elements[1, e] - e2 = @inbounds elements[2, e] - c_up = @inbounds (v[j, e1] * v[j, e2]) - additional_flow = c_up / n_up * n_down - # following needs to be memory safe - CUDA.@atomic flow[j, e1] += additional_flow #atomic is automatically inbounds - CUDA.@atomic flow[j, e2] += additional_flow #atomic is automatically inbounds - for class=1:nc - CUDA.@atomic cw[j, class] += additional_flow * parameters[class, e] - end + for e = first_elem:last_elem + # following needs to be memory safe + for class=1:nc + CUDA.@atomic cw[j, class] += flow[j, e] * parameters[class, e] #atomic is automatically inbounds end end end @@ -301,4 +148,4 @@ end -# TODO; paramter learning \ No newline at end of file +# TODO; parameter learning \ No newline at end of file From 7e6ffead080b316b7d3ad2d01178f419cd5326ec Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 07:10:19 -0700 Subject: [PATCH 13/18] [fix] parameter_circuit: edge flow --- src/Logistic/parameter_circuit.jl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 93959780..974e34e0 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -2,7 +2,7 @@ using CUDA using LogicCircuits export LayeredParameterCircuit, CuLayeredParameterCircuit -export class_likelihood_and_flow, class_weights_and_flow +export class_likelihood, class_weights # in a parameter circuit # 1 is true, 2 is false @@ -89,15 +89,15 @@ struct CuLayeredParameterCircuit CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) end -function class_likelihood_and_flow(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) - cw, flow = class_weights_and_flow(circuit, nc, data, reuse_up, reuse_down, reuse_cw) - return @. 1.0 / (1.0 + exp(-cw)), flow +function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) + cw, node_flow, edge_flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + return @. 1.0 / (1.0 + exp(-cw)), node_flow, edge_flow, v end -function class_weights_and_flow(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) - _, edge_flow, _ = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) +function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) + node_flow, edge_flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) cw = calculate_class_weights(circuit, nc, data, edge_flow, reuse_cw) - return cw, edge_flow + return cw, node_flow, edge_flow, v end function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, flow, reuse_cw=nothing) @@ -112,11 +112,12 @@ function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer dec_per_thread = 8 CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) circuit_layer = circuit.layered_circuit.layers[i] + flow_layer = flow[i] parameter_layer = circuit.layered_parameters[i] ndl = num_decisions(circuit_layer) num_threads = balance_threads(ne, ndl / dec_per_thread, 8) num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) - @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, flow, circuit_layer.decisions, parameter_layer) + @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, flow_layer, circuit_layer.decisions, parameter_layer) end return cw From 1e6d67ab72c3b3fa64b22d64090f8f5fef6bee98 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 14:37:41 -0700 Subject: [PATCH 14/18] [feature] logistic circuit: learn parameter w/ gpu --- src/Logistic/parameter_circuit.jl | 71 ++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 974e34e0..2cab8367 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -3,6 +3,7 @@ using LogicCircuits export LayeredParameterCircuit, CuLayeredParameterCircuit export class_likelihood, class_weights +export one_hot, learn_parameters, update_parameters # in a parameter circuit # 1 is true, 2 is false @@ -91,7 +92,8 @@ end function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) cw, node_flow, edge_flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) - return @. 1.0 / (1.0 + exp(-cw)), node_flow, edge_flow, v + one = Float32(1.0) + return @. one / (one + exp(-cw)), node_flow, edge_flow, v end function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) @@ -109,14 +111,14 @@ function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer CUDA.zeros(Float32, ne, nc) end - dec_per_thread = 8 + dec_per_thread = 4 CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) circuit_layer = circuit.layered_circuit.layers[i] flow_layer = flow[i] parameter_layer = circuit.layered_parameters[i] ndl = num_decisions(circuit_layer) num_threads = balance_threads(ne, ndl / dec_per_thread, 8) - num_blocks = (ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)) + num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread) @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, flow_layer, circuit_layer.decisions, parameter_layer) end @@ -136,9 +138,9 @@ function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, paramete first_elem = @inbounds decisions[2, i] last_elem = @inbounds decisions[3, i] for e = first_elem:last_elem - # following needs to be memory safe + # following are memory safe for class=1:nc - CUDA.@atomic cw[j, class] += flow[j, e] * parameters[class, e] #atomic is automatically inbounds + @inbounds cw[j, class] += @inbounds flow[j, e] * parameters[class, e] end end end @@ -148,5 +150,62 @@ function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, paramete end +function one_hot(labels::Vector, nc::Integer) + ne = length(labels) + one_hot_labels = zeros(Float32, ne, nc) + for (i, j) in enumerate(labels) + one_hot_labels[i, j + 1] = 1.0 + end + one_hot_labels +end + +function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001) + cp, node_flow, edge_flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + update_parameters(circuit, labels, cp, edge_flow, step_size) + for _ = 2:num_epochs + cp, node_flow, edge_flow, v = class_likelihood(circuit, nc, data, v, (node_flow, edge_flow), cp) + update_parameters(circuit, labels, cp, edge_flow, step_size) + end + return nothing +end + +function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, flow, step_size=0.0001) + _, nc = size(labels) + step_size = Float32(step_size) + class_per_thread = 1 + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + flow_layer = flow[i] + parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) + num_threads = balance_threads(ndl, nc / class_per_thread, 8) + num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, ndl / num_threads[2] / class_per_thread) + @cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size) + end + return nothing +end -# TODO; parameter learning \ No newline at end of file +function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + ne, nc = size(labels) + _, num_decisions = size(decisions) + + for class = index_y:stride_y:nc + for i = index_x:stride_x:num_decisions + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + for e = first_elem:last_elem + for j = 1:ne + u = @inbounds flow[j, e] * (cp[j, class] - labels[j, class]) * step_size + # following are memory safe + @inbounds parameters[class, e] -= u + end + end + end + end + + return nothing +end \ No newline at end of file From 6c0c19a7320662d9dcc88df50e5cf5b4b87fabe9 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Mon, 17 Aug 2020 14:55:11 -0700 Subject: [PATCH 15/18] [fix] logistic circuit: memory safe --- src/Logistic/parameter_circuit.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 2cab8367..48b340e5 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -138,9 +138,9 @@ function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, paramete first_elem = @inbounds decisions[2, i] last_elem = @inbounds decisions[3, i] for e = first_elem:last_elem - # following are memory safe + # following needs to be memory safe for class=1:nc - @inbounds cw[j, class] += @inbounds flow[j, e] * parameters[class, e] + @CUDA.atomic cw[j, class] += flow[j, e] * parameters[class, e] # atomic is automatically inbounds end end end From 68f11feafa6d4bdd4161148a08d0295ab1dec34a Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Wed, 19 Aug 2020 22:10:16 -0500 Subject: [PATCH 16/18] [feature] logistic circuit: recover edge flow from v and node_flow --- src/Logistic/parameter_circuit.jl | 39 ++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 48b340e5..91da4bb7 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -91,18 +91,18 @@ struct CuLayeredParameterCircuit end function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) - cw, node_flow, edge_flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + cw, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) one = Float32(1.0) - return @. one / (one + exp(-cw)), node_flow, edge_flow, v + return @. one / (one + exp(-cw)), flow, v end function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) - node_flow, edge_flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) - cw = calculate_class_weights(circuit, nc, data, edge_flow, reuse_cw) - return cw, node_flow, edge_flow, v + flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) + cw = calculate_class_weights(circuit, nc, data, v, flow, reuse_cw) + return cw, flow, v end -function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, flow, reuse_cw=nothing) +function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, v, flow, reuse_cw=nothing) ne = num_examples(data) cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc) reuse_cw .= zero(Float32) @@ -114,18 +114,17 @@ function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer dec_per_thread = 4 CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) circuit_layer = circuit.layered_circuit.layers[i] - flow_layer = flow[i] parameter_layer = circuit.layered_parameters[i] ndl = num_decisions(circuit_layer) num_threads = balance_threads(ne, ndl / dec_per_thread, 8) num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread) - @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, flow_layer, circuit_layer.decisions, parameter_layer) + @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, v, flow, circuit_layer.decisions, circuit_layer.elements, parameter_layer) end return cw end -function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, parameters) +function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, elements, parameters) index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y stride_x = blockDim().x * gridDim().x @@ -135,12 +134,21 @@ function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, paramete for j = index_x:stride_x:ne for i = index_y:stride_y:num_decisions - first_elem = @inbounds decisions[2, i] - last_elem = @inbounds decisions[3, i] - for e = first_elem:last_elem - # following needs to be memory safe - for class=1:nc - @CUDA.atomic cw[j, class] += flow[j, e] * parameters[class, e] # atomic is automatically inbounds + decision_id = @inbounds decisions[1, i] + n_up = @inbounds v[j, decision_id] + if n_up > zero(Float32) + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + n_down = @inbounds flow[j, decision_id] + for e = first_elem:last_elem + e1 = @inbounds elements[1,first_elem] + e2 = @inbounds elements[2,first_elem] + e_up = @inbounds (v[j, e1] * v[j, e2]) + edge_flow = e_up / n_up * n_down + # following needs to be memory safe + for class=1:nc + @CUDA.atomic cw[j, class] += edge_flow * parameters[class, e] # atomic is automatically inbounds + end end end end @@ -149,7 +157,6 @@ function calculate_class_weights_layer_kernel_cuda(cw, flow, decisions, paramete return nothing end - function one_hot(labels::Vector, nc::Integer) ne = length(labels) one_hot_labels = zeros(Float32, ne, nc) From f9fc54f9dfee340817e385ce23bbe07587b0feca Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Wed, 2 Sep 2020 00:38:59 -0500 Subject: [PATCH 17/18] chore: formats --- src/Logistic/logistic_nodes.jl | 16 ++++++--------- src/Logistic/parameter_circuit.jl | 33 +++++++++++++++++++------------ src/Probabilistic/prob_nodes.jl | 6 ++++-- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/Logistic/logistic_nodes.jl b/src/Logistic/logistic_nodes.jl index 632867d8..f480879f 100644 --- a/src/Logistic/logistic_nodes.jl +++ b/src/Logistic/logistic_nodes.jl @@ -1,12 +1,8 @@ export LogisticCircuit, - LogisticLeafNode, - LogisticInnerNode, - LogisticLiteral, - Logistic⋀Node, - Logistic⋁Node, - classes, - num_parameters_perclass + LogisticLeafNode, LogisticInnerNode, + LogisticLiteral, Logistic⋀Node, Logistic⋁Node, + num_classes, num_parameters_per_class ##################### # Infrastructure for logistic circuit nodes @@ -54,7 +50,7 @@ A logistic disjunction node (Or node) """ mutable struct Logistic⋁Node <: LogisticInnerNode children::Vector{<:LogisticCircuit} - thetas::Array{Float32, 2} + thetas::Matrix{Float32} data counter::UInt32 Logistic⋁Node(children, class::Int) = begin @@ -77,11 +73,11 @@ import LogicCircuits.GateType # make available for extension import LogicCircuits: children # make available for extension @inline children(n::LogisticInnerNode) = n.children -@inline classes(n::Logistic⋁Node) = size(n.thetas)[2] +@inline num_classes(n::Logistic⋁Node) = size(n.thetas)[2] import ..Utils: num_parameters @inline num_parameters(c::LogisticCircuit) = sum(n -> num_children(n) * classes(n), ⋁_nodes(c)) -@inline num_parameters_perclass(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c)) +@inline num_parameters_per_class(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c)) ##################### # constructors and conversions diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl index 91da4bb7..dc338e77 100644 --- a/src/Logistic/parameter_circuit.jl +++ b/src/Logistic/parameter_circuit.jl @@ -90,6 +90,8 @@ struct CuLayeredParameterCircuit CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) end + + function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) cw, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) one = Float32(1.0) @@ -141,8 +143,8 @@ function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, eleme last_elem = @inbounds decisions[3, i] n_down = @inbounds flow[j, decision_id] for e = first_elem:last_elem - e1 = @inbounds elements[1,first_elem] - e2 = @inbounds elements[2,first_elem] + e1 = @inbounds elements[1, first_elem] + e2 = @inbounds elements[2, first_elem] e_up = @inbounds (v[j, e1] * v[j, e2]) edge_flow = e_up / n_up * n_down # following needs to be memory safe @@ -157,6 +159,8 @@ function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, eleme return nothing end + + function one_hot(labels::Vector, nc::Integer) ne = length(labels) one_hot_labels = zeros(Float32, ne, nc) @@ -167,26 +171,26 @@ function one_hot(labels::Vector, nc::Integer) end function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001) - cp, node_flow, edge_flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) - update_parameters(circuit, labels, cp, edge_flow, step_size) + cp, flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + update_parameters(circuit, labels, cp, flow, step_size) for _ = 2:num_epochs - cp, node_flow, edge_flow, v = class_likelihood(circuit, nc, data, v, (node_flow, edge_flow), cp) - update_parameters(circuit, labels, cp, edge_flow, step_size) + cp, flow, v = class_likelihood(circuit, nc, data, v, flow, cp) + update_parameters(circuit, labels, cp, v, flow, step_size) end return nothing end -function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, flow, step_size=0.0001) +function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, v, flow, step_size=0.0001) _, nc = size(labels) step_size = Float32(step_size) - class_per_thread = 1 CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) circuit_layer = circuit.layered_circuit.layers[i] flow_layer = flow[i] parameter_layer = circuit.layered_parameters[i] ndl = num_decisions(circuit_layer) - num_threads = balance_threads(ndl, nc / class_per_thread, 8) - num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, ndl / num_threads[2] / class_per_thread) + num_threads = balance_threads(ndl, nc, 6) + num_threads = num_threads[1], num_threads[2], + num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, nc / num_threads[2]), 4 @cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size) end return nothing @@ -195,8 +199,10 @@ end function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size) index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + index_z = (blockIdx().z - 1) * blockDim().z + threadIdx().z stride_x = blockDim().x * gridDim().x stride_y = blockDim().y * gridDim().y + stride_z = blockDim().z * gridDim().z ne, nc = size(labels) _, num_decisions = size(decisions) @@ -205,9 +211,10 @@ function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parame first_elem = @inbounds decisions[2, i] last_elem = @inbounds decisions[3, i] for e = first_elem:last_elem - for j = 1:ne - u = @inbounds flow[j, e] * (cp[j, class] - labels[j, class]) * step_size - # following are memory safe + for j = index_z:stride_z:ne + edge_flow = e_up / n_up * n_down + u = @inbounds edge_flow * (cp[j, class] - labels[j, class]) * step_size + # following needs to be memory safe @inbounds parameters[class, e] -= u end end diff --git a/src/Probabilistic/prob_nodes.jl b/src/Probabilistic/prob_nodes.jl index f002baea..4cb10f16 100644 --- a/src/Probabilistic/prob_nodes.jl +++ b/src/Probabilistic/prob_nodes.jl @@ -1,5 +1,7 @@ -export PlainProbCircuit, ProbLeafNode, ProbInnerNode, ProbLiteralNode, Prob⋀Node, -Prob⋁Node +export + PlainProbCircuit, + ProbLeafNode, ProbInnerNode, + ProbLiteralNode, Prob⋀Node, Prob⋁Node ##################### # Infrastructure for probabilistic circuit nodes From 4e7ae1234878fe8b3eb35549adccd175ae437080 Mon Sep 17 00:00:00 2001 From: YitaoLiang Date: Thu, 3 Sep 2020 01:59:15 -0500 Subject: [PATCH 18/18] [refactoring] logistic circuits: class probs based on bit_circuit --- src/Logistic/Logistic.jl | 4 +- src/Logistic/logistic_nodes.jl | 2 +- src/Logistic/queries.jl | 201 +++++++++++++++++++------------- test/Logistic/logistic_tests.jl | 62 +++++----- 4 files changed, 156 insertions(+), 113 deletions(-) diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl index cd45019e..d5b03b10 100644 --- a/src/Logistic/Logistic.jl +++ b/src/Logistic/Logistic.jl @@ -5,8 +5,8 @@ using ..Utils include("logistic_nodes.jl") include("queries.jl") -include("parameter_circuit.jl") -include("learn_parameters.jl") +# include("parameter_circuit.jl") +# include("learn_parameters.jl") # TODO structure learning diff --git a/src/Logistic/logistic_nodes.jl b/src/Logistic/logistic_nodes.jl index f480879f..0232ca2b 100644 --- a/src/Logistic/logistic_nodes.jl +++ b/src/Logistic/logistic_nodes.jl @@ -50,7 +50,7 @@ A logistic disjunction node (Or node) """ mutable struct Logistic⋁Node <: LogisticInnerNode children::Vector{<:LogisticCircuit} - thetas::Matrix{Float32} + thetas::Matrix{Float64} data counter::UInt32 Logistic⋁Node(children, class::Int) = begin diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index f4324637..7ceba404 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,122 +1,165 @@ -export class_weights_per_instance, class_likelihood_per_instance, downflow, accuracy, predict_class, - do_nothing_test, access_flow_test, dummy_calculation_test, dummy_calculation_test2 +export class_likelihood_per_instance, class_weights_per_instance -using LogicCircuits: UpDownFlow1, UpDownFlow2, or_nodes -using ..Probabilistic: get_downflow, get_upflow +using CUDA using LoopVectorization: @avx, vifelse + """ Class Conditional Probability """ -# with flows computed (2.03 s (80704 allocations: 3.32 MiB)) -# 5.136 s (275778 allocations: 6.99 GiB) on mnist.circuit -@inline function class_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) - if !flows_computed - compute_flows(lc, data) + +function bitcircuit_with_params(lc, nc, data) + params::Vector{Vector{Float64}} = Vector{Vector{Float64}}() + on_decision(n, cs, layer_id, decision_id, first_element, last_element) = begin + if isnothing(n) + # @assert first_element == last_element + push!(params, zeros(Float64, nc)) + else + # @assert last_element-first_element+1 == length(n.log_probs) "$last_element-$first_element+1 != $(length(n.log_probs))" + for theta in eachrow(n.thetas) + push!(params, theta) + end + end end - - weights = class_weights_per_instance(lc, classes, data; flows_computed=true) - @avx @. 1.0 / (1.0 + exp(-weights)) + bc = BitCircuit(lc, data; on_decision) + (bc, permutedims(hcat(params...), (2, 1))) end -@inline function class_weights_per_instance(lc::LogisticCircuit, classes::Int, data; flows_computed=false) - if !flows_computed - compute_flows(lc, data) - end - - weights = zeros(num_examples(data), classes) - foreach(or_nodes(lc)) do ln - foreach(eachrow(ln.thetas), children(ln)) do theta, c - flow = Float32.(downflow(ln, c)) - @avx @. weights += flow * theta' - end - end - - weights +function class_likelihood_per_instance(lc::LogicCircuit, nc::Int, data) + cw = class_weights_per_instance(lc, nc, data) + isgpu(data) ? (@. 1.0 / (1.0 + exp(-cw))) : (@. @avx 1.0 / (1.0 + exp(-cw))) end -# 5.795 ms (72350 allocations: 6.58 MiB) -@inline function do_nothing_test(lc::LogisticCircuit, classes::Int, data) - likelihoods = zeros(num_examples(data), classes) - foreach(or_nodes(lc)) do ln - foreach(eachrow(ln.thetas), children(ln)) do theta, c - nothing - end +function class_weights_per_instance(lc::LogisticCircuit, nc::Int, data) + bc, params = bitcircuit_with_params(lc, nc, data) + if isgpu(data) + class_weights_per_instance_gpu(to_gpu(bc), data, to_gpu(params)) + else + class_weights_per_instance_cpu(bc, data, params) end - @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) - likelihoods end -# 1.574 s (193840 allocations: 6.98 GiB) -@inline function access_flow_test(lc::LogisticCircuit, classes::Int, data) - foreach(or_nodes(lc)) do ln - foreach(eachrow(ln.thetas), children(ln)) do theta, c - flow = Float32.(downflow(ln, c)) - end +function class_weights_per_instance_cpu(bc, data, params) + ne::Int = num_examples(data) + nc::Int = size(params, 2) + cw::Matrix{Float64} = zeros(Float64, ne, nc) + cw_lock::Threads.ReentrantLock = Threads.ReentrantLock() + + @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, locks) + if els_start != els_end + lock(cw_lock) do # TODO: move lock to inner loop? + for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, p] & values[i, s] & flows[i, dec_id] + first_true_bit = trailing_zeros(edge_flow) + 1 + last_true_bit = 64 - leading_zeros(edge_flow) + @simd for j = first_true_bit:last_true_bit + if get_bit(edge_flow, j) + ex_id = ((i-1) << 6) + j + for class = 1:size(cw, 2) + @inbounds cw[ex_id, class] += params[el_id, class] + end + end + end + end + end + end + nothing end - nothing -end -# 2.943 s (82272 allocations: 6.74 MiB) -@inline function dummy_calculation_test(lc::LogisticCircuit, classes::Int, data) - likelihoods = zeros(num_examples(data), classes) - foreach(or_nodes(lc)) do ln - foreach(eachrow(ln.thetas), children(ln)) do theta, c - @avx @. likelihoods += likelihoods - end + @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, locks) + if els_start != els_end + lock(cw_lock) do # TODO: move lock to inner loop? + @avx for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, p] * values[i, s] / values[i, dec_id] * flows[i, dec_id] + edge_flow = vifelse(isfinite(edge_flow), edge_flow, zero(Float32)) + for class = 1:size(cw, 2) + @inbounds cw[i, class] += edge_flow * params[el_id, class] + end + end + end + end + nothing end - @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) - likelihoods -end -# 4.790 s (193843 allocations: 6.99 GiB) -@inline function dummy_calculation_test2(lc::LogisticCircuit, classes::Int, data) - likelihoods = zeros(num_examples(data), classes) - foreach(or_nodes(lc)) do ln - foreach(eachrow(ln.thetas), children(ln)) do theta, c - flow = Float32.(downflow(ln, c)) - @avx @. likelihoods += likelihoods - end + if isbinarydata(data) + compute_values_flows(bc, data; on_edge = on_edge_binary) + else + compute_values_flows(bc, data; on_edge = on_edge_float) end - @avx @. likelihoods = 1.0 / (1.0 + exp(-likelihoods)) - likelihoods + + return cw end -@inline downflow(or_parent::Logistic⋁Node, c) = - (c.data isa UpDownFlow1) ? c.data.downflow : flow_and(or_parent.data.downflow, c.data, or_parent.data.upflow) +function class_weights_per_instance_gpu(bc, data, params) + ne::Int = num_examples(data) + nc::Int = size(params, 2) + cw::CuMatrix{Float64} = CUDA.zeros(Float64, num_examples(data), nc) + cw_device = CUDA.cudaconvert(cw) + params_device = CUDA.cudaconvert(params) + + @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, chunk_id, edge_flow) + if els_start != els_end + first_true_bit = 1+trailing_zeros(edge_flow) + last_true_bit = 64-leading_zeros(edge_flow) + for j = first_true_bit:last_true_bit + if get_bit(edge_flow, j) + ex_id = ((chunk_id-1) << 6) + j + for class = 1:size(cw_device, 2) + CUDA.@atomic cw_device[ex_id, class] += params_device[el_id, class] + end + end + end + end + nothing + end -@inline flow_and(downflow_n::BitVector, c_flow::UpDownFlow2, upflow_n::BitVector) = - @. downflow_n & c_flow.prime_flow & c_flow.sub_flow + @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, ex_id, edge_flow) + if els_start != els_end + for class = 1:size(cw_device, 2) + CUDA.@atomic cw_device[ex_id, class] += edge_flow * params_device[el_id, class] + end + end + nothing + end + + if isbinarydata(data) + v,f = compute_values_flows(bc, data; on_edge = on_edge_binary) + else + @assert isfpdata(data) "Only floating point and binary data are supported" + v,f = compute_values_flows(bc, data; on_edge = on_edge_float) + end + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some effort -@inline flow_and(downflow_n::Vector{<:AbstractFloat}, c_flow::UpDownFlow2, upflow_n::Vector{<:AbstractFloat}) = - @avx @. downflow_n * c_flow.prime_flow * make_finite(c_flow.sub_flow/upflow_n) + return cw +end -@inline make_finite(x::T) where T = vifelse(isfinite(x), x, zero(T)) """ Class Predictions """ -@inline function predict_class(lc::LogisticCircuit, classes::Int, data) - class_likelihoods = class_likelihood_per_instance(lc, classes, data) +function predict_class(lc::LogisticCircuit, nc::Int, data) + class_likelihoods = class_likelihood_per_instance(lc, nc, data) predict_class(class_likelihoods) end -@inline function predict_class(class_likelihoods::AbstractMatrix) +function predict_class(class_likelihoods) _, mxindex = findmax(class_likelihoods; dims=2) dropdims(getindex.(mxindex, 2); dims=2) end + + """ Prediction accuracy """ -@inline accuracy(predicted_class::Vector, labels) = - Float64(sum(@. predicted_class == labels)) / length(labels) +accuracy(lc::LogisticCircuit, nc::Int, data, labels) = + accuracy(predict_class(lc, nc, data), labels) -@inline accuracy(lc::LogisticCircuit, classes::Int, data, labels) = - accuracy(predict_class(lc, classes, data), labels) +accuracy(predicted_class, labels) = + Float64(sum(@. predicted_class == labels)) / length(labels) -@inline accuracy(class_likelihoods::AbstractMatrix, labels) = +accuracy(class_likelihoods, labels) = accuracy(predict_class(class_likelihoods), labels) - diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl index a58ddc76..9a63f0aa 100644 --- a/test/Logistic/logistic_tests.jl +++ b/test/Logistic/logistic_tests.jl @@ -45,38 +45,38 @@ using ProbabilisticCircuits # check accuracy @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 - # check parameter updates - original_literal_parameters = Dict{Int, Vector{Float64}}() - foreach(logistic_circuit) do ln - if ln isa Logistic⋁Node - foreach(ln.children, eachrow(ln.thetas)) do c, theta - if c isa LogisticLiteral - original_literal_parameters[c.literal] = copy(theta) - end - end - end - end + # # check parameter updates + # original_literal_parameters = Dict{Int, Vector{Float64}}() + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # original_literal_parameters[c.literal] = copy(theta) + # end + # end + # end + # end - one_hot_labels = [0.0 1.0; - 1.0 0.0; - 0.0 1.0] - one_hot_labels = Float32.(one_hot_labels) - true_error = true_prob .- one_hot_labels - step_size = 0.1 - learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) + # one_hot_labels = [0.0 1.0; + # 1.0 0.0; + # 0.0 1.0] + # one_hot_labels = Float32.(one_hot_labels) + # true_error = true_prob .- one_hot_labels + # step_size = 0.1 + # learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) - foreach(logistic_circuit) do ln - if ln isa Logistic⋁Node - foreach(ln.children, eachrow(ln.thetas)) do c, theta - if c isa LogisticLiteral - for class = 1:CLASSES - true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] - updated_amount = theta[class] - original_literal_parameters[c.literal][class] - @test updated_amount ≈ true_update_amount atol=1e-7 - end - end - end - end - end + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # for class = 1:CLASSES + # true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] + # updated_amount = theta[class] - original_literal_parameters[c.literal][class] + # @test updated_amount ≈ true_update_amount atol=1e-7 + # end + # end + # end + # end + # end end \ No newline at end of file