diff --git a/Project.toml b/Project.toml index cc639b0c..368a92ad 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.1.1" [deps] BlossomV = "6c721016-9dae-5d90-abf6-67daaccb2332" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599" @@ -19,10 +20,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" [compat] BlossomV = "0.4" Clustering = "0.14" +CUDA = "1.2" DataStructures = "0.17" LightGraphs = "1.3" LogicCircuits = "0.1.1" @@ -32,4 +36,6 @@ Reexport = "0.2" SimpleWeightedGraphs = "1.1" StatsBase = "0.33" StatsFuns = "0.9" +LoopVectorization = "0.8.20" +MLDatasets = "0.4, 0.5" julia = "1.5" diff --git a/src/LoadSave/circuit_line_compiler.jl b/src/LoadSave/circuit_line_compiler.jl index a0d09d7e..02879f55 100644 --- a/src/LoadSave/circuit_line_compiler.jl +++ b/src/LoadSave/circuit_line_compiler.jl @@ -111,7 +111,7 @@ function decorate_logistic(lines::CircuitFormatLines, logic_circuit::LogicCircui function compile(ln::BiasLine) root = id2logisticnode(ln.node_id)::Logistic⋁Node - # @assert length(node.thetas) == 1 + # @assert length(root.thetas) == 1 root.thetas[1,:] .= ln.weights end diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl index c657449f..d5b03b10 100644 --- a/src/Logistic/Logistic.jl +++ b/src/Logistic/Logistic.jl @@ -5,7 +5,9 @@ using ..Utils include("logistic_nodes.jl") include("queries.jl") +# include("parameter_circuit.jl") +# include("learn_parameters.jl") -# TODO learning +# TODO structure learning end \ No newline at end of file diff --git a/src/Logistic/learn_parameters.jl b/src/Logistic/learn_parameters.jl new file mode 100644 index 00000000..1bb8219c --- /dev/null +++ b/src/Logistic/learn_parameters.jl @@ -0,0 +1,49 @@ +export learn_parameters + +using LogicCircuits: compute_flows, or_nodes +using LoopVectorization: @avx + +""" +Maximum likilihood estimation of parameters given data through gradient descent +""" +function learn_parameters(lc::LogisticCircuit, classes::Int, data, labels; num_epochs=30, step_size=0.1, flows_computed=false) + + @inline function one_hot(labels::Vector, classes::Int) + one_hot_labels = zeros(length(labels), classes) + for (i, j) in enumerate(labels) + one_hot_labels[i, j] = 1.0 + end + one_hot_labels + end + + one_hot_labels = one_hot(labels, classes) + if !flows_computed + compute_flows(lc, data) + end + + for _ = 1:num_epochs + class_probs = class_likelihood_per_instance(lc, classes, data; flows_computed=true) + update_parameters(lc, class_probs, one_hot_labels) + end + + nothing +end + + +@inline function update_parameters(lc::LogisticCircuit, class_probs, one_hot_labels; step_size=0.1) + num_samples = Float64(size(one_hot_labels)[1]) + error = class_probs .- one_hot_labels + + foreach(or_nodes(lc)) do ln + foreach(eachrow(ln.thetas), children(ln)) do theta, c + flow = Float64.(downflow(ln, c)) + @avx update_amount = flow' * error / num_samples * step_size + update_amount = dropdims(update_amount; dims=1) + @avx @. theta -= update_amount + end + end + + nothing +end + + diff --git a/src/Logistic/logistic_nodes.jl b/src/Logistic/logistic_nodes.jl index 950dd8c8..0232ca2b 100644 --- a/src/Logistic/logistic_nodes.jl +++ b/src/Logistic/logistic_nodes.jl @@ -1,12 +1,8 @@ export LogisticCircuit, - LogisticLeafNode, - LogisticInnerNode, - LogisticLiteral, - Logistic⋀Node, - Logistic⋁Node, - classes, - num_parameters_perclass + LogisticLeafNode, LogisticInnerNode, + LogisticLiteral, Logistic⋀Node, Logistic⋁Node, + num_classes, num_parameters_per_class ##################### # Infrastructure for logistic circuit nodes @@ -54,11 +50,11 @@ A logistic disjunction node (Or node) """ mutable struct Logistic⋁Node <: LogisticInnerNode children::Vector{<:LogisticCircuit} - thetas::Array{Float64, 2} + thetas::Matrix{Float64} data counter::UInt32 Logistic⋁Node(children, class::Int) = begin - new(convert(Vector{LogisticCircuit}, children), init_array(Float64, length(children), class), nothing, 0) + new(convert(Vector{LogisticCircuit}, children), init_array(Float32, length(children), class), nothing, 0) end end @@ -77,11 +73,11 @@ import LogicCircuits.GateType # make available for extension import LogicCircuits: children # make available for extension @inline children(n::LogisticInnerNode) = n.children -@inline classes(n::Logistic⋁Node) = size(n.thetas)[2] +@inline num_classes(n::Logistic⋁Node) = size(n.thetas)[2] import ..Utils: num_parameters @inline num_parameters(c::LogisticCircuit) = sum(n -> num_children(n) * classes(n), ⋁_nodes(c)) -@inline num_parameters_perclass(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c)) +@inline num_parameters_per_class(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c)) ##################### # constructors and conversions diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl new file mode 100644 index 00000000..dc338e77 --- /dev/null +++ b/src/Logistic/parameter_circuit.jl @@ -0,0 +1,225 @@ +using CUDA +using LogicCircuits + +export LayeredParameterCircuit, CuLayeredParameterCircuit +export class_likelihood, class_weights +export one_hot, learn_parameters, update_parameters + +# in a parameter circuit +# 1 is true, 2 is false +const TRUE_ID = Int32(1) +const FALSE_ID = Int32(2) + +struct LayeredParameterCircuit + layered_circuit::LayeredBitCircuit + layered_parameters::Vector{Matrix{Float32}} +end + +LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin + @assert is⋁gate(circuit) + decisions::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + elements::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + parameters::Vector{Vector{Float32}} = Vector{Vector{Float32}}() + num_decisions::Int32 = 2 * num_features + 2 + num_elements::Vector{Int32} = Vector{Int32}() + # num_parameters always equals num_elements + + ensure_layer(i) = begin + if length(decisions) < i + # add a new layer + push!(decisions, Int32[]) + push!(elements, Int32[]) + push!(parameters, Float32[]) + push!(num_elements, 0) + end + end + + f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_ID : FALSE_ID) + f_lit(n) = LayeredDecisionId(0, + ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n))) + + f_and(n, cs) = begin + @assert length(cs) == 2 + LayeredDecisionId[cs[1], cs[2]] + end + f_or(n, cs) = begin + num_decisions += 1 + # determine layer + layer_id = zero(Int32) + for c in cs + if c isa Vector{LayeredDecisionId} + @assert length(c) == 2 + layer_id = max(layer_id, c[1].layer_id, c[2].layer_id) + else + @assert c isa LayeredDecisionId + layer_id = max(layer_id, c.layer_id) + end + end + layer_id += 1 + ensure_layer(layer_id) + first_element = num_elements[layer_id] + 1 + foreach(cs, eachrow(n.thetas)) do c, theta + @assert size(theta)[1] == nc + append!(parameters[layer_id], theta) + num_elements[layer_id] += 1 + if c isa Vector{LayeredDecisionId} + push!(elements[layer_id], c[1].decision_id, c[2].decision_id) + else + push!(elements[layer_id], c.decision_id, TRUE_ID) + end + end + push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id]) + LayeredDecisionId(layer_id, num_decisions) + end + + foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, + Union{LayeredDecisionId,Vector{LayeredDecisionId}}) + + circuit_layers = map(decisions, elements) do d, e + Layer(reshape(d, 3, :), reshape(e, 2, :)) + end + parameter_layers = map(parameters) do p + reshape(p, nc, :) + end + return LayeredParameterCircuit(LayeredBitCircuit(circuit_layers), parameter_layers) +end + +struct CuLayeredParameterCircuit + layered_circuit::CuLayeredBitCircuit + layered_parameters::Vector{CuMatrix{Float32}} + CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) +end + + + +function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) + cw, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + one = Float32(1.0) + return @. one / (one + exp(-cw)), flow, v +end + +function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) + flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) + cw = calculate_class_weights(circuit, nc, data, v, flow, reuse_cw) + return cw, flow, v +end + +function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, v, flow, reuse_cw=nothing) + ne = num_examples(data) + cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc) + reuse_cw .= zero(Float32) + reuse_cw + else + CUDA.zeros(Float32, ne, nc) + end + + dec_per_thread = 4 + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) + num_threads = balance_threads(ne, ndl / dec_per_thread, 8) + num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread) + @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, v, flow, circuit_layer.decisions, circuit_layer.elements, parameter_layer) + end + + return cw +end + +function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, elements, parameters) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + ne, nc = size(cw) + _, num_decisions = size(decisions) + + for j = index_x:stride_x:ne + for i = index_y:stride_y:num_decisions + decision_id = @inbounds decisions[1, i] + n_up = @inbounds v[j, decision_id] + if n_up > zero(Float32) + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + n_down = @inbounds flow[j, decision_id] + for e = first_elem:last_elem + e1 = @inbounds elements[1, first_elem] + e2 = @inbounds elements[2, first_elem] + e_up = @inbounds (v[j, e1] * v[j, e2]) + edge_flow = e_up / n_up * n_down + # following needs to be memory safe + for class=1:nc + @CUDA.atomic cw[j, class] += edge_flow * parameters[class, e] # atomic is automatically inbounds + end + end + end + end + end + + return nothing +end + + + +function one_hot(labels::Vector, nc::Integer) + ne = length(labels) + one_hot_labels = zeros(Float32, ne, nc) + for (i, j) in enumerate(labels) + one_hot_labels[i, j + 1] = 1.0 + end + one_hot_labels +end + +function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001) + cp, flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + update_parameters(circuit, labels, cp, flow, step_size) + for _ = 2:num_epochs + cp, flow, v = class_likelihood(circuit, nc, data, v, flow, cp) + update_parameters(circuit, labels, cp, v, flow, step_size) + end + return nothing +end + +function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, v, flow, step_size=0.0001) + _, nc = size(labels) + step_size = Float32(step_size) + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + flow_layer = flow[i] + parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) + num_threads = balance_threads(ndl, nc, 6) + num_threads = num_threads[1], num_threads[2], + num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, nc / num_threads[2]), 4 + @cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size) + end + return nothing +end + +function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + index_z = (blockIdx().z - 1) * blockDim().z + threadIdx().z + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + stride_z = blockDim().z * gridDim().z + ne, nc = size(labels) + _, num_decisions = size(decisions) + + for class = index_y:stride_y:nc + for i = index_x:stride_x:num_decisions + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + for e = first_elem:last_elem + for j = index_z:stride_z:ne + edge_flow = e_up / n_up * n_down + u = @inbounds edge_flow * (cp[j, class] - labels[j, class]) * step_size + # following needs to be memory safe + @inbounds parameters[class, e] -= u + end + end + end + end + + return nothing +end \ No newline at end of file diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl index 7c80e625..7ceba404 100644 --- a/src/Logistic/queries.jl +++ b/src/Logistic/queries.jl @@ -1,22 +1,165 @@ -export class_conditional_likelihood_per_instance +export class_likelihood_per_instance, class_weights_per_instance + +using CUDA +using LoopVectorization: @avx, vifelse + + -using ..Probabilistic: get_downflow, get_upflow """ Class Conditional Probability """ -function class_conditional_likelihood_per_instance(lc::LogisticCircuit, classes::Int, data) - compute_flows(lc, data) - likelihoods = zeros(num_examples(data), classes) - foreach(lc) do ln - if ln isa Logistic⋁Node - # For each class. orig.thetas is 2D so used eachcol - for (idx, thetaC) in enumerate(eachcol(ln.thetas)) - foreach(children(ln), thetaC) do c, theta - likelihoods[:, idx] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta + +function bitcircuit_with_params(lc, nc, data) + params::Vector{Vector{Float64}} = Vector{Vector{Float64}}() + on_decision(n, cs, layer_id, decision_id, first_element, last_element) = begin + if isnothing(n) + # @assert first_element == last_element + push!(params, zeros(Float64, nc)) + else + # @assert last_element-first_element+1 == length(n.log_probs) "$last_element-$first_element+1 != $(length(n.log_probs))" + for theta in eachrow(n.thetas) + push!(params, theta) + end + end + end + bc = BitCircuit(lc, data; on_decision) + (bc, permutedims(hcat(params...), (2, 1))) +end + +function class_likelihood_per_instance(lc::LogicCircuit, nc::Int, data) + cw = class_weights_per_instance(lc, nc, data) + isgpu(data) ? (@. 1.0 / (1.0 + exp(-cw))) : (@. @avx 1.0 / (1.0 + exp(-cw))) +end + +function class_weights_per_instance(lc::LogisticCircuit, nc::Int, data) + bc, params = bitcircuit_with_params(lc, nc, data) + if isgpu(data) + class_weights_per_instance_gpu(to_gpu(bc), data, to_gpu(params)) + else + class_weights_per_instance_cpu(bc, data, params) + end +end + +function class_weights_per_instance_cpu(bc, data, params) + ne::Int = num_examples(data) + nc::Int = size(params, 2) + cw::Matrix{Float64} = zeros(Float64, ne, nc) + cw_lock::Threads.ReentrantLock = Threads.ReentrantLock() + + @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, locks) + if els_start != els_end + lock(cw_lock) do # TODO: move lock to inner loop? + for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, p] & values[i, s] & flows[i, dec_id] + first_true_bit = trailing_zeros(edge_flow) + 1 + last_true_bit = 64 - leading_zeros(edge_flow) + @simd for j = first_true_bit:last_true_bit + if get_bit(edge_flow, j) + ex_id = ((i-1) << 6) + j + for class = 1:size(cw, 2) + @inbounds cw[ex_id, class] += params[el_id, class] + end + end + end + end + end + end + nothing + end + + @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, locks) + if els_start != els_end + lock(cw_lock) do # TODO: move lock to inner loop? + @avx for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, p] * values[i, s] / values[i, dec_id] * flows[i, dec_id] + edge_flow = vifelse(isfinite(edge_flow), edge_flow, zero(Float32)) + for class = 1:size(cw, 2) + @inbounds cw[i, class] += edge_flow * params[el_id, class] + end end end end + nothing end - likelihoods + + if isbinarydata(data) + compute_values_flows(bc, data; on_edge = on_edge_binary) + else + compute_values_flows(bc, data; on_edge = on_edge_float) + end + + return cw end +function class_weights_per_instance_gpu(bc, data, params) + ne::Int = num_examples(data) + nc::Int = size(params, 2) + cw::CuMatrix{Float64} = CUDA.zeros(Float64, num_examples(data), nc) + cw_device = CUDA.cudaconvert(cw) + params_device = CUDA.cudaconvert(params) + + @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, chunk_id, edge_flow) + if els_start != els_end + first_true_bit = 1+trailing_zeros(edge_flow) + last_true_bit = 64-leading_zeros(edge_flow) + for j = first_true_bit:last_true_bit + if get_bit(edge_flow, j) + ex_id = ((chunk_id-1) << 6) + j + for class = 1:size(cw_device, 2) + CUDA.@atomic cw_device[ex_id, class] += params_device[el_id, class] + end + end + end + end + nothing + end + + @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, ex_id, edge_flow) + if els_start != els_end + for class = 1:size(cw_device, 2) + CUDA.@atomic cw_device[ex_id, class] += edge_flow * params_device[el_id, class] + end + end + nothing + end + + if isbinarydata(data) + v,f = compute_values_flows(bc, data; on_edge = on_edge_binary) + else + @assert isfpdata(data) "Only floating point and binary data are supported" + v,f = compute_values_flows(bc, data; on_edge = on_edge_float) + end + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some effort + + return cw +end + + + +""" +Class Predictions +""" +function predict_class(lc::LogisticCircuit, nc::Int, data) + class_likelihoods = class_likelihood_per_instance(lc, nc, data) + predict_class(class_likelihoods) +end + +function predict_class(class_likelihoods) + _, mxindex = findmax(class_likelihoods; dims=2) + dropdims(getindex.(mxindex, 2); dims=2) +end + + + +""" +Prediction accuracy +""" +accuracy(lc::LogisticCircuit, nc::Int, data, labels) = + accuracy(predict_class(lc, nc, data), labels) + +accuracy(predicted_class, labels) = + Float64(sum(@. predicted_class == labels)) / length(labels) + +accuracy(class_likelihoods, labels) = + accuracy(predict_class(class_likelihoods), labels) diff --git a/src/Probabilistic/prob_nodes.jl b/src/Probabilistic/prob_nodes.jl index f002baea..4cb10f16 100644 --- a/src/Probabilistic/prob_nodes.jl +++ b/src/Probabilistic/prob_nodes.jl @@ -1,5 +1,7 @@ -export PlainProbCircuit, ProbLeafNode, ProbInnerNode, ProbLiteralNode, Prob⋀Node, -Prob⋁Node +export + PlainProbCircuit, + ProbLeafNode, ProbInnerNode, + ProbLiteralNode, Prob⋀Node, Prob⋁Node ##################### # Infrastructure for probabilistic circuit nodes diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl index 3461a064..9a63f0aa 100644 --- a/test/Logistic/logistic_tests.jl +++ b/test/Logistic/logistic_tests.jl @@ -3,27 +3,80 @@ using LogicCircuits using ProbabilisticCircuits # This tests are supposed to test queries on the circuits -@testset "Logistic Circuit Class Conditional" begin +@testset "Logistic Circuit Query and Parameter Tests" begin # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to # match with python version. + + CLASSES = 2 - EPS = 1e-7; - logistic_circuit = zoo_lc("little_4var.circuit", 2); - @test logistic_circuit isa LogisticCircuit; + logistic_circuit = zoo_lc("little_4var.circuit", CLASSES) + @test logistic_circuit isa LogisticCircuit - # Step 1. Check Probabilities for 3 samples - data = Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1]); - - true_prob = [3.43147972 4.66740416; - 4.27595352 2.83503504; - 3.67415087 4.93793472] + # check probabilities for binary samples + data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1]) + # true_weight_func = [3.43147972 4.66740416; + # 4.27595352 2.83503504; + # 3.67415087 4.93793472] + true_prob = [0.9686740008311808 0.9906908445371728; + 0.9862917392724188 0.9445399509069984; + 0.9752568185086389 0.9928816444223209] - CLASSES = 2 - calc_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) - - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ calc_prob[i,j] atol= EPS; + class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:size(true_prob)[1] + for j = 1:CLASSES + @test true_prob[i,j] ≈ class_prob[i,j] + end + end + + # check probabilities for float samples + data = Float32.(data) + class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) + for i = 1:size(true_prob)[1] + for j = 1:CLASSES + @test true_prob[i,j] ≈ class_prob[i,j] end end + + # check predicted_classes + true_labels = [2, 1, 2] + predicted_classes = predict_class(logistic_circuit, CLASSES, data) + @test all(predicted_classes .== true_labels) + + # check accuracy + @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 + + # # check parameter updates + # original_literal_parameters = Dict{Int, Vector{Float64}}() + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # original_literal_parameters[c.literal] = copy(theta) + # end + # end + # end + # end + + # one_hot_labels = [0.0 1.0; + # 1.0 0.0; + # 0.0 1.0] + # one_hot_labels = Float32.(one_hot_labels) + # true_error = true_prob .- one_hot_labels + # step_size = 0.1 + # learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) + + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # for class = 1:CLASSES + # true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] + # updated_amount = theta[class] - original_literal_parameters[c.literal][class] + # @test updated_amount ≈ true_update_amount atol=1e-7 + # end + # end + # end + # end + # end + end \ No newline at end of file diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl index c8aafc50..8959c381 100644 --- a/test/Reasoning/expectation_test.jl +++ b/test/Reasoning/expectation_test.jl @@ -3,7 +3,7 @@ using LogicCircuits using ProbabilisticCircuits function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int) - EPS = 1e-7; + EPS = 1e-5; COUNT = size(data)[1] # Compute True expectation brute force true_exp = zeros(COUNT, CLASSES) @@ -14,7 +14,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all) true_exp[i, :] = sum(calc_p .* calc_f, dims=1) true_exp[i, :] ./= sum(calc_p) #p_observed end @@ -36,7 +36,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data end function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int, moment::Int) - EPS = 1e-7; + EPS = 1e-5; COUNT = size(data)[1] # Compute True moment brute force true_mom = zeros(COUNT, CLASSES) @@ -47,7 +47,7 @@ function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLA calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all) true_mom[i, :] = sum(calc_p .* (calc_f .^ moment), dims=1) true_mom[i, :] ./= sum(calc_p) #p_observed end