diff --git a/.gitignore b/.gitignore index 2ec4662b..b4bd018e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ *.vscode **checkpoint.ipynb *Manifest.toml -docs/build/ \ No newline at end of file +docs/build/ +scratch/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 1c3624fc..753ff389 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,22 +12,23 @@ jobs: include: - stage: "Unit Tests" os: linux - julia: 1.3 + julia: 1.5 install: - - julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(url="https://github.com/Juice-jl/LogicCircuits.jl")); Pkg.instantiate(); Pkg.precompile()' + - julia -e 'using Pkg; Pkg.activate("."); Pkg.add(PackageSpec(url="https://github.com/Juice-jl/LogicCircuits.jl")); Pkg.instantiate(); Pkg.precompile();' - julia -e 'using Pkg; Pkg.activate("./test"); Pkg.add(PackageSpec(url="https://github.com/Juice-jl/LogicCircuits.jl")); Pkg.develop(PackageSpec(path = pwd())); Pkg.instantiate(); Pkg.precompile();' script: - julia --code-coverage --color=yes -p2 test/runtests.jl after_success: - - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' + - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder());' - stage: "Deploy Documentation" os: linux - julia: 1.3 + julia: 1.5 script: - - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(url="https://github.com/Juice-jl/LogicCircuits.jl")); Pkg.instantiate(); Pkg.precompile();' + - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate();' - julia --project=docs/ docs/make.jl after_success: skip @@ -38,4 +39,4 @@ notifications: on_start: always on_error: always rooms: - - secure: 
VMXOgM9g758gZiU06/Gaahns6CFpoSuDYMnl9g0LMv165HEe7tZPlF1IFbTEXk6svr+tAuSEd3oxs/kAyK7onI3hIpP0PSc+Y7/+rnOMk8zU+z7R6JEzQKHHb1M6pQ6MjzOia9BM7SfcfVqedPREVXZx+XJPmVuR4BgTOxUnnyfltZzW0ldSbyeJ37FdDSd9SDRRf7Q4UzbEMN33GfVsTKMZoRqASrZXhvqAVp7deXMdGp1kNlvIbbwVkeICLYTIYrm5zd0HkH2yEhk0AtgeTpyx/kkR1T0Fs2+PCDsLRPhP1EEJs7FdsdQJuP0SueJ92GpPd7yLYZVVWWQkGWudNb6H3iYp2xtbZCoeCBLEUgusrawwdxp0OlNOgP/aeJDc+zNy59ikraluI0sNCV1Pl8dIXu8Ihu6e7W6hoiTQ8K9PjwcXSmBgUsR+kXD8NcCx73RTxynokv+24Xk0M1pkJhu6mjNjZBDIegXVM/CnNew1LSMoMjdi43asuDiDbkZg2uCxfHwaMxlgWuM/M38r662FbOjEfgr13fhCyuUQZRFOKvvqU17HbA+ewC/J40C2g0sBDGPu/uOJsDJaQGPXDpXsh4G+8R7uZRNunhwNPK4OnVdY+uVnYlD+9TG9T1IothaDSRJvYU8HwAcUOJhMNYDDQosWOy+01NQtX0IYRgk= \ No newline at end of file + - secure: VMXOgM9g758gZiU06/Gaahns6CFpoSuDYMnl9g0LMv165HEe7tZPlF1IFbTEXk6svr+tAuSEd3oxs/kAyK7onI3hIpP0PSc+Y7/+rnOMk8zU+z7R6JEzQKHHb1M6pQ6MjzOia9BM7SfcfVqedPREVXZx+XJPmVuR4BgTOxUnnyfltZzW0ldSbyeJ37FdDSd9SDRRf7Q4UzbEMN33GfVsTKMZoRqASrZXhvqAVp7deXMdGp1kNlvIbbwVkeICLYTIYrm5zd0HkH2yEhk0AtgeTpyx/kkR1T0Fs2+PCDsLRPhP1EEJs7FdsdQJuP0SueJ92GpPd7yLYZVVWWQkGWudNb6H3iYp2xtbZCoeCBLEUgusrawwdxp0OlNOgP/aeJDc+zNy59ikraluI0sNCV1Pl8dIXu8Ihu6e7W6hoiTQ8K9PjwcXSmBgUsR+kXD8NcCx73RTxynokv+24Xk0M1pkJhu6mjNjZBDIegXVM/CnNew1LSMoMjdi43asuDiDbkZg2uCxfHwaMxlgWuM/M38r662FbOjEfgr13fhCyuUQZRFOKvvqU17HbA+ewC/J40C2g0sBDGPu/uOJsDJaQGPXDpXsh4G+8R7uZRNunhwNPK4OnVdY+uVnYlD+9TG9T1IothaDSRJvYU8HwAcUOJhMNYDDQosWOy+01NQtX0IYRgk= diff --git a/Project.toml b/Project.toml index 3dd5e37d..94501d5f 100644 --- a/Project.toml +++ b/Project.toml @@ -4,10 +4,15 @@ version = "0.1.1" [deps] BlossomV = "6c721016-9dae-5d90-abf6-67daaccb2332" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599" 
+LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5" Metis = "2679e427-3c69-5b7f-982b-ece356f1e94b" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -19,17 +24,22 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +TikzGraphs = "b4f28e30-c73f-5eaf-a395-8a9db949a742" [compat] BlossomV = "0.4" +CUDA = "1.2" Clustering = "0.14" +DataFrames = "0.21" DataStructures = "0.17" LightGraphs = "1.3" LogicCircuits = "0.1.1" +LoopVectorization = "0.8.20" +MLDatasets = "0.4, 0.5" MetaGraphs = "0.6" Metis = "1.0" Reexport = "0.2" SimpleWeightedGraphs = "1.1" StatsBase = "0.33" StatsFuns = "0.9" -julia = "1.3" +julia = "1.5" diff --git a/docs/src/api/internals/io.md b/docs/src/api/internals/io.md deleted file mode 100644 index 972aa218..00000000 --- a/docs/src/api/internals/io.md +++ /dev/null @@ -1,6 +0,0 @@ - -# IO - -```@autodocs -Modules = [ProbabilisticCircuits.IO] -``` \ No newline at end of file diff --git a/docs/src/api/internals/loadsave.md b/docs/src/api/internals/loadsave.md new file mode 100644 index 00000000..3f74274e --- /dev/null +++ b/docs/src/api/internals/loadsave.md @@ -0,0 +1,6 @@ + +# LoadSave + +```@autodocs +Modules = [ProbabilisticCircuits.LoadSave] +``` \ No newline at end of file diff --git a/docs/src/api/internals/logistic.md b/docs/src/api/internals/logistic.md deleted file mode 100644 index 6fc07a5e..00000000 --- a/docs/src/api/internals/logistic.md +++ /dev/null @@ -1,5 +0,0 @@ -# Logistic - -```@autodocs -Modules = [Logistic] -``` diff --git a/docs/src/api/internals/probabilistic.md b/docs/src/api/internals/probabilistic.md deleted file mode 100644 index 2d862da9..00000000 --- a/docs/src/api/internals/probabilistic.md +++ /dev/null @@ -1,5 +0,0 @@ -# Probabilistic - -```@autodocs 
-Modules = [Probabilistic] -``` \ No newline at end of file diff --git a/docs/src/api/internals/reasoning.md b/docs/src/api/internals/reasoning.md deleted file mode 100644 index 65d5400a..00000000 --- a/docs/src/api/internals/reasoning.md +++ /dev/null @@ -1,5 +0,0 @@ -# Reasoning - -```@autodocs -Modules = [Reasoning] -``` diff --git a/docs/src/api/internals/structureLearner.md b/docs/src/api/internals/structureLearner.md deleted file mode 100644 index 72719021..00000000 --- a/docs/src/api/internals/structureLearner.md +++ /dev/null @@ -1,5 +0,0 @@ -# StructureLearner - -```@autodocs -Modules = [StructureLearner] -``` diff --git a/docs/src/api/internals/utils.md b/docs/src/api/internals/utils.md index c8dae019..57998984 100644 --- a/docs/src/api/internals/utils.md +++ b/docs/src/api/internals/utils.md @@ -1,5 +1,5 @@ # Utils ```@autodocs -Modules = [Utils] +Modules = [ProbabilisticCircuits.Utils] ``` diff --git a/src/IO/CircuitLineCompiler.jl b/src/IO/CircuitLineCompiler.jl deleted file mode 100644 index 32abf1a2..00000000 --- a/src/IO/CircuitLineCompiler.jl +++ /dev/null @@ -1,117 +0,0 @@ -using LogicCircuits.IO: constant - -##################### -# Compilers to ProbabilisticCircuits data structures starting from already parsed line objects -##################### - -# reuse some internal infrastructure of LogicCircuits' IO module -using LogicCircuits.IO: CircuitFormatLines, CircuitFormatLine, VtreeFormatLines, CircuitHeaderLine, UnweightedLiteralLine, WeightedLiteralLine, DecisionLine, LCElement, BiasLine, WeightedNamedConstantLine, PSDDElement, CircuitCommentLine, ID, -compile_smooth_struct_logical_m, compile_smooth_logical_m - -""" -Compile lines into a probabilistic circuit. -""" -function compile_prob(lines::CircuitFormatLines)::ProbΔ - # first compile a logical circuit - logical_circuit, id2lognode = compile_smooth_logical_m(lines) - decorate_prob(lines, logical_circuit, id2lognode) -end - -""" -Compile lines into a logistic circuit. 
-""" -function compile_logistic(lines::CircuitFormatLines, classes::Int)::LogisticΔ - # first compile a logical circuit - logical_circuit, id2lognode = compile_smooth_logical_m(lines) - decorate_logistic(lines, logical_circuit, classes, id2lognode) -end - -""" -Compile circuit and vtree lines into a structured probabilistic circuit (one whose logical circuit origin is structured). -""" -function compile_struct_prob(circuit_lines::CircuitFormatLines, vtree_lines::VtreeFormatLines) - logical_circuit, vtree, id2vtree, id2lognode = compile_smooth_struct_logical_m(circuit_lines, vtree_lines) - prob_circuit = decorate_prob(circuit_lines, logical_circuit, id2lognode) - return prob_circuit, vtree -end - -function decorate_prob(lines::CircuitFormatLines, logical_circuit::LogicalΔ, id2lognode::Dict{ID,<:LogicalΔNode})::ProbΔ - # set up cache mapping logical circuit nodes to their probabilistic decorator - lognode2probnode = ProbCache() - # build a corresponding probabilistic circuit - prob_circuit = ProbΔ(logical_circuit,lognode2probnode) - # map from line node ids to probabilistic circuit nodes - id2probnode(id) = lognode2probnode[id2lognode[id]] - - # go through lines again and update the probabilistic circuit node parameters - - function compile(ln::CircuitFormatLine) - error("Compilation of line $ln into probabilistic circuit is not supported") - end - function compile(::Union{CircuitHeaderLine,CircuitCommentLine,UnweightedLiteralLine}) - # do nothing - end - function compile(ln::WeightedNamedConstantLine) - @assert constant(ln) == true - node = id2probnode(ln.node_id)::Prob⋁ - node.log_thetas .= [ln.weight, log1p(-exp(ln.weight)) ] - end - function compile(ln::DecisionLine{<:PSDDElement}) - node = id2probnode(ln.node_id)::Prob⋁ - node.log_thetas .= [x.weight for x in ln.elements] - end - for ln in lines - compile(ln) - end - - prob_circuit -end - - -function decorate_logistic(lines::CircuitFormatLines, logical_circuit::LogicalΔ, - classes::Int, 
id2lognode::Dict{ID,<:LogicalΔNode})::LogisticΔ - - # set up cache mapping logical circuit nodes to their logistic decorator - log2logistic = LogisticCache() - # build a corresponding probabilistic circuit - logistic_circuit = LogisticΔ(logical_circuit, classes, log2logistic) - # map from line node ids to probabilistic circuit nodes - id2logisticnode(id) = log2logistic[id2lognode[id]] - - # go through lines again and update the probabilistic circuit node parameters - - function compile(ln::CircuitFormatLine) - error("Compilation of line $ln into logistic circuit is not supported") - end - function compile(::Union{CircuitHeaderLine,CircuitCommentLine,UnweightedLiteralLine}) - # do nothing - end - - function compile(ln::CircuitHeaderLine) - # do nothing - end - - function compile(ln::WeightedLiteralLine) - node = id2logisticnode(ln.node_id)::Logistic⋁ - node.thetas[1, :] .= ln.weights - end - - function compile(ln::DecisionLine{<:LCElement}) - node = id2logisticnode(ln.node_id)::Logistic⋁ - for (ind, elem) in enumerate(ln.elements) - node.thetas[ind, :] .= elem.weights - end - end - - function compile(ln::BiasLine) - node = id2logisticnode(ln.node_id)::Logistic⋁ - # @assert length(node.thetas) == 1 - node.thetas[1,:] .= ln.weights - end - - for ln in lines - compile(ln) - end - - logistic_circuit -end \ No newline at end of file diff --git a/src/IO/IO.jl b/src/IO/IO.jl deleted file mode 100644 index c670cac8..00000000 --- a/src/IO/IO.jl +++ /dev/null @@ -1,33 +0,0 @@ -module IO - -using LogicCircuits -using ..Utils -using ..Probabilistic -using ..Logistic - -export - -# CircuitParser -load_prob_circuit, -load_struct_prob_circuit, -load_psdd_prob_circuit, -load_logistic_circuit, -parse_clt, - -# CircuitSaver -save_as_dot, is_true_node, save_circuit, -# get_node2id,get_vtree2id,vtree_node, decompile, make_element, save_lines, save_psdd_comment_line, save_sdd_comment_line, -# save_line, to_string - - -# Loaders -zoo_psdd, zoo_lc, zoo_clt, -zoo_clt_file - 
-include("CircuitLineCompiler.jl") -include("CircuitParser.jl") -include("CircuitSaver.jl") - -include("Loaders.jl") - -end \ No newline at end of file diff --git a/src/IO/Loaders.jl b/src/IO/Loaders.jl deleted file mode 100644 index 719a1a60..00000000 --- a/src/IO/Loaders.jl +++ /dev/null @@ -1,22 +0,0 @@ -using LogicCircuits -using Pkg.Artifacts - - -##################### -# Circuit loaders -##################### - -zoo_lc(name, num_classes) = - load_logistic_circuit(zoo_lc_file(name), num_classes) - -zoo_clt_file(name) = - artifact"circuit_model_zoo" * "/Circuit-Model-Zoo-0.1.2/clts/$name" - -zoo_clt(name) = - parse_clt(zoo_clt_file(name)) - -zoo_psdd_file(name) = - artifact"circuit_model_zoo" * "/Circuit-Model-Zoo-0.1.2/psdds/$name" - -zoo_psdd(name) = - load_prob_circuit(zoo_psdd_file(name)) diff --git a/src/LoadSave/LoadSave.jl b/src/LoadSave/LoadSave.jl new file mode 100644 index 00000000..26ae08f2 --- /dev/null +++ b/src/LoadSave/LoadSave.jl @@ -0,0 +1,11 @@ +module LoadSave + +using LogicCircuits +using ...ProbabilisticCircuits + +include("circuit_line_compiler.jl") +include("circuit_loaders.jl") +include("circuit_savers.jl") +include("plot.jl") + +end \ No newline at end of file diff --git a/src/LoadSave/circuit_line_compiler.jl b/src/LoadSave/circuit_line_compiler.jl new file mode 100644 index 00000000..4d7687dd --- /dev/null +++ b/src/LoadSave/circuit_line_compiler.jl @@ -0,0 +1,121 @@ +##################### +# Compilers to ProbabilisticCircuits data structures starting from already parsed line objects +##################### + +# reuse some internal infrastructure of LogicCircuits' LoadSave module +using LogicCircuits.LoadSave: CircuitFormatLines, CircuitFormatLine, lnconstant, +VtreeFormatLines, CircuitHeaderLine, UnweightedLiteralLine, WeightedLiteralLine, +DecisionLine, LCElement, BiasLine, WeightedNamedConstantLine, PSDDElement, +CircuitCommentLine, ID, compile_smooth_struct_logical_m, compile_smooth_logical_m + +""" +Compile lines into a 
probabilistic circuit +""" +function compile_prob(lines::CircuitFormatLines)::ProbCircuit + # first compile a logic circuit + logic_circuit, id2lognode = compile_smooth_logical_m(lines) + decorate_prob(lines, logic_circuit, id2lognode) +end + +""" +Compile lines into a logistic circuit. +""" +function compile_logistic(lines::CircuitFormatLines, classes::Int)::LogisticCircuit + # first compile a logic circuit + logic_circuit, id2lognode = compile_smooth_logical_m(lines) + decorate_logistic(lines, logic_circuit, classes, id2lognode) +end + +""" +Compile circuit and vtree lines into a structured probabilistic circuit (one whose logic circuit origin is structured). +""" +function compile_struct_prob(circuit_lines::CircuitFormatLines, vtree_lines::VtreeFormatLines) + logic_circuit, vtree, id2lognode, id2vtree = compile_smooth_struct_logical_m(circuit_lines, vtree_lines) + prob_circuit = decorate_prob(circuit_lines, logic_circuit, id2lognode) + return prob_circuit, vtree +end + +function decorate_prob(lines::CircuitFormatLines, logic_circuit::LogicCircuit, id2lognode::Dict{ID,<:LogicCircuit})::ProbCircuit + # set up cache mapping logic circuit nodes to their probabilistic decorator + + prob_circuit = ProbCircuit(logic_circuit) + lognode2probnode = Dict{LogicCircuit, ProbCircuit}() + + prob_lin = linearize(prob_circuit) # TODO better implementation + logic_lin = linearize(logic_circuit) + + foreach(i -> lognode2probnode[logic_lin[i]] = prob_lin[i], 1 : num_nodes(logic_circuit)) + + # map from line node ids to probabilistic circuit nodes + id2probnode(id) = lognode2probnode[id2lognode[id]] + + root = nothing + + # go through lines again and update the probabilistic circuit node parameters + + function compile(ln::CircuitFormatLine) + error("Compilation of line $ln into probabilistic circuit is not supported") + end + function compile(::Union{CircuitHeaderLine,CircuitCommentLine,UnweightedLiteralLine}) + # do nothing + end + function compile(ln::WeightedNamedConstantLine) + 
@assert lnconstant(ln) == true + root = id2probnode(ln.node_id) + root.log_probs .= [ln.weight, log1p(-exp(ln.weight))] + end + function compile(ln::DecisionLine{<:PSDDElement}) + root = id2probnode(ln.node_id) + root.log_probs .= [x.weight for x in ln.elements] + end + + foreach(compile, lines) + + root +end + +function decorate_logistic(lines::CircuitFormatLines, logic_circuit::LogicCircuit, + classes::Int, id2lognode::Dict{ID,<:LogicCircuit})::LogisticCircuit + + # set up cache mapping logic circuit nodes to their logistic decorator + logistic_circuit = LogisticCircuit(logic_circuit, classes) + log2logistic = Dict{LogicCircuit, LogisticCircuit}() + logistic_lin = linearize(logistic_circuit) + logic_lin = linearize(logic_circuit) + + foreach(i -> log2logistic[logic_lin[i]] = logistic_lin[i], 1 : length(logic_lin)) + id2logisticnode(id) = log2logistic[id2lognode[id]] + + root = nothing + # go through lines again and update the probabilistic circuit node parameters + + function compile(ln::CircuitFormatLine) + error("Compilation of line $ln into logistic circuit is not supported") + end + + function compile(::Union{CircuitHeaderLine,CircuitCommentLine,UnweightedLiteralLine}) + # do nothing + end + + function compile(ln::WeightedLiteralLine) + root = id2logisticnode(ln.node_id)::Logistic⋁Node + root.thetas[1, :] .= ln.weights + end + + function compile(ln::DecisionLine{<:LCElement}) + root = id2logisticnode(ln.node_id)::Logistic⋁Node + for (ind, elem) in enumerate(ln.elements) + root.thetas[ind, :] .= elem.weights + end + end + + function compile(ln::BiasLine) + root = id2logisticnode(ln.node_id)::Logistic⋁Node + # @assert length(root.thetas) == 1 + root.thetas[1,:] .= ln.weights + end + + foreach(compile, lines) + + root +end \ No newline at end of file diff --git a/src/IO/CircuitParser.jl b/src/LoadSave/circuit_loaders.jl similarity index 64% rename from src/IO/CircuitParser.jl rename to src/LoadSave/circuit_loaders.jl index e76e28f5..b8f4955a 100644 --- 
a/src/IO/CircuitParser.jl +++ b/src/LoadSave/circuit_loaders.jl @@ -1,5 +1,25 @@ +export zoo_clt, zoo_clt_file, zoo_psdd, zoo_lc, load_prob_circuit, +load_struct_prob_circuit, load_logistic_circuit -using MetaGraphs: MetaDiGraph, set_prop!, props +using LogicCircuits +using Pkg.Artifacts +using LogicCircuits.LoadSave: parse_psdd_file, parse_circuit_file, parse_vtree_file + +##################### +# circuit loaders from module zoo +##################### + +zoo_lc(name, num_classes) = + load_logistic_circuit(zoo_lc_file(name), num_classes) + +zoo_clt_file(name) = + artifact"circuit_model_zoo" * "/Circuit-Model-Zoo-0.1.2/clts/$name" + +zoo_clt(name) = + parse_clt(zoo_clt_file(name)) + +zoo_psdd(name) = + load_prob_circuit(zoo_psdd_file(name)) ##################### # general parser infrastructure for circuits @@ -11,9 +31,9 @@ using MetaGraphs: MetaDiGraph, set_prop!, props """ Load a probabilistic circuit from file. Support circuit file formats: - * ".psdd" for PSDD files + * ".psdd" for PSDD files """ -function load_prob_circuit(file::String)::ProbΔ +function load_prob_circuit(file::String)::ProbCircuit @assert endswith(file,".psdd") compile_prob(parse_psdd_file(file)) end @@ -21,40 +41,35 @@ end """ Load a structured probabilistic circuit from file. Support circuit file formats: - * ".psdd" for PSDD files + * ".psdd" for PSDD files Supported vtree file formats: - * ".vtree" for VTree files + * ".vtree" for Vtree files """ -function load_struct_prob_circuit(circuit_file::String, vtree_file::String)::Tuple{ProbΔ,PlainVtree} +function load_struct_prob_circuit(circuit_file::String, vtree_file::String)::Tuple{StructProbCircuit,PlainVtree} @assert endswith(circuit_file,".psdd") circuit_lines = parse_circuit_file(circuit_file) vtree_lines = parse_vtree_file(vtree_file) compile_struct_prob(circuit_lines, vtree_lines) end - -function load_logistic_circuit(circuit_file::String, classes::Int)::LogisticΔ +""" +Load a logistic circuit from file. 
+Support circuit file formats: + * ".circuit" for logistic files +Supported vtree file formats: + * ".vtree" for Vtree files +""" +function load_logistic_circuit(circuit_file::String, classes::Int)::LogisticCircuit @assert endswith(circuit_file,".circuit") circuit_lines = parse_circuit_file(circuit_file) compile_logistic(circuit_lines, classes) end - ##################### # parse based on file extension ##################### -function parse_circuit_file(file::String)::CircuitFormatLines - if endswith(file,".circuit") - parse_lc_file(file) - elseif endswith(file,".psdd") - parse_psdd_file(file) - elseif endswith(file,".sdd") - parse_sdd_file(file) - else - throw("Cannot parse this file type as a circuit: $file") - end -end +using MetaGraphs: MetaDiGraph, set_prop!, props, add_edge! "Parse a clt from given file" function parse_clt(filename::String)::MetaDiGraph @@ -78,3 +93,4 @@ function parse_clt(filename::String)::MetaDiGraph end return clt end + diff --git a/src/IO/CircuitSaver.jl b/src/LoadSave/circuit_savers.jl similarity index 51% rename from src/IO/CircuitSaver.jl rename to src/LoadSave/circuit_savers.jl index 9bba0d78..fd22fc52 100644 --- a/src/IO/CircuitSaver.jl +++ b/src/LoadSave/circuit_savers.jl @@ -1,74 +1,43 @@ -using Printf: @sprintf +export save_circuit, save_as_dot, save_as_psdd, save_as_logistic -import Base.copy -import LogicCircuits.IO: SDDElement, +using LogicCircuits.LoadSave: SDDElement, PSDDElement, - save_lines, + save_lines, + get_vtree2id, + get_node2id, parse_psdd_file, PsddHeaderLine, LcHeaderLine, - save_sdd_file, - save_as_dot, get_nodes_level -# Saving psdd - ##################### -# decompile for nodes +# decompile for probabilistic nodes ##################### -# decompile for psdd -decompile(n::ProbLiteral, node2id, vtree2id)::UnweightedLiteralLine = - UnweightedLiteralLine(node2id[n], vtree2id[n.origin.vtree], literal(n), true) +"Decompile for psdd circuit, used during saving of circuits to file" +decompile(n::StructProbLiteralNode, 
node2id, vtree2id)::UnweightedLiteralLine = + UnweightedLiteralLine(node2id[n], vtree2id[n.vtree], literal(n), true) -make_element(n::Prob⋀, w::AbstractFloat, node2id) = - PSDDElement(node2id[n.children[1]], node2id[n.children[2]], w) +make_element(n::StructMulNode, w::AbstractFloat, node2id) = + PSDDElement(node2id[children(n)[1]], node2id[children(n)[2]], w) -is_true_node(n)::Bool = - GateType(n) isa ⋁Gate && num_children(n) == 2 && GateType(children(n)[1]) isa LiteralGate && GateType(children(n)[2]) isa LiteralGate && - positive(children(n)[1]) && negative(children(n)[2]) +istrue_node(n)::Bool = + is⋁gate(n) && num_children(n) == 2 && GateType(children(n)[1]) isa LiteralGate && GateType(children(n)[2]) isa LiteralGate && + ispositive(children(n)[1]) && isnegative(children(n)[2]) -function decompile(n::Prob⋁, node2id, vtree2id)::Union{WeightedNamedConstantLine, DecisionLine{PSDDElement}} - if is_true_node(n) - WeightedNamedConstantLine(node2id[n], vtree2id[n.origin.vtree], lit2var(n.children[1].origin.literal), n.log_thetas[1]) # TODO +function decompile(n::StructSumNode, node2id, vtree2id)::Union{WeightedNamedConstantLine, DecisionLine{PSDDElement}} + if istrue_node(n) + WeightedNamedConstantLine(node2id[n], vtree2id[n.vtree], lit2var(children(n)[1].literal), n.log_probs[1]) # TODO else - DecisionLine(node2id[n], vtree2id[n.origin.vtree], UInt32(num_children(n)), map(x -> make_element(x[1], x[2], node2id), zip(children(n), n.log_thetas))) - end -end - -##################### -# build maping -##################### - -function get_node2id(ln::AbstractVector{X}, T::Type)where X #<: T#::Dict{T, ID} - node2id = Dict{T, ID}() - outnodes = filter(n -> !(GateType(n) isa ⋀Gate), ln) - sizehint!(node2id, length(outnodes)) - index = ID(0) # node id start from 0 - for n in outnodes - node2id[n] = index - index += ID(1) - end - node2id -end - -function get_vtree2id(ln::PlainVtree):: Dict{PlainVtreeNode, ID} - vtree2id = Dict{PlainVtreeNode, ID}() - sizehint!(vtree2id, 
length(ln)) - index = ID(0) # vtree id start from 0 - - for n in ln - vtree2id[n] = index - index += ID(1) + DecisionLine(node2id[n], vtree2id[n.vtree], UInt32(num_children(n)), map(x -> make_element(x[1], x[2], node2id), zip(children(n), n.log_probs))) end - vtree2id end ##################### # saver for circuits ##################### - +"Returns header for PSDD file format" function psdd_header() """ c ids of psdd nodes start at 0 @@ -84,16 +53,15 @@ function psdd_header() c""" end -function save_psdd_file(name::String, ln::ProbΔ, vtree::PlainVtree) +function save_as_psdd(name::String, circuit::ProbCircuit, vtree::PlainVtree) # TODO add method isstructured - @assert ln[end].origin isa StructLogicalΔNode "PSDD should decorate on StructLogicalΔ" @assert endswith(name, ".psdd") - node2id = get_node2id(ln, ProbΔNode) + node2id = get_node2id(circuit) vtree2id = get_vtree2id(vtree) formatlines = Vector{CircuitFormatLine}() append!(formatlines, parse_psdd_file(IOBuffer(psdd_header()))) - push!(formatlines, PsddHeaderLine(num_nodes(ln))) - for n in filter(n -> !(GateType(n) isa ⋀Gate), ln) + push!(formatlines, PsddHeaderLine(num_nodes(circuit))) + for n in filter(n -> !is⋀gate(n), circuit) push!(formatlines, decompile(n, node2id, vtree2id)) end save_lines(name, formatlines) @@ -121,43 +89,35 @@ function lc_header() c""" end -function save_lc_file(name::String, ln::LogisticΔ, vtree) - @assert ln[end].origin isa StructLogicalΔNode "LC should decorate on StructLogicalΔ" +function save_as_logistic(name::String, circuit::LogisticCircuit, vtree) @assert endswith(name, ".circuit") - node2id = get_node2id(ln, ProbΔNode) + node2id = get_node2id(circuit) vtree2id = get_vtree2id(vtree) formatlines = Vector{CircuitFormatLine}() append!(formatlines, parse_lc_file(IOBuffer(lc_header()))) push!(formatlines, LcHeaderLine()) - for n in filter(n -> !(GateType(n) isa ⋀Gate), ln) + for n in filter(n -> !is⋀gate(n), circuit) push!(formatlines, decompile(n, node2id, vtree2id)) end 
save_lines(name, formatlines) end -import LogicCircuits.save_circuit # make available for extension +# TODO add Decompile for logistic circuit -function save_circuit(name::String, circuit, vtree=nothing) - if endswith(name, ".circuit") - save_lc_file(name, circuit, vtree) - elseif endswith(name, ".psdd") - save_psdd_file(name, circuit, vtree) - elseif endswith(name, ".sdd") - save_sdd_file(name, circuit, vtree) - else - error("Cannot save circuit to file with this extensions: $name") - end -end +import LogicCircuits.LoadSave: save_circuit, save_as_dot # make available for extension -"Save prob circuit to .dot file" -function save_as_dot(root::ProbΔNode, file::String) - return save_as_dot(node2dag(root), file) -end +"Save a circuit to file" +save_circuit(name::String, circuit::StructProbCircuit, vtree::PlainVtree) = + save_as_psdd(name, circuit, vtree) + +save_circuit(name::String, circuit::LogisticCircuit, vtree::PlainVtree) = + save_as_logistic(name, circuit, vtree) +using Printf: @sprintf "Save prob circuits to .dot file" -function save_as_dot(circuit::ProbΔ, file::String) +function save_as_dot(circuit::ProbCircuit, file::String) # TODO (https://github.com/Juice-jl/LogicCircuits.jl/issues/7) - node_cache = Dict{ProbΔNode, Int64}() + node_cache = Dict{ProbCircuit, Int64}() for (i, n) in enumerate(circuit) node_cache[n] = i end @@ -179,26 +139,26 @@ function save_as_dot(circuit::ProbΔ, file::String) end for n in reverse(circuit) - if n isa Prob⋀ + if n isa PlainMulNode write(f, "$(node_cache[n]) [label=\"*$(node_cache[n])\"]\n") elseif n isa Prob⋁ write(f, "$(node_cache[n]) [label=\"+$(node_cache[n])\"]\n") - elseif n isa ProbLiteral && positive(n) + elseif n isa PlainProbLiteralNode && ispositive(n) write(f, "$(node_cache[n]) [label=\"+$(variable(n.origin))\"]\n") - elseif n isa ProbLiteral && negative(n) + elseif n isa PlainProbLiteralNode && isnegative(n) write(f, "$(node_cache[n]) [label=\"-$(variable(n.origin))\"]\n") else - throw("unknown ProbNode type") + 
throw("unknown ProbCircuit type") end end for n in reverse(circuit) if n isa Prob⋀ - for c in n.children + for c in children(n) write(f, "$(node_cache[n]) -> $(node_cache[c])\n") end elseif n isa Prob⋁ - for (c, p) in zip(n.children, exp.(n.log_thetas)) + for (c, p) in zip(children(n), exp.(n.log_probs)) prob = @sprintf "%0.1f" p write(f, "$(node_cache[n]) -> $(node_cache[c]) [label=\"$prob\"]\n") end diff --git a/src/LoadSave/plot.jl b/src/LoadSave/plot.jl new file mode 100644 index 00000000..e294e835 --- /dev/null +++ b/src/LoadSave/plot.jl @@ -0,0 +1,34 @@ +export DiGraph, plot +using LightGraphs +using TikzGraphs + +import LightGraphs: DiGraph + +function DiGraph(pc::ProbCircuit) + edge_labels = Dict() + label = label = Vector{String}(undef, num_nodes(pc)) + + add_label!(g, dict, n::ProbCircuit) = begin + label[dict[n]] = + if isliteralgate(n) "$(literal(n))" + elseif ismul(n) "*" + else "+" + end + end + + on_edge(g, id_dict, n, c, n_id, c_id) = noop + on_edge(g, id_dict, n::Union{PlainSumNode, StructSumNode}, c, n_id, c_id) = begin + edge_labels[(n_id, c_id)] = begin + i = findall(x -> x === c, children(n))[1] + "$(round(exp(n.log_probs[i]), digits=3))" + end + end + g, _ = LogicCircuits.LoadSave.DiGraph(pc;on_edge=on_edge, on_var=add_label!) 
+ g, label, edge_labels +end + +import TikzGraphs: plot +plot(pc::ProbCircuit) = begin + g, label, edge_labels = DiGraph(pc) + TikzGraphs.plot(g, label, edge_labels=edge_labels, edge_style="font=\\tiny") +end \ No newline at end of file diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl index 433c9ad5..e36fcb59 100644 --- a/src/Logistic/Logistic.jl +++ b/src/Logistic/Logistic.jl @@ -1,25 +1,11 @@ module Logistic using LogicCircuits -using ..Utils +using ...ProbabilisticCircuits -export - LogisticΔNode, - LogisticLeafNode, - LogisticInnerNode, - LogisticLiteral, - Logistic⋀, - Logistic⋁, - LogisticΔ, - LogisticΔ, - LogisticCache, - num_parameters_perclass, - logistic_origin, - class_conditional_likelihood_per_instance, - classes +include("queries.jl") +include("parameters.jl") - - -include("LogisticCircuits.jl") +# TODO structure learning end \ No newline at end of file diff --git a/src/Logistic/LogisticCircuits.jl b/src/Logistic/LogisticCircuits.jl deleted file mode 100644 index 4e40f70b..00000000 --- a/src/Logistic/LogisticCircuits.jl +++ /dev/null @@ -1,139 +0,0 @@ -####################### -## Logistic Circuits -####################### - - -abstract type LogisticΔNode{O} <: DecoratorΔNode{O} end -abstract type LogisticLeafNode{O} <: LogisticΔNode{O} end -abstract type LogisticInnerNode{O} <: LogisticΔNode{O} end - -struct LogisticLiteral{O} <: LogisticLeafNode{O} - origin::O -end - -struct Logistic⋀{O} <: LogisticInnerNode{O} - origin::O - children::Vector{<:LogisticΔNode{<:O}} -end - -mutable struct Logistic⋁{O} <: LogisticInnerNode{O} - origin::O - children::Vector{<:LogisticΔNode{<:O}} - thetas::Array{Float64, 2} -end - - - -const LogisticΔ{O} = AbstractVector{<:LogisticΔNode{O}} - -##################### -# traits -##################### - -import LogicCircuits.GateType # make available for extension - -@inline GateType(::Type{<:LogisticLiteral}) = LiteralGate() -@inline GateType(::Type{<:Logistic⋀}) = ⋀Gate() -@inline 
GateType(::Type{<:Logistic⋁}) = ⋁Gate() - - - -##################### -# constructors and conversions -##################### - -function Logistic⋁(::Type{O}, origin, children, classes::Int) where {O} - Logistic⋁{O}(origin, children, Array{Float64, 2}(undef, (length(children), classes))) -end - - -const LogisticCache = Dict{ΔNode, LogisticΔNode} - -function LogisticΔ(circuit::Δ, classes::Int, cache::LogisticCache = LogisticCache()) - - sizehint!(cache, length(circuit)*4÷3) - - O = grapheltype(circuit) # type of node in the origin - - pc_node(::LiteralGate, n::ΔNode) = LogisticLiteral{O}(n) - pc_node(::ConstantGate, n::ΔNode) = error("Cannot construct a logistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") - - pc_node(::⋀Gate, n::ΔNode) = begin - children = map(c -> cache[c], n.children) - Logistic⋀{O}(n, children) - end - - pc_node(::⋁Gate, n::ΔNode) = begin - children = map(c -> cache[c], n.children) - Logistic⋁(O, n, children, classes) - end - - map(circuit) do node - pcn = pc_node(GateType(node), node) - cache[node] = pcn - pcn - end -end - - -##################### -# methods -##################### - -import LogicCircuits: literal, children # make available for extension - -@inline literal(n::LogisticLiteral)::Lit = literal(n.origin) -@inline children(n::LogisticInnerNode) = n.children -@inline classes(n::Logistic⋁) = size(n.thetas)[2] - -num_parameters(n::Logistic⋁) = num_children(n) * classes(n) -num_parameters(c::LogisticΔ) = sum(n -> num_parameters(n), ⋁_nodes(c)) - -num_parameters_perclass(n::Logistic⋁) = num_children(n) -num_parameters_perclass(c::LogisticΔ) = sum(n -> num_parameters_perclass(n), ⋁_nodes(c)) - -"Return the first origin that is a Logistic circuit node" -logistic_origin(n::DecoratorΔNode)::LogisticΔNode = origin(n,LogisticΔNode) - -"Return the first origin that is a Logistic circuit" -logistic_origin(c::DecoratorΔ)::LogisticΔ = origin(c, LogisticΔNode) - - -# TODO Learning - - - -# Class Conditional 
Probability -function class_conditional_likelihood_per_instance(fc::FlowΔ, - classes::Int, - batch::PlainXData{Bool}) - lc = origin(origin(fc)) - @assert(lc isa LogisticΔ) - pass_up_down(fc, batch) - likelihoods = zeros(num_examples(batch), classes) - for n in fc - orig = logistic_origin(n) - if orig isa Logistic⋁ - # For each class. orig.thetas is 2D so used eachcol - for (idx, thetaC) in enumerate(eachcol(orig.thetas)) - foreach(n.children, thetaC) do c, theta - likelihoods[:, idx] .+= prod_fast(downflow(n), pr_factors(origin(c))) .* theta - end - end - end - end - likelihoods -end - -""" -Calculate conditional log likelihood for a batch of samples with evidence P(c | x). -(Also returns the generated FlowΔ) -""" -function class_conditional_likelihood_per_instance(lc::LogisticΔ, - classes::Int, - batch::PlainXData{Bool}) - opts = (max_factors = 2, compact⋀=false, compact⋁=false) - fc = FlowΔ(lc, num_examples(batch), Float64, opts) - (fc, class_conditional_likelihood_per_instance(fc, classes, batch)) -end - diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl new file mode 100644 index 00000000..ee008dd7 --- /dev/null +++ b/src/Logistic/parameter_circuit.jl @@ -0,0 +1,231 @@ +using CUDA +using LogicCircuits + +export LayeredParameterCircuit, CuLayeredParameterCircuit +export class_likelihood, class_weights +export one_hot, learn_parameters, update_parameters + +############################################################# +############## This is the old implementation ############### +#### Not intended to be used under the current framework #### +############################################################# + + +# in a parameter circuit +# 1 is true, 2 is false +const TRUE_ID = Int32(1) +const FALSE_ID = Int32(2) + +struct LayeredParameterCircuit + layered_circuit::LayeredBitCircuit + layered_parameters::Vector{Matrix{Float32}} +end + +LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin + 
@assert is⋁gate(circuit) + decisions::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + elements::Vector{Vector{Int32}} = Vector{Vector{Int32}}() + parameters::Vector{Vector{Float32}} = Vector{Vector{Float32}}() + num_decisions::Int32 = 2 * num_features + 2 + num_elements::Vector{Int32} = Vector{Int32}() + # num_parameters always equals num_elements + + ensure_layer(i) = begin + if length(decisions) < i + # add a new layer + push!(decisions, Int32[]) + push!(elements, Int32[]) + push!(parameters, Float32[]) + push!(num_elements, 0) + end + end + + f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_ID : FALSE_ID) + f_lit(n) = LayeredDecisionId(0, + ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n))) + + f_and(n, cs) = begin + @assert length(cs) == 2 + LayeredDecisionId[cs[1], cs[2]] + end + f_or(n, cs) = begin + num_decisions += 1 + # determine layer + layer_id = zero(Int32) + for c in cs + if c isa Vector{LayeredDecisionId} + @assert length(c) == 2 + layer_id = max(layer_id, c[1].layer_id, c[2].layer_id) + else + @assert c isa LayeredDecisionId + layer_id = max(layer_id, c.layer_id) + end + end + layer_id += 1 + ensure_layer(layer_id) + first_element = num_elements[layer_id] + 1 + foreach(cs, eachrow(n.thetas)) do c, theta + @assert size(theta)[1] == nc + append!(parameters[layer_id], theta) + num_elements[layer_id] += 1 + if c isa Vector{LayeredDecisionId} + push!(elements[layer_id], c[1].decision_id, c[2].decision_id) + else + push!(elements[layer_id], c.decision_id, TRUE_ID) + end + end + push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id]) + LayeredDecisionId(layer_id, num_decisions) + end + + foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, + Union{LayeredDecisionId,Vector{LayeredDecisionId}}) + + circuit_layers = map(decisions, elements) do d, e + Layer(reshape(d, 3, :), reshape(e, 2, :)) + end + parameter_layers = map(parameters) do p + reshape(p, nc, :) + end + return 
LayeredParameterCircuit(LayeredBitCircuit(circuit_layers), parameter_layers) +end + +struct CuLayeredParameterCircuit + layered_circuit::CuLayeredBitCircuit + layered_parameters::Vector{CuMatrix{Float32}} + CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) +end + + + +function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing) + cw, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + one = Float32(1.0) + return @. one / (one + exp(-cw)), flow, v +end + +function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing) + flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down) + cw = calculate_class_weights(circuit, nc, data, v, flow, reuse_cw) + return cw, flow, v +end + +function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, v, flow, reuse_cw=nothing) + ne = num_examples(data) + cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc) + reuse_cw .= zero(Float32) + reuse_cw + else + CUDA.zeros(Float32, ne, nc) + end + + dec_per_thread = 4 + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) + num_threads = balance_threads(ne, ndl / dec_per_thread, 8) + num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread) + @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, v, flow, circuit_layer.decisions, circuit_layer.elements, parameter_layer) + end + + return cw +end + +function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, elements, parameters) + index_x = 
(blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + ne, nc = size(cw) + _, num_decisions = size(decisions) + + for j = index_x:stride_x:ne + for i = index_y:stride_y:num_decisions + decision_id = @inbounds decisions[1, i] + n_up = @inbounds v[j, decision_id] + if n_up > zero(Float32) + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + n_down = @inbounds flow[j, decision_id] + for e = first_elem:last_elem + e1 = @inbounds elements[1, first_elem] + e2 = @inbounds elements[2, first_elem] + e_up = @inbounds (v[j, e1] * v[j, e2]) + edge_flow = e_up / n_up * n_down + # following needs to be memory safe + for class=1:nc + @CUDA.atomic cw[j, class] += edge_flow * parameters[class, e] # atomic is automatically inbounds + end + end + end + end + end + + return nothing +end + + + +function one_hot(labels::Vector, nc::Integer) + ne = length(labels) + one_hot_labels = zeros(Float32, ne, nc) + for (i, j) in enumerate(labels) + one_hot_labels[i, j + 1] = 1.0 + end + one_hot_labels +end + +function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001) + cp, flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) + update_parameters(circuit, labels, cp, flow, step_size) + for _ = 2:num_epochs + cp, flow, v = class_likelihood(circuit, nc, data, v, flow, cp) + update_parameters(circuit, labels, cp, v, flow, step_size) + end + return nothing +end + +function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, v, flow, step_size=0.0001) + _, nc = size(labels) + step_size = Float32(step_size) + CUDA.@sync for i = 1:length(circuit.layered_circuit.layers) + circuit_layer = circuit.layered_circuit.layers[i] + flow_layer = flow[i] + 
parameter_layer = circuit.layered_parameters[i] + ndl = num_decisions(circuit_layer) + num_threads = balance_threads(ndl, nc, 6) + num_threads = num_threads[1], num_threads[2], + num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, nc / num_threads[2]), 4 + @cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size) + end + return nothing +end + +function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + index_z = (blockIdx().z - 1) * blockDim().z + threadIdx().z + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + stride_z = blockDim().z * gridDim().z + ne, nc = size(labels) + _, num_decisions = size(decisions) + + for class = index_y:stride_y:nc + for i = index_x:stride_x:num_decisions + first_elem = @inbounds decisions[2, i] + last_elem = @inbounds decisions[3, i] + for e = first_elem:last_elem + for j = index_z:stride_z:ne + edge_flow = e_up / n_up * n_down + u = @inbounds edge_flow * (cp[j, class] - labels[j, class]) * step_size + # following needs to be memory safe + @inbounds parameters[class, e] -= u + end + end + end + end + + return nothing +end \ No newline at end of file diff --git a/src/Logistic/parameters.jl b/src/Logistic/parameters.jl new file mode 100644 index 00000000..fdc4e9f2 --- /dev/null +++ b/src/Logistic/parameters.jl @@ -0,0 +1,123 @@ +export learn_parameters, to_onehot + +using CUDA +using LoopVectorization: @avx, vifelse + +""" +Parameter learning through gradient descents +Note: data need to be DataFrame and Labels need to be in one-hot form. 
+""" +function learn_parameters(lc::LogisticCircuit, nc::Int, data, labels; num_epochs=25, step_size=0.01) + bc = ParamBitCircuit(lc, nc, data) + if isgpu(data) + @assert isgpu(labels) "Data and labels must be both stored in either GPU or CPU." + for _ = 1:num_epochs + cl = class_likelihood_per_instance(bc, data) + update_parameters_gpu(to_gpu(bc), data, labels, cl, step_size) + end + else + @assert !isgpu(labels) "Data and labels must be both stored in either GPU or CPU." + for _ = 1:num_epochs + cl = class_likelihood_per_instance(bc, data) + update_parameters_cpu(bc, data, labels, cl, step_size) + end + end +end + +function update_parameters_cpu(bc, data, labels, cl, step_size) + ne::Int = num_examples(data) + nc::Int = size(bc.params, 2) + params_lock::Threads.ReentrantLock = Threads.ReentrantLock() + + @inline function on_edge_binary(flows, values, prime, sub, element, grandpa, single_child) + lock(params_lock) do # TODO: move lock to inner loop? + for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, prime] & values[i, sub] & flows[i, grandpa] + first_true_bit = trailing_zeros(edge_flow) + 1 + last_true_bit = 64 - leading_zeros(edge_flow) + @simd for j = first_true_bit:last_true_bit + ex_id = ((i - 1) << 6) + j + if get_bit(edge_flow, j) + for class = 1:nc + @inbounds bc.params[element, class] -= (cl[ex_id, class] - labels[ex_id, class]) * step_size + end + end + end + end + end + end + + @inline function on_edge_float(flows, values, prime, sub, element, grandpa, single_child) + lock(params_lock) do # TODO: move lock to inner loop? 
+ @avx for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, prime] * values[i, sub] / values[i, grandpa] * flows[i, grandpa] + edge_flow = vifelse(isfinite(edge_flow), edge_flow, zero(eltype(flows))) + for class = 1:nc + @inbounds bc.parames[element, class] -= (cl[i, class] - labels[i, class]) * edge_flow * step_size + end + end + end + nothing + end + + if isbinarydata(data) + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_binary) + else + @assert isfpdata(data) "Only floating point and binary data are supported" + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_float) + end + + nothing +end + + +function update_parameters_gpu(bc, data, labels, cl, step_size) + ne::Int = num_examples(data) + nc::Int = size(bc.params, 2) + cl_device = CUDA.cudaconvert(cl) + label_device = CUDA.cudaconvert(labels) + params_device = CUDA.cudaconvert(bc.params) + + @inline function on_edge_binary(flows, values, prime, sub, element, grandpa, chunk_id, edge_flow, single_child) + first_true_bit = 1 + trailing_zeros(edge_flow) + last_true_bit = 64 - leading_zeros(edge_flow) + for j = first_true_bit:last_true_bit + if get_bit(edge_flow, j) + ex_id = ((chunk_id - 1) << 6) + j + for class = 1:nc + CUDA.@atomic params_device[element, class] -= (cl_device[ex_id, class] - label_device[ex_id, class]) * step_size + end + end + end + nothing + end + + @inline function on_edge_float(flows, values, prime, sub, element, grandpa, ex_id, edge_flow, single_child) + for class = 1:nc + CUDA.@atomic params_device[element, class] -= (cl_device[ex_id, class] - label_device[ex_id, class]) * edge_flow * step_size + end + nothing + end + + if isbinarydata(data) + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_binary) + else + @assert isfpdata(data) "Only floating point and binary data are supported" + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_float) + end + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some 
effort + + nothing +end + + + +function to_onehot(labels::Vector, nc::Integer) + ne = length(labels) + one_hot_labels = zeros(Float32, ne, nc) + for (i, j) in enumerate(labels) + one_hot_labels[i, j + 1] = 1.0 + end + one_hot_labels +end \ No newline at end of file diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl new file mode 100644 index 00000000..9931d766 --- /dev/null +++ b/src/Logistic/queries.jl @@ -0,0 +1,148 @@ +export class_likelihood_per_instance, class_weights_per_instance + +using CUDA +using LoopVectorization: @avx, vifelse + + +""" +Class Conditional Probability +""" +function class_likelihood_per_instance(lc::LogicCircuit, nc::Int, data) + cw = class_weights_per_instance(lc, nc, data) + one = Float32(1.0) + isgpu(data) ? (@. one / (one + exp(-cw))) : (@. @avx one / (one + exp(-cw))) +end + +function class_likelihood_per_instance(bc, data) + cw = class_weights_per_instance(bc, data) + isgpu(data) ? (@. one / (one + exp(-cw))) : (@. @avx one / (one + exp(-cw))) +end + +function class_weights_per_instance(lc::LogisticCircuit, nc::Int, data) + bc = ParamBitCircuit(lc, nc, data) + class_weights_per_instance(bc, data) +end + +function class_weights_per_instance(bc, data) + if isgpu(data) + class_weights_per_instance_gpu(to_gpu(bc), data) + else + class_weights_per_instance_cpu(bc, data) + end +end + +function class_weights_per_instance_cpu(bc, data) + ne::Int = num_examples(data) + nc::Int = size(bc.params, 2) + cw::Matrix{Float32} = zeros(Float32, ne, nc) + cw_lock::Threads.ReentrantLock = Threads.ReentrantLock() + + @inline function on_edge_binary(flows, values, prime, sub, element, grandpa, single_child) + lock(cw_lock) do # TODO: move lock to inner loop? 
+ for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, prime] & values[i, sub] & flows[i, grandpa] + first_true_bit = trailing_zeros(edge_flow) + 1 + last_true_bit = 64 - leading_zeros(edge_flow) + @simd for j = first_true_bit:last_true_bit + ex_id = ((i - 1) << 6) + j + if get_bit(edge_flow, j) + for class = 1:nc + @inbounds cw[ex_id, class] += bc.params[element, class] + end + end + end + end + end + nothing + end + + @inline function on_edge_float(flows, values, prime, sub, element, grandpa, single_child) + lock(cw_lock) do # TODO: move lock to inner loop? + @avx for i = 1:size(flows, 1) + @inbounds edge_flow = values[i, prime] * values[i, sub] / values[i, grandpa] * flows[i, grandpa] + edge_flow = vifelse(isfinite(edge_flow), edge_flow, zero(eltype(flows))) + for class = 1:nc + @inbounds cw[i, class] += edge_flow * bc.params[element, class] + end + end + end + nothing + end + + if isbinarydata(data) + satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_binary) + else + satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_float) + end + + return cw +end + +function class_weights_per_instance_gpu(bc, data) + ne::Int = num_examples(data) + nc::Int = size(bc.params, 2) + cw::CuMatrix{Float32} = CUDA.zeros(Float32, num_examples(data), nc) + cw_device = CUDA.cudaconvert(cw) + params_device = CUDA.cudaconvert(bc.params) + + @inline function on_edge_binary(flows, values, prime, sub, element, grandpa, chunk_id, edge_flow, single_child) + first_true_bit = 1 + trailing_zeros(edge_flow) + last_true_bit = 64 - leading_zeros(edge_flow) + for j = first_true_bit:last_true_bit + ex_id = ((chunk_id - 1) << 6) + j + if get_bit(edge_flow, j) + for class = 1:nc + CUDA.@atomic cw_device[ex_id, class] += params_device[element, class] + end + end + end + nothing + end + + @inline function on_edge_float(flows, values, prime, sub, element, grandpa, ex_id, edge_flow, single_child) + for class = 1:nc + CUDA.@atomic cw_device[ex_id, class] += edge_flow * params_device[element, 
class] + end + nothing + end + + if isbinarydata(data) + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_binary) + else + @assert isfpdata(data) "Only floating point and binary data are supported" + v,f = satisfies_flows(bc.bitcircuit, data; on_edge = on_edge_float) + end + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some effort + + return cw +end + + + +""" +Class Predictions +""" +function predict_class(lc::LogisticCircuit, nc::Int, data) + class_likelihoods = class_likelihood_per_instance(lc, nc, data) + predict_class(class_likelihoods) +end + +function predict_class(class_likelihoods) + _, mxindex = findmax(class_likelihoods; dims=2) + dropdims(getindex.(mxindex, 2); dims=2) +end + + + +""" +Prediction accuracy +""" +accuracy(lc::LogisticCircuit, nc::Int, data, labels::Vector) = + accuracy(predict_class(lc, nc, data), labels) + +accuracy(predicted_class::Vector, labels::Vector) = + Float64(sum(@. predicted_class == labels)) / length(labels) + +accuracy(class_likelihoods::Matrix, labels::Vector) = + accuracy(predict_class(class_likelihoods), labels) diff --git a/src/Probabilistic/Bagging.jl b/src/Probabilistic/Bagging.jl deleted file mode 100644 index 42e60515..00000000 --- a/src/Probabilistic/Bagging.jl +++ /dev/null @@ -1,61 +0,0 @@ -import StatsBase - -function bootstrap_samples_ids(train_x::PlainXData, n_samples::Int - #, rand_gen::AbstractRNG - ) - n_instances = num_examples(train_x) - ids = 1:n_instances - return [StatsBase.sample( - #rand_gen, - ids, n_instances, replace=true) for i in 1:n_samples] -end - -function train_bagging(# pcs::Vector{<:ProbΔ}, - train_x::XBatches{Bool}, - n_components::Int64; - init_models=nothing, - mixture_weights, - learn_base_estimator, - base_estimator_params, - logs) - - @assert length(logs) == n_components "Dimension not match in train bagging." 
- # bootstrapping samples - bagging_samples = init_bagging_samples(train_x, n_components) - - # weights - weights = nothing - if mixture_weights == "uniform" - weights = ones(Float64, n_components) ./ n_components - else - throw(DomainError(mixture_weights, "Unrecognized mixture weight mode")) - end - - if issomething(init_models) - @assert length(init_models) == n_components "Dimension not match in train bagging." - for i in 1 : n_components - learn_base_estimator(bagging_samples[i], init_models[i]; log=logs[i], base_estimator_params...) - end - - else - for i in 1 : n_components - learn_base_estimator(bagging_samples[i]; log=logs[i], base_estimator_params...) - end - end -end - -function init_bagging_samples(train_x::XBatches{Bool}, num_bags::Int64)::Vector{XBatches{Bool}} - batch_size = max_batch_size(train_x) - - unbatched = unbatch(train_x) - m = feature_matrix(unbatched) - bagging_samples = Vector{XBatches{Bool}}() - - bootstrapped_ids = bootstrap_samples_ids(unbatched, num_bags) - - for i in 1 : num_bags - new_examples = PlainXData(m[bootstrapped_ids[i], :]) - push!(bagging_samples, batch(new_examples, batch_size)) - end - bagging_samples -end diff --git a/src/Probabilistic/Clustering.jl b/src/Probabilistic/Clustering.jl deleted file mode 100644 index 47c30403..00000000 --- a/src/Probabilistic/Clustering.jl +++ /dev/null @@ -1,21 +0,0 @@ -using Clustering - -function clustering(train_x::XData, mix_num::Int64; maxiter=200)::XBatches{<:Bool} - if mix_num == 1 - return convert(XBatches, train_x) - end - - n = num_examples(train_x) - X = feature_matrix(train_x)' - - println("Running K-means clustering algorithm with num of components $mix_num, maximum iterations $maxiter") - R = kmeans(X, mix_num; maxiter=maxiter) - @assert nclusters(R) == mix_num - a = assignments(R) - - clustered_train_x = Vector{PlainXData{Bool,BitMatrix}}() - for k in 1 : mix_num - push!(clustered_train_x, XData(convert(BitMatrix, X[:, findall(x -> x == k, a)]'))) - end - return 
clustered_train_x -end \ No newline at end of file diff --git a/src/Probabilistic/EMLearner.jl b/src/Probabilistic/EMLearner.jl deleted file mode 100644 index e16b6f7f..00000000 --- a/src/Probabilistic/EMLearner.jl +++ /dev/null @@ -1,160 +0,0 @@ -""" -Train a mixture of probabilistic circuits from data, starting with random example weights. -""" -function train_mixture( pcs::Vector{<:ProbΔ}, - train_x::XBatches{Bool}, - pseudocount, num_iters; - structure_learner=nothing, learnstruct_step = num_iters + 1, # structure learning - logger=nothing, logging_step = 1 # logging or saving results - )::AbstractFlatMixture - - - # create mixture model with uniform component weights - mixture_flow = init_mixture_with_flows(FlatMixture(pcs), train_x) - - # reset aggregate statistics - reset_mixture_aggregate_flows(mixture_flow) - - # do a quick maximization step - for batch in train_x - example_weights = random_example_weights(num_examples(batch), num_components(mixture_flow)) - aggregate_flows(mixture_flow, batch, example_weights) - end - estimate_parameters(mixture_flow, component_weights(mixture_flow); pseudocount=pseudocount) - - train_mixture(mixture_flow, train_x, pseudocount, num_iters; - structure_learner=structure_learner, learnstruct_step=learnstruct_step, - logger=logger, logging_step=logging_step) -end - -""" -Train a mixture model from data. -Learning is initialized from the parameters stored in the given mixture. -When a `structure_learner` is given, it will be called between EM steps to update circuit structures. 
-""" -function train_mixture( mixture::AbstractFlatMixture, # we start from component weights that are already given - train_x::XBatches{Bool}, - pseudocount, num_iters; - structure_learner=nothing, learnstruct_step = num_iters + 1, # structure learning - logger=nothing, logging_step = 1 # logging or saving results - )::AbstractFlatMixture - - @assert feature_type(train_x) == Bool "Can only learn probabilistic circuits on Bool data" - - # initialize data structures - mixture_flow = init_mixture_with_flows(mixture, train_x) - - if issomething(logger) - logger(mixture_flow) - end - - for i in 1:num_iters - - # reset aggregate statistics - total_component_probability = ones(Float64, num_components(mixture_flow)) .* pseudocount ./ num_components(mixture_flow) - reset_mixture_aggregate_flows(mixture_flow) - - # are we doing structure learning at the end of this iteration? - is_learnstruct_iter = issomething(structure_learner) && i % learnstruct_step == 0 - - all_example_weights = Vector{Matrix{Float64}}() - - # Expectation step (update example weights given mixture parameters) - # + collecting aggregate statistics for subsequent maximization step - for batch in train_x - log_p_of_x_and_c = log_likelihood_per_instance_component(mixture_flow, batch) - example_weights = component_weights_per_example(log_p_of_x_and_c) - - # copy the flows already computed by `log_likelihood_per_instance_component` into the underlying aggregate flow circuit - # this way the maximization step can use them to estimate new parameters - aggregate_flows_cached(mixture_flow, batch, example_weights) - - # store the aggregated component probabilities such that the maximization step can re-estimate the component weights - total_component_probability .+= dropdims(sum(example_weights, dims=1), dims=1) - - # cache the example weights for the structure learner at the end of this EM iteration - is_learnstruct_iter && push!(all_example_weights, example_weights) - end - - # Maximization step (update mixture 
parameters given example weights (as stored in aggregate circuits)) - estimate_parameters(mixture_flow, total_component_probability; pseudocount=pseudocount) - - # Structural EM step - if is_learnstruct_iter - new_mixture_flow = structure_learner(mixture_flow, train_x, all_example_weights) - # mixture = replace_prob_circuits(mixture, new_pcs) - # re-initialize data structures - mixture_flow = init_mixture_with_flows(new_mixture_flow, train_x) - end - - if i % logging_step == 0 && issomething(logger) - logger(mixture_flow) - end - end - - return mixture_flow -end - -"Ensure we have a FlatMixtureWithFlow where the flow circuits have aggregate flow circuits as origin" -function init_mixture_with_flows(mixture::FlatMixtureWithFlow, ::XBatches{Bool})::FlatMixtureWithFlow - if ! all(fc -> grand_origin(fc) isa AggregateFlowΔ, mixture.flowcircuits) - init_mixture_with_flows(origin(mixture)) - else - mixture - end -end -function init_mixture_with_flows(mixture::FlatMixture, train_x::XBatches{Bool})::FlatMixtureWithFlow - aggr_circuits = [AggregateFlowΔ(pc, Float64) for pc in components(mixture)] - flow_circuits = [FlowΔ(afc, max_batch_size(train_x), Bool, opts_accumulate_flows) for afc in aggr_circuits] - FlatMixtureWithFlow(mixture, flow_circuits) -end - -function reset_mixture_aggregate_flows(mixture_flow::FlatMixtureWithFlow) - for fc in mixture_flow.flowcircuits - reset_aggregate_flows(grand_origin(fc)) - end -end - -"Compute the component weights for each example from likelihoods" -function component_weights_per_example(log_p_of_x_and_c) - log_p_of_x = logsumexp(log_p_of_x_and_c, 2) # marginalize out components - log_p_of_given_x_query_c = mapslices(col -> col .- log_p_of_x, log_p_of_x_and_c, dims=[1]) - p_of_given_x_query_c = exp.(log_p_of_given_x_query_c) # no more risk of underflow, so go to linear space - @assert sum(p_of_given_x_query_c) ≈ size(log_p_of_x_and_c, 1) # each row has proability 1 - p_of_given_x_query_c -end - -"Compute and aggregate flows for mixture 
components" -function aggregate_flows(mixture_flow, batch, example_weights) - for i in 1:num_components(mixture_flow) - fc = mixture_flow.flowcircuits[i] - wbatch = weighted_batch_for_component(batch, example_weights,i) - accumulate_aggr_flows_batch(fc, wbatch) - end -end - -"Aggregate already-computed flows for mixture components" -function aggregate_flows_cached(mixture_flow, batch, example_weights) - for i in 1:num_components(mixture_flow) - fc = mixture_flow.flowcircuits[i] - wbatch = weighted_batch_for_component(batch, example_weights,i) - accumulate_aggr_flows_cached(fc, wbatch) - end -end - -function estimate_parameters(mixture_flow, total_component_probability; pseudocount) - component_weights(mixture_flow) .= total_component_probability ./ sum(total_component_probability) - for fc in mixture_flow.flowcircuits - estimate_parameters_cached(grand_origin(fc); pseudocount=pseudocount) - end -end - -"Get a new weighted batch for this component" -weighted_batch_for_component(batch::PlainXData, example_weights, component_i)::WXData = - WXData(batch, example_weights[:,component_i]) - -"Create random example weights that sum to one overall components" -function random_example_weights(num_examples::Int, num_components::Int)::Matrix{Float64} - w = rand(Float64, num_examples, num_components) - w ./ sum(w;dims=2) -end \ No newline at end of file diff --git a/src/Probabilistic/Mixtures.jl b/src/Probabilistic/Mixtures.jl deleted file mode 100644 index 68041988..00000000 --- a/src/Probabilistic/Mixtures.jl +++ /dev/null @@ -1,169 +0,0 @@ -##################### -# Probabilistic circuit mixtures -##################### - -"A probabilistic mixture model" -abstract type AbstractMixture end - -"A probabilistic mixture model whose components are not themselves mixtures" -abstract type AbstractFlatMixture <: AbstractMixture end - -"A probabilistic mixture model whose components are themselves mixtures" -abstract type AbstractMetaMixture <: AbstractMixture end - -"A probabilistic 
mixture model of probabilistic circuits" -struct FlatMixture <: AbstractFlatMixture - weights::Vector{Float64} - components::Vector{<:ProbΔ} - FlatMixture(w,c) = begin - @assert length(w) == length(c) - @assert sum(w) ≈ 1.0 - new(w,c) - end -end - -FlatMixture(c) = FlatMixture(uniform(length(c)),c) - -"A mixture with cached flow circuits for each component (which are assumed to be ProbΔs)" -struct FlatMixtureWithFlow <: AbstractFlatMixture - origin::FlatMixture - flowcircuits::Vector{<:FlowΔ} - FlatMixtureWithFlow(origin,fcs) = begin - @assert num_components(origin) == length(fcs) - foreach(components(origin), fcs) do or, fc - @assert or[end] === prob_origin(fc)[end] - end - new(origin,fcs) - end -end - -FlatMixtureWithFlow(w,c,f) = FlatMixtureWithFlow(FlatMixture(w,c),f) - -"A probabilistic mixture model of mixture models" -struct MetaMixture <: AbstractMetaMixture - weights::Vector{Float64} - components::Vector{<:AbstractMixture} - MetaMixture(w,c) = begin - @assert length(w) == length(c) - @assert sum(w) ≈ 1.0 - new(w,c) - end -end - -MetaMixture(c) = MetaMixture(uniform(length(c)),c) - -Mixture(w, c::Vector{<:AbstractMixture}) = MetaMixture(w, c) -Mixture(w, c::Vector{<:ProbΔ}) = FlatMixture(w, c) - -##################### -# Functions -##################### - -"Get the components in this mixture" -@inline components(m::FlatMixture) = m.components -@inline components(m::FlatMixtureWithFlow) = components(m.origin) -@inline components(m::MetaMixture) = m.components - -"Get the component weights in this mixture" -@inline component_weights(m::FlatMixture) = m.weights -@inline component_weights(m::FlatMixtureWithFlow) = component_weights(m.origin) -@inline component_weights(m::MetaMixture) = m.weights - -"Number of components in a mixture" -@inline num_components(m::AbstractMixture)::Int = length(components(m)) - -"Convert a given flat mixture into one with cached flows" -ensure_with_flows(m::FlatMixture, size_hint::Int)::FlatMixtureWithFlow = begin - flowcircuits = 
[FlowΔ(pc, size_hint, Bool, opts_accumulate_flows) for pc in components(m)] - FlatMixtureWithFlow(m,flowcircuits) -end -ensure_with_flows(m::FlatMixtureWithFlow, ::Int)::FlatMixtureWithFlow = m - -replace_prob_circuits(m::FlatMixture, pcs::Vector{ProbΔ}) = - FlatMixture(component_weights(m), pcs) - -# log_likelihood - -function log_likelihood(mixture::FlatMixture, batches::XBatches{Bool})::Float64 - mwf = ensure_with_flows(mixture, max_batch_size(batches)) - log_likelihood(mwf, batches) -end - -function log_likelihood(mixture::FlatMixtureWithFlow, batches::XBatches{Bool})::Float64 - # assume the per-batch call will compute a weighted sum over examples - sum(batch -> log_likelihood(mixture, batch), batches) -end - -function log_likelihood(mixture::FlatMixtureWithFlow, batch::PlainXData{Bool})::Float64 - sum(log_likelihood_per_instance(mixture, batch)) -end - -function log_likelihood(mixture::MetaMixture, batches::XBatches{Bool})::Float64 - sum(batch -> log_likelihood(mixture, batch), batches) -end - -function log_likelihood(mixture::MetaMixture, batch::PlainXData{Bool})::Float64 - sum(log_likelihood_per_instance(mixture, batch)) -end - -# log_likelihood_per_instance (including mixture weight likelihood) - -function log_likelihood_per_instance(mixture::FlatMixture, batches::XBatches{Bool})::Vector{Float64} - mwf = ensure_with_flows(mixture, max_batch_size(batches)) - log_likelihood_per_instance(mwf, batches) -end - -function log_likelihood_per_instance(mixture::FlatMixtureWithFlow, batches::XBatches{Bool})::Vector{Float64} - mapreduce(b -> log_likelihood_per_instance(mixture, b), vcat, batches) -end - -function log_likelihood_per_instance(mixture::FlatMixtureWithFlow, batch::PlainXData{Bool})::Vector{Float64} - log_p_of_x_and_c = log_likelihood_per_instance_component(mixture, batch) - logsumexp(log_p_of_x_and_c, 2) -end - -function log_likelihood_per_instance(mixture::MetaMixture, batches::XBatches{Bool})::Vector{Float64} - mapreduce(b -> 
log_likelihood_per_instance(mixture, b), vcat, batches) -end - -function log_likelihood_per_instance(mixture::MetaMixture, batches::PlainXData{Bool})::Vector{Float64} - log_p_of_x_and_c = log_likelihood_per_instance_component(mixture, batch) - logsumexp(log_p_of_x_and_c, 2) -end - -# Log likelihoods per instance and component (including mixture weight likelihood) - - -"Log likelihood per instance and component. A vector of matrices per batch where the first dimension is instance, second is components." -function log_likelihood_per_instance_component(mixture::FlatMixtureWithFlow, batches::XBatches{Bool})::Vector{Matrix{Float64}} - [log_likelihood_per_instance_component(mixture, batch) for batch in batches] -end - -"Log likelihood per instance and component. First dimension is instance, second is components." -function log_likelihood_per_instance_component(mixture::FlatMixtureWithFlow, batch::PlainXData{Bool})::Matrix{Float64} - hcat(log_likelihood_per_component_instance(mixture, batch)...) -end - -"Log likelihood per component and instance. Outer vector is components, inner vector is instances" -function log_likelihood_per_component_instance(mixture::FlatMixtureWithFlow, batch::PlainXData{Bool})::Vector{Vector{Float64}} - map(mixture.flowcircuits, component_weights(mixture)) do fc, component_weight - log_likelihood_per_instance(fc, batch) .+ log(component_weight) - end -end - -"Log likelihood per instance and component. A vector of matrices per batch where the first dimension is instance, second is components." -function log_likelihood_per_instance_component(mixture::MetaMixture, batches::XBatches{Bool})::Vector{Matrix{Float64}} - [log_likelihood_per_instance_component(mixture, batch) for batch in batches] -end - -"Log likelihood per instance and component. First dimension is instance, second is components." 
-function log_likelihood_per_instance_component(mixture::MetaMixture, batch::PlainXData{Bool})::Matrix{Float64} - hcat(log_likelihood_per_component_instance(mixture, batch)...) -end - -"Log likelihood per component and instance. Outer vector is components, inner vector is instances" -function log_likelihood_per_component_instance(mixture::MetaMixture, batch::PlainXData{Bool})::Vector{Vector{Float64}} - map(mixture.components, component_weights(mixture)) do c, component_weight - log_likelihood_per_instance(c, batch) .+ log(component_weight) - end -end \ No newline at end of file diff --git a/src/Probabilistic/ProbCircuits.jl b/src/Probabilistic/ProbCircuits.jl deleted file mode 100644 index c6156eb6..00000000 --- a/src/Probabilistic/ProbCircuits.jl +++ /dev/null @@ -1,515 +0,0 @@ -##################### -# Probabilistic circuits -##################### -abstract type ProbΔNode{O} <: DecoratorΔNode{O} end -abstract type ProbLeafNode{O} <: ProbΔNode{O} end -abstract type ProbInnerNode{O} <: ProbΔNode{O} end - -mutable struct ProbLiteral{O} <: ProbLeafNode{O} - origin::O - data - bit::Bool - ProbLiteral(n) = new{node_type(n)}(n, nothing, false) -end - -mutable struct Prob⋀{O} <: ProbInnerNode{O} - origin::O - children::Vector{<:ProbΔNode{<:O}} - data - bit::Bool - Prob⋀(n, children) = begin - new{node_type(n)}(n, convert(Vector{ProbΔNode{node_type(n)}},children), nothing, false) - end -end - -mutable struct Prob⋁{O} <: ProbInnerNode{O} - origin::O - children::Vector{<:ProbΔNode{<:O}} - log_thetas::Vector{Float64} - data - bit::Bool - Prob⋁(n, children) = new{node_type(n)}(n, convert(Vector{ProbΔNode{node_type(n)}},children), some_vector(Float64, length(children)), nothing, false) -end - -const ProbΔ{O} = AbstractVector{<:ProbΔNode{<:O}} - -Base.eltype(::Type{ProbΔ{O}}) where {O} = ProbΔNode{<:O} - -##################### -# traits -##################### - -import LogicCircuits.GateType # make available for extension -import LogicCircuits.node_type - -@inline 
GateType(::Type{<:ProbLiteral}) = LiteralGate() -@inline GateType(::Type{<:Prob⋀}) = ⋀Gate() -@inline GateType(::Type{<:Prob⋁}) = ⋁Gate() - -@inline node_type(::ProbΔNode) = ProbΔNode - -##################### -# constructors and conversions -##################### - -const ProbCache = Dict{ΔNode, ProbΔNode} - -function ProbΔ2(circuit::Δ)::ProbΔ - node2dag(ProbΔ2(circuit[end])) -end - -function ProbΔ2(circuit::ΔNode)::ProbΔNode - f_con(n) = error("Cannot construct a probabilistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") - f_lit(n) = ProbLiteral(n) - f_a(n, cn) = Prob⋀(n, cn) - f_o(n, cn) = Prob⋁(n, cn) - foldup_aggregate(circuit, f_con, f_lit, f_a, f_o, ProbΔNode{node_type(circuit)}) -end - -function ProbΔ(circuit::Δ, cache::ProbCache = ProbCache()) - - sizehint!(cache, length(circuit)*4÷3) - - pc_node(::LiteralGate, n::ΔNode) = ProbLiteral(n) - pc_node(::ConstantGate, n::ΔNode) = error("Cannot construct a probabilistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") - - pc_node(::⋀Gate, n::ΔNode) = begin - children = map(c -> cache[c], n.children) - Prob⋀(n, children) - end - - pc_node(::⋁Gate, n::ΔNode) = begin - children = map(c -> cache[c], n.children) - Prob⋁(n, children) - end - - map(circuit) do node - pcn = pc_node(GateType(node), node) - cache[node] = pcn - pcn - end -end - -##################### -# methods -##################### - -import LogicCircuits: literal, children # make available for extension - -@inline literal(n::ProbLiteral)::Lit = literal(n.origin) -@inline children(n::ProbInnerNode) = n.children - -num_parameters(n::Prob⋁) = num_children(n) -num_parameters(c::ProbΔ) = sum(n -> num_parameters(n), ⋁_nodes(c)) - -"Return the first origin that is a probabilistic circuit node" -prob_origin(n::DecoratorΔNode)::ProbΔNode = origin(n, ProbΔNode) - -"Return the first origin that is a probabilistic circuit" -prob_origin(c::DecoratorΔ)::ProbΔ = origin(c, ProbΔNode) - -function 
estimate_parameters2(pc::ProbΔ, data::XData{Bool}; pseudocount::Float64) - Logical.pass_up_down2(pc, data) - w = (data isa PlainXData) ? nothing : weights(data) - estimate_parameters_cached2(pc, w; pseudocount=pseudocount) -end - -function estimate_parameters_cached2(pc::ProbΔ, w; pseudocount::Float64) - flow(n) = Float64(sum(sum(n.data))) - children_flows(n) = sum.(map(c -> c.data[1] .& n.data[1], children(n))) - - if issomething(w) - flow_w(n) = sum(Float64.(n.data[1]) .* w) - children_flows_w(n) = sum.(map(c -> Float64.(c.data[1] .& n.data[1]) .* w, children(n))) - flow = flow_w - children_flows = children_flows_w - end - - estimate_parameters_node2(n::ProbΔNode) = () - function estimate_parameters_node2(n::Prob⋁) - if num_children(n) == 1 - n.log_thetas .= 0.0 - else - smoothed_flow = flow(n) + pseudocount - uniform_pseudocount = pseudocount / num_children(n) - n.log_thetas .= log.((children_flows(n) .+ uniform_pseudocount) ./ smoothed_flow) - @assert isapprox(sum(exp.(n.log_thetas)), 1.0, atol=1e-6) "Parameters do not sum to one locally" - # normalize away any leftover error - n.log_thetas .- logsumexp(n.log_thetas) - end - end - - foreach(estimate_parameters_node2, pc) -end - -function log_likelihood_per_instance2(pc::ProbΔ, data::XData{Bool}) - Logical.pass_up_down2(pc, data) - log_likelihood_per_instance_cached(pc, data) -end - -function log_likelihood_per_instance_cached(pc::ProbΔ, data::XData{Bool}) - log_likelihoods = zeros(num_examples(data)) - indices = some_vector(Bool, num_examples(data))::BitVector - for n in pc - if n isa Prob⋁ && num_children(n) != 1 # other nodes have no effect on likelihood - foreach(n.children, n.log_thetas) do c, log_theta - indices = n.data[1] .& c.data[1] - view(log_likelihoods, indices::BitVector) .+= log_theta # see MixedProductKernelBenchmark.jl - end - end - end - log_likelihoods -end - -import LogicCircuits: conjoin_like, disjoin_like, literal_like, copy_node, normalize, replace_node # make available for extension - 
-"Conjoin nodes in the same way as the example" -@inline function conjoin_like(example::ProbΔNode, arguments::Vector) - if isempty(arguments) - # @assert false "Probabilistic circuit does not have anonymous true node" - nothing - elseif example isa Prob⋀ && children(example) == arguments - example - else - n = conjoin_like(origin(example), origin.(arguments)) - Prob⋀(n, arguments) - end -end - -"Disjoin nodes in the same way as the example" -@inline function disjoin_like(example::ProbΔNode, arguments::Vector) - if isempty(arguments) - # @assert false "Probabilistic circuit does not have false node" - nothing - elseif example isa Prob⋁ && children(example) == arguments - example - else - n = disjoin_like(origin(example), origin.(arguments)) - # normalize parameters - thetas = zeros(Float64, length(arguments)) - flag = falses(length(arguments)) - for (i, c) in enumerate(arguments) - ind = findfirst(x -> x == c, children(example)) - if issomething(ind) - thetas[i] = exp(example.log_thetas[ind]) - flag[i] = true - end - end - if all(flag) - thetas = thetas / sum(thetas) - end - p = Prob⋁(n, arguments) - p.log_thetas .= log.(thetas) - p - end -end - -"Construct a new literal node like the given node's type" -@inline literal_like(::ProbΔNode, lit::Lit) = ProbLiteral(lit) - -@inline copy_node(n::Prob⋁, cns) = begin - orig = copy_node(origin(n), origin.(cns)) - p = Prob⋁(orig, cns) - p.log_thetas .= copy(n.log_thetas) - p -end - -@inline copy_node(n::Prob⋀, cns) = begin - orig = copy_node(origin(n), origin.(cns)) - Prob⋀(orig, cns) -end - -import LogicCircuits.normalize - -@inline normalize(n::Prob⋁, old_n::Prob⋁, kept::Union{Vector{Bool}, BitArray}) = begin - thetas = exp.(old_n.log_thetas[kept]) - n.log_thetas .= log.(thetas / sum(thetas)) -end - -function estimate_parameters(pc::ProbΔ, data::XBatches{Bool}; pseudocount::Float64) - estimate_parameters(AggregateFlowΔ(pc, aggr_weight_type(data)), data; pseudocount=pseudocount) -end - -function 
estimate_parameters(afc::AggregateFlowΔ, data::XBatches{Bool}; pseudocount::Float64) - @assert feature_type(data) == Bool "Can only learn probabilistic circuits on Bool data" - @assert (afc[end].origin isa ProbΔNode) "AggregateFlowΔ must originate in a ProbΔ" - collect_aggr_flows(afc, data) - estimate_parameters_cached(afc; pseudocount=pseudocount) - afc -end - -function estimate_parameters(fc::FlowΔ, data::XBatches{Bool}; pseudocount::Float64) - @assert feature_type(data) == Bool "Can only learn probabilistic circuits on Bool data" - @assert (prob_origin(afc[end]) isa ProbΔNode) "FlowΔ must originate in a ProbΔ" - collect_aggr_flows(fc, data) - estimate_parameters_cached(origin(fc); pseudocount=pseudocount) -end - - # turns aggregate statistics into theta parameters -function estimate_parameters_cached(afc::AggregateFlowΔ; pseudocount::Float64) - foreach(n -> estimate_parameters_node(n; pseudocount=pseudocount), afc) -end - -estimate_parameters_node(::AggregateFlowΔNode; pseudocount::Float64) = () # do nothing -function estimate_parameters_node(n::AggregateFlow⋁; pseudocount) - origin = n.origin::Prob⋁ - if num_children(n) == 1 - origin.log_thetas .= 0.0 - else - smoothed_aggr_flow = (n.aggr_flow + pseudocount) - uniform_pseudocount = pseudocount / num_children(n) - origin.log_thetas .= log.( (n.aggr_flow_children .+ uniform_pseudocount) ./ smoothed_aggr_flow ) - @assert isapprox(sum(exp.(origin.log_thetas)), 1.0, atol=1e-6) "Parameters do not sum to one locally: $(exp.(origin.log_thetas)), estimated from $(n.aggr_flow) and $(n.aggr_flow_children). Did you actually compute the aggregate flows?" 
- #normalize away any leftover error - origin.log_thetas .- logsumexp(origin.log_thetas) - end -end - -# compute log likelihood -function compute_log_likelihood(pc::ProbΔ, data::XBatches{Bool}) - compute_log_likelihood(AggregateFlowΔ(pc, aggr_weight_type(data))) -end - -# compute log likelihood, reusing AggregateFlowΔ but ignoring its current aggregate values -function compute_log_likelihood(afc::AggregateFlowΔ, data::XBatches{Bool}) - @assert feature_type(data) == Bool "Can only test probabilistic circuits on Bool data" - collect_aggr_flows(afc, data) - ll = log_likelihood(afc) - (afc, ll) -end - -# return likelihoods given current aggregate flows. -function log_likelihood(afc::AggregateFlowΔ) - sum(n -> log_likelihood(n), afc) -end - -log_likelihood(::AggregateFlowΔNode) = 0.0 -log_likelihood(n::AggregateFlow⋁) = sum(n.origin.log_thetas .* n.aggr_flow_children) - -""" -Calculates log likelihood for a batch of fully observed samples. -(Also retures the generated FlowΔ) -""" -function log_likelihood_per_instance(pc::ProbΔ, batch::PlainXData{Bool}) - fc = FlowΔ(pc, num_examples(batch), Bool) - (fc, log_likelihood_per_instance(fc, batch)) -end - -function log_proba(pc::ProbΔ, batch::PlainXData{Bool}) - log_likelihood_per_instance(pc, batch)[2] -end - -function log_proba(pc::ProbΔ, batch::PlainXData{Int8}) - marginal_log_likelihood_per_instance(pc, batch)[2] -end - -""" -Calculate log likelihood per instance for batches of samples. -""" -function log_likelihood_per_instance(pc::ProbΔ, batches::XBatches{Bool})::Vector{Float64} - mapreduce(b -> log_likelihood_per_instance(pc, b)[2], vcat, batches) -end - -""" -Calculate log likelihood for a batch of fully observed samples. 
-(This is for when you already have a FlowΔ) -""" -function log_likelihood_per_instance(fc::FlowΔ, batch::PlainXData{Bool}) - @assert (prob_origin(fc[end]) isa ProbΔNode) "FlowΔ must originate in a ProbΔ" - pass_up_down(fc, batch) - log_likelihoods = zeros(num_examples(batch)) - indices = some_vector(Bool, flow_length(fc))::BitVector - for n in fc - if n isa DownFlow⋁ && num_children(n) != 1 # other nodes have no effect on likelihood - origin = prob_origin(n)::Prob⋁ - foreach(n.children, origin.log_thetas) do c, log_theta - # be careful here to allow for the Boolean multiplication to be done using & before switching to float arithmetic, or risk losing a lot of runtime! - # log_likelihoods .+= prod_fast(downflow(n), pr_factors(c)) .* log_theta - assign_prod(indices, downflow(n), pr_factors(c)) - view(log_likelihoods, indices::BitVector) .+= log_theta # see MixedProductKernelBenchmark.jl - # TODO put the lines above in Utils in order to ensure we have specialized types - end - end - end - log_likelihoods -end - -""" -Calculate log likelihood for a batch of samples with partial evidence P(e). -(Also returns the generated FlowΔ) - -To indicate a variable is not observed, pass -1 for that variable. -""" -function marginal_log_likelihood_per_instance(pc::ProbΔ, batch::PlainXData{Int8}) - opts = (flow_opts★..., el_type=Float64, compact⋀=false, compact⋁=false) - fc = UpFlowΔ(pc, num_examples(batch), Float64, opts) - (fc, marginal_log_likelihood_per_instance(fc, batch)) -end - -""" -Calculate log likelihood for a batch of samples with partial evidence P(e). -(If you already have a FlowΔ) - -To indicate a variable is not observed, pass -1 for that variable. 
-""" -function marginal_log_likelihood_per_instance(fc::UpFlowΔ, batch::PlainXData{Int8}) - @assert (prob_origin(fc[end]) isa ProbΔNode) "FlowΔ must originate in a ProbΔ" - marginal_pass_up(fc, batch) - pr(fc[end]) -end - -function check_parameter_integrity(circuit::ProbΔ) - for node in filter(n -> GateType(n) isa Prob⋁, circuit) - @assert all(θ -> !isnan(θ), node.log_thetas) "There is a NaN in one of the log_thetas" - end - true -end - -################## -# Sampling from a psdd -################## - -""" -Sample from a PSDD without any evidence -""" -function sample(circuit::ProbΔ)::AbstractVector{Bool} - inst = Dict{Var,Int64}() - simulate(circuit[end], inst) - len = length(keys(inst)) - ans = Vector{Bool}() - for i = 1:len - push!(ans, inst[i]) - end - ans -end - -# Uniformly sample based on the probability of the items -# and return the selected index -function sample(probs::AbstractVector{<:Number})::Int32 - z = sum(probs) - q = rand() * z - cur = 0.0 - for i = 1:length(probs) - cur += probs[i] - if q <= cur - return i - end - end - return length(probs) -end - -function simulate(node::ProbLiteral, inst::Dict{Var,Int64}) - if positive(node) - inst[variable(node.origin)] = 1 - else - inst[variable(node.origin)] = 0 - end -end - -function simulate(node::Prob⋁, inst::Dict{Var,Int64}) - idx = sample(exp.(node.log_thetas)) - simulate(node.children[idx], inst) -end -function simulate(node::Prob⋀, inst::Dict{Var,Int64}) - for child in node.children - simulate(child, inst) - end -end - -""" -Sampling with Evidence from a psdd. -Internally would call marginal pass up on a newly generated flow circuit. -""" -function sample(circuit::ProbΔ, evidence::PlainXData{Int8})::AbstractVector{Bool} - opts= (compact⋀=false, compact⋁=false) - flow_circuit = UpFlowΔ(circuit, 1, Float64, opts) - marginal_pass_up(flow_circuit, evidence) - sample(flow_circuit) -end - -""" -Sampling with Evidence from a psdd. -Assuming already marginal pass up has been done on the flow circuit. 
-""" -function sample(circuit::UpFlowΔ)::AbstractVector{Bool} - inst = Dict{Var,Int64}() - simulate2(circuit[end], inst) - len = length(keys(inst)) - ans = Vector{Bool}() - for i = 1:len - push!(ans, inst[i]) - end - ans -end - -function simulate2(node::UpFlowLiteral, inst::Dict{Var,Int64}) - if positive(node) - #TODO I don't think we need these 'grand_origin' parts below - inst[variable(grand_origin(node))] = 1 - else - inst[variable(grand_origin(node))] = 0 - end -end - -function simulate2(node::UpFlow⋁, inst::Dict{Var,Int64}) - prs = [ pr(ch)[1] for ch in children(node) ] - idx = sample(exp.(node.origin.log_thetas .+ prs)) - simulate2(children(node)[idx], inst) -end - -function simulate2(node::UpFlow⋀, inst::Dict{Var,Int64}) - for child in children(node) - simulate2(child, inst) - end -end - - - -################## -# Most Probable Explanation MPE of a psdd -# aka MAP -################## - -@inline function MAP(circuit::ProbΔ, evidence::PlainXData{Int8})::Matrix{Bool} - MPE(circuit, evidence) -end - -function MPE(circuit::ProbΔ, evidence::PlainXData{Int8})::Matrix{Bool} - # Computing Marginal Likelihood for each node - fc, lls = marginal_log_likelihood_per_instance(circuit, evidence) - - ans = Matrix{Bool}(zeros(size(evidence.x))) - active_samples = Array{Bool}(ones( num_examples(evidence) )) - - mpe_simulate(fc[end], active_samples, ans) - ans -end - -""" -active_samples: bool vector indicating which samples are active for this node during mpe -result: Matrix (num_samples, num_variables) indicating the final result of mpe -""" -function mpe_simulate(node::UpFlowLiteral, active_samples::Vector{Bool}, result::Matrix{Bool}) - if positive(node) - result[active_samples, variable(node)] .= 1 - else - result[active_samples, variable(node)] .= 0 - end -end -function mpe_simulate(node::UpFlow⋁, active_samples::Vector{Bool}, result::Matrix{Bool}) - prs = zeros( length(node.children), size(active_samples)[1] ) - @simd for i=1:length(node.children) - prs[i,:] .= 
pr(node.children[i]) .+ (node.origin.log_thetas[i]) - end - - max_child_ids = [a[1] for a in argmax(prs, dims = 1) ] - @simd for i=1:length(node.children) - ids = Vector{Bool}( active_samples .* (max_child_ids .== i)[1,:] ) # Only active for this child if it was the max for that sample - mpe_simulate(node.children[i], ids, result) - end -end -function mpe_simulate(node::UpFlow⋀, active_samples::Vector{Bool}, result::Matrix{Bool}) - for child in node.children - mpe_simulate(child, active_samples, result) - end -end diff --git a/src/Probabilistic/ProbFlowCircuits.jl b/src/Probabilistic/ProbFlowCircuits.jl deleted file mode 100644 index dc145e35..00000000 --- a/src/Probabilistic/ProbFlowCircuits.jl +++ /dev/null @@ -1,105 +0,0 @@ -##################### - -#TODO This code seems to assume logspace flows as floating point numbers. if so, enforca that on type F -function marginal_pass_up(circuit::UpFlowΔ{O,F}, data::XData{E}) where {E <: eltype(F)} where {O,F} - resize_flows(circuit, num_examples(data)) - cache = zeros(Float64, num_examples(data)) #TODO: fix type later - marginal_pass_up_node(n::UpFlowΔNode, ::PlainXData) = () - - function marginal_pass_up_node(n::UpFlowLiteral{O,F}, cache::Array{Float64}, data::PlainXData{E}) where {E <: eltype(F)} where {O,F} - pass_up_node(n, data) - # now override missing values by 1 - npr = pr(n) - npr[feature_matrix(data)[:,variable(n)] .< zero(eltype(F))] .= 1 - npr .= log.( npr .+ 1e-300 ) - return nothing - end - - function marginal_pass_up_node(n::UpFlow⋀Cached, cache::Array{Float64}, ::PlainXData) - pr(n) .= 0 - for i=1:length(n.children) - # pr(n) .+= pr(n.children[i]) - broadcast!(+, pr(n), pr(n), pr(n.children[i])) - end - return nothing - end - - function marginal_pass_up_node(n::UpFlow⋁Cached, cache::Array{Float64}, ::PlainXData) - pr(n) .= 1e-300 - for i=1:length(n.children) - cache .= 0 - # broadcast reduced memory allocation, though accessing prob_origin(n).log_thetas[i] still allocates lots of extra memory, - # it is 
proabably due to derefrencing the pointer - broadcast!(+, cache, pr(n.children[i]), prob_origin(n).log_thetas[i]) - broadcast!(exp, cache, cache) - broadcast!(+, pr(n), pr(n), cache) - end - broadcast!(log, pr(n), pr(n)); - return nothing - end - - ## Pass Up on every node in order - for n in circuit - marginal_pass_up_node(n, cache, data) - end - return nothing -end - - - -##### marginal_pass_down - -function marginal_pass_down(circuit::DownFlowΔ{O,F}) where {O,F} - resize_flows(circuit, flow_length(origin(circuit))) - for n in circuit - reset_downflow_in_progress(n) - end - for downflow in downflow_sinks(circuit[end]) - # initialize root flows to 1 - downflow.downflow .= one(eltype(F)) - end - for n in Iterators.reverse(circuit) - marginal_pass_down_node(n) - end -end - -marginal_pass_down_node(n::DownFlowΔNode) = () # do nothing -marginal_pass_down_node(n::DownFlowLeaf) = () - -function marginal_pass_down_node(n::DownFlow⋀Cached) - # todo(pashak) might need some changes, not tested, also to convert to logexpsum later - # downflow(n) = EF_n(e), the EF for edges or leaves are note stored - for c in n.children - for sink in downflow_sinks(c) - if !sink.in_progress - sink.downflow .= downflow(n) - sink.in_progress = true - else - sink.downflow .+= downflow(n) - end - end - end -end - -function marginal_pass_down_node(n::DownFlow⋁Cached) - # todo(pashak) might need some changes, not tested, also to convert to logexpsum later - # downflow(n) = EF_n(e), the EF for edges or leaves are note stored - for (ind, c) in enumerate(n.children) - for sink in downflow_sinks(c) - if !sink.in_progress - sink.downflow .= downflow(n) .* exp.(prob_origin(n).log_thetas[ind] .+ pr(origin(c)) .- pr(origin(n)) ) - sink.in_progress = true - else - sink.downflow .+= downflow(n) .* exp.(prob_origin(n).log_thetas[ind] .+ pr(origin(c)) .- pr(origin(n))) - end - end - end -end - -#### marginal_pass_up_down - -function marginal_pass_up_down(circuit::DownFlowΔ{O,F}, data::XData{E}) where {E <: 
eltype(F)} where {O,F} - @assert !(E isa Bool) - marginal_pass_up(origin(circuit), data) - marginal_pass_down(circuit) -end diff --git a/src/Probabilistic/Probabilistic.jl b/src/Probabilistic/Probabilistic.jl deleted file mode 100644 index 376467f2..00000000 --- a/src/Probabilistic/Probabilistic.jl +++ /dev/null @@ -1,58 +0,0 @@ -module Probabilistic - -using LogicCircuits -using ..Utils - -export - -# ProbCircuits -ProbΔNode, ProbΔ, ProbΔ, ProbLeafNode, ProbInnerNode, -ProbLiteral, Prob⋀, Prob⋁, ProbCache, variable, num_parameters, compute_log_likelihood, -log_proba, -log_likelihood, estimate_parameters, log_likelihood_per_instance, marginal_log_likelihood_per_instance, -initial_mixture_model, estimate_parameters_from_aggregates, compute_ensemble_log_likelihood, -expectation_step, maximization_step, expectation_step_batch, train_mixture_with_structure, check_parameter_integrity, -ll_per_instance_per_component, ll_per_instance_for_ensemble,estimate_parameters_cached, -sample, -MPE, MAP,prob_origin, copy_node, conjoin_like, disjoin_like, literal_like, normalize, replace_node, - -# ProbFlowCircuits -marginal_pass_up, marginal_pass_down, marginal_pass_up_down, - -# Mixtures -Mixture, AbstractFlatMixture, FlatMixture, FlatMixtureWithFlow,component_weights,FlatMixtureWithFlows, -log_likelihood, log_likelihood_per_instance, log_likelihood_per_instance_component, -init_mixture_with_flows, reset_mixture_aggregate_flows, aggregate_flows, estimate_parameters, -AbstractMetaMixture, MetaMixture,AbstractFlatMixture,AbstractMixture, components, num_components, - -# EM Learner -train_mixture, - -# Bagging -bootstrap_samples_ids, learn_mixture_bagging, learn_mixture_bagging2, -init_bagging_samples, train_bagging, - -# VtreeLearner -MetisContext, metis_top_down, BlossomContext, blossom_bottom_up!, -test_top_down, test_bottom_up!,learn_vtree_bottom_up, - -# MutualInformation -mutual_information, DisCache, conditional_entropy, sum_entropy_given_x, - -# Clustering -clustering, - -# 
Queries -pr_constraint, psdd_entropy, psdd_kl_divergence - -include("Clustering.jl") -include("ProbCircuits.jl") -include("ProbFlowCircuits.jl") -include("MutualInformation.jl") -include("Mixtures.jl") -include("Bagging.jl") -include("EMLearner.jl") -include("VtreeLearner.jl") -include("Queries.jl") - -end diff --git a/src/Probabilistic/Queries.jl b/src/Probabilistic/Queries.jl deleted file mode 100644 index c960ed6c..00000000 --- a/src/Probabilistic/Queries.jl +++ /dev/null @@ -1,230 +0,0 @@ -using DataStructures - -# Arthur Choi, Guy Van den Broeck, and Adnan Darwiche. Tractable learning for structured probability -# spaces: A case study in learning preference distributions. In Proceedings of IJCAI, 2015. -"Calculate the probability of the logic formula given by sdd for the psdd" -function pr_constraint(psdd_node::ProbΔNode, sdd_node::Union{ProbΔNode, StructLogicalΔNode}) - cache = Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - - return pr_constraint(psdd_node, sdd_node, cache) -end -function pr_constraint(psdd_node::ProbΔNode, sdd_node::Union{ProbΔNode, StructLogicalΔNode}, - cache::Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64})::Float64 - if (psdd_node, sdd_node) in keys(cache) # Cache hit - return cache[psdd_node, sdd_node] - elseif psdd_node isa ProbLiteral # Boundary cases - if sdd_node isa Union{ProbLiteral, StructLiteralNode} # Both are literals, just check whether they agrees with each other - if literal(psdd_node) == literal(sdd_node) - return get!(cache, (psdd_node, sdd_node), 1.0) - else - return get!(cache, (psdd_node, sdd_node), 0.0) - end - else - pr_constraint(psdd_node, sdd_node.children[1], cache) - if length(sdd_node.children) > 1 - pr_constraint(psdd_node, sdd_node.children[2], cache) - return get!(cache, (psdd_node, sdd_node), 1.0) - else - return get!(cache, (psdd_node, sdd_node), - literal(sdd_node.children[1]) == literal(psdd_node) ? 
1.0 : 0.0 - ) - end - end - elseif psdd_node.children[1] isa ProbLiteral # The psdd is true - theta = exp(psdd_node.log_thetas[1]) - return get!(cache, (psdd_node, sdd_node), - theta * pr_constraint(psdd_node.children[1], sdd_node, cache) + - (1.0 - theta) * pr_constraint(psdd_node.children[2], sdd_node, cache) - ) - else # Both psdds are not trivial - prob = 0.0 - for (prob⋀_node, log_theta) in zip(psdd_node.children, psdd_node.log_thetas) - p = prob⋀_node.children[1] - s = prob⋀_node.children[2] - - theta = exp(log_theta) - for sdd⋀_node in sdd_node.children - r = sdd⋀_node.children[1] - t = sdd⋀_node.children[2] - prob += theta * pr_constraint(p, r, cache) * pr_constraint(s, t, cache) - end - end - return get!(cache, (psdd_node, sdd_node), prob) - end -end - - -"Entropy of the distribution of the input psdd." -function psdd_entropy(psdd_node::ProbΔNode)::Float64 - psdd_entropy_cache = Dict{ProbΔNode, Float64}() - - return psdd_entropy(psdd_node, psdd_entropy_cache) -end -function psdd_entropy(psdd_node::Prob⋁, psdd_entropy_cache::Dict{ProbΔNode, Float64})::Float64 - if psdd_node in keys(psdd_entropy_cache) - return psdd_entropy_cache[psdd_node] - elseif psdd_node.children[1] isa ProbLiteral - return get!(psdd_entropy_cache, psdd_node, - - exp(psdd_node.log_thetas[1]) * psdd_node.log_thetas[1] - - exp(psdd_node.log_thetas[2]) * psdd_node.log_thetas[2] - ) - else - local_entropy = 0.0 - for (prob⋀_node, log_prob) in zip(psdd_node.children, psdd_node.log_thetas) - p = prob⋀_node.children[1] - s = prob⋀_node.children[2] - - local_entropy += exp(log_prob) * (psdd_entropy(p, psdd_entropy_cache) + - psdd_entropy(s, psdd_entropy_cache) - log_prob) - end - return get!(psdd_entropy_cache, psdd_node, local_entropy) - end -end -function psdd_entropy(psdd_node::Prob⋀, psdd_entropy_cache::Dict{ProbΔNode, Float64})::Float64 - return get!(psdd_entropy_cache, psdd_node.children[1], psdd_entropy(psdd_node.children[1], psdd_entropy_cache)) + - get!(psdd_entropy_cache, 
psdd_node.children[2], psdd_entropy(psdd_node.children[2], psdd_entropy_cache)) -end -function psdd_entropy(psdd_node::ProbLiteral, psdd_entropy_cache::Dict{ProbΔNode, Float64})::Float64 - return get!(psdd_entropy_cache, psdd_node, 0.0) -end - - -"KL divergence calculation for psdds that are not necessarily identical" -function psdd_kl_divergence(psdd_node1::ProbΔNode, psdd_node2::ProbΔNode)::Float64 - kl_divergence_cache = Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}() - pr_constraint_cache = Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - - return psdd_kl_divergence(psdd_node1, psdd_node2, kl_divergence_cache, pr_constraint_cache) -end -function psdd_kl_divergence(psdd_node1::ProbΔNode, psdd_node2::ProbΔNode, - kl_divergence_cache::Dict{Tuple{ProbΔNode, ProbΔNode}, Float64})::Float64 - pr_constraint_cache = Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - - return psdd_kl_divergence(psdd_node1, psdd_node2, kl_divergence_cache, pr_constraint_cache) -end -function psdd_kl_divergence(psdd_node1::ProbΔNode, psdd_node2::ProbΔNode, - kl_divergence_cache::Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}, - pr_constraint_cache::Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}) - @assert !(psdd_node1 isa Prob⋀ || psdd_node2 isa Prob⋀) "Prob⋀ not a valid PSDD node for KL-Divergence" - - # Check if both nodes are normalized for same vtree node - @assert variables(psdd_node1.origin.vtree) == variables(psdd_node2.origin.vtree) "Both nodes not normalized for same vtree node" - - if (psdd_node1, psdd_node2) in keys(kl_divergence_cache) # Cache hit - return kl_divergence_cache[(psdd_node1, psdd_node2)] - elseif psdd_node1.children[1] isa ProbLiteral - if psdd_node2 isa ProbLiteral - psdd_kl_divergence(psdd_node1.children[1], psdd_node2, kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1.children[2], psdd_node2, kl_divergence_cache, pr_constraint_cache) - if literal(psdd_node1.children[1]) == 
literal(psdd_node2) - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - psdd_node1.log_thetas[1] * exp(psdd_node1.log_thetas[1]) - ) - else - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - psdd_node1.log_thetas[2] * exp(psdd_node1.log_thetas[2]) - ) - end - else - # The below four lines actually assign zero, but still we need to - # call it. - psdd_kl_divergence(psdd_node1.children[1], psdd_node2.children[1], kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1.children[1], psdd_node2.children[2], kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1.children[2], psdd_node2.children[1], kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1.children[2], psdd_node2.children[2], kl_divergence_cache, pr_constraint_cache) - # There are two possible matches - if literal(psdd_node1.children[1]) == literal(psdd_node2.children[1]) - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - exp(psdd_node1.log_thetas[1]) * (psdd_node1.log_thetas[1] - psdd_node2.log_thetas[1]) + - exp(psdd_node1.log_thetas[2]) * (psdd_node1.log_thetas[2] - psdd_node2.log_thetas[2]) - ) - else - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - exp(psdd_node1.log_thetas[1]) * (psdd_node1.log_thetas[1] - psdd_node2.log_thetas[2]) + - exp(psdd_node1.log_thetas[2]) * (psdd_node1.log_thetas[2] - psdd_node2.log_thetas[1]) - ) - end - end - else # the normal case - kl_divergence = 0.0 - - # loop through every combination of prim and sub - for (prob⋀_node1, log_theta1) in zip(psdd_node1.children, psdd_node1.log_thetas) - for (prob⋀_node2, log_theta2) in zip(psdd_node2.children, psdd_node2.log_thetas) - p = prob⋀_node1.children[1] - s = prob⋀_node1.children[2] - - r = prob⋀_node2.children[1] - t = prob⋀_node2.children[2] - - theta1 = exp(log_theta1) - - p11 = pr_constraint(s, t, pr_constraint_cache) - p12 = pr_constraint(p, r, pr_constraint_cache) - - p13 = theta1 * (log_theta1 - log_theta2) - - p21 = 
psdd_kl_divergence(p, r, kl_divergence_cache, pr_constraint_cache) - p31 = psdd_kl_divergence(s, t, kl_divergence_cache, pr_constraint_cache) - - kl_divergence += p11 * p12 * p13 + theta1 * (p11 * p21 + p12 * p31) - end - end - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), kl_divergence) - end -end -function psdd_kl_divergence(psdd_node1::ProbLiteral, psdd_node2::ProbLiteral, - kl_divergence_cache::Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}, - pr_constraint_cache::Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}) - # Check if literals are over same variables in vtree - @assert variables(psdd_node1.origin.vtree) == variables(psdd_node2.origin.vtree) "Both nodes not normalized for same vtree node" - - if (psdd_node1, psdd_node2) in keys(kl_divergence_cache) # Cache hit - return kl_divergence_cache[psdd_node1, psdd_node2] - else - # In this case probability is 1, kl divergence is 0 - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), 0.0) - end -end -function psdd_kl_divergence(psdd_node1::Prob⋁, psdd_node2::ProbLiteral, - kl_divergence_cache::Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}, - pr_constraint_cache::Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}) - @assert variables(psdd_node1.origin.vtree) == variables(psdd_node2.origin.vtree) "Both nodes not normalized for same vtree node" - - if (psdd_node1, psdd_node2) in keys(kl_divergence_cache) # Cache hit - return kl_divergence_cache[psdd_node1, psdd_node2] - else - psdd_kl_divergence(psdd_node1.children[1], psdd_node2, kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1.children[2], psdd_node2, kl_divergence_cache, pr_constraint_cache) - if literal(psdd_node1.children[1]) == literal(psdd_node2) - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - psdd_node1.log_thetas[1] * exp(psdd_node1.log_thetas[1]) - ) - else - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - psdd_node1.log_thetas[2] * 
exp(psdd_node1.log_thetas[2]) - ) - end - end -end -function psdd_kl_divergence(psdd_node1::ProbLiteral, psdd_node2::Prob⋁, - kl_divergence_cache::Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}, - pr_constraint_cache::Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}) - @assert variables(psdd_node1.origin.vtree) == variables(psdd_node2.origin.vtree) "Both nodes not normalized for same vtree node" - - if (psdd_node1, psdd_node2) in keys(kl_divergence_cache) # Cache hit - return kl_divergence_cache[psdd_node1, psdd_node2] - else - psdd_kl_divergence(psdd_node1, psdd_node2.children[1], kl_divergence_cache, pr_constraint_cache) - psdd_kl_divergence(psdd_node1, psdd_node2.children[2], kl_divergence_cache, pr_constraint_cache) - if literal(psdd_node1) == literal(psdd_node2.children[1]) - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - -psdd_node2.log_thetas[1] - ) - else - return get!(kl_divergence_cache, (psdd_node1, psdd_node2), - -psdd_node2.log_thetas[2] - ) - end - end -end diff --git a/src/ProbabilisticCircuits.jl b/src/ProbabilisticCircuits.jl index 6ed05250..8ec65ed9 100644 --- a/src/ProbabilisticCircuits.jl +++ b/src/ProbabilisticCircuits.jl @@ -5,24 +5,39 @@ module ProbabilisticCircuits # USE EXTERNAL MODULES using Reexport +@reexport using LogicCircuits include("Utils/Utils.jl") - @reexport using .Utils -# INCLUDE CHILD MODULES -include("Probabilistic/Probabilistic.jl") +include("abstract_prob_nodes.jl") +include("plain_prob_nodes.jl") +include("structured_prob_nodes.jl") +include("logistic_nodes.jl") +include("param_bit_circuit.jl") +include("parameters.jl") + +include("queries/likelihood.jl") +include("queries/marginal_flow.jl") +include("queries/map.jl") +include("queries/sample.jl") +include("queries/pr_constraint.jl") +include("queries/information.jl") +include("queries/expectation_rec.jl") +include("queries/expectation_graph.jl") + include("Logistic/Logistic.jl") -include("IO/IO.jl") 
-include("StructureLearner/StructureLearner.jl") -include("Reasoning/Reasoning.jl") +@reexport using .Logistic +include("mixtures/shared_prob_nodes.jl") +# include("mixtures/em.jl") -# USE CHILD MODULES (in order to re-export some functions) -@reexport using .Probabilistic -@reexport using .Logistic -@reexport using .IO -@reexport using .StructureLearner -@reexport using .Reasoning +include("structurelearner/chow_liu_tree.jl") +include("structurelearner/init.jl") +include("structurelearner/heuristics.jl") +include("structurelearner/learner.jl") + +include("LoadSave/LoadSave.jl") +@reexport using .LoadSave end diff --git a/src/Reasoning/ExpFlowCircuits.jl b/src/Reasoning/ExpFlowCircuits.jl deleted file mode 100644 index 869cd966..00000000 --- a/src/Reasoning/ExpFlowCircuits.jl +++ /dev/null @@ -1,152 +0,0 @@ -######################## -# Do not use for now -###################### -##################### -# Expectation Flow circuits -# For use of algorithms depending on pairs of nodes of two circuits -##################### - -"A expectation circuit node that has pair of origins of type PC and type LC" -abstract type DecoratorΔNodePair{PC<:ΔNode, LC<:ΔNode} <: ΔNode end - -abstract type ExpFlowΔNode{PC, LC, F} <: DecoratorΔNodePair{PC, LC} end - -const ExpFlowΔ{O} = AbstractVector{<:ExpFlowΔNode{<:O}} - -struct UpExpFlow{PC, LC, F} <: ExpFlowΔNode{PC, LC, F} - p_origin::PC - f_origin::LC - children::Vector{<:ExpFlowΔNode{<:PC, <:LC, <:F}} - f::F - fg::F -end - - -""" -Construct a upward expectation flow circuit from a given pair of PC and LC circuits -Note that its assuming the two circuits share the same vtree -""" -function ExpFlowΔ(pc::ProbΔ, lc::LogisticΔ, batch_size::Int, ::Type{El}) where El - pc_type = grapheltype(pc) - lc_type = grapheltype(lc) - - F = Array{El, 2} - fmem = () -> zeros(1, batch_size) #Vector{El}(undef, batch_size) #some_vector(El, batch_size) # note: fmem's return type will determine type of all UpFlows in the circuit (should be El) - fgmem = () 
-> zeros(classes(lc[end]), batch_size) - - root_pc = pc[end] - root_lc = lc[end- 1] - - cache = Dict{Pair{ΔNode, ΔNode}, ExpFlowΔNode}() - sizehint!(cache, (length(pc) + length(lc))*4÷3) - expFlowCircuit = Vector{ExpFlowΔNode}() - - function ExpflowTraverse(n::Prob⋁, m::Logistic⋁) - get!(cache, Pair(n, m)) do - children = [ ExpflowTraverse(i, j) for i in n.children for j in m.children] - node = UpExpFlow{pc_type,lc_type, F}(n, m, children, fmem(), fgmem()) - push!(expFlowCircuit, node) - return node - end - end - function ExpflowTraverse(n::Prob⋀, m::Logistic⋀) - get!(cache, Pair(n, m)) do - children = [ ExpflowTraverse(z[1], z[2]) for z in zip(n.children, m.children) ] - node = UpExpFlow{pc_type,lc_type, F}(n, m, children, fmem(), fgmem()) - push!(expFlowCircuit, node) - return node - end - end - function ExpflowTraverse(n::ProbLiteral, m::Logistic⋁) - get!(cache, Pair(n, m)) do - children = Vector{ExpFlowΔNode{pc_type,lc_type, F}}() # TODO - node = UpExpFlow{pc_type,lc_type, F}(n, m, children, fmem(), fgmem()) - push!(expFlowCircuit, node) - return node - end - end - function ExpflowTraverse(n::ProbLiteral, m::LogisticLiteral) - get!(cache, Pair(n, m)) do - children = Vector{ExpFlowΔNode{pc_type,lc_type, F}}() # TODO - node = UpExpFlow{pc_type,lc_type, F}(n, m, children, fmem(), fgmem()) - push!(expFlowCircuit, node) - return node - end - end - - ExpflowTraverse(root_pc, root_lc) - expFlowCircuit -end - -function exp_pass_up(pc::ProbΔ, lc::LogisticΔ, data::XData{E}) where{E <: eltype(F)} where{PC, LC, F} - expFlowCircuit = ExpFlowΔ(pc, lc, num_examples(data), Float64); - for n in expFlowCircuit - exp_pass_up_node(n, data) - end - expFlowCircuit -end - -function exp_pass_up(fc::ExpFlowΔ, data::XData{E}) where{E <: eltype(F)} where{PC, LC, F} - #TODO write resize_flows similar to flow_circuits - # and give as input the expFlowCircuit instead - #expFlowCircuit = ExpFlowΔ(pc, lc, num_examples(data), Float64); - for n in fc - exp_pass_up_node(n, data) - end -end - 
-function exp_pass_up_node(node::ExpFlowΔNode{PC,LC,F}, data::XData{E}) where{E <: eltype(F)} where{PC, LC, F} - pType = typeof(node.p_origin) - fType = typeof(node.f_origin) - - if node.p_origin isa Prob⋁ && node.f_origin isa Logistic⋁ - #todo this ordering might be different than the ExpFlowΔNode children - pthetas = [exp(node.p_origin.log_thetas[i]) - for i in 1:length(node.p_origin.children) for j in 1:length(node.f_origin.children)] - fthetas = [node.f_origin.thetas[j,:] # only taking the first class for now - for i in 1:length(node.p_origin.children) for j in 1:length(node.f_origin.children)] - - node.f .= 0.0 - node.fg .= 0.0 - for z = 1:length(node.children) - node.f .+= pthetas[z] .* node.children[z].f - node.fg .+= (pthetas[z] .* fthetas[z]) .* node.children[z].f - node.fg .+= pthetas[z] .* node.children[z].fg - end - elseif node.p_origin isa Prob⋀ && node.f_origin isa Logistic⋀ - node.f .= node.children[1].f .* node.children[2].f # assume 2 children - node.fg .= (node.children[1].f .* node.children[2].fg) .+ - (node.children[2].f .* node.children[1].fg) - - elseif node.p_origin isa ProbLiteral - if node.f_origin isa Logistic⋁ - m = node.f_origin.children[1] - elseif node.f_origin isa LogisticLiteral - m = node.f_origin - else - error("Invalid Types of pairs {$pType} - {$fType}") - end - - var = lit2var(literal(m)) - X = feature_matrix(data) - if positive(node.p_origin) && positive(m) - node.f[:, X[:, var] .!= 0 ] .= 1.0 # positive and missing observations - node.f[:, X[:, var] .== 0 ] .= 0.0 - elseif negative(node.p_origin) && negative(m) - node.f[:, X[:, var] .!= 1 ] .= 1.0 # negative and missing observations - node.f[:, X[:, var] .== 1 ] .= 0.0 - else - node.f .= 0.0 - end - - if node.f_origin isa Logistic⋁ - node.fg .= node.f .* transpose(node.f_origin.thetas) - else - node.fg .= 0.0 - end - - else - error("Invalid Types of pairs {$pType} - {$fType}") - end - -end \ No newline at end of file diff --git a/src/Reasoning/Expectation.jl 
b/src/Reasoning/Expectation.jl deleted file mode 100644 index 783dc923..00000000 --- a/src/Reasoning/Expectation.jl +++ /dev/null @@ -1,262 +0,0 @@ -ExpCacheDict = Dict{Pair{ProbΔNode, LogisticΔNode}, Array{Float64, 2}} -MomentCacheDict = Dict{Tuple{ProbΔNode, LogisticΔNode, Int64}, Array{Float64, 2}} - -struct ExpectationCache - f::ExpCacheDict - fg::ExpCacheDict -end -ExpectationCache() = ExpectationCache(ExpCacheDict(), ExpCacheDict()) - -struct MomentCache - f::ExpCacheDict - fg::MomentCacheDict -end -MomentCache() = MomentCache( ExpCacheDict(), MomentCacheDict()) - - -# Find a better way to cache n_choose_k values -max_k = 31 -choose_cache = [ 1.0 * binomial(i,j) for i=0:max_k+1, j=0:max_k+1 ] -@inline function choose(n::Int, m::Int) - return choose_cache[n+1, m+1] -end - - -# On Tractable Computation of Expected Predictions (https://arxiv.org/abs/1910.02182) -""" -Missing values should be denoted by -1 -""" -function Expectation(pc::ProbΔ, lc::LogisticΔ, data::XData{Int8}) - # 1. Get probability of each observation - fc, log_likelihoods = marginal_log_likelihood_per_instance(pc, data) - p_observed = exp.( log_likelihoods ) - - # 2. Expectation w.r.t. P(x_m, x_o) - cache = ExpectationCache() - results_unnormalized = exp_g(pc[end], lc[end-1], data, cache) # skipping the bias node of lc - - # 3. Expectation w.r.t P(x_m | x_o) - results = transpose(results_unnormalized) ./ p_observed - - # 4. Add Bias terms - biases = lc[end].thetas - results .+= biases - - results, cache -end - -function Moment(pc::ProbΔ, lc::LogisticΔ, data::XData{Int8}, moment::Int) - # 1. Get probability of each observation - fc, log_likelihoods = marginal_log_likelihood_per_instance(pc, data) - p_observed = exp.( log_likelihoods ) - - # 2. Moment w.r.t. 
P(x_m, x_o) - cache = MomentCache() - biases = lc[end].thetas - results_unnormalized = zeros(num_examples(data), classes(lc[end])) - - for z = 0:moment-1 - results_unnormalized .+= choose(moment, z) .* (biases .^ (z)) .* transpose(moment_g(pc[end], lc[end-1], data, moment - z, cache)) - end - - # 3. Moment w.r.t P(x_m | x_o) - results = results_unnormalized ./ p_observed - - # 4. Add Bias^moment terms - results .+= biases .^ (moment) - - results, cache -end - - -function ExpectationUpward(pc::ProbΔ, lc::LogisticΔ, data::XData{Int8}) - # 1. Get probability of each observation - fc, log_likelihoods = marginal_log_likelihood_per_instance(pc, data) - p_observed = exp.( log_likelihoods ) - - # 2. Expectation w.r.t. P(x_m, x_o) - exps_flow = exp_pass_up(pc, lc, data) - results_unnormalized = exps_flow[end].fg - - # 3. Expectation w.r.t P(x_m | x_o) - results = transpose(results_unnormalized) ./ p_observed - - # 4. Add Bias terms - biases = lc[end].thetas - results .+= biases - - results, exps_flow -end - - -# exp_f (pr-constraint) is originally from: -# Arthur Choi, Guy Van den Broeck, and Adnan Darwiche. Tractable learning for structured probability spaces: A case study in learning preference distributions. In Proceedings of IJCAI, 2015. 
- -function exp_f(n::Prob⋁, m::Logistic⋁, data::XData{Int8}, cache::Union{ExpectationCache, MomentCache}) - @inbounds get!(cache.f, Pair(n, m)) do - value = zeros(1 , num_examples(data) ) - pthetas = [exp(n.log_thetas[i]) for i in 1:length(n.children)] - @fastmath @simd for i in 1:length(n.children) - @simd for j in 1:length(m.children) - value .+= (pthetas[i] .* exp_f(n.children[i], m.children[j], data, cache)) - end - end - return value - end -end - -function exp_f(n::Prob⋀, m::Logistic⋀, data::XData{Int8}, cache::Union{ExpectationCache, MomentCache}) - @inbounds get!(cache.f, Pair(n, m)) do - value = ones(1 , num_examples(data) ) - @fastmath for (i,j) in zip(n.children, m.children) - value .*= exp_f(i, j, data, cache) - end - return value - # exp_f(n.children[1], m.children[1], data, cache) .* exp_f(n.children[2], m.children[2], data, cache) - end -end - - -@inline function exp_f(n::ProbLiteral, m::LogisticLiteral, data::XData{Int8}, cache::Union{ExpectationCache, MomentCache}) - @inbounds get!(cache.f, Pair(n, m)) do - value = zeros(1 , num_examples(data) ) - var = lit2var(literal(m)) - X = feature_matrix(data) - if positive(n) && positive(m) - # value[1, X[:, var] .== -1 ] .= 1.0 # missing observation always agrees - # value[1, X[:, var] .== 1 ] .= 1.0 # positive observations - value[1, X[:, var] .!= 0 ] .= 1.0 # positive or missing observations - elseif negative(n) && negative(m) - # value[1, X[:, var] .== -1 ] .= 1.0 # missing observation always agrees - # value[1, X[:, var] .== 0 ] .= 1.0 # negative observations - value[1, X[:, var] .!= 1 ] .= 1.0 # negative or missing observations - end - return value - end -end - -""" -Has to be a Logistic⋁ with only one child, which is a leaf node -""" -@inline function exp_f(n::ProbLiteral, m::Logistic⋁, data::XData{Int8}, cache::Union{ExpectationCache, MomentCache}) - @inbounds get!(cache.f, Pair(n, m)) do - exp_f(n, m.children[1], data, cache) - end -end - 
-####################################################################### -######## exp_g, exp_fg -######################################################################## - -@inline function exp_g(n::Prob⋁, m::Logistic⋁, data::XData{Int8}, cache::ExpectationCache) - exp_fg(n, m, data, cache) # exp_fg and exp_g are the same for OR nodes -end - -# function exp_g(n::Prob⋀, m::Logistic⋀, data::XData{Int8}, cache::ExpectationCache) -# value = zeros(classes(m) , num_examples(data)) -# @fastmath for (i,j) in zip(n.children, m.children) -# value .+= exp_fg(i, j, data, cache) -# end -# return value -# # exp_fg(n.children[1], m.children[1], data, cache) .+ exp_fg(n.children[2], m.children[2], data, cache) -# end - - -function exp_fg(n::Prob⋁, m::Logistic⋁, data::XData{Int8}, cache::ExpectationCache) - @inbounds get!(cache.fg, Pair(n, m)) do - value = zeros(classes(m) , num_examples(data) ) - pthetas = [exp(n.log_thetas[i]) for i in 1:length(n.children)] - @fastmath @simd for i in 1:length(n.children) - for j in 1:length(m.children) - value .+= (pthetas[i] .* m.thetas[j,:]) .* exp_f(n.children[i], m.children[j], data, cache) - value .+= pthetas[i] .* exp_fg(n.children[i], m.children[j], data, cache) - end - end - return value - end -end - -function exp_fg(n::Prob⋀, m::Logistic⋀, data::XData{Int8}, cache::ExpectationCache) - @inbounds get!(cache.fg, Pair(n, m)) do - # Assuming 2 children - value = exp_f(n.children[1], m.children[1], data, cache) .* exp_fg(n.children[2], m.children[2], data, cache) - value .+= exp_f(n.children[2], m.children[2], data, cache) .* exp_fg(n.children[1], m.children[1], data, cache) - return value - end -end - - -""" -Has to be a Logistic⋁ with only one child, which is a leaf node -""" -@inline function exp_fg(n::ProbLiteral, m::Logistic⋁, data::XData{Int8}, cache::ExpectationCache) - @inbounds get!(cache.fg, Pair(n, m)) do - m.thetas[1,:] .* exp_f(n, m, data, cache) - end -end - -@inline function exp_fg(n::ProbLiteral, m::LogisticLiteral, 
data::XData{Int8}, cache::ExpectationCache) - #dont know how many classes, boradcasting does the job - zeros(1 , num_examples(data)) -end - -####################################################################### -######## moment_g, moment_fg -######################################################################## - -@inline function moment_g(n::Prob⋁, m::Logistic⋁, data::XData{Int8}, moment::Int, cache::MomentCache) - get!(cache.fg, (n, m, moment)) do - moment_fg(n, m, data, moment, cache) - end -end - -""" -Calculating E[g^k * f] -""" -function moment_fg(n::Prob⋁, m::Logistic⋁, data::XData{Int8}, moment::Int, cache::MomentCache) - if moment == 0 - return exp_f(n, m, data, cache) - end - - get!(cache.fg, (n, m, moment)) do - value = zeros(classes(m) , num_examples(data) ) - pthetas = [exp(n.log_thetas[i]) for i in 1:length(n.children)] - @fastmath @simd for i in 1:length(n.children) - for j in 1:length(m.children) - for z in 0:moment - value .+= pthetas[i] .* choose(moment, z) .* m.thetas[j,:].^(moment - z) .* moment_fg(n.children[i], m.children[j], data, z, cache) - end - end - end - return value - end -end - -@inline function moment_fg(n::ProbLiteral, m::Logistic⋁, data::XData{Int8}, moment::Int, cache::MomentCache) - get!(cache.fg, (n, m, moment)) do - m.thetas[1,:].^(moment) .* exp_f(n, m, data, cache) - end -end - -@inline function moment_fg(n::ProbLiteral, m::LogisticLiteral, data::XData{Int8}, moment::Int, cache::MomentCache) - #dont know how many classes, boradcasting does the job - if moment == 0 - exp_f(n, m, data, cache) - else - zeros(1, num_examples(data)) - end -end - -function moment_fg(n::Prob⋀, m::Logistic⋀, data::XData{Int8}, moment::Int, cache::MomentCache) - if moment == 0 - return exp_f(n, m, data, cache) - end - get!(cache.fg, (n, m, moment)) do - value = moment_fg(n.children[1], m.children[1], data, 0, cache) .* moment_fg(n.children[2], m.children[2], data, moment, cache) - - for z in 1:moment - value .+= choose(moment, z) .* 
moment_fg(n.children[1], m.children[1], data, z, cache) .* moment_fg(n.children[2], m.children[2], data, moment - z, cache) - end - return value - end -end \ No newline at end of file diff --git a/src/Reasoning/Reasoning.jl b/src/Reasoning/Reasoning.jl deleted file mode 100644 index ce486ff4..00000000 --- a/src/Reasoning/Reasoning.jl +++ /dev/null @@ -1,20 +0,0 @@ -module Reasoning - -using LogicCircuits -using ..Probabilistic -using ..Logistic -using ..Utils - -export - UpExpFlow, - ExpFlowΔ, - exp_pass_up, - Expectation, - ExpectationUpward, - Moment - -include("Expectation.jl") -include("ExpFlowCircuits.jl") - - -end \ No newline at end of file diff --git a/src/StructureLearner/CircuitBuilder.jl b/src/StructureLearner/CircuitBuilder.jl deleted file mode 100644 index 8d3c8c96..00000000 --- a/src/StructureLearner/CircuitBuilder.jl +++ /dev/null @@ -1,133 +0,0 @@ -using LightGraphs: topological_sort_by_dfs, outneighbors -using MetaGraphs: get_prop - - -"convert literal+/- to probability value 0/1" -@inline lit2value(l::Lit)::Int = (l > 0 ? 1 : 0) - -""" -Learning from data a circuit with several structure learning algorithms -""" -function learn_probabilistic_circuit(data::Union{XData, WXData}; - pseudocount = 1.0, algo = "chow-liu", algo_kwargs=(α=1.0, clt_root="graph_center"))::ProbΔ - if algo == "chow-liu" - clt = learn_chow_liu_tree(data; algo_kwargs...) 
- pc = compile_prob_circuit_from_clt(clt) - estimate_parameters(pc, convert(XBatches,data); pseudocount = pseudocount) - pc - else - error("Cannot learn a probabilistic circuit with algorithm $algo") - end -end - -"Build decomposable probability circuits from Chow-Liu tree" -function compile_prob_circuit_from_clt(clt::CLT)::ProbΔ - topo_order = Var.(reverse(topological_sort_by_dfs(clt::CLT))) #order to parse the node - lin = Vector{ProbΔNode}() - node_cache = Dict{Lit, LogicalΔNode}() - prob_cache = ProbCache() - parent = parent_vector(clt) - - prob_children(n)::Vector{<:ProbΔNode{<:node_type(n)}} = - copy_with_eltype(map(c -> prob_cache[c], n.children), ProbΔNode{<:node_type(n)}) - - "default order of circuit node, from left to right: +/1 -/0" - - "compile leaf node into circuits" - function compile_leaf(ln::Var) - pos = LiteralNode( var2lit(ln)) - neg = LiteralNode(-var2lit(ln)) - node_cache[var2lit(ln)] = pos - node_cache[-var2lit(ln)] = neg - pos2 = ProbLiteral(pos) - neg2 = ProbLiteral(neg) - push!(lin, pos2) - push!(lin, neg2) - prob_cache[pos] = pos2 - prob_cache[neg] = neg2 - end - - "compile inner disjunction node" - function compile_⋁inner(ln::Lit, children::Vector{Var})::Vector{⋁Node} - logical_nodes = Vector{⋁Node}() - v = lit2value(ln) - - for c in children - #build logical ciruits - temp = ⋁Node([node_cache[lit] for lit in [var2lit(c), - var2lit(c)]]) - push!(logical_nodes, temp) - n = Prob⋁(temp, prob_children(temp)) - prob_cache[temp] = n - n.log_thetas = zeros(Float64, 2) - cpt = get_prop(clt, c, :cpt) - weights = [cpt[(1, v)], cpt[(0, v)]] - n.log_thetas = log.(weights) - push!(lin, n) - end - - return logical_nodes - end - - "compile inner conjunction node into circuits, left node is indicator, rest nodes are disjunction children nodes" - function compile_⋀inner(indicator::Lit, children::Vector{⋁Node}) - leaf = node_cache[indicator] - temp = ⋀Node(vcat([leaf], children)) - node_cache[indicator] = temp - n = Prob⋀(temp, prob_children(temp)) - 
prob_cache[temp] = n - push!(lin, n) - end - - "compile inner node, 1 inner variable to 2 leaf nodes, 2 * num_children disjunction nodes and 2 conjunction nodes" - function compile_inner(ln::Var, children::Vector{Var}) - compile_leaf(ln) - pos⋁ = compile_⋁inner(var2lit(ln), children) - neg⋁ = compile_⋁inner(-var2lit(ln), children) - compile_⋀inner(var2lit(ln), pos⋁) - compile_⋀inner(-var2lit(ln), neg⋁) - end - - "compile root, add another disjunction node" - function compile_root(root::Var) - temp = ⋁Node([node_cache[s] for s in [var2lit(root), -var2lit(root)]]) - n = Prob⋁(temp, prob_children(temp)) - prob_cache[temp] = n - n.log_thetas = zeros(Float64, 2) - cpt = get_prop(clt, root, :cpt) - weights = [cpt[1], cpt[0]] - n.log_thetas = log.(weights) - push!(lin, n) - return n - end - - function compile_independent_roots(roots::Vector{ProbΔNode}) - temp = ⋀Node([c.origin for c in roots]) - n = Prob⋀(temp, prob_children(temp)) - prob_cache[temp] = n - push!(lin, n) - temp = ⋁Node([temp]) - n = Prob⋁{LogicalΔNode}(temp, prob_children(temp)) - prob_cache[temp] = n - n.log_thetas = [0.0] - push!(lin, n) - end - - roots = Vector{ProbΔNode}() - for id in topo_order - children = Var.(outneighbors(clt, id)) - if isequal(children, []) - compile_leaf(id) - else - compile_inner(id, children) - end - if 0 == parent[id] - push!(roots, compile_root(id)) - end - end - - if length(roots) > 1 - compile_independent_roots(roots) - end - - return lin -end diff --git a/src/StructureLearner/PSDDInitializer.jl b/src/StructureLearner/PSDDInitializer.jl deleted file mode 100644 index 439b691a..00000000 --- a/src/StructureLearner/PSDDInitializer.jl +++ /dev/null @@ -1,275 +0,0 @@ -using ..Utils - -"Map from literal to LogicalΔNode" -const LitCache = Dict{Lit, LogicalΔNode} - -"Use literal to represent constraint (1 to X, -1 to not X), 0 to represent true" -const ⊤ = convert(Lit, 0) - -""" -Learning from data a structured-decomposable circuit with several structure learning algorithms -""" 
-function learn_struct_prob_circuit(data::Union{XData, WXData}; - pseudocount = 1.0, algo = "chow-liu", algo_kwargs=(α=1.0, clt_root="graph_center"), vtree = "chow-liu", vtree_kwargs=(vtree_mode="balanced",)) - if algo == "chow-liu" - clt = learn_chow_liu_tree(data; algo_kwargs...) - vtree = learn_vtree_from_clt(clt; vtree_kwargs...); - pc = compile_psdd_from_clt(clt, vtree); - estimate_parameters(pc, convert(XBatches,data); pseudocount = pseudocount) - pc, vtree - else - error("Cannot learn a structured-decomposable circuit with algorithm $algo") - end -end - -############# -# Learn PlainVtree from CLT -############# - -" -Learn a vtree from clt, -with strategy (close to) `linear` or `balanced` -" -function learn_vtree_from_clt(clt::CLT; vtree_mode::String)::PlainVtree - roots = [i for (i, x) in enumerate(parent_vector(clt)) if x == 0] - rootnode = construct_children(Var.(roots), clt, vtree_mode) - - return node2dag(rootnode) -end - -function construct_node(v::Var, clt::CLT, strategy::String)::PlainVtreeNode - children = Var.(outneighbors(clt, v)) - if isempty(children) # leaf node - return PlainVtreeLeafNode(v) - else - right = construct_children(children, clt, strategy) - return add_parent(v, right) - end -end - -function construct_children(children::Vector{Var}, clt::CLT, strategy::String)::PlainVtreeNode - sorted_vars = sort(collect(children)) - children_nodes = Vector{PlainVtreeNode}() - foreach(x -> push!(children_nodes, construct_node(x, clt, strategy)), sorted_vars) - - if strategy == "linear" - construct_children_linear(children_nodes, clt) - elseif strategy == "balanced" - construct_children_balanced(children_nodes, clt) - else - throw("Unknown type of strategy") - end -end - -function construct_children_linear(children_nodes::Vector{PlainVtreeNode}, clt::CLT)::PlainVtreeNode - children_nodes = Iterators.Stateful(reverse(children_nodes)) - - right = popfirst!(children_nodes) - for left in children_nodes - right = PlainVtreeInnerNode(left, right) - end - 
return right -end - -function construct_children_balanced(children_nodes::Vector{PlainVtreeNode}, clt::CLT)::PlainVtreeNode - if length(children_nodes) == 1 - return children_nodes[1] - elseif length(children_nodes) == 2 - return PlainVtreeInnerNode(children_nodes[1], children_nodes[2]) - else - len = trunc(Int64, length(children_nodes) / 2) - left = construct_children_balanced(children_nodes[1 : len], clt) - right = construct_children_balanced(children_nodes[len + 1 : end], clt) - return PlainVtreeInnerNode(left, right) - end -end - -function add_parent(parent::Var, children::PlainVtreeNode) - return PlainVtreeInnerNode(PlainVtreeLeafNode(parent), children) -end - -##################### -# Compile PSDD from CLT and vtree -##################### - -"Compile a psdd circuit from clt and vtree" -function compile_psdd_from_clt(clt::MetaDiGraph, vtree::PlainVtree) - order = node2dag(vtree[end]) - parent_clt = Var.(parent_vector(clt)) - - lin = Vector{ProbΔNode}() - prob_cache = ProbCache() - lit_cache = LitCache() - v2p = Dict{PlainVtreeNode, ProbΔ}() - - get_params(cpt::Dict) = length(cpt) == 2 ? 
[cpt[1], cpt[0]] : [cpt[(1,1)], cpt[(0,1)], cpt[(1,0)], cpt[(0,0)]] - function add_mapping!(v::PlainVtreeNode, circuits::ProbΔ) - if !haskey(v2p, v); v2p[v] = Vector{ProbΔNode}(); end - foreach(c -> if !(c in v2p[v]) push!(v2p[v], c);end, circuits) - end - - # compile vtree leaf node to terminal/true node - function compile_from_vtree_node(v::PlainVtreeLeafNode) - var = v.var - children = Var.(outneighbors(clt, var)) - cpt = get_prop(clt, var, :cpt) - parent = parent_clt[var] - if isequal(children, []) - circuit = compile_true_nodes(var, v, get_params(cpt), lit_cache, prob_cache, lin) - else - circuit = compile_literal_nodes(var, v, get_params(cpt), lit_cache, prob_cache, lin) - end - add_mapping!(v, circuit) - end - - # compile to decision node - function compile_from_vtree_node(v::PlainVtreeInnerNode) - left_var = left_most_child(v.left).var - right_var = left_most_child(v.right).var - left_circuit = v2p[v.left] - right_circuit = v2p[v.right] - - if parent_clt[left_var] == parent_clt[right_var] # two nodes are independent, compile to seperate decision nodes - circuit = [compile_decision_node([l], [r], v, [1.0], prob_cache, lin) for (l, r) in zip(left_circuit, right_circuit)] - elseif left_var == parent_clt[right_var] # conditioned on left - cpt = get_prop(clt, left_var, :cpt) - circuit = compile_decision_nodes(left_circuit, right_circuit, v, get_params(cpt), prob_cache, lin) - else - throw("PlainVtree are not learned from the same CLT") - end - add_mapping!(v, circuit) - end - - foreach(compile_from_vtree_node, vtree) - return lin -end - -##################### -# Construct probabilistic circuit node -##################### - -prob_children(n, prob_cache) = - copy_with_eltype(map(c -> prob_cache[c], n.children), ProbΔNode{<:StructLogicalΔNode}) - -"Add leaf nodes to circuit `lin`" -function add_prob_leaf_node(var::Var, vtree::PlainVtreeLeafNode, lit_cache::LitCache, prob_cache::ProbCache, lin) - pos = StructLiteralNode{PlainVtreeNode}( var2lit(var), vtree) - neg = 
StructLiteralNode{PlainVtreeNode}(-var2lit(var), vtree) - lit_cache[var2lit(var)] = pos - lit_cache[-var2lit(var)] = neg - pos2 = ProbLiteral(pos) - neg2 = ProbLiteral(neg) - prob_cache[pos] = pos2 - prob_cache[neg] = neg2 - push!(lin, pos2) - push!(lin, neg2) - return (pos2, neg2) -end - -"Add prob⋀ node to circuit `lin`" -function add_prob⋀_node(children::ProbΔ, vtree::PlainVtreeInnerNode, prob_cache::ProbCache, lin)::Prob⋀ - logic = Struct⋀Node{PlainVtreeNode}([c.origin for c in children], vtree) - prob = Prob⋀(logic, prob_children(logic, prob_cache)) - prob_cache[logic] = prob - push!(lin, prob) - return prob -end - -"Add prob⋁ node to circuit `lin`" -function add_prob⋁_node(children::ProbΔ, vtree::PlainVtreeNode, thetas::Vector{Float64}, prob_cache::ProbCache, lin)::Prob⋁ - logic = Struct⋁Node{PlainVtreeNode}([c.origin for c in children], vtree) - prob = Prob⋁(logic, prob_children(logic, prob_cache)) - prob.log_thetas = log.(thetas) - prob_cache[logic] = prob - push!(lin, prob) - return prob -end - -"Construct decision nodes given `primes` and `subs`" -function compile_decision_node(primes::ProbΔ, subs::ProbΔ, vtree::PlainVtreeInnerNode, params::Vector{Float64}, prob_cache::ProbCache, lin) - elements = [add_prob⋀_node([prime, sub], vtree, prob_cache, lin) for (prime, sub) in zip(primes, subs)] - return add_prob⋁_node(elements, vtree, params, prob_cache, lin) -end - -"Construct literal nodes given variable `var`" -function compile_literal_nodes(var::Var, vtree::PlainVtreeLeafNode, probs::Vector{Float64}, lit_cache::LitCache, prob_cache::ProbCache, lin) - (pos, neg) = add_prob_leaf_node(var, vtree, lit_cache, prob_cache, lin) - return [pos, neg] -end - -"Construct true nodes given variable `var`" -function compile_true_nodes(var::Var, vtree::PlainVtreeLeafNode, probs::Vector{Float64}, lit_cache::LitCache, prob_cache::ProbCache, lin) - (pos, neg) = add_prob_leaf_node(var, vtree, lit_cache, prob_cache, lin) - return [add_prob⋁_node([pos, neg], vtree, probs[i:i+1], 
prob_cache, lin) for i in 1:2:length(probs)] -end - -"Construct decision nodes conditiond on different distribution" -function compile_decision_nodes(primes::ProbΔ, subs::ProbΔ, vtree::PlainVtreeInnerNode, params::Vector{Float64}, prob_cache::ProbCache, lin) - return [compile_decision_node(primes, subs, vtree, params[i:i+1], prob_cache, lin) for i in 1:2:length(params)] -end - -##################### -# Map and cache constraints -##################### - -function set_base(index, n::StructLiteralNode, bases) - if positive(n) - bases[n][variable(n)] = 1 - else - bases[n][variable(n)] = -1 - end -end - -function set_base(index, n::Struct⋁Node, bases) - len = num_children(n) - temp = sum([bases[c] for c in n.children]) - bases[n] = map(x-> if x == len 1; elseif -x == len; -1; else 0; end, temp) -end - -function set_base(index, n::Struct⋀Node, bases) - bases[n] = sum([bases[c] for c in n.children]) -end - -function calculate_all_bases(circuit::ProbΔ)::BaseCache - num_var = num_variables(circuit[end].origin.vtree) - bases = BaseCache() - foreach(n -> bases[n.origin] = fill(⊤, num_var), circuit) - foreach(n -> set_base(n[1], n[2].origin, bases), enumerate(circuit)) - @assert all(bases[circuit[end].origin] .== ⊤) "Base of root node should be true" - return bases -end - -##################### -# Compile fully factorized PSDD from vtree, all variables are independent initially -##################### - -function compile_fully_factorized_psdd_from_vtree(vtree::PlainVtree)::ProbΔ - - function ful_factor_node(v::PlainVtreeLeafNode, lit_cache::LitCache, prob_cache::ProbCache, v2n, lin) - var = variables(v)[1] - pos, neg = add_prob_leaf_node(var, v, lit_cache, prob_cache, lin) - prob_or = add_prob⋁_node([pos, neg], v, [0.5, 0.5], prob_cache, lin) - v2n[v] = prob_or - nothing - end - - function ful_factor_node(v::PlainVtreeInnerNode, lit_cache::LitCache, prob_cache::ProbCache, v2n, lin) - left = v2n[v.left] - right = v2n[v.right] - prob_and = add_prob⋀_node([left, right], v, 
prob_cache, lin) - prob_or = add_prob⋁_node([prob_and], v, [1.0], prob_cache, lin) - v2n[v] = prob_or - nothing - end - - lin = Vector{ProbΔNode}() - prob_cache = ProbCache() - lit_cache = LitCache() - v2n = Dict{PlainVtreeNode, ProbΔNode}() - - for v in vtree - ful_factor_node(v, lit_cache, prob_cache, v2n, lin) - end - - lin -end diff --git a/src/StructureLearner/StructureLearner.jl b/src/StructureLearner/StructureLearner.jl deleted file mode 100644 index b21c0ad7..00000000 --- a/src/StructureLearner/StructureLearner.jl +++ /dev/null @@ -1,24 +0,0 @@ -module StructureLearner - -using LogicCircuits -using ..Utils - -using ..Probabilistic -using ..IO - -export -# ChowLiuTree -learn_chow_liu_tree, parent_vector, print_tree, CLT, - -# CircuitBuilder -compile_prob_circuit_from_clt, learn_probabilistic_circuit, BaseCache, ⊤, LitCache, - -# PSDDInitializer -learn_struct_prob_circuit, -learn_vtree_from_clt, compile_psdd_from_clt,compile_fully_factorized_psdd_from_vtree - -include("ChowLiuTree.jl") -include("CircuitBuilder.jl") -include("PSDDInitializer.jl") - -end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index d863c9bc..6e12f494 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -1,74 +1,10 @@ """ -Module with general utilities and missing standard library features that could be useful in any Julia project +Module with general utilities and missing standard library features +that could be useful in any Julia project """ module Utils -export to_long_mi, - generate_all, generate_data_all - - -################### -# Misc. -#################### - - -function to_long_mi(m::Matrix{Float64}, min_int, max_int)::Matrix{Int64} - δmi = maximum(m) - minimum(m) - δint = max_int - min_int - return @. 
round(Int64, m * δint / δmi + min_int) -end - -################### -# One-Hot Encoding -#################### -""" -One-hot encode data (2-D Array) based on categories (1-D Array) -Each row of the return value is a concatenation of one-hot encoding of elements of the same row in data -Assumption: both input arrays have elements of same type -""" -function one_hot_encode(X::Array{T, 2}, categories::Array{T,1}) where {T<:Any} - X_dash = zeros(Bool, size(X)[1], length(categories)*size(X)[2]) - for i = 1:size(X)[1], j = 1:size(X)[2] - X_dash[i, (j-1)*length(categories) + findfirst(==(X[i,j]), categories)] = 1 - end - X_dash -end - -################### -# Testing Utils -#################### - -""" -Given some missing values generates all possible fillings -""" -function generate_all(row::Array{Int8}) - miss_count = count(row .== -1) - lits = length(row) - result = Bool.(zeros(1 << miss_count, lits)) - - if miss_count == 0 - result[1, :] = copy(row) - else - for mask = 0: (1< iszero(x), stack[:,1]) + +all_empty(stack::AbstractArray{T,3}) where T = + all(x -> iszero(x), stack[:,:,1]) + + +length_cuda(stack, i...) 
= stack[i...,1] + + +################### +# One-Hot Encoding +#################### + +""" +One-hot encode data (2-D Array) based on categories (1-D Array) +Each row of the return value is a concatenation of one-hot encoding of elements of the same row in data +Assumption: both input arrays have elements of same type +""" +function one_hot_encode(X::Array{T, 2}, categories::Array{T,1}) where {T<:Any} + X_dash = zeros(Bool, size(X)[1], length(categories)*size(X)[2]) + for i = 1:size(X)[1], j = 1:size(X)[2] + X_dash[i, (j-1)*length(categories) + findfirst(==(X[i,j]), categories)] = 1 + end + X_dash +end + +################### +# Testing Utils +#################### + +""" +Given some missing values generates all possible fillings +""" +function generate_all(row::Vector) + miss_count = count(ismissing, row) + lits = length(row) + result = Bool.(zeros(1 << miss_count, lits)) + + if miss_count == 0 + result[1, :] = copy(row) + else + for mask = 0: (1< num_parameters_node(n), sum_nodes(c)) + +##################### +# methods to easily construct circuits +##################### + +@inline multiply(xs::ProbCircuit...) = multiply(collect(xs)) +@inline summate(xs::ProbCircuit...) = summate(collect(xs)) + +import LogicCircuits: conjoin, disjoin # make available for extension + +# alias conjoin/disjoin using mul/sum terminology +@inline conjoin(args::Vector{<:ProbCircuit}; reuse=nothing) = + multiply(args; reuse) +@inline disjoin(args::Vector{<:ProbCircuit}; reuse=nothing) = + summate(args; reuse) + +@inline Base.:*(x::ProbCircuit, y::ProbCircuit) = multiply(x,y) +@inline Base.:*(xs::ProbCircuit...) = multiply(xs...) +@inline Base.:+(x::ProbCircuit, y::ProbCircuit) = summate(x,y) +@inline Base.:+(xs::ProbCircuit...) = summate(xs...) 
+ +compile(::Type{<:ProbCircuit}, ::Bool) = + error("Probabilistic circuits do not have constant leafs.") + +struct WeightProbCircuit + tmp_weight :: Float64 + circuit :: ProbCircuit +end + +@inline Base.:*(w::Real, x::ProbCircuit) = WeightProbCircuit(w, x) +@inline Base.:*(x::ProbCircuit, w::Real) = w * x +@inline Base.:+(x::WeightProbCircuit...) = begin + ch = collect(x) + c = map(x -> x.circuit, ch) + w = map(x -> x.tmp_weight, ch) + pc = summate(c) + pc.log_probs .= log.(w) + pc +end + +##################### +# circuit inspection +##################### + +"Get the list of multiplication nodes in a given circuit" +mul_nodes(c::ProbCircuit) = ⋀_nodes(c) + +"Get the list of summation nodes in a given circuit" +sum_nodes(c::ProbCircuit) = ⋁_nodes(c) + +function check_parameter_integrity(circuit::ProbCircuit) + for node in sum_nodes(circuit) + @assert all(θ -> !isnan(θ), node.log_probs) "There is a NaN in one of the log_probs" + end + true +end \ No newline at end of file diff --git a/src/logistic_nodes.jl b/src/logistic_nodes.jl new file mode 100644 index 00000000..78f0d66f --- /dev/null +++ b/src/logistic_nodes.jl @@ -0,0 +1,93 @@ +export + LogisticCircuit, + LogisticLeafNode, LogisticInnerNode, + LogisticLiteral, Logistic⋀Node, Logistic⋁Node, + num_classes, num_parameters_per_class + +##################### +# Infrastructure for logistic circuit nodes +##################### + +"Root of the logistic circuit node hierarchy" +abstract type LogisticCircuit <: LogicCircuit end + +""" +A logistic leaf node +""" +abstract type LogisticLeafNode <: LogisticCircuit end + +""" +A logistic inner node +""" +abstract type LogisticInnerNode <: LogisticCircuit end + +""" +A logistic literal node +""" +mutable struct LogisticLiteral <: LogisticLeafNode + literal::Lit + data + counter::UInt32 + LogisticLiteral(l) = begin + new(l, nothing, 0) + end +end + +""" +A logistic conjunction node (And node) +""" +mutable struct Logistic⋀Node <: LogisticInnerNode + 
children::Vector{<:LogisticCircuit} + data + counter::UInt32 + Logistic⋀Node(children) = begin + new(convert(Vector{LogisticCircuit}, children), nothing, 0) + end +end + +""" +A logistic disjunction node (Or node) +""" +mutable struct Logistic⋁Node <: LogisticInnerNode + children::Vector{<:LogisticCircuit} + thetas::Matrix{Float32} + data + counter::UInt32 + Logistic⋁Node(children, class::Int) = begin + new(convert(Vector{LogisticCircuit}, children), init_array(Float32, length(children), class), nothing, 0) + end +end + +##################### +# traits +##################### + +import LogicCircuits.GateType # make available for extension +@inline GateType(::Type{<:LogisticLiteral}) = LiteralGate() +@inline GateType(::Type{<:Logistic⋀Node}) = ⋀Gate() +@inline GateType(::Type{<:Logistic⋁Node}) = ⋁Gate() + +##################### +# methods +##################### + +import LogicCircuits: children # make available for extension +@inline children(n::LogisticInnerNode) = n.children +@inline num_classes(n::Logistic⋁Node) = size(n.thetas)[2] + +@inline num_parameters(c::LogisticCircuit) = sum(n -> num_children(n) * classes(n), ⋁_nodes(c)) +@inline num_parameters_per_class(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c)) + + + +##################### +# constructors and conversions +##################### + +function LogisticCircuit(circuit::LogicCircuit, classes::Int) + f_con(n) = error("Cannot construct a logistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") + f_lit(n) = LogisticLiteral(literal(n)) + f_a(n, cn) = Logistic⋀Node(cn) + f_o(n, cn) = Logistic⋁Node(cn, classes) + foldup_aggregate(circuit, f_con, f_lit, f_a, f_o, LogisticCircuit) +end \ No newline at end of file diff --git a/src/mixtures/em.jl b/src/mixtures/em.jl new file mode 100644 index 00000000..6554bda0 --- /dev/null +++ b/src/mixtures/em.jl @@ -0,0 +1,119 @@ +export one_step_em, component_weights_per_example, initial_weights, clustering, 
+log_likelihood_per_instance_per_component, estimate_parameters_cached, learn_em_model + +using Statistics: mean +using LinearAlgebra: normalize! +using Clustering: kmeans, nclusters, assignments + +function one_step_em(spc, train_x, component_weights; pseudocount) + # E step + lls = log_likelihood_per_instance_per_component(spc, train_x) + lls .+= log.(component_weights) + + example_weights = component_weights_per_example(lls) + component_weights .= sum(example_weights, dims=1) + normalize!(component_weights, 1.0) + + # M step + estimate_parameters_cached(spc, example_weights; pseudocount=pseudocount) + logsumexp(lls, 2), component_weights +end + +function component_weights_per_example(log_p_of_x_and_c) + log_p_of_x = logsumexp(log_p_of_x_and_c, 2) # marginalize out components + log_p_of_given_x_query_c = mapslices(col -> col .- log_p_of_x, log_p_of_x_and_c, dims=[1]) + p_of_given_x_query_c = exp.(log_p_of_given_x_query_c) # no more risk of underflow, so go to linear space + @assert sum(p_of_given_x_query_c) ≈ size(log_p_of_x_and_c, 1) "$(sum(p_of_given_x_query_c)) != $(size(log_p_of_x_and_c))"# each row has proability 1 + Matrix(p_of_given_x_query_c) +end + +function initial_weights(train_x, mix_num::Int64; alg="cluster")::Vector{Float64} + if alg == "cluster" + clustered = clustering(train_x, mix_num) + counting = Float64.(num_examples.(clustered)) + return normalize!(counting, 1) + elseif alg == "random" + return normalize!(rand(Float64, mix_num), 1) + else + error("Initialize weights algorithm is $undefined") + end +end + +function clustering(train_x, mix_num::Int64; maxiter=200)::Vector + train_x = Matrix(train_x) + if mix_num == 1 + return [train_x] + end + + n = num_examples(train_x) + + R = kmeans(train_x, mix_num; maxiter=maxiter) + @assert nclusters(R) == mix_num + a = assignments(R) + + clustered_train_x = Vector() + for k in 1 : mix_num + push!(clustered_train_x, train_x[:, findall(x -> x == k, a)]') + end + + return clustered_train_x +end + +function 
log_likelihood_per_instance_per_component(pc::SharedProbCircuit, data) + @assert isbinarydata(data) "Can only calculate EVI on Bool data" + + compute_flows(pc, data) + num_mix = num_components(pc) + log_likelihoods = zeros(Float64, num_examples(data), num_mix) + indices = init_array(Bool, num_examples(data))::BitVector + + + ll(n::SharedProbCircuit) = () + ll(n::SharedPlainSumNode) = begin + if num_children(n) != 1 # other nodes have no effect on likelihood + for i in 1 : num_children(n) + c = children(n)[i] + log_theta = reshape(n.log_probs[i, :], 1, num_mix) + indices = get_downflow(n, c) + view(log_likelihoods, indices::BitVector, :) .+= log_theta # see MixedProductKernelBenchmark.jl + end + end + end + + foreach(ll, pc) + log_likelihoods +end + +function estimate_parameters_cached(pc::SharedProbCircuit, example_weights; pseudocount::Float64) + foreach(pc) do pn + if is⋁gate(pn) + if num_children(pn) == 1 + pn.log_probs .= 0.0 + else + smoothed_flow = Float64.(sum(example_weights[get_downflow(pn), :], dims=1)) .+ pseudocount + uniform_pseudocount = pseudocount / num_children(pn) + children_flows = vcat(map(c -> sum(example_weights[get_downflow(pn, c), :], dims=1), children(pn))...) + @. 
pn.log_probs = log((children_flows + uniform_pseudocount) / smoothed_flow) + @assert all(sum(exp.(pn.log_probs), dims=1) .≈ 1.0) "Parameters do not sum to one locally" + # normalize away any leftover error + pn.log_probs .-= logsumexp(pn.log_probs, dims=1) + end + end + end +end + +function learn_em_model(pc, train_x; + num_mix=5, + pseudocount=1.0, + maxiter=typemax(Int)) + spc = SharedProbCircuit(pc, num_mix) + compute_flows(spc, train_x) + estimate_parameters_cached(spc, ones(Float64, num_examples(train_x), num_mix) ./ num_mix; pseudocount=pseudocount) + component_weights = reshape(initial_weights(train_x, num_mix), 1, num_mix) + + for iter in 1 : maxiter + @assert isapprox(sum(component_weights), 1.0; atol=1e-10) + lls, component_weights = one_step_em(spc, train_x, component_weights; pseudocount=pseudocount) + println("Log likelihood per instance is $(mean(lls))") + end +end \ No newline at end of file diff --git a/src/mixtures/shared_prob_nodes.jl b/src/mixtures/shared_prob_nodes.jl new file mode 100644 index 00000000..c2c3a651 --- /dev/null +++ b/src/mixtures/shared_prob_nodes.jl @@ -0,0 +1,111 @@ +export SharedProbCircuit, SharedProbLeafNode, SharedProbInnerNode, SharedProbLiteralNode, +SharedMulNode, SharedSumNode, num_components + +##################### +# Probabilistic circuits which share the same structure +##################### + +""" +Root of the shared probabilistic circuit node hierarchy +""" +abstract type SharedProbCircuit <: ProbCircuit end + +""" +A shared probabilistic leaf node +""" +abstract type SharedProbLeafNode <: SharedProbCircuit end + +""" +A shared probabilistic inner node +""" +abstract type SharedProbInnerNode <: SharedProbCircuit end + +""" +A shared probabilistic literal node +""" +mutable struct SharedProbLiteralNode <: SharedProbLeafNode + literal::Lit + data + counter::UInt32 + SharedProbLiteralNode(l) = new(l, nothing, 0) +end + +""" +A shared probabilistic multiplcation node +""" +mutable struct SharedMulNode <: 
SharedProbInnerNode + children::Vector{<:SharedProbCircuit} + data + counter::UInt32 + SharedMulNode(children) = new(children, nothing, 0) +end + +""" +A shared probabilistic summation node +""" +mutable struct SharedSumNode <: SharedProbInnerNode + children::Vector{<:SharedProbCircuit} + log_probs::Matrix{Float64} + data + counter::UInt32 + SharedSumNode(children, n_mixture) = begin + new(children, init_array(Float64, length(children), n_mixture), nothing, 0) + end +end + +##################### +# traits +##################### + +import LogicCircuits.GateType # make available for extension +@inline GateType(::Type{<:SharedProbLiteralNode}) = LiteralGate() +@inline GateType(::Type{<:SharedMulNode}) = ⋀Gate() +@inline GateType(::Type{<:SharedSumNode}) = ⋁Gate() + +##################### +# methods +##################### + +import LogicCircuits: children # make available for extension +@inline children(n::SharedProbInnerNode) = n.children + +@inline num_parameters_node(n::SharedSumNode) = length(n.log_probs) + +"How many components are mixed together in this shared circuit?" 
+@inline num_components(n::SharedSumNode) = size(n.log_probs,2) + +##################### +# constructors and conversions +##################### + +function multiply(arguments::Vector{<:SharedProbCircuit}; + reuse=nothing) + @assert length(arguments) > 0 + reuse isa SharedMulNode && children(reuse) == arguments && return reuse + return SharedMulNode(arguments) +end + +function summate(arguments::Vector{<:SharedProbCircuit}, num_components=0; + reuse=nothing) + @assert length(arguments) > 0 + reuse isa SharedSumNode && children(reuse) == arguments && return reuse + return SharedSumNode(arguments, num_components) # unknwown number of components; resize later +end + +compile(::Type{<:SharedProbCircuit}, l::Lit) = + SharedProbLiteralNode(l) + +function compile(::Type{<:SharedProbCircuit}, circuit::LogicCircuit, num_components::Int) + f_con(n) = error("Cannot construct a probabilistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") + f_lit(n) = compile(SharedProbCircuit, literal(n)) + f_a(_, cns) = multiply(cns) + f_o(_, cns) = summate(cns, num_components) + foldup_aggregate(circuit, f_con, f_lit, f_a, f_o, SharedProbCircuit) +end + +import LogicCircuits: fully_factorized_circuit #extend + +function fully_factorized_circuit(::Type{<:SharedProbCircuit}, n::Int) + ff_logic_circuit = fully_factorized_circuit(PlainLogicCircuit, n) + compile(SharedProbCircuit, ff_logic_circuit) +end diff --git a/src/param_bit_circuit.jl b/src/param_bit_circuit.jl new file mode 100644 index 00000000..20599b22 --- /dev/null +++ b/src/param_bit_circuit.jl @@ -0,0 +1,72 @@ +export ParamBitCircuit + +"A `BitCircuit` with parameters attached to the elements" +struct ParamBitCircuit{V,M,W} + bitcircuit::BitCircuit{V,M} + params::W +end + +function ParamBitCircuit(pc::ProbCircuit, data; reset=true) + logprobs::Vector{Float64} = Vector{Float64}() + on_decision(n, cs, layer_id, decision_id, first_element, last_element) = begin + if isnothing(n) # this decision node is not 
part of the PC + # @assert first_element == last_element + push!(logprobs, 0.0) + else + # @assert last_element-first_element+1 == length(n.log_probs) + append!(logprobs, n.log_probs) + end + end + bc = BitCircuit(pc, data; reset=reset, on_decision) + ParamBitCircuit(bc, logprobs) +end + +function ParamBitCircuit(lc::LogisticCircuit, nc, data; reset=true) + thetas::Vector{Vector{Float32}} = Vector{Vector{Float32}}() + on_decision(n, cs, layer_id, decision_id, first_element, last_element) = begin + if isnothing(n) + # @assert first_element == last_element + push!(thetas, zeros(Float32, nc)) + println("here, some node is not part of the logistic circuit") + else + # @assert last_element - first_element + 1 == size(n.thetas, 1) + # @assert size(n.thetas, 2) == nc + for theta in eachrow(n.thetas) + push!(thetas, theta) + end + end + end + bc = BitCircuit(lc, data; reset=reset, on_decision) + thetas_matrix = permutedims(hcat(thetas...), (2, 1)) + ParamBitCircuit(bc, permutedims(hcat(thetas...), (2, 1))) +end + + + +####################### +## Helper functions ### +####################### + +params(c::ParamBitCircuit) = c.params + +import LogicCircuits: num_nodes, num_elements, num_features, num_leafs, nodes, elements + +num_nodes(c::ParamBitCircuit) = num_nodes(c.bitcircuit) +num_elements(c::ParamBitCircuit) = num_elements(c.bitcircuit) +num_features(c::ParamBitCircuit) = num_features(c.bitcircuit) +num_leafs(c::ParamBitCircuit) = num_leafs(c.bitcircuit) + +nodes(c::ParamBitCircuit) = nodes(c.bitcircuit) +elements(c::ParamBitCircuit) = elements(c.bitcircuit) + +import LogicCircuits: to_gpu, to_cpu, isgpu #extend + +to_gpu(c::ParamBitCircuit) = + ParamBitCircuit(to_gpu(c.bitcircuit), to_gpu(c.params)) + +to_cpu(c::ParamBitCircuit) = + ParamBitCircuit(to_cpu(c.bitcircuit), to_cpu(c.params)) + + +isgpu(c::ParamBitCircuit) = + isgpu(c.bitcircuit) && isgpu(c.params) diff --git a/src/parameters.jl b/src/parameters.jl new file mode 100644 index 00000000..6a96968a --- /dev/null 
+++ b/src/parameters.jl @@ -0,0 +1,167 @@ +export estimate_parameters, uniform_parameters, estimate_parameters_em, test + +using StatsFuns: logsumexp +using CUDA +using LoopVectorization + +""" +Maximum likilihood estimation of parameters given data +""" +function estimate_parameters(pc::ProbCircuit, data; pseudocount::Float64) + @assert isbinarydata(data) "Probabilistic circuit parameter estimation for binary data only" + bc = BitCircuit(pc, data; reset=false) + params = if isgpu(data) + estimate_parameters_gpu(to_gpu(bc), data, pseudocount) + else + estimate_parameters_cpu(bc, data, pseudocount) + end + estimate_parameters_cached!(pc, bc, params) + params +end + +function estimate_parameters_cached!(pc, bc, params) + foreach_reset(pc) do pn + if is⋁gate(pn) + if num_children(pn) == 1 + pn.log_probs .= zero(Float64) + else + id = (pn.data::⋁NodeIds).node_id + @inbounds els_start = bc.nodes[1,id] + @inbounds els_end = bc.nodes[2,id] + @inbounds @views pn.log_probs .= params[els_start:els_end] + @assert isapprox(sum(exp.(pn.log_probs)), 1.0, atol=1e-6) "Parameters do not sum to one locally: $(sum(exp.(pn.log_probs))); $(pn.log_probs)" + pn.log_probs .-= logsumexp(pn.log_probs) # normalize away any leftover error + end + end + end + nothing +end + +function estimate_parameters_cpu(bc::BitCircuit, data, pseudocount) + # no need to synchronize, since each computation is unique to a decision node + node_counts::Vector{UInt} = Vector{UInt}(undef, num_nodes(bc)) + log_params::Vector{Float64} = Vector{Float64}(undef, num_elements(bc)) + + @inline function on_node(flows, values, dec_id) + node_counts[dec_id] = sum(1:size(flows,1)) do i + count_ones(flows[i, dec_id]) + end + end + + @inline function estimate(element, decision, edge_count) + num_els = num_elements(bc.nodes, decision) + log_params[element] = + log((edge_count+pseudocount/num_els) + /(node_counts[decision]+pseudocount)) + end + + @inline function on_edge(flows, values, prime, sub, element, grandpa, 
single_child) + if !single_child + edge_count = sum(1:size(flows,1)) do i + count_ones(values[i, prime] & values[i, sub] & flows[i, grandpa]) + end + estimate(element, grandpa, edge_count) + end # no need to estimate single child params, they are always prob 1 + end + + v, f = satisfies_flows(bc, data; on_node, on_edge) + + return log_params +end + +function estimate_parameters_gpu(bc::BitCircuit, data, pseudocount) + node_counts::CuVector{Int32} = CUDA.zeros(Int32, num_nodes(bc)) + edge_counts::CuVector{Int32} = CUDA.zeros(Int32, num_elements(bc)) + # need to manually cudaconvert closure variables + node_counts_device = CUDA.cudaconvert(node_counts) + edge_counts_device = CUDA.cudaconvert(edge_counts) + + @inline function on_node(flows, values, dec_id, chunk_id, flow) + c::Int32 = CUDA.count_ones(flow) # cast for @atomic to be happy + CUDA.@atomic node_counts_device[dec_id] += c + end + + @inline function on_edge(flows, values, prime, sub, element, grandpa, chunk_id, edge_flow, single_child) + if !single_child + c::Int32 = CUDA.count_ones(edge_flow) # cast for @atomic to be happy + CUDA.@atomic edge_counts_device[element] += c + end + end + + v, f = satisfies_flows(bc, data; on_node, on_edge) + + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some effort + + # TODO: reinstate simpler implementation once https://github.com/JuliaGPU/GPUArrays.jl/issues/313 is fixed and released + @inbounds parents = bc.elements[1,:] + @inbounds parent_counts = node_counts[parents] + @inbounds parent_elcount = bc.nodes[2,parents] .- bc.nodes[1,parents] .+ 1 + params = log.((edge_counts .+ (pseudocount ./ parent_elcount)) + ./ (parent_counts .+ pseudocount)) + return to_cpu(params) +end + +""" +Uniform distribution +""" +function uniform_parameters(pc::ProbCircuit) + foreach(pc) do pn + if is⋁gate(pn) + if num_children(pn) == 1 + pn.log_probs .= 0.0 + else + pn.log_probs .= log.(ones(Float64, num_children(pn)) ./ num_children(pn)) + end + end + end 
+end + +""" +Expectation maximization parameter learning given missing data +""" +function estimate_parameters_em(pc::ProbCircuit, data; pseudocount::Float64) + pbc = ParamBitCircuit(pc, data; reset=false) + params = if isgpu(data) + estimate_parameters_gpu(to_gpu(pbc), data, pseudocount) + else + estimate_parameters_cpu(pbc, data, pseudocount) + end + estimate_parameters_cached!(pc, pbc.bitcircuit, params) + params +end + +function estimate_parameters_cpu(pbc::ParamBitCircuit, data, pseudocount) + # no need to synchronize, since each computation is unique to a decision node + bc = pbc.bitcircuit + node_counts::Vector{Float64} = Vector{Float64}(undef, num_nodes(bc)) + log_params::Vector{Float64} = Vector{Float64}(undef, num_elements(bc)) + + @inline function on_node(flows, values, dec_id) + sum_flows = map(1:size(flows,1)) do i + flows[i, dec_id] + end + node_counts[dec_id] = logsumexp(sum_flows) + end + + @inline function estimate(element, decision, edge_count) + num_els = num_elements(bc.nodes, decision) + log_params[element] = + log((exp(edge_count)+pseudocount/num_els) / (exp(node_counts[decision])+pseudocount)) + end + + @inline function on_edge(flows, values, prime, sub, element, grandpa, single_child) + θ = eltype(flows)(pbc.params[element]) + if !single_child + edge_flows = map(1:size(flows,1)) do i + values[i, prime] + values[i, sub] - values[i, grandpa] + flows[i, grandpa] + θ + end + edge_count = logsumexp(edge_flows) + estimate(element, grandpa, edge_count) + end # no need to estimate single child params, they are always prob 1 + end + + v, f = marginal_flows(pbc, data; on_node, on_edge) + + return log_params +end diff --git a/src/plain_prob_nodes.jl b/src/plain_prob_nodes.jl new file mode 100644 index 00000000..c37cf49e --- /dev/null +++ b/src/plain_prob_nodes.jl @@ -0,0 +1,108 @@ +export PlainProbCircuit, + PlainProbLeafNode, PlainProbInnerNode, + PlainProbLiteralNode, PlainMulNode, PlainSumNode + +##################### +# Plain probabilistic circuit 
nodes +##################### + +"Root of the plain probabilistic circuit node hierarchy" +abstract type PlainProbCircuit <: ProbCircuit end + +"A probabilistic leaf node" +abstract type PlainProbLeafNode <: PlainProbCircuit end + +"A probabilistic inner node" +abstract type PlainProbInnerNode <: PlainProbCircuit end + +"A probabilistic literal node" +mutable struct PlainProbLiteralNode <: PlainProbLeafNode + literal::Lit + data + counter::UInt32 + PlainProbLiteralNode(l) = new(l, nothing, 0) +end + +"A probabilistic conjunction node (multiplication node)" +mutable struct PlainMulNode <: PlainProbInnerNode + children::Vector{PlainProbCircuit} + data + counter::UInt32 + PlainMulNode(children) = begin + new(convert(Vector{PlainProbCircuit}, children), nothing, 0) + end +end + +"A probabilistic disjunction node (summation node)" +mutable struct PlainSumNode <: PlainProbInnerNode + children::Vector{PlainProbCircuit} + log_probs::Vector{Float64} + data + counter::UInt32 + PlainSumNode(c) = begin + new(c, init_array(Float64, length(c)), nothing, 0) + end +end + +##################### +# traits +##################### + +import LogicCircuits.GateType # make available for extension + +@inline GateType(::Type{<:PlainProbLiteralNode}) = LiteralGate() +@inline GateType(::Type{<:PlainMulNode}) = ⋀Gate() +@inline GateType(::Type{<:PlainSumNode}) = ⋁Gate() + +##################### +# methods +##################### + +import LogicCircuits: children # make available for extension +@inline children(n::PlainProbInnerNode) = n.children + +"Count the number of parameters in the node" +@inline num_parameters_node(n::PlainSumNode) = num_children(n) + +##################### +# constructors and conversions +##################### + +function multiply(arguments::Vector{<:PlainProbCircuit}; + reuse=nothing) + @assert length(arguments) > 0 + reuse isa PlainMulNode && children(reuse) == arguments && return reuse + return PlainMulNode(arguments) +end + +function 
summate(arguments::Vector{<:PlainProbCircuit}; + reuse=nothing) + @assert length(arguments) > 0 + reuse isa PlainSumNode && children(reuse) == arguments && return reuse + return PlainSumNode(arguments) +end + +# claim `PlainProbCircuit` as the default `ProbCircuit` implementation +compile(::Type{ProbCircuit}, args...) = + compile(PlainProbCircuit, args...) + +compile(::Type{<:PlainProbCircuit}, l::Lit) = + PlainProbLiteralNode(l) + +function compile(::Type{<:PlainProbCircuit}, circuit::LogicCircuit) + f_con(n) = error("Cannot construct a probabilistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") + f_lit(n) = compile(PlainProbCircuit, literal(n)) + f_a(_, cns) = multiply(cns) + f_o(_, cns) = summate(cns) + foldup_aggregate(circuit, f_con, f_lit, f_a, f_o, PlainProbCircuit) +end + +import LogicCircuits: fully_factorized_circuit #extend + +fully_factorized_circuit(::Type{ProbCircuit}, n::Int) = + fully_factorized_circuit(PlainProbCircuit, n) + +function fully_factorized_circuit(::Type{<:PlainProbCircuit}, n::Int) + ff_logic_circuit = fully_factorized_circuit(PlainLogicCircuit, n) + compile(PlainProbCircuit, ff_logic_circuit) +end \ No newline at end of file diff --git a/src/queries/expectation_graph.jl b/src/queries/expectation_graph.jl new file mode 100644 index 00000000..48e9a182 --- /dev/null +++ b/src/queries/expectation_graph.jl @@ -0,0 +1,172 @@ +export UpExpFlow, ExpFlowCircuit, exp_pass_up, ExpectationUpward + +##################### +# Expectation Flow circuits +# For use of algorithms depending on pairs of nodes of two circuits +##################### + +"A expectation circuit node that has pair of origins of type PC and type LC" +abstract type ExpFlowNode{F} end + +const ExpFlowCircuit{O} = Vector{<:ExpFlowNode{<:O}} + +struct UpExpFlow{F} <: ExpFlowNode{F} + p_origin::ProbCircuit + f_origin::LogisticCircuit + children::Vector{<:ExpFlowNode{<:F}} + f::F + fg::F +end + +import LogicCircuits: children +children(x::UpExpFlow) = 
x.children + +""" +Expected Prediction of LC w.r.t PC. +This implementation uses the computation graph approach. +""" +function ExpectationUpward(pc::ProbCircuit, lc::LogisticCircuit, data) + # 1. Get probability of each observation + log_likelihoods = marginal(pc, data) + p_observed = exp.( log_likelihoods ) + + # 2. Expectation w.r.t. P(x_m, x_o) + exps_flow = exp_pass_up(pc, lc, data) + results_unnormalized = exps_flow[end].fg + + # 3. Expectation w.r.t P(x_m | x_o) + results = transpose(results_unnormalized) ./ p_observed + + # 4. Add Bias terms + biases = lc.thetas + results .+= biases + + results, exps_flow +end + + +""" +Construct a upward expectation flow circuit from a given pair of PC and LC circuits +Note that its assuming the two circuits share the same vtree +""" +function ExpFlowCircuit(pc::ProbCircuit, lc::LogisticCircuit, batch_size::Int, ::Type{El}) where El + F = Array{El, 2} + fmem = () -> zeros(1, batch_size) #Vector{El}(undef, batch_size) #init_array(El, batch_size) # note: fmem's return type will determine type of all UpFlows in the circuit (should be El) + fgmem = () -> zeros(num_classes(lc), batch_size) + + root_pc = pc + root_lc = children(lc)[1] + + cache = Dict{Pair{Node, Node}, ExpFlowNode}() + sizehint!(cache, (num_nodes(pc) + num_nodes(lc))*4÷3) + expFlowCircuit = Vector{ExpFlowNode}() + + function ExpflowTraverse(n::PlainSumNode, m::Logistic⋁Node) + get!(cache, Pair(n, m)) do + ch = [ ExpflowTraverse(i, j) for i in children(n) for j in children(m)] + node = UpExpFlow{F}(n, m, ch, fmem(), fgmem()) + push!(expFlowCircuit, node) + return node + end + end + function ExpflowTraverse(n::PlainMulNode, m::Logistic⋀Node) + get!(cache, Pair(n, m)) do + ch = [ ExpflowTraverse(z[1], z[2]) for z in zip(children(n), children(m)) ] + node = UpExpFlow{F}(n, m, ch, fmem(), fgmem()) + push!(expFlowCircuit, node) + return node + end + end + function ExpflowTraverse(n::PlainProbLiteralNode, m::Logistic⋁Node) + get!(cache, Pair(n, m)) do + ch = 
Vector{ExpFlowNode{F}}() # TODO + node = UpExpFlow{F}(n, m, ch, fmem(), fgmem()) + push!(expFlowCircuit, node) + return node + end + end + function ExpflowTraverse(n::PlainProbLiteralNode, m::LogisticLiteral) + get!(cache, Pair(n, m)) do + ch = Vector{ExpFlowNode{F}}() # TODO + node = UpExpFlow{F}(n, m, ch, fmem(), fgmem()) + push!(expFlowCircuit, node) + return node + end + end + + ExpflowTraverse(root_pc, root_lc) + expFlowCircuit +end + +function exp_pass_up(pc::ProbCircuit, lc::LogisticCircuit, data) + expFlowCircuit = ExpFlowCircuit(pc, lc, num_examples(data), Float64); + for n in expFlowCircuit + exp_pass_up_node(n, data) + end + expFlowCircuit +end + +function exp_pass_up(fc::ExpFlowCircuit, data) + #TODO write resize_flows similar to flow_circuits + # and give as input the expFlowCircuit instead + #expFlowCircuit = ExpFlowCircuit(pc, lc, num_examples(data), Float64); + for n in fc + exp_pass_up_node(n, data) + end +end + +function exp_pass_up_node(node::ExpFlowNode{E}, data) where E + pType = typeof(node.p_origin) + fType = typeof(node.f_origin) + + if node.p_origin isa PlainSumNode && node.f_origin isa Logistic⋁Node + #todo this ordering might be different than the ExpFlowNode children + pthetas = [exp(node.p_origin.log_probs[i]) + for i in 1:length(children(node.p_origin)) for j in 1:length(children(node.f_origin))] + fthetas = [node.f_origin.thetas[j,:] # only taking the first class for now + for i in 1:length(node.p_origin.children) for j in 1:length(node.f_origin.children)] + + node.f .= 0.0 + node.fg .= 0.0 + for z = 1:length(children(node)) + node.f .+= pthetas[z] .* children(node)[z].f + node.fg .+= (pthetas[z] .* fthetas[z]) .* children(node)[z].f + node.fg .+= pthetas[z] .* children(node)[z].fg + end + elseif node.p_origin isa PlainMulNode && node.f_origin isa Logistic⋀Node + node.f .= children(node)[1].f .* children(node)[2].f # assume 2 children + node.fg .= (children(node)[1].f .* children(node)[2].fg) .+ + (children(node)[2].f .* 
children(node)[1].fg) + + elseif node.p_origin isa PlainProbLiteralNode + if node.f_origin isa Logistic⋁Node + m = children(node.f_origin)[1] + elseif node.f_origin isa LogisticLiteral + m = node.f_origin + else + error("Invalid Types of pairs {$pType} - {$fType}") + end + + var = variable(m) + X = data + if ispositive(node.p_origin) && ispositive(m) + node.f[:, X[:, var] .!= 0 ] .= 1.0 # positive and missing observations + node.f[:, X[:, var] .== 0 ] .= 0.0 + elseif isnegative(node.p_origin) && isnegative(m) + node.f[:, X[:, var] .!= 1 ] .= 1.0 # negative and missing observations + node.f[:, X[:, var] .== 1 ] .= 0.0 + else + node.f .= 0.0 + end + + if node.f_origin isa Logistic⋁Node + node.fg .= node.f .* transpose(node.f_origin.thetas) + else + node.fg .= 0.0 + end + + else + error("Invalid Types of pairs {$pType} - {$fType}") + end + +end \ No newline at end of file diff --git a/src/queries/expectation_rec.jl b/src/queries/expectation_rec.jl new file mode 100644 index 00000000..93e90e1d --- /dev/null +++ b/src/queries/expectation_rec.jl @@ -0,0 +1,247 @@ +export Expectation, Moment + + +ExpCacheDict = Dict{Pair{ProbCircuit, LogisticCircuit}, Array{Float64, 2}} +MomentCacheDict = Dict{Tuple{ProbCircuit, LogisticCircuit, Int64}, Array{Float64, 2}} + +struct ExpectationCache + f::ExpCacheDict + fg::ExpCacheDict +end + +ExpectationCache() = ExpectationCache(ExpCacheDict(), ExpCacheDict()) + +struct MomentCache + f::ExpCacheDict + fg::MomentCacheDict +end +MomentCache() = MomentCache( ExpCacheDict(), MomentCacheDict()) + + +# Find a better way to cache n_choose_k values +max_k = 31 +choose_cache = [ 1.0 * binomial(i,j) for i=0:max_k+1, j=0:max_k+1 ] +@inline function choose(n::Int, m::Int) + return choose_cache[n+1, m+1] +end + + +# On Tractable Computation of Expected Predictions (https://arxiv.org/abs/1910.02182) +""" +Missing values should be denoted by -1 +""" +function Expectation(pc::ProbCircuit, lc::LogisticCircuit, data) + # 1. 
Get probability of each observation + log_likelihoods = marginal(pc, data) + p_observed = exp.( log_likelihoods ) + + # 2. Expectation w.r.t. P(x_m, x_o) + cache = ExpectationCache() + results_unnormalized = exp_g(pc, children(lc)[1], data, cache) # skipping the bias node of lc + + # 3. Expectation w.r.t P(x_m | x_o) + results = transpose(results_unnormalized) ./ p_observed + + # 4. Add Bias terms + biases = lc.thetas + results .+= biases + + results, cache +end + +function Moment(pc::ProbCircuit, lc::LogisticCircuit, data, moment::Int) + # 1. Get probability of each observation + log_likelihoods = marginal(pc, data) + p_observed = exp.( log_likelihoods ) + + # 2. Moment w.r.t. P(x_m, x_o) + cache = MomentCache() + biases = lc.thetas + results_unnormalized = zeros(num_examples(data), num_classes(lc)) + + for z = 0:moment-1 + results_unnormalized .+= choose(moment, z) .* (biases .^ (z)) .* transpose(moment_g(pc, children(lc)[1], data, moment - z, cache)) + end + + # 3. Moment w.r.t P(x_m | x_o) + results = results_unnormalized ./ p_observed + + # 4. Add Bias^moment terms + results .+= biases .^ (moment) + + results, cache +end + + + +# exp_f (pr-constraint) is originally from: +# Arthur Choi, Guy Van den Broeck, and Adnan Darwiche. Tractable learning for structured probability spaces: A case study in learning preference distributions. In Proceedings of IJCAI, 2015. 
+ +function exp_f(n::Union{PlainSumNode, StructSumNode}, m::Logistic⋁Node, data, cache::Union{ExpectationCache, MomentCache}) + @inbounds get!(cache.f, Pair(n, m)) do + value = zeros(1 , num_examples(data) ) + pthetas = [exp(n.log_probs[i]) for i in 1:num_children(n)] + @fastmath @simd for i in 1:num_children(n) + @simd for j in 1:num_children(m) + value .+= (pthetas[i] .* exp_f(children(n)[i], children(m)[j], data, cache)) + end + end + return value + end +end + +function exp_f(n::Union{PlainMulNode, StructMulNode}, m::Logistic⋀Node, data, cache::Union{ExpectationCache, MomentCache}) + @inbounds get!(cache.f, Pair(n, m)) do + value = ones(1 , num_examples(data) ) + @fastmath for (i,j) in zip(children(n), children(m)) + value .*= exp_f(i, j, data, cache) + end + return value + # exp_f(children(n)[1], children(m)[1], data, cache) .* exp_f(children(n)[2], children(m)[2], data, cache) + end +end + + +@inline function exp_f(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::LogisticLiteral, data, cache::Union{ExpectationCache, MomentCache}) + @inbounds get!(cache.f, Pair(n, m)) do + value = zeros(1 , num_examples(data) ) + var = lit2var(literal(m)) + X = data + if ispositive(n) && ispositive(m) + # value[1, X[:, var] .== -1 ] .= 1.0 # missing observation always agrees + # value[1, X[:, var] .== 1 ] .= 1.0 # positive observations + value[1, .!isequal.(X[:, var], 0)] .= 1.0 # positive or missing observations + elseif isnegative(n) && isnegative(m) + # value[1, X[:, var] .== -1 ] .= 1.0 # missing observation always agrees + # value[1, X[:, var] .== 0 ] .= 1.0 # negative observations + value[1, .!isequal.(X[:, var], 1)] .= 1.0 # negative or missing observations + end + return value + end +end + +""" +Has to be a Logistic⋁Node with only one child, which is a leaf node +""" +@inline function exp_f(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::Logistic⋁Node, data, cache::Union{ExpectationCache, MomentCache}) + @inbounds get!(cache.f, Pair(n, m)) do + 
exp_f(n, children(m)[1], data, cache) + end +end + +####################################################################### +######## exp_g, exp_fg +######################################################################## + +@inline function exp_g(n::Union{PlainSumNode, StructSumNode}, m::Logistic⋁Node, data, cache::ExpectationCache) + exp_fg(n, m, data, cache) # exp_fg and exp_g are the same for OR nodes +end + +# function exp_g(n::Prob⋀, m::Logistic⋀Node, data, cache::ExpectationCache) +# value = zeros(classes(m) , num_examples(data)) +# @fastmath for (i,j) in zip(children(n), children(m)) +# value .+= exp_fg(i, j, data, cache) +# end +# return value +# # exp_fg(children(n)[1], children(m)[1], data, cache) .+ exp_fg(children(n)[2], children(m)[2], data, cache) +# end + + +function exp_fg(n::Union{PlainSumNode, StructSumNode}, m::Logistic⋁Node, data, cache::ExpectationCache) + @inbounds get!(cache.fg, Pair(n, m)) do + value = zeros(num_classes(m) , num_examples(data) ) + pthetas = [exp(n.log_probs[i]) for i in 1:num_children(n)] + @fastmath @simd for i in 1:num_children(n) + for j in 1:num_children(m) + value .+= (pthetas[i] .* m.thetas[j,:]) .* exp_f(children(n)[i], children(m)[j], data, cache) + value .+= pthetas[i] .* exp_fg(children(n)[i], children(m)[j], data, cache) + end + end + return value + end +end + +function exp_fg(n::Union{PlainMulNode, StructMulNode}, m::Logistic⋀Node, data, cache::ExpectationCache) + @inbounds get!(cache.fg, Pair(n, m)) do + # Assuming 2 children + value = exp_f(children(n)[1], children(m)[1], data, cache) .* exp_fg(children(n)[2], children(m)[2], data, cache) + value .+= exp_f(children(n)[2], children(m)[2], data, cache) .* exp_fg(children(n)[1], children(m)[1], data, cache) + return value + end +end + + +""" +Has to be a Logistic⋁Node with only one child, which is a leaf node +""" +@inline function exp_fg(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::Logistic⋁Node, data, cache::ExpectationCache) + @inbounds 
get!(cache.fg, Pair(n, m)) do + m.thetas[1,:] .* exp_f(n, m, data, cache) + end +end + +@inline function exp_fg(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::LogisticLiteral, data, cache::ExpectationCache) + #dont know how many classes, boradcasting does the job + zeros(1 , num_examples(data)) +end + +####################################################################### +######## moment_g, moment_fg +######################################################################## + +@inline function moment_g(n::Union{PlainSumNode, StructSumNode}, m::Logistic⋁Node, data, moment::Int, cache::MomentCache) + get!(cache.fg, (n, m, moment)) do + moment_fg(n, m, data, moment, cache) + end +end + +""" +Calculating E[g^k * f] +""" +function moment_fg(n::Union{PlainSumNode, StructSumNode}, m::Logistic⋁Node, data, moment::Int, cache::MomentCache) + if moment == 0 + return exp_f(n, m, data, cache) + end + + get!(cache.fg, (n, m, moment)) do + value = zeros(num_classes(m) , num_examples(data) ) + pthetas = [exp(n.log_probs[i]) for i in 1:num_children(n)] + @fastmath @simd for i in 1:num_children(n) + for j in 1:num_children(m) + for z in 0:moment + value .+= pthetas[i] .* choose(moment, z) .* m.thetas[j,:].^(moment - z) .* moment_fg(children(n)[i], children(m)[j], data, z, cache) + end + end + end + return value + end +end + +@inline function moment_fg(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::Logistic⋁Node, data, moment::Int, cache::MomentCache) + get!(cache.fg, (n, m, moment)) do + m.thetas[1,:].^(moment) .* exp_f(n, m, data, cache) + end +end + +@inline function moment_fg(n::Union{PlainProbLiteralNode, StructProbLiteralNode}, m::LogisticLiteral, data, moment::Int, cache::MomentCache) + #dont know how many classes, boradcasting does the job + if moment == 0 + exp_f(n, m, data, cache) + else + zeros(1, num_examples(data)) + end +end + +function moment_fg(n::Union{PlainMulNode, StructMulNode}, m::Logistic⋀Node, data, moment::Int, cache::MomentCache) + if 
moment == 0 + return exp_f(n, m, data, cache) + end + get!(cache.fg, (n, m, moment)) do + value = moment_fg(children(n)[1], children(m)[1], data, 0, cache) .* moment_fg(children(n)[2], children(m)[2], data, moment, cache) + + for z in 1:moment + value .+= choose(moment, z) .* moment_fg(children(n)[1], children(m)[1], data, z, cache) .* moment_fg(children(n)[2], children(m)[2], data, moment - z, cache) + end + return value + end +end diff --git a/src/queries/information.jl b/src/queries/information.jl new file mode 100644 index 00000000..127ea0a5 --- /dev/null +++ b/src/queries/information.jl @@ -0,0 +1,164 @@ +export kl_divergence + +const KLDCache = Dict{Tuple{ProbCircuit,ProbCircuit}, Float64} + +"""" +Calculate entropy of the distribution of the input pc." +""" + +import ..Utils: entropy +function entropy(pc_node::StructSumNode, pc_entropy_cache::Dict{ProbCircuit, Float64}=Dict{ProbCircuit, Float64}())::Float64 + if pc_node in keys(pc_entropy_cache) + return pc_entropy_cache[pc_node] + elseif children(pc_node)[1] isa StructProbLiteralNode + return get!(pc_entropy_cache, pc_node, + - exp(pc_node.log_probs[1]) * pc_node.log_probs[1] - + exp(pc_node.log_probs[2]) * pc_node.log_probs[2]) + else + local_entropy = 0.0 + for (prob⋀_node, log_prob) in zip(children(pc_node), pc_node.log_probs) + p = children(prob⋀_node)[1] + s = children(prob⋀_node)[2] + + local_entropy += exp(log_prob) * (entropy(p, pc_entropy_cache) + + entropy(s, pc_entropy_cache) - log_prob) + end + return get!(pc_entropy_cache, pc_node, local_entropy) + end +end + +function entropy(pc_node::StructMulNode, pc_entropy_cache::Dict{ProbCircuit, Float64})::Float64 + return get!(pc_entropy_cache, children(pc_node)[1], entropy(children(pc_node)[1], pc_entropy_cache)) + + get!(pc_entropy_cache, children(pc_node)[2], entropy(children(pc_node)[2], pc_entropy_cache)) +end + +function entropy(pc_node::StructProbLiteralNode, pc_entropy_cache::Dict{ProbCircuit, Float64})::Float64 + return get!(pc_entropy_cache, 
pc_node, 0.0) +end + +"Calculate KL divergence calculation for pcs that are not necessarily identical" +function kl_divergence(pc_node1::StructSumNode, pc_node2::StructSumNode, + kl_divergence_cache::KLDCache=KLDCache(), pr_constraint_cache::PRCache=PRCache()) + @assert !(pc_node1 isa StructMulNode || pc_node2 isa StructMulNode) "Prob⋀ not a valid pc node for KL-Divergence" + + # Check if both nodes are normalized for same vtree node + @assert variables(pc_node1) == variables(pc_node2) "Both nodes not normalized for same vtree node" + + if (pc_node1, pc_node2) in keys(kl_divergence_cache) # Cache hit + return kl_divergence_cache[(pc_node1, pc_node2)] + elseif children(pc_node1)[1] isa StructProbLiteralNode + if pc_node2 isa StructProbLiteralNode + kl_divergence(children(pc_node1)[1], pc_node2, kl_divergence_cache, pr_constraint_cache) + kl_divergence(children(pc_node1)[2], pc_node2, kl_divergence_cache, pr_constraint_cache) + if literal(children(pc_node1)[1]) == literal(pc_node2) + return get!(kl_divergence_cache, (pc_node1, pc_node2), + pc_node1.log_probs[1] * exp(pc_node1.log_probs[1]) + ) + else + return get!(kl_divergence_cache, (pc_node1, pc_node2), + pc_node1.log_probs[2] * exp(pc_node1.log_probs[2]) + ) + end + else + # The below four lines actually assign zero, but still we need to + # call it. 
+ kl_divergence(children(pc_node1)[1], children(pc_node2)[1], kl_divergence_cache, pr_constraint_cache) + kl_divergence(children(pc_node1)[1], children(pc_node2)[2], kl_divergence_cache, pr_constraint_cache) + kl_divergence(children(pc_node1)[2], children(pc_node2)[1], kl_divergence_cache, pr_constraint_cache) + kl_divergence(children(pc_node1)[2], children(pc_node2)[2], kl_divergence_cache, pr_constraint_cache) + # There are two possible matches + if literal(children(pc_node1)[1]) == literal(children(pc_node2)[1]) + return get!(kl_divergence_cache, (pc_node1, pc_node2), + exp(pc_node1.log_probs[1]) * (pc_node1.log_probs[1] - pc_node2.log_probs[1]) + + exp(pc_node1.log_probs[2]) * (pc_node1.log_probs[2] - pc_node2.log_probs[2]) + ) + else + return get!(kl_divergence_cache, (pc_node1, pc_node2), + exp(pc_node1.log_probs[1]) * (pc_node1.log_probs[1] - pc_node2.log_probs[2]) + + exp(pc_node1.log_probs[2]) * (pc_node1.log_probs[2] - pc_node2.log_probs[1]) + ) + end + end + else # the normal case + kld = 0.0 + + # loop through every combination of prim and sub + for (prob⋀_node1, log_theta1) in zip(children(pc_node1), pc_node1.log_probs) + for (prob⋀_node2, log_theta2) in zip(children(pc_node2), pc_node2.log_probs) + p = children(prob⋀_node1)[1] + s = children(prob⋀_node1)[2] + + r = children(prob⋀_node2)[1] + t = children(prob⋀_node2)[2] + + theta1 = exp(log_theta1) + + p11 = pr_constraint(s, t, pr_constraint_cache) + p12 = pr_constraint(p, r, pr_constraint_cache) + + p13 = theta1 * (log_theta1 - log_theta2) + + p21 = kl_divergence(p, r, kl_divergence_cache, pr_constraint_cache) + p31 = kl_divergence(s, t, kl_divergence_cache, pr_constraint_cache) + + kld += p11 * p12 * p13 + theta1 * (p11 * p21 + p12 * p31) + end + end + return get!(kl_divergence_cache, (pc_node1, pc_node2), kld) + end +end + +function kl_divergence(pc_node1::StructProbLiteralNode, pc_node2::StructProbLiteralNode, + kl_divergence_cache::KLDCache, pr_constraint_cache::PRCache) + # Check if literals are 
over same variables in vtree + @assert variables(pc_node1) == variables(pc_node2) "Both nodes not normalized for same vtree node" + + if (pc_node1, pc_node2) in keys(kl_divergence_cache) # Cache hit + return kl_divergence_cache[pc_node1, pc_node2] + else + # In this case probability is 1, kl divergence is 0 + return get!(kl_divergence_cache, (pc_node1, pc_node2), 0.0) + end +end + +function kl_divergence(pc_node1::StructSumNode, pc_node2::StructProbLiteralNode, + kl_divergence_cache::KLDCache, pr_constraint_cache::PRCache) + @assert variables(pc_node1) == variables(pc_node2) "Both nodes not normalized for same vtree node" + + if (pc_node1, pc_node2) in keys(kl_divergence_cache) # Cache hit + return kl_divergence_cache[pc_node1, pc_node2] + else + kl_divergence(children(pc_node1)[1], pc_node2, kl_divergence_cache, pr_constraint_cache) + kl_divergence(children(pc_node1)[2], pc_node2, kl_divergence_cache, pr_constraint_cache) + if literal(children(pc_node1)[1]) == literal(pc_node2) + return get!(kl_divergence_cache, (pc_node1, pc_node2), + pc_node1.log_probs[1] * exp(pc_node1.log_probs[1]) + ) + else + return get!(kl_divergence_cache, (pc_node1, pc_node2), + pc_node1.log_probs[2] * exp(pc_node1.log_probs[2]) + ) + end + end +end + +function kl_divergence(pc_node1::StructProbLiteralNode, pc_node2::StructSumNode, + kl_divergence_cache::KLDCache, pr_constraint_cache::PRCache) + @assert variables(pc_node1) == variables(pc_node2) "Both nodes not normalized for same vtree node" + + if (pc_node1, pc_node2) in keys(kl_divergence_cache) # Cache hit + return kl_divergence_cache[pc_node1, pc_node2] + else + kl_divergence(pc_node1, children(pc_node2)[1], kl_divergence_cache, pr_constraint_cache) + kl_divergence(pc_node1, children(pc_node2)[2], kl_divergence_cache, pr_constraint_cache) + if literal(pc_node1) == literal(children(pc_node2)[1]) + return get!(kl_divergence_cache, (pc_node1, pc_node2), + -pc_node2.log_probs[1] + ) + else + return get!(kl_divergence_cache, (pc_node1, 
pc_node2), + -pc_node2.log_probs[2] + ) + end + end +end diff --git a/src/queries/likelihood.jl b/src/queries/likelihood.jl new file mode 100644 index 00000000..aeb8a174 --- /dev/null +++ b/src/queries/likelihood.jl @@ -0,0 +1,82 @@ +export EVI, log_likelihood_per_instance, log_likelihood, log_likelihood_avg + +""" +Compute the likelihood of the PC given each individual instance in the data +""" +function log_likelihood_per_instance(pc::ProbCircuit, data) + @assert isbinarydata(data) "Probabilistic circuit likelihoods are for binary data only" + bc = ParamBitCircuit(pc, data) + if isgpu(data) + log_likelihood_per_instance_gpu(to_gpu(bc), data) + else + log_likelihood_per_instance_cpu(bc, data) + end +end + +function log_likelihood_per_instance_cpu(bc, data) + ll::Vector{Float64} = zeros(Float64, num_examples(data)) + ll_lock::Threads.ReentrantLock = Threads.ReentrantLock() + + @inline function on_edge(flows, values, prime, sub, element, grandpa, single_child) + if !single_child + lock(ll_lock) do # TODO: move lock to inner loop? change to atomic float? 
+ for i = 1:size(flows,1) + @inbounds edge_flow = values[i, prime] & values[i, sub] & flows[i, grandpa] + first_true_bit = trailing_zeros(edge_flow)+1 + last_true_bit = 64-leading_zeros(edge_flow) + @simd for j = first_true_bit:last_true_bit + ex_id = ((i-1) << 6) + j + if get_bit(edge_flow, j) + @inbounds ll[ex_id] += bc.params[element] + end + end + end + end + end + nothing + end + + satisfies_flows(bc.bitcircuit, data; on_edge) + return ll +end + +function log_likelihood_per_instance_gpu(bc, data) + params_device = CUDA.cudaconvert(bc.params) + ll::CuVector{Float64} = CUDA.zeros(Float64, num_examples(data)) + ll_device = CUDA.cudaconvert(ll) + + @inline function on_edge(flows, values, prime, sub, element, grandpa, chunk_id, edge_flow, single_child) + if !single_child + first_true_bit = 1+trailing_zeros(edge_flow) + last_true_bit = 64-leading_zeros(edge_flow) + for j = first_true_bit:last_true_bit + ex_id = ((chunk_id-1) << 6) + j + if get_bit(edge_flow, j) + CUDA.@atomic ll_device[ex_id] += params_device[element] + end + end + end + nothing + end + + v, f = satisfies_flows(bc.bitcircuit, data; on_edge) + CUDA.unsafe_free!(v) # save the GC some effort + CUDA.unsafe_free!(f) # save the GC some effort + + return ll +end + +""" +Complete evidence queries +""" +const EVI = log_likelihood_per_instance + +""" +Compute the likelihood of the PC given the data +""" +log_likelihood(pc, data) = sum(log_likelihood_per_instance(pc, data)) + +""" +Compute the likelihood of the PC given the data, averaged over all instances in the data +""" +log_likelihood_avg(pc, data) = log_likelihood(pc, data)/num_examples(data) \ No newline at end of file diff --git a/src/queries/map.jl b/src/queries/map.jl new file mode 100644 index 00000000..cda5d052 --- /dev/null +++ b/src/queries/map.jl @@ -0,0 +1,122 @@ +export max_a_posteriori, MAP + +import DataFrames: DataFrame, mapcols! 
+ +##################### +# Circuit MAP/MPE evaluation +##################### + +"Evaluate maximum a-posteriori state of the circuit for a given input" +max_a_posteriori(root::ProbCircuit, data::Union{Bool,Missing}...) = + max_a_posteriori(root, collect(Union{Bool,Missing}, data)) + +max_a_posteriori(root::ProbCircuit, data::Union{Vector{Union{Bool,Missing}},CuVector{UInt8}}) = + example(max_a_posteriori(root, DataFrame(reshape(data, 1, :))), 1) + +max_a_posteriori(circuit::ProbCircuit, data::DataFrame) = + max_a_posteriori(same_device(ParamBitCircuit(circuit, data), data), data) + +function max_a_posteriori(pbc::ParamBitCircuit, data; Float=Float32) + @assert isgpu(data) == isgpu(pbc) "ParamBitCircuit and data need to be on the same device" + values = marginal_all(pbc, data) + return map_down(pbc, data, values; Float) +end + +""" +Maximum a-posteriori queries +""" +const MAP = max_a_posteriori + +""" +Mode of the distribution +""" +const mode = max_a_posteriori + +"Find the MAP child value and node id of a given decision node" +function map_child(params, nodes, elements, ex_id, dec_id, values) + @inbounds els_start = nodes[1,dec_id] + @inbounds els_end = nodes[2,dec_id] + pr_opt = typemin(eltype(values)) + j_opt = 1 + for j = els_start:els_end + @inbounds prime = elements[2,j] + @inbounds sub = elements[3,j] + @inbounds pr = values[ex_id, prime] + values[ex_id, sub] + params[j] + if pr > pr_opt + pr_opt = pr + j_opt = j + end + end + @inbounds return params[j_opt], elements[2,j_opt], elements[3,j_opt] +end + +# CPU code + +function map_down(pbc, data, values::Array; Float=Float32) + state = zeros(Bool, num_examples(data), num_features(data)) + logprob = zeros(Float, num_examples(data)) + Threads.@threads for ex_id = 1:size(state,1) + map_rec(num_leafs(pbc), params(pbc), nodes(pbc), elements(pbc), ex_id, num_nodes(pbc), values, state, logprob) + end + df = DataFrame(state) + mapcols!(c -> BitVector(c), df) + return df, logprob +end + +function map_rec(nl, params, 
nodes, elements, ex_id, dec_id, values, state, logprob) + if isleafgate(nl, dec_id) + if isliteralgate(nl, dec_id) + l = literal(nl, dec_id) + @inbounds state[ex_id, lit2var(l)] = (l > 0) + end + else + edge_log_pr, prime, sub = map_child(params, nodes, elements, ex_id, dec_id, values) + @inbounds logprob[ex_id] += edge_log_pr + map_rec(nl, params, nodes, elements, ex_id, prime, values, state, logprob) + map_rec(nl, params, nodes, elements, ex_id, sub, values, state, logprob) + end +end + +# GPU code + +function map_down(pbc, data, values::CuArray; Float=Float32) + state = CUDA.zeros(Bool, num_examples(data), num_features(data)) + logprob = CUDA.zeros(Float, num_examples(data)) + stack = CUDA.zeros(Int32, num_examples(data), num_features(data)+3) + @inbounds stack[:,1] .= 1 # start with 1 dec_id in the stack + @inbounds stack[:,2] .= num_nodes(pbc) # start with the root in the stack + num_threads = 256 + num_blocks = ceil(Int, size(state,1)/num_threads) + CUDA.@sync begin + @cuda threads=num_threads blocks=num_blocks map_cuda_kernel(num_leafs(pbc), params(pbc), nodes(pbc), elements(pbc), values, state, logprob, stack) + end + CUDA.unsafe_free!(values) # save the GC some effort + # do the conversion to a CuBitVector on the CPU... 
+ df = DataFrame(to_cpu(state)) + mapcols!(c -> to_gpu(BitVector(c)), df) + return df, logprob +end + +function map_cuda_kernel(nl, params, nodes, elements, values, state, logprob, stack) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + stride_x = blockDim().x * gridDim().x + for ex_id = index_x:stride_x:size(state,1) + dec_id = pop_cuda!(stack, ex_id) + while dec_id > zero(eltype(stack)) + if isleafgate(nl, dec_id) + if isliteralgate(nl, dec_id) + l = literal(nl, dec_id) + var = lit2var(l) + @inbounds state[ex_id, var] = (l > 0) + end + else + edge_log_pr, prime, sub = map_child(params, nodes, elements, ex_id, dec_id, values) + @inbounds logprob[ex_id] += edge_log_pr + push_cuda!(stack, prime, ex_id) + push_cuda!(stack, sub, ex_id) + end + dec_id = pop_cuda!(stack, ex_id) + end + end + return nothing +end \ No newline at end of file diff --git a/src/queries/marginal_flow.jl b/src/queries/marginal_flow.jl new file mode 100644 index 00000000..1a6b55df --- /dev/null +++ b/src/queries/marginal_flow.jl @@ -0,0 +1,344 @@ +using StatsFuns: logsumexp, log1pexp + +using CUDA: CUDA, @cuda +using DataFrames: DataFrame +using LoopVectorization: @avx +using LogicCircuits: balance_threads + +export marginal, MAR, marginal_all, marginal_log_likelihood, +marginal_log_likelihood_avg, marginal_flows, marginal_flows_down + +##################### +# Circuit marginal evaluation +##################### + +# evaluate a probabilistic circuit as a function +function (root::ProbCircuit)(data...) + marginal(root, data...) +end + +"Evaluate marginals of the circuit bottom-up for a given input" +marginal(root::ProbCircuit, data::Union{Real,Missing}...) 
= + marginal(root, collect(Union{Bool,Missing}, data)) + +marginal(root::ProbCircuit, data::Union{Vector{Union{Bool,Missing}},CuVector{UInt8}}) = + marginal(root, DataFrame(reshape(data, 1, :)))[1] + +marginal(circuit::ProbCircuit, data::DataFrame) = + marginal(same_device(ParamBitCircuit(circuit, data), data) , data) + +function marginal(circuit::ParamBitCircuit, data::DataFrame)::AbstractVector + marginal_all(circuit,data)[:,end] +end + +""" +Marginal queries +""" +const MAR = marginal + +""" +Compute the marginal likelihood of the PC given the data +""" +marginal_log_likelihood(pc, data) = sum(marginal(pc, data)) + +""" +Compute the marginal likelihood of the PC given the data, averaged over all instances in the data +""" +marginal_log_likelihood_avg(pc, data) = marginal_log_likelihood(pc, data)/num_examples(data) + +##################### +# Circuit evaluation of *all* nodes in circuit +##################### + +"Evaluate the probabilistic circuit bottom-up for a given input and return the marginal probability value of all nodes" +marginal_all(circuit::ProbCircuit, data::DataFrame) = + marginal_all(same_device(ParamBitCircuit(circuit, data), data) , data) + +function marginal_all(circuit::ParamBitCircuit, data, reuse=nothing) + @assert num_features(data) == num_features(circuit) + @assert isbinarydata(data) + values = init_marginal(data, reuse, num_nodes(circuit)) + marginal_layers(circuit, values) + return values +end + +"Initialize values from the data (data frames)" +function init_marginal(data, reuse, num_nodes; Float=Float32) + flowtype = isgpu(data) ? CuMatrix{Float} : Matrix{Float} + values = similar!(reuse, flowtype, num_examples(data), num_nodes) + @views values[:,LogicCircuits.TRUE_BITS] .= log(one(Float)) + @views values[:,LogicCircuits.FALSE_BITS] .= log(zero(Float)) + # here we should use a custom CUDA kernel to extract Float marginals from bit vectors + # for now the lazy solution is to move everything to the CPU and do the work there... 
+ data_cpu = to_cpu(data) + for i=1:num_features(data) + marg_pos::Vector{Float} = log.(coalesce.(data_cpu[:,i], one(Float))) + marg_neg::Vector{Float} = log.(coalesce.(1.0 .- data_cpu[:,i], one(Float))) + values[:,2+i] .= same_device(marg_pos, values) + values[:,2+num_features(data)+i] .= same_device(marg_neg, values) + end + return values +end + +# upward pass helpers on CPU + +"Compute marginals on the CPU (SIMD & multi-threaded)" +function marginal_layers(circuit::ParamBitCircuit, values::Matrix) + bc = circuit.bitcircuit + els = bc.elements + pars = circuit.params + for layer in bc.layers[2:end] + Threads.@threads for dec_id in layer + j = @inbounds bc.nodes[1,dec_id] + els_end = @inbounds bc.nodes[2,dec_id] + if j == els_end + assign_marginal(values, dec_id, els[2,j], els[3,j], pars[j]) + j += 1 + else + assign_marginal(values, dec_id, els[2,j], els[3,j], els[2,j+1], els[3,j+1], pars[j], pars[j+1]) + j += 2 + end + while j <= els_end + accum_marginal(values, dec_id, els[2,j], els[3,j], pars[j]) + j += 1 + end + end + end +end + +assign_marginal(v::Matrix{<:AbstractFloat}, i, e1p, e1s, p1) = + @views @. 
@avx v[:,i] = v[:,e1p] + v[:,e1s] + p1 + +accum_marginal(v::Matrix{<:AbstractFloat}, i, e1p, e1s, p1) = begin + @avx for j=1:size(v,1) + @inbounds x = v[j,i] + @inbounds y = v[j,e1p] + v[j,e1s] + p1 + Δ = ifelse(x == y, zero(eltype(v)), abs(x - y)) + @inbounds v[j,i] = max(x, y) + log1p(exp(-Δ)) + end +end + +assign_marginal(v::Matrix{<:AbstractFloat}, i, e1p, e1s, e2p, e2s, p1, p2) = begin + @avx for j=1:size(v,1) + @inbounds x = v[j,e1p] + v[j,e1s] + p1 + @inbounds y = v[j,e2p] + v[j,e2s] + p2 + Δ = ifelse(x == y, zero(eltype(v)), abs(x - y)) + @inbounds v[j,i] = max(x, y) + log1p(exp(-Δ)) + end +end + +# upward pass helpers on GPU + +"Compute marginals on the GPU" +function marginal_layers(circuit::ParamBitCircuit, values::CuMatrix; dec_per_thread = 8, log2_threads_per_block = 8) + bc = circuit.bitcircuit + CUDA.@sync for layer in bc.layers[2:end] + num_examples = size(values, 1) + num_decision_sets = length(layer)/dec_per_thread + num_threads = balance_threads(num_examples, num_decision_sets, log2_threads_per_block) + num_blocks = (ceil(Int, num_examples/num_threads[1]), + ceil(Int, num_decision_sets/num_threads[2])) + @cuda threads=num_threads blocks=num_blocks marginal_layers_cuda(layer, bc.nodes, bc.elements, circuit.params, values) + end +end + +"CUDA kernel for circuit evaluation" +function marginal_layers_cuda(layer, nodes, elements, params, values) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + for j = index_x:stride_x:size(values,1) + for i = index_y:stride_y:length(layer) + decision_id = @inbounds layer[i] + k = @inbounds nodes[1,decision_id] + els_end = @inbounds nodes[2,decision_id] + @inbounds x = values[j, elements[2,k]] + values[j, elements[3,k]] + params[k] + while k < els_end + k += 1 + @inbounds y = values[j, elements[2,k]] + values[j, elements[3,k]] + params[k] + Δ = ifelse(x == y, 
zero(eltype(values)), CUDA.abs(x - y)) + x = max(x, y) + CUDA.log1p(CUDA.exp(-Δ)) + end + values[j, decision_id] = x + end + end + return nothing +end + + +##################### +# Bit circuit marginals and flows (up and downward pass) +##################### + +"Compute the marginal and flow of each node" +function marginal_flows(circuit::ProbCircuit, data, + reuse_values=nothing, reuse_flows=nothing; on_node=noop, on_edge=noop) + bc = same_device(ParamBitCircuit(circuit, data), data) + marginal_flows(bc, data, reuse_values, reuse_flows; on_node, on_edge) +end + +function marginal_flows(circuit::ParamBitCircuit, data, + reuse_values=nothing, reuse_flows=nothing; on_node=noop, on_edge=noop) + @assert isgpu(data) == isgpu(circuit) "ParamBitCircuit and data need to be on the same device" + values = marginal_all(circuit, data, reuse_values) + flows = marginal_flows_down(circuit, values, reuse_flows; on_node, on_edge) + return values, flows +end + +##################### +# Bit circuit marginal flows downward pass +##################### + +"When marginals of nodes have already been computed, do a downward pass computing the marginal flows at each node" +function marginal_flows_down(circuit::ParamBitCircuit, values, reuse=nothing; on_node=noop, on_edge=noop) + flows = similar!(reuse, typeof(values), size(values)...) 
+ marginal_flows_down_layers(circuit, flows, values, on_node, on_edge) + return flows +end + +# downward pass helpers on CPU + +"Evaluate marginals of the layers of a bit circuit on the CPU (SIMD & multi-threaded)" +function marginal_flows_down_layers(pbc::ParamBitCircuit, flows::Matrix, values::Matrix, on_node, on_edge) + @assert flows !== values + circuit = pbc.bitcircuit + els = circuit.elements + for layer in Iterators.reverse(circuit.layers) + Threads.@threads for dec_id in layer + par_start = @inbounds circuit.nodes[3,dec_id] + if iszero(par_start) + if dec_id == num_nodes(circuit) + # marginal flow start from 0.0 + @inbounds @views flows[:, dec_id] .= zero(eltype(flows)) + end + # no parents, ignore (can happen for false/true node and root) + else + par_end = @inbounds circuit.nodes[4,dec_id] + for j = par_start:par_end + par = @inbounds circuit.parents[j] + grandpa = @inbounds els[1,par] + sib_id = sibling(els, par, dec_id) + single_child = has_single_child(circuit.nodes, grandpa) + if single_child + if j == par_start + @inbounds @views @. 
flows[:, dec_id] = flows[:, grandpa] + else + accum_marg_flow(flows, dec_id, grandpa) + end + else + θ = eltype(flows)(pbc.params[par]) + if j == par_start + assign_marg_flow(flows, values, dec_id, grandpa, sib_id, θ) + else + accum_marg_flow(flows, values, dec_id, grandpa, sib_id, θ) + end + end + # report edge flow only once: + sib_id > dec_id && on_edge(flows, values, dec_id, sib_id, par, grandpa, single_child) + end + end + on_node(flows, values, dec_id) + end + end +end + +function assign_marg_flow(f::Matrix{<:AbstractFloat}, v, d, g, s, θ) + @inbounds @simd for j in 1:size(f,1) #@avx gives incorrect results + edge_flow = v[j, s] + v[j, d] - v[j, g] + f[j, g] + θ + edge_flow = ifelse(isnan(edge_flow), typemin(eltype(f)), edge_flow) + f[j, d] = edge_flow + end + # @assert !any(isnan, f[:,d]) +end + +function accum_marg_flow(f::Matrix{<:AbstractFloat}, d, g) + @avx for j=1:size(f,1) #@avx gives incorrect results + x = f[j, d] + y = f[j, g] + Δ = ifelse(x == y, zero(eltype(f)), abs(x - y)) + f[j, d] = max(x, y) + log1p(exp(-Δ)) + end + # @assert !any(isnan, f[:,d]) +end + +function accum_marg_flow(f::Matrix{<:AbstractFloat}, v, d, g, s, θ) + @inbounds @simd for j=1:size(f,1) #@avx gives incorrect results + x = f[j, d] + y = v[j, s] + v[j, d] - v[j, g] + f[j, g] + θ + y = ifelse(isnan(y), typemin(eltype(f)), y) + Δ = ifelse(x == y, zero(eltype(f)), abs(x - y)) + f[j, d] = max(x, y) + log1p(exp(-Δ)) + end + # @assert !any(isnan, f[:,d]) +end + +# downward pass helpers on GPU + +"Pass marginal flows down the layers of a bit circuit on the GPU" +function marginal_flows_down_layers(pbc::ParamBitCircuit, flows::CuMatrix, values::CuMatrix, + on_node, on_edge; + dec_per_thread = 8, log2_threads_per_block = 7) + bc = pbc.bitcircuit + CUDA.@sync for layer in Iterators.reverse(bc.layers) + num_examples = size(values, 1) + num_decision_sets = length(layer)/dec_per_thread + num_threads = balance_threads(num_examples, num_decision_sets, log2_threads_per_block) + num_blocks = 
(ceil(Int, num_examples/num_threads[1]), + ceil(Int, num_decision_sets/num_threads[2])) + @cuda threads=num_threads blocks=num_blocks marginal_flows_down_layers_cuda(layer, bc.nodes, bc.elements, bc.parents, pbc.params, flows, values, on_node, on_edge) + end +end + +"CUDA kernel for passing marginal flows down circuit" +function marginal_flows_down_layers_cuda(layer, nodes, elements, parents, params, flows, values, on_node, on_edge) + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = blockDim().y * gridDim().y + for k = index_x:stride_x:size(values,1) + for i = index_y:stride_y:length(layer) + dec_id = @inbounds layer[i] + if dec_id == size(nodes,2) + # populate root flows + flow = zero(eltype(flows)) + else + par_start = @inbounds nodes[3,dec_id] + flow = typemin(eltype(flows)) # log(0) + if !iszero(par_start) + par_end = @inbounds nodes[4,dec_id] + for j = par_start:par_end + par = @inbounds parents[j] + grandpa = @inbounds elements[1,par] + v_gp = @inbounds values[k, grandpa] + prime = elements[2,par] + sub = elements[3,par] + θ = eltype(flows)(params[par]) + if !iszero(v_gp) # edge flow only gets reported when non-zero + f_gp = @inbounds flows[k, grandpa] + single_child = has_single_child(nodes, grandpa) + if single_child + edge_flow = f_gp + else + v_prime = @inbounds values[k, prime] + v_sub = @inbounds values[k, sub] + edge_flow = compute_marg_edge_flow(v_prime, v_sub, v_gp, f_gp, θ) + end + flow = logsumexp_cuda(flow, edge_flow) + # report edge flow only once: + dec_id == prime && on_edge(flows, values, prime, sub, par, grandpa, k, edge_flow, single_child) + end + end + end + end + @inbounds flows[k, dec_id] = flow + on_node(flows, values, dec_id, k, flow) + end + end + return nothing +end + +@inline function compute_marg_edge_flow(p_up, s_up, n_up, n_down, θ) + x = p_up + s_up - n_up + n_down + θ + ifelse(isnan(x), typemin(n_down), 
x) +end diff --git a/src/queries/pr_constraint.jl b/src/queries/pr_constraint.jl new file mode 100644 index 00000000..4594a5c5 --- /dev/null +++ b/src/queries/pr_constraint.jl @@ -0,0 +1,63 @@ +export pr_constraint + +const PRCache = Dict{Tuple{ProbCircuit, LogicCircuit}, Float64} + +# Arthur Choi, Guy Van den Broeck, and Adnan Darwiche. Tractable learning for structured probability +# spaces: A case study in learning preference distributions. In Proceedings of IJCAI, 2015. + +""" +Calculate the probability of the logic formula given by LC for the PC +""" +function pr_constraint(pc_node::StructProbCircuit, lc_node, cache::PRCache=PRCache())::Float64 + + # TODO require that both circuits have an equal vtree for safety. If they don't, then first convert them to have a vtree + @assert respects_vtree(lc_node, vtree(pc_node)) "Both circuits do not have an equal vtree" + + # Cache hit + if (pc_node, lc_node) in keys(cache) + return cache[pc_node, lc_node] + + # Boundary cases + elseif isliteralgate(pc_node) + # Both are literals, just check whether they agrees with each other + if isliteralgate(lc_node) + if literal(pc_node) == literal(lc_node) + return get!(cache, (pc_node, lc_node), 1.0) + else + return get!(cache, (pc_node, lc_node), 0.0) + end + else + pr_constraint(pc_node, children(lc_node)[1], cache) + if length(children(lc_node)) > 1 + pr_constraint(pc_node, children(lc_node)[2], cache) + return get!(cache, (pc_node, lc_node), 1.0) + else + return get!(cache, (pc_node, lc_node), + literal(children(lc_node)[1]) == literal(pc_node) ? 
1.0 : 0.0) + end + end + + # The pc is true + elseif isliteralgate(children(pc_node)[1]) + theta = exp(pc_node.log_probs[1]) + return get!(cache, (pc_node, lc_node), + theta * pr_constraint(children(pc_node)[1], lc_node, cache) + + (1.0 - theta) * pr_constraint(children(pc_node)[2], lc_node, cache)) + + # Both pcs are not trivial + else + prob = 0.0 + for (prob⋀_node, log_theta) in zip(children(pc_node), pc_node.log_probs) + p = children(prob⋀_node)[1] + s = children(prob⋀_node)[2] + + theta = exp(log_theta) + for lc⋀_node in children(lc_node) + r = children(lc⋀_node)[1] + t = children(lc⋀_node)[2] + prob += theta * pr_constraint(p, r, cache) * pr_constraint(s, t, cache) + end + end + return get!(cache, (pc_node, lc_node), prob) + end +end \ No newline at end of file diff --git a/src/queries/sample.jl b/src/queries/sample.jl new file mode 100644 index 00000000..72aa5c88 --- /dev/null +++ b/src/queries/sample.jl @@ -0,0 +1,160 @@ +export sample, to_sampled_dataframes + +import DataFrames: DataFrame, mapcols! +import Random: default_rng + +##################### +# Circuit sampling +##################### + +"Sample states from the circuit distribution." +function sample(pc::ProbCircuit; rng = default_rng()) + states, prs = sample(pc, 1, [missing for i=1:num_variables(pc)]...; rng) + return states[1,:], prs[1] +end + +sample(pc::ProbCircuit, num_samples; rng = default_rng(), gpu=false) = + sample(pc, num_samples, [missing for i=1:num_variables(pc)]...; rng, gpu) + +sample(pc::ProbCircuit, num_samples, inputs::Union{Bool,Missing}...; + rng = default_rng(), gpu=false) = + sample(pc, num_samples, collect(Union{Bool,Missing}, inputs); rng, gpu) + +function sample(pc::ProbCircuit, num_samples, inputs::AbstractVector{Union{Bool,Missing}}; + rng = default_rng(), gpu=false) + data = DataFrame(reshape(inputs, 1, :)) + data = gpu ? 
to_gpu(data) : data + states, prs = sample(pc, num_samples, data; rng) + return states[:,1,:], prs[:,1] +end + +sample(circuit::ProbCircuit, num_samples, data::DataFrame; rng = default_rng()) = + sample(same_device(ParamBitCircuit(circuit, data), data), num_samples, data; rng) + +function sample(pbc::ParamBitCircuit, num_samples, data; Float = Float32, rng = default_rng()) + @assert isgpu(data) == isgpu(pbc) "ParamBitCircuit and data need to be on the same device" + values = marginal_all(pbc, data) + return sample_down(pbc, num_samples, data, values, rng, Float) +end + +"Convert an array of samples into a vector of dataframes" +function to_sampled_dataframes(states) + dfs = mapslices(DataFrame, states, dims = [2,3]) + map(dfs) do df + mapcols!(c -> BitVector(c), df) + end + return dfs +end + +# CPU code + +function sample_down(pbc, num_samples, data, values::Array, rng, ::Type{Float}) where Float + state = zeros(Bool, num_samples, num_examples(data), num_features(data)) + logprob = zeros(Float, num_samples, num_examples(data)) + Threads.@threads for (s_id, ex_id) = collect(Iterators.product(1:size(state,1), 1:size(state,2))) + sample_rec(num_leafs(pbc), params(pbc), nodes(pbc), elements(pbc), ex_id, s_id, num_nodes(pbc), values, state, logprob, rng) + end + return state, logprob +end + +function sample_rec(nl, params, nodes, elements, ex_id, s_id, dec_id, values, state, logprob, rng) + if isleafgate(nl, dec_id) + if isliteralgate(nl, dec_id) + l = literal(nl, dec_id) + @inbounds state[s_id, ex_id, lit2var(l)] = (l > 0) + end + else + edge_log_pr, prime, sub = sample_child_cpu(params, nodes, elements, ex_id, dec_id, values, rng) + @inbounds logprob[s_id, ex_id] += edge_log_pr + sample_rec(nl, params, nodes, elements, ex_id, s_id, prime, values, state, logprob, rng) + sample_rec(nl, params, nodes, elements, ex_id, s_id, sub, values, state, logprob, rng) + end +end + +function sample_child_cpu(params, nodes, elements, ex_id, dec_id, values, rng) + @inbounds els_start 
= nodes[1,dec_id] + @inbounds els_end = nodes[2,dec_id] + threshold = log(rand(rng)) + values[ex_id, dec_id] + cumul_prob = -Inf + j_sampled = els_end - els_start + 1 # give all numerical error probability to the last node + for j = els_start:els_end + @inbounds prime = elements[2,j] + @inbounds sub = elements[3,j] + @inbounds pr = values[ex_id, prime] + values[ex_id, sub] + params[j] + Δ = ifelse(cumul_prob == pr, zero(cumul_prob), abs(cumul_prob - pr)) + cumul_prob = max(cumul_prob, pr) + log1p(exp(-Δ)) + if cumul_prob > threshold + j_sampled = j + break + end + end + @inbounds return params[j_sampled], elements[2,j_sampled], elements[3,j_sampled] +end + + +# GPU code + +function sample_down(pbc, num_samples, data, values::CuArray, rng, ::Type{Float}) where Float + CUDA.seed!(rand(rng, UInt)) + state = CUDA.zeros(Bool, num_samples, num_examples(data), num_features(data)) + logprob = CUDA.zeros(Float, num_samples, num_examples(data)) + stack = CUDA.zeros(Int32, num_samples, num_examples(data), num_features(data)+3) + @inbounds stack[:,:,1] .= 1 # start with 1 dec_id in the stack + @inbounds stack[:,:,2] .= num_nodes(pbc) # start with the pc in the stack + num_threads = balance_threads(num_samples, num_examples(data), 8) + num_blocks = (ceil(Int, num_samples/num_threads[1]), + ceil(Int, num_examples(data)/num_threads[2])) + CUDA.@sync while true + r = CUDA.rand(num_samples, num_examples(data)) + @cuda threads=num_threads blocks=num_blocks sample_cuda_kernel(num_leafs(pbc), params(pbc), nodes(pbc), elements(pbc), values, state, logprob, stack, r, Float) + all_empty(stack) && break + end + CUDA.unsafe_free!(values) # save the GC some effort + return state, logprob +end + +function sample_cuda_kernel(nl, params, nodes, elements, values, state, logprob, stack, r, ::Type{Float}) where Float + index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x + index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y + stride_x = blockDim().x * gridDim().x + stride_y = 
blockDim().y * gridDim().y + for s_id = index_x:stride_x:size(state,1) + for ex_id = index_y:stride_y:size(state,2) + dec_id = pop_cuda!(stack, s_id, ex_id) + if dec_id > zero(eltype(stack)) + if isleafgate(nl, dec_id) + if isliteralgate(nl, dec_id) + l = literal(nl, dec_id) + var = lit2var(l) + @inbounds state[s_id, ex_id, var] = (l > 0) + end + else + edge_log_pr, prime, sub = sample_child_cuda(params, nodes, elements, s_id, ex_id, dec_id, values, r, Float) + @inbounds logprob[s_id, ex_id] += edge_log_pr + push_cuda!(stack, prime, s_id, ex_id) + push_cuda!(stack, sub, s_id, ex_id) + end + end + end + end + return nothing +end + +function sample_child_cuda(params, nodes, elements, s_id, ex_id, dec_id, values, r, ::Type{Float}) where Float + @inbounds els_start = nodes[1,dec_id] + @inbounds els_end = nodes[2,dec_id] + @inbounds threshold = CUDA.log(r[s_id, ex_id]) + values[ex_id, dec_id] + cumul_prob::Float = -Inf + j_sampled = els_end - els_start + 1 # give all numerical error probability to the last node + for j = els_start:els_end + @inbounds prime = elements[2,j] + @inbounds sub = elements[3,j] + @inbounds pr::Float = values[ex_id, prime] + values[ex_id, sub] + params[j] + cumul_prob = logsumexp_cuda(cumul_prob, pr) + if cumul_prob > threshold + j_sampled = j + break + end + end + @inbounds return params[j_sampled], elements[2,j_sampled], elements[3,j_sampled] +end \ No newline at end of file diff --git a/src/structured_prob_nodes.jl b/src/structured_prob_nodes.jl new file mode 100644 index 00000000..6fbfca38 --- /dev/null +++ b/src/structured_prob_nodes.jl @@ -0,0 +1,141 @@ +export ProbCircuit, StructProbCircuit, StructProbLeafNode, StructProbInnerNode, + StructProbLiteralNode, StructMulNode, StructSumNode, check_parameter_integrity + +##################### +# Prob circuits that are structured, +# meaning that each conjunction is associated with a vtree node. 
+##################### + +"Root of the plain structure probabilistic circuit node hierarchy" +abstract type StructProbCircuit <: ProbCircuit end + +"A plain structured probabilistic leaf node" +abstract type StructProbLeafNode <: StructProbCircuit end + +"A plain structured probabilistic inner node" +abstract type StructProbInnerNode <: StructProbCircuit end + +"A plain structured probabilistic literal leaf node, representing the positive or negative literal of its variable" +mutable struct StructProbLiteralNode <: StructProbLeafNode + literal::Lit + vtree::Vtree + data + counter::UInt32 + StructProbLiteralNode(l,v) = begin + @assert lit2var(l) ∈ v + new(l, v, nothing, 0) + end +end + +"A plain structured probabilistic conjunction node" +mutable struct StructMulNode <: StructProbInnerNode + prime::StructProbCircuit + sub::StructProbCircuit + vtree::Vtree + data + counter::UInt32 + StructMulNode(p,s,v) = begin + @assert isinner(v) "Structured conjunctions must respect inner vtree node" + @assert varsubset_left(vtree(p),v) "$p does not go left in $v" + @assert varsubset_right(vtree(s),v) "$s does not go right in $v" + new(p,s, v, nothing, 0) + end +end + +"A plain structured probabilistic disjunction node" +mutable struct StructSumNode <: StructProbInnerNode + children::Vector{StructProbCircuit} + log_probs::Vector{Float64} + vtree::Vtree # could be leaf or inner + data + counter::UInt32 + StructSumNode(c, v) = + new(c, init_array(Float64, length(c)), v, nothing, 0) +end + +##################### +# traits +##################### + +import LogicCircuits.GateType # make available for extension +@inline GateType(::Type{<:StructProbLiteralNode}) = LiteralGate() +@inline GateType(::Type{<:StructMulNode}) = ⋀Gate() +@inline GateType(::Type{<:StructSumNode}) = ⋁Gate() + +##################### +# methods +##################### + +import LogicCircuits: children, vtree, vtree_safe, respects_vtree # make available for extension +@inline children(n::StructSumNode) = n.children 
+@inline children(n::StructMulNode) = [n.prime,n.sub] + +"Get the vtree corresponding to the argument, or nothing if the node has no vtree" +@inline vtree(n::StructProbCircuit) = n.vtree +@inline vtree_safe(n::StructProbInnerNode) = vtree(n) +@inline vtree_safe(n::StructProbLiteralNode) = vtree(n) + +# ProbCircuit has a default argument for respects: its root's vtree +respects_vtree(circuit::StructProbCircuit) = + respects_vtree(circuit, vtree(circuit)) + +@inline num_parameters_node(n::StructSumNode) = num_children(n) + +##################### +# constructors and compilation +##################### + +multiply(arguments::Vector{<:StructProbCircuit}; + reuse=nothing, use_vtree=nothing) = + multiply(arguments...; reuse, use_vtree) + +function multiply(a1::StructProbCircuit, + a2::StructProbCircuit; + reuse=nothing, use_vtree=nothing) + reuse isa StructMulNode && reuse.prime == a1 && reuse.sub == a2 && return reuse + !(use_vtree isa Vtree) && (reuse isa StructProbCircuit) && (use_vtree = reuse.vtree) + !(use_vtree isa Vtree) && (use_vtree = find_inode(vtree_safe(a1), vtree_safe(a2))) + return StructMulNode(a1, a2, use_vtree) +end + +function summate(arguments::Vector{<:StructProbCircuit}; + reuse=nothing, use_vtree=nothing) + @assert length(arguments) > 0 + reuse isa StructSumNode && reuse.children == arguments && return reuse + !(use_vtree isa Vtree) && (reuse isa StructProbCircuit) && (use_vtree = reuse.vtree) + !(use_vtree isa Vtree) && (use_vtree = mapreduce(vtree_safe, lca, arguments)) + return StructSumNode(arguments, use_vtree) +end + +# claim `StructProbCircuit` as the default `ProbCircuit` implementation that has a vtree + +compile(::Type{ProbCircuit}, a1::Union{Vtree, StructLogicCircuit}, args...) = + compile(StructProbCircuit, a1, args...) + +compile(n::StructProbCircuit, args...) = + compile(typeof(n), root(vtree(n)), args...) 
+ +compile(::Type{<:StructProbCircuit}, c::StructLogicCircuit) = + compile(StructProbCircuit, root(vtree(c)), c) + +compile(::Type{<:StructLogicCircuit}, c::StructProbCircuit) = + compile(StructLogicCircuit, root(vtree(c)), c) + +compile(::Type{<:StructProbCircuit}, ::Vtree, ::Bool) = + error("Probabilistic circuits do not have constant leafs.") + +compile(::Type{<:StructProbCircuit}, vtree::Vtree, l::Lit) = + StructProbLiteralNode(l,find_leaf(lit2var(l),vtree)) + +function compile(::Type{<:StructProbCircuit}, vtree::Vtree, circuit::LogicCircuit) + f_con(n) = error("Cannot construct a probabilistic circuit from constant leafs: first smooth and remove unsatisfiable branches.") + f_lit(n) = compile(StructProbCircuit, vtree, literal(n)) + f_a(n, cns) = multiply(cns...) # note: this will use the LCA as vtree node + f_o(n, cns) = summate(cns) # note: this will use the LCA as vtree node + foldup_aggregate(circuit, f_con, f_lit, f_a, f_o, StructProbCircuit) +end + +function fully_factorized_circuit(::Type{<:StructProbCircuit}, vtree::Vtree) + ff_logic_circuit = fully_factorized_circuit(PlainStructLogicCircuit, vtree) + compile(StructProbCircuit, vtree, ff_logic_circuit) +end diff --git a/src/Probabilistic/VtreeLearner.jl b/src/structurelearner/VtreeLearner.jl similarity index 98% rename from src/Probabilistic/VtreeLearner.jl rename to src/structurelearner/VtreeLearner.jl index 3fd4f719..676f911c 100644 --- a/src/Probabilistic/VtreeLearner.jl +++ b/src/structurelearner/VtreeLearner.jl @@ -224,7 +224,7 @@ function learn_vtree_bottom_up(train_x::PlainXData; α) (_, mi) = mutual_information(feature_matrix(train_x), Data.weights(train_x); α = α) vars = Var.(collect(1:num_features(train_x))) context = BlossomContext(vars, mi) - vtree = bottom_up_vtree(PlainVtreeNode, vars, blossom_bottom_up_curry(context)) + vtree = bottom_up_vtree(PlainVtree, vars, blossom_bottom_up_curry(context)) end ############# diff --git a/src/StructureLearner/ChowLiuTree.jl 
b/src/structurelearner/chow_liu_tree.jl similarity index 84% rename from src/StructureLearner/ChowLiuTree.jl rename to src/structurelearner/chow_liu_tree.jl index b2581869..ff9ca1ab 100644 --- a/src/StructureLearner/ChowLiuTree.jl +++ b/src/structurelearner/chow_liu_tree.jl @@ -1,5 +1,6 @@ -using LightGraphs: SimpleGraph, SimpleDiGraph, complete_graph, add_edge!, kruskal_mst, bfs_tree, center, - connected_components, induced_subgraph, nv, ne, edges, vertices, src, dst +export CLT, learn_chow_liu_tree, parent_vector +using LightGraphs: SimpleGraph, SimpleDiGraph, complete_graph, add_edge!, kruskal_mst, + bfs_tree, center, connected_components, induced_subgraph, nv, ne, edges, vertices, src, dst using SimpleWeightedGraphs: SimpleWeightedGraph using MetaGraphs: MetaDiGraph, set_prop!, props @@ -16,15 +17,12 @@ const CLT = MetaDiGraph learn a Chow-Liu tree from training set `train_x`, with Laplace smoothing factor `α`, specifying the tree root by `clt_root` return a `CLT` """ -function learn_chow_liu_tree(train_x::XData; α = 1.0, clt_root="graph_center")::CLT - learn_chow_liu_tree(WXData(train_x);α=α, clt_root=clt_root) -end - -function learn_chow_liu_tree(train_x::WXData; α = 1.0, clt_root="graph_center")::CLT +function learn_chow_liu_tree(train_x; α = 1.0, clt_root="graph_center", + weight=ones(Float64, num_examples(train_x)))::CLT features_num = num_features(train_x) # calculate mutual information - (dis_cache, MI) = mutual_information(feature_matrix(train_x), Data.weights(train_x); α = α) + (dis_cache, MI) = mutual_information(train_x, weight; α = α) # maximum spanning tree/ forest g = SimpleWeightedGraph(complete_graph(features_num)) @@ -91,9 +89,7 @@ function parent_vector(tree::CLT)::Vector{Int64} return v end -##################### -# Methods for test -##################### +import LogicCircuits: print_tree "Print edges and vertices of a ChowLiu tree" function print_tree(clt::CLT) for e in edges(clt) print(e); print(" ");end diff --git 
a/src/structurelearner/heuristics.jl b/src/structurelearner/heuristics.jl new file mode 100644 index 00000000..7312aa27 --- /dev/null +++ b/src/structurelearner/heuristics.jl @@ -0,0 +1,95 @@ + +using LinearAlgebra: diagind +""" +Pick the edge with maximum flow +""" +function count_downflow(values::Matrix{UInt64}, flows::Matrix{UInt64}, n::LogicCircuit) + dec_id = n.data.node_id + sum(1:size(flows,1)) do i + count_ones(flows[i, dec_id]) + end +end + +function count_downflow(values::Matrix{UInt64}, flows::Matrix{UInt64}, n::LogicCircuit, c::LogicCircuit) + grandpa = n.data.node_id + prime = c.prime.data.node_id + sub = c.sub.data.node_id + edge_count = sum(1:size(flows,1)) do i + count_ones(values[i, prime] & values[i, sub] & flows[i, grandpa]) + end +end + +function downflow_all(values::Matrix{UInt64}, flows::Matrix{UInt64}, n::LogicCircuit, c::LogicCircuit) + grandpa = n.data.node_id + prime = c.prime.data.node_id + sub = c.sub.data.node_id + edge = map(1:size(flows,1)) do i + digits(Bool, values[i, prime] & values[i, sub] & flows[i, grandpa], base=2, pad=64) + end + vcat(edge...) 
+end + +function eFlow(values, flows, candidates::Vector{Tuple{Node, Node}}) + edge2flows = map(candidates) do (or, and) + count_downflow(values, flows, or, and) + end + (max_flow, max_edge_id) = findmax(edge2flows) + candidates[max_edge_id], max_flow +end + +""" +Pick the variable with maximum sum of mutual information +""" +function vMI(values, flows, edge, vars::Vector{Var}, train_x) + examples_id = downflow_all(values, flows, edge...)[1:num_examples(train_x)] + sub_matrix = train_x[examples_id, vars] + (_, mi) = mutual_information(sub_matrix; α=1.0) + mi[diagind(mi)] .= 0 + scores = dropdims(sum(mi, dims = 1), dims = 1) + var = vars[argmax(scores)] + score = maximum(scores) + var, score +end + +""" +Pick the edge randomly +""" +function eRand(candidates::Vector{Tuple{Node, Node}}) + return rand(candidates) +end + +""" +Pick the variable randomly +""" +function vRand(vars::Vector{Var}) + lits = collect(Set{Lit}(scope[and])) + vars = Var.(intersect(filter(l -> l > 0, lits), - filter(l -> l < 0, lits))) + return Var(rand(vars)) +end + +function heuristic_loss(circuit::LogicCircuit, train_x; pick_edge="eFlow", pick_var="vMI") + candidates, scope = split_candidates(circuit) + values, flows = satisfies_flows(circuit, train_x) + if pick_edge == "eFlow" + edge, flow = eFlow(values, flows, candidates) + elseif pick_edge == "eRand" + edge = eRand(candidates) + else + error("Heuristics $pick_edge to pick edge is undefined.") + end + + or, and = edge + lits = collect(Set{Lit}(scope[and])) + vars = Var.(intersect(filter(l -> l > 0, lits), - filter(l -> l < 0, lits))) + + if pick_var == "vMI" + var, score = vMI(values, flows, edge, vars, train_x) + elseif pick_var == "vRand" + var = vRand(vars) + else + error("Heuristics $pick_var to pick variable is undefined.") + end + + return (or, and), var +end + diff --git a/src/structurelearner/init.jl b/src/structurelearner/init.jl new file mode 100644 index 00000000..5c9155d7 --- /dev/null +++ b/src/structurelearner/init.jl @@ -0,0 
+1,167 @@ +export learn_chow_liu_tree_circuit, learn_vtree_from_clt, compile_sdd_from_clt +using LightGraphs: outneighbors +using MetaGraphs: get_prop + +""" +Learning from data a structured-decomposable circuit with several structure learning algorithms +""" +function learn_chow_liu_tree_circuit(data; + pseudocount = 1.0, + algo = "chow-liu", algo_kwargs=(α=1.0, clt_root="graph_center"), + vtree = "chow-liu", vtree_kwargs=(vtree_mode="balanced",)) + if algo == "chow-liu" + clt = learn_chow_liu_tree(data; algo_kwargs...) + vtree = learn_vtree_from_clt(clt; vtree_kwargs...) + lc = compile_sdd_from_clt(clt, vtree) + pc = ProbCircuit(lc) + estimate_parameters(pc, data; pseudocount=pseudocount) + pc, vtree + else + error("Cannot learn a structured-decomposable circuit with algorithm $algo") + end +end + +############# +# Learn PlainVtree from CLT +############# + +" +Learn a vtree from clt, +with strategy (close to) `linear` or `balanced` +" +function learn_vtree_from_clt(clt::CLT; vtree_mode::String)::PlainVtree + roots = [i for (i, x) in enumerate(parent_vector(clt)) if x == 0] + rootnode = construct_children(Var.(roots), clt, vtree_mode) + + return rootnode +end + +function construct_node(v::Var, clt::CLT, strategy::String)::PlainVtree + children = Var.(outneighbors(clt, v)) + if isempty(children) # leaf node + return PlainVtreeLeafNode(v) + else + right = construct_children(children, clt, strategy) + return add_parent(v, right) + end +end + +function construct_children(children::Vector{Var}, clt::CLT, strategy::String)::PlainVtree + sorted_vars = sort(collect(children)) + children_nodes = Vector{PlainVtree}() + foreach(x -> push!(children_nodes, construct_node(x, clt, strategy)), sorted_vars) + + if strategy == "linear" + construct_children_linear(children_nodes, clt) + elseif strategy == "balanced" + construct_children_balanced(children_nodes, clt) + else + throw("Unknown type of strategy") + end +end + +function 
construct_children_linear(children_nodes::Vector{PlainVtree}, clt::CLT)::PlainVtree + children_nodes = Iterators.Stateful(reverse(children_nodes)) + + right = popfirst!(children_nodes) + for left in children_nodes + right = PlainVtreeInnerNode(left, right) + end + return right +end + +function construct_children_balanced(children_nodes::Vector{PlainVtree}, clt::CLT)::PlainVtree + if length(children_nodes) == 1 + return children_nodes[1] + elseif length(children_nodes) == 2 + return PlainVtreeInnerNode(children_nodes[1], children_nodes[2]) + else + len = trunc(Int64, length(children_nodes) / 2) + left = construct_children_balanced(children_nodes[1 : len], clt) + right = construct_children_balanced(children_nodes[len + 1 : end], clt) + return PlainVtreeInnerNode(left, right) + end +end + +function add_parent(parent::Var, children::PlainVtree) + return PlainVtreeInnerNode(PlainVtreeLeafNode(parent), children) +end + +##################### +# Compile PSDD from CLT and vtree +##################### + +"Compile a psdd circuit from clt and vtree" +function compile_sdd_from_clt(clt::CLT, vtree::PlainVtree)::PlainStructLogicCircuit + + parent_clt = Var.(parent_vector(clt)) + v2p = Dict{PlainVtree, Vector{PlainStructLogicCircuit}}() + + function add_mapping!(v::PlainVtree, circuits) + if !haskey(v2p, v); v2p[v] = Vector{PlainStructLogicCircuit}(); end + foreach(c -> if !(c in v2p[v]) push!(v2p[v], c);end, circuits) + end + + # compile vtree leaf node to terminal/true node + function compile_from_vtree_node(v::PlainVtreeLeafNode) + var = v.var + children = Var.(outneighbors(clt, var)) + cpt = get_prop(clt, var, :cpt) + parent = parent_clt[var] + if isequal(children, []) + circuit = compile_true_nodes(var, v; num=length(cpt) ÷ 2) + else + circuit = compile_canonical_literals(var, v) + end + add_mapping!(v, circuit) + nothing + end + + # compile to decision node + function compile_from_vtree_node(v::PlainVtreeInnerNode) + left_var = left_most_descendent(v.left).var + right_var = 
left_most_descendent(v.right).var + left_circuit = v2p[v.left] + right_circuit = v2p[v.right] + + if parent_clt[left_var] == parent_clt[right_var] # two nodes are independent, compile to seperate decision nodes + circuit = [compile_decision_node([l], [r], v) for (l, r) in zip(left_circuit, right_circuit)] + elseif left_var == parent_clt[right_var] # conditioned on left + cpt = get_prop(clt, left_var, :cpt) + circuit = compile_decision_nodes(left_circuit, right_circuit, v; num=length(cpt) ÷ 2) + else + throw("PlainVtree are not learned from the same CLT") + end + add_mapping!(v, circuit) + nothing + end + + foreach(compile_from_vtree_node, vtree) + + v2p[vtree][end] +end + +##################### +# Construct circuit node +##################### +"Construct decision nodes given `primes` and `subs`" +function compile_decision_node(primes::Vector{<:PlainStructLogicCircuit}, subs::Vector{<:PlainStructLogicCircuit}, vtree::PlainVtreeInnerNode) + elements = [conjoin(prime, sub; use_vtree=vtree) for (prime, sub) in zip(primes, subs)] + return disjoin(elements; use_vtree=vtree) +end + +"Construct literal nodes given variable `var`" +function compile_canonical_literals(var::Var, vtree::PlainVtreeLeafNode) + return [PlainStructLiteralNode( var2lit(var), vtree), PlainStructLiteralNode(-var2lit(var), vtree)] +end + +"Construct true nodes given variable `var`" +function compile_true_nodes(var::Var, vtree::PlainVtreeLeafNode; num) + pos, neg = compile_canonical_literals(var, vtree) + return [disjoin([pos, neg]; use_vtree = vtree) for _ in 1 : num] +end + +"Construct decision nodes conditiond on different distribution" +function compile_decision_nodes(primes::Vector{<:PlainStructLogicCircuit}, subs::Vector{<:PlainStructLogicCircuit}, vtree::PlainVtreeInnerNode; num) + return [compile_decision_node(primes, subs, vtree) for _ in 1 : num] +end diff --git a/src/structurelearner/learner.jl b/src/structurelearner/learner.jl new file mode 100644 index 00000000..4ac904a9 --- /dev/null +++ 
b/src/structurelearner/learner.jl @@ -0,0 +1,39 @@ +export learn_single_model +using LogicCircuits: split_step, struct_learn +using Statistics: mean +using Random +""" +Learn structure decomposable circuits +""" +function learn_single_model(train_x; + pick_edge="eFlow", pick_var="vMI", depth=1, + pseudocount=1.0, + sanity_check=true, + maxiter=typemax(Int), + seed=1337) + + # init + Random.seed!(seed) + pc, vtree = learn_struct_prob_circuit(train_x) + + # structure_update + loss(circuit) = heuristic_loss(circuit, train_x; pick_edge=pick_edge, pick_var=pick_var) + pc_split_step(circuit) = begin + c::ProbCircuit, = split_step(circuit; loss=loss, depth=depth, sanity_check=sanity_check) + estimate_parameters(c, train_x; pseudocount=pseudocount) + return c, missing + end + iter = 0 + log_per_iter(circuit) = begin + ll = EVI(circuit, train_x) + println("Log likelihood of iteration $iter is $(mean(ll))") + println() + iter += 1 + false + end + log_per_iter(pc) + pc = struct_learn(pc; + primitives=[pc_split_step], kwargs=Dict(pc_split_step=>()), + maxiter=maxiter, stop=log_per_iter) +end + diff --git a/test/IO/PSDDParserTest.jl b/test/LoadSave/circuit_loaders_tests.jl similarity index 61% rename from test/IO/PSDDParserTest.jl rename to test/LoadSave/circuit_loaders_tests.jl index d880024a..1829c16b 100644 --- a/test/IO/PSDDParserTest.jl +++ b/test/LoadSave/circuit_loaders_tests.jl @@ -5,25 +5,27 @@ using ProbabilisticCircuits @testset "Load a small PSDD and test methods" begin file = zoo_psdd_file("little_4var.psdd") prob_circuit = load_prob_circuit(file); - @test prob_circuit isa ProbΔ + @test prob_circuit isa ProbCircuit # Testing number of nodes and parameters @test 9 == num_parameters(prob_circuit) - @test 20 == size(prob_circuit)[1] + @test 20 == num_nodes(prob_circuit) # Testing Read Parameters EPS = 1e-7 - @test abs(prob_circuit[13].log_thetas[1] - (-1.6094379124341003)) < EPS - @test abs(prob_circuit[13].log_thetas[2] - (-1.2039728043259361)) < EPS - @test 
abs(prob_circuit[13].log_thetas[3] - (-0.916290731874155)) < EPS - @test abs(prob_circuit[13].log_thetas[4] - (-2.3025850929940455)) < EPS + or1 = children(children(prob_circuit)[1])[2] + @test abs(or1.log_probs[1] - (-1.6094379124341003)) < EPS + @test abs(or1.log_probs[2] - (-1.2039728043259361)) < EPS + @test abs(or1.log_probs[3] - (-0.916290731874155)) < EPS + @test abs(or1.log_probs[4] - (-2.3025850929940455)) < EPS - @test abs(prob_circuit[18].log_thetas[1] - (-2.3025850929940455)) < EPS - @test abs(prob_circuit[18].log_thetas[2] - (-2.3025850929940455)) < EPS - @test abs(prob_circuit[18].log_thetas[3] - (-2.3025850929940455)) < EPS - @test abs(prob_circuit[18].log_thetas[4] - (-0.35667494393873245)) < EPS + or2 = children(children(prob_circuit)[1])[1] + @test abs(or2.log_probs[1] - (-2.3025850929940455)) < EPS + @test abs(or2.log_probs[2] - (-2.3025850929940455)) < EPS + @test abs(or2.log_probs[3] - (-2.3025850929940455)) < EPS + @test abs(or2.log_probs[4] - (-0.35667494393873245)) < EPS - @test abs(prob_circuit[20].log_thetas[1] - (0.0)) < EPS + @test abs(prob_circuit.log_probs[1] - (0.0)) < EPS end psdd_files = ["little_4var.psdd", "msnbc-yitao-a.psdd", "msnbc-yitao-b.psdd", "msnbc-yitao-c.psdd", "msnbc-yitao-d.psdd", "msnbc-yitao-e.psdd", "mnist-antonio.psdd"] diff --git a/test/IO/CircuitSaverTest.jl b/test/LoadSave/circuit_savers_tests.jl similarity index 71% rename from test/IO/CircuitSaverTest.jl rename to test/LoadSave/circuit_savers_tests.jl index 7ce513e5..d8696f07 100644 --- a/test/IO/CircuitSaverTest.jl +++ b/test/LoadSave/circuit_savers_tests.jl @@ -9,13 +9,15 @@ using ProbabilisticCircuits zoo_psdd_file("little_4var.psdd"), zoo_vtree_file("little_4var.vtree")) # load, save, and load as .psdd + # TODO reinstate after fix Struct Prob Circuit save_circuit("$tmp/temp.psdd", circuit, vtree) - save(vtree, "$tmp/temp.vtree"); + save_vtree(vtree, "$tmp/temp.vtree"); + load_struct_prob_circuit("$tmp/temp.psdd", "$tmp/temp.vtree") # save and load as .sdd 
- save_circuit("$tmp/temp.sdd", circuit, vtree) - save(vtree, "$tmp/temp.vtree") + save_circuit("$tmp/temp.sdd", PlainStructLogicCircuit(circuit), vtree) + save_vtree(vtree, "$tmp/temp.vtree") end diff --git a/test/Logistic/LogisticCircuitTest.jl b/test/Logistic/LogisticCircuitTest.jl deleted file mode 100644 index 5ca7ab24..00000000 --- a/test/Logistic/LogisticCircuitTest.jl +++ /dev/null @@ -1,46 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - -# This tests are supposed to test queries on the circuits -@testset "Logistic Circuit Class Conditional" begin - # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to - # match with python version. - - EPS = 1e-7; - my_opts = (max_factors= 2, - compact⋀=false, - compact⋁=false) - - logistic_circuit = zoo_lc("little_4var.circuit", 2); - @test logistic_circuit isa Vector{<:LogisticΔNode}; - - flow_circuit = FlowΔ(logistic_circuit, 16, Float64, my_opts) - @test flow_circuit isa Vector{<:FlowΔNode}; - - # Step 1. Check Probabilities for 3 samples - data = XData(Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1])); - - true_prob = [3.43147972 4.66740416; - 4.27595352 2.83503504; - 3.67415087 4.93793472] - - CLASSES = 2 - calc_prob = class_conditional_likelihood_per_instance(flow_circuit, CLASSES, data) - - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ calc_prob[i,j] atol= EPS; - end - end - - # 2. 
Testing different API - fc2, calc_prob2 = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) - for i = 1:3 - for j = 1:2 - @test true_prob[i,j] ≈ calc_prob2[i,j] atol= EPS; - end - end - - -end \ No newline at end of file diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl new file mode 100644 index 00000000..579176b3 --- /dev/null +++ b/test/Logistic/logistic_tests.jl @@ -0,0 +1,82 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +# This tests are supposed to test queries on the circuits +@testset "Logistic Circuit Query and Parameter Tests" begin + # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to + # match with python version. + + # CLASSES = 2 + + # logistic_circuit = zoo_lc("little_4var.circuit", CLASSES) + # @test logistic_circuit isa LogisticCircuit + + # # check probabilities for binary samples + # data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1]) + # # true_weight_func = [3.43147972 4.66740416; + # # 4.27595352 2.83503504; + # # 3.67415087 4.93793472] + # true_prob = [0.9686740008311808 0.9906908445371728; + # 0.9862917392724188 0.9445399509069984; + # 0.9752568185086389 0.9928816444223209] + + # class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) + # for i = 1:size(true_prob)[1] + # for j = 1:CLASSES + # @test true_prob[i,j] ≈ class_prob[i,j] + # end + # end + + # # check probabilities for float samples + # data = Float32.(data) + # class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data) + # for i = 1:size(true_prob)[1] + # for j = 1:CLASSES + # @test true_prob[i,j] ≈ class_prob[i,j] + # end + # end + + # # check predicted_classes + # true_labels = [2, 1, 2] + # predicted_classes = predict_class(logistic_circuit, CLASSES, data) + # @test all(predicted_classes .== true_labels) + + # # check accuracy + # @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0 + + # # check parameter updates + # 
original_literal_parameters = Dict{Int, Vector{Float64}}() + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # original_literal_parameters[c.literal] = copy(theta) + # end + # end + # end + # end + + # one_hot_labels = [0.0 1.0; + # 1.0 0.0; + # 0.0 1.0] + # one_hot_labels = Float32.(one_hot_labels) + # true_error = true_prob .- one_hot_labels + # step_size = 0.1 + # learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true) + + # foreach(logistic_circuit) do ln + # if ln isa Logistic⋁Node + # foreach(ln.children, eachrow(ln.thetas)) do c, theta + # if c isa LogisticLiteral + # for class = 1:CLASSES + # true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1] + # updated_amount = theta[class] - original_literal_parameters[c.literal][class] + # @test updated_amount ≈ true_update_amount atol=1e-7 + # end + # end + # end + # end + # end + +end \ No newline at end of file diff --git a/test/Probabilistic/CircuitQueriesTest.jl b/test/Probabilistic/CircuitQueriesTest.jl deleted file mode 100644 index 26a31784..00000000 --- a/test/Probabilistic/CircuitQueriesTest.jl +++ /dev/null @@ -1,162 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - -# This tests are supposed to test queries on the circuits -@testset "Probability of Full Evidence" begin - # Uses a PSDD with 4 variables, and tests 3 of the configurations to - # match with python. Also tests all probabilities sum up to 1. - - EPS = 1e-7; - prob_circuit = zoo_psdd("little_4var.psdd"); - @test prob_circuit isa Vector{<:ProbΔNode}; - - flow_circuit = FlowΔ(prob_circuit, 16, Bool) - @test flow_circuit isa Vector{<:FlowΔNode}; - - - # Step 1. 
Check Probabilities for 3 samples - data = XData(Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1])); - true_prob = [0.07; 0.03; 0.13999999999999999] - - calc_prob = log_likelihood_per_instance(flow_circuit, data) - calc_prob = exp.(calc_prob) - - for i = 1:3 - @test true_prob[i] ≈ calc_prob[i] atol= EPS; - end - - # Step 2. Add up all probabilities and see if they add up to one - N = 4; - data_all = XData(generate_data_all(N)) - - calc_prob_all = log_likelihood_per_instance(flow_circuit, data_all) - calc_prob_all = exp.(calc_prob_all) - sum_prob_all = sum(calc_prob_all) - - @test 1 ≈ sum_prob_all atol = EPS; -end - -@testset "Probability of partial Evidence (marginals)" begin - EPS = 1e-7; - prob_circuit = zoo_psdd("little_4var.psdd"); - - data = XData( - Int8.([0 0 0 0; 0 1 1 0; 0 0 1 1; - 0 0 0 -1; -1 1 0 -1; -1 -1 -1 -1; 0 -1 -1 -1]) - ); - true_prob = [0.07; 0.03; 0.13999999999999999; - 0.3499999999999; 0.1; 1.0; 0.8] - - opts = (compact⋀=false, compact⋁=false) - flow_circuit = UpFlowΔ(prob_circuit, 16, Float64, opts) - calc_prob = marginal_log_likelihood_per_instance(flow_circuit, data) - calc_prob = exp.(calc_prob) - - for i = 1:length(true_prob) - @test true_prob[i] ≈ calc_prob[i] atol= EPS; - end - - # Now trying the other api without instantiating a flow circuit - fc2, calc_prob2 = marginal_log_likelihood_per_instance(prob_circuit, data) - calc_prob2 = exp.(calc_prob2) - for i = 1:length(true_prob) - @test true_prob[i] ≈ calc_prob2[i] atol= EPS; - end - -end - -@testset "Marginal Pass Down" begin - EPS = 1e-7; - prob_circuit = zoo_psdd("little_4var.psdd"); - - N = 4 - data_full = XData(Int8.(generate_data_all(N))) - opts= (compact⋀=false, compact⋁=false) - - flow_circuit = FlowΔ(prob_circuit, 16, Float64, opts) - flow_circuit_marg = FlowΔ(prob_circuit, 16, Float64, opts) - - - # Comparing with down pass with fully obeserved data - pass_up_down(flow_circuit, data_full) - marginal_pass_up_down(flow_circuit_marg, data_full) - - for (ind, node) in enumerate(flow_circuit) - 
if node isa HasDownFlow - @test all( isapprox.(downflow(flow_circuit[ind]), downflow(flow_circuit_marg[ind]), atol = EPS) ) - end - end - - - # Validating one example with missing features done by hand - data_partial = XData(Int8.([-1 1 -1 1])) - flow_circuit_part = FlowΔ(prob_circuit, 16, Float64, opts) - ProbabilisticCircuits.marginal_pass_up_down(flow_circuit_part, data_partial) - - # (node index, correct down_flow_value) - true_vals = [(1, 0.5), - (2, 1.0), - (3, 1/3), - (4, 1.0), - (5, 0.5), - (6, 0.0), - (7, 2/3), - (8, 0.0), - (9, 0.3333333333333), - (10, 0.0), - (11, 0.6666666666666), - (12, 0.0), - (13, 1.0), - (14, 0.5), - (15, 0.0), - (16, 0.5), - (17, 0.0), - (18, 1.0), - (19, 1.0), - (20, 1.0)] - - for ind_val in true_vals - @test downflow(flow_circuit_part[ind_val[1]])[1] ≈ ind_val[2] atol= EPS - end - -end - -function test_mpe_brute_force(prob_circuit, evidence) - EPS = 1e-9; - result = MPE(prob_circuit, evidence); - for idx = 1 : num_examples(evidence) - marg = XData(generate_all(evidence.x[idx,:])); - fc, lls = log_likelihood_per_instance(prob_circuit, marg); - brute_mpe = marg.x[argmax(lls), :] - - # Compare and validate p(result[idx]) == p(brute_mpe) - comp_data = XData(vcat(result[idx,:]', brute_mpe')) - fc2, lls2 = log_likelihood_per_instance(prob_circuit, comp_data); - - @test lls2[1] ≈ lls2[2] atol= EPS - end -end - -@testset "MPE Brute Force Test Small (4 var)" begin - prob_circuit = zoo_psdd("little_4var.psdd"); - evidence = XData( Int8.( [-1 0 0 0; - 0 -1 -1 0; - 1 1 1 -1; - 1 0 1 0; - -1 -1 -1 1; - -1 -1 -1 -1] )) - - test_mpe_brute_force(prob_circuit, evidence) - -end - -@testset "MPE Brute Force Test Big (15 var)" begin - N = 15 - COUNT = 10 - - prob_circuit = zoo_psdd("exp-D15-N1000-C4.psdd"); - evidence = XData(Int8.(rand( (-1,0,1), (COUNT, N) ))) - - test_mpe_brute_force(prob_circuit, evidence) -end diff --git a/test/Probabilistic/EntropyKLDTest.jl b/test/Probabilistic/EntropyKLDTest.jl deleted file mode 100644 index 
ad459fcf..00000000 --- a/test/Probabilistic/EntropyKLDTest.jl +++ /dev/null @@ -1,48 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - -@testset "Entropy and KLD" begin - pc1, vtree = load_struct_prob_circuit( - zoo_psdd_file("simple2.1.psdd"), zoo_vtree_file("simple2.vtree")) - pc2, vtree = load_struct_prob_circuit( - zoo_psdd_file("simple2.2.psdd"), zoo_vtree_file("simple2.vtree")) - pc3, vtree = load_struct_prob_circuit( - zoo_psdd_file("simple2.3.psdd"), zoo_vtree_file("simple2.vtree")) - - # Entropy calculation test - @test abs(psdd_entropy(pc1[end]) - 1.2899219826090118) < 1e-8 - @test abs(psdd_entropy(pc2[end]) - 0.9359472745536583) < 1e-8 - - # KLD Tests # - # KLD base tests - pr_constraint_cache = Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - kl_divergence_cache = Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}() - - @test_throws AssertionError("Both nodes not normalized for same vtree node") psdd_kl_divergence(pc1[1], pc1[3], kl_divergence_cache, pr_constraint_cache) - @test_throws AssertionError("Both nodes not normalized for same vtree node") psdd_kl_divergence(pc1[2], pc1[3], kl_divergence_cache, pr_constraint_cache) - @test_throws AssertionError("Both nodes not normalized for same vtree node") psdd_kl_divergence(pc1[1], pc1[4], kl_divergence_cache, pr_constraint_cache) - @test_throws AssertionError("Both nodes not normalized for same vtree node") psdd_kl_divergence(pc1[1], pc1[5], kl_divergence_cache, pr_constraint_cache) - @test_throws AssertionError("Both nodes not normalized for same vtree node") psdd_kl_divergence(pc1[2], pc1[5], kl_divergence_cache, pr_constraint_cache) - - @test_throws AssertionError("Prob⋀ not a valid PSDD node for KL-Divergence") psdd_kl_divergence(pc1[1], pc1[6], kl_divergence_cache, pr_constraint_cache) - @test_throws AssertionError("Prob⋀ not a valid PSDD node for KL-Divergence") psdd_kl_divergence(pc1[7], pc1[2], kl_divergence_cache, pr_constraint_cache) - @test_throws 
AssertionError("Prob⋀ not a valid PSDD node for KL-Divergence") psdd_kl_divergence(pc1[6], pc2[7], kl_divergence_cache, pr_constraint_cache) - - # KLD calculation test - @test abs(psdd_kl_divergence(pc1[1], pc2[1], kl_divergence_cache, pr_constraint_cache) - 0.0) < 1e-8 - @test abs(psdd_kl_divergence(pc1[1], pc1[2], kl_divergence_cache, pr_constraint_cache) - 0.0) < 1e-8 - @test abs(psdd_kl_divergence(pc1[1], pc2[3], kl_divergence_cache, pr_constraint_cache) + log(0.9)) < 1e-8 - @test abs(psdd_kl_divergence(pc1[2], pc2[3], kl_divergence_cache, pr_constraint_cache) + log(0.1)) < 1e-8 - @test abs(psdd_kl_divergence(pc1[5], pc2[4], kl_divergence_cache, pr_constraint_cache) - 0.2 * log(0.2)) < 1e-8 - @test abs(psdd_kl_divergence(pc1[5], pc2[5], kl_divergence_cache, pr_constraint_cache) - 0.8 * log(0.8)) < 1e-8 - @test abs(psdd_kl_divergence(pc1[5], pc2[5], kl_divergence_cache, pr_constraint_cache) - 0.8 * log(0.8)) < 1e-8 - @test abs(psdd_kl_divergence(pc1[end], pc2[end]) - 0.5672800167911778) < 1e-8 - - kl_divergence_cache = Dict{Tuple{ProbΔNode, ProbΔNode}, Float64}() - @test abs(psdd_kl_divergence(pc2[4], pc3[5], kl_divergence_cache, pr_constraint_cache) - 0.0) < 1e-8 - @test abs(psdd_kl_divergence(pc2[4], pc3[4], kl_divergence_cache, pr_constraint_cache) - 0.0) < 1e-8 - @test abs(psdd_kl_divergence(pc2[3], pc3[3], kl_divergence_cache, pr_constraint_cache) - 0.9 * log(0.9 / 0.5) - 0.1 * log(0.1 / 0.5)) < 1e-8 - @test abs(psdd_kl_divergence(pc2[end], pc3[end]) - 0.38966506) < 1e-8 - -end diff --git a/test/Probabilistic/PrConstraintTest.jl b/test/Probabilistic/PrConstraintTest.jl deleted file mode 100644 index b7ea73fb..00000000 --- a/test/Probabilistic/PrConstraintTest.jl +++ /dev/null @@ -1,41 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits - - -@testset "pr_constraint Query" begin - # two nodes - simplevtree = zoo_vtree_file("simple2.vtree") - pc, vtree = load_struct_prob_circuit( - zoo_psdd_file("simple2.4.psdd"), simplevtree) - - cache = 
Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - - @test abs(pr_constraint(pc[end], pc[end], cache) - 1.0) < 1e-8 - @test abs(pr_constraint(pc[5], pc[3], cache) - 0.2) < 1e-8 - @test abs(pr_constraint(pc[5], pc[4], cache) - 0.8) < 1e-8 - - file_circuit = "little_4var.circuit" - file_vtree = "little_4var.vtree" - logical_circuit, vtree = load_struct_smooth_logical_circuit( - zoo_lc_file(file_circuit), zoo_vtree_file(file_vtree)) - - pc = zoo_psdd("little_4var.psdd") - - @test abs(pr_constraint(pc[end], logical_circuit[end - 1], cache) - 1.0) < 1e-8 - - # Test with two psdds - pc1, vtree = load_struct_prob_circuit(zoo_psdd_file("simple2.5.psdd"), simplevtree) - pc2, vtree = load_struct_prob_circuit(zoo_psdd_file("simple2.6.psdd"), simplevtree) - - pr_constraint_cache = Dict{Tuple{ProbΔNode, Union{ProbΔNode, StructLogicalΔNode}}, Float64}() - pr_constraint(pc1[end], pc2[end], pr_constraint_cache) - @test abs(pr_constraint_cache[pc1[1], pc2[1]] - 1.0) < 1e-8 - @test abs(pr_constraint_cache[pc1[1], pc2[2]] - 0.0) < 1e-8 - @test abs(pr_constraint_cache[pc1[3], pc2[4]] - 1.0) < 1e-8 - @test abs(pr_constraint_cache[pc1[3], pc2[5]] - 0.0) < 1e-8 - @test abs(pr_constraint_cache[pc1[9], pc2[8]] - 1.0) < 1e-8 - @test abs(pr_constraint_cache[pc1[5], pc2[4]] - 0.2) < 1e-8 - @test abs(pr_constraint_cache[pc1[5], pc2[5]] - 0.8) < 1e-8 - @test abs(pr_constraint_cache[pc1[2], pc2[3]] - 1.0) < 1e-8 -end \ No newline at end of file diff --git a/test/Probabilistic/SamplingTest.jl b/test/Probabilistic/SamplingTest.jl deleted file mode 100644 index 0e1afd5c..00000000 --- a/test/Probabilistic/SamplingTest.jl +++ /dev/null @@ -1,79 +0,0 @@ -using Test -using LogicCircuits -using ProbabilisticCircuits -using DataStructures - -@testset "Sampling Test" begin - EPS = 1e-2; - prob_circuit = zoo_psdd("little_4var.psdd"); - flow_circuit = FlowΔ(prob_circuit, 16, Bool); - - N = 4; - data_all = XData(generate_data_all(N)); - - calc_prob_all = 
log_likelihood_per_instance(flow_circuit, data_all); - calc_prob_all = exp.(calc_prob_all); - - using DataStructures - hist = DefaultDict{AbstractString,Float64}(0.0) - - Nsamples = 1000 * 1000 - for i = 1:Nsamples - cur = join(Int.(sample(prob_circuit))) - hist[cur] += 1 - end - - for k in keys(hist) - hist[k] /= Nsamples - end - - for k in keys(hist) - cur = parse(Int32, k, base=2) + 1 # cause Julia arrays start at 1 :( - @test calc_prob_all[cur] ≈ hist[k] atol= EPS; - end - - -end - -@testset "Sampling With Evidence" begin - # TODO (pashak) this test should be improved by adding few more cases - EPS = 1e-3; - prob_circuit = zoo_psdd("little_4var.psdd"); - - opts= (compact⋀=false, compact⋁=false) - flow_circuit = UpFlowΔ(prob_circuit, 1, Float64, opts); - - N = 4; - data = XData(Int8.([0 -1 0 -1])); - calc_prob = marginal_log_likelihood_per_instance(flow_circuit, data); - calc_prob = exp.(calc_prob); - - flow_circuit_all = UpFlowΔ(prob_circuit, 4, Float64, opts); - data_all = XData(Int8.([ - 0 0 0 0; - 0 0 0 1; - 0 1 0 0; - 0 1 0 1; - ])); - calc_prob_all = marginal_log_likelihood_per_instance(flow_circuit_all, data_all); - calc_prob_all = exp.(calc_prob_all); - - calc_prob_all ./= calc_prob[1] - - hist = DefaultDict{AbstractString,Float64}(0.0) - - Nsamples = 1000 * 1000 - for i = 1:Nsamples - cur = join(Int.(sample(flow_circuit))) - hist[cur] += 1 - end - - for k in keys(hist) - hist[k] /= Nsamples - end - - for ind = 1:4 - cur = join(data_all.x[ind, :]) - @test calc_prob_all[ind] ≈ hist[cur] atol= EPS; - end -end \ No newline at end of file diff --git a/test/Project.toml b/test/Project.toml index 77513980..8f317fa7 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,10 +1,14 @@ [deps] -DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Jive = "ba5e3d4b-8524-549f-bc71-e76ad9e9deed" -LightGraphs = 
"093fc24a-ae57-5d10-9952-331d41423f4d" LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599" -MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +DataFrames = "0.21" +Jive = "0.2" +julia = "1.5" diff --git a/test/StructureLearner/CircuitBuilderTest.jl b/test/StructureLearner/CircuitBuilderTest.jl deleted file mode 100644 index 48415a53..00000000 --- a/test/StructureLearner/CircuitBuilderTest.jl +++ /dev/null @@ -1,24 +0,0 @@ -using Test: @test, @testset -using LogicCircuits -using ProbabilisticCircuits - -@testset "Probabilistic circuits learner tests" begin - data = dataset(twenty_datasets("nltcs"); do_shuffle=false, batch_size=-1) - train_x = train(data) - pc = learn_probabilistic_circuit(train_x; pseudocount = 1.0, algo = "chow-liu", algo_kwargs=(α=1.0, clt_root="graph_center")) - - # simple test - @test pc isa ProbΔ - @test check_parameter_integrity(pc) - @test num_parameters(pc) == 62 - @test pc[26].log_thetas[1] ≈ -0.023528423773273476 atol=1.0e-7 - - # all evidence sums to 1 - N = num_features(train_x); - data_all = XData(generate_data_all(N)) - fc = FlowΔ(pc, max_batch_size(train_x), Bool, opts_accumulate_flows) - calc_prob_all = log_likelihood_per_instance(fc, data_all) - calc_prob_all = exp.(calc_prob_all) - sum_prob_all = sum(calc_prob_all) - @test sum_prob_all ≈ 1 atol = 1.0e-7; -end \ No newline at end of file diff --git a/test/StructureLearner/PSDDInitializerTest.jl b/test/StructureLearner/PSDDInitializerTest.jl deleted file mode 100644 index 25a8690c..00000000 --- a/test/StructureLearner/PSDDInitializerTest.jl +++ /dev/null @@ -1,34 +0,0 @@ -using Test: @test, @testset -using LogicCircuits -using ProbabilisticCircuits - -@testset "Probabilistic circuits learner tests" begin - data = dataset(twenty_datasets("nltcs"); do_shuffle=false, batch_size=-1) - train_x = train(data) - (pc, vtree) = 
learn_struct_prob_circuit(train_x; pseudocount = 1.0, algo = "chow-liu", algo_kwargs=(α=1.0, clt_root="graph_center"), - vtree = "chow-liu", vtree_kwargs=(vtree_mode="balanced",)) - - # simple test - @test pc isa ProbΔ - @test vtree isa PlainVtree - @test num_variables(vtree) == num_features(data) - @test check_parameter_integrity(pc) - @test num_parameters(pc) == 74 - - # test below has started to fail -- unclear whether that is a bug or randomness...? - # @test pc[28].log_thetas[1] ≈ -1.1870882896239272 atol=1.0e-7 - - # is structured decomposable - for (n, vars) in variable_scopes(pc) - @test vars == BitSet(variables(origin(n).vtree)) - end - - # all evidence sums to 1 - N = num_features(train_x); - data_all = XData(generate_data_all(N)) - fc = FlowΔ(pc, max_batch_size(train_x), Bool, opts_accumulate_flows) - calc_prob_all = log_likelihood_per_instance(fc, data_all) - calc_prob_all = exp.(calc_prob_all) - sum_prob_all = sum(calc_prob_all) - @test sum_prob_all ≈ 1 atol = 1.0e-7; -end \ No newline at end of file diff --git a/test/broken/Logistic/logistic_tests.jl b/test/broken/Logistic/logistic_tests.jl new file mode 100644 index 00000000..db741936 --- /dev/null +++ b/test/broken/Logistic/logistic_tests.jl @@ -0,0 +1,31 @@ +#TODO: reinstate + +# using Test +# using LogicCircuits +# using ProbabilisticCircuits + +# # This tests are supposed to test queries on the circuits +# @testset "Logistic Circuit Class Conditional" begin +# # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to +# # match with python version. + +# EPS = 1e-7; +# logistic_circuit = zoo_lc("little_4var.circuit", 2); +# @test logistic_circuit isa LogisticCircuit; + +# # Step 1. 
Check Probabilities for 3 samples +# data = Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1]); + +# true_prob = [3.43147972 4.66740416; +# 4.27595352 2.83503504; +# 3.67415087 4.93793472] + +# CLASSES = 2 +# calc_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data) + +# for i = 1:3 +# for j = 1:2 +# @test true_prob[i,j] ≈ calc_prob[i,j] atol= EPS; +# end +# end +# end \ No newline at end of file diff --git a/test/Probabilistic/EMLearnerTest.jl b/test/broken/Mixtures/EMLearnerTest.jl similarity index 100% rename from test/Probabilistic/EMLearnerTest.jl rename to test/broken/Mixtures/EMLearnerTest.jl diff --git a/test/Probabilistic/VtreeLearnerTest.jl b/test/broken/StructureLearner/VtreeLearnerTest.jl similarity index 94% rename from test/Probabilistic/VtreeLearnerTest.jl rename to test/broken/StructureLearner/VtreeLearnerTest.jl index 146929fb..a9dac98a 100644 --- a/test/Probabilistic/VtreeLearnerTest.jl +++ b/test/broken/StructureLearner/VtreeLearnerTest.jl @@ -13,7 +13,7 @@ using ProbabilisticCircuits mktempdir() do tmp save(vtree, "$tmp/test.vtree.dot") psdd = compile_psdd_from_clt(clt, vtree); - @test psdd isa ProbΔ + @test psdd isa ProbCircuit save_as_dot(psdd, "$tmp/test.psdd.dot") end diff --git a/test/StructureLearner/ChowLiuTreeTest.jl b/test/broken/StructureLearner/chow_liu_tree_tests.jl similarity index 87% rename from test/StructureLearner/ChowLiuTreeTest.jl rename to test/broken/StructureLearner/chow_liu_tree_tests.jl index ff624ef0..24b2fb8c 100644 --- a/test/StructureLearner/ChowLiuTreeTest.jl +++ b/test/broken/StructureLearner/chow_liu_tree_tests.jl @@ -5,8 +5,7 @@ using LogicCircuits using ProbabilisticCircuits @testset "Chow-Liu Tree learner tests" begin - data = dataset(twenty_datasets("nltcs"); do_shuffle=false, batch_size=-1) - train_x = train(data) + train_x, _, _ = twenty_datasets("nltcs") clt = learn_chow_liu_tree(train_x; α=1.0, clt_root="graph_center") pv = parent_vector(clt) diff --git 
a/test/broken/StructureLearner/init_tests.jl b/test/broken/StructureLearner/init_tests.jl new file mode 100644 index 00000000..eb38727a --- /dev/null +++ b/test/broken/StructureLearner/init_tests.jl @@ -0,0 +1,40 @@ +# TODO: reinstate + +# using Test: @test, @testset +# using LogicCircuits +# using ProbabilisticCircuits + +# @testset "Probabilistic circuits learner tests" begin +# train_x, _, _ = twenty_datasets("nltcs") + +# @assert train_x isa DataFrame +# @assert isbinarydata(train_x) + +# (pc, vtree) = learn_struct_prob_circuit(train_x) + +# # simple test +# @test pc isa ProbCircuit +# @test vtree isa PlainVtree +# @test num_variables(vtree) == num_features(train_x) +# @test check_parameter_integrity(pc) +# @test num_parameters(pc) == 74 + +# # test below has started to fail -- unclear whether that is a bug or randomness...? +# # @test pc[28].log_probs[1] ≈ -1.1870882896239272 atol=1.0e-7 + +# # is structured decomposable +# for (n, vars) in variables_by_node(pc) +# @test vars == BitSet(variables(n.vtree)) +# end + +# # all evidence sums to 1 +# N = num_features(train_x) +# data_all = generate_data_all(N) +# @assert data_all isa DataFrame +# @assert isbinarydata(data_all) + +# calc_prob_all = log_likelihood_per_instance(pc, data_all) +# calc_prob_all = exp.(calc_prob_all) +# sum_prob_all = sum(calc_prob_all) +# @test sum_prob_all ≈ 1 atol = 1.0e-7; +# end \ No newline at end of file diff --git a/test/Probabilistic/MutualInformationTest.jl b/test/broken/Utils/informations_tests.jl similarity index 100% rename from test/Probabilistic/MutualInformationTest.jl rename to test/broken/Utils/informations_tests.jl diff --git a/test/Reasoning/ExpectationTest.jl b/test/broken/expectation_tests.jl similarity index 78% rename from test/Reasoning/ExpectationTest.jl rename to test/broken/expectation_tests.jl index b0b59a71..c8aafc50 100644 --- a/test/Reasoning/ExpectationTest.jl +++ b/test/broken/expectation_tests.jl @@ -2,19 +2,19 @@ using Test using LogicCircuits using 
ProbabilisticCircuits -function test_expectation_brute_force(pc::ProbΔ, lc::LogisticΔ, data::XData, CLASSES::Int) +function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int) EPS = 1e-7; - COUNT = size(data.x)[1] + COUNT = size(data)[1] # Compute True expectation brute force true_exp = zeros(COUNT, CLASSES) for i in 1:COUNT - row = data.x[i, :] - cur_data_all = XData(generate_all(row)) + row = data[i, :] + cur_data_all = generate_all(row) - fc1, calc_p = log_likelihood_per_instance(pc, cur_data_all) + calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - fc2, calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) true_exp[i, :] = sum(calc_p .* calc_f, dims=1) true_exp[i, :] ./= sum(calc_p) #p_observed end @@ -35,19 +35,19 @@ function test_expectation_brute_force(pc::ProbΔ, lc::LogisticΔ, data::XData, C end end -function test_moment_brute_force(pc::ProbΔ, lc::LogisticΔ, data::XData, CLASSES::Int, moment::Int) +function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int, moment::Int) EPS = 1e-7; - COUNT = size(data.x)[1] + COUNT = size(data)[1] # Compute True moment brute force true_mom = zeros(COUNT, CLASSES) for i in 1:COUNT - row = data.x[i, :] - cur_data_all = XData(generate_all(row)) + row = data[i, :] + cur_data_all = generate_all(row) - fc1, calc_p = log_likelihood_per_instance(pc, cur_data_all) + calc_p = log_likelihood_per_instance(pc, cur_data_all) calc_p = exp.(calc_p) - fc2, calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) + calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all) true_mom[i, :] = sum(calc_p .* (calc_f .^ moment), dims=1) true_mom[i, :] ./= sum(calc_p) #p_observed end @@ -70,7 +70,7 @@ end pc = zoo_psdd(psdd_file); lc = zoo_lc(logistic_file, CLASSES); - data = XData(Int8.([ + data = Int8.([ 0 0 0 0; 
0 1 1 0; 0 0 1 1; @@ -82,7 +82,7 @@ end -1 -1 0 1; -1 -1 -1 1; -1 -1 -1 0; - ])); + ]); test_expectation_brute_force(pc, lc, data, CLASSES) end @@ -97,7 +97,7 @@ end pc = zoo_psdd(psdd_file); lc = zoo_lc(logistic_file, CLASSES); - data = XData(Int8.(rand( (-1,0,1), (COUNT, N) ))) + data = Int8.(rand( (-1,0,1), (COUNT, N) )) test_expectation_brute_force(pc, lc, data, CLASSES) end @@ -112,7 +112,7 @@ end pc = zoo_psdd(psdd_file); lc = zoo_lc(logistic_file, CLASSES); - data = XData(Int8.(rand( (-1,0,1), (COUNT, N) ))) + data = Int8.(rand( (-1,0,1), (COUNT, N) )) test_moment_brute_force(pc, lc, data, CLASSES, 1) test_moment_brute_force(pc, lc, data, CLASSES, 2) @@ -131,7 +131,7 @@ end pc = zoo_psdd(psdd_file); lc = zoo_lc(logistic_file, CLASSES); - data = XData(Int8.(rand( (-1,0,1), (COUNT, N) ))) + data = Int8.(rand( (-1,0,1), (COUNT, N) )) test_moment_brute_force(pc, lc, data, CLASSES, 1) test_moment_brute_force(pc, lc, data, CLASSES, 2) diff --git a/test/helper/gpu.jl b/test/helper/gpu.jl new file mode 100644 index 00000000..95e03927 --- /dev/null +++ b/test/helper/gpu.jl @@ -0,0 +1,9 @@ +using CUDA: CUDA + +function cpu_gpu_agree(f, data; atol=1e-7) + CUDA.functional() && @test f(data) == to_cpu(f(to_gpu(data))) +end + +function cpu_gpu_agree_approx(f, data; atol=1e-7) + CUDA.functional() && @test f(data) ≈ to_cpu(f(to_gpu(data))) atol=atol +end \ No newline at end of file diff --git a/test/helper/plain_logic_circuits.jl b/test/helper/plain_logic_circuits.jl new file mode 100644 index 00000000..f2a66f95 --- /dev/null +++ b/test/helper/plain_logic_circuits.jl @@ -0,0 +1,68 @@ +function little_2var() + v = Var(2) + pos = compile(PlainLogicCircuit, var2lit(v)) + neg = compile(PlainLogicCircuit, -var2lit(v)) + or1 = pos | neg + or2 = pos | neg + + v = Var(1) + pos = compile(PlainLogicCircuit, var2lit(v)) + neg = compile(PlainLogicCircuit, -var2lit(v)) + + and1 = pos & or1 + and2 = neg & or2 + and1 | and2 +end + +function little_3var() + or1 = little_2var() + v = Var(3) 
+ + pos = compile(PlainLogicCircuit, var2lit(v)) + neg = compile(PlainLogicCircuit, -var2lit(v)) + + or2 = disjoin(children(or1)) + + and1 = pos & or1 + and2 = neg & or2 + and1 | and2 +end + +function little_3var_constants() + or1 = little_2var() + v = Var(3) + + t = compile(PlainLogicCircuit, true) + f = compile(PlainLogicCircuit, false) + + pos = compile(PlainLogicCircuit, var2lit(v)) & t + neg = compile(PlainLogicCircuit, -var2lit(v)) & f + + or2 = disjoin(children(or1)) + + and1 = pos & or1 + and2 = neg & or2 + and1 | and2 +end + +function little_4var() + ors = map(1:4) do v + v = Var(v) + pos = compile(PlainLogicCircuit, var2lit(v)) + neg = compile(PlainLogicCircuit, - var2lit(v)) + or = pos | neg + end + and1 = ors[1] & ors[2] + and2 = ors[3] & ors[4] + or = and1 | and2 +end + +function little_5var() + c_4var = little_4var() + v = Var(5) + pos = compile(PlainLogicCircuit, var2lit(v)) + neg = compile(PlainLogicCircuit, - var2lit(v)) + or = pos | neg + and = c_4var & or + Plain⋁Node([and]) +end diff --git a/test/parameters_tests.jl b/test/parameters_tests.jl new file mode 100644 index 00000000..23a93078 --- /dev/null +++ b/test/parameters_tests.jl @@ -0,0 +1,43 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using DataFrames: DataFrame +using CUDA: CUDA + +@testset "MLE tests" begin + + dfb = DataFrame(BitMatrix([true false; true true; false true])) + r = fully_factorized_circuit(ProbCircuit,num_features(dfb)) + + estimate_parameters(r,dfb; pseudocount=1.0) + @test log_likelihood_avg(r,dfb) ≈ LogicCircuits.Utils.fully_factorized_log_likelihood(dfb; pseudocount=1.0) + + estimate_parameters(r,dfb; pseudocount=0.0) + @test log_likelihood_avg(r,dfb) ≈ LogicCircuits.Utils.fully_factorized_log_likelihood(dfb; pseudocount=0.0) + + if CUDA.functional() + + dfb_gpu = to_gpu(dfb) + + estimate_parameters(r,dfb_gpu; pseudocount=1.0) + @test log_likelihood_avg(r,dfb_gpu) ≈ LogicCircuits.Utils.fully_factorized_log_likelihood(dfb; pseudocount=1.0) + + 
estimate_parameters(r,dfb_gpu; pseudocount=0.0) + @test log_likelihood_avg(r,dfb_gpu) ≈ LogicCircuits.Utils.fully_factorized_log_likelihood(dfb; pseudocount=0.0) + + end + +end + +@testset "EM tests" begin + data = DataFrame([true missing]) + vtree2 = PlainVtree(2, :balanced) + pc = fully_factorized_circuit(StructProbCircuit, vtree2).children[1] + uniform_parameters(pc) + pc.children[1].prime.log_probs .= log.([0.3, 0.7]) + pc.children[1].sub.log_probs .= log.([0.4, 0.6]) + pbc = ParamBitCircuit(pc, data) + estimate_parameters_em(pc, data; pseudocount=0.0) + @test all(pc.children[1].prime.log_probs .== log.([1.0, 0.0])) + @test pc.children[1].sub.log_probs[1] .≈ log.([0.4, 0.6])[1] atol=1e-6 +end \ No newline at end of file diff --git a/test/plain_prob_nodes_tests.jl b/test/plain_prob_nodes_tests.jl new file mode 100644 index 00000000..592c52a7 --- /dev/null +++ b/test/plain_prob_nodes_tests.jl @@ -0,0 +1,49 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +include("helper/plain_logic_circuits.jl") + +@testset "probabilistic circuit nodes" begin + + c1 = little_3var() + + @test isdisjoint(linearize(ProbCircuit(c1)), linearize(ProbCircuit(c1))) + + p1 = ProbCircuit(c1) + lit3 = children(children(p1)[1])[1] + + # traits + @test p1 isa ProbCircuit + @test p1 isa PlainSumNode + @test children(p1)[1] isa PlainMulNode + @test lit3 isa PlainProbLiteralNode + @test GateType(p1) isa ⋁Gate + @test GateType(children(p1)[1]) isa ⋀Gate + @test GateType(lit3) isa LiteralGate + @test length(mul_nodes(p1)) == 4 + + # methods + @test num_parameters(p1) == 10 + + # extension methods + @test literal(lit3) === literal(children(children(c1)[1])[1]) + @test variable(left_most_descendent(p1)) == Var(3) + @test ispositive(left_most_descendent(p1)) + @test !isnegative(left_most_descendent(p1)) + @test num_nodes(p1) == 15 + @test num_edges(p1) == 18 + + r1 = fully_factorized_circuit(ProbCircuit,10) + @test num_parameters(r1) == 2*10+1 + + @test length(mul_nodes(r1)) == 1 + 
+ # compilation tests + lit1 = compile(PlainProbCircuit, Lit(1)) + litn1 = compile(PlainProbCircuit, Lit(-1)) + r = lit1 * 0.3 + 0.7 * litn1 + @test r isa PlainSumNode + @test all(children(r) .== [lit1, litn1]) + @test all(r.log_probs .≈ log.([0.3, 0.7])) +end \ No newline at end of file diff --git a/test/queries/informations_tests.jl b/test/queries/informations_tests.jl new file mode 100644 index 00000000..efc60de4 --- /dev/null +++ b/test/queries/informations_tests.jl @@ -0,0 +1,20 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +@testset "Entropy and KLD" begin + + pc1, vtree = load_struct_prob_circuit( + zoo_psdd_file("simple2.1.psdd"), zoo_vtree_file("simple2.vtree")) + pc2, vtree = load_struct_prob_circuit( + zoo_psdd_file("simple2.2.psdd"), zoo_vtree_file("simple2.vtree")) + pc3, vtree = load_struct_prob_circuit( + zoo_psdd_file("simple2.3.psdd"), zoo_vtree_file("simple2.vtree")) + + @test entropy(pc1) ≈ 1.2899219826090118 + @test entropy(pc2) ≈ 0.9359472745536583 + + @test kl_divergence(pc1, pc2) ≈ 0.5672800167911778 + @test kl_divergence(pc2, pc3) ≈ 0.38966506 + +end diff --git a/test/queries/likelihood_tests.jl b/test/queries/likelihood_tests.jl new file mode 100644 index 00000000..9f32e388 --- /dev/null +++ b/test/queries/likelihood_tests.jl @@ -0,0 +1,38 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using DataFrames: DataFrame + +include("../helper/gpu.jl") + +@testset "Likelihood" begin + # Uses a PC with 4 variables, and tests 3 of the configurations to + # match with python. Also tests all probabilities sum up to 1. + + prob_circuit = zoo_psdd("little_4var.psdd"); + @test prob_circuit isa ProbCircuit; + + # Step 1. Check Probabilities for 3 samples + data = DataFrame(BitArray([0 0 0 0; 0 1 1 0; 0 0 1 1])); + true_prob = [0.07; 0.03; 0.13999999999999999] + + calc_prob = EVI(prob_circuit, data) + calc_prob = exp.(calc_prob) + + @test true_prob ≈ calc_prob atol=1e-7; + + # Step 2. 
Add up all probabilities and see if they add up to one + N = 4; + data_all = generate_data_all(N) + + calc_prob_all = EVI(prob_circuit, data_all) + calc_prob_all = exp.(calc_prob_all) + sum_prob_all = sum(calc_prob_all) + + @test 1 ≈ sum_prob_all atol = 1e-7; + + cpu_gpu_agree_approx(data_all) do d + EVI(prob_circuit, d) + end + +end \ No newline at end of file diff --git a/test/queries/map_tests.jl b/test/queries/map_tests.jl new file mode 100644 index 00000000..1b0b1f8c --- /dev/null +++ b/test/queries/map_tests.jl @@ -0,0 +1,61 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using DataFrames: DataFrame +using CUDA + +include("../helper/gpu.jl") + +@testset "MAP" begin + prob_circuit = zoo_psdd("little_4var.psdd"); + + data_full = generate_data_all(num_variables(prob_circuit)) + + map, mappr = MAP(prob_circuit, data_full) + + @test map == data_full + + evipr = EVI(prob_circuit, data_full) + @test mappr ≈ evipr atol=1e-6 + + data_marg = DataFrame([false false false false; + false true true false; + false false true true; + false false false missing; + missing true false missing; + missing missing missing missing; + false missing missing missing]) + + map, mappr = MAP(prob_circuit, data_marg) + + @test all(zip(eachcol(map), eachcol(data_marg))) do (cf,cm) + all(zip(cf, cm)) do (f,m) + ismissing(m) || f == m + end + end + + mar = MAR(prob_circuit, data_marg) + + @test all(mar .> mappr .- 1e-6) + + # same MAP states on CPU and GPU + cpu_gpu_agree(data_full) do d + MAP(prob_circuit, d)[1] + end + + # same MAP probabilities on CPU and GPU + cpu_gpu_agree_approx(data_full) do d + MAP(prob_circuit, d)[2] + end + + # same MAP states on CPU and GPU + cpu_gpu_agree(data_marg) do d + MAP(prob_circuit, d)[1] + end + + # same MAP probabilities on CPU and GPU + cpu_gpu_agree_approx(data_marg) do d + MAP(prob_circuit, d)[2] + end + +end \ No newline at end of file diff --git a/test/queries/marginal_flow_tests.jl b/test/queries/marginal_flow_tests.jl new file 
mode 100644 index 00000000..235809c5 --- /dev/null +++ b/test/queries/marginal_flow_tests.jl @@ -0,0 +1,97 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using DataFrames: DataFrame +using CUDA + +include("../helper/gpu.jl") + +@testset "Marginals" begin + prob_circuit = zoo_psdd("little_4var.psdd"); + + data_marg = DataFrame([false false false false; + false true true false; + false false true true; + false false false missing; + missing true false missing; + missing missing missing missing; + false missing missing missing]) + true_prob = [0.07; 0.03; 0.13999999999999999; + 0.3499999999999; 0.1; 1.0; 0.8] + + calc_prob = exp.(MAR(prob_circuit, data_marg)) + @test true_prob ≈ calc_prob atol=1e-7 + + cpu_gpu_agree_approx(data_marg) do d + marginal_all(prob_circuit, d) + end + + function test_complete_mar(data) + r1 = EVI(prob_circuit, data) + r2 = MAR(prob_circuit, data) + @test r1 ≈ r2 atol=1e-6 + end + + data_full = generate_data_all(num_variables(prob_circuit)) + + test_complete_mar(data_full) + CUDA.functional() && test_complete_mar(to_gpu(data_full)) + + cpu_gpu_agree_approx(data_full) do d + marginal_all(prob_circuit, d) + end + +end + +@testset "Marginal flows" begin + + prob_circuit = zoo_psdd("little_4var.psdd"); + + function test_flows(data) + # Comparing with down pass with fully observed data + + data_f = CUDA.@allowscalar Float64.(data) + + _, f1 = satisfies_flows(prob_circuit, data_f) + _, f2 = marginal_flows(prob_circuit, data) + + # note: while downward pass flows should be the same, + # the upward pass is *not* supposed to be the same (parameters used vs not) + + f1 = to_cpu(f1[:,3:end]) # ignore true and false leaf + f2 = to_cpu(f2[:,3:end]) # ignore true and false leaf + + @test f1 ≈ exp.(f2) atol=1e-6 + end + + data_full = generate_data_all(num_variables(prob_circuit)) + + test_flows(data_full) + CUDA.functional() && test_flows(to_gpu(data_full)) + + cpu_gpu_agree_approx(data_full) do d + _, f = marginal_flows(prob_circuit, d) 
+ f[:,3:end] # ignore true and false leaf + end + + # Validating one example with missing features done by hand + data_partial = DataFrame([missing true missing true;]) + prob_circuit = zoo_psdd("little_4var.psdd"); + _, f = marginal_flows(prob_circuit, data_partial) + f = exp.(f) + + @test f[end] ≈ 1.0 + @test f[end-1] ≈ 1.0 + @test f[end-2] ≈ 1.0 + @test f[end-4] ≈ 2/3 + @test f[end-5] ≈ 0.0 atol=1e-7 + @test f[end-6] ≈ 1/2 + @test f[end-7] ≈ 1.0 + @test f[end-8] ≈ 1/3 + @test f[end-9] ≈ 1 + @test f[end-10] ≈ 1/2 + + # correctness on gpu by transitivity with above test + +end + diff --git a/test/queries/pr_constraint_tests.jl b/test/queries/pr_constraint_tests.jl new file mode 100644 index 00000000..d3eb6cb8 --- /dev/null +++ b/test/queries/pr_constraint_tests.jl @@ -0,0 +1,30 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits + +@testset "Probability of constraint" begin + + # two nodes + simplevtree = zoo_vtree_file("simple2.vtree") + pc, vtree = load_struct_prob_circuit( + zoo_psdd_file("simple2.4.psdd"), simplevtree) + + + @test pr_constraint(pc, pc) ≈ 1.0 + + file_circuit = "little_4var.circuit" + file_vtree = "little_4var.vtree" + logic_circuit, vtree = load_struct_smooth_logic_circuit( + zoo_lc_file(file_circuit), zoo_vtree_file(file_vtree)) + + pc, _ = load_struct_prob_circuit(zoo_psdd_file("little_4var.psdd"), zoo_vtree_file("little_4var.vtree")) + + @test pr_constraint(pc, children(logic_circuit)[1]) ≈ 1.0 + + # Test with two psdds + pc1, vtree = load_struct_prob_circuit(zoo_psdd_file("simple2.5.psdd"), simplevtree) + pc2, vtree = load_struct_prob_circuit(zoo_psdd_file("simple2.6.psdd"), simplevtree) + + @test pr_constraint(pc1, pc2) ≈ 1 + +end diff --git a/test/queries/sample_test.jl b/test/queries/sample_test.jl new file mode 100644 index 00000000..46143add --- /dev/null +++ b/test/queries/sample_test.jl @@ -0,0 +1,101 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using Random: MersenneTwister +using CUDA + 
+function histogram_matches_likelihood(samples::Matrix{Bool}, worlds, loglikelihoods) + hist = Dict{BitVector,Int}() + for i = 1:size(samples,1) + sample = BitVector(samples[i,:]) + hist[sample] = get(hist, sample, 0) + 1 + end + for i = 1:size(worlds,1) + exact_prob = exp(loglikelihoods[i]) + ex = BitVector(example(worlds,i)) + estim_prob = get(hist, ex, 0) / size(samples,1) + @test exact_prob ≈ estim_prob atol=1e-2; + end + +end + +@testset "Unconditional Sampling Test" begin + + rng = MersenneTwister(42) + + pc = zoo_psdd("little_4var.psdd"); + worlds = generate_data_all(num_variables(pc)); + + loglikelihoods = EVI(pc, worlds) + + Nsamples = 2_0000 + + samples, _ = sample(pc, Nsamples; rng) + histogram_matches_likelihood(samples, worlds, loglikelihoods) + + if CUDA.functional() + samples, _ = sample(pc, Nsamples; rng, gpu = true) + samples_cpu = to_cpu(samples) + histogram_matches_likelihood(samples_cpu, worlds, loglikelihoods) + end + +end + +@testset "Conditional Sampling Test" begin + + rng = MersenneTwister(42) + num_samples = 10 + + pc = zoo_psdd("little_4var.psdd"); + data_all = generate_data_all(num_variables(pc)); + + # sampling given complete data should return same data with its log likelihood + + + loglikelihoods = MAR(pc, data_all) + sample_states, sample_prs = sample(pc, num_samples, data_all; rng) + + for i in 1:num_samples + @test sample_states[i,:,:] == convert(Matrix,data_all) + @test sample_prs[i,:] ≈ loglikelihoods atol=1e-6 + end + + # same states on CPU and GPU + cpu_gpu_agree(data_all) do d + sample(pc, num_samples, d)[1] + end + + # same probabilities on CPU and GPU + cpu_gpu_agree_approx(data_all) do d + sample(pc, num_samples, d)[2] + end + + + # sampling given partial data invariants + + data_marg = DataFrame([false false false false; + false true true false; + false false true true; + false false false missing; + missing true false missing; + missing missing missing missing; + false missing missing missing]) + + _, map_pr = MAP(pc, 
data_marg) + + sample_states, sample_prs = sample(pc, num_samples, data_marg; rng) + + for i in 1:num_samples + + # samples keep the partial evidence values + pairs = collect(zip(sample_states[i,:,:], convert(Matrix,data_marg))) + @test all(pairs) do (f,m) + ismissing(m) || f == m + end + + # probability does not exceed MAP probability + @test all(sample_prs[i,:] .<= map_pr .+ 1e-6) + end + + +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index b9e58d89..6c0ce54c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,4 +11,5 @@ end using Jive -runtests(@__DIR__, skip=["runtests.jl", "helper"]) +# TODO reinstate after refactoring all modules +runtests(@__DIR__, skip=["runtests.jl", "helper", "broken"]) diff --git a/test/structured_prob_nodes_tests.jl b/test/structured_prob_nodes_tests.jl new file mode 100644 index 00000000..bf31a710 --- /dev/null +++ b/test/structured_prob_nodes_tests.jl @@ -0,0 +1,112 @@ +using Test +using LogicCircuits +using ProbabilisticCircuits +using DataFrames: DataFrame + + +@testset "structured probabilistic circuit nodes" begin + + vtree = PlainVtree(10, :balanced) + f = fully_factorized_circuit(StructProbCircuit, vtree) + @test f isa StructProbCircuit + @test num_nodes(f) == 20+10+9*2+1 + @test num_edges(f) == 20+18+9+1 + @test length(mul_nodes(f)) == 9 + @test length(sum_nodes(f)) == 10+9+1 + + @test respects_vtree(f) + @test respects_vtree(f, PlainVtree(10, :balanced)) + @test !respects_vtree(f, PlainVtree(5, :balanced)) + @test !respects_vtree(f, PlainVtree(10, :rightlinear)) + @test !respects_vtree(f, PlainVtree(10, :leftlinear)) + + @test variable(left_most_descendent(f)) == Var(1) + @test variable(right_most_descendent(f)) == Var(10) + @test ispositive(left_most_descendent(f)) + @test isnegative(right_most_descendent(f)) + + @test literal((StructProbCircuit,vtree)(Lit(-5))) == Lit(-5) + + @test_throws Exception multiply(StructProbCircuit[]) + @test_throws Exception summate(StructProbCircuit[]) 
+ + @test isdecomposable(f) + + @test variables(f) == BitSet(1:10) + @test num_variables(f) == 10 + @test issmooth(f) + + input = DataFrame(BitArray([1 0 1 0 1 0 1 0 1 0; + 1 1 1 1 1 1 1 1 1 1; + 0 0 0 0 0 0 0 0 0 0; + 0 1 1 0 1 0 0 1 0 1])) + @test satisfies(f,input) == BitVector([1,1,1,1]) + + plainf = PlainLogicCircuit(f) + foreach(plainf) do n + @test n isa PlainLogicCircuit + end + @test plainf !== f + @test num_edges(plainf) == num_edges(f) + @test num_nodes(plainf) == num_nodes(f) + @test length(and_nodes(plainf)) == 9 + @test length(or_nodes(plainf)) == 10+9+1 + @test model_count(plainf) == BigInt(2)^10 + @test isempty(intersect(linearize(f),linearize(plainf))) + + ref = StructProbCircuit(vtree,plainf) + foreach(ref) do n + @test n isa StructProbCircuit + end + @test plainf !== ref + @test f !== ref + @test f.vtree === ref.vtree + @test num_edges(ref) == num_edges(f) + @test num_nodes(ref) == num_nodes(f) + @test length(and_nodes(ref)) == 9 + @test length(or_nodes(ref)) == 10+9+1 + @test model_count(ref) == BigInt(2)^10 + @test isempty(intersect(linearize(f),linearize(ref))) + + ref = StructProbCircuit(vtree,f) + foreach(ref) do n + @test n isa StructProbCircuit + end + @test plainf !== ref + @test f !== ref + @test f.vtree === ref.vtree + @test num_edges(ref) == num_edges(f) + @test num_nodes(ref) == num_nodes(f) + @test length(and_nodes(ref)) == 9 + @test length(or_nodes(ref)) == 10+9+1 + @test model_count(ref) == BigInt(2)^10 + @test isempty(intersect(linearize(f),linearize(ref))) + + mgr = SddMgr(7, :balanced) + v = Dict([(i => compile(mgr, Lit(i))) for i=1:7]) + c = (v[1] | !v[2] | v[3]) & + (v[2] | !v[7] | v[6]) & + (v[3] | !v[4] | v[5]) & + (v[1] | !v[4] | v[6]) + + c2 = StructLogicCircuit(mgr, c) + c2 = propagate_constants(c2; remove_unary=true) + + c3 = StructProbCircuit(mgr, c2) + foreach(c3) do n + @test n isa StructProbCircuit + end + @test num_edges(c3) == 69 + @test num_variables(c3) == 7 + + # compilation tests + v = Vtree(Var(1)) + lit1 = 
compile(StructProbCircuit, v, Lit(1)) + litn1 = compile(StructProbCircuit, v, Lit(-1)) + r = lit1 * 0.3 + 0.7 * litn1 + @test r isa StructSumNode + @test all(children(r) .== [lit1, litn1]) + @test r.vtree === lit1.vtree + @test all(r.log_probs .≈ log.([0.3, 0.7])) + +end \ No newline at end of file