diff --git a/Project.toml b/Project.toml
index cc639b0c..368a92ad 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "0.1.1"
 [deps]
 BlossomV = "6c721016-9dae-5d90-abf6-67daaccb2332"
 Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599"
@@ -19,10 +20,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 
 [compat]
 BlossomV = "0.4"
 Clustering = "0.14"
+CUDA = "1.2"
 DataStructures = "0.17"
 LightGraphs = "1.3"
 LogicCircuits = "0.1.1"
@@ -32,4 +36,6 @@ Reexport = "0.2"
 SimpleWeightedGraphs = "1.1"
 StatsBase = "0.33"
 StatsFuns = "0.9"
+LoopVectorization = "0.8.20"
+MLDatasets = "0.4, 0.5"
 julia = "1.5"
diff --git a/src/LoadSave/circuit_line_compiler.jl b/src/LoadSave/circuit_line_compiler.jl
index a0d09d7e..02879f55 100644
--- a/src/LoadSave/circuit_line_compiler.jl
+++ b/src/LoadSave/circuit_line_compiler.jl
@@ -111,7 +111,7 @@ function decorate_logistic(lines::CircuitFormatLines, logic_circuit::LogicCircui
 
     function compile(ln::BiasLine)
         root = id2logisticnode(ln.node_id)::Logistic⋁Node
-        # @assert length(node.thetas) == 1
+        # @assert length(root.thetas) == 1
         root.thetas[1,:] .= ln.weights
     end
 
diff --git a/src/Logistic/Logistic.jl b/src/Logistic/Logistic.jl
index c657449f..d5b03b10 100644
--- a/src/Logistic/Logistic.jl
+++ b/src/Logistic/Logistic.jl
@@ -5,7 +5,9 @@ using ..Utils
 
 include("logistic_nodes.jl")
 include("queries.jl")
+# include("parameter_circuit.jl")
+# include("learn_parameters.jl")
 
-# TODO learning
+# TODO structure learning
 
 end
\ No newline at end of file
diff --git a/src/Logistic/learn_parameters.jl b/src/Logistic/learn_parameters.jl
new file mode 100644
index 00000000..1bb8219c
--- /dev/null
+++ b/src/Logistic/learn_parameters.jl
@@ -0,0 +1,49 @@
+export learn_parameters
+
+using LogicCircuits: compute_flows, or_nodes
+using LoopVectorization: @avx
+
+"""
+Maximum likelihood estimation of parameters given data through gradient descent
+"""
+function learn_parameters(lc::LogisticCircuit, classes::Int, data, labels; num_epochs=30, step_size=0.1, flows_computed=false)
+    # Runs `num_epochs` gradient steps on the parameters of `lc`, each scaled by `step_size`; returns `nothing`.
+    @inline function one_hot(labels::Vector, classes::Int)
+        one_hot_labels = zeros(length(labels), classes)
+        for (i, j) in enumerate(labels)
+            one_hot_labels[i, j] = 1.0  # each label is used directly as a column index
+        end
+        one_hot_labels
+    end
+
+    one_hot_labels = one_hot(labels, classes)
+    if !flows_computed
+        compute_flows(lc, data)
+    end
+
+    for _ = 1:num_epochs
+        class_probs = class_likelihood_per_instance(lc, classes, data; flows_computed=true)  # NOTE(review): the method in queries.jl declares no `flows_computed` keyword — confirm
+        update_parameters(lc, class_probs, one_hot_labels; step_size=step_size)  # forward the caller's step size instead of the callee default
+    end
+
+    nothing
+end
+
+
+@inline function update_parameters(lc::LogisticCircuit, class_probs, one_hot_labels; step_size=0.1)
+    sample_count = Float64(size(one_hot_labels, 1))
+    residual = class_probs .- one_hot_labels  # per-example, per-class prediction error
+    # every (parameter row, child) pair of every or-node takes one gradient step
+    for node in or_nodes(lc)
+        for (theta, child) in zip(eachrow(node.thetas), children(node))
+            edge_flow = Float64.(downflow(node, child))
+            @avx delta = edge_flow' * residual / sample_count * step_size
+            delta = dropdims(delta; dims=1)
+            @avx @. theta -= delta
+        end
+    end
+    
+    return nothing
+end
+
+
diff --git a/src/Logistic/logistic_nodes.jl b/src/Logistic/logistic_nodes.jl
index 950dd8c8..0232ca2b 100644
--- a/src/Logistic/logistic_nodes.jl
+++ b/src/Logistic/logistic_nodes.jl
@@ -1,12 +1,8 @@
 export 
     LogisticCircuit, 
-    LogisticLeafNode, 
-    LogisticInnerNode, 
-    LogisticLiteral,
-    Logistic⋀Node,
-    Logistic⋁Node,
-    classes,
-    num_parameters_perclass
+    LogisticLeafNode, LogisticInnerNode, 
+    LogisticLiteral, Logistic⋀Node, Logistic⋁Node,
+    num_classes, num_parameters_per_class
     
 #####################
 # Infrastructure for logistic circuit nodes
@@ -54,11 +50,11 @@ A logistic disjunction node (Or node)
 """
 mutable struct Logistic⋁Node <: LogisticInnerNode
     children::Vector{<:LogisticCircuit}
-    thetas::Array{Float64, 2}
+    thetas::Matrix{Float64} # rows are paired with children via eachrow elsewhere in this package; num_classes reads the second dimension
     data
     counter::UInt32
     Logistic⋁Node(children, class::Int) = begin
-        new(convert(Vector{LogisticCircuit}, children), init_array(Float64, length(children), class), nothing, 0)
+        new(convert(Vector{LogisticCircuit}, children), init_array(Float32, length(children), class), nothing, 0) # NOTE(review): Float32 is requested here but the field is Matrix{Float64}, so `new` converts — confirm the intended element type
     end
 end
 
@@ -77,11 +73,11 @@ import LogicCircuits.GateType # make available for extension
 
 import LogicCircuits: children # make available for extension
 @inline children(n::LogisticInnerNode) = n.children
-@inline classes(n::Logistic⋁Node) = size(n.thetas)[2]
+@inline num_classes(n::Logistic⋁Node) = size(n.thetas)[2]
 
 import ..Utils: num_parameters
 @inline num_parameters(c::LogisticCircuit) = sum(n -> num_children(n) * classes(n), ⋁_nodes(c))
-@inline num_parameters_perclass(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c))
+@inline num_parameters_per_class(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c))
 
 #####################
 # constructors and conversions
diff --git a/src/Logistic/parameter_circuit.jl b/src/Logistic/parameter_circuit.jl
new file mode 100644
index 00000000..dc338e77
--- /dev/null
+++ b/src/Logistic/parameter_circuit.jl
@@ -0,0 +1,225 @@
+using CUDA
+using LogicCircuits
+
+export LayeredParameterCircuit, CuLayeredParameterCircuit
+export class_likelihood, class_weights
+export one_hot, learn_parameters, update_parameters
+
+# in a parameter circuit
+# 1 is true, 2 is false
+const TRUE_ID = Int32(1)
+const FALSE_ID = Int32(2)
+
+struct LayeredParameterCircuit # a layered bit circuit together with its per-layer parameters
+    layered_circuit::LayeredBitCircuit
+    layered_parameters::Vector{Matrix{Float32}} # one matrix per layer
+end
+
+LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin # flattens `circuit` into per-layer decision, element and parameter arrays
+    @assert is⋁gate(circuit)
+    decisions::Vector{Vector{Int32}} = Vector{Vector{Int32}}() # per layer: flat (decision id, first element, last element) triples
+    elements::Vector{Vector{Int32}} = Vector{Vector{Int32}}() # per layer: flat pairs of child decision ids
+    parameters::Vector{Vector{Float32}} = Vector{Vector{Float32}}() # per layer: one theta row appended per element
+    num_decisions::Int32 = 2 * num_features + 2 # ids 1 and 2 are the constants; f_lit assigns ids 3 .. 2 + 2 * num_features to literals
+    num_elements::Vector{Int32} = Vector{Int32}()
+    # num_parameters always equals num_elements
+
+    ensure_layer(i) = begin
+        if length(decisions) < i
+            # add a new layer
+            push!(decisions, Int32[])
+            push!(elements, Int32[])
+            push!(parameters, Float32[])
+            push!(num_elements, 0)
+        end
+    end
+    
+    f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_ID : FALSE_ID) # constants sit in layer 0
+    f_lit(n) = LayeredDecisionId(0, 
+        ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n))) # literals sit in layer 0 as well
+
+    f_and(n, cs) = begin
+        @assert length(cs) == 2
+        LayeredDecisionId[cs[1], cs[2]] # an and-node is passed upward as the pair of its two children
+    end
+    f_or(n, cs) = begin
+        num_decisions += 1 # every or-node receives the next free decision id
+        # determine layer
+        layer_id = zero(Int32)
+        for c in cs
+            if c isa Vector{LayeredDecisionId}
+                @assert length(c) == 2
+                layer_id = max(layer_id, c[1].layer_id, c[2].layer_id)
+            else
+                @assert c isa LayeredDecisionId
+                layer_id = max(layer_id, c.layer_id)
+            end
+        end
+        layer_id += 1 # one layer above the deepest child
+        ensure_layer(layer_id)
+        first_element = num_elements[layer_id] + 1
+        foreach(cs, eachrow(n.thetas)) do c, theta
+            @assert size(theta)[1] == nc
+            append!(parameters[layer_id], theta)
+            num_elements[layer_id] += 1
+            if c isa Vector{LayeredDecisionId}
+                push!(elements[layer_id], c[1].decision_id, c[2].decision_id)
+            else
+                push!(elements[layer_id], c.decision_id, TRUE_ID) # a child that is not an and-pair is paired with the true constant
+            end
+        end
+        push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id])
+        LayeredDecisionId(layer_id, num_decisions)
+    end
+
+    foldup_aggregate(circuit, f_con, f_lit, f_and, f_or, 
+        Union{LayeredDecisionId,Vector{LayeredDecisionId}})
+    
+    circuit_layers = map(decisions, elements) do d, e
+        Layer(reshape(d, 3, :), reshape(e, 2, :)) # one column per decision and one column per element
+    end
+    parameter_layers = map(parameters) do p
+        reshape(p, nc, :) # nc rows, one column per element
+    end
+    return LayeredParameterCircuit(LayeredBitCircuit(circuit_layers), parameter_layers)
+end
+
+struct CuLayeredParameterCircuit # built from a LayeredParameterCircuit by the inner constructor
+    layered_circuit::CuLayeredBitCircuit
+    layered_parameters::Vector{CuMatrix{Float32}}
+    CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters)) # each parameter matrix is passed through CuMatrix
+end
+
+
+
+function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing)
+    weights, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp)
+    probs = @. 1f0 / (1f0 + exp(-weights))  # element-wise logistic function of the class weights
+    return probs, flow, v
+end
+
+function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing)
+    # flows come first; the class weights are then accumulated from them
+    flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down)
+    return calculate_class_weights(circuit, nc, data, v, flow, reuse_cw), flow, v
+end
+
+function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, v, flow, reuse_cw=nothing) # fills an (examples × classes) matrix with one kernel launch per layer
+    ne = num_examples(data)
+    cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc) # reuse the caller's buffer only when its type and shape match
+        reuse_cw .= zero(Float32)
+        reuse_cw
+    else
+        CUDA.zeros(Float32, ne, nc)
+    end
+
+    dec_per_thread = 4 # divisor applied to the decision count when sizing threads and blocks below
+    CUDA.@sync for i = 1:length(circuit.layered_circuit.layers)
+        circuit_layer = circuit.layered_circuit.layers[i]
+        parameter_layer = circuit.layered_parameters[i]
+        ndl = num_decisions(circuit_layer)
+        num_threads = balance_threads(ne, ndl / dec_per_thread, 8) # NOTE(review): balance_threads is defined elsewhere; presumably returns an (x, y) thread shape — confirm
+        num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)
+        @cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, v, flow, circuit_layer.decisions, circuit_layer.elements, parameter_layer)
+    end
+    
+    return cw
+end
+
+function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, elements, parameters) # adds edge_flow * parameter into cw for every element of one layer
+    index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    stride_x = blockDim().x * gridDim().x
+    stride_y = blockDim().y * gridDim().y
+    ne, nc = size(cw)
+    _, num_decisions = size(decisions)
+    
+    for j = index_x:stride_x:ne # x walks examples
+        for i = index_y:stride_y:num_decisions # y walks the decisions of this layer
+            decision_id = @inbounds decisions[1, i]
+            n_up = @inbounds v[j, decision_id]
+            if n_up > zero(Float32) # a zero value contributes nothing and would divide by zero below
+                first_elem = @inbounds decisions[2, i]
+                last_elem = @inbounds decisions[3, i]
+                n_down = @inbounds flow[j, decision_id]
+                for e = first_elem:last_elem
+                    e1 = @inbounds elements[1, e] # each element reads its own child pair, matching parameters[class, e]
+                    e2 = @inbounds elements[2, e]
+                    e_up = @inbounds (v[j, e1] * v[j, e2])
+                    edge_flow = e_up / n_up * n_down
+                    # following needs to be memory safe
+                    for class=1:nc
+                        @CUDA.atomic cw[j, class] += edge_flow * parameters[class, e] # atomic is automatically inbounds
+                    end
+                end
+            end
+        end
+    end
+    
+    return nothing
+end
+
+
+
+function one_hot(labels::Vector, nc::Integer)
+    # label j marks column j + 1, so labels are read as zero-based class ids
+    encoded = zeros(Float32, length(labels), nc)
+    for row in eachindex(labels)
+        encoded[row, labels[row] + 1] = 1.0
+    end
+    return encoded
+end
+
+function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001)
+    cp, flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp) # the first pass may use caller-supplied buffers
+    update_parameters(circuit, labels, cp, v, flow, step_size) # same argument order as the call inside the loop
+    for _ = 2:num_epochs
+        cp, flow, v = class_likelihood(circuit, nc, data, v, flow, cp) # later passes hand the previous results back as reuse buffers
+        update_parameters(circuit, labels, cp, v, flow, step_size)
+    end
+    return nothing
+end
+
+function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, v, flow, step_size=0.0001) # launches one parameter-update kernel per layer
+    _, nc = size(labels)
+    step_size = Float32(step_size)
+    CUDA.@sync for i = 1:length(circuit.layered_circuit.layers)
+        circuit_layer = circuit.layered_circuit.layers[i]
+        flow_layer = flow[i] # NOTE(review): flow is indexed per layer here but as flow[j, decision_id] in calculate_class_weights_layer_kernel_cuda — confirm
+        parameter_layer = circuit.layered_parameters[i]
+        ndl = num_decisions(circuit_layer)
+        num_threads = balance_threads(ndl, nc, 6)
+        num_threads = (num_threads[1], num_threads[2], 1) # explicit 3-tuple; the dangling trailing comma used to run this statement into the next line
+        num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, nc / num_threads[2]), 4
+        @cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size)
+    end
+    return nothing
+end
+
+function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size) # subtracts step_size-scaled error terms from the parameters of one layer
+    index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x
+    index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y
+    index_z = (blockIdx().z - 1) * blockDim().z + threadIdx().z
+    stride_x = blockDim().x * gridDim().x
+    stride_y = blockDim().y * gridDim().y
+    stride_z = blockDim().z * gridDim().z
+    ne, nc = size(labels)
+    _, num_decisions = size(decisions)
+    
+    for class = index_y:stride_y:nc # y walks classes
+        for i = index_x:stride_x:num_decisions # x walks the decisions of this layer
+            first_elem = @inbounds decisions[2, i]
+            last_elem = @inbounds decisions[3, i]
+            for e = first_elem:last_elem
+                for j = index_z:stride_z:ne # z walks examples
+                    edge_flow = e_up / n_up * n_down # NOTE(review): e_up, n_up and n_down are neither parameters nor assigned in this kernel, and `flow` is never read — the edge-flow computation looks unfinished
+                    u = @inbounds edge_flow * (cp[j, class] - labels[j, class]) * step_size
+                    # following needs to be memory safe
+                    @inbounds parameters[class, e] -= u # NOTE(review): threads that differ only in z update the same parameters[class, e] and this `-=` is not atomic
+                end
+            end
+        end
+    end
+    
+    return nothing
+end
\ No newline at end of file
diff --git a/src/Logistic/queries.jl b/src/Logistic/queries.jl
index 7c80e625..7ceba404 100644
--- a/src/Logistic/queries.jl
+++ b/src/Logistic/queries.jl
@@ -1,22 +1,165 @@
-export class_conditional_likelihood_per_instance
+export class_likelihood_per_instance, class_weights_per_instance, predict_class, accuracy
+
+using CUDA
+using LoopVectorization: @avx, vifelse
+
+
 
-using ..Probabilistic: get_downflow, get_upflow
 """
 Class Conditional Probability
 """
-function class_conditional_likelihood_per_instance(lc::LogisticCircuit,  classes::Int, data)
-    compute_flows(lc, data)
-    likelihoods = zeros(num_examples(data), classes)
-    foreach(lc) do ln
-        if ln isa Logistic⋁Node
-            # For each class. orig.thetas is 2D so used eachcol
-            for (idx, thetaC) in enumerate(eachcol(ln.thetas))
-                foreach(children(ln), thetaC) do c, theta
-                    likelihoods[:, idx] .+= Float64.(get_downflow(ln) .& get_upflow(c)) .* theta
+
+function bitcircuit_with_params(lc, nc, data) # returns (bit circuit, parameter matrix with one row per pushed entry and nc columns)
+    params::Vector{Vector{Float64}} = Vector{Vector{Float64}}()
+    on_decision(n, cs, layer_id, decision_id, first_element, last_element) = begin
+        if isnothing(n)
+            # @assert first_element == last_element
+            push!(params, zeros(Float64, nc)) # a decision without a node gets an all-zero parameter row
+        else
+            # @assert last_element-first_element+1 == size(n.thetas, 1) "$last_element-$first_element+1 != $(size(n.thetas, 1))"
+            for theta in eachrow(n.thetas)
+                push!(params, theta)
+            end
+        end
+    end
+    bc = BitCircuit(lc, data; on_decision)
+    (bc, permutedims(reduce(hcat, params), (2, 1))) # reduce(hcat, ...) avoids splatting a collection of unbounded length
+end
+
+function class_likelihood_per_instance(lc::LogisticCircuit, nc::Int, data) # logistic function of the per-instance class weights
+    cw = class_weights_per_instance(lc, nc, data)
+    isgpu(data) ? (@. 1.0 / (1.0 + exp(-cw))) : (@. @avx 1.0 / (1.0 + exp(-cw))) # @avx is applied on the CPU branch only
+end
+
+function class_weights_per_instance(lc::LogisticCircuit, nc::Int, data)
+    circuit, weights = bitcircuit_with_params(lc, nc, data)
+    # CPU data returns early; otherwise circuit and weights go through to_gpu first
+    if !isgpu(data)
+        return class_weights_per_instance_cpu(circuit, data, weights)
+    end
+    return class_weights_per_instance_gpu(to_gpu(circuit), data, to_gpu(weights))
+end
+
+function class_weights_per_instance_cpu(bc, data, params) # accumulates an (examples × classes) weight matrix through on_edge callbacks
+    ne::Int = num_examples(data)
+    nc::Int = size(params, 2)
+    cw::Matrix{Float64} = zeros(Float64, ne, nc)
+    cw_lock::Threads.ReentrantLock = Threads.ReentrantLock() # a single lock guards every write to cw
+ 
+    @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, locks)
+        if els_start != els_end # nothing is accumulated when the element range has equal endpoints
+            lock(cw_lock) do # TODO: move lock to inner loop?
+                for i = 1:size(flows, 1)
+                    @inbounds edge_flow = values[i, p] & values[i, s] & flows[i, dec_id]
+                    first_true_bit = trailing_zeros(edge_flow) + 1
+                    last_true_bit = 64 - leading_zeros(edge_flow)
+                    @simd for j = first_true_bit:last_true_bit # only scan between the lowest and highest set bit
+                        if get_bit(edge_flow, j)
+                            ex_id = ((i-1) << 6) + j # row i covers 64 consecutive example ids
+                            for class = 1:size(cw, 2)
+                                @inbounds cw[ex_id, class] += params[el_id, class]
+                            end
+                        end
+                    end
+                end
+            end
+        end
+        nothing
+    end
+
+    @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, locks)
+        if els_start != els_end
+            lock(cw_lock) do # TODO: move lock to inner loop?
+                @avx for i = 1:size(flows, 1)
+                    @inbounds edge_flow = values[i, p] * values[i, s] / values[i, dec_id] * flows[i, dec_id]
+                    edge_flow = vifelse(isfinite(edge_flow), edge_flow, zero(Float32)) # a non-finite edge flow is replaced by zero
+                    for class = 1:size(cw, 2)
+                        @inbounds cw[i, class] += edge_flow * params[el_id, class]
+                    end
                 end
             end
         end
+        nothing
     end
-    likelihoods
+
+    if isbinarydata(data) # pick the callback that matches the data representation
+        compute_values_flows(bc, data; on_edge = on_edge_binary)
+    else
+        compute_values_flows(bc, data; on_edge = on_edge_float)
+    end
+
+    return cw
 end
 
+function class_weights_per_instance_gpu(bc, data, params) # accumulates an (examples × classes) CuMatrix through on_edge callbacks
+    ne::Int = num_examples(data)
+    nc::Int = size(params, 2)
+    cw::CuMatrix{Float64} = CUDA.zeros(Float64, ne, nc) # reuse ne rather than calling num_examples a second time
+    cw_device = CUDA.cudaconvert(cw)
+    params_device = CUDA.cudaconvert(params)
+
+    @inline function on_edge_binary(flows, values, dec_id, el_id, p, s, els_start, els_end, chunk_id, edge_flow)
+        if els_start != els_end # nothing is accumulated when the element range has equal endpoints
+            first_true_bit = 1+trailing_zeros(edge_flow)
+            last_true_bit = 64-leading_zeros(edge_flow)
+            for j = first_true_bit:last_true_bit # only scan between the lowest and highest set bit
+                if get_bit(edge_flow, j)
+                    ex_id = ((chunk_id-1) << 6) + j # a chunk covers 64 consecutive example ids
+                    for class = 1:size(cw_device, 2)
+                        CUDA.@atomic cw_device[ex_id, class] += params_device[el_id, class]
+                    end
+                end
+            end
+        end
+        nothing
+    end
+
+    @inline function on_edge_float(flows, values, dec_id, el_id, p, s, els_start, els_end, ex_id, edge_flow)
+        if els_start != els_end
+            for class = 1:size(cw_device, 2)
+                CUDA.@atomic cw_device[ex_id, class] += edge_flow * params_device[el_id, class]
+            end
+        end
+        nothing
+    end
+    
+    if isbinarydata(data) # pick the callback that matches the data representation
+        v,f = compute_values_flows(bc, data; on_edge = on_edge_binary)
+    else
+        @assert isfpdata(data) "Only floating point and binary data are supported"
+        v,f = compute_values_flows(bc, data; on_edge = on_edge_float)
+    end
+    CUDA.unsafe_free!(v) # save the GC some effort
+    CUDA.unsafe_free!(f) # save the GC some effort
+
+    return cw
+end
+
+
+
+"""
+Class Predictions
+"""
+function predict_class(lc::LogisticCircuit, nc::Int, data)
+    # score every example, then defer to the likelihood-matrix method
+    return predict_class(class_likelihood_per_instance(lc, nc, data))
+end
+
+function predict_class(class_likelihoods)
+    best = last(findmax(class_likelihoods; dims=2))  # index of each row's maximum
+    return dropdims(getindex.(best, 2); dims=2)
+end
+
+
+
+"""
+Prediction accuracy
+"""
+accuracy(lc::LogisticCircuit, nc::Int, data, labels) = 
+    accuracy(predict_class(lc, nc, data), labels)
+
+accuracy(predicted_class::AbstractVector, labels) = # vector of predicted classes: fraction equal to labels
+    Float64(sum(@. predicted_class == labels)) / length(labels)
+
+accuracy(class_likelihoods::AbstractMatrix, labels) = # likelihood matrix: predict first; typed so it no longer overwrites the vector method
+    accuracy(predict_class(class_likelihoods), labels)
diff --git a/src/Probabilistic/prob_nodes.jl b/src/Probabilistic/prob_nodes.jl
index f002baea..4cb10f16 100644
--- a/src/Probabilistic/prob_nodes.jl
+++ b/src/Probabilistic/prob_nodes.jl
@@ -1,5 +1,7 @@
-export PlainProbCircuit, ProbLeafNode, ProbInnerNode, ProbLiteralNode, Prob⋀Node, 
-Prob⋁Node
+export 
+    PlainProbCircuit,
+    ProbLeafNode, ProbInnerNode, 
+    ProbLiteralNode, Prob⋀Node, Prob⋁Node
 
 #####################
 # Infrastructure for probabilistic circuit nodes
diff --git a/test/Logistic/logistic_tests.jl b/test/Logistic/logistic_tests.jl
index 3461a064..9a63f0aa 100644
--- a/test/Logistic/logistic_tests.jl
+++ b/test/Logistic/logistic_tests.jl
@@ -3,27 +3,80 @@ using LogicCircuits
 using ProbabilisticCircuits
 
 # This tests are supposed to test queries on the circuits
-@testset "Logistic Circuit Class Conditional" begin
+@testset "Logistic Circuit Query and Parameter Tests" begin
     # Uses a Logistic Circuit with 4 variables, and tests 3 of the configurations to 
     # match with python version.
+    
+    CLASSES = 2
 
-    EPS = 1e-7;
-    logistic_circuit = zoo_lc("little_4var.circuit", 2);
-    @test logistic_circuit isa LogisticCircuit;
+    logistic_circuit = zoo_lc("little_4var.circuit", CLASSES)
+    @test logistic_circuit isa LogisticCircuit
 
-    # Step 1. Check Probabilities for 3 samples
-    data = Bool.([0 0 0 0; 0 1 1 0; 0 0 1 1]);
-    
-    true_prob = [3.43147972 4.66740416; 
-                4.27595352 2.83503504;
-                3.67415087 4.93793472]
+    # check probabilities for binary samples
+    data = @. Bool([0 0 0 0; 0 1 1 0; 0 0 1 1])
+    # true_weight_func = [3.43147972 4.66740416; 
+    #                     4.27595352 2.83503504;
+    #                     3.67415087 4.93793472]
+    true_prob = [0.9686740008311808 0.9906908445371728;
+                 0.9862917392724188 0.9445399509069984; 
+                 0.9752568185086389 0.9928816444223209]
             
-    CLASSES = 2
-    calc_prob = class_conditional_likelihood_per_instance(logistic_circuit, CLASSES, data)
-    
-    for i = 1:3
-        for j = 1:2
-            @test true_prob[i,j] ≈ calc_prob[i,j] atol= EPS;
+    class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data)
+    for i = 1:size(true_prob)[1]
+        for j = 1:CLASSES
+            @test true_prob[i,j] ≈ class_prob[i,j]
+        end
+    end
+
+    # check probabilities for float samples
+    data = Float32.(data)
+    class_prob = class_likelihood_per_instance(logistic_circuit, CLASSES, data)
+    for i = 1:size(true_prob)[1]
+        for j = 1:CLASSES
+            @test true_prob[i,j] ≈ class_prob[i,j]
         end
     end
+
+    # check predicted_classes
+    true_labels = [2, 1, 2]
+    predicted_classes = predict_class(logistic_circuit, CLASSES, data)
+    @test all(predicted_classes .== true_labels)
+    
+    # check accuracy
+    @test accuracy(logistic_circuit, CLASSES, data, true_labels) == 1.0
+
+    # # check parameter updates
+    # original_literal_parameters = Dict{Int, Vector{Float64}}()
+    # foreach(logistic_circuit) do ln
+    #     if ln isa Logistic⋁Node
+    #         foreach(ln.children, eachrow(ln.thetas)) do c, theta
+    #             if c isa LogisticLiteral
+    #                 original_literal_parameters[c.literal] = copy(theta)
+    #             end
+    #         end
+    #     end
+    # end
+    
+    # one_hot_labels = [0.0 1.0;
+    #                   1.0 0.0;
+    #                   0.0 1.0]
+    # one_hot_labels = Float32.(one_hot_labels)
+    # true_error = true_prob .- one_hot_labels
+    # step_size = 0.1
+    # learn_parameters(logistic_circuit, CLASSES, data, true_labels; num_epochs=1, step_size=step_size, flows_computed=true)
+    
+    # foreach(logistic_circuit) do ln
+    #     if ln isa Logistic⋁Node
+    #         foreach(ln.children, eachrow(ln.thetas)) do c, theta
+    #             if c isa LogisticLiteral
+    #                 for class = 1:CLASSES
+    #                     true_update_amount = -step_size * sum(c.data.upflow .* true_error[:, class]) / size(true_error)[1]
+    #                     updated_amount = theta[class] - original_literal_parameters[c.literal][class]
+    #                     @test updated_amount ≈ true_update_amount atol=1e-7
+    #                 end
+    #             end
+    #         end
+    #     end
+    # end
+
 end
\ No newline at end of file
diff --git a/test/Reasoning/expectation_test.jl b/test/Reasoning/expectation_test.jl
index c8aafc50..8959c381 100644
--- a/test/Reasoning/expectation_test.jl
+++ b/test/Reasoning/expectation_test.jl
@@ -3,7 +3,7 @@ using LogicCircuits
 using ProbabilisticCircuits
 
 function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int)
-    EPS = 1e-7;
+    EPS = 1e-5;
     COUNT = size(data)[1]
     # Compute True expectation brute force
     true_exp = zeros(COUNT, CLASSES)
@@ -14,7 +14,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data
         calc_p = log_likelihood_per_instance(pc, cur_data_all)
         calc_p = exp.(calc_p)
 
-        calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all)
+        calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all)
         true_exp[i, :] = sum(calc_p .* calc_f, dims=1)
         true_exp[i, :] ./= sum(calc_p) #p_observed
     end
@@ -36,7 +36,7 @@ function test_expectation_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data
 end
 
 function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLASSES::Int, moment::Int)
-    EPS = 1e-7;
+    EPS = 1e-5;
     COUNT = size(data)[1]
     # Compute True moment brute force
     true_mom = zeros(COUNT, CLASSES)
@@ -47,7 +47,7 @@ function test_moment_brute_force(pc::ProbCircuit, lc::LogisticCircuit, data, CLA
         calc_p = log_likelihood_per_instance(pc, cur_data_all)
         calc_p = exp.(calc_p)
 
-        calc_f = class_conditional_likelihood_per_instance(lc, CLASSES, cur_data_all)
+        calc_f = class_weights_per_instance(lc, CLASSES, cur_data_all)
         true_mom[i, :] = sum(calc_p .* (calc_f .^ moment), dims=1)
         true_mom[i, :] ./= sum(calc_p) #p_observed
     end