Skip to content
Merged

Lc #22

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.1.1"
[deps]
BlossomV = "6c721016-9dae-5d90-abf6-67daaccb2332"
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
LogicCircuits = "a7847b3b-b7f1-4dd5-83c3-60e0aa0f8599"
Expand All @@ -19,10 +20,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"

[compat]
BlossomV = "0.4"
Clustering = "0.14"
CUDA = "1.2"
DataStructures = "0.17"
LightGraphs = "1.3"
LogicCircuits = "0.1.1"
Expand All @@ -32,4 +36,6 @@ Reexport = "0.2"
SimpleWeightedGraphs = "1.1"
StatsBase = "0.33"
StatsFuns = "0.9"
LoopVectorization = "0.8.20"
MLDatasets = "0.4, 0.5"
julia = "1.5"
2 changes: 1 addition & 1 deletion src/LoadSave/circuit_line_compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ function decorate_logistic(lines::CircuitFormatLines, logic_circuit::LogicCircui

function compile(ln::BiasLine)
root = id2logisticnode(ln.node_id)::Logistic⋁Node
# @assert length(node.thetas) == 1
# @assert length(root.thetas) == 1
root.thetas[1,:] .= ln.weights
end

Expand Down
4 changes: 3 additions & 1 deletion src/Logistic/Logistic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ using ..Utils

include("logistic_nodes.jl")
include("queries.jl")
# include("parameter_circuit.jl")
# include("learn_parameters.jl")

# TODO learning
# TODO structure learning

end
49 changes: 49 additions & 0 deletions src/Logistic/learn_parameters.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
export learn_parameters

using LogicCircuits: compute_flows, or_nodes
using LoopVectorization: @avx

"""
Maximum likilihood estimation of parameters given data through gradient descent
"""
function learn_parameters(lc::LogisticCircuit, classes::Int, data, labels; num_epochs=30, step_size=0.1, flows_computed=false)

@inline function one_hot(labels::Vector, classes::Int)
one_hot_labels = zeros(length(labels), classes)
for (i, j) in enumerate(labels)
one_hot_labels[i, j] = 1.0
end
one_hot_labels
end

one_hot_labels = one_hot(labels, classes)
if !flows_computed
compute_flows(lc, data)
end

for _ = 1:num_epochs
class_probs = class_likelihood_per_instance(lc, classes, data; flows_computed=true)
update_parameters(lc, class_probs, one_hot_labels)
end

nothing
end


@inline function update_parameters(lc::LogisticCircuit, class_probs, one_hot_labels; step_size=0.1)
num_samples = Float64(size(one_hot_labels)[1])
error = class_probs .- one_hot_labels

foreach(or_nodes(lc)) do ln
foreach(eachrow(ln.thetas), children(ln)) do theta, c
flow = Float64.(downflow(ln, c))
@avx update_amount = flow' * error / num_samples * step_size
update_amount = dropdims(update_amount; dims=1)
@avx @. theta -= update_amount
end
end

nothing
end


18 changes: 7 additions & 11 deletions src/Logistic/logistic_nodes.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
export
LogisticCircuit,
LogisticLeafNode,
LogisticInnerNode,
LogisticLiteral,
Logistic⋀Node,
Logistic⋁Node,
classes,
num_parameters_perclass
LogisticLeafNode, LogisticInnerNode,
LogisticLiteral, Logistic⋀Node, Logistic⋁Node,
num_classes, num_parameters_per_class

#####################
# Infrastructure for logistic circuit nodes
Expand Down Expand Up @@ -54,11 +50,11 @@ A logistic disjunction node (Or node)
"""
mutable struct Logistic⋁Node <: LogisticInnerNode
children::Vector{<:LogisticCircuit}
thetas::Array{Float64, 2}
thetas::Matrix{Float64}
data
counter::UInt32
Logistic⋁Node(children, class::Int) = begin
new(convert(Vector{LogisticCircuit}, children), init_array(Float64, length(children), class), nothing, 0)
new(convert(Vector{LogisticCircuit}, children), init_array(Float32, length(children), class), nothing, 0)
end
end

Expand All @@ -77,11 +73,11 @@ import LogicCircuits.GateType # make available for extension

import LogicCircuits: children # make available for extension
@inline children(n::LogisticInnerNode) = n.children
@inline classes(n::Logistic⋁Node) = size(n.thetas)[2]
@inline num_classes(n::Logistic⋁Node) = size(n.thetas)[2]

import ..Utils: num_parameters
@inline num_parameters(c::LogisticCircuit) = sum(n -> num_children(n) * classes(n), ⋁_nodes(c))
@inline num_parameters_perclass(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c))
@inline num_parameters_per_class(c::LogisticCircuit) = sum(n -> num_children(n), ⋁_nodes(c))

#####################
# constructors and conversions
Expand Down
225 changes: 225 additions & 0 deletions src/Logistic/parameter_circuit.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
using CUDA
using LogicCircuits

export LayeredParameterCircuit, CuLayeredParameterCircuit
export class_likelihood, class_weights
export one_hot, learn_parameters, update_parameters

# in a parameter circuit
# 1 is true, 2 is false
const TRUE_ID = Int32(1)
const FALSE_ID = Int32(2)

struct LayeredParameterCircuit
layered_circuit::LayeredBitCircuit
layered_parameters::Vector{Matrix{Float32}}
end

LayeredParameterCircuit(circuit::LogisticCircuit, nc::Integer, num_features::Integer) = begin
@assert is⋁gate(circuit)
decisions::Vector{Vector{Int32}} = Vector{Vector{Int32}}()
elements::Vector{Vector{Int32}} = Vector{Vector{Int32}}()
parameters::Vector{Vector{Float32}} = Vector{Vector{Float32}}()
num_decisions::Int32 = 2 * num_features + 2
num_elements::Vector{Int32} = Vector{Int32}()
# num_parameters always equals num_elements

ensure_layer(i) = begin
if length(decisions) < i
# add a new layer
push!(decisions, Int32[])
push!(elements, Int32[])
push!(parameters, Float32[])
push!(num_elements, 0)
end
end

f_con(n) = LayeredDecisionId(0, istrue(n) ? TRUE_ID : FALSE_ID)
f_lit(n) = LayeredDecisionId(0,
ispositive(n) ? Int32(2 + variable(n)) : Int32(2 + num_features + variable(n)))

f_and(n, cs) = begin
@assert length(cs) == 2
LayeredDecisionId[cs[1], cs[2]]
end
f_or(n, cs) = begin
num_decisions += 1
# determine layer
layer_id = zero(Int32)
for c in cs
if c isa Vector{LayeredDecisionId}
@assert length(c) == 2
layer_id = max(layer_id, c[1].layer_id, c[2].layer_id)
else
@assert c isa LayeredDecisionId
layer_id = max(layer_id, c.layer_id)
end
end
layer_id += 1
ensure_layer(layer_id)
first_element = num_elements[layer_id] + 1
foreach(cs, eachrow(n.thetas)) do c, theta
@assert size(theta)[1] == nc
append!(parameters[layer_id], theta)
num_elements[layer_id] += 1
if c isa Vector{LayeredDecisionId}
push!(elements[layer_id], c[1].decision_id, c[2].decision_id)
else
push!(elements[layer_id], c.decision_id, TRUE_ID)
end
end
push!(decisions[layer_id], num_decisions, first_element, num_elements[layer_id])
LayeredDecisionId(layer_id, num_decisions)
end

foldup_aggregate(circuit, f_con, f_lit, f_and, f_or,
Union{LayeredDecisionId,Vector{LayeredDecisionId}})

circuit_layers = map(decisions, elements) do d, e
Layer(reshape(d, 3, :), reshape(e, 2, :))
end
parameter_layers = map(parameters) do p
reshape(p, nc, :)
end
return LayeredParameterCircuit(LayeredBitCircuit(circuit_layers), parameter_layers)
end

struct CuLayeredParameterCircuit
layered_circuit::CuLayeredBitCircuit
layered_parameters::Vector{CuMatrix{Float32}}
CuLayeredParameterCircuit(l::LayeredParameterCircuit) = new(CuLayeredBitCircuit(l.layered_circuit), map(CuMatrix, l.layered_parameters))
end



function class_likelihood(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing)
cw, flow, v = class_weights(circuit, nc, data, reuse_up, reuse_down, reuse_cp)
one = Float32(1.0)
return @. one / (one + exp(-cw)), flow, v
end

function class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cw=nothing)
flow, v = compute_flows2(circuit.layered_circuit, data, reuse_up, reuse_down)
cw = calculate_class_weights(circuit, nc, data, v, flow, reuse_cw)
return cw, flow, v
end

function calculate_class_weights(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, v, flow, reuse_cw=nothing)
ne = num_examples(data)
cw = if reuse_cw isa CuMatrix{Float32} && size(reuse_cw) == (ne, nc)
reuse_cw .= zero(Float32)
reuse_cw
else
CUDA.zeros(Float32, ne, nc)
end

dec_per_thread = 4
CUDA.@sync for i = 1:length(circuit.layered_circuit.layers)
circuit_layer = circuit.layered_circuit.layers[i]
parameter_layer = circuit.layered_parameters[i]
ndl = num_decisions(circuit_layer)
num_threads = balance_threads(ne, ndl / dec_per_thread, 8)
num_blocks = ceil(Int, ne / num_threads[1]), ceil(Int, ndl / num_threads[2] / dec_per_thread)
@cuda threads=num_threads blocks=num_blocks calculate_class_weights_layer_kernel_cuda(cw, v, flow, circuit_layer.decisions, circuit_layer.elements, parameter_layer)
end

return cw
end

function calculate_class_weights_layer_kernel_cuda(cw, v, flow, decisions, elements, parameters)
index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x
index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y
stride_x = blockDim().x * gridDim().x
stride_y = blockDim().y * gridDim().y
ne, nc = size(cw)
_, num_decisions = size(decisions)

for j = index_x:stride_x:ne
for i = index_y:stride_y:num_decisions
decision_id = @inbounds decisions[1, i]
n_up = @inbounds v[j, decision_id]
if n_up > zero(Float32)
first_elem = @inbounds decisions[2, i]
last_elem = @inbounds decisions[3, i]
n_down = @inbounds flow[j, decision_id]
for e = first_elem:last_elem
e1 = @inbounds elements[1, first_elem]
e2 = @inbounds elements[2, first_elem]
e_up = @inbounds (v[j, e1] * v[j, e2])
edge_flow = e_up / n_up * n_down
# following needs to be memory safe
for class=1:nc
@CUDA.atomic cw[j, class] += edge_flow * parameters[class, e] # atomic is automatically inbounds
end
end
end
end
end

return nothing
end



function one_hot(labels::Vector, nc::Integer)
ne = length(labels)
one_hot_labels = zeros(Float32, ne, nc)
for (i, j) in enumerate(labels)
one_hot_labels[i, j + 1] = 1.0
end
one_hot_labels
end

function learn_parameters(circuit::CuLayeredParameterCircuit, nc::Integer, data::CuMatrix{Float32}, labels::CuMatrix{Float32}, reuse_up=nothing, reuse_down=nothing, reuse_cp=nothing, num_epochs=20, step_size=0.0001)
cp, flow, v = class_likelihood(circuit, nc, data, reuse_up, reuse_down, reuse_cp)
update_parameters(circuit, labels, cp, flow, step_size)
for _ = 2:num_epochs
cp, flow, v = class_likelihood(circuit, nc, data, v, flow, cp)
update_parameters(circuit, labels, cp, v, flow, step_size)
end
return nothing
end

function update_parameters(circuit::CuLayeredParameterCircuit, labels, cp, v, flow, step_size=0.0001)
_, nc = size(labels)
step_size = Float32(step_size)
CUDA.@sync for i = 1:length(circuit.layered_circuit.layers)
circuit_layer = circuit.layered_circuit.layers[i]
flow_layer = flow[i]
parameter_layer = circuit.layered_parameters[i]
ndl = num_decisions(circuit_layer)
num_threads = balance_threads(ndl, nc, 6)
num_threads = num_threads[1], num_threads[2],
num_blocks = ceil(Int, ndl / num_threads[1]), ceil(Int, nc / num_threads[2]), 4
@cuda threads=num_threads blocks=num_blocks update_parameters_layer_kernel_cuda(labels, cp, flow_layer, circuit_layer.decisions, parameter_layer, step_size)
end
return nothing
end

function update_parameters_layer_kernel_cuda(labels, cp, flow, decisions, parameters, step_size)
index_x = (blockIdx().x - 1) * blockDim().x + threadIdx().x
index_y = (blockIdx().y - 1) * blockDim().y + threadIdx().y
index_z = (blockIdx().z - 1) * blockDim().z + threadIdx().z
stride_x = blockDim().x * gridDim().x
stride_y = blockDim().y * gridDim().y
stride_z = blockDim().z * gridDim().z
ne, nc = size(labels)
_, num_decisions = size(decisions)

for class = index_y:stride_y:nc
for i = index_x:stride_x:num_decisions
first_elem = @inbounds decisions[2, i]
last_elem = @inbounds decisions[3, i]
for e = first_elem:last_elem
for j = index_z:stride_z:ne
edge_flow = e_up / n_up * n_down
u = @inbounds edge_flow * (cp[j, class] - labels[j, class]) * step_size
# following needs to be memory safe
@inbounds parameters[class, e] -= u
end
end
end
end

return nothing
end
Loading