From 0efe36881f9eeb47da6e496dbaed06fe8f9ef891 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Fri, 7 Jan 2022 03:37:43 +0000 Subject: [PATCH 01/18] temporarily add some pkgs to do testing --- Project.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 7166d770..a5bb9d46 100644 --- a/Project.toml +++ b/Project.toml @@ -6,15 +6,18 @@ repo = "https://github.com/TuringLang/Libtask.jl.git" version = "0.6.2" [deps] +AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] -julia = "1.3" -MacroTools = "0.5" IRTools = "0.4" +MacroTools = "0.5" +julia = "1.3" [extras] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" From 463c187a3a6c0a6205e7bb1baa2bbc567259b34f Mon Sep 17 00:00:00 2001 From: KDr2 Date: Fri, 7 Jan 2022 08:07:22 +0000 Subject: [PATCH 02/18] simple benchmarks --- Project.toml | 1 + perf/p0.jl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 perf/p0.jl diff --git a/Project.toml b/Project.toml index a5bb9d46..3538d4eb 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.6.2" [deps] AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/perf/p0.jl b/perf/p0.jl new file mode 100644 index 00000000..273fe869 --- /dev/null +++ b/perf/p0.jl @@ -0,0 +1,40 @@ +# ]add Turing#hg/new-libtask2 + +using Libtask +using Turing, DynamicPPL, AdvancedPS +using BenchmarkTools + +@model gdemo(x, y) = begin + # Assumptions + σ 
~ InverseGamma(2,3) + μ ~ Normal(0,sqrt(σ)) + # Observations + x ~ Normal(μ, sqrt(σ)) + y ~ Normal(μ, sqrt(σ)) +end + + +# Case 1: Sample from the prior. + +m = Turing.Core.TracedModel(gdemo(1.5, 2.), SampleFromPrior(), VarInfo()) + +f = m.evaluator[1]; + +args = m.evaluator[2:end]; + +@btime f(args...) +# (2.0, VarInfo (2 variables (μ, σ), dimension 2; logp: -6.162)) + +t = Libtask.CTask(f, args...) +# schedule(t.task) # work fine! +# @show Libtask.result(t.tf.tape) +@btime Libtask.step_in(t.tf.tape, args) + +# Case 2: SMC sampler + +m = Turing.Core.TracedModel(gdemo(1.5, 2.), Sampler(SMC(50)), VarInfo()); +t = Libtask.CTask(m.evaluator[1], m.evaluator[2:end]...); +# schedule(t.task) +# @show Libtask.result(t.tf.tape) +@btime m.evaluator[1](m.evaluator[2:end]...) +@btime Libtask.step_in(t.tf.tape, m.evaluator[2:end]) From 7d9eeb0bfc02d814bbdc1df6f393d2e5de87ae85 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Sun, 9 Jan 2022 07:30:41 +0000 Subject: [PATCH 03/18] use ir and tape cache --- perf/p0.jl | 13 ++++++++++--- src/tapedfunction.jl | 9 ++++++++- src/tapedtask.jl | 12 +++++++++++- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/perf/p0.jl b/perf/p0.jl index 273fe869..b2ae3c57 100644 --- a/perf/p0.jl +++ b/perf/p0.jl @@ -22,19 +22,26 @@ f = m.evaluator[1]; args = m.evaluator[2:end]; +@show "Directly call..." @btime f(args...) # (2.0, VarInfo (2 variables (μ, σ), dimension 2; logp: -6.162)) -t = Libtask.CTask(f, args...) +@show "CTask construction..." +t = @btime Libtask.CTask(f, args...) # schedule(t.task) # work fine! # @show Libtask.result(t.tf.tape) +@show "Step in a tape..." @btime Libtask.step_in(t.tf.tape, args) # Case 2: SMC sampler m = Turing.Core.TracedModel(gdemo(1.5, 2.), Sampler(SMC(50)), VarInfo()); -t = Libtask.CTask(m.evaluator[1], m.evaluator[2:end]...); +@show "Directly call..." +@btime m.evaluator[1](m.evaluator[2:end]...) + +@show "CTask construction..." 
+t = @btime Libtask.CTask(m.evaluator[1], m.evaluator[2:end]...); # schedule(t.task) # @show Libtask.result(t.tf.tape) -@btime m.evaluator[1](m.evaluator[2:end]...) +@show "Step in a tape..." @btime Libtask.step_in(t.tf.tape, m.evaluator[2:end]) diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index 083cfccc..186ddb53 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -91,7 +91,7 @@ end function run_and_record!(tape::Tape, f, args...) f = val(f) # f maybe a Boxed closure output = try - box(f(map(val, args)...)) + Box{Any}(f(map(val, args)...)) catch e @warn e Box{Any}(nothing) @@ -190,6 +190,13 @@ mutable struct TapedFunction end end +function reset!(tf::TapedFunction, ir::IRTools.IR, tape::Tape) + tf.ir = ir + tf.tape = tape + setowner!(tape, tf) + return tf +end + function (tf::TapedFunction)(args...) if isempty(tf.tape) ir = IRTools.@code_ir tf.func(args...) diff --git a/src/tapedtask.jl b/src/tapedtask.jl index e6da976d..90aaafba 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -16,9 +16,19 @@ struct TapedTask end end +const TRCache = Dict{Any, Any}() + function TapedTask(tf::TapedFunction, args...) tf.owner != nothing && error("TapedFunction is owned to another task.") - isempty(tf.tape) && tf(args...) + if isempty(tf.tape) + if haskey(TRCache, tf.func) + ir, tape = TRCache[tf.func] + reset!(tf, ir, copy(tape, Dict{UInt64, Any}())) + else + tf(args...) 
+ TRCache[tf.func] = (tf.ir, tf.tape) + end + end produce_ch = Channel() consume_ch = Channel{Int}() task = @task try From b78d3f30fa9e7aedda35921349e240ea43761e5c Mon Sep 17 00:00:00 2001 From: KDr2 Date: Mon, 10 Jan 2022 19:23:39 +0000 Subject: [PATCH 04/18] use LRUCache instead of Dict --- Project.toml | 1 + src/Libtask.jl | 2 ++ src/tapedtask.jl | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3538d4eb..56ec483e 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" +LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/src/Libtask.jl b/src/Libtask.jl index 12a8c516..59795b98 100644 --- a/src/Libtask.jl +++ b/src/Libtask.jl @@ -3,6 +3,8 @@ module Libtask using IRTools using MacroTools +using LRUCache + export CTask, consume, produce export TArray, tzeros, tfill, TRef diff --git a/src/tapedtask.jl b/src/tapedtask.jl index 90aaafba..bc358bc8 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -16,7 +16,7 @@ struct TapedTask end end -const TRCache = Dict{Any, Any}() +const TRCache = LRU{Any, Any}(maxsize=10) function TapedTask(tf::TapedFunction, args...) 
tf.owner != nothing && error("TapedFunction is owned to another task.") From dd211e9b9c52d8edd911dc30c52bb72b3d5c1982 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Mon, 10 Jan 2022 19:38:33 +0000 Subject: [PATCH 05/18] partially copy tape --- src/tapedtask.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/tapedtask.jl b/src/tapedtask.jl index bc358bc8..bf2fd7a0 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -23,7 +23,7 @@ function TapedTask(tf::TapedFunction, args...) if isempty(tf.tape) if haskey(TRCache, tf.func) ir, tape = TRCache[tf.func] - reset!(tf, ir, copy(tape, Dict{UInt64, Any}())) + reset!(tf, ir, copy(tape, Dict{UInt64, Any}(); partial=false)) else tf(args...) TRCache[tf.func] = (tf.ir, tf.tape) @@ -209,14 +209,16 @@ function Base.copy(x::Instruction, on_tape::Tape, roster::Dict{UInt64, Any}) Instruction(x.fun, input, output, on_tape) end -function Base.copy(t::Tape, roster::Dict{UInt64, Any}) +function Base.copy(t::Tape, roster::Dict{UInt64, Any}; partial=true) old_data = t.tape - new_data = Vector{AbstractInstruction}() - new_tape = Tape(new_data, t.counter, t.owner) + len = partial ? length(old_data) - t.counter + 1 : length(old_data) + start = partial ? 
t.counter : 1 + new_data = Vector{AbstractInstruction}(undef, len) + new_tape = Tape(new_data, 1, t.owner) - for x in old_data + for (i, x) in enumerate(old_data[start:end]) new_ins = copy(x, new_tape, roster) - push!(new_data, new_ins) + new_data[i] = new_ins end return new_tape From 2e3491d42a9061a0cf846828fa879dd8d2b2b717 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 11 Jan 2022 01:21:58 +0000 Subject: [PATCH 06/18] fix a TArray bug --- src/tarray.jl | 93 ++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/src/tarray.jl b/src/tarray.jl index 507f59c5..9c3c2fba 100644 --- a/src/tarray.jl +++ b/src/tarray.jl @@ -37,6 +37,9 @@ TArray{T,N}(::UndefInitializer, d::Vararg{<:Integer,N}) where {T,N} = TArray{T,N TArray{T,N}(dim::NTuple{N,Int}) where {T,N} = TArray(T, dim) TArray(T::Type, dim) = TArray(Array{T}(undef, dim)) +localize(x) = x +localize(x::AbstractArray) = TArray(x) +getdata(x) = x getdata(x::TArray) = x.data tape_copy(x::TArray) = TArray(deepcopy(x.data)) @@ -166,70 +169,70 @@ end # Other methods from stdlib Base.view(x::TArray, inds...; kwargs...) = - Base.view(getdata(x), inds...; kwargs...) |> TArray -Base.:-(x::TArray) = (-getdata(x)) |> TArray -Base.transpose(x::TArray) = transpose(getdata(x)) |> TArray -Base.adjoint(x::TArray) = adjoint(getdata(x)) |> TArray -Base.repeat(x::TArray; kw...) = repeat(getdata(x); kw...) |> TArray + Base.view(getdata(x), inds...; kwargs...) |> localize +Base.:-(x::TArray) = (-getdata(x)) |> localize +Base.transpose(x::TArray) = transpose(getdata(x)) |> localize +Base.adjoint(x::TArray) = adjoint(getdata(x)) |> localize +Base.repeat(x::TArray; kw...) = repeat(getdata(x); kw...) |> localize Base.hcat(xs::Union{TArray{T,1}, TArray{T,2}}...) where T = - hcat(getdata.(xs)...) |> TArray + hcat(getdata.(xs)...) |> localize Base.vcat(xs::Union{TArray{T,1}, TArray{T,2}}...) where T = - vcat(getdata.(xs)...) |> TArray + vcat(getdata.(xs)...) 
|> localize Base.cat(xs::Union{TArray{T,1}, TArray{T,2}}...; dims) where T = - cat(getdata.(xs)...; dims = dims) |> TArray + cat(getdata.(xs)...; dims = dims) |> localize -Base.reshape(x::TArray, dims::Union{Colon,Int}...) = reshape(getdata(x), dims) |> TArray +Base.reshape(x::TArray, dims::Union{Colon,Int}...) = reshape(getdata(x), dims) |> localize Base.reshape(x::TArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = - reshape(getdata(x), Base._reshape_uncolon(getdata(x), dims)) |> TArray -Base.reshape(x::TArray, dims::Tuple{Vararg{Int}}) = reshape(getdata(x), dims) |> TArray - -Base.permutedims(x::TArray, perm) = permutedims(getdata(x), perm) |> TArray -Base.PermutedDimsArray(x::TArray, perm) = PermutedDimsArray(getdata(x), perm) |> TArray -Base.reverse(x::TArray; dims) = reverse(getdata(x), dims = dims) |> TArray - -Base.sum(x::TArray; dims = :) = sum(getdata(x), dims = dims) |> TArray -Base.sum(f::Union{Function,Type},x::TArray) = sum(f.(getdata(x))) |> TArray -Base.prod(x::TArray; dims=:) = prod(getdata(x); dims=dims) |> TArray -Base.prod(f::Union{Function, Type}, x::TArray) = prod(f.(getdata(x))) |> TArray - -Base.findfirst(x::TArray, args...) = findfirst(getdata(x), args...) 
|> TArray -Base.maximum(x::TArray; dims = :) = maximum(getdata(x), dims = dims) |> TArray -Base.minimum(x::TArray; dims = :) = minimum(getdata(x), dims = dims) |> TArray - -Base.:/(x::TArray, y::TArray) = getdata(x) / getdata(y) |> TArray -Base.:/(x::AbstractArray, y::TArray) = x / getdata(y) |> TArray -Base.:/(x::TArray, y::AbstractArray) = getdata(x) / y |> TArray -Base.:\(x::TArray, y::TArray) = getdata(x) \ getdata(y) |> TArray -Base.:\(x::AbstractArray, y::TArray) = x \ getdata(y) |> TArray -Base.:\(x::TArray, y::AbstractArray) = getdata(x) \ y |> TArray -Base.:*(x::TArray, y::TArray) = getdata(x) * getdata(y) |> TArray -Base.:*(x::AbstractArray, y::TArray) = x * getdata(y) |> TArray -Base.:*(x::TArray, y::AbstractArray) = getdata(x) * y |> TArray + reshape(getdata(x), Base._reshape_uncolon(getdata(x), dims)) |> localize +Base.reshape(x::TArray, dims::Tuple{Vararg{Int}}) = reshape(getdata(x), dims) |> localize + +Base.permutedims(x::TArray, perm) = permutedims(getdata(x), perm) |> localize +Base.PermutedDimsArray(x::TArray, perm) = PermutedDimsArray(getdata(x), perm) |> localize +Base.reverse(x::TArray; dims) = reverse(getdata(x), dims = dims) |> localize + +Base.sum(x::TArray; dims = :) = sum(getdata(x), dims = dims) |> localize +Base.sum(f::Union{Function,Type},x::TArray) = sum(f.(getdata(x))) |> localize +Base.prod(x::TArray; dims=:) = prod(getdata(x); dims=dims) |> localize +Base.prod(f::Union{Function, Type}, x::TArray) = prod(f.(getdata(x))) |> localize + +Base.findfirst(x::TArray, args...) = findfirst(getdata(x), args...) 
|> localize +Base.maximum(x::TArray; dims = :) = maximum(getdata(x), dims = dims) |> localize +Base.minimum(x::TArray; dims = :) = minimum(getdata(x), dims = dims) |> localize + +Base.:/(x::TArray, y::TArray) = getdata(x) / getdata(y) |> localize +Base.:/(x::AbstractArray, y::TArray) = x / getdata(y) |> localize +Base.:/(x::TArray, y::AbstractArray) = getdata(x) / y |> localize +Base.:\(x::TArray, y::TArray) = getdata(x) \ getdata(y) |> localize +Base.:\(x::AbstractArray, y::TArray) = x \ getdata(y) |> localize +Base.:\(x::TArray, y::AbstractArray) = getdata(x) \ y |> localize +Base.:*(x::TArray, y::TArray) = getdata(x) * getdata(y) |> localize +Base.:*(x::AbstractArray, y::TArray) = x * getdata(y) |> localize +Base.:*(x::TArray, y::AbstractArray) = getdata(x) * y |> localize # broadcast Base.BroadcastStyle(::Type{<:TArray}) = Broadcast.ArrayStyle{TArray}() -Broadcast.broadcasted(::Broadcast.ArrayStyle{TArray}, f, args...) = f.(getdata.(args)...) |> TArray +Broadcast.broadcasted(::Broadcast.ArrayStyle{TArray}, f, args...) = f.(getdata.(args)...) 
|> localize import LinearAlgebra import LinearAlgebra: \, /, inv, det, logdet, logabsdet, norm -LinearAlgebra.inv(x::TArray) = inv(getdata(x)) |> TArray -LinearAlgebra.det(x::TArray) = det(getdata(x)) |> TArray -LinearAlgebra.logdet(x::TArray) = logdet(getdata(x)) |> TArray -LinearAlgebra.logabsdet(x::TArray) = logabsdet(getdata(x)) |> TArray +LinearAlgebra.inv(x::TArray) = inv(getdata(x)) |> localize +LinearAlgebra.det(x::TArray) = det(getdata(x)) |> localize +LinearAlgebra.logdet(x::TArray) = logdet(getdata(x)) |> localize +LinearAlgebra.logabsdet(x::TArray) = logabsdet(getdata(x)) |> localize LinearAlgebra.norm(x::TArray, p::Real = 2) = - LinearAlgebra.norm(getdata(x), p) |> TArray + LinearAlgebra.norm(getdata(x), p) |> localize import LinearAlgebra: dot -dot(x::TArray, ys::TArray) = dot(getdata(x), getdata(ys)) |> TArray -dot(x::AbstractArray, ys::TArray) = dot(x, getdata(ys)) |> TArray -dot(x::TArray, ys::AbstractArray) = dot(getdata(x), ys) |> TArray +dot(x::TArray, ys::TArray) = dot(getdata(x), getdata(ys)) |> localize +dot(x::AbstractArray, ys::TArray) = dot(x, getdata(ys)) |> localize +dot(x::TArray, ys::AbstractArray) = dot(getdata(x), ys) |> localize using Statistics -Statistics.mean(x::TArray; dims = :) = mean(getdata(x), dims = dims) |> TArray -Statistics.std(x::TArray; kw...) = std(getdata(x), kw...) |> TArray +Statistics.mean(x::TArray; dims = :) = mean(getdata(x), dims = dims) |> localize +Statistics.std(x::TArray; kw...) = std(getdata(x), kw...) 
|> localize # TODO # * NNlib From 400571cc12893deb2ac82694e2d0835df003a83d Mon Sep 17 00:00:00 2001 From: KDr2 Date: Wed, 12 Jan 2022 00:24:34 +0000 Subject: [PATCH 07/18] add Project.toml for perf dir --- Project.toml | 8 ++------ perf/Project.toml | 22 ++++++++++++++++++++++ perf/p1.jl | 39 +++++++++++++++++++++++++++++++++++++++ perf/src/LibtaskPerf.jl | 2 ++ src/tapedfunction.jl | 5 ++++- src/tapedtask.jl | 11 +++++++---- 6 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 perf/Project.toml create mode 100644 perf/p1.jl create mode 100644 perf/src/LibtaskPerf.jl diff --git a/Project.toml b/Project.toml index 56ec483e..1de90e38 100644 --- a/Project.toml +++ b/Project.toml @@ -6,20 +6,16 @@ repo = "https://github.com/TuringLang/Libtask.jl.git" version = "0.6.2" [deps] -AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" -DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] -IRTools = "0.4" -MacroTools = "0.5" julia = "1.3" +MacroTools = "0.5" +IRTools = "0.4" [extras] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" diff --git a/perf/Project.toml b/perf/Project.toml new file mode 100644 index 00000000..6cd85f3c --- /dev/null +++ b/perf/Project.toml @@ -0,0 +1,22 @@ +name = "LibtaskPerf" +uuid = "09aeecf0-733b-11ec-8a4e-06c55de9177a" +license = "MIT" +desc = "Performance Tuning for Libtask" +version = "0.0.1" + +[deps] +AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" +AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" +Libtask = 
"6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +julia = "1.3" + +[extras] + +[targets] +test = ["Test", "BenchmarkTools"] diff --git a/perf/p1.jl b/perf/p1.jl new file mode 100644 index 00000000..85adcfa9 --- /dev/null +++ b/perf/p1.jl @@ -0,0 +1,39 @@ +using Turing, Test, AbstractMCMC, DynamicPPL, Random + +import AbstractMCMC.AbstractSampler + +function check_numerical(chain, + symbols::Vector, + exact_vals::Vector; + atol=0.2, + rtol=0.0) + for (sym, val) in zip(symbols, exact_vals) + E = val isa Real ? + mean(chain[sym]) : + vec(mean(chain[sym], dims=1)) + @info (symbol=sym, exact=val, evaluated=E) + @test E ≈ val atol=atol rtol=rtol + end +end + +function check_MoGtest_default(chain; atol=0.2, rtol=0.0) + check_numerical(chain, + [:z1, :z2, :z3, :z4, :mu1, :mu2], + [1.0, 1.0, 2.0, 2.0, 1.0, 4.0], + atol=atol, rtol=rtol) +end + +@model gdemo_d(x, y) = begin + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + x ~ Normal(m, sqrt(s)) + y ~ Normal(m, sqrt(s)) + return s, m +end + +alg = CSMC(15) +chain = sample(gdemo_d(1.5, 2.0), alg, 5_00) + +@show chain + +check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) diff --git a/perf/src/LibtaskPerf.jl b/perf/src/LibtaskPerf.jl new file mode 100644 index 00000000..b0871b2d --- /dev/null +++ b/perf/src/LibtaskPerf.jl @@ -0,0 +1,2 @@ +module LibtaskPerf +end diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index 186ddb53..8e58df3a 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -53,6 +53,9 @@ function Base.show(io::IO, instruction::Instruction) end function Base.show(io::IO, tp::Tape) + # we use an extra IOBuffer to collect all the data and then + # output it once to avoid output interrupt during task context + # switching buf = IOBuffer() print(buf, "$(length(tp))-element Tape") isempty(tp) || println(buf, ":") @@ -91,7 +94,7 @@ end function run_and_record!(tape::Tape, f, args...) 
f = val(f) # f maybe a Boxed closure output = try - Box{Any}(f(map(val, args)...)) + box(f(map(val, args)...)) catch e @warn e Box{Any}(nothing) diff --git a/src/tapedtask.jl b/src/tapedtask.jl index bf2fd7a0..b96cb5e6 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -19,14 +19,17 @@ end const TRCache = LRU{Any, Any}(maxsize=10) function TapedTask(tf::TapedFunction, args...) - tf.owner != nothing && error("TapedFunction is owned to another task.") + tf.owner != nothing && error("TapedFunction is owned by another task.") if isempty(tf.tape) - if haskey(TRCache, tf.func) - ir, tape = TRCache[tf.func] + cache_key = (tf.func, typeof.(args)...) + if haskey(TRCache, cache_key) + ir, tape = TRCache[cache_key] + # Here we don't need change the initial arguments of the tape, + # it will be set when we `step_in` to the tape. reset!(tf, ir, copy(tape, Dict{UInt64, Any}(); partial=false)) else tf(args...) - TRCache[tf.func] = (tf.ir, tf.tape) + TRCache[cache_key] = (tf.ir, tf.tape) end end produce_ch = Channel() From 3a2524061b9c7be7cefce423c46d4c546f2d9e05 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Wed, 12 Jan 2022 00:41:04 +0000 Subject: [PATCH 08/18] minor update --- perf/p1.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf/p1.jl b/perf/p1.jl index 85adcfa9..4ecd2ec8 100644 --- a/perf/p1.jl +++ b/perf/p1.jl @@ -32,7 +32,7 @@ end end alg = CSMC(15) -chain = sample(gdemo_d(1.5, 2.0), alg, 5_00) +chain = sample(gdemo_d(1.5, 2.0), alg, 5_000) @show chain From f24480ebd45e9a5ded071aade25a64d075876847 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Wed, 12 Jan 2022 12:32:43 +0000 Subject: [PATCH 09/18] Update src/tapedtask.jl Co-authored-by: David Widmann --- src/tapedtask.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tapedtask.jl b/src/tapedtask.jl index b96cb5e6..72af7d2e 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -19,7 +19,7 @@ end const TRCache = LRU{Any, Any}(maxsize=10) function TapedTask(tf::TapedFunction, 
args...) - tf.owner != nothing && error("TapedFunction is owned by another task.") + tf.owner !== nothing && error("TapedFunction is owned by another task.") if isempty(tf.tape) cache_key = (tf.func, typeof.(args)...) if haskey(TRCache, cache_key) From 9b548c6acd37e5413328d899c4e7c88783edbfbc Mon Sep 17 00:00:00 2001 From: KDr2 Date: Wed, 12 Jan 2022 23:57:38 +0000 Subject: [PATCH 10/18] remove redundant module --- perf/Project.toml | 6 ------ perf/src/LibtaskPerf.jl | 2 -- 2 files changed, 8 deletions(-) delete mode 100644 perf/src/LibtaskPerf.jl diff --git a/perf/Project.toml b/perf/Project.toml index 6cd85f3c..829ff90d 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -1,9 +1,3 @@ -name = "LibtaskPerf" -uuid = "09aeecf0-733b-11ec-8a4e-06c55de9177a" -license = "MIT" -desc = "Performance Tuning for Libtask" -version = "0.0.1" - [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" diff --git a/perf/src/LibtaskPerf.jl b/perf/src/LibtaskPerf.jl deleted file mode 100644 index b0871b2d..00000000 --- a/perf/src/LibtaskPerf.jl +++ /dev/null @@ -1,2 +0,0 @@ -module LibtaskPerf -end From c6ec201e52bb4d9b9c28cc2affd7138b40d57f09 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Mon, 17 Jan 2022 17:20:07 +0000 Subject: [PATCH 11/18] Catch and print error while re-running a (cached) tape. --- src/tapedfunction.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index 8e58df3a..d213b0af 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -69,8 +69,13 @@ function Base.show(io::IO, tp::Tape) end function (instr::Instruction{F})() where F - output = instr.fun(map(val, instr.input)...) + try + output = instr.fun(map(Libtask.val, instr.input)...) 
instr.output.val = output + catch e + println(e, catch_backtrace()); + rethrow(e); + end end function increase_counter!(t::Tape) From e4838e9d0e7c206bb668f453266d790113f37d9a Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 18 Jan 2022 03:18:43 +0000 Subject: [PATCH 12/18] put `new` onto tape --- perf/Project.toml | 4 +-- perf/p2.jl | 63 ++++++++++++++++++++++++++++++++++++++++++++ src/tapedfunction.jl | 54 ++++++++++++++++++++++++++++++++----- 3 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 perf/p2.jl diff --git a/perf/Project.toml b/perf/Project.toml index 829ff90d..9e9ab49b 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -4,13 +4,11 @@ AdvancedPS = "576499cb-2369-40b2-a588-c64705576edc" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" -Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] julia = "1.3" -[extras] - [targets] test = ["Test", "BenchmarkTools"] diff --git a/perf/p2.jl b/perf/p2.jl new file mode 100644 index 00000000..44fd61a7 --- /dev/null +++ b/perf/p2.jl @@ -0,0 +1,63 @@ +using Turing, Test, AbstractMCMC, DynamicPPL, Random, Turing.RandomMeasures, Libtask + +@model infiniteGMM(x) = begin + # Hyper-parameters, i.e. concentration parameter and parameters of H. + α = 1.0 + μ0 = 0.0 + σ0 = 1.0 + + # Define random measure, e.g. Dirichlet process. + rpm = DirichletProcess(α) + + # Define the base distribution, i.e. expected value of the Dirichlet process. + H = Normal(μ0, σ0) + + # Latent assignment. + z = tzeros(Int, length(x)) + + # Locations of the infinitely many clusters. + μ = tzeros(Float64, 0) + + for i in 1:length(x) + + # Number of clusters. + K = maximum(z) + nk = Vector{Int}(map(k -> sum(z .== k), 1:K)) + + # Draw the latent assignment. + z[i] ~ ChineseRestaurantProcess(rpm, nk) + + # Create a new cluster? 
+ if z[i] > K + push!(μ, 0.0) + + # Draw location of new cluster. + μ[z[i]] ~ H + end + + # Draw observation. + x[i] ~ Normal(μ[z[i]], 1.0) + end +end + +# Generate some test data. +Random.seed!(1) + +data = vcat(randn(10), randn(10) .- 5, randn(10) .+ 10) +data .-= mean(data) +data /= std(data) + +# MCMC sampling +Random.seed!(2) +iterations = 500 +model_fun = infiniteGMM(data) + +m = Turing.Core.TracedModel(model_fun, Sampler(SMC(50)), VarInfo()) +f = m.evaluator[1] +args = m.evaluator[2:end] + +t = Libtask.CTask(f, args...) + +Libtask.step_in(t.tf.tape, args) + +@show Libtask.result(t.tf.tape) diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index d213b0af..ed52d9be 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -13,6 +13,12 @@ mutable struct Instruction{F} <: AbstractInstruction tape::Tape end +mutable struct NewInstruction <: AbstractInstruction + input::Tuple + output + tape::Tape +end + Tape() = Tape(Vector{AbstractInstruction}(), 1, nothing) Tape(owner) = Tape(Vector{AbstractInstruction}(), 1, owner) MacroTools.@forward Tape.tape Base.iterate, Base.length @@ -46,6 +52,10 @@ function Base.show(io::IO, box::Box) println(io, "Box($(box.val))") end +function Base.show(io::IO, instruction::AbstractInstruction) + println(io, "A $(typeof(instruction))") +end + function Base.show(io::IO, instruction::Instruction) fun = instruction.fun tape = instruction.tape @@ -69,15 +79,28 @@ function Base.show(io::IO, tp::Tape) end function (instr::Instruction{F})() where F - try - output = instr.fun(map(Libtask.val, instr.input)...) - instr.output.val = output + try + output = instr.fun(map(val, instr.input)...) + instr.output.val = output + catch e + println(e, catch_backtrace()); + rethrow(e); + end +end + + +function (instr::NewInstruction)() + try + expr = Expr(:new, map(val, instr.input)...) 
+ output = eval(expr) + instr.output.val = output catch e - println(e, catch_backtrace()); + println(e, catch_backtrace()); rethrow(e); end end + function increase_counter!(t::Tape) t.counter > length(t) && return # instr = t[t.counter] @@ -109,6 +132,19 @@ function run_and_record!(tape::Tape, f, args...) return output end +function run_and_record_new!(tape::Tape, args...) + output = try + expr = Expr(:new, map(val, args)...) + box(eval(expr)) + catch e + @warn e + Box{Any}(nothing) + end + ins = NewInstruction(args, output, tape) + push!(tape, ins) + return output +end + function unbox_condition(ir) for blk in IRTools.blocks(ir) vars = keys(blk) @@ -177,9 +213,13 @@ function intercept(ir; recorder=:run_and_record!) for (x, st) in ir x == tape && continue - Meta.isexpr(st.expr, :call) || continue - new_args = (x == args_var) ? st.expr.args : _replace_args(st.expr.args, arg_pairs) - ir[x] = IRTools.xcall(@__MODULE__, recorder, tape, new_args...) + if Meta.isexpr(st.expr, :call) + new_args = (x == args_var) ? st.expr.args : _replace_args(st.expr.args, arg_pairs) + ir[x] = IRTools.xcall(@__MODULE__, recorder, tape, new_args...) + elseif Meta.isexpr(st.expr, :new) + args = st.expr.args + ir[x] = IRTools.xcall(@__MODULE__, :run_and_record_new!, tape, args...) 
+ end end # the real return value will be in the last instruction on the tape IRTools.return!(ir, tape) From e1ae835bed9bd7e0c8b20c17f4dbaa580fc60999 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 18 Jan 2022 05:54:42 +0000 Subject: [PATCH 13/18] copy NewInstruction --- src/tapedtask.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/tapedtask.jl b/src/tapedtask.jl index 72af7d2e..75861cea 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -212,6 +212,14 @@ function Base.copy(x::Instruction, on_tape::Tape, roster::Dict{UInt64, Any}) Instruction(x.fun, input, output, on_tape) end +function Base.copy(x::NewInstruction, on_tape::Tape, roster::Dict{UInt64, Any}) + input = map(x.input) do ob + copy_box(ob, roster) + end + output = copy_box(x.output, roster) + NewInstruction(input, output, on_tape) +end + function Base.copy(t::Tape, roster::Dict{UInt64, Any}; partial=true) old_data = t.tape len = partial ? length(old_data) - t.counter + 1 : length(old_data) From c9673cd11255b54f311b0354cae11db78a66db36 Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 18 Jan 2022 08:41:44 +0000 Subject: [PATCH 14/18] update docs/comments --- src/tapedfunction.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index ed52d9be..ceece830 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -6,6 +6,11 @@ mutable struct Tape owner end +""" + Instruction + +An `Instruction` stands for a function call +""" mutable struct Instruction{F} <: AbstractInstruction fun::F input::Tuple @@ -13,6 +18,14 @@ mutable struct Instruction{F} <: AbstractInstruction tape::Tape end + +""" + NewInstruction + +A `NewInstruction` stands for a `new` operator, which only appears in +an inner constructor. Its represtation in IRCode is not a function call, +so we need a new intruction type to represent it on tapes. 
+""" mutable struct NewInstruction <: AbstractInstruction input::Tuple output @@ -79,6 +92,7 @@ function Base.show(io::IO, tp::Tape) end function (instr::Instruction{F})() where F + # Catch run-time exceptions / errors. try output = instr.fun(map(val, instr.input)...) instr.output.val = output @@ -90,6 +104,7 @@ end function (instr::NewInstruction)() + # Catch run-time exceptions / errors. try expr = Expr(:new, map(val, instr.input)...) output = eval(expr) From d14936ba07e649ead60d5b09578b744bf191c63b Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 18 Jan 2022 13:56:32 +0000 Subject: [PATCH 15/18] give a warning when find an unknown ir code --- src/tapedfunction.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index ceece830..b3339cd8 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -234,6 +234,8 @@ function intercept(ir; recorder=:run_and_record!) elseif Meta.isexpr(st.expr, :new) args = st.expr.args ir[x] = IRTools.xcall(@__MODULE__, :run_and_record_new!, tape, args...) + else + @warn "Unknown IR code: " st end end # the real return value will be in the last instruction on the tape From 000ef2bcb58de3acf8f203d04b1dc4991f3899ca Mon Sep 17 00:00:00 2001 From: KDr2 Date: Tue, 18 Jan 2022 15:20:25 +0000 Subject: [PATCH 16/18] refactor new instruction, add test cases --- src/tapedfunction.jl | 28 +++++++--------------------- src/tapedtask.jl | 8 -------- test/runtests.jl | 1 + test/tf.jl | 17 +++++++++++++++++ 4 files changed, 25 insertions(+), 29 deletions(-) create mode 100644 test/tf.jl diff --git a/src/tapedfunction.jl b/src/tapedfunction.jl index b3339cd8..10d27dc3 100644 --- a/src/tapedfunction.jl +++ b/src/tapedfunction.jl @@ -18,20 +18,6 @@ mutable struct Instruction{F} <: AbstractInstruction tape::Tape end - -""" - NewInstruction - -A `NewInstruction` stands for a `new` operator, which only appears in -an inner constructor. 
Its represtation in IRCode is not a function call, -so we need a new intruction type to represent it on tapes. -""" -mutable struct NewInstruction <: AbstractInstruction - input::Tuple - output - tape::Tape -end - Tape() = Tape(Vector{AbstractInstruction}(), 1, nothing) Tape(owner) = Tape(Vector{AbstractInstruction}(), 1, owner) MacroTools.@forward Tape.tape Base.iterate, Base.length @@ -92,7 +78,7 @@ function Base.show(io::IO, tp::Tape) end function (instr::Instruction{F})() where F - # Catch run-time exceptions / errors. + # catch run-time exceptions / errors. try output = instr.fun(map(val, instr.input)...) instr.output.val = output @@ -102,9 +88,9 @@ function (instr::Instruction{F})() where F end end - -function (instr::NewInstruction)() - # Catch run-time exceptions / errors. +function _new end +function (instr::Instruction{typeof(_new)})() + # catch run-time exceptions / errors. try expr = Expr(:new, map(val, instr.input)...) output = eval(expr) @@ -147,7 +133,7 @@ function run_and_record!(tape::Tape, f, args...) return output end -function run_and_record_new!(tape::Tape, args...) +function run_and_record!(tape::Tape, ::typeof(_new), args...) output = try expr = Expr(:new, map(val, args)...) box(eval(expr)) @@ -155,7 +141,7 @@ function run_and_record_new!(tape::Tape, args...) @warn e Box{Any}(nothing) end - ins = NewInstruction(args, output, tape) + ins = Instruction(_new, args, output, tape) push!(tape, ins) return output end @@ -233,7 +219,7 @@ function intercept(ir; recorder=:run_and_record!) ir[x] = IRTools.xcall(@__MODULE__, recorder, tape, new_args...) elseif Meta.isexpr(st.expr, :new) args = st.expr.args - ir[x] = IRTools.xcall(@__MODULE__, :run_and_record_new!, tape, args...) + ir[x] = IRTools.xcall(@__MODULE__, recorder, tape, _new, args...) 
else @warn "Unknown IR code: " st end diff --git a/src/tapedtask.jl b/src/tapedtask.jl index 75861cea..72af7d2e 100644 --- a/src/tapedtask.jl +++ b/src/tapedtask.jl @@ -212,14 +212,6 @@ function Base.copy(x::Instruction, on_tape::Tape, roster::Dict{UInt64, Any}) Instruction(x.fun, input, output, on_tape) end -function Base.copy(x::NewInstruction, on_tape::Tape, roster::Dict{UInt64, Any}) - input = map(x.input) do ob - copy_box(ob, roster) - end - output = copy_box(x.output, roster) - NewInstruction(input, output, on_tape) -end - function Base.copy(t::Tape, roster::Dict{UInt64, Any}; partial=true) old_data = t.tape len = partial ? length(old_data) - t.counter + 1 : length(old_data) diff --git a/test/runtests.jl b/test/runtests.jl index 2749827a..24c28f6e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Libtask using Test +include("tf.jl") include("ctask.jl") include("tarray.jl") include("tref.jl") diff --git a/test/tf.jl b/test/tf.jl new file mode 100644 index 00000000..53ac57ca --- /dev/null +++ b/test/tf.jl @@ -0,0 +1,17 @@ +using Libtask + +@testset "tapedfunction" begin + # Test case 1: a `new` call in an inner constructor is recorded as one `Instruction{typeof(_new)}`. 
+ @testset "Instruction{typeof(_new)}" begin + mutable struct S + i::Int + S(x, y) = new(x + y) + end + + tf = Libtask.TapedFunction(S) + s1 = tf(1, 2) + @test s1.i == 3 + newins = findall(x -> isa(x, Libtask.Instruction{typeof(Libtask._new)}), tf.tape.tape) + @test length(newins) == 1 + end +end From 1edd7189f9981a8e109f8a553a0119509579e68e Mon Sep 17 00:00:00 2001 From: KDr2 Date: Wed, 19 Jan 2022 00:31:55 +0000 Subject: [PATCH 17/18] new CI job --- .../BenchmarksAndMicroIntegration.yml | 40 +++++++++++++++++++ perf/runtests.jl | 3 ++ 2 files changed, 43 insertions(+) create mode 100644 .github/workflows/BenchmarksAndMicroIntegration.yml create mode 100644 perf/runtests.jl diff --git a/.github/workflows/BenchmarksAndMicroIntegration.yml b/.github/workflows/BenchmarksAndMicroIntegration.yml new file mode 100644 index 00000000..67b8b421 --- /dev/null +++ b/.github/workflows/BenchmarksAndMicroIntegration.yml @@ -0,0 +1,40 @@ +name: Benchmarks and MicroIntegration + +on: + push: + branches: + - master + pull_request: + +jobs: + test: + name: Benchmarks and MicroIntegration + runs-on: ubuntu-latest + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: 1 + arch: x64 + - uses: julia-actions/julia-buildpkg@latest + - name: setup environment + shell: julia --color=yes --project=perf {0} + run: | + using Pkg + try + # force it to use this PR's version of the package + pkg"add Turing#hg/new-libtask2" # TODO: remove this when Turing is updated + Pkg.develop(PackageSpec(path=".")) # resolver may fail with main deps + Pkg.update() + catch err + err isa Pkg.Resolve.ResolverError || rethrow() + # If we can't resolve that means this is incompatible by SemVer and this is fine + # It means we marked this as a breaking change, so we don't need to worry about + # Mistakenly introducing a breaking change, as we have intentionally made one + @info "Not compatible with this release. No problem." 
exception=err + exit(0) # Exit immediately, as a success + end + - name: run + run: julia --color=yes --project=perf perf/runtests.jl diff --git a/perf/runtests.jl b/perf/runtests.jl new file mode 100644 index 00000000..9856db08 --- /dev/null +++ b/perf/runtests.jl @@ -0,0 +1,3 @@ +include("p0.jl") +include("p1.jl") +include("p2.jl") From 3b4fb2b7be30cb44dd959c1faeae54ad94cc07b8 Mon Sep 17 00:00:00 2001 From: Hong Ge Date: Wed, 19 Jan 2022 09:40:52 +0000 Subject: [PATCH 18/18] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1de90e38..544296e8 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f" license = "MIT" desc = "Tape based task copying in Turing" repo = "https://github.com/TuringLang/Libtask.jl.git" -version = "0.6.2" +version = "0.6.3" [deps] IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"