Merge pull request #8 from TuringLang/torfjelde/improvements

More improvements
TuringLang · Sep 16, 2023 · 38ee328 · 38ee328 · torfjelde · Sep 16, 2023
2 parents 8f536d6 + 9b2e59e
commit 38ee328
Show file tree

Hide file tree

Showing 3 changed files with 175 additions and 48 deletions.
diff --git a/examples/item-response-model.jl b/examples/item-response-model.jl
@@ -42,10 +42,14 @@ y, i, p, _, _ = sim(20, P);
 end
 
 # performant model
+# Log-density of `y` under `BernoulliLogit(theta - beta)`; written on scalars so that
+# callers can broadcast it over observations.
+bernoulli_logit_logpdf(y, theta, beta) = logpdf(BernoulliLogit(theta - beta), y)
+
 @model function irt(y, i, p; I = maximum(i), P = maximum(p))
     theta ~ filldist(Normal(), P)
     beta ~ filldist(Normal(), I)
-    Turing.@addlogprob! sum(logpdf.(BernoulliLogit.(theta[p] - beta[i]), y))
+    Turing.@addlogprob! sum(bernoulli_logit_logpdf.(y, theta[p], beta[i]))
 
     return (; theta, beta)
 end
@@ -56,7 +60,11 @@ model = irt(y, i, p);
 # Make the benchmark suite.
 suite = TuringBenchmarking.make_turing_suite(
     model,
-    adbackends = [TuringBenchmarking.ForwardDiffAD{40}(), TuringBenchmarking.ReverseDiffAD{true}()]
+    adbackends = [
+        TuringBenchmarking.ForwardDiffAD{40}(),
+        TuringBenchmarking.ReverseDiffAD{true}(),
+        TuringBenchmarking.ReverseDiffAD{false}()
+    ]
 );
 
 # Run suite!

diff --git a/src/TuringBenchmarking.jl b/src/TuringBenchmarking.jl
@@ -9,32 +9,92 @@ using Turing
 using Turing.Essential: ForwardDiffAD, TrackerAD, ReverseDiffAD, ZygoteAD
 using DynamicPPL: DynamicPPL
 
+using ReverseDiff: ReverseDiff
+using Zygote: Zygote
+
 if !isdefined(Base, :get_extension)
     using Requires
 end
 
+export benchmark_model, make_turing_suite, @tagged
+
 # Don't include `TrackerAD` because it's never going to win.
 const DEFAULT_ADBACKENDS = [
-    ForwardDiffAD{40}(),    # chunksize=40
-    ForwardDiffAD{100}(),   # chunksize=100
-    ZygoteAD(),
+    ForwardDiffAD{Turing.Essential.CHUNKSIZE[]}(), # chunksize=Turing's global `CHUNKSIZE`
     ReverseDiffAD{false}(), # rdcache=false
-    ReverseDiffAD{true}()   # rdcache=false
+    ReverseDiffAD{true}(),  # rdcache=true
+    ZygoteAD(),
 ]
 
+backend_label(::ForwardDiffAD) = "ForwardDiff"  # short package name for the backend type
+backend_label(::ReverseDiffAD) = "ReverseDiff"  # same label for every type parameter
+backend_label(::ZygoteAD) = "Zygote"
+backend_label(::TrackerAD) = "Tracker"
+
+const SYMBOL_TO_BACKEND = Dict(  # lower-case name => backend instance; read by `to_backend`
+    :forwarddiff => ForwardDiffAD{Turing.Essential.CHUNKSIZE[]}(),
+    :reversediff => ReverseDiffAD{false}(),
+    :reversediff_compiled => ReverseDiffAD{true}(),
+    :zygote => ZygoteAD(),
+    :tracker => TrackerAD(),
+)
+
+# Normalize a backend specification into an AD backend instance.
+# Backend instances pass through untouched; strings and symbols are lower-cased and
+# looked up in `SYMBOL_TO_BACKEND`; anything else raises an error.
+to_backend(backend::Turing.Essential.ADBackend) = backend
+to_backend(spec) = error("Unknown backend: $spec")
+function to_backend(spec::Union{AbstractString,Symbol})
+    key = Symbol(lowercase(string(spec)))
+    backend = get(SYMBOL_TO_BACKEND, key, nothing)
+    backend === nothing && error("Unknown backend: $spec")
+    return backend
+end
+
+"""
+    benchmark_model(model::Turing.Model; suite_kwargs..., kwargs...)
+
+Create a benchmark suite for `model` using [`make_turing_suite`](@ref) and run it.
+
+Returns the result of calling `run` on the suite.
+
+# Keyword arguments
+- `suite_kwargs`: `adbackends`, `run_once`, `check_grads`, `varinfo`, `sampler` and
+  `context`; passed to [`make_turing_suite`](@ref) (see there for details).
+- `kwargs`: all remaining keyword arguments; passed only to `BenchmarkTools.run`.
+"""
+function benchmark_model(
+    model::DynamicPPL.Model;
+    adbackends = DEFAULT_ADBACKENDS,
+    run_once::Bool = true,
+    check_grads::Bool = false,
+    varinfo::DynamicPPL.AbstractVarInfo = DynamicPPL.VarInfo(model),
+    sampler::Union{AbstractMCMC.AbstractSampler,Nothing} = nothing,
+    context::DynamicPPL.AbstractContext = DynamicPPL.DefaultContext(),
+    kwargs...
+)
+    # `kwargs` are for `run` only: `make_turing_suite` accepts no extra keywords.
+    suite = make_turing_suite(
+        model;
+        adbackends,
+        run_once,
+        check_grads,
+        varinfo,
+        sampler,
+        context,
+    )
+    return run(suite; kwargs...)
+end
+
 """
     make_turing_suite(model::Turing.Model; kwargs...)
 
 Create default benchmark suite for `model`.
 
 # Keyword arguments
-- `adbackends`: a collection of adbackends to use. Defaults to `$(DEFAULT_ADBACKENDS)`.
+- `adbackends`: a collection of adbackends to use, specified either as a
+  `Turing.Essential.ADBackend` or using a `Symbol`. Defaults to `$(DEFAULT_ADBACKENDS)`.
 - `run_once=true`: if `true`, the body of each benchmark will be run once to avoid
   compilation to be included in the timings (this may occur if compilation runs
   longer than the allowed time limit).
-- `save_grads=false`: if `true` and `run_once` is `true`, the gradients from the initial
-  execution will be saved and returned as the second return-value. This is useful if you
-  want to check correctness of the gradients for different backends.
+- `check_grads=false`: if `true` and `run_once` is `true`, the gradients from the initial
+  execution will be compared against each other to ensure that they are consistent.
 - `varinfo`: the `VarInfo` to use. Defaults to `DynamicPPL.VarInfo(model)`.
 - `sampler`: the `Sampler` to use. Defaults to `nothing` (i.e. no sampler).
 - `context`: the `Context` to use. Defaults to `DynamicPPL.DefaultContext()`.
@@ -48,23 +108,31 @@ function make_turing_suite(
     model::DynamicPPL.Model;
     adbackends = DEFAULT_ADBACKENDS,
     run_once::Bool = true,
-    save_grads::Bool = false,
+    check_grads::Bool = false,
     varinfo::DynamicPPL.AbstractVarInfo = DynamicPPL.VarInfo(model),
     sampler::Union{AbstractMCMC.AbstractSampler,Nothing} = nothing,
     context::DynamicPPL.AbstractContext = DynamicPPL.DefaultContext()
 )
-    suite = BenchmarkGroup()
-    suite["not_linked"] = BenchmarkGroup()
-    suite["linked"] = BenchmarkGroup()
+    grads = Dict(:standard => Dict(), :linked => Dict())
 
-    grads = Dict(:not_linked => Dict(), :linked => Dict())
+    suite = BenchmarkGroup()
+    suite_evaluation = BenchmarkGroup()
+    suite_gradient = BenchmarkGroup()
+    suite["evaluation"] = suite_evaluation
+    suite["gradient"] = suite_gradient
 
     indexer = sampler === nothing ? Colon() : sampler
     if sampler !== nothing
         context = DynamicPPL.SamplingContext(sampler, context)
     end
 
-    for adbackend in adbackends
+    for adbackend in map(to_backend, adbackends)
+        suite_backend = BenchmarkGroup([backend_label(adbackend)])
+        suite_gradient["$(adbackend)"] = suite_backend
+
+        suite_backend["standard"] = BenchmarkGroup()
+        suite_backend["linked"] = BenchmarkGroup()
+
         varinfo_current = DynamicPPL.unflatten(varinfo, context, varinfo[indexer])
         f = LogDensityProblemsAD.ADgradient(
             adbackend,
@@ -78,11 +146,11 @@ function make_turing_suite(
             if run_once
                 ℓ, ∇ℓ = LogDensityProblems.logdensity_and_gradient(f, θ)
 
-                if save_grads
-                    grads[:not_linked][adbackend] = (ℓ, ∇ℓ)
+                if check_grads
+                    grads[:standard][adbackend] = (ℓ, ∇ℓ)
                 end
             end
-            suite["not_linked"]["$(adbackend)"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f, $θ)
+            suite_backend["standard"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f, $θ)
         catch e
             @warn "Gradient computation (without linking) failed for $(adbackend): $(e)"
         end
@@ -104,26 +172,44 @@ function make_turing_suite(
             if run_once
                 ℓ, ∇ℓ = LogDensityProblems.logdensity_and_gradient(f_linked, θ_linked)
 
-                if save_grads
+                if check_grads
                     grads[:linked][adbackend] = (ℓ, ∇ℓ)
                 end
             end
-            suite["linked"]["$(adbackend)"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f_linked, $θ_linked)
+            suite_backend["linked"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f_linked, $θ_linked)
         catch e
             @warn "Gradient computation (with linking) failed for $(adbackend): $(e)"
         end
     end
 
     # Also benchmark just standard model evaluation because why not.
-    suite["not_linked"]["evaluation"] = @benchmarkable $(DynamicPPL.evaluate!!)($model, $varinfo, $context)
+    suite_evaluation["standard"] = @benchmarkable $(DynamicPPL.evaluate!!)(
+        $model, $varinfo, $context
+    )
     varinfo_linked = if sampler === nothing
         DynamicPPL.link!!(deepcopy(varinfo), model)
     else
         DynamicPPL.link!!(deepcopy(varinfo), sampler, model)
     end
-    suite["linked"]["evaluation"] = @benchmarkable $(DynamicPPL.evaluate!!)($model, $varinfo_linked, $context)
+    suite_evaluation["linked"] = @benchmarkable $(DynamicPPL.evaluate!!)(
+        $model, $varinfo_linked, $context
+    )
+
+    if check_grads
+        # Let's check that the gradients are roughly the same for all backends.
+        (val_first, grad_first) = first(values(grads[:standard]))
+        for (backend, (val, grad)) in grads[:standard]
+            if !(val ≈ val_first)
+                @warn "Gradient check failed for $(backend): log-density values differ"
+            end
 
-    return save_grads ? (suite, grads) : suite
+            if !(grad ≈ grad_first)
+                @warn "Gradient check failed for $(backend): gradients differ"
+            end
+        end
+    end
+
+    return suite
 end
 
 """

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -6,13 +6,13 @@ using Test
 using Zygote: Zygote
 using ReverseDiff: ReverseDiff
 
+# Just make things run a bit faster.
+BenchmarkTools.DEFAULT_PARAMETERS.seconds = 1
+BenchmarkTools.DEFAULT_PARAMETERS.evals = 1
+BenchmarkTools.DEFAULT_PARAMETERS.samples = 2
+
 # These should be ordered (ascendingly) by runtime.
-ADBACKENDS = [
-    TuringBenchmarking.ForwardDiffAD{40}(),
-    TuringBenchmarking.ReverseDiffAD{true}(),
-    TuringBenchmarking.ReverseDiffAD{false}(),
-    TuringBenchmarking.ZygoteAD(),
-]
+ADBACKENDS = TuringBenchmarking.DEFAULT_ADBACKENDS
 
 @testset "TuringBenchmarking.jl" begin
     @testset "Item-Response model" begin
@@ -35,9 +35,7 @@ ADBACKENDS = [
 
             return yvec, ivec, pvec, theta, beta
         end
-
-        P = 10
-        y, i, p, _, _ = sim(20, P)
+        y, i, p, _, _ = sim(5, 3)
 
         ### Turing ###
         # performant model
@@ -55,31 +53,60 @@ ADBACKENDS = [
         # Make the benchmark suite.
         @testset "$(nameof(typeof(varinfo)))" for varinfo in [
             DynamicPPL.VarInfo(model),
-            DynamicPPL.SimpleVarInfo{Float64}(model),
+            DynamicPPL.SimpleVarInfo(model),
         ]
             suite = TuringBenchmarking.make_turing_suite(
                 model;
                 adbackends=ADBACKENDS,
-                varinfo=varinfo
+                varinfo=varinfo,
+                check_grads=true,
             )
-            results = run(suite, verbose=true, evals=1, samples=2)
+            results = run(suite, verbose=true)
+
+            @testset "$adbackend" for (i, adbackend) in enumerate(ADBACKENDS)
+                adbackend_string = "$(adbackend)"
+                results_backend = results[@tagged adbackend_string]
+                # Each AD backend should have two results.
+                @test length(leaves(results_backend)) == 2
+                # It should be under the "gradient" section.
+                @test haskey(results_backend, "gradient")
+                # It should have one tagged "linked" and one "standard"
+                @test length(leaves(results_backend[@tagged "linked"])) == 1
+                @test length(leaves(results_backend[@tagged "standard"])) == 1
+            end
+        end
 
-            # TODO: Is there a better way to test these?
-            for (i, adbackend) in enumerate(ADBACKENDS)
-                @test haskey(suite["not_linked"], "$(adbackend)")
-                @test haskey(suite["linked"], "$(adbackend)")
+        @testset "Specify AD backends using symbols" begin
+            varinfo = DynamicPPL.VarInfo(model)
+            suite = TuringBenchmarking.make_turing_suite(
+                model;
+                adbackends=[:forwarddiff, :reversediff, :reversediff_compiled, :zygote],
+                varinfo=varinfo,
+            )
+            results = run(suite, verbose=true)
+
+            @testset "$adbackend" for (i, adbackend) in enumerate(ADBACKENDS)
+                adbackend_string = "$(adbackend)"
+                results_backend = results[@tagged adbackend_string]
+                # Each AD backend should have two results.
+                @test length(leaves(results_backend)) == 2
+                # It should be under the "gradient" section.
+                @test haskey(results_backend, "gradient")
+                # It should have one tagged "linked" and one "standard"
+                @test length(leaves(results_backend[@tagged "linked"])) == 1
+                @test length(leaves(results_backend[@tagged "standard"])) == 1
             end
         end
     end
 
+
     @testset "Model with mutation" begin
         @model function demo_with_mutation(::Type{TV}=Vector{Float64}) where {TV}
             x = TV(undef, 2)
             x[1] ~ Normal()
             x[2] ~ Normal()
             return x
         end
-
         model = demo_with_mutation()
 
         # Make the benchmark suite.
@@ -93,16 +120,22 @@ ADBACKENDS = [
                 adbackends=ADBACKENDS,
                 varinfo=varinfo
             )
-            results = run(suite, verbose=true, evals=1, samples=2)
+            results = run(suite, verbose=true)
 
-            for (i, adbackend) in enumerate(ADBACKENDS)
-                # Zygote.jl should fail.
+            @testset "$adbackend" for (i, adbackend) in enumerate(ADBACKENDS)
+                adbackend_string = "$(adbackend)"
+                results_backend = results[@tagged adbackend_string]
                 if adbackend isa TuringBenchmarking.ZygoteAD
-                    @test !haskey(suite["not_linked"], "$(adbackend)")
-                    @test !haskey(suite["linked"], "$(adbackend)")
+                    # Zygote.jl should fail, i.e. return an empty suite.
+                    @test length(leaves(results_backend)) == 0
                 else
-                    @test haskey(suite["not_linked"], "$(adbackend)")
-                    @test haskey(suite["linked"], "$(adbackend)")
+                    # Each AD backend should have two results.
+                    @test length(leaves(results_backend)) == 2
+                    # It should be under the "gradient" section.
+                    @test haskey(results_backend, "gradient")
+                    # It should have one tagged "linked" and one "standard"
+                    @test length(leaves(results_backend[@tagged "linked"])) == 1
+                    @test length(leaves(results_backend[@tagged "standard"])) == 1
                 end
             end
         end