diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml index 3c91e003b..a6310afe5 100644 --- a/.github/workflows/Benchmarking.yml +++ b/.github/workflows/Benchmarking.yml @@ -4,80 +4,122 @@ on: pull_request: jobs: - benchmarks: + benchmark-base: runs-on: ubuntu-latest + outputs: + results: ${{ steps.benchmark.outputs.results }} + sha: ${{ steps.benchmark.outputs.sha }} + steps: + - uses: actions/checkout@v5 + with: + ref: ${{ github.base_ref }} + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + - uses: julia-actions/cache@v2 + + - name: Run benchmarks + id: benchmark + working-directory: ./benchmarks + run: | + # github output can't handle more than 1 line, hence the tail + julia --project=. -e 'using Pkg; Pkg.instantiate()' + results=$(julia --project=. benchmarks.jl json | tail -n 1 || true) + echo $results + echo "results=$results" >> "$GITHUB_OUTPUT" + echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + benchmark-head: + runs-on: ubuntu-latest + outputs: + results: ${{ steps.benchmark.outputs.results }} + sha: ${{ steps.benchmark.outputs.sha }} steps: - - name: Checkout Repository - uses: actions/checkout@v5 + - uses: actions/checkout@v5 with: ref: ${{ github.event.pull_request.head.sha }} - - - name: Set up Julia - uses: julia-actions/setup-julia@v2 + - uses: julia-actions/setup-julia@v2 with: version: '1.11' - - uses: julia-actions/cache@v2 - - name: Install Dependencies - run: julia --project=benchmarks/ -e 'using Pkg; Pkg.instantiate()' + - name: Run benchmarks + id: benchmark + working-directory: ./benchmarks + run: | + # github output can't handle more than 1 line, hence the tail + julia --project=. -e 'using Pkg; Pkg.instantiate()' + results=$(julia --project=. 
benchmarks.jl json | tail -n 1 || true) + echo $results + echo "results=$results" >> "$GITHUB_OUTPUT" + echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" - - name: Run Benchmarks - id: run_benchmarks + combine-results: + runs-on: ubuntu-latest + needs: [benchmark-base, benchmark-head] + steps: + - uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.head.sha }} + - uses: julia-actions/setup-julia@v2 + with: + version: '1.11' + - uses: julia-actions/cache@v2 + + - name: Combine benchmark results + working-directory: ./benchmarks run: | - # Capture version info into a variable, print it, and set it as an env var for later steps version_info=$(julia -e 'using InteractiveUtils; versioninfo()') echo "$version_info" echo "VERSION_INFO<<EOF" >> $GITHUB_ENV echo "$version_info" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV - # Capture benchmark output into a variable. The sed and tail calls cut out anything but the - # final block of results. - echo "Running Benchmarks..." - benchmark_output=$(\ - julia --project=benchmarks benchmarks/benchmarks.jl \ - | sed -n '/Final results:/,$p' \ - | tail -n +2\ - ) + # save outputs of previous jobs to json file + echo "Base results" + echo "--------------------------------------------------------" + echo '${{needs.benchmark-base.outputs.results}}' + echo '${{needs.benchmark-base.outputs.results}}' > base.json + echo "Head results" + echo "--------------------------------------------------------" + echo '${{needs.benchmark-head.outputs.results}}' + echo '${{needs.benchmark-head.outputs.results}}' > head.json - # Print benchmark results directly to the workflow log - echo "Benchmark Results:" - echo "$benchmark_output" + # combine them and save the output as an env var for later steps + julia --project=. -e 'using Pkg; Pkg.instantiate()' + results=$(julia --project=. 
benchmarks.jl combine head.json base.json) + echo "Combined results" + echo "--------------------------------------------------------" + echo "$results" - # Set the benchmark output as an env var for later steps echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV - echo "$benchmark_output" >> $GITHUB_ENV + echo "$results" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV - # Get the current commit SHA of DynamicPPL - DPPL_COMMIT_SHA=$(git rev-parse HEAD) - echo "DPPL_COMMIT_SHA=$DPPL_COMMIT_SHA" >> $GITHUB_ENV - - COMMIT_URL="https://github.com/${{ github.repository }}/commit/$DPPL_COMMIT_SHA" - echo "DPPL_COMMIT_URL=$COMMIT_URL" >> $GITHUB_ENV - - - name: Find Existing Comment + - name: Find existing benchmark comment uses: peter-evans/find-comment@v4 id: find_comment with: issue-number: ${{ github.event.pull_request.number }} comment-author: github-actions[bot] - - name: Post Benchmark Results as PR Comment + - name: Create or update benchmark comment uses: peter-evans/create-or-update-comment@v5 with: issue-number: ${{ github.event.pull_request.number }} body: | - ## Benchmark Report for Commit ${{ env.DPPL_COMMIT_SHA }} + ## Benchmark Report + + - this PR's head: `${{ needs.benchmark-head.outputs.sha }}` + - base branch: `${{ needs.benchmark-base.outputs.sha }}` + ### Computer Information ``` ${{ env.VERSION_INFO }} ``` ### Benchmark Results - ``` + ${{ env.BENCHMARK_OUTPUT }} - ``` + comment-id: ${{ steps.find_comment.outputs.comment-id }} edit-mode: replace diff --git a/.gitignore b/.gitignore index 198907c73..d5a87f1eb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ .DS_Store Manifest.toml **.~undo-tree~ + +benchmarks/*.json diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml index 0d4e9a654..c154c5ca5 100644 --- a/benchmarks/Project.toml +++ b/benchmarks/Project.toml @@ -4,11 +4,12 @@ version = "0.1.0" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Chairmarks = 
"0ca39b1e-fe0b-4e98-acfc-b1656634c4de" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" @@ -21,11 +22,12 @@ DynamicPPL = {path = "../"} [compat] ADTypes = "1.14.0" -BenchmarkTools = "1.6.0" +Chairmarks = "1.3.1" Distributions = "0.25.117" DynamicPPL = "0.38" Enzyme = "0.13" -ForwardDiff = "0.10.38, 1" +ForwardDiff = "1" +JSON = "1.3.0" LogDensityProblems = "2.1.2" Mooncake = "0.4" PrettyTables = "3" diff --git a/benchmarks/README.md b/benchmarks/README.md index 35cb8c0bf..ad70b7c03 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,4 +1,4 @@ -To run the benchmarks, run this from the root directory of the repository: +To run the benchmarks locally, run this from the root directory of the repository: ```sh julia --project=benchmarks benchmarks/benchmarks.jl diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl index 035d8ff49..7bae6a9da 100644 --- a/benchmarks/benchmarks.jl +++ b/benchmarks/benchmarks.jl @@ -1,113 +1,231 @@ using Pkg -using DynamicPPLBenchmarks: Models, make_suite, model_dimension -using BenchmarkTools: @benchmark, median, run -using PrettyTables: pretty_table, fmt__printf +using Chairmarks: @be, median +using DynamicPPLBenchmarks: Models, benchmark, model_dimension +using JSON: JSON +using PrettyTables: pretty_table, fmt__printf, EmptyCells, MultiColumn, TextTableFormat +using Printf: @sprintf using StableRNGs: StableRNG rng = StableRNG(23) -function print_results(results_table) - table_matrix = hcat(Iterators.map(collect, zip(results_table...))...) 
- header = [ - "Model", - "Dim", - "AD Backend", - "VarInfo", - "Linked", - "t(eval)/t(ref)", - "t(grad)/t(eval)", - ] - return pretty_table( - table_matrix; - column_labels=header, - backend=:text, - formatters=[fmt__printf("%.1f", [6, 7])], - fit_table_in_display_horizontally=false, - fit_table_in_display_vertically=false, - ) -end - -# Create DynamicPPL.Model instances to run benchmarks on. -smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100)) -loop_univariate1k, multivariate1k = begin - data_1k = randn(rng, 1_000) - loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k) - multi = Models.multivariate(length(data_1k)) | (; o=data_1k) - loop, multi -end -loop_univariate10k, multivariate10k = begin - data_10k = randn(rng, 10_000) - loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k) - multi = Models.multivariate(length(data_10k)) | (; o=data_10k) - loop, multi -end -lda_instance = begin - w = [1, 2, 3, 2, 1, 1] - d = [1, 1, 1, 2, 2, 2] - Models.lda(2, d, w) -end +head_filename = "benchmarks_result_head.json" +base_filename = "benchmarks_result_base.json" -# Specify the combinations to test: -# (Model Name, model instance, VarInfo choice, AD backend, linked) -chosen_combinations = [ - ( - "Simple assume observe", - Models.simple_assume_observe(randn(rng)), - :typed, - :forwarddiff, - false, - ), - ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false), - ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true), - ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true), - ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true), - ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true), - ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true), - ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true), - ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true), - ("Smorgasbord", smorgasbord_instance, :typed, 
:enzyme, true), - ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true), - ("Multivariate 1k", multivariate1k, :typed, :mooncake, true), - ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true), - ("Multivariate 10k", multivariate10k, :typed, :mooncake, true), - ("Dynamic", Models.dynamic(), :typed, :mooncake, true), - ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true), - ("LDA", lda_instance, :typed, :reversediff, true), +colnames = [ + "Model", "Dim", "AD Backend", "VarInfo", "Linked", "t(eval)/t(ref)", "t(grad)/t(eval)" ] - -# Time running a model-like function that does not use DynamicPPL, as a reference point. -# Eval timings will be relative to this. -reference_time = begin - obs = randn(rng) - median(@benchmark Models.simple_assume_observe_non_model(obs)).time +function print_results(results_table; to_json=false) + if to_json + # Print to the given file as JSON + results_array = [ + Dict(colnames[i] => results_table[j][i] for i in eachindex(colnames)) for + j in eachindex(results_table) + ] + # do not use pretty=true, as GitHub Actions expects no linebreaks + JSON.json(stdout, results_array) + println() + else + # Pretty-print to terminal + table_matrix = hcat(Iterators.map(collect, zip(results_table...))...) + return pretty_table( + table_matrix; + column_labels=colnames, + backend=:text, + formatters=[fmt__printf("%.1f", [6, 7])], + fit_table_in_display_horizontally=false, + fit_table_in_display_vertically=false, + ) + end end -results_table = Tuple{String,Int,String,String,Bool,Float64,Float64}[] +function run(; to_json=false) + # Create DynamicPPL.Model instances to run benchmarks on. 
+ smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100)) + loop_univariate1k, multivariate1k = begin + data_1k = randn(rng, 1_000) + loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k) + multi = Models.multivariate(length(data_1k)) | (; o=data_1k) + loop, multi + end + loop_univariate10k, multivariate10k = begin + data_10k = randn(rng, 10_000) + loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k) + multi = Models.multivariate(length(data_10k)) | (; o=data_10k) + loop, multi + end + lda_instance = begin + w = [1, 2, 3, 2, 1, 1] + d = [1, 1, 1, 2, 2, 2] + Models.lda(2, d, w) + end -for (model_name, model, varinfo_choice, adbackend, islinked) in chosen_combinations - @info "Running benchmark for $model_name" - suite = make_suite(model, varinfo_choice, adbackend, islinked) - results = run(suite) - eval_time = median(results["evaluation"]).time - relative_eval_time = eval_time / reference_time - ad_eval_time = median(results["gradient"]).time - relative_ad_eval_time = ad_eval_time / eval_time - push!( - results_table, + # Specify the combinations to test: + # (Model Name, model instance, VarInfo choice, AD backend, linked) + chosen_combinations = [ ( - model_name, - model_dimension(model, islinked), - string(adbackend), - string(varinfo_choice), - islinked, - relative_eval_time, - relative_ad_eval_time, + "Simple assume observe", + Models.simple_assume_observe(randn(rng)), + :typed, + :forwarddiff, + false, ), + ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false), + ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true), + ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true), + ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true), + ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true), + ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true), + ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true), + 
("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true), + ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true), + ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true), + ("Multivariate 1k", multivariate1k, :typed, :mooncake, true), + ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true), + ("Multivariate 10k", multivariate10k, :typed, :mooncake, true), + ("Dynamic", Models.dynamic(), :typed, :mooncake, true), + ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true), + ("LDA", lda_instance, :typed, :reversediff, true), + ] + + # Time running a model-like function that does not use DynamicPPL, as a reference point. + # Eval timings will be relative to this. + reference_time = begin + obs = randn(rng) + median(@be Models.simple_assume_observe_non_model(obs)).time + end + @info "Reference evaluation time: $(reference_time) seconds" + + results_table = Tuple{ + String,Int,String,String,Bool,Union{Float64,Missing},Union{Float64,Missing} + }[] + + for (model_name, model, varinfo_choice, adbackend, islinked) in chosen_combinations + @info "Running benchmark for $model_name" + relative_eval_time, relative_ad_eval_time = try + results = benchmark(model, varinfo_choice, adbackend, islinked) + (results.primal_time / reference_time), + (results.grad_time / results.primal_time) + catch e + missing, missing + end + push!( + results_table, + ( + model_name, + model_dimension(model, islinked), + string(adbackend), + string(varinfo_choice), + islinked, + relative_eval_time, + relative_ad_eval_time, + ), + ) + print_results(results_table; to_json=to_json) + end + print_results(results_table; to_json=to_json) + return nothing +end + +struct TestCase + model_name::String + dim::Integer + ad_backend::String + varinfo::String + linked::Bool + TestCase(d::Dict{String,Any}) = new((d[c] for c in colnames[1:5])...) 
+end +function combine(head_filename::String, base_filename::String) + head_results = try + JSON.parsefile(head_filename, Vector{Dict{String,Any}}) + catch + Dict{String,Any}[] + end + @info "Loaded $(length(head_results)) results from $head_filename" + base_results = try + JSON.parsefile(base_filename, Vector{Dict{String,Any}}) + catch + Dict{String,Any}[] + end + @info "Loaded $(length(base_results)) results from $base_filename" + # Identify unique combinations of (Model, Dim, AD Backend, VarInfo, Linked) + head_testcases = Dict( + TestCase(d) => (d[colnames[6]], d[colnames[7]]) for d in head_results ) - println("Results so far:") - print_results(results_table) + base_testcases = Dict( + TestCase(d) => (d[colnames[6]], d[colnames[7]]) for d in base_results + ) + all_testcases = union(Set(keys(head_testcases)), Set(keys(base_testcases))) + @info "$(length(all_testcases)) unique test cases found" + sorted_testcases = sort( + collect(all_testcases); by=(c -> (c.model_name, c.ad_backend, c.varinfo, c.linked)) + ) + results_table = Tuple{ + String,Int,String,String,Bool,String,String,String,String,String,String + }[] + results_colnames = [ + [ + EmptyCells(5), + MultiColumn(3, "t(eval) / t(ref)"), + MultiColumn(3, "t(grad) / t(eval)"), + ], + [colnames[1:5]..., "base", "this PR", "speedup", "base", "this PR", "speedup"], + ] + sprint_float(x::Float64) = @sprintf("%.2f", x) + sprint_float(m::Missing) = "err" + for c in sorted_testcases + head_eval, head_grad = get(head_testcases, c, (missing, missing)) + base_eval, base_grad = get(base_testcases, c, (missing, missing)) + speedup_eval = base_eval / head_eval + speedup_grad = base_grad / head_grad + push!( + results_table, + ( + c.model_name, + c.dim, + c.ad_backend, + c.varinfo, + c.linked, + sprint_float(base_eval), + sprint_float(head_eval), + sprint_float(speedup_eval), + sprint_float(base_grad), + sprint_float(head_grad), + sprint_float(speedup_grad), + ), + ) + end + # Pretty-print to terminal + if 
isempty(results_table) + println("No benchmark results obtained.") + else + table_matrix = hcat(Iterators.map(collect, zip(results_table...))...) + println("```") + pretty_table( + table_matrix; + column_labels=results_colnames, + backend=:text, + fit_table_in_display_horizontally=false, + fit_table_in_display_vertically=false, + table_format=TextTableFormat(; horizontal_line_at_merged_column_labels=true), + ) + println("```") + end end -println("Final results:") -print_results(results_table) +# The command-line arguments are used on CI purposes. +# Run with `julia --project=. benchmarks.jl json` to run benchmarks and output JSON to +# stdout +# Run with `julia --project=. benchmarks.jl combine head.json base.json` to combine two JSON +# files +if length(ARGS) == 3 && ARGS[1] == "combine" + combine(ARGS[2], ARGS[3]) +elseif ARGS == ["json"] + run(; to_json=true) +elseif ARGS == [] + # When running locally just omit the argument and it will just benchmark and print to + # terminal. + run() +else + error("invalid arguments: $(ARGS)") +end diff --git a/benchmarks/src/DynamicPPLBenchmarks.jl b/benchmarks/src/DynamicPPLBenchmarks.jl index 225e40cd8..0dc7ece6e 100644 --- a/benchmarks/src/DynamicPPLBenchmarks.jl +++ b/benchmarks/src/DynamicPPLBenchmarks.jl @@ -1,20 +1,20 @@ module DynamicPPLBenchmarks using DynamicPPL: VarInfo, SimpleVarInfo, VarName -using BenchmarkTools: BenchmarkGroup, @benchmarkable using DynamicPPL: DynamicPPL +using DynamicPPL.TestUtils.AD: run_ad, NoTest using ADTypes: ADTypes using LogDensityProblems: LogDensityProblems using ForwardDiff: ForwardDiff -using Mooncake: Mooncake using ReverseDiff: ReverseDiff +using Mooncake: Mooncake +using Enzyme: Enzyme using StableRNGs: StableRNG include("./Models.jl") using .Models: Models -using Enzyme: Enzyme -export Models, make_suite, model_dimension +export Models, benchmark, model_dimension """ model_dimension(model, islinked) @@ -52,9 +52,11 @@ function to_backend(x::Union{AbstractString,Symbol}) end """ 
- make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool) + benchmark(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool) + +Benchmark evaluation and gradient calculation for `model` using the selected varinfo type +and AD backend. -Create a benchmark suite for `model` using the selected varinfo type and AD backend. Available varinfo choices: • `:untyped` → uses `DynamicPPL.untyped_varinfo(model)` • `:typed` → uses `DynamicPPL.typed_varinfo(model)` @@ -65,10 +67,10 @@ The AD backend should be specified as a Symbol (e.g. `:forwarddiff`, `:reversedi `islinked` determines whether to link the VarInfo for evaluation. """ -function make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool) +function benchmark(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool) rng = StableRNG(23) - suite = BenchmarkGroup() + adbackend = to_backend(adbackend) vi = if varinfo_choice == :untyped DynamicPPL.untyped_varinfo(rng, model) @@ -94,20 +96,9 @@ function make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked:: vi = DynamicPPL.link(vi, model) end - f = DynamicPPL.LogDensityFunction( - model, DynamicPPL.getlogjoint_internal, vi; adtype=adbackend + return run_ad( + model, adbackend; varinfo=vi, benchmark=true, test=NoTest(), verbose=false ) - # The parameters at which we evaluate f. - θ = vi[:] - - # Run once to trigger compilation. - LogDensityProblems.logdensity_and_gradient(f, θ) - suite["gradient"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f, $θ) - - # Also benchmark just standard model evaluation because why not. - suite["evaluation"] = @benchmarkable $(LogDensityProblems.logdensity)($f, $θ) - - return suite end end # module