From 5a9e2c71ae28ab6339ef93f67e94f7fc25b62d63 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 18:57:04 +0000
Subject: [PATCH 01/31] comparative benchmarks

---
 .github/workflows/Benchmarking.yml     |  34 ++-
 .gitignore                             |   2 +
 benchmarks/Project.toml                |   6 +-
 benchmarks/README.md                   |   2 +-
 benchmarks/benchmarks.jl               | 288 ++++++++++++++++---------
 benchmarks/src/DynamicPPLBenchmarks.jl |  33 ++-
 6 files changed, 226 insertions(+), 139 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 3c91e003b..8c93bf308 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -8,23 +8,15 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v5
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
+      - uses: actions/checkout@v5
 
-      - name: Set up Julia
-        uses: julia-actions/setup-julia@v2
+      - uses: julia-actions/setup-julia@v2
         with:
           version: '1.11'
 
       - uses: julia-actions/cache@v2
 
-      - name: Install Dependencies
-        run: julia --project=benchmarks/ -e 'using Pkg; Pkg.instantiate()'
-
-      - name: Run Benchmarks
-        id: run_benchmarks
+      - name: Run benchmarks
         run: |
           # Capture version info into a variable, print it, and set it as an env var for later steps
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
@@ -33,19 +25,23 @@ jobs:
           echo "$version_info" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-          # Capture benchmark output into a variable. The sed and tail calls cut out anything but the
+          ## Run the actual benchmarks
+          # checkout pr HEAD
+          git checkout ${{ github.event.pull_request.head.sha }}
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. benchmarks.jl json-head
+          # then switch to PR base
+          git checkout ${{ github.base_ref }}
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. benchmarks.jl json-base
+          # then combine them. The sed and tail calls cut out anything but the
           # final block of results.
-          echo "Running Benchmarks..."
           benchmark_output=$(\
-            julia --project=benchmarks benchmarks/benchmarks.jl \
+            julia --project=. benchmarks.jl combine \
             | sed -n '/Final results:/,$p' \
             | tail -n +2\
           )
 
-          # Print benchmark results directly to the workflow log
-          echo "Benchmark Results:"
-          echo "$benchmark_output"
-
           # Set the benchmark output as an env var for later steps
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           echo "$benchmark_output" >> $GITHUB_ENV
@@ -57,6 +53,8 @@ jobs:
 
           COMMIT_URL="https://github.com/${{ github.repository }}/commit/$DPPL_COMMIT_SHA"
           echo "DPPL_COMMIT_URL=$COMMIT_URL" >> $GITHUB_ENV
+        with:
+          working-directory: ./benchmarks
 
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4
diff --git a/.gitignore b/.gitignore
index 198907c73..d5a87f1eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@
 .DS_Store
 Manifest.toml
 **.~undo-tree~
+
+benchmarks/*.json
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 0d4e9a654..710024b77 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -4,11 +4,12 @@ version = "0.1.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
@@ -21,11 +22,12 @@ DynamicPPL = {path = "../"}
 
 [compat]
 ADTypes = "1.14.0"
-BenchmarkTools = "1.6.0"
+Chairmarks = "1.3.1"
 Distributions = "0.25.117"
 DynamicPPL = "0.38"
 Enzyme = "0.13"
 ForwardDiff = "0.10.38, 1"
+JSON = "1.3.0"
 LogDensityProblems = "2.1.2"
 Mooncake = "0.4"
 PrettyTables = "3"
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 35cb8c0bf..ad70b7c03 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,4 +1,4 @@
-To run the benchmarks, run this from the root directory of the repository:
+To run the benchmarks locally, run this from the root directory of the repository:
 
 ```sh
 julia --project=benchmarks benchmarks/benchmarks.jl
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 035d8ff49..40be4a339 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -1,113 +1,207 @@
 using Pkg
 
-using DynamicPPLBenchmarks: Models, make_suite, model_dimension
-using BenchmarkTools: @benchmark, median, run
-using PrettyTables: pretty_table, fmt__printf
+using Chairmarks: @be, median
+using DynamicPPLBenchmarks: Models, benchmark, model_dimension
+using JSON: JSON
+using PrettyTables: pretty_table, fmt__printf, EmptyCells, MultiColumn
+using Printf: @sprintf
 using StableRNGs: StableRNG
 
 rng = StableRNG(23)
 
-function print_results(results_table)
-    table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
-    header = [
-        "Model",
-        "Dim",
-        "AD Backend",
-        "VarInfo",
-        "Linked",
-        "t(eval)/t(ref)",
-        "t(grad)/t(eval)",
-    ]
-    return pretty_table(
-        table_matrix;
-        column_labels=header,
-        backend=:text,
-        formatters=[fmt__printf("%.1f", [6, 7])],
-        fit_table_in_display_horizontally=false,
-        fit_table_in_display_vertically=false,
-    )
-end
-
-# Create DynamicPPL.Model instances to run benchmarks on.
-smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100))
-loop_univariate1k, multivariate1k = begin
-    data_1k = randn(rng, 1_000)
-    loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k)
-    multi = Models.multivariate(length(data_1k)) | (; o=data_1k)
-    loop, multi
-end
-loop_univariate10k, multivariate10k = begin
-    data_10k = randn(rng, 10_000)
-    loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k)
-    multi = Models.multivariate(length(data_10k)) | (; o=data_10k)
-    loop, multi
-end
-lda_instance = begin
-    w = [1, 2, 3, 2, 1, 1]
-    d = [1, 1, 1, 2, 2, 2]
-    Models.lda(2, d, w)
-end
+head_filename = "benchmarks_result_head.json"
+base_filename = "benchmarks_result_base.json"
 
-# Specify the combinations to test:
-# (Model Name, model instance, VarInfo choice, AD backend, linked)
-chosen_combinations = [
-    (
-        "Simple assume observe",
-        Models.simple_assume_observe(randn(rng)),
-        :typed,
-        :forwarddiff,
-        false,
-    ),
-    ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false),
-    ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
-    ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
-    ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
-    ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
-    ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
-    ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
-    ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
-    ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
-    ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
-    ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
-    ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
-    ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
-    ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
-    ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
-    ("LDA", lda_instance, :typed, :reversediff, true),
+colnames = [
+    "Model", "Dim", "AD Backend", "VarInfo", "Linked", "t(eval)/t(ref)", "t(grad)/t(eval)"
 ]
-
-# Time running a model-like function that does not use DynamicPPL, as a reference point.
-# Eval timings will be relative to this.
-reference_time = begin
-    obs = randn(rng)
-    median(@benchmark Models.simple_assume_observe_non_model(obs)).time
+function print_results(results_table; to_file::Union{Nothing,String}=nothing)
+    if to_file isa String
+        # Write to the given file as JSON: one object per result row, keyed by `colnames`
+        results_array = [
+            Dict(colnames[i] => results_table[j][i] for i in eachindex(colnames)) for
+            j in eachindex(results_table)
+        ]
+        JSON.json(to_file, results_array; pretty=true)
+    else
+        # No file given: pretty-print the table to the terminal
+        table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
+        return pretty_table(
+            table_matrix;
+            column_labels=colnames,
+            backend=:text,
+            formatters=[fmt__printf("%.1f", [6, 7])],  # columns 6 and 7 hold the two time ratios
+            fit_table_in_display_horizontally=false,
+            fit_table_in_display_vertically=false,
+        )
+    end
 end
 
-results_table = Tuple{String,Int,String,String,Bool,Float64,Float64}[]
+function run(; to_file::Union{Nothing,String}=nothing)  # to_file: JSON output path, or nothing to print a table
+    # Create DynamicPPL.Model instances to run benchmarks on.
+    smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100))
+    loop_univariate1k, multivariate1k = begin
+        data_1k = randn(rng, 1_000)
+        loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k)
+        multi = Models.multivariate(length(data_1k)) | (; o=data_1k)
+        loop, multi
+    end
+    loop_univariate10k, multivariate10k = begin
+        data_10k = randn(rng, 10_000)
+        loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k)
+        multi = Models.multivariate(length(data_10k)) | (; o=data_10k)
+        loop, multi
+    end
+    lda_instance = begin
+        w = [1, 2, 3, 2, 1, 1]
+        d = [1, 1, 1, 2, 2, 2]
+        Models.lda(2, d, w)
+    end
 
-for (model_name, model, varinfo_choice, adbackend, islinked) in chosen_combinations
-    @info "Running benchmark for $model_name"
-    suite = make_suite(model, varinfo_choice, adbackend, islinked)
-    results = run(suite)
-    eval_time = median(results["evaluation"]).time
-    relative_eval_time = eval_time / reference_time
-    ad_eval_time = median(results["gradient"]).time
-    relative_ad_eval_time = ad_eval_time / eval_time
-    push!(
-        results_table,
+    # Specify the combinations to test:
+    # (Model Name, model instance, VarInfo choice, AD backend, linked)
+    chosen_combinations = [
         (
-            model_name,
-            model_dimension(model, islinked),
-            string(adbackend),
-            string(varinfo_choice),
-            islinked,
-            relative_eval_time,
-            relative_ad_eval_time,
+            "Simple assume observe",
+            Models.simple_assume_observe(randn(rng)),
+            :typed,
+            :forwarddiff,
+            false,
         ),
+        ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false),
+        ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
+        # ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
+        # ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
+        # ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
+        # ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
+        # ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
+        # ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
+        # ("LDA", lda_instance, :typed, :reversediff, true),
+    ]
+
+    # Time running a model-like function that does not use DynamicPPL, as a reference point.
+    # Eval timings will be relative to this.
+    reference_time = begin
+        obs = randn(rng)
+        median(@be Models.simple_assume_observe_non_model(obs)).time
+    end
+    @info "Reference evaluation time: $(reference_time) seconds"
+
+    results_table = Tuple{
+        String,Int,String,String,Bool,Union{Float64,Missing},Union{Float64,Missing}
+    }[]
+
+    for (model_name, model, varinfo_choice, adbackend, islinked) in chosen_combinations
+        @info "Running benchmark for $model_name"
+        relative_eval_time, relative_ad_eval_time = try
+            results = benchmark(model, varinfo_choice, adbackend, islinked)
+            results.primal_time / reference_time, results.grad_time / results.primal_time
+        catch e
+            @warn "Benchmark failed for $model_name" exception = (e, catch_backtrace())
+            missing, missing
+        end
+        push!(
+            results_table,
+            (
+                model_name,
+                model_dimension(model, islinked),
+                string(adbackend),
+                string(varinfo_choice),
+                islinked,
+                relative_eval_time,
+                relative_ad_eval_time,
+            ),
+        )
+        print_results(results_table; to_file=to_file)  # emit the partial results after every benchmark
+    end
+    return print_results(results_table; to_file=to_file)
+end
+
+struct TestCase  # identifies one benchmark configuration; used as the Dict key in combine()
+    model_name::String  # "Model" column
+    dim::Integer  # "Dim" column
+    ad_backend::String  # "AD Backend" column
+    varinfo::String  # "VarInfo" column
+    linked::Bool  # "Linked" column
+    TestCase(d::Dict{String,Any}) = new((d[c] for c in colnames[1:5])...)  # from one parsed JSON row
+end
+function combine()
+    # Read the head and base JSON result files and print a side-by-side comparison table.
+    head_results = JSON.parsefile(head_filename, Vector{Dict{String,Any}})
+    base_results = JSON.parsefile(base_filename, Vector{Dict{String,Any}})
+    # A benchmark that errored is written as JSON `null`, which parses to `nothing`.
+    # Map it back to `missing` so that the arithmetic and formatting below propagate it.
+    timings(d) = something.((d[colnames[6]], d[colnames[7]]), missing)
+    # Key each row by its unique (Model, Dim, AD Backend, VarInfo, Linked) combination
+    head_testcases = Dict(TestCase(d) => timings(d) for d in head_results)
+    base_testcases = Dict(TestCase(d) => timings(d) for d in base_results)
+    all_testcases = union(Set(keys(head_testcases)), Set(keys(base_testcases)))
+    sorted_testcases = sort(
+        collect(all_testcases); by=(c -> (c.model_name, c.ad_backend, c.varinfo, c.linked))
+    )
+    results_table = Tuple{
+        String,Int,String,String,Bool,String,String,String,String,String,String
+    }[]
+    results_colnames = [
+        [
+            EmptyCells(5),
+            MultiColumn(3, "t(eval) / t(ref)"),
+            MultiColumn(3, "t(grad) / t(eval)"),
+        ],
+        [colnames[1:5]..., "base", "this PR", "speedup", "base", "this PR", "speedup"],
+    ]
+    sprint_float(x::Real) = @sprintf("%.2f", x)
+    sprint_float(m::Missing) = "err"
+    for c in sorted_testcases
+        head_eval, head_grad = get(head_testcases, c, (missing, missing))
+        base_eval, base_grad = get(base_testcases, c, (missing, missing))
+        speedup_eval = base_eval / head_eval
+        speedup_grad = base_grad / head_grad
+        push!(
+            results_table,
+            (
+                c.model_name,
+                c.dim,
+                c.ad_backend,
+                c.varinfo,
+                c.linked,
+                sprint_float(base_eval),
+                sprint_float(head_eval),
+                sprint_float(speedup_eval),
+                sprint_float(base_grad),
+                sprint_float(head_grad),
+                sprint_float(speedup_grad),
+            ),
+        )
+    end
+    # Pretty-print to terminal
+    table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
+    return pretty_table(
+        table_matrix;
+        column_labels=results_colnames,
+        backend=:text,
+        fit_table_in_display_horizontally=false,
+        fit_table_in_display_vertically=false,
     )
-    println("Results so far:")
-    print_results(results_table)
 end
 
-println("Final results:")
-print_results(results_table)
+# CI passes one argument: `julia --project=. benchmarks.jl [combine|json-head|json-base]`.
+# With no argument (local use), run the benchmarks and print the results to the terminal.
+if ARGS == ["combine"]
+    combine()
+elseif ARGS == ["json-head"]
+    run(; to_file=head_filename)
+elseif ARGS == ["json-base"]
+    run(; to_file=base_filename)
+elseif isempty(ARGS)
+    run()
+else
+    error("unrecognised arguments $(ARGS); expected combine, json-head, json-base or none")
+end
diff --git a/benchmarks/src/DynamicPPLBenchmarks.jl b/benchmarks/src/DynamicPPLBenchmarks.jl
index 225e40cd8..0dc7ece6e 100644
--- a/benchmarks/src/DynamicPPLBenchmarks.jl
+++ b/benchmarks/src/DynamicPPLBenchmarks.jl
@@ -1,20 +1,20 @@
 module DynamicPPLBenchmarks
 
 using DynamicPPL: VarInfo, SimpleVarInfo, VarName
-using BenchmarkTools: BenchmarkGroup, @benchmarkable
 using DynamicPPL: DynamicPPL
+using DynamicPPL.TestUtils.AD: run_ad, NoTest
 using ADTypes: ADTypes
 using LogDensityProblems: LogDensityProblems
 
 using ForwardDiff: ForwardDiff
-using Mooncake: Mooncake
 using ReverseDiff: ReverseDiff
+using Mooncake: Mooncake
+using Enzyme: Enzyme
 using StableRNGs: StableRNG
 
 include("./Models.jl")
 using .Models: Models
-using Enzyme: Enzyme
-export Models, make_suite, model_dimension
+export Models, benchmark, model_dimension
 
 """
     model_dimension(model, islinked)
@@ -52,9 +52,11 @@ function to_backend(x::Union{AbstractString,Symbol})
 end
 
 """
-    make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool)
+    benchmark(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool)
+
+Benchmark evaluation and gradient calculation for `model` using the selected varinfo type
+and AD backend.
 
-Create a benchmark suite for `model` using the selected varinfo type and AD backend.
 Available varinfo choices:
   • `:untyped`           → uses `DynamicPPL.untyped_varinfo(model)`
   • `:typed`             → uses `DynamicPPL.typed_varinfo(model)`
@@ -65,10 +67,10 @@ The AD backend should be specified as a Symbol (e.g. `:forwarddiff`, `:reversedi
 
 `islinked` determines whether to link the VarInfo for evaluation.
 """
-function make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool)
+function benchmark(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::Bool)
     rng = StableRNG(23)
 
-    suite = BenchmarkGroup()
+    adbackend = to_backend(adbackend)
 
     vi = if varinfo_choice == :untyped
         DynamicPPL.untyped_varinfo(rng, model)
@@ -94,20 +96,9 @@ function make_suite(model, varinfo_choice::Symbol, adbackend::Symbol, islinked::
         vi = DynamicPPL.link(vi, model)
     end
 
-    f = DynamicPPL.LogDensityFunction(
-        model, DynamicPPL.getlogjoint_internal, vi; adtype=adbackend
+    return run_ad(
+        model, adbackend; varinfo=vi, benchmark=true, test=NoTest(), verbose=false
     )
-    # The parameters at which we evaluate f.
-    θ = vi[:]
-
-    # Run once to trigger compilation.
-    LogDensityProblems.logdensity_and_gradient(f, θ)
-    suite["gradient"] = @benchmarkable $(LogDensityProblems.logdensity_and_gradient)($f, $θ)
-
-    # Also benchmark just standard model evaluation because why not.
-    suite["evaluation"] = @benchmarkable $(LogDensityProblems.logdensity)($f, $θ)
-
-    return suite
 end
 
 end # module

From 1c92685f9f550494d1a9f31f22a2ff009c890c13 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:00:56 +0000
Subject: [PATCH 02/31] fix wd

---
 .github/workflows/Benchmarking.yml | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 8c93bf308..b8337e77b 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -17,6 +17,7 @@ jobs:
       - uses: julia-actions/cache@v2
 
       - name: Run benchmarks
+        working-directory: ./benchmarks
         run: |
           # Capture version info into a variable, print it, and set it as an env var for later steps
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
@@ -34,17 +35,10 @@ jobs:
           git checkout ${{ github.base_ref }}
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           julia --project=. benchmarks.jl json-base
-          # then combine them. The sed and tail calls cut out anything but the
-          # final block of results.
-          benchmark_output=$(\
-            julia --project=. benchmarks.jl combine \
-            | sed -n '/Final results:/,$p' \
-            | tail -n +2\
-          )
 
-          # Set the benchmark output as an env var for later steps
+          # combine them and save the output as an env var for later steps
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          echo "$benchmark_output" >> $GITHUB_ENV
+          julia --project=. benchmarks.jl combine >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
           # Get the current commit SHA of DynamicPPL
@@ -53,8 +47,6 @@ jobs:
 
           COMMIT_URL="https://github.com/${{ github.repository }}/commit/$DPPL_COMMIT_SHA"
           echo "DPPL_COMMIT_URL=$COMMIT_URL" >> $GITHUB_ENV
-        with:
-          working-directory: ./benchmarks
 
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4

From 6d85c9369036961d0d125d77aead8ad703fc356f Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:05:44 +0000
Subject: [PATCH 03/31] don't do shallow checkout

---
 .github/workflows/Benchmarking.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index b8337e77b..7cccc9a62 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -9,6 +9,8 @@ jobs:
 
     steps:
       - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0  # need to fetch all commits, otherwise git checkout will fail
 
       - uses: julia-actions/setup-julia@v2
         with:
@@ -27,7 +29,7 @@ jobs:
           echo "EOF" >> $GITHUB_ENV
 
           ## Run the actual benchmarks
-          # checkout pr HEAD
+          # first switch to PR head
           git checkout ${{ github.event.pull_request.head.sha }}
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           julia --project=. benchmarks.jl json-head

From a795745f4b385cccebe4cd9e43775cef3a4201b9 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:08:53 +0000
Subject: [PATCH 04/31] output shas correctly

---
 .github/workflows/Benchmarking.yml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 7cccc9a62..9fa27103f 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -33,22 +33,22 @@ jobs:
           git checkout ${{ github.event.pull_request.head.sha }}
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           julia --project=. benchmarks.jl json-head
+          # get the SHA of the PR head
+          DPPL_HEAD_COMMIT_SHA=$(git rev-parse HEAD)
+          echo "DPPL_HEAD_COMMIT_SHA=$DPPL_HEAD_COMMIT_SHA" >> $GITHUB_ENV
           # then switch to PR base
           git checkout ${{ github.base_ref }}
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           julia --project=. benchmarks.jl json-base
+          # get its SHA too
+          DPPL_BASE_COMMIT_SHA=$(git rev-parse BASE)
+          echo "DPPL_BASE_COMMIT_SHA=$DPPL_BASE_COMMIT_SHA" >> $GITHUB_ENV
 
           # combine them and save the output as an env var for later steps
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           julia --project=. benchmarks.jl combine >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-          # Get the current commit SHA of DynamicPPL
-          DPPL_COMMIT_SHA=$(git rev-parse HEAD)
-          echo "DPPL_COMMIT_SHA=$DPPL_COMMIT_SHA" >> $GITHUB_ENV
-
-          COMMIT_URL="https://github.com/${{ github.repository }}/commit/$DPPL_COMMIT_SHA"
-          echo "DPPL_COMMIT_URL=$COMMIT_URL" >> $GITHUB_ENV
 
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4
@@ -62,7 +62,11 @@ jobs:
         with:
           issue-number: ${{ github.event.pull_request.number }}
           body: |
-            ## Benchmark Report for Commit ${{ env.DPPL_COMMIT_SHA }}
+            ## Benchmark Report
+
+            - this PR's head: `${{ env.DPPL_HEAD_COMMIT_SHA }}`
+            - base branch: `${{ env.DPPL_BASE_COMMIT_SHA }}`
+
             ### Computer Information
             ```
             ${{ env.VERSION_INFO }}

From a7283280040a49947951f9462e99036cfc7765cc Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:13:20 +0000
Subject: [PATCH 05/31] resolve

---
 .github/workflows/Benchmarking.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 9fa27103f..68efea913 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -31,14 +31,14 @@ jobs:
           ## Run the actual benchmarks
           # first switch to PR head
           git checkout ${{ github.event.pull_request.head.sha }}
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate()'
           julia --project=. benchmarks.jl json-head
           # get the SHA of the PR head
           DPPL_HEAD_COMMIT_SHA=$(git rev-parse HEAD)
           echo "DPPL_HEAD_COMMIT_SHA=$DPPL_HEAD_COMMIT_SHA" >> $GITHUB_ENV
           # then switch to PR base
           git checkout ${{ github.base_ref }}
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate()'
           julia --project=. benchmarks.jl json-base
           # get its SHA too
           DPPL_BASE_COMMIT_SHA=$(git rev-parse BASE)

From 0d81fc7c58bcf10623d00dba0cf4cfce78ef54a8 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:19:19 +0000
Subject: [PATCH 06/31] registry?

---
 .github/workflows/Benchmarking.yml | 2 ++
 benchmarks/Project.toml            | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 68efea913..593d52fc1 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -28,6 +28,8 @@ jobs:
           echo "$version_info" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
+          julia --project=. -e 'Pkg.Registry.add()'
+
           ## Run the actual benchmarks
           # first switch to PR head
           git checkout ${{ github.event.pull_request.head.sha }}
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
index 710024b77..c154c5ca5 100644
--- a/benchmarks/Project.toml
+++ b/benchmarks/Project.toml
@@ -26,7 +26,7 @@ Chairmarks = "1.3.1"
 Distributions = "0.25.117"
 DynamicPPL = "0.38"
 Enzyme = "0.13"
-ForwardDiff = "0.10.38, 1"
+ForwardDiff = "1"
 JSON = "1.3.0"
 LogDensityProblems = "2.1.2"
 Mooncake = "0.4"

From 81078f8435917b55fa46d4eab6e53ad08b59a01f Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:20:25 +0000
Subject: [PATCH 07/31] using

---
 .github/workflows/Benchmarking.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 593d52fc1..1cb813edd 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -28,7 +28,7 @@ jobs:
           echo "$version_info" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-          julia --project=. -e 'Pkg.Registry.add()'
+          julia --project=. -e 'using Pkg; Pkg.Registry.add()'
 
           ## Run the actual benchmarks
           # first switch to PR head

From e5f1089210693bb184475d53ad18ce69828505e9 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:26:47 +0000
Subject: [PATCH 08/31] silly

---
 .github/workflows/Benchmarking.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 1cb813edd..51c473ad7 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -43,7 +43,7 @@ jobs:
           julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate()'
           julia --project=. benchmarks.jl json-base
           # get its SHA too
-          DPPL_BASE_COMMIT_SHA=$(git rev-parse BASE)
+          DPPL_BASE_COMMIT_SHA=$(git rev-parse HEAD)
           echo "DPPL_BASE_COMMIT_SHA=$DPPL_BASE_COMMIT_SHA" >> $GITHUB_ENV
 
           # combine them and save the output as an env var for later steps
@@ -51,7 +51,6 @@ jobs:
           julia --project=. benchmarks.jl combine >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4
         id: find_comment

From 353276dd62a5c417f210ec96f63f2dc3ba32ac55 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:27:33 +0000
Subject: [PATCH 09/31] add explanatory comment

---
 .github/workflows/Benchmarking.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 51c473ad7..a3e39cdb0 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -28,6 +28,9 @@ jobs:
           echo "$version_info" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
+          # need to do this or else Pkg.resolve() fails
+          # in general resolve() is needed because dependencies may change
+          # between the base and head commits.
           julia --project=. -e 'using Pkg; Pkg.Registry.add()'
 
           ## Run the actual benchmarks

From 27dc692303fb6c17d32da243c0d93073e2cfdd5c Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:29:29 +0000
Subject: [PATCH 10/31] don't disable

---
 benchmarks/benchmarks.jl | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 40be4a339..1a2da56cf 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -74,16 +74,16 @@ function run(; to_file::Union{Nothing,String}=nothing)
         ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
         ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
         ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
-        # ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
-        # ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
-        # ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
-        # ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
-        # ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
-        # ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
-        # ("LDA", lda_instance, :typed, :reversediff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
+        ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
+        ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
+        ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
+        ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
+        ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
+        ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
+        ("LDA", lda_instance, :typed, :reversediff, true),
     ]
 
     # Time running a model-like function that does not use DynamicPPL, as a reference point.

From 73db6eea7a21f8d150b4b7b4cb4086d46e2f1b8c Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 19:31:00 +0000
Subject: [PATCH 11/31] catch the case when the file doesn't exist

---
 benchmarks/benchmarks.jl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 1a2da56cf..cd3993c86 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -133,8 +133,16 @@ struct TestCase
     TestCase(d::Dict{String,Any}) = new((d[c] for c in colnames[1:5])...)
 end
 function combine()
-    head_results = JSON.parsefile(head_filename, Vector{Dict{String,Any}})
-    base_results = JSON.parsefile(base_filename, Vector{Dict{String,Any}})
+    head_results = try
+        JSON.parsefile(head_filename, Vector{Dict{String,Any}})
+    catch
+        Dict{String,Any}[]
+    end
+    base_results = try
+        JSON.parsefile(base_filename, Vector{Dict{String,Any}})
+    catch
+        Dict{String,Any}[]
+    end
     # Identify unique combinations of (Model, Dim, AD Backend, VarInfo, Linked)
     head_testcases = Dict(
         TestCase(d) => (d[colnames[6]], d[colnames[7]]) for d in head_results

From 0b8fc9f1596afff1f6f6d8f490113d53f93ae28a Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 20:06:42 +0000
Subject: [PATCH 12/31] use version of combine on pr head

---
 .github/workflows/Benchmarking.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index a3e39cdb0..bbec5cd42 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -50,6 +50,7 @@ jobs:
           echo "DPPL_BASE_COMMIT_SHA=$DPPL_BASE_COMMIT_SHA" >> $GITHUB_ENV
 
           # combine them and save the output as an env var for later steps
+          git checkout ${{ github.event.pull_request.head.sha }}
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           julia --project=. benchmarks.jl combine >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV

From 4db02b567feadb15f9965d0be6f82b981065ec2d Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 20:31:04 +0000
Subject: [PATCH 13/31] run base and head benchmarks as parallel jobs

---
 .github/workflows/Benchmarking.yml | 66 +++++++++++++++++++-----------
 benchmarks/benchmarks.jl           |  3 +-
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index bbec5cd42..0f92014ae 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -4,50 +4,66 @@ on:
   pull_request:
 
 jobs:
-  benchmarks:
+  benchmark-base:
     runs-on: ubuntu-latest
-
+    outputs:
+      results: ${{ steps.benchmark.outputs.results }}
+      sha: ${{ steps.benchmark.outputs.sha }}
     steps:
       - uses: actions/checkout@v5
         with:
-          fetch-depth: 0  # need to fetch all commits, otherwise git checkout will fail
-
+          ref: ${{ github.base_ref }}
       - uses: julia-actions/setup-julia@v2
         with:
           version: '1.11'
+      - uses: julia-actions/cache@v2
+
+      - name: Run benchmarks
+        id: benchmark
+        working-directory: ./benchmarks
+        run: |
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          echo "results=$(julia --project=. benchmarks.jl json-base)" >> "$GITHUB_OUTPUT"
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
+  benchmark-head:
+    runs-on: ubuntu-latest
+    outputs:
+      results: ${{ steps.benchmark.outputs.results }}
+      sha: ${{ steps.benchmark.outputs.sha }}
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: '1.11'
       - uses: julia-actions/cache@v2
 
       - name: Run benchmarks
+        id: benchmark
         working-directory: ./benchmarks
         run: |
-          # Capture version info into a variable, print it, and set it as an env var for later steps
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          echo "results=$(julia --project=. benchmarks.jl json-base)" >> "$GITHUB_OUTPUT"
+          echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+  combine-results:
+    runs-on: ubuntu-latest
+    needs: [benchmark-base, benchmark-head]
+    steps:
+      - name: Get versioninfo
+        run: |
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
           echo "$version_info"
           echo "VERSION_INFO<<EOF" >> $GITHUB_ENV
           echo "$version_info" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-          # need to do this or else Pkg.resolve() fails
-          # in general resolve() is needed because dependencies may change
-          # between the base and head commits.
-          julia --project=. -e 'using Pkg; Pkg.Registry.add()'
-
-          ## Run the actual benchmarks
-          # first switch to PR head
-          git checkout ${{ github.event.pull_request.head.sha }}
-          julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate()'
-          julia --project=. benchmarks.jl json-head
-          # get the SHA of the PR head
-          DPPL_HEAD_COMMIT_SHA=$(git rev-parse HEAD)
-          echo "DPPL_HEAD_COMMIT_SHA=$DPPL_HEAD_COMMIT_SHA" >> $GITHUB_ENV
-          # then switch to PR base
-          git checkout ${{ github.base_ref }}
-          julia --project=. -e 'using Pkg; Pkg.resolve(); Pkg.instantiate()'
-          julia --project=. benchmarks.jl json-base
-          # get its SHA too
-          DPPL_BASE_COMMIT_SHA=$(git rev-parse HEAD)
-          echo "DPPL_BASE_COMMIT_SHA=$DPPL_BASE_COMMIT_SHA" >> $GITHUB_ENV
+          # save outputs of previous jobs to json file
+          # TODO: fix hardcoding
+          echo ${{needs.benchmark-base.outputs.results}} > benchmarks_result_base.json
+          echo ${{needs.benchmark-head.outputs.results}} > benchmarks_result_head.json
 
           # combine them and save the output as an env var for later steps
           git checkout ${{ github.event.pull_request.head.sha }}
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index cd3993c86..143d3ea8d 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -22,7 +22,8 @@ function print_results(results_table; to_file::Union{Nothing,String}=nothing)
             Dict(colnames[i] => results_table[j][i] for i in eachindex(colnames)) for
             j in eachindex(results_table)
         ]
-        JSON.json(to_file, results_array; pretty=true)
+        # do not use pretty=true, as GitHub Actions expects no linebreaks
+        JSON.json(to_file, results_array)
     else
         # Pretty-print to terminal
         table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)

From 89ae3846d11885625ca2c67b2da261aed396b56c Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 20:32:40 +0000
Subject: [PATCH 14/31] parallelise 2: write benchmark JSON to stdout

---
 .github/workflows/Benchmarking.yml |  4 ++--
 benchmarks/benchmarks.jl           | 18 ++++++++----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 0f92014ae..90cc1056e 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -23,7 +23,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          echo "results=$(julia --project=. benchmarks.jl json-base)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   benchmark-head:
@@ -45,7 +45,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          echo "results=$(julia --project=. benchmarks.jl json-base)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 143d3ea8d..2267c1d4e 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -15,15 +15,15 @@ base_filename = "benchmarks_result_base.json"
 colnames = [
     "Model", "Dim", "AD Backend", "VarInfo", "Linked", "t(eval)/t(ref)", "t(grad)/t(eval)"
 ]
-function print_results(results_table; to_file::Union{Nothing,String}=nothing)
-    if to_file isa String
+function print_results(results_table; to_json=false)
+    if to_json
         # Print to the given file as JSON
         results_array = [
             Dict(colnames[i] => results_table[j][i] for i in eachindex(colnames)) for
             j in eachindex(results_table)
         ]
         # do not use pretty=true, as GitHub Actions expects no linebreaks
-        JSON.json(to_file, results_array)
+        JSON.json(stdout, results_array)
     else
         # Pretty-print to terminal
         table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
@@ -38,7 +38,7 @@ function print_results(results_table; to_file::Union{Nothing,String}=nothing)
     end
 end
 
-function run(; to_file::Union{Nothing,String}=nothing)
+function run(; to_json=false)
     # Create DynamicPPL.Model instances to run benchmarks on.
     smorgasbord_instance = Models.smorgasbord(randn(rng, 100), randn(rng, 100))
     loop_univariate1k, multivariate1k = begin
@@ -120,9 +120,9 @@ function run(; to_file::Union{Nothing,String}=nothing)
                 relative_ad_eval_time,
             ),
         )
-        print_results(results_table; to_file=to_file)
+        print_results(results_table; to_json=to_json)
     end
-    return print_results(results_table; to_file=to_file)
+    return print_results(results_table; to_json=to_json)
 end
 
 struct TestCase
@@ -205,10 +205,8 @@ end
 # Run with `julia --project=. benchmarks.jl [combine|json-head|json-base]`
 if ARGS == ["combine"]
     combine()
-elseif ARGS == ["json-head"]
-    run(; to_file=head_filename)
-elseif ARGS == ["json-base"]
-    run(; to_file=base_filename)
+elseif ARGS == ["json"]
+    run(; to_json=true)
 elseif ARGS == []
     # When running locally just omit the argument and it will just benchmark and print to
     # terminal.

From 34b254e9a24484eddddab14f13969456276d3c87 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 20:35:11 +0000
Subject: [PATCH 15/31] tableformat

---
 benchmarks/benchmarks.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 2267c1d4e..5933e3928 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -3,7 +3,7 @@ using Pkg
 using Chairmarks: @be, median
 using DynamicPPLBenchmarks: Models, benchmark, model_dimension
 using JSON: JSON
-using PrettyTables: pretty_table, fmt__printf, EmptyCells, MultiColumn
+using PrettyTables: pretty_table, fmt__printf, EmptyCells, MultiColumn, TextTableFormat
 using Printf: @sprintf
 using StableRNGs: StableRNG
 
@@ -198,6 +198,7 @@ function combine()
         backend=:text,
         fit_table_in_display_horizontally=false,
         fit_table_in_display_vertically=false,
+        table_format=TextTableFormat(; horizontal_line_at_merged_column_labels=true),
     )
 end
 

From e10610d6f3b364e801fde496a356d23540fb5b3d Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:13:56 +0000
Subject: [PATCH 16/31] keep one output line (head -n 1); combine takes filenames

---
 .github/workflows/Benchmarking.yml |  4 ++--
 benchmarks/benchmarks.jl           | 13 +++++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 90cc1056e..57d677a35 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -23,7 +23,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          echo "results=$(julia --project=. benchmarks.jl json)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   benchmark-head:
@@ -45,7 +45,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          echo "results=$(julia --project=. benchmarks.jl json)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 5933e3928..33b83964a 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -133,7 +133,7 @@ struct TestCase
     linked::Bool
     TestCase(d::Dict{String,Any}) = new((d[c] for c in colnames[1:5])...)
 end
-function combine()
+function combine(head_filename::String, base_filename::String)
     head_results = try
         JSON.parsefile(head_filename, Vector{Dict{String,Any}})
     catch
@@ -203,13 +203,18 @@ function combine()
 end
 
 # The command-line arguments are used on CI purposes.
-# Run with `julia --project=. benchmarks.jl [combine|json-head|json-base]`
-if ARGS == ["combine"]
-    combine()
+# Run with `julia --project=. benchmarks.jl json` to run benchmarks and output JSON to
+# stdout
+# Run with `julia --project=. benchmarks.jl combine head.json base.json` to combine two JSON
+# files
+if length(ARGS) == 3 && ARGS[1] == "combine"
+    combine(ARGS[2], ARGS[3])
 elseif ARGS == ["json"]
     run(; to_json=true)
 elseif ARGS == []
     # When running locally just omit the argument and it will just benchmark and print to
     # terminal.
     run()
+else
+    error("invalid arguments: $(ARGS)")
 end

From db7eda9eaf791c79bbfd34f38a3e94da40a1bd76 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:14:55 +0000
Subject: [PATCH 17/31] comment

---
 .github/workflows/Benchmarking.yml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 57d677a35..e31705954 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -23,6 +23,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          # github output can't handle more than 1 line, hence the head
           echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
@@ -45,6 +46,7 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          # github output can't handle more than 1 line, hence the head
           echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
@@ -61,14 +63,13 @@ jobs:
           echo "EOF" >> $GITHUB_ENV
 
           # save outputs of previous jobs to json file
-          # TODO: fix hardcoding
-          echo ${{needs.benchmark-base.outputs.results}} > benchmarks_result_base.json
-          echo ${{needs.benchmark-head.outputs.results}} > benchmarks_result_head.json
+          echo ${{needs.benchmark-base.outputs.results}} > base.json
+          echo ${{needs.benchmark-head.outputs.results}} > head.json
 
           # combine them and save the output as an env var for later steps
           git checkout ${{ github.event.pull_request.head.sha }}
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          julia --project=. benchmarks.jl combine >> $GITHUB_ENV
+          julia --project=. benchmarks.jl combine head.json base.json >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
       - name: Find Existing Comment

From e02213cfabbdda4b6022f4ed301b7e39d22d2817 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:28:19 +0000
Subject: [PATCH 18/31] or true

---
 .github/workflows/Benchmarking.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index e31705954..0f05bc8a1 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -24,7 +24,7 @@ jobs:
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the head
-          echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json | head -n 1 || true)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   benchmark-head:
@@ -47,7 +47,7 @@ jobs:
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the head
-          echo "results=$(julia --project=. benchmarks.jl json | head -n 1)" >> "$GITHUB_OUTPUT"
+          echo "results=$(julia --project=. benchmarks.jl json | head -n 1 || true)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:

From c6a96e3c0e22a14c0fb7c9ffec2d54b00efa1943 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:39:46 +0000
Subject: [PATCH 19/31] use tail, quote job outputs, disable most benchmarks

---
 .github/workflows/Benchmarking.yml | 12 ++++++------
 benchmarks/benchmarks.jl           | 30 +++++++++++++++---------------
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 0f05bc8a1..1e2c73c2c 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -23,8 +23,8 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          # github output can't handle more than 1 line, hence the head
-          echo "results=$(julia --project=. benchmarks.jl json | head -n 1 || true)" >> "$GITHUB_OUTPUT"
+          # github output can't handle more than 1 line, hence the tail
+          echo "results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   benchmark-head:
@@ -46,8 +46,8 @@ jobs:
         working-directory: ./benchmarks
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          # github output can't handle more than 1 line, hence the head
-          echo "results=$(julia --project=. benchmarks.jl json | head -n 1 || true)" >> "$GITHUB_OUTPUT"
+          # github output can't handle more than 1 line, hence the tail
+          echo "results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:
@@ -63,8 +63,8 @@ jobs:
           echo "EOF" >> $GITHUB_ENV
 
           # save outputs of previous jobs to json file
-          echo ${{needs.benchmark-base.outputs.results}} > base.json
-          echo ${{needs.benchmark-head.outputs.results}} > head.json
+          echo '${{needs.benchmark-base.outputs.results}}' > base.json
+          echo '${{needs.benchmark-head.outputs.results}}' > head.json
 
           # combine them and save the output as an env var for later steps
           git checkout ${{ github.event.pull_request.head.sha }}
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 33b83964a..350aebe42 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -70,21 +70,21 @@ function run(; to_json=false)
             false,
         ),
         ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false),
-        ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
-        ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
-        ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
-        ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
-        ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
-        ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
-        ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
-        ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
-        ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
-        ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
-        ("LDA", lda_instance, :typed, :reversediff, true),
+        # ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
+        # ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
+        # ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
+        # ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
+        # ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
+        # ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
+        # ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
+        # ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
+        # ("LDA", lda_instance, :typed, :reversediff, true),
     ]
 
     # Time running a model-like function that does not use DynamicPPL, as a reference point.

From 9c0d1ab70b52efee5dae9b401e90920210c2ce64 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:44:07 +0000
Subject: [PATCH 20/31] forgot to load julia

---
 .github/workflows/Benchmarking.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 1e2c73c2c..51dddc3e2 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -54,6 +54,13 @@ jobs:
     runs-on: ubuntu-latest
     needs: [benchmark-base, benchmark-head]
     steps:
+      - uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: '1.11'
+      - uses: julia-actions/cache@v2
       - name: Get versioninfo
         run: |
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
@@ -67,7 +74,6 @@ jobs:
           echo '${{needs.benchmark-head.outputs.results}}' > head.json
 
           # combine them and save the output as an env var for later steps
-          git checkout ${{ github.event.pull_request.head.sha }}
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           julia --project=. benchmarks.jl combine head.json base.json >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV

From 2814827b6a6d6e61d0c649ee55b85fc9d5784515 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 21:57:26 +0000
Subject: [PATCH 21/31] set working dir for combine job; re-enable benchmarks

---
 .github/workflows/Benchmarking.yml |  1 +
 benchmarks/benchmarks.jl           | 30 +++++++++++++++---------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 51dddc3e2..59c52da3c 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -62,6 +62,7 @@ jobs:
           version: '1.11'
       - uses: julia-actions/cache@v2
       - name: Get versioninfo
+        working-directory: ./benchmarks
         run: |
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
           echo "$version_info"
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 350aebe42..33b83964a 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -70,21 +70,21 @@ function run(; to_json=false)
             false,
         ),
         ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false),
-        # ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
-        # ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
-        # ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
-        # ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
-        # ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
-        # ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
-        # ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
-        # ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
-        # ("LDA", lda_instance, :typed, :reversediff, true),
+        ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed_vector, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :untyped_vector, :forwarddiff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
+        ("Smorgasbord", smorgasbord_instance, :typed, :enzyme, true),
+        ("Loop univariate 1k", loop_univariate1k, :typed, :mooncake, true),
+        ("Multivariate 1k", multivariate1k, :typed, :mooncake, true),
+        ("Loop univariate 10k", loop_univariate10k, :typed, :mooncake, true),
+        ("Multivariate 10k", multivariate10k, :typed, :mooncake, true),
+        ("Dynamic", Models.dynamic(), :typed, :mooncake, true),
+        ("Submodel", Models.parent(randn(rng)), :typed, :mooncake, true),
+        ("LDA", lda_instance, :typed, :reversediff, true),
     ]
 
     # Time running a model-like function that does not use DynamicPPL, as a reference point.

From 687349a40dab2657e7ecefcb0ea7c691479ce3fb Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 22:15:42 +0000
Subject: [PATCH 22/31] instantiate

---
 .github/workflows/Benchmarking.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 59c52da3c..0719700f7 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -75,6 +75,7 @@ jobs:
           echo '${{needs.benchmark-head.outputs.results}}' > head.json
 
           # combine them and save the output as an env var for later steps
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           julia --project=. benchmarks.jl combine head.json base.json >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV

From 10d808323bc699a554aa47d56d6e77cb86a4d2d1 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 22:43:41 +0000
Subject: [PATCH 23/31] log results; read commit SHAs from job outputs

---
 .github/workflows/Benchmarking.yml | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 0719700f7..81f62d885 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -24,7 +24,9 @@ jobs:
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the tail
-          echo "results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)" >> "$GITHUB_OUTPUT"
+          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          echo $results
+          echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   benchmark-head:
@@ -47,7 +49,9 @@ jobs:
         run: |
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the tail
-          echo "results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)" >> "$GITHUB_OUTPUT"
+          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          echo $results
+          echo "results=$results >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:
@@ -71,13 +75,16 @@ jobs:
           echo "EOF" >> $GITHUB_ENV
 
           # save outputs of previous jobs to json file
+          echo '${{needs.benchmark-base.outputs.results}}'
           echo '${{needs.benchmark-base.outputs.results}}' > base.json
+          echo '${{needs.benchmark-head.outputs.results}}'
           echo '${{needs.benchmark-head.outputs.results}}' > head.json
 
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
           julia --project=. benchmarks.jl combine head.json base.json >> $GITHUB_ENV
+          echo "" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
       - name: Find Existing Comment
@@ -94,8 +101,8 @@ jobs:
           body: |
             ## Benchmark Report
 
-            - this PR's head: `${{ env.DPPL_HEAD_COMMIT_SHA }}`
-            - base branch: `${{ env.DPPL_BASE_COMMIT_SHA }}`
+            - this PR's head: `${{ needs.benchmark-head.outputs.sha }}`
+            - base branch: `${{ needs.benchmark-base.outputs.sha }}`
 
             ### Computer Information
             ```

From 99c52963cc8408503ba9a89ea01bf6821b1974fa Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 22:57:02 +0000
Subject: [PATCH 24/31] fix missing closing quote in benchmark-head step

---
 .github/workflows/Benchmarking.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 81f62d885..c15b31e16 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -51,7 +51,7 @@ jobs:
           # github output can't handle more than 1 line, hence the tail
           results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
           echo $results
-          echo "results=$results >> "$GITHUB_OUTPUT"
+          echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
 
   combine-results:

From dabac1b6073070be29a1c795d352887600cc9495 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 23:20:43 +0000
Subject: [PATCH 25/31] capture combine output in a variable first

---
 .github/workflows/Benchmarking.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index c15b31e16..92cfe3af6 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -82,9 +82,9 @@ jobs:
 
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          benchmark_output=$(julia --project=. benchmarks.jl combine head.json base.json)
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          julia --project=. benchmarks.jl combine head.json base.json >> $GITHUB_ENV
-          echo "" >> $GITHUB_ENV
+          echo "$benchmark_output" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
       - name: Find Existing Comment

From 12b0fa8ff2f6dd56d46d0b0ff843f2ff72ac3c34 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 23:21:41 +0000
Subject: [PATCH 26/31] run combine without capturing its output

---
 .github/workflows/Benchmarking.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 92cfe3af6..fef799c39 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -82,9 +82,8 @@ jobs:
 
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          benchmark_output=$(julia --project=. benchmarks.jl combine head.json base.json)
           echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          echo "$benchmark_output" >> $GITHUB_ENV
+          julia --project=. benchmarks.jl combine head.json base.json
           echo "EOF" >> $GITHUB_ENV
 
       - name: Find Existing Comment

From 42dc2dfef3e682565183d99e45912da8a07b2aa9 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 23:22:44 +0000
Subject: [PATCH 27/31] group BENCHMARK_OUTPUT writes (from the docs)

---
 .github/workflows/Benchmarking.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index fef799c39..14008cdbe 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -82,9 +82,11 @@ jobs:
 
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
-          julia --project=. benchmarks.jl combine head.json base.json
-          echo "EOF" >> $GITHUB_ENV
+          {
+            echo 'BENCHMARK_OUTPUT<<EOF'
+            julia --project=. benchmarks.jl combine head.json base.json
+            echo EOF
+          } >> "$GITHUB_ENV"
 
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4

From 13f6daf2e8d0b5baa8bb82575a3f4f109c1f49ca Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Mon, 17 Nov 2025 23:47:22 +0000
Subject: [PATCH 28/31] debug: stub out benchmark run with placeholder

---
 .github/workflows/Benchmarking.yml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 14008cdbe..dfce798c8 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -22,9 +22,10 @@ jobs:
         id: benchmark
         working-directory: ./benchmarks
         run: |
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the tail
-          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          # julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          results=$(hello)
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
@@ -47,9 +48,10 @@ jobs:
         id: benchmark
         working-directory: ./benchmarks
         run: |
-          julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # github output can't handle more than 1 line, hence the tail
-          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          # julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
+          results=$(hello)
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

From 5c598a65f5ea22c5b794b53e3c11355805f40df1 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Tue, 18 Nov 2025 00:28:28 +0000
Subject: [PATCH 29/31] debug: log combine inputs; guard empty table

---
 .github/workflows/Benchmarking.yml | 22 +++++++++++++++-------
 benchmarks/benchmarks.jl           | 25 ++++++++++++++++---------
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index dfce798c8..3fe217ba2 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -25,7 +25,7 @@ jobs:
           # github output can't handle more than 1 line, hence the tail
           # julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          results=$(hello)
+          results="hello"
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
@@ -51,7 +51,7 @@ jobs:
           # github output can't handle more than 1 line, hence the tail
           # julia --project=. -e 'using Pkg; Pkg.instantiate()'
           # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          results=$(hello)
+          results="hello"
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
@@ -77,18 +77,26 @@ jobs:
           echo "EOF" >> $GITHUB_ENV
 
           # save outputs of previous jobs to json file
+          echo "Base results"
+          echo "--------------------------------------------------------"
           echo '${{needs.benchmark-base.outputs.results}}'
           echo '${{needs.benchmark-base.outputs.results}}' > base.json
+          echo "Head results"
+          echo "--------------------------------------------------------"
           echo '${{needs.benchmark-head.outputs.results}}'
           echo '${{needs.benchmark-head.outputs.results}}' > head.json
 
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          {
-            echo 'BENCHMARK_OUTPUT<<EOF'
-            julia --project=. benchmarks.jl combine head.json base.json
-            echo EOF
-          } >> "$GITHUB_ENV"
+          results=$(julia --project=. benchmarks.jl combine head.json base.json)
+          echo $?
+          echo "Combined results"
+          echo "--------------------------------------------------------"
+          echo "$results"
+
+          echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
+          echo "$results" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
 
       - name: Find Existing Comment
         uses: peter-evans/find-comment@v4
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index 33b83964a..afe7f7e14 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -139,11 +139,13 @@ function combine(head_filename::String, base_filename::String)
     catch
         Dict{String,Any}[]
     end
+    @info "Loaded $(length(head_results)) results from $head_filename"
     base_results = try
         JSON.parsefile(base_filename, Vector{Dict{String,Any}})
     catch
         Dict{String,Any}[]
     end
+    @info "Loaded $(length(base_results)) results from $base_filename"
     # Identify unique combinations of (Model, Dim, AD Backend, VarInfo, Linked)
     head_testcases = Dict(
         TestCase(d) => (d[colnames[6]], d[colnames[7]]) for d in head_results
@@ -152,6 +154,7 @@ function combine(head_filename::String, base_filename::String)
         TestCase(d) => (d[colnames[6]], d[colnames[7]]) for d in base_results
     )
     all_testcases = union(Set(keys(head_testcases)), Set(keys(base_testcases)))
+    @info "$(length(all_testcases)) unique test cases found"
     sorted_testcases = sort(
         collect(all_testcases); by=(c -> (c.model_name, c.ad_backend, c.varinfo, c.linked))
     )
@@ -191,15 +194,19 @@ function combine(head_filename::String, base_filename::String)
         )
     end
     # Pretty-print to terminal
-    table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
-    return pretty_table(
-        table_matrix;
-        column_labels=results_colnames,
-        backend=:text,
-        fit_table_in_display_horizontally=false,
-        fit_table_in_display_vertically=false,
-        table_format=TextTableFormat(; horizontal_line_at_merged_column_labels=true),
-    )
+    if isempty(results_table)
+        println("No benchmark results obtained.")
+    else
+        table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
+        pretty_table(
+            table_matrix;
+            column_labels=results_colnames,
+            backend=:text,
+            fit_table_in_display_horizontally=false,
+            fit_table_in_display_vertically=false,
+            table_format=TextTableFormat(; horizontal_line_at_merged_column_labels=true),
+        )
+    end
 end
 
 # The command-line arguments are used on CI purposes.

From c52b31f706f7ba5e3d1f1a26c0c8bbf5eaf1c7bc Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Tue, 18 Nov 2025 00:35:12 +0000
Subject: [PATCH 30/31] restore benchmark runs; fence table in combine()

---
 .github/workflows/Benchmarking.yml | 22 ++++++++++------------
 benchmarks/benchmarks.jl           |  2 ++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/Benchmarking.yml b/.github/workflows/Benchmarking.yml
index 3fe217ba2..a6310afe5 100644
--- a/.github/workflows/Benchmarking.yml
+++ b/.github/workflows/Benchmarking.yml
@@ -23,9 +23,8 @@ jobs:
         working-directory: ./benchmarks
         run: |
           # github output can't handle more than 1 line, hence the tail
-          # julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          results="hello"
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
@@ -49,9 +48,8 @@ jobs:
         working-directory: ./benchmarks
         run: |
           # github output can't handle more than 1 line, hence the tail
-          # julia --project=. -e 'using Pkg; Pkg.instantiate()'
-          # results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
-          results="hello"
+          julia --project=. -e 'using Pkg; Pkg.instantiate()'
+          results=$(julia --project=. benchmarks.jl json | tail -n 1 || true)
           echo $results
           echo "results=$results" >> "$GITHUB_OUTPUT"
           echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
@@ -67,7 +65,8 @@ jobs:
         with:
           version: '1.11'
       - uses: julia-actions/cache@v2
-      - name: Get versioninfo
+
+      - name: Combine benchmark results
         working-directory: ./benchmarks
         run: |
           version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
@@ -89,7 +88,6 @@ jobs:
           # combine them and save the output as an env var for later steps
           julia --project=. -e 'using Pkg; Pkg.instantiate()'
           results=$(julia --project=. benchmarks.jl combine head.json base.json)
-          echo $?
           echo "Combined results"
           echo "--------------------------------------------------------"
           echo "$results"
@@ -98,14 +96,14 @@ jobs:
           echo "$results" >> $GITHUB_ENV
           echo "EOF" >> $GITHUB_ENV
 
-      - name: Find Existing Comment
+      - name: Find existing benchmark comment
         uses: peter-evans/find-comment@v4
         id: find_comment
         with:
           issue-number: ${{ github.event.pull_request.number }}
           comment-author: github-actions[bot]
 
-      - name: Post Benchmark Results as PR Comment
+      - name: Create or update benchmark comment
         uses: peter-evans/create-or-update-comment@v5
         with:
           issue-number: ${{ github.event.pull_request.number }}
@@ -120,8 +118,8 @@ jobs:
             ${{ env.VERSION_INFO }}
             ```
             ### Benchmark Results
-            ```
+
             ${{ env.BENCHMARK_OUTPUT }}
-            ```
+
           comment-id: ${{ steps.find_comment.outputs.comment-id }}
           edit-mode: replace
diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index afe7f7e14..f0b898adf 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -198,6 +198,7 @@ function combine(head_filename::String, base_filename::String)
         println("No benchmark results obtained.")
     else
         table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
+        println("```")
         pretty_table(
             table_matrix;
             column_labels=results_colnames,
@@ -206,6 +207,7 @@ function combine(head_filename::String, base_filename::String)
             fit_table_in_display_vertically=false,
             table_format=TextTableFormat(; horizontal_line_at_merged_column_labels=true),
         )
+        println("```")
     end
 end
 

From 12f6cc5b4e01ef0bfb11671995ec196c855bb123 Mon Sep 17 00:00:00 2001
From: Penelope Yong <penelopeysm@gmail.com>
Date: Tue, 18 Nov 2025 01:00:04 +0000
Subject: [PATCH 31/31] add newline after JSON; run() returns nothing

---
 benchmarks/benchmarks.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl
index f0b898adf..7bae6a9da 100644
--- a/benchmarks/benchmarks.jl
+++ b/benchmarks/benchmarks.jl
@@ -24,6 +24,7 @@ function print_results(results_table; to_json=false)
         ]
         # do not use pretty=true, as GitHub Actions expects no linebreaks
         JSON.json(stdout, results_array)
+        println()
     else
         # Pretty-print to terminal
         table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
@@ -122,7 +123,8 @@ function run(; to_json=false)
         )
         print_results(results_table; to_json=to_json)
     end
-    return print_results(results_table; to_json=to_json)
+    print_results(results_table; to_json=to_json)
+    return nothing
 end
 
 struct TestCase