From caabd0f5fb90163b85af8cae73db8799e1f1c17c Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Wed, 19 Nov 2025 12:58:36 +0100
Subject: [PATCH 01/14] initial changes to get a working demo

---
 src/abstractmcmc.jl            |  2 +-
 src/adaptation/Adaptation.jl   |  2 +-
 src/adaptation/massmatrix.jl   | 67 ++++++++++++++++++++++++++++++++--
 src/adaptation/stan_adaptor.jl |  8 ++--
 src/sampler.jl                 |  4 +-
 tmp/Project.toml               |  5 +++
 tmp/demo.jl                    | 51 ++++++++++++++++++++++++++
 7 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 tmp/Project.toml
 create mode 100644 tmp/demo.jl

diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl
index 413e9de6..f6ce4000 100644
--- a/src/abstractmcmc.jl
+++ b/src/abstractmcmc.jl
@@ -196,7 +196,7 @@ function AbstractMCMC.step(
 
     # Adapt h and spl.
     tstat = stat(t)
-    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate)
+    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate)
     tstat = merge(tstat, (is_adapt=isadapted,))
 
     # Compute next transition and state.
diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl
index 4f2fde83..e34e9c88 100644
--- a/src/adaptation/Adaptation.jl
+++ b/src/adaptation/Adaptation.jl
@@ -4,7 +4,7 @@ export Adaptation
 using LinearAlgebra: LinearAlgebra
 using Statistics: Statistics
 
-using ..AdvancedHMC: AbstractScalarOrVec
+using ..AdvancedHMC: AbstractScalarOrVec, PhasePoint
 using DocStringExtensions
 
 """
diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 105d3bae..de2e5178 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -9,16 +9,18 @@ finalize!(::MassMatrixAdaptor) = nothing
 
 function adapt!(
     adaptor::MassMatrixAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z::PhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
     is_update::Bool=true,
 )
-    resize_adaptor!(adaptor, size(θ))
-    push!(adaptor, θ)
+    resize_adaptor!(adaptor, size(z.θ))
+    push!(adaptor, z)
     is_update && update!(adaptor)
     return nothing
 end
 
+Base.push!(a::MassMatrixAdaptor, z::PhasePoint) = push!(a, z.θ)
+
 ## Unit mass matrix adaptor
 
 struct UnitMassMatrix{T<:AbstractFloat} <: MassMatrixAdaptor end
@@ -153,6 +155,65 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5))
 end
 
+## Nutpie-style mass matrix estimator (using positions and gradients)
+
+mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T}
+    position_estimator::WelfordVar{T,E,V}
+    gradient_estimator::WelfordVar{T,E,V}
+    n::Int
+    n_min::Int
+    var::V
+    function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V}
+        return new{eltype(E),E,V}(
+            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), 
+            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)),
+            n, n_min, var
+        )
+    end
+end
+
+function Base.show(io::IO, ::NutpieVar{T}) where {T}
+    return print(io, "NutpieVar{", T, "} adaptor")
+end
+
+function NutpieVar{T}(
+    sz::Union{Tuple{Int},Tuple{Int,Int}}; n_min::Int=10, var=ones(T, sz)
+) where {T<:AbstractFloat}
+    return NutpieVar(0, n_min, zeros(T, sz), zeros(T, sz), zeros(T, sz), var)
+end
+
+function NutpieVar(sz::Union{Tuple{Int},Tuple{Int,Int}}; kwargs...)
+    return NutpieVar{Float64}(sz; kwargs...)
+end
+
+function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int,Int}) where {T<:AbstractFloat}
+    resize_adaptor!(nv.position_estimator, size_θ)
+    resize_adaptor!(nv.gradient_estimator, size_θ)
+end
+
+function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:AbstractFloat}
+    resize_adaptor!(nv.position_estimator, size_θ)
+    resize_adaptor!(nv.gradient_estimator, size_θ)
+end
+
+function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat}
+    nv.n = 0
+    reset!(nv.position_estimator)
+    reset!(nv.gradient_estimator)
+end
+
+function Base.push!(nv::NutpieVar, z::PhasePoint)
+    nv.n += 1
+    push!(nv.position_estimator, z.θ)
+    push!(nv.gradient_estimator, z.ℓπ.gradient)
+    return nothing
+end
+
+# https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/var_adaptation.hpp
+function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat}
+    return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
+end
+
 ## Dense mass matrix adaptor
 
 abstract type DenseMatrixEstimator{T} <: MassMatrixAdaptor end
diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl
index b36a2259..666e33cc 100644
--- a/src/adaptation/stan_adaptor.jl
+++ b/src/adaptation/stan_adaptor.jl
@@ -136,20 +136,20 @@ is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits
 
 function adapt!(
     tp::StanHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z::PhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     tp.state.i += 1
 
-    adapt!(tp.ssa, θ, α)
+    adapt!(tp.ssa, z.θ, α)
 
-    resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary.
+    resize_adaptor!(tp.pc, size(z.θ)) # Resize pre-conditioner if necessary.
 
     # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
     if is_in_window(tp)
         # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window.
         is_update_M⁻¹ = is_window_end(tp)
-        adapt!(tp.pc, θ, α, is_update_M⁻¹)
+        adapt!(tp.pc, z, α, is_update_M⁻¹)
     end
 
     if is_window_end(tp)
diff --git a/src/sampler.jl b/src/sampler.jl
index 3e477ba3..29c903ba 100644
--- a/src/sampler.jl
+++ b/src/sampler.jl
@@ -75,13 +75,13 @@ function Adaptation.adapt!(
     adaptor::AbstractAdaptor,
     i::Int,
     n_adapts::Int,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z::PhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     isadapted = false
     if i <= n_adapts
         i == 1 && Adaptation.initialize!(adaptor, n_adapts)
-        adapt!(adaptor, θ, α)
+        adapt!(adaptor, z, α)
         i == n_adapts && finalize!(adaptor)
         h = update(h, adaptor)
         κ = update(κ, adaptor)
diff --git a/tmp/Project.toml b/tmp/Project.toml
new file mode 100644
index 00000000..f0f82f8e
--- /dev/null
+++ b/tmp/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
+PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807"
diff --git a/tmp/demo.jl b/tmp/demo.jl
new file mode 100644
index 00000000..f7f8cc7b
--- /dev/null
+++ b/tmp/demo.jl
@@ -0,0 +1,51 @@
+using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random
+
+if !@isdefined pdb 
+    const pdb = PosteriorDB.database()
+end
+stan_problem(path, data) = StanProblem(
+    path, data;
+    nan_on_error=true,
+    make_args=["STAN_THREADS=TRUE"],
+    warn=false
+)
+stan_problem(posterior_name::AbstractString) = stan_problem(
+    PosteriorDB.path(PosteriorDB.implementation(PosteriorDB.model(PosteriorDB.posterior(pdb, (posterior_name))), "stan")), 
+    PosteriorDB.load(PosteriorDB.dataset(PosteriorDB.posterior(pdb, (posterior_name))), String)
+)
+
+begin
+lpdf = stan_problem("radon_mn-radon_county_intercept")
+
+n_adapts = n_samples = 1000
+rng = Xoshiro(1)
+spl = NUTS(0.8)
+initial_params = nothing
+model = AdvancedHMC.AbstractMCMC._model(lpdf)
+(;logdensity) = model
+metric = AdvancedHMC.make_metric(spl, logdensity)
+hamiltonian = AdvancedHMC.Hamiltonian(metric, model)
+initial_params = AdvancedHMC.make_initial_params(rng, spl, logdensity, initial_params)
+ϵ = AdvancedHMC.make_step_size(rng, spl, hamiltonian, initial_params)
+integrator = AdvancedHMC.make_integrator(spl, ϵ)
+κ = AdvancedHMC.make_kernel(spl, integrator)
+# adaptor = AdvancedHMC.make_adaptor(spl, metric, integrator)
+adaptor = AdvancedHMC.StanHMCAdaptor(
+    AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), 
+    AdvancedHMC.StepSizeAdaptor(spl.δ, integrator)
+)
+h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params)
+# Using the below uses Nutpie (as in position and gradients)
+initial_state = AdvancedHMC.HMCState(0, t, metric, κ, adaptor)
+# Using the below uses Stan (as in only positions)
+# initial_state = nothing
+@time samples = AdvancedHMC.sample(
+    rng,
+    model,
+    spl,
+    n_adapts + n_samples;
+    n_adapts=n_adapts, initial_state,
+    progress=true, 
+)
+nothing
+end

From e030eecf1c2d478ac6c313022ba128adf50e761e Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Fri, 21 Nov 2025 10:09:33 +0100
Subject: [PATCH 02/14] fix tests, add new ones, and add documentation

---
 docs/src/api.md                |  4 ++
 src/adaptation/Adaptation.jl   |  5 +++
 src/adaptation/massmatrix.jl   | 35 ++++++++++++----
 src/adaptation/stan_adaptor.jl | 25 ++++++++++++
 src/sampler.jl                 | 23 ++++++++++-
 test/adaptation.jl             | 74 ++++++++++++++++++++++++++++++++--
 tmp/Project.toml               |  1 +
 tmp/demo.jl                    | 29 +++++++------
 8 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/docs/src/api.md b/docs/src/api.md
index 54b5939d..c09026ec 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -33,6 +33,10 @@ where `ϵ` is the step size of leapfrog integration.
   - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)`
     
       + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric`
+      + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), 
+      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+      via `mma = AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹))` 
+      until a new interface is introduced to specify the method of adaptation.
 
   - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)`
     
diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl
index e34e9c88..9d5df077 100644
--- a/src/adaptation/Adaptation.jl
+++ b/src/adaptation/Adaptation.jl
@@ -54,6 +54,11 @@ function adapt!(
     adapt!(nca.pc, θ, α)
     return nothing
 end
+adapt!(
+    nca::NaiveHMCAdaptor,
+    z::PhasePoint,
+    α::AbstractScalarOrVec{<:AbstractFloat},
+) = adapt!(nca, z.θ, α)
 function reset!(aca::NaiveHMCAdaptor)
     reset!(aca.ssa)
     reset!(aca.pc)
diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index de2e5178..b03a7973 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -7,6 +7,18 @@ abstract type MassMatrixAdaptor <: AbstractAdaptor end
 initialize!(::MassMatrixAdaptor, ::Int) = nothing
 finalize!(::MassMatrixAdaptor) = nothing
 
+function adapt!(
+    adaptor::MassMatrixAdaptor,
+    θ::AbstractVecOrMat{<:AbstractFloat},
+    α::AbstractScalarOrVec{<:AbstractFloat},
+    is_update::Bool=true,
+)
+    resize_adaptor!(adaptor, size(θ))
+    push!(adaptor, θ)
+    is_update && update!(adaptor)
+    return nothing
+end
+
 function adapt!(
     adaptor::MassMatrixAdaptor,
     z::PhasePoint,
@@ -155,7 +167,7 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5))
 end
 
-## Nutpie-style mass matrix estimator (using positions and gradients)
+## Nutpie-style diagonal mass matrix estimator (using positions and gradients)
 
 mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T}
     position_estimator::WelfordVar{T,E,V}
@@ -177,7 +189,7 @@ function Base.show(io::IO, ::NutpieVar{T}) where {T}
 end
 
 function NutpieVar{T}(
-    sz::Union{Tuple{Int},Tuple{Int,Int}}; n_min::Int=10, var=ones(T, sz)
+    sz::Union{Tuple{Int},Tuple{Int,Int}}=(2,); n_min::Int=10, var=ones(T, sz)
 ) where {T<:AbstractFloat}
     return NutpieVar(0, n_min, zeros(T, sz), zeros(T, sz), zeros(T, sz), var)
 end
@@ -187,13 +199,22 @@ function NutpieVar(sz::Union{Tuple{Int},Tuple{Int,Int}}; kwargs...)
 end
 
 function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int,Int}) where {T<:AbstractFloat}
-    resize_adaptor!(nv.position_estimator, size_θ)
-    resize_adaptor!(nv.gradient_estimator, size_θ)
+    if size_θ != size(nv.var)
+        @assert nv.n == 0 "Cannot resize a var estimator when it contains samples."
+        resize_adaptor!(nv.position_estimator, size_θ)
+        resize_adaptor!(nv.gradient_estimator, size_θ)
+        nv.var = ones(T, size_θ)
+    end
 end
 
 function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:AbstractFloat}
-    resize_adaptor!(nv.position_estimator, size_θ)
-    resize_adaptor!(nv.gradient_estimator, size_θ)
+    length_θ = first(size_θ)
+    if length_θ != size(nv.var, 1)
+        @assert nv.n == 0 "Cannot resize a var estimator when it contains samples."
+        resize_adaptor!(nv.position_estimator, size_θ)
+        resize_adaptor!(nv.gradient_estimator, size_θ)
+        fill!(resize!(nv.var, length_θ), T(1))
+    end
 end
 
 function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat}
@@ -209,7 +230,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint)
     return nothing
 end
 
-# https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/var_adaptation.hpp
+# Ref: TODO
 function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat}
     return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
 end
diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl
index 666e33cc..ca53421c 100644
--- a/src/adaptation/stan_adaptor.jl
+++ b/src/adaptation/stan_adaptor.jl
@@ -134,6 +134,31 @@ function is_in_window(a::StanHMCAdaptor)
 end
 is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits
 
+function adapt!(
+    tp::StanHMCAdaptor,
+    θ::AbstractVecOrMat{<:AbstractFloat},
+    α::AbstractScalarOrVec{<:AbstractFloat},
+)
+    tp.state.i += 1
+
+    adapt!(tp.ssa, θ, α)
+
+    resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary.
+
+    # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
+    if is_in_window(tp)
+        # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window.
+        is_update_M⁻¹ = is_window_end(tp)
+        adapt!(tp.pc, θ, α, is_update_M⁻¹)
+    end
+
+    if is_window_end(tp)
+        reset!(tp.ssa)
+        reset!(tp.pc)
+    end
+end
+
+
 function adapt!(
     tp::StanHMCAdaptor,
     z::PhasePoint,
diff --git a/src/sampler.jl b/src/sampler.jl
index 29c903ba..724fac39 100644
--- a/src/sampler.jl
+++ b/src/sampler.jl
@@ -69,6 +69,27 @@ function Adaptation.adapt!(
     return h, κ, false
 end
 
+function Adaptation.adapt!(
+    h::Hamiltonian,
+    κ::AbstractMCMCKernel,
+    adaptor::AbstractAdaptor,
+    i::Int,
+    n_adapts::Int,    
+    θ::AbstractVecOrMat{<:AbstractFloat},
+    α::AbstractScalarOrVec{<:AbstractFloat},
+)
+    isadapted = false
+    if i <= n_adapts
+        i == 1 && Adaptation.initialize!(adaptor, n_adapts)
+        adapt!(adaptor, θ, α)
+        i == n_adapts && finalize!(adaptor)
+        h = update(h, adaptor)
+        κ = update(κ, adaptor)
+        isadapted = true
+    end
+    return h, κ, isadapted
+end
+
 function Adaptation.adapt!(
     h::Hamiltonian,
     κ::AbstractMCMCKernel,
@@ -185,7 +206,7 @@ function sample(
         t = transition(rng, h, κ, t.z)
         # Adapt h and κ; what mutable is the adaptor
         tstat = stat(t)
-        h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate)
+        h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate)
         if isadapted
             num_divergent_transitions_during_adaption += tstat.numerical_error
         else
diff --git a/test/adaptation.jl b/test/adaptation.jl
index 346423ea..9bd41e32 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -1,6 +1,8 @@
 using ReTest, LinearAlgebra, Distributions, AdvancedHMC, Random, ForwardDiff
+using AdvancedHMC:
+    PhasePoint, DualValue
 using AdvancedHMC.Adaptation:
-    WelfordVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, get_estimation, reset!
+    DiagMatrixEstimator, WelfordVar, NutpieVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, get_estimation, reset!
 
 function runnuts(ℓπ, metric; n_samples=10_000)
     D = size(metric, 1)
@@ -18,6 +20,28 @@ function runnuts(ℓπ, metric; n_samples=10_000)
     return (samples=samples, stats=stats, adaptor=adaptor)
 end
 
+# Temporary function until we've settled on a different interface
+function runnuts_nutpie(ℓπ, metric::DiagEuclideanMetric; n_samples=10_000)
+    D = size(metric, 1)
+    n_adapts = 5_000
+    θ_init = rand(D)
+    rng = MersenneTwister(0)
+
+    nuts = NUTS(0.8)
+    h = Hamiltonian(metric, ℓπ, ForwardDiff)
+    step_size = AdvancedHMC.make_step_size(rng, nuts, h, θ_init)
+    integrator = AdvancedHMC.make_integrator(nuts, step_size)
+    κ = AdvancedHMC.make_kernel(nuts, integrator)
+    # Constructing like this until we've settled on a different interface
+    adaptor = AdvancedHMC.StanHMCAdaptor(
+        AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), 
+        AdvancedHMC.StepSizeAdaptor(nuts.δ, integrator)
+    )
+    samples, stats = sample(h, κ, θ_init, n_samples, adaptor, n_adapts; verbose=false)
+    return (samples=samples, stats=stats, adaptor=adaptor)
+end
+preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Diagonal(a.var)) \ cov / sqrt(Diagonal(a.var)))
+
 @testset "Adaptation" begin
     # Check that the estimated variance is approximately correct.
     @testset "Online v.s. naive v.s. true var/cov estimation" begin
@@ -60,15 +84,32 @@ end
 
     @testset "MassMatrixAdaptor constructors" begin
         θ = [0.0, 0.0, 0.0, 0.0]
+        z = PhasePoint(
+            θ, θ, DualValue(0., θ), DualValue(0., θ)
+        )
         pc1 = MassMatrixAdaptor(UnitEuclideanMetric) # default dim = 2
         pc2 = MassMatrixAdaptor(DiagEuclideanMetric)
+        # Constructing like this until we've settled on a different interface
+        pc2_nutpie = NutpieVar{Float64}((2, ))
         pc3 = MassMatrixAdaptor(DenseEuclideanMetric)
 
-        # Var adaptor dimention should be increased to length(θ) from 2
+        # Var adaptor dimension should be increased to length(θ) from 2
         AdvancedHMC.adapt!(pc1, θ, 1.0)
         AdvancedHMC.adapt!(pc2, θ, 1.0)
+        AdvancedHMC.adapt!(pc2_nutpie, z, 1.0)
         AdvancedHMC.adapt!(pc3, θ, 1.0)
         @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ))
+        @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ))
+        @test AdvancedHMC.Adaptation.getM⁻¹(pc3) ==
+            LinearAlgebra.diagm(0 => ones(length(θ)))
+
+        # Making sure "all" MassMatrixAdaptors support getting a PhasePoint instead of a Vector
+        # AdvancedHMC.adapt!(pc1, z, 1.0) - this does not work - should it?
+        AdvancedHMC.adapt!(pc2, z, 1.0)
+        AdvancedHMC.adapt!(pc2_nutpie, z, 1.0)
+        AdvancedHMC.adapt!(pc3, z, 1.0)
+        @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ))
+        @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ))
         @test AdvancedHMC.Adaptation.getM⁻¹(pc3) ==
             LinearAlgebra.diagm(0 => ones(length(θ)))
     end
@@ -82,10 +123,14 @@ end
         adaptor2 = StanHMCAdaptor(
             MassMatrixAdaptor(DiagEuclideanMetric), NesterovDualAveraging(0.8, 0.5)
         )
+        # Constructing like this until we've settled on a different interface
+        adaptor2_nutpie = StanHMCAdaptor(
+            NutpieVar{Float64}((2, )), NesterovDualAveraging(0.8, 0.5)
+        )
         adaptor3 = StanHMCAdaptor(
             MassMatrixAdaptor(DenseEuclideanMetric), NesterovDualAveraging(0.8, 0.5)
         )
-        for a in [adaptor1, adaptor2, adaptor3]
+        for a in [adaptor1, adaptor2, adaptor2_nutpie, adaptor3]
             AdvancedHMC.initialize!(a, 1_000)
             @test a.state.window_start == 76
             @test a.state.window_end == 950
@@ -93,6 +138,7 @@ end
             AdvancedHMC.adapt!(a, θ, 1.0)
         end
         @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2) == ones(length(θ))
+        @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2a) == ones(length(θ))
         @test AdvancedHMC.Adaptation.getM⁻¹(adaptor3) ==
             LinearAlgebra.diagm(0 => ones(length(θ)))
 
@@ -119,19 +165,26 @@ end
 
                 # Random variance
                 σ² = 1 .+ abs.(randn(D))
+                Σ = Diagonal(σ²)
 
                 # Diagonal Gaussian
-                ℓπ = LogDensityDistribution(MvNormal(Diagonal(σ²)))
+                ℓπ = LogDensityDistribution(MvNormal(Σ))
 
                 res = runnuts(ℓπ, DiagEuclideanMetric(D))
                 @test res.adaptor.pc.var ≈ σ² rtol = 0.2
 
+                # For this target, Nutpie (without regularization) will arrive at the true variances after two draws.
+                res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
+                @test res.adaptor.pc.var ≈ σ² rtol = 0.2
+                @test preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
+
                 res = runnuts(ℓπ, DenseEuclideanMetric(D))
                 @test res.adaptor.pc.cov ≈ Diagonal(σ²) rtol = 0.25
             end
         end
 
         @testset "DenseEuclideanMetric" begin
+            n_nutpie_superior = 0
             for _ in 1:n_tests
                 # Random covariance
                 m = randn(D, D)
@@ -143,9 +196,18 @@ end
                 res = runnuts(ℓπ, DiagEuclideanMetric(D))
                 @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2
 
+                # For this target, Nutpie will NOT converge towards the true variances, even after infinite draws.
+                # HOWEVER, it will asymptotically (but also generally more quickly than Stan) 
+                # find the best preconditioner for the target.
+                # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing.  
+                res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
+                # @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2
+                n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
+
                 res = runnuts(ℓπ, DenseEuclideanMetric(D))
                 @test res.adaptor.pc.cov ≈ Σ rtol = 0.25
             end
+            @test n_nutpie_superior > n_tests / 2
         end
     end
 
@@ -156,6 +218,10 @@ end
         res = runnuts(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1)
         @test res.adaptor.pc.var == mass_init
 
+        mass_init = fill(0.5, D)
+        res = runnuts_nutpie(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1)
+        @test res.adaptor.pc.var == mass_init
+
         mass_init = diagm(0 => fill(0.5, D))
         res = runnuts(ℓπ, DenseEuclideanMetric(mass_init); n_samples=1)
         @test res.adaptor.pc.cov == mass_init
diff --git a/tmp/Project.toml b/tmp/Project.toml
index f0f82f8e..7dfd40e4 100644
--- a/tmp/Project.toml
+++ b/tmp/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
+MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0"
 PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807"
diff --git a/tmp/demo.jl b/tmp/demo.jl
index f7f8cc7b..6a35c0a9 100644
--- a/tmp/demo.jl
+++ b/tmp/demo.jl
@@ -1,4 +1,4 @@
-using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random
+using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random, MCMCDiagnosticTools
 
 if !@isdefined pdb 
     const pdb = PosteriorDB.database()
@@ -18,7 +18,7 @@ begin
 lpdf = stan_problem("radon_mn-radon_county_intercept")
 
 n_adapts = n_samples = 1000
-rng = Xoshiro(1)
+rng = Xoshiro(2)
 spl = NUTS(0.8)
 initial_params = nothing
 model = AdvancedHMC.AbstractMCMC._model(lpdf)
@@ -35,17 +35,16 @@ adaptor = AdvancedHMC.StanHMCAdaptor(
     AdvancedHMC.StepSizeAdaptor(spl.δ, integrator)
 )
 h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params)
-# Using the below uses Nutpie (as in position and gradients)
-initial_state = AdvancedHMC.HMCState(0, t, metric, κ, adaptor)
-# Using the below uses Stan (as in only positions)
-# initial_state = nothing
-@time samples = AdvancedHMC.sample(
-    rng,
-    model,
-    spl,
-    n_adapts + n_samples;
-    n_adapts=n_adapts, initial_state,
-    progress=true, 
-)
-nothing
+performances = map((;nutpie=AdvancedHMC.HMCState(0, t, metric, κ, adaptor), stan=nothing)) do initial_state
+    dt = @elapsed samples = AdvancedHMC.sample(
+        rng,
+        model,
+        spl,
+        n_adapts + n_samples;
+        n_adapts=n_adapts, callback=error,#initial_state,
+        progress=true, 
+    )
+    ess(reshape(mapreduce(sample->sample.z.θ , hcat, samples[n_adapts+1:end])', (n_samples, 1, :))) |> minimum  |> Base.Fix2(/, dt)
 end
+@info (;performances)
+end
\ No newline at end of file

From 7cdea3660131f590a0319fdc0158eed4f7bc29d3 Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Fri, 21 Nov 2025 10:11:08 +0100
Subject: [PATCH 03/14] fix type

---
 test/adaptation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/adaptation.jl b/test/adaptation.jl
index 9bd41e32..6af62367 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -138,7 +138,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
             AdvancedHMC.adapt!(a, θ, 1.0)
         end
         @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2) == ones(length(θ))
-        @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2a) == ones(length(θ))
+        @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2_nutpie) == ones(length(θ))
         @test AdvancedHMC.Adaptation.getM⁻¹(adaptor3) ==
             LinearAlgebra.diagm(0 => ones(length(θ)))
 

From 989d284cb792f1c4bec2aae3169d08feaffd3565 Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Fri, 21 Nov 2025 10:18:21 +0100
Subject: [PATCH 04/14] fix some stray tests

---
 src/adaptation/massmatrix.jl | 9 +++++++++
 test/adaptation.jl           | 3 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index b03a7973..92cb1560 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -60,6 +60,15 @@ function adapt!(
     return nothing
 end
 
+function adapt!(
+    ::UnitMassMatrix,
+    ::PhasePoint,
+    ::AbstractScalarOrVec{<:AbstractFloat},
+    is_update::Bool=true,
+)
+    return nothing
+end
+
 ## Diagonal mass matrix adaptor
 
 abstract type DiagMatrixEstimator{T} <: MassMatrixAdaptor end
diff --git a/test/adaptation.jl b/test/adaptation.jl
index 6af62367..553ba32e 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -104,7 +104,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
             LinearAlgebra.diagm(0 => ones(length(θ)))
 
         # Making sure "all" MassMatrixAdaptors support getting a PhasePoint instead of a Vector
-        # AdvancedHMC.adapt!(pc1, z, 1.0) - this does not work - should it?
+        AdvancedHMC.adapt!(pc1, z, 1.0)
         AdvancedHMC.adapt!(pc2, z, 1.0)
         AdvancedHMC.adapt!(pc2_nutpie, z, 1.0)
         AdvancedHMC.adapt!(pc3, z, 1.0)
@@ -201,7 +201,6 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
                 # find the best preconditioner for the target.
                 # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing.  
                 res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
-                # @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2
                 n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
 
                 res = runnuts(ℓπ, DenseEuclideanMetric(D))

From 842fe06822194a75113ffc61a482a22d0b13568a Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Fri, 21 Nov 2025 10:21:32 +0100
Subject: [PATCH 05/14] delete tmp folder with demo

---
 tmp/Project.toml |  6 ------
 tmp/demo.jl      | 50 ------------------------------------------------
 2 files changed, 56 deletions(-)
 delete mode 100644 tmp/Project.toml
 delete mode 100644 tmp/demo.jl

diff --git a/tmp/Project.toml b/tmp/Project.toml
deleted file mode 100644
index 7dfd40e4..00000000
--- a/tmp/Project.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-[deps]
-AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
-MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0"
-PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807"
diff --git a/tmp/demo.jl b/tmp/demo.jl
deleted file mode 100644
index 6a35c0a9..00000000
--- a/tmp/demo.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random, MCMCDiagnosticTools
-
-if !@isdefined pdb 
-    const pdb = PosteriorDB.database()
-end
-stan_problem(path, data) = StanProblem(
-    path, data;
-    nan_on_error=true,
-    make_args=["STAN_THREADS=TRUE"],
-    warn=false
-)
-stan_problem(posterior_name::AbstractString) = stan_problem(
-    PosteriorDB.path(PosteriorDB.implementation(PosteriorDB.model(PosteriorDB.posterior(pdb, (posterior_name))), "stan")), 
-    PosteriorDB.load(PosteriorDB.dataset(PosteriorDB.posterior(pdb, (posterior_name))), String)
-)
-
-begin
-lpdf = stan_problem("radon_mn-radon_county_intercept")
-
-n_adapts = n_samples = 1000
-rng = Xoshiro(2)
-spl = NUTS(0.8)
-initial_params = nothing
-model = AdvancedHMC.AbstractMCMC._model(lpdf)
-(;logdensity) = model
-metric = AdvancedHMC.make_metric(spl, logdensity)
-hamiltonian = AdvancedHMC.Hamiltonian(metric, model)
-initial_params = AdvancedHMC.make_initial_params(rng, spl, logdensity, initial_params)
-ϵ = AdvancedHMC.make_step_size(rng, spl, hamiltonian, initial_params)
-integrator = AdvancedHMC.make_integrator(spl, ϵ)
-κ = AdvancedHMC.make_kernel(spl, integrator)
-# adaptor = AdvancedHMC.make_adaptor(spl, metric, integrator)
-adaptor = AdvancedHMC.StanHMCAdaptor(
-    AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), 
-    AdvancedHMC.StepSizeAdaptor(spl.δ, integrator)
-)
-h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params)
-performances = map((;nutpie=AdvancedHMC.HMCState(0, t, metric, κ, adaptor), stan=nothing)) do initial_state
-    dt = @elapsed samples = AdvancedHMC.sample(
-        rng,
-        model,
-        spl,
-        n_adapts + n_samples;
-        n_adapts=n_adapts, callback=error,#initial_state,
-        progress=true, 
-    )
-    ess(reshape(mapreduce(sample->sample.z.θ , hcat, samples[n_adapts+1:end])', (n_samples, 1, :))) |> minimum  |> Base.Fix2(/, dt)
-end
-@info (;performances)
-end
\ No newline at end of file

From 0f27172fbe05cf4df6293882176756d6ee41751d Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Mon, 24 Nov 2025 12:48:28 +0100
Subject: [PATCH 06/14] address review comments

---
 src/adaptation/massmatrix.jl | 6 +++---
 src/sampler.jl               | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 92cb1560..6ca9ea29 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -226,7 +226,7 @@ function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:Abstra
     end
 end
 
-function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat}
+function reset!(nv::NutpieVar)
     nv.n = 0
     reset!(nv.position_estimator)
     reset!(nv.gradient_estimator)
@@ -239,8 +239,8 @@ function Base.push!(nv::NutpieVar, z::PhasePoint)
     return nothing
 end
 
-# Ref: TODO
-function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat}
+# Ref: https://github.com/pymc-devs/nutpie
+function get_estimation(nv::NutpieVar)
     return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
 end
 
diff --git a/src/sampler.jl b/src/sampler.jl
index 724fac39..15d96b4e 100644
--- a/src/sampler.jl
+++ b/src/sampler.jl
@@ -74,7 +74,7 @@ function Adaptation.adapt!(
     κ::AbstractMCMCKernel,
     adaptor::AbstractAdaptor,
     i::Int,
-    n_adapts::Int,    
+    n_adapts::Int,
     θ::AbstractVecOrMat{<:AbstractFloat},
     α::AbstractScalarOrVec{<:AbstractFloat},
 )

From c395715a69837e51e473d15f9a1795d46e08c3cb Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Mon, 24 Nov 2025 13:25:36 +0100
Subject: [PATCH 07/14] reference interface refactor issue

---
 src/adaptation/massmatrix.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 6ca9ea29..17f0920f 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -176,7 +176,7 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5))
 end
 
-## Nutpie-style diagonal mass matrix estimator (using positions and gradients)
+## Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475
 
 mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T}
     position_estimator::WelfordVar{T,E,V}
@@ -186,7 +186,7 @@ mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecO
     var::V
     function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V}
         return new{eltype(E),E,V}(
-            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), 
+            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)),
             WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)),
             n, n_min, var
         )

From f1d1c80175ec1e8ed2dec111289daa6ffc632874 Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Wed, 26 Nov 2025 10:18:26 +0100
Subject: [PATCH 08/14] refactor and fix test rng handling

---
 src/AdvancedHMC.jl             |  3 +-
 src/adaptation/Adaptation.jl   | 26 ++++++++---------
 src/adaptation/massmatrix.jl   | 51 ++++++++++++----------------------
 src/adaptation/stan_adaptor.jl | 33 +++-------------------
 src/adaptation/stepsize.jl     |  4 +--
 src/sampler.jl                 | 44 ++++++++---------------------
 test/adaptation.jl             | 21 +++++++++-----
 7 files changed, 62 insertions(+), 120 deletions(-)

diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
index b25710d5..5db7644c 100644
--- a/src/AdvancedHMC.jl
+++ b/src/AdvancedHMC.jl
@@ -72,7 +72,7 @@ export find_good_eps
 include("adaptation/Adaptation.jl")
 using .Adaptation
 import .Adaptation:
-    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation
+    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint
 
 # Helpers for initializing adaptors via AHMC structs
 
@@ -114,6 +114,7 @@ export StepSizeAdaptor,
     MassMatrixAdaptor,
     UnitMassMatrix,
     WelfordVar,
+    NutpieVar,
     WelfordCov,
     NaiveHMCAdaptor,
     StanHMCAdaptor,
diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl
index 9d5df077..10a9a980 100644
--- a/src/adaptation/Adaptation.jl
+++ b/src/adaptation/Adaptation.jl
@@ -10,7 +10,7 @@ using DocStringExtensions
 """
 $(TYPEDEF)
 
-Abstract type for HMC adaptors. 
+Abstract type for HMC adaptors.
 """
 abstract type AbstractAdaptor end
 function getM⁻¹ end
@@ -21,12 +21,17 @@ function initialize! end
 function finalize! end
 export AbstractAdaptor, adapt!, initialize!, finalize!, reset!, getϵ, getM⁻¹
 
+get_position(x::PhasePoint) = x.θ
+get_position(x::AbstractVecOrMat{<:AbstractFloat}) = x
+const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat}, PhasePoint}
+
 struct NoAdaptation <: AbstractAdaptor end
 export NoAdaptation
 include("stepsize.jl")
 export StepSizeAdaptor, NesterovDualAveraging
+
 include("massmatrix.jl")
-export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, WelfordCov
+export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, NutpieVar, WelfordCov
 
 ##
 ## Composite adaptors
@@ -47,23 +52,14 @@ getϵ(ca::NaiveHMCAdaptor) = getϵ(ca.ssa)
 # TODO: implement consensus adaptor
 function adapt!(
     nca::NaiveHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
-    adapt!(nca.ssa, θ, α)
-    adapt!(nca.pc, θ, α)
-    return nothing
-end
-adapt!(
-    nca::NaiveHMCAdaptor,
-    z::PhasePoint,
-    α::AbstractScalarOrVec{<:AbstractFloat},
-) = adapt!(nca, z.θ, α)
-function reset!(aca::NaiveHMCAdaptor)
-    reset!(aca.ssa)
-    reset!(aca.pc)
+    adapt!(nca.ssa, z_or_theta, α)
+    adapt!(nca.pc, z_or_theta, α)
     return nothing
 end
+
 initialize!(adaptor::NaiveHMCAdaptor, n_adapts::Int) = nothing
 finalize!(aca::NaiveHMCAdaptor) = finalize!(aca.ssa)
 
diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 17f0920f..74269647 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -9,29 +9,17 @@ finalize!(::MassMatrixAdaptor) = nothing
 
 function adapt!(
     adaptor::MassMatrixAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
-    α::AbstractScalarOrVec{<:AbstractFloat},
-    is_update::Bool=true,
-)
-    resize_adaptor!(adaptor, size(θ))
-    push!(adaptor, θ)
-    is_update && update!(adaptor)
-    return nothing
-end
-
-function adapt!(
-    adaptor::MassMatrixAdaptor,
-    z::PhasePoint,
-    α::AbstractScalarOrVec{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
+    ::AbstractScalarOrVec{<:AbstractFloat},
     is_update::Bool=true,
 )
-    resize_adaptor!(adaptor, size(z.θ))
-    push!(adaptor, z)
+    resize_adaptor!(adaptor, size(get_position(z_or_theta)))
+    push!(adaptor, z_or_theta)
     is_update && update!(adaptor)
     return nothing
 end
 
-Base.push!(a::MassMatrixAdaptor, z::PhasePoint) = push!(a, z.θ)
+Base.push!(a::MassMatrixAdaptor, z_or_theta::PositionOrPhasePoint) = push!(a, get_position(z_or_theta))
 
 ## Unit mass matrix adaptor
 
@@ -53,16 +41,7 @@ getM⁻¹(::UnitMassMatrix{T}) where {T} = LinearAlgebra.UniformScaling{T}(one(T
 
 function adapt!(
     ::UnitMassMatrix,
-    ::AbstractVecOrMat{<:AbstractFloat},
-    ::AbstractScalarOrVec{<:AbstractFloat},
-    is_update::Bool=true,
-)
-    return nothing
-end
-
-function adapt!(
-    ::UnitMassMatrix,
-    ::PhasePoint,
+    ::PositionOrPhasePoint,
     ::AbstractScalarOrVec{<:AbstractFloat},
     is_update::Bool=true,
 )
@@ -70,7 +49,6 @@ function adapt!(
 end
 
 ## Diagonal mass matrix adaptor
-
 abstract type DiagMatrixEstimator{T} <: MassMatrixAdaptor end
 
 getM⁻¹(ve::DiagMatrixEstimator) = ve.var
@@ -93,7 +71,7 @@ NaiveVar{T}(sz::Tuple{Int,Int}) where {T<:AbstractFloat} = NaiveVar(Vector{Matri
 
 NaiveVar(sz::Union{Tuple{Int},Tuple{Int,Int}}) = NaiveVar{Float64}(sz)
 
-Base.push!(nv::NaiveVar, s::AbstractVecOrMat) = push!(nv.S, s)
+Base.push!(nv::NaiveVar, s::AbstractVecOrMat{<:AbstractFloat}) = push!(nv.S, s)
 
 reset!(nv::NaiveVar) = resize!(nv.S, 0)
 
@@ -158,7 +136,7 @@ function reset!(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return nothing
 end
 
-function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T}
+function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T<:AbstractFloat}
     wv.n += 1
     (; δ, μ, M, n) = wv
     n = T(n)
@@ -176,8 +154,13 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5))
 end
 
-## Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475
+"""
+Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475
 
+Expected to converge faster and to a better mass matrix than WelfordVar.
+
+Can be initialized via NutpieVar(sz) where sz is either a `Tuple{Int}` or a `Tuple{Int,Int}`.
+"""
 mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T}
     position_estimator::WelfordVar{T,E,V}
     gradient_estimator::WelfordVar{T,E,V}
@@ -232,6 +215,8 @@ function reset!(nv::NutpieVar)
     reset!(nv.gradient_estimator)
 end
 
+Base.push!(::NutpieVar, x::AbstractVecOrMat{<:AbstractFloat}) = error("`NutpieVar` adaptation requires position and gradient information!")
+
 function Base.push!(nv::NutpieVar, z::PhasePoint)
     nv.n += 1
     push!(nv.position_estimator, z.θ)
@@ -266,7 +251,7 @@ end
 
 NaiveCov{T}(sz::Tuple{Int}) where {T<:AbstractFloat} = NaiveCov(Vector{Vector{T}}())
 
-Base.push!(nc::NaiveCov, s::AbstractVector) = push!(nc.S, s)
+Base.push!(nc::NaiveCov, s::AbstractVector{<:AbstractFloat}) = push!(nc.S, s)
 
 reset!(nc::NaiveCov{T}) where {T} = resize!(nc.S, 0)
 
@@ -316,7 +301,7 @@ function reset!(wc::WelfordCov{T}) where {T<:AbstractFloat}
     return nothing
 end
 
-function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T}
+function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T<:AbstractFloat}
     wc.n += 1
     (; δ, μ, n, M) = wc
     n = T(n)
diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl
index ca53421c..931e741a 100644
--- a/src/adaptation/stan_adaptor.jl
+++ b/src/adaptation/stan_adaptor.jl
@@ -136,45 +136,20 @@ is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits
 
 function adapt!(
     tp::StanHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     tp.state.i += 1
 
-    adapt!(tp.ssa, θ, α)
+    adapt!(tp.ssa, z_or_theta, α)
 
-    resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary.
+    resize_adaptor!(tp.pc, size(get_position(z_or_theta))) # Resize pre-conditioner if necessary.
 
     # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
     if is_in_window(tp)
         # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window.
         is_update_M⁻¹ = is_window_end(tp)
-        adapt!(tp.pc, θ, α, is_update_M⁻¹)
-    end
-
-    if is_window_end(tp)
-        reset!(tp.ssa)
-        reset!(tp.pc)
-    end
-end
-
-
-function adapt!(
-    tp::StanHMCAdaptor,
-    z::PhasePoint,
-    α::AbstractScalarOrVec{<:AbstractFloat},
-)
-    tp.state.i += 1
-
-    adapt!(tp.ssa, z.θ, α)
-
-    resize_adaptor!(tp.pc, size(z.θ)) # Resize pre-conditioner if necessary.
-
-    # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
-    if is_in_window(tp)
-        # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window.
-        is_update_M⁻¹ = is_window_end(tp)
-        adapt!(tp.pc, z, α, is_update_M⁻¹)
+        adapt!(tp.pc, z_or_theta, α, is_update_M⁻¹)
     end
 
     if is_window_end(tp)
diff --git a/src/adaptation/stepsize.jl b/src/adaptation/stepsize.jl
index 2afbb651..cacb463d 100644
--- a/src/adaptation/stepsize.jl
+++ b/src/adaptation/stepsize.jl
@@ -174,7 +174,7 @@ end
 # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/stepsize_adaptation.hpp
 # Note: This function is not merged with `adapt!` to empahsize the fact that
 #       step size adaptation is not dependent on `θ`.
-# Note 2: `da.state` and `α` support vectorised HMC but should do so together. 
+# Note 2: `da.state` and `α` support vectorised HMC but should do so together.
 function adapt_stepsize!(
     da::NesterovDualAveraging{T}, α::AbstractScalarOrVec{T}
 ) where {T<:AbstractFloat}
@@ -211,7 +211,7 @@ end
 
 function adapt!(
     da::NesterovDualAveraging,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    ::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     adapt_stepsize!(da, α)
diff --git a/src/sampler.jl b/src/sampler.jl
index 15d96b4e..1b282383 100644
--- a/src/sampler.jl
+++ b/src/sampler.jl
@@ -60,11 +60,11 @@ end
 function Adaptation.adapt!(
     h::Hamiltonian,
     κ::AbstractMCMCKernel,
-    adaptor::Adaptation.NoAdaptation,
-    i::Int,
-    n_adapts::Int,
-    θ::AbstractVecOrMat{<:AbstractFloat},
-    α::AbstractScalarOrVec{<:AbstractFloat},
+    ::Adaptation.NoAdaptation,
+    ::Int,
+    ::Int,
+    ::PositionOrPhasePoint,
+    ::AbstractScalarOrVec{<:AbstractFloat},
 )
     return h, κ, false
 end
@@ -75,40 +75,18 @@ function Adaptation.adapt!(
     adaptor::AbstractAdaptor,
     i::Int,
     n_adapts::Int,
-    θ::AbstractVecOrMat{<:AbstractFloat},
-    α::AbstractScalarOrVec{<:AbstractFloat},
-)
-    isadapted = false
-    if i <= n_adapts
-        i == 1 && Adaptation.initialize!(adaptor, n_adapts)
-        adapt!(adaptor, θ, α)
-        i == n_adapts && finalize!(adaptor)
-        h = update(h, adaptor)
-        κ = update(κ, adaptor)
-        isadapted = true
-    end
-    return h, κ, isadapted
-end
-
-function Adaptation.adapt!(
-    h::Hamiltonian,
-    κ::AbstractMCMCKernel,
-    adaptor::AbstractAdaptor,
-    i::Int,
-    n_adapts::Int,
-    z::PhasePoint,
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
-    isadapted = false
-    if i <= n_adapts
+    adapt = i <= n_adapts
+    if adapt
         i == 1 && Adaptation.initialize!(adaptor, n_adapts)
-        adapt!(adaptor, z, α)
+        adapt!(adaptor, z_or_theta, α)
         i == n_adapts && finalize!(adaptor)
         h = update(h, adaptor)
         κ = update(κ, adaptor)
-        isadapted = true
     end
-    return h, κ, isadapted
+    return h, κ, adapt
 end
 
 """
@@ -169,7 +147,7 @@ end
         progress::Bool=false
     )
 Sample `n_samples` samples using the proposal `κ` under Hamiltonian `h`.
-- The randomness is controlled by `rng`. 
+- The randomness is controlled by `rng`.
     - If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used.
 - The initial point is given by `θ`.
 - The adaptor is set by `adaptor`, for which the default is no adaptation.
diff --git a/test/adaptation.jl b/test/adaptation.jl
index 553ba32e..518b4fd6 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -34,15 +34,23 @@ function runnuts_nutpie(ℓπ, metric::DiagEuclideanMetric; n_samples=10_000)
     κ = AdvancedHMC.make_kernel(nuts, integrator)
     # Constructing like this until we've settled on a different interface
     adaptor = AdvancedHMC.StanHMCAdaptor(
-        AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), 
+        AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)),
         AdvancedHMC.StepSizeAdaptor(nuts.δ, integrator)
     )
     samples, stats = sample(h, κ, θ_init, n_samples, adaptor, n_adapts; verbose=false)
     return (samples=samples, stats=stats, adaptor=adaptor)
 end
+"""
+Computes the condition number of a covariance matrix `cov::AbstractMatrix` after preconditioning with the (diagonal) mass matrix estimated in `a::DiagMatrixEstimator`.
+
+This is a simple but serviceable proxy for eventual sampling efficiency, but see also https://arxiv.org/abs/1905.09813 for a more involved estimate.
+
+(A lower number generally means that the estimated mass matrix is better).
+"""
 preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Diagonal(a.var)) \ cov / sqrt(Diagonal(a.var)))
 
 @testset "Adaptation" begin
+    Random.seed!(1)
     # Check that the estimated variance is approximately correct.
     @testset "Online v.s. naive v.s. true var/cov estimation" begin
         D = 10
@@ -159,9 +167,8 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
     @testset "Adapted mass v.s. true variance" begin
         D = 10
         n_tests = 5
-        @testset "DiagEuclideanMetric" begin
+        @testset "'Diagonal' MvNormal target" begin
             for _ in 1:n_tests
-                Random.seed!(1)
 
                 # Random variance
                 σ² = 1 .+ abs.(randn(D))
@@ -183,7 +190,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
             end
         end
 
-        @testset "DenseEuclideanMetric" begin
+        @testset "'Dense' MvNormal target" begin
             n_nutpie_superior = 0
             for _ in 1:n_tests
                 # Random covariance
@@ -197,16 +204,16 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
                 @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2
 
                 # For this target, Nutpie will NOT converge towards the true variances, even after infinite draws.
-                # HOWEVER, it will asymptotically (but also generally more quickly than Stan) 
+                # HOWEVER, it will asymptotically (but also generally more quickly than Stan)
                 # find the best preconditioner for the target.
-                # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing.  
+                # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing.
                 res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
                 n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
 
                 res = runnuts(ℓπ, DenseEuclideanMetric(D))
                 @test res.adaptor.pc.cov ≈ Σ rtol = 0.25
             end
-            @test n_nutpie_superior > n_tests / 2
+            @test n_nutpie_superior > 1 + n_tests / 2
         end
     end
 

From 76a13737e422686ef75224c5a1e44ad0e0e58c84 Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Wed, 26 Nov 2025 10:30:32 +0100
Subject: [PATCH 09/14] improve docstring for NutpieVar

---
 src/adaptation/massmatrix.jl | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 74269647..f68997d4 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -155,17 +155,28 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
 end
 
 """
+    NutpieVar
+
 Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475
 
-Expected to converge faster and to a better mass matrix than WelfordVar.
+Expected to converge faster and to a better mass matrix than [`WelfordVar`](@ref), for which it is a drop-in replacement.
+
+Can be initialized via `NutpieVar(sz)` where `sz` is either a `Tuple{Int}` or a `Tuple{Int,Int}`.
 
-Can be initialized via NutpieVar(sz) where sz is either a `Tuple{Int}` or a `Tuple{Int,Int}`.
+# Fields
+
+$(FIELDS)
 """
 mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T}
+    "Online variance estimator of the posterior positions."
     position_estimator::WelfordVar{T,E,V}
+    "Online variance estimator of the posterior gradients."
     gradient_estimator::WelfordVar{T,E,V}
+    "The number of observations collected so far."
     n::Int
+    "The minimal number of observations after which the estimate of the variances can be updated."
     n_min::Int
+    "The estimated variances - initialized to ones, updated after calling [`update!`](@ref) if `n > n_min`."
     var::V
     function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V}
         return new{eltype(E),E,V}(
@@ -225,9 +236,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint)
 end
 
 # Ref: https://github.com/pymc-devs/nutpie
-function get_estimation(nv::NutpieVar)
-    return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
-end
+get_estimation(nv::NutpieVar) =  sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
 
 ## Dense mass matrix adaptor
 

From 45a191522e68871fa987832d184997e744e5502a Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Wed, 26 Nov 2025 10:32:11 +0100
Subject: [PATCH 10/14] remove superfluous white space

---
 src/adaptation/massmatrix.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index f68997d4..bfcef144 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -236,7 +236,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint)
 end
 
 # Ref: https://github.com/pymc-devs/nutpie
-get_estimation(nv::NutpieVar) =  sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
+get_estimation(nv::NutpieVar) = sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
 
 ## Dense mass matrix adaptor
 

From 3d47e8a26037f8b99fdf55a2600ca13757ac6ae4 Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Wed, 26 Nov 2025 11:04:26 +0100
Subject: [PATCH 11/14] fix JET tests

---
 src/integrator.jl            | 20 +++++++++-----------
 src/riemannian/integrator.jl |  6 +++---
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/integrator.jl b/src/integrator.jl
index 004028e1..449154c5 100644
--- a/src/integrator.jl
+++ b/src/integrator.jl
@@ -89,14 +89,14 @@ Leapfrog integrator with randomly "jittered" step size `ϵ` for every trajectory
 $(TYPEDFIELDS)
 
 # Description
-This is the same as `LeapFrog`(@ref) but with a "jittered" step size. This means 
-that at the beginning of each trajectory we sample a step size `ϵ` by adding or 
-subtracting from the nominal/base step size `ϵ0` some random proportion of `ϵ0`, 
+This is the same as `LeapFrog`(@ref) but with a "jittered" step size. This means
+that at the beginning of each trajectory we sample a step size `ϵ` by adding or
+subtracting from the nominal/base step size `ϵ0` some random proportion of `ϵ0`,
 with the proportion specified by `jitter`, i.e. `ϵ = ϵ0 - jitter * ϵ0 * rand()`.
 p
 Jittering might help alleviate issues related to poor interactions with a fixed step size:
-- In regions with high "curvature" the current choice of step size might mean over-shoot 
-  leading to almost all steps being rejected. Randomly sampling the step size at the 
+- In regions with high "curvature" the current choice of step size might mean over-shoot
+  leading to almost all steps being rejected. Randomly sampling the step size at the
   beginning of the trajectories can therefore increase the probability of escaping such
   high-curvature regions.
 - Exact periodicity of the simulated trajectories might occur, i.e. you might be so
@@ -168,7 +168,7 @@ $(TYPEDFIELDS)
 
 # Description
 
-Tempering can potentially allow greater exploration of the posterior, e.g. 
+Tempering can potentially allow greater exploration of the posterior, e.g.
 in a multi-modal posterior jumps between the modes can be more likely to occur.
 """
 struct TemperedLeapfrog{FT<:AbstractFloat,T<:AbstractScalarOrVec{FT}} <: AbstractLeapfrog{T}
@@ -226,9 +226,7 @@ function step(
     ϵ = fwd ? step_size(lf) : -step_size(lf)
     ϵ = ϵ'
 
-    if FullTraj
-        res = Vector{P}(undef, n_steps)
-    end
+    res = FullTraj ? Vector{P}(undef, n_steps) : nothing
 
     (; θ, r) = z
     (; value, gradient) = z.ℓπ
@@ -248,12 +246,12 @@ function step(
         # Create a new phase point by caching the logdensity and gradient
         z = phasepoint(h, θ, r; ℓπ=DualValue(value, gradient))
         # Update result
-        if FullTraj
+        if !isnothing(res)
             res[i] = z
         end
         if !isfinite(z)
             # Remove undef
-            if FullTraj
+            if !isnothing(res)
                 resize!(res, i)
             end
             break
diff --git a/src/riemannian/integrator.jl b/src/riemannian/integrator.jl
index 6ce59476..a6d2de5f 100644
--- a/src/riemannian/integrator.jl
+++ b/src/riemannian/integrator.jl
@@ -8,7 +8,7 @@ Generalized leapfrog integrator with fixed step size `ϵ`.
 $(TYPEDFIELDS)
 
 
-## References 
+## References
 
 1. Girolami, Mark, and Ben Calderhead. "Riemann manifold Langevin and Hamiltonian Monte Carlo methods." Journal of the Royal Statistical Society Series B: Statistical Methodology 73, no. 2 (2011): 123-214.
 """
@@ -29,7 +29,7 @@ end
 
 # TODO(Kai) make sure vectorization works
 # TODO(Kai) check if tempering is valid
-# TODO(Kai) abstract out the 3 main steps and merge with `step` in `integrator.jl` 
+# TODO(Kai) abstract out the 3 main steps and merge with `step` in `integrator.jl`
 function step(
     lf::GeneralizedLeapfrog{T},
     h::Hamiltonian,
@@ -59,7 +59,7 @@ function step(
         #r = temper(lf, r, (i=i, is_half=true), n_steps)
         # eq (16) of Girolami & Calderhead (2011)
         r_half = r_init
-        local cache
+        local cache = nothing
         for j in 1:(lf.n)
             # Reuse cache for the first iteration
             if j == 1

From 5888c0f4d01dbf43f9afb44d4cc8eb54e8d77dbb Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Thu, 27 Nov 2025 10:38:41 +0100
Subject: [PATCH 12/14] add entry to history, bump version

---
 HISTORY.md      |  7 +++++++
 Project.toml    |  2 +-
 docs/src/api.md | 14 +++++++-------
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 038968ef..a9daf473 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,12 @@
 # AdvancedHMC Changelog
 
+## 0.8.4
+
+  - Introduces an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)),
+      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+
 ## 0.8.0
 
   - To make an MCMC transtion from phasepoint `z` using trajectory `τ`(or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)`(if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`).
diff --git a/Project.toml b/Project.toml
index 8c32e813..f38dda02 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "AdvancedHMC"
 uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
-version = "0.8.3"
+version = "0.8.4"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
diff --git a/docs/src/api.md b/docs/src/api.md
index c09026ec..a588929a 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -31,15 +31,15 @@ where `ϵ` is the step size of leapfrog integration.
 ### Adaptor (`adaptor`)
 
   - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)`
-    
+
       + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric`
-      + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), 
+      + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)),
       currently to be initialized for a `metric` of type `DiagEuclideanMetric`
-      via `mma = AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹))` 
-      until a new interface is introduced to specify the method of adaptation.
+      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
 
   - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)`
-    
+
       + It uses Nesterov's dual averaging with `δ` as the target acceptance rate.
   - Combine the two above *naively*: `NaiveHMCAdaptor(mma, ssa)`
   - Combine the first two using Stan's windowed adaptation: `StanHMCAdaptor(mma, ssa)`
@@ -64,12 +64,12 @@ sample(
 Draw `n_samples` samples using the kernel `κ` under the Hamiltonian system `h`
 
   - The randomness is controlled by `rng`.
-    
+
       + If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used.
 
   - The initial point is given by `θ`.
   - The adaptor is set by `adaptor`, for which the default is no adaptation.
-    
+
       + It will perform `n_adapts` steps of adaptation, for which the default is `1_000` or 10% of `n_samples`, whichever is lower.
   - `drop_warmup` specifies whether to drop samples.
   - `verbose` controls the verbosity.

From ca680bbb491ffae38ce897f172be8eaaa7ef1eae Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Thu, 27 Nov 2025 11:38:33 +0100
Subject: [PATCH 13/14] fix NutpieVar docstring

---
 src/adaptation/massmatrix.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index bfcef144..13f360e3 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -157,7 +157,7 @@ end
 """
     NutpieVar
 
-Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475
+Nutpie-style diagonal mass matrix estimator (using positions and gradients).
 
 Expected to converge faster and to a better mass matrix than [`WelfordVar`](@ref), for which it is a drop-in replacement.
 

From e82cc19bade5d4d561811a426d465ba63fa5a2bc Mon Sep 17 00:00:00 2001
From: Nikolas Siccha <nikolassiccha@gmail.com>
Date: Thu, 27 Nov 2025 12:03:49 +0100
Subject: [PATCH 14/14] increase number of tests for mass matrix adaptation

---
 test/adaptation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/adaptation.jl b/test/adaptation.jl
index 518b4fd6..df72c159 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -166,7 +166,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia
 
     @testset "Adapted mass v.s. true variance" begin
         D = 10
-        n_tests = 5
+        n_tests = 10
         @testset "'Diagonal' MvNormal target" begin
             for _ in 1:n_tests