TuringLang · J-Price-3 · Oct 16, 2025 · Nov 6, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/HISTORY.md b/HISTORY.md
@@ -1,5 +1,12 @@
 # AdvancedHMC Changelog
 
+## 0.8.4
+
+  - Introduces an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)),
+      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
-      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
-      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
-      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+    currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+    via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+    until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
-      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
-      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
-      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+    currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+    via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+    until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+
 ## 0.8.0
 
   - To make an MCMC transition from phasepoint `z` using trajectory `τ` (or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)` (if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`).

diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "AdvancedHMC"
 uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
-version = "0.8.3"
+version = "0.8.4"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"

diff --git a/docs/src/api.md b/docs/src/api.md
@@ -32,11 +32,15 @@ where `ϵ` is the step size of leapfrog integration.
 ### Adaptor (`adaptor`)
 
   - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)`
-    
+
-
+    
-
+    
       + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric`
+      + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)),
+      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
-      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
-      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
-      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+        currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+        via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+        until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
-      currently to be initialized for a `metric` of type `DiagEuclideanMetric`
-      via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
-      until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
+        currently to be initialized for a `metric` of type `DiagEuclideanMetric`
+        via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`
+        until a new interface is introduced in an upcoming breaking release to specify the method of adaptation.
 
   - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)`
-    
+
-
+    
-
+    
       + It uses Nesterov's dual averaging with `δ` as the target acceptance rate.
   - Combine the two above *naively*: `NaiveHMCAdaptor(mma, ssa)`
   - Combine the first two using Stan's windowed adaptation: `StanHMCAdaptor(mma, ssa)`
@@ -61,12 +65,12 @@ sample(
 Draw `n_samples` samples using the kernel `κ` under the Hamiltonian system `h`
 
   - The randomness is controlled by `rng`.
-    
+
-
+    
-
+    
       + If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used.
 
   - The initial point is given by `θ`.
   - The adaptor is set by `adaptor`, for which the default is no adaptation.
-    
+
-
+    
-
+    
       + It will perform `n_adapts` steps of adaptation, for which the default is `1_000` or 10% of `n_samples`, whichever is lower.
   - `drop_warmup` specifies whether to drop the warm-up samples.
   - `verbose` controls the verbosity.

diff --git a/research/src/riemannian_hmc_utility.jl b/research/src/riemannian_hmc_utility.jl
@@ -2,47 +2,74 @@ using Random, LinearAlgebra, ReverseDiff, ForwardDiff, MCMCLogDensityProblems
 
 # Fisher information metric
 function gen_∂G∂θ_rev(Vfunc, x; f=identity)
-    _Hfunc = MCMCLogDensityProblems.gen_hess(Vfunc, ReverseDiff.track.(x))
-    Hfunc = x -> _Hfunc(x)[3]
+    Hfunc = gen_hess_fwd(Vfunc, ReverseDiff.track.(x))
+
     # QUES What's the best output format of this function?
     return x -> ReverseDiff.jacobian(x -> f(Hfunc(x)), x) # default output shape [∂H∂x₁; ∂H∂x₂; ...]
 end
 
 # TODO Refactor this using https://juliadiff.org/ForwardDiff.jl/stable/user/api/#Preallocating/Configuring-Work-Buffers
+function gen_hess_fwd_precompute_cfg(func, x::AbstractVector)
+    cfg = ForwardDiff.HessianConfig(func, x)
+    H = Matrix{eltype(x)}(undef, length(x), length(x))
+
+    function hess(x::AbstractVector)
+        ForwardDiff.hessian!(H, func, x, cfg)
+        return H
+    end
+    return hess
+end
+
 function gen_hess_fwd(func, x::AbstractVector)
+    cfg = nothing
+    H = nothing
+
     function hess(x::AbstractVector)
-        return nothing, nothing, ForwardDiff.hessian(func, x)
+        if cfg === nothing
+            cfg = ForwardDiff.HessianConfig(func, x)
+            H = Matrix{eltype(x)}(undef, length(x), length(x))
+        end
+        ForwardDiff.hessian!(H, func, x, cfg)
+        return H
     end
     return hess
 end
 
 function gen_∂G∂θ_fwd(Vfunc, x; f=identity)
-    _Hfunc = gen_hess_fwd(Vfunc, x)
-    Hfunc = x -> _Hfunc(x)[3]
-    # QUES What's the best output format of this function?
-    cfg = ForwardDiff.JacobianConfig(Hfunc, x)
+    chunk = ForwardDiff.Chunk(x)
+    tag = ForwardDiff.Tag(Vfunc, eltype(x))
+    jac_cfg = ForwardDiff.JacobianConfig(Vfunc, x, chunk, tag)
+    hess_cfg = ForwardDiff.HessianConfig(Vfunc, jac_cfg.duals, chunk, tag)
+
     d = length(x)
     out = zeros(eltype(x), d^2, d)
-    return x -> ForwardDiff.jacobian!(out, Hfunc, x, cfg)
-    return out # default output shape [∂H∂x₁; ∂H∂x₂; ...]
+
+    function ∂G∂θ_fwd(y)
+        hess = z -> Symmetric(ForwardDiff.hessian(Vfunc, z, hess_cfg, Val{false}()))
+        ForwardDiff.jacobian!(out, hess, y, jac_cfg, Val{false}())
+        return out
+    end
+
+    return ∂G∂θ_fwd
 end
-# 1.764 ms 
-# fwd -> 5.338 μs 
-# cfg -> 3.651 μs
 
 function reshape_∂G∂θ(H)
     d = size(H, 2)
-    return cat((H[((i - 1) * d + 1):(i * d), :] for i in 1:d)...; dims=3)
+    return reshape(H, d, d, :)
 end
 
 function prepare_sample_target(hps, θ₀, ℓπ)
     Vfunc = x -> -ℓπ(x) # potential energy is the negative log-probability
-    _Hfunc = MCMCLogDensityProblems.gen_hess(Vfunc, θ₀) # x -> (value, gradient, hessian)
-    Hfunc = x -> copy.(_Hfunc(x)) # _Hfunc do in-place computation, copy to avoid bug
+    Hfunc = gen_hess_fwd_precompute_cfg(Vfunc, θ₀) # x -> Hessian matrix (written into a buffer that is reused across calls)
 
-    fstabilize = H -> H + hps.λ * I
+    fstabilize = H -> begin
+        @inbounds for i in 1:size(H, 1)
+            H[i, i] += hps.λ
+        end
+        H
+    end
     Gfunc = x -> begin
-        H = fstabilize(Hfunc(x)[3])
+        H = fstabilize(Hfunc(x))
         all(isfinite, H) ? H : diagm(ones(length(x)))
     end
     _∂G∂θfunc = gen_∂G∂θ_fwd(Vfunc, θ₀; f=fstabilize) # size==(4, 2)

diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
@@ -59,12 +59,11 @@ export Hamiltonian
 
 include("integrator.jl")
 export Leapfrog, JitteredLeapfrog, TemperedLeapfrog
-include("riemannian/integrator.jl")
-export GeneralizedLeapfrog
 
 include("riemannian/metric.jl")
-export IdentityMap, SoftAbsMap, DenseRiemannianMetric
-
+export AbstractRiemannianMetric, DenseRiemannianMetric, IdentityMap, SoftAbsMap
+include("riemannian/integrator.jl")
+export GeneralizedLeapfrog, ImplicitMidpoint
 include("riemannian/hamiltonian.jl")
 
 include("trajectory.jl")
@@ -89,7 +88,7 @@ export find_good_eps
 include("adaptation/Adaptation.jl")
 using .Adaptation
 import .Adaptation:
-    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation
+    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint
-    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint
+    StepSizeAdaptor,
+    MassMatrixAdaptor,
+    StanHMCAdaptor,
+    NesterovDualAveraging,
+    NoAdaptation,
+    PositionOrPhasePoint
-    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint
+    StepSizeAdaptor,
+    MassMatrixAdaptor,
+    StanHMCAdaptor,
+    NesterovDualAveraging,
+    NoAdaptation,
+    PositionOrPhasePoint
 
 # Helpers for initializing adaptors via AHMC structs
 
@@ -131,6 +130,7 @@ export StepSizeAdaptor,
     MassMatrixAdaptor,
     UnitMassMatrix,
     WelfordVar,
+    NutpieVar,
     WelfordCov,
     NaiveHMCAdaptor,
     StanHMCAdaptor,

diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl
@@ -196,7 +196,7 @@ function AbstractMCMC.step(
 
     # Adapt h and spl.
     tstat = stat(t)
-    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate)
+    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate)
     tstat = merge(tstat, (is_adapt=isadapted,))
 
     # Compute next transition and state.

diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl
@@ -4,13 +4,13 @@ export Adaptation
 using LinearAlgebra: LinearAlgebra
 using Statistics: Statistics
 
-using ..AdvancedHMC: AbstractScalarOrVec
+using ..AdvancedHMC: AbstractScalarOrVec, PhasePoint
 using DocStringExtensions
 
 """
 $(TYPEDEF)
 
-Abstract type for HMC adaptors. 
+Abstract type for HMC adaptors.
 """
 abstract type AbstractAdaptor end
 function getM⁻¹ end
@@ -21,12 +21,17 @@ function initialize! end
 function finalize! end
 export AbstractAdaptor, adapt!, initialize!, finalize!, reset!, getϵ, getM⁻¹
 
+get_position(x::PhasePoint) = x.θ
+get_position(x::AbstractVecOrMat{<:AbstractFloat}) = x
+const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat}, PhasePoint}
-const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat}, PhasePoint}
+const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat},PhasePoint}
-const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat}, PhasePoint}
+const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat},PhasePoint}
+
 struct NoAdaptation <: AbstractAdaptor end
 export NoAdaptation
 include("stepsize.jl")
 export StepSizeAdaptor, NesterovDualAveraging
+
 include("massmatrix.jl")
-export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, WelfordCov
+export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, NutpieVar, WelfordCov
 
 ##
 ## Composite adaptors
@@ -47,18 +52,14 @@ getϵ(ca::NaiveHMCAdaptor) = getϵ(ca.ssa)
 # TODO: implement consensus adaptor
 function adapt!(
     nca::NaiveHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
-    adapt!(nca.ssa, θ, α)
-    adapt!(nca.pc, θ, α)
-    return nothing
-end
-function reset!(aca::NaiveHMCAdaptor)
-    reset!(aca.ssa)
-    reset!(aca.pc)
+    adapt!(nca.ssa, z_or_theta, α)
+    adapt!(nca.pc, z_or_theta, α)
     return nothing
 end
+
 initialize!(adaptor::NaiveHMCAdaptor, n_adapts::Int) = nothing
 finalize!(aca::NaiveHMCAdaptor) = finalize!(aca.ssa)