From caabd0f5fb90163b85af8cae73db8799e1f1c17c Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 19 Nov 2025 12:58:36 +0100 Subject: [PATCH 01/14] initial changes to get a working demo --- src/abstractmcmc.jl | 2 +- src/adaptation/Adaptation.jl | 2 +- src/adaptation/massmatrix.jl | 67 ++++++++++++++++++++++++++++++++-- src/adaptation/stan_adaptor.jl | 8 ++-- src/sampler.jl | 4 +- tmp/Project.toml | 5 +++ tmp/demo.jl | 51 ++++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 tmp/Project.toml create mode 100644 tmp/demo.jl diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl index 413e9de6..f6ce4000 100644 --- a/src/abstractmcmc.jl +++ b/src/abstractmcmc.jl @@ -196,7 +196,7 @@ function AbstractMCMC.step( # Adapt h and spl. tstat = stat(t) - h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate) + h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate) tstat = merge(tstat, (is_adapt=isadapted,)) # Compute next transition and state. diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl index 4f2fde83..e34e9c88 100644 --- a/src/adaptation/Adaptation.jl +++ b/src/adaptation/Adaptation.jl @@ -4,7 +4,7 @@ export Adaptation using LinearAlgebra: LinearAlgebra using Statistics: Statistics -using ..AdvancedHMC: AbstractScalarOrVec +using ..AdvancedHMC: AbstractScalarOrVec, PhasePoint using DocStringExtensions """ diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index 105d3bae..de2e5178 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -9,16 +9,18 @@ finalize!(::MassMatrixAdaptor) = nothing function adapt!( adaptor::MassMatrixAdaptor, - θ::AbstractVecOrMat{<:AbstractFloat}, + z::PhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, is_update::Bool=true, ) - resize_adaptor!(adaptor, size(θ)) - push!(adaptor, θ) + resize_adaptor!(adaptor, size(z.θ)) + push!(adaptor, z) is_update && update!(adaptor) return nothing end +Base.push!(a::MassMatrixAdaptor, z::PhasePoint) = push!(a, z.θ) + ## Unit mass matrix adaptor struct UnitMassMatrix{T<:AbstractFloat} <: MassMatrixAdaptor end @@ -153,6 +155,65 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat} return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5)) end +## Nutpie-style mass matrix estimator (using positions and gradients) + +mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T} + position_estimator::WelfordVar{T,E,V} + gradient_estimator::WelfordVar{T,E,V} + n::Int + n_min::Int + var::V + function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V} + return new{eltype(E),E,V}( + WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), + WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), + n, n_min, var + ) + end +end + +function Base.show(io::IO, ::NutpieVar{T}) where {T} + return print(io, "NutpieVar{", T, "} adaptor") +end + +function NutpieVar{T}( + sz::Union{Tuple{Int},Tuple{Int,Int}}; n_min::Int=10, var=ones(T, sz) +) where {T<:AbstractFloat} + return NutpieVar(0, n_min, zeros(T, sz), zeros(T, sz), zeros(T, sz), var) +end + +function NutpieVar(sz::Union{Tuple{Int},Tuple{Int,Int}}; kwargs...) + return NutpieVar{Float64}(sz; kwargs...) +end + +function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int,Int}) where {T<:AbstractFloat} + resize_adaptor!(nv.position_estimator, size_θ) + resize_adaptor!(nv.gradient_estimator, size_θ) +end + +function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:AbstractFloat} + resize_adaptor!(nv.position_estimator, size_θ) + resize_adaptor!(nv.gradient_estimator, size_θ) +end + +function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat} + nv.n = 0 + reset!(nv.position_estimator) + reset!(nv.gradient_estimator) +end + +function Base.push!(nv::NutpieVar, z::PhasePoint) + nv.n += 1 + push!(nv.position_estimator, z.θ) + push!(nv.gradient_estimator, z.ℓπ.gradient) + return nothing +end + +# https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/var_adaptation.hpp +function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat} + return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) +end + ## Dense mass matrix adaptor abstract type DenseMatrixEstimator{T} <: MassMatrixAdaptor end diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl index b36a2259..666e33cc 100644 --- a/src/adaptation/stan_adaptor.jl +++ b/src/adaptation/stan_adaptor.jl @@ -136,20 +136,20 @@ is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits function adapt!( tp::StanHMCAdaptor, - θ::AbstractVecOrMat{<:AbstractFloat}, + z::PhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) tp.state.i += 1 - adapt!(tp.ssa, θ, α) + adapt!(tp.ssa, z.θ, α) - resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary. + resize_adaptor!(tp.pc, size(z.θ)) # Resize pre-conditioner if necessary. # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp if is_in_window(tp) # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window. is_update_M⁻¹ = is_window_end(tp) - adapt!(tp.pc, θ, α, is_update_M⁻¹) + adapt!(tp.pc, z, α, is_update_M⁻¹) end if is_window_end(tp) diff --git a/src/sampler.jl b/src/sampler.jl index 3e477ba3..29c903ba 100644 --- a/src/sampler.jl +++ b/src/sampler.jl @@ -75,13 +75,13 @@ function Adaptation.adapt!( adaptor::AbstractAdaptor, i::Int, n_adapts::Int, - θ::AbstractVecOrMat{<:AbstractFloat}, + z::PhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) isadapted = false if i <= n_adapts i == 1 && Adaptation.initialize!(adaptor, n_adapts) - adapt!(adaptor, θ, α) + adapt!(adaptor, z, α) i == n_adapts && finalize!(adaptor) h = update(h, adaptor) κ = update(κ, adaptor) diff --git a/tmp/Project.toml b/tmp/Project.toml new file mode 100644 index 00000000..f0f82f8e --- /dev/null +++ b/tmp/Project.toml @@ -0,0 +1,5 @@ +[deps] +AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" +PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807" diff --git a/tmp/demo.jl b/tmp/demo.jl new file mode 100644 index 00000000..f7f8cc7b --- /dev/null +++ b/tmp/demo.jl @@ -0,0 +1,51 @@ +using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random + +if !@isdefined pdb + const pdb = PosteriorDB.database() +end +stan_problem(path, data) = StanProblem( + path, data; + nan_on_error=true, + make_args=["STAN_THREADS=TRUE"], + warn=false +) +stan_problem(posterior_name::AbstractString) = stan_problem( + PosteriorDB.path(PosteriorDB.implementation(PosteriorDB.model(PosteriorDB.posterior(pdb, (posterior_name))), "stan")), + PosteriorDB.load(PosteriorDB.dataset(PosteriorDB.posterior(pdb, (posterior_name))), String) +) + +begin +lpdf = stan_problem("radon_mn-radon_county_intercept") + +n_adapts = n_samples = 1000 +rng = Xoshiro(1) +spl = NUTS(0.8) +initial_params = nothing +model = AdvancedHMC.AbstractMCMC._model(lpdf) +(;logdensity) = model +metric = AdvancedHMC.make_metric(spl, logdensity) +hamiltonian = AdvancedHMC.Hamiltonian(metric, model) +initial_params = AdvancedHMC.make_initial_params(rng, spl, logdensity, initial_params) +ϵ = AdvancedHMC.make_step_size(rng, spl, hamiltonian, initial_params) +integrator = AdvancedHMC.make_integrator(spl, ϵ) +κ = AdvancedHMC.make_kernel(spl, integrator) +# adaptor = AdvancedHMC.make_adaptor(spl, metric, integrator) +adaptor = AdvancedHMC.StanHMCAdaptor( + AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), + AdvancedHMC.StepSizeAdaptor(spl.δ, integrator) +) +h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params) +# Using the below uses Nutpie (as in position and gradients) +initial_state = AdvancedHMC.HMCState(0, t, metric, κ, adaptor) +# Using the below uses Stan (as in only positions) +# initial_state = nothing +@time samples = AdvancedHMC.sample( + rng, + model, + spl, + n_adapts + n_samples; + n_adapts=n_adapts, initial_state, + progress=true, +) +nothing +end From e030eecf1c2d478ac6c313022ba128adf50e761e Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Fri, 21 Nov 2025 10:09:33 +0100 Subject: [PATCH 02/14] fix tests, add new ones, and add documentation --- docs/src/api.md | 4 ++ src/adaptation/Adaptation.jl | 5 +++ src/adaptation/massmatrix.jl | 35 ++++++++++++---- src/adaptation/stan_adaptor.jl | 25 ++++++++++++ src/sampler.jl | 23 ++++++++++- test/adaptation.jl | 74 ++++++++++++++++++++++++++++++++-- tmp/Project.toml | 1 + tmp/demo.jl | 29 +++++++------ 8 files changed, 169 insertions(+), 27 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 54b5939d..c09026ec 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -33,6 +33,10 @@ where `ϵ` is the step size of leapfrog integration. - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)` + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric` + + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), + currently to be initialized for a `metric` of type `DiagEuclideanMetric` + via `mma = AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹))` + until a new interface is introduced to specify the method of adaptation. - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)` diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl index e34e9c88..9d5df077 100644 --- a/src/adaptation/Adaptation.jl +++ b/src/adaptation/Adaptation.jl @@ -54,6 +54,11 @@ function adapt!( adapt!(nca.pc, θ, α) return nothing end +adapt!( + nca::NaiveHMCAdaptor, + z::PhasePoint, + α::AbstractScalarOrVec{<:AbstractFloat}, +) = adapt!(nca, z.θ, α) function reset!(aca::NaiveHMCAdaptor) reset!(aca.ssa) reset!(aca.pc) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index de2e5178..b03a7973 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -7,6 +7,18 @@ abstract type MassMatrixAdaptor <: AbstractAdaptor end initialize!(::MassMatrixAdaptor, ::Int) = nothing finalize!(::MassMatrixAdaptor) = nothing +function adapt!( + adaptor::MassMatrixAdaptor, + θ::AbstractVecOrMat{<:AbstractFloat}, + α::AbstractScalarOrVec{<:AbstractFloat}, + is_update::Bool=true, +) + resize_adaptor!(adaptor, size(θ)) + push!(adaptor, θ) + is_update && update!(adaptor) + return nothing +end + function adapt!( adaptor::MassMatrixAdaptor, z::PhasePoint, @@ -155,7 +167,7 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat} return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5)) end -## Nutpie-style mass matrix estimator (using positions and gradients) +## Nutpie-style diagonal mass matrix estimator (using positions and gradients) mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T} position_estimator::WelfordVar{T,E,V} @@ -177,7 +189,7 @@ function Base.show(io::IO, ::NutpieVar{T}) where {T} end function NutpieVar{T}( - sz::Union{Tuple{Int},Tuple{Int,Int}}; n_min::Int=10, var=ones(T, sz) + sz::Union{Tuple{Int},Tuple{Int,Int}}=(2,); n_min::Int=10, var=ones(T, sz) ) where {T<:AbstractFloat} return NutpieVar(0, n_min, zeros(T, sz), zeros(T, sz), zeros(T, sz), var) end @@ -187,13 +199,22 @@ function NutpieVar(sz::Union{Tuple{Int},Tuple{Int,Int}}; kwargs...) end function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int,Int}) where {T<:AbstractFloat} - resize_adaptor!(nv.position_estimator, size_θ) - resize_adaptor!(nv.gradient_estimator, size_θ) + if size_θ != size(nv.var) + @assert nv.n == 0 "Cannot resize a var estimator when it contains samples." + resize_adaptor!(nv.position_estimator, size_θ) + resize_adaptor!(nv.gradient_estimator, size_θ) + nv.var = ones(T, size_θ) + end end function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:AbstractFloat} - resize_adaptor!(nv.position_estimator, size_θ) - resize_adaptor!(nv.gradient_estimator, size_θ) + length_θ = first(size_θ) + if length_θ != size(nv.var, 1) + @assert nv.n == 0 "Cannot resize a var estimator when it contains samples." + resize_adaptor!(nv.position_estimator, size_θ) + resize_adaptor!(nv.gradient_estimator, size_θ) + fill!(resize!(nv.var, length_θ), T(1)) + end end function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat} @@ -209,7 +230,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint) return nothing end -# https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/var_adaptation.hpp +# Ref: TODO function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat} return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) end diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl index 666e33cc..ca53421c 100644 --- a/src/adaptation/stan_adaptor.jl +++ b/src/adaptation/stan_adaptor.jl @@ -134,6 +134,31 @@ function is_in_window(a::StanHMCAdaptor) end is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits +function adapt!( + tp::StanHMCAdaptor, + θ::AbstractVecOrMat{<:AbstractFloat}, + α::AbstractScalarOrVec{<:AbstractFloat}, +) + tp.state.i += 1 + + adapt!(tp.ssa, θ, α) + + resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary. + + # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp + if is_in_window(tp) + # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window. + is_update_M⁻¹ = is_window_end(tp) + adapt!(tp.pc, θ, α, is_update_M⁻¹) + end + + if is_window_end(tp) + reset!(tp.ssa) + reset!(tp.pc) + end +end + + function adapt!( tp::StanHMCAdaptor, z::PhasePoint, diff --git a/src/sampler.jl b/src/sampler.jl index 29c903ba..724fac39 100644 --- a/src/sampler.jl +++ b/src/sampler.jl @@ -69,6 +69,27 @@ function Adaptation.adapt!( return h, κ, false end +function Adaptation.adapt!( + h::Hamiltonian, + κ::AbstractMCMCKernel, + adaptor::AbstractAdaptor, + i::Int, + n_adapts::Int, + θ::AbstractVecOrMat{<:AbstractFloat}, + α::AbstractScalarOrVec{<:AbstractFloat}, +) + isadapted = false + if i <= n_adapts + i == 1 && Adaptation.initialize!(adaptor, n_adapts) + adapt!(adaptor, θ, α) + i == n_adapts && finalize!(adaptor) + h = update(h, adaptor) + κ = update(κ, adaptor) + isadapted = true + end + return h, κ, isadapted +end + function Adaptation.adapt!( h::Hamiltonian, κ::AbstractMCMCKernel, @@ -185,7 +206,7 @@ function sample( t = transition(rng, h, κ, t.z) # Adapt h and κ; what mutable is the adaptor tstat = stat(t) - h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate) + h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate) if isadapted num_divergent_transitions_during_adaption += tstat.numerical_error else diff --git a/test/adaptation.jl b/test/adaptation.jl index 346423ea..9bd41e32 100644 --- a/test/adaptation.jl +++ b/test/adaptation.jl @@ -1,6 +1,8 @@ using ReTest, LinearAlgebra, Distributions, AdvancedHMC, Random, ForwardDiff +using AdvancedHMC: + PhasePoint, DualValue using AdvancedHMC.Adaptation: - WelfordVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, get_estimation, reset! + DiagMatrixEstimator, WelfordVar, NutpieVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, get_estimation, reset! function runnuts(ℓπ, metric; n_samples=10_000) D = size(metric, 1) @@ -18,6 +20,28 @@ function runnuts(ℓπ, metric; n_samples=10_000) return (samples=samples, stats=stats, adaptor=adaptor) end +# Temporary function until we've settled on a different interface +function runnuts_nutpie(ℓπ, metric::DiagEuclideanMetric; n_samples=10_000) + D = size(metric, 1) + n_adapts = 5_000 + θ_init = rand(D) + rng = MersenneTwister(0) + + nuts = NUTS(0.8) + h = Hamiltonian(metric, ℓπ, ForwardDiff) + step_size = AdvancedHMC.make_step_size(rng, nuts, h, θ_init) + integrator = AdvancedHMC.make_integrator(nuts, step_size) + κ = AdvancedHMC.make_kernel(nuts, integrator) + # Constructing like this until we've settled on a different interface + adaptor = AdvancedHMC.StanHMCAdaptor( + AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), + AdvancedHMC.StepSizeAdaptor(nuts.δ, integrator) + ) + samples, stats = sample(h, κ, θ_init, n_samples, adaptor, n_adapts; verbose=false) + return (samples=samples, stats=stats, adaptor=adaptor) +end +preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Diagonal(a.var)) \ cov / sqrt(Diagonal(a.var))) + @testset "Adaptation" begin # Check that the estimated variance is approximately correct. @testset "Online v.s. naive v.s. true var/cov estimation" begin @@ -60,15 +84,32 @@ end @testset "MassMatrixAdaptor constructors" begin θ = [0.0, 0.0, 0.0, 0.0] + z = PhasePoint( + θ, θ, DualValue(0., θ), DualValue(0., θ) + ) pc1 = MassMatrixAdaptor(UnitEuclideanMetric) # default dim = 2 pc2 = MassMatrixAdaptor(DiagEuclideanMetric) + # Constructing like this until we've settled on a different interface + pc2_nutpie = NutpieVar{Float64}((2, )) pc3 = MassMatrixAdaptor(DenseEuclideanMetric) - # Var adaptor dimention should be increased to length(θ) from 2 + # Var adaptor dimension should be increased to length(θ) from 2 AdvancedHMC.adapt!(pc1, θ, 1.0) AdvancedHMC.adapt!(pc2, θ, 1.0) + AdvancedHMC.adapt!(pc2_nutpie, z, 1.0) AdvancedHMC.adapt!(pc3, θ, 1.0) @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ)) + @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ)) + @test AdvancedHMC.Adaptation.getM⁻¹(pc3) == + LinearAlgebra.diagm(0 => ones(length(θ))) + + # Making sure "all" MassMatrixAdaptors support getting a PhasePoint instead of a Vector + # AdvancedHMC.adapt!(pc1, z, 1.0) - this does not work - should it? + AdvancedHMC.adapt!(pc2, z, 1.0) + AdvancedHMC.adapt!(pc2_nutpie, z, 1.0) + AdvancedHMC.adapt!(pc3, z, 1.0) + @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ)) + @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ)) @test AdvancedHMC.Adaptation.getM⁻¹(pc3) == LinearAlgebra.diagm(0 => ones(length(θ))) end @@ -82,10 +123,14 @@ end adaptor2 = StanHMCAdaptor( MassMatrixAdaptor(DiagEuclideanMetric), NesterovDualAveraging(0.8, 0.5) ) + # Constructing like this until we've settled on a different interface + adaptor2_nutpie = StanHMCAdaptor( + NutpieVar{Float64}((2, )), NesterovDualAveraging(0.8, 0.5) + ) adaptor3 = StanHMCAdaptor( MassMatrixAdaptor(DenseEuclideanMetric), NesterovDualAveraging(0.8, 0.5) ) - for a in [adaptor1, adaptor2, adaptor3] + for a in [adaptor1, adaptor2, adaptor2_nutpie, adaptor3] AdvancedHMC.initialize!(a, 1_000) @test a.state.window_start == 76 @test a.state.window_end == 950 @@ -93,6 +138,7 @@ end AdvancedHMC.adapt!(a, θ, 1.0) end @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2) == ones(length(θ)) + @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2a) == ones(length(θ)) @test AdvancedHMC.Adaptation.getM⁻¹(adaptor3) == LinearAlgebra.diagm(0 => ones(length(θ))) @@ -119,19 +165,26 @@ end # Random variance σ² = 1 .+ abs.(randn(D)) + Σ = Diagonal(σ²) # Diagonal Gaussian - ℓπ = LogDensityDistribution(MvNormal(Diagonal(σ²))) + ℓπ = LogDensityDistribution(MvNormal(Σ)) res = runnuts(ℓπ, DiagEuclideanMetric(D)) @test res.adaptor.pc.var ≈ σ² rtol = 0.2 + # For this target, Nutpie (without regularization) will arrive at the true variances after two draws. + res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D)) + @test res.adaptor.pc.var ≈ σ² rtol = 0.2 + @test preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ) + res = runnuts(ℓπ, DenseEuclideanMetric(D)) @test res.adaptor.pc.cov ≈ Diagonal(σ²) rtol = 0.25 end end @testset "DenseEuclideanMetric" begin + n_nutpie_superior = 0 for _ in 1:n_tests # Random covariance m = randn(D, D) @@ -143,9 +196,18 @@ end res = runnuts(ℓπ, DiagEuclideanMetric(D)) @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2 + # For this target, Nutpie will NOT converge towards the true variances, even after infinite draws. + # HOWEVER, it will asymptotically (but also generally more quickly than Stan) + # find the best preconditioner for the target. + # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing. + res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D)) + # @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2 + n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ) + res = runnuts(ℓπ, DenseEuclideanMetric(D)) @test res.adaptor.pc.cov ≈ Σ rtol = 0.25 end + @test n_nutpie_superior > n_tests / 2 end end @@ -156,6 +218,10 @@ end res = runnuts(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1) @test res.adaptor.pc.var == mass_init + mass_init = fill(0.5, D) + res = runnuts_nutpie(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1) + @test res.adaptor.pc.var == mass_init + mass_init = diagm(0 => fill(0.5, D)) res = runnuts(ℓπ, DenseEuclideanMetric(mass_init); n_samples=1) @test res.adaptor.pc.cov == mass_init diff --git a/tmp/Project.toml b/tmp/Project.toml index f0f82f8e..7dfd40e4 100644 --- a/tmp/Project.toml +++ b/tmp/Project.toml @@ -1,5 +1,6 @@ [deps] AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" +MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807" diff --git a/tmp/demo.jl b/tmp/demo.jl index f7f8cc7b..6a35c0a9 100644 --- a/tmp/demo.jl +++ b/tmp/demo.jl @@ -1,4 +1,4 @@ -using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random +using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random, MCMCDiagnosticTools if !@isdefined pdb const pdb = PosteriorDB.database() @@ -18,7 +18,7 @@ begin lpdf = stan_problem("radon_mn-radon_county_intercept") n_adapts = n_samples = 1000 -rng = Xoshiro(1) +rng = Xoshiro(2) spl = NUTS(0.8) initial_params = nothing model = AdvancedHMC.AbstractMCMC._model(lpdf) @@ -35,17 +35,16 @@ adaptor = AdvancedHMC.StanHMCAdaptor( AdvancedHMC.StepSizeAdaptor(spl.δ, integrator) ) h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params) -# Using the below uses Nutpie (as in position and gradients) -initial_state = AdvancedHMC.HMCState(0, t, metric, κ, adaptor) -# Using the below uses Stan (as in only positions) -# initial_state = nothing -@time samples = AdvancedHMC.sample( - rng, - model, - spl, - n_adapts + n_samples; - n_adapts=n_adapts, initial_state, - progress=true, -) -nothing +performances = map((;nutpie=AdvancedHMC.HMCState(0, t, metric, κ, adaptor), stan=nothing)) do initial_state + dt = @elapsed samples = AdvancedHMC.sample( + rng, + model, + spl, + n_adapts + n_samples; + n_adapts=n_adapts, callback=error,#initial_state, + progress=true, + ) + ess(reshape(mapreduce(sample->sample.z.θ , hcat, samples[n_adapts+1:end])', (n_samples, 1, :))) |> minimum |> Base.Fix2(/, dt) end +@info (;performances) +end \ No newline at end of file From 7cdea3660131f590a0319fdc0158eed4f7bc29d3 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Fri, 21 Nov 2025 10:11:08 +0100 Subject: [PATCH 03/14] fix type --- test/adaptation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/adaptation.jl b/test/adaptation.jl index 9bd41e32..6af62367 100644 --- a/test/adaptation.jl +++ b/test/adaptation.jl @@ -138,7 +138,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia AdvancedHMC.adapt!(a, θ, 1.0) end @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2) == ones(length(θ)) - @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2a) == ones(length(θ)) + @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2_nutpie) == ones(length(θ)) @test AdvancedHMC.Adaptation.getM⁻¹(adaptor3) == LinearAlgebra.diagm(0 => ones(length(θ))) From 989d284cb792f1c4bec2aae3169d08feaffd3565 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Fri, 21 Nov 2025 10:18:21 +0100 Subject: [PATCH 04/14] fix some stray tests --- src/adaptation/massmatrix.jl | 9 +++++++++ test/adaptation.jl | 3 +-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index b03a7973..92cb1560 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -60,6 +60,15 @@ function adapt!( return nothing end +function adapt!( + ::UnitMassMatrix, + ::PhasePoint, + ::AbstractScalarOrVec{<:AbstractFloat}, + is_update::Bool=true, +) + return nothing +end + ## Diagonal mass matrix adaptor abstract type DiagMatrixEstimator{T} <: MassMatrixAdaptor end diff --git a/test/adaptation.jl b/test/adaptation.jl index 6af62367..553ba32e 100644 --- a/test/adaptation.jl +++ b/test/adaptation.jl @@ -104,7 +104,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia LinearAlgebra.diagm(0 => ones(length(θ))) # Making sure "all" MassMatrixAdaptors support getting a PhasePoint instead of a Vector - # AdvancedHMC.adapt!(pc1, z, 1.0) - this does not work - should it? + AdvancedHMC.adapt!(pc1, z, 1.0) AdvancedHMC.adapt!(pc2, z, 1.0) AdvancedHMC.adapt!(pc2_nutpie, z, 1.0) AdvancedHMC.adapt!(pc3, z, 1.0) @@ -201,7 +201,6 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia # find the best preconditioner for the target. # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing. res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D)) - # @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2 n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ) res = runnuts(ℓπ, DenseEuclideanMetric(D)) From 842fe06822194a75113ffc61a482a22d0b13568a Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Fri, 21 Nov 2025 10:21:32 +0100 Subject: [PATCH 05/14] delete tmp folder with demo --- tmp/Project.toml | 6 ------ tmp/demo.jl | 50 ------------------------------------------------ 2 files changed, 56 deletions(-) delete mode 100644 tmp/Project.toml delete mode 100644 tmp/demo.jl diff --git a/tmp/Project.toml b/tmp/Project.toml deleted file mode 100644 index 7dfd40e4..00000000 --- a/tmp/Project.toml +++ /dev/null @@ -1,6 +0,0 @@ -[deps] -AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" -MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" -PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StanLogDensityProblems = "a545de4d-8dba-46db-9d34-4e41d3f07807" diff --git a/tmp/demo.jl b/tmp/demo.jl deleted file mode 100644 index 6a35c0a9..00000000 --- a/tmp/demo.jl +++ /dev/null @@ -1,50 +0,0 @@ -using AdvancedHMC, PosteriorDB, StanLogDensityProblems, Random, MCMCDiagnosticTools - -if !@isdefined pdb - const pdb = PosteriorDB.database() -end -stan_problem(path, data) = StanProblem( - path, data; - nan_on_error=true, - make_args=["STAN_THREADS=TRUE"], - warn=false -) -stan_problem(posterior_name::AbstractString) = stan_problem( - PosteriorDB.path(PosteriorDB.implementation(PosteriorDB.model(PosteriorDB.posterior(pdb, (posterior_name))), "stan")), - PosteriorDB.load(PosteriorDB.dataset(PosteriorDB.posterior(pdb, (posterior_name))), String) -) - -begin -lpdf = stan_problem("radon_mn-radon_county_intercept") - -n_adapts = n_samples = 1000 -rng = Xoshiro(2) -spl = NUTS(0.8) -initial_params = nothing -model = AdvancedHMC.AbstractMCMC._model(lpdf) -(;logdensity) = model -metric = AdvancedHMC.make_metric(spl, logdensity) -hamiltonian = AdvancedHMC.Hamiltonian(metric, model) -initial_params = AdvancedHMC.make_initial_params(rng, spl, logdensity, initial_params) -ϵ = AdvancedHMC.make_step_size(rng, spl, hamiltonian, initial_params) -integrator = AdvancedHMC.make_integrator(spl, ϵ) -κ = AdvancedHMC.make_kernel(spl, integrator) -# adaptor = AdvancedHMC.make_adaptor(spl, metric, integrator) -adaptor = AdvancedHMC.StanHMCAdaptor( - AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), - AdvancedHMC.StepSizeAdaptor(spl.δ, integrator) -) -h, t = AdvancedHMC.sample_init(rng, hamiltonian, initial_params) -performances = map((;nutpie=AdvancedHMC.HMCState(0, t, metric, κ, adaptor), stan=nothing)) do initial_state - dt = @elapsed samples = AdvancedHMC.sample( - rng, - model, - spl, - n_adapts + n_samples; - n_adapts=n_adapts, callback=error,#initial_state, - progress=true, - ) - ess(reshape(mapreduce(sample->sample.z.θ , hcat, samples[n_adapts+1:end])', (n_samples, 1, :))) |> minimum |> Base.Fix2(/, dt) -end -@info (;performances) -end \ No newline at end of file From 0f27172fbe05cf4df6293882176756d6ee41751d Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Mon, 24 Nov 2025 12:48:28 +0100 Subject: [PATCH 06/14] address review comments --- src/adaptation/massmatrix.jl | 6 +++--- src/sampler.jl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index 92cb1560..6ca9ea29 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -226,7 +226,7 @@ function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:Abstra end end -function reset!(nv::NutpieVar{T}) where {T<:AbstractFloat} +function reset!(nv::NutpieVar) nv.n = 0 reset!(nv.position_estimator) reset!(nv.gradient_estimator) @@ -239,8 +239,8 @@ function Base.push!(nv::NutpieVar, z::PhasePoint) return nothing end -# Ref: TODO -function get_estimation(nv::NutpieVar{T}) where {T<:AbstractFloat} +# Ref: https://github.com/pymc-devs/nutpie +function get_estimation(nv::NutpieVar) return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) end diff --git a/src/sampler.jl b/src/sampler.jl index 724fac39..15d96b4e 100644 --- a/src/sampler.jl +++ b/src/sampler.jl @@ -74,7 +74,7 @@ function Adaptation.adapt!( κ::AbstractMCMCKernel, adaptor::AbstractAdaptor, i::Int, - n_adapts::Int, + n_adapts::Int, θ::AbstractVecOrMat{<:AbstractFloat}, α::AbstractScalarOrVec{<:AbstractFloat}, ) From c395715a69837e51e473d15f9a1795d46e08c3cb Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Mon, 24 Nov 2025 13:25:36 +0100 Subject: [PATCH 07/14] reference interface refactor issue --- src/adaptation/massmatrix.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index 6ca9ea29..17f0920f 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -176,7 +176,7 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat} return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5)) end -## Nutpie-style diagonal mass matrix estimator (using positions and gradients) +## Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475 mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T} position_estimator::WelfordVar{T,E,V} @@ -186,7 +186,7 @@ mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecO var::V function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V} return new{eltype(E),E,V}( - WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), + WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)), n, n_min, var ) From f1d1c80175ec1e8ed2dec111289daa6ffc632874 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 26 Nov 2025 10:18:26 +0100 Subject: [PATCH 08/14] refactor and fix test rng handling --- src/AdvancedHMC.jl | 3 +- src/adaptation/Adaptation.jl | 26 ++++++++--------- src/adaptation/massmatrix.jl | 51 ++++++++++++---------------------- src/adaptation/stan_adaptor.jl | 33 +++------------------- src/adaptation/stepsize.jl | 4 +-- src/sampler.jl | 44 ++++++++--------------------- test/adaptation.jl | 21 +++++++++----- 7 files changed, 62 insertions(+), 120 deletions(-) diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl index b25710d5..5db7644c 100644 --- a/src/AdvancedHMC.jl +++ b/src/AdvancedHMC.jl @@ -72,7 +72,7 @@ export find_good_eps include("adaptation/Adaptation.jl") using .Adaptation import .Adaptation: - StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation + StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint # Helpers for initializing adaptors via AHMC structs @@ -114,6 +114,7 @@ export StepSizeAdaptor, MassMatrixAdaptor, UnitMassMatrix, WelfordVar, + NutpieVar, WelfordCov, NaiveHMCAdaptor, StanHMCAdaptor, diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl index 9d5df077..10a9a980 100644 --- a/src/adaptation/Adaptation.jl +++ b/src/adaptation/Adaptation.jl @@ -10,7 +10,7 @@ using DocStringExtensions """ $(TYPEDEF) -Abstract type for HMC adaptors. +Abstract type for HMC adaptors. """ abstract type AbstractAdaptor end function getM⁻¹ end @@ -21,12 +21,17 @@ function initialize! end function finalize! end export AbstractAdaptor, adapt!, initialize!, finalize!, reset!, getϵ, getM⁻¹ +get_position(x::PhasePoint) = x.θ +get_position(x::AbstractVecOrMat{<:AbstractFloat}) = x +const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat}, PhasePoint} + struct NoAdaptation <: AbstractAdaptor end export NoAdaptation include("stepsize.jl") export StepSizeAdaptor, NesterovDualAveraging + include("massmatrix.jl") -export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, WelfordCov +export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, NutpieVar, WelfordCov ## ## Composite adaptors @@ -47,23 +52,14 @@ getϵ(ca::NaiveHMCAdaptor) = getϵ(ca.ssa) # TODO: implement consensus adaptor function adapt!( nca::NaiveHMCAdaptor, - θ::AbstractVecOrMat{<:AbstractFloat}, + z_or_theta::PositionOrPhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) - adapt!(nca.ssa, θ, α) - adapt!(nca.pc, θ, α) - return nothing -end -adapt!( - nca::NaiveHMCAdaptor, - z::PhasePoint, - α::AbstractScalarOrVec{<:AbstractFloat}, -) = adapt!(nca, z.θ, α) -function reset!(aca::NaiveHMCAdaptor) - reset!(aca.ssa) - reset!(aca.pc) + adapt!(nca.ssa, z_or_theta, α) + adapt!(nca.pc, z_or_theta, α) return nothing end + initialize!(adaptor::NaiveHMCAdaptor, n_adapts::Int) = nothing finalize!(aca::NaiveHMCAdaptor) = finalize!(aca.ssa) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index 17f0920f..74269647 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -9,29 +9,17 @@ finalize!(::MassMatrixAdaptor) = nothing function adapt!( adaptor::MassMatrixAdaptor, - θ::AbstractVecOrMat{<:AbstractFloat}, - α::AbstractScalarOrVec{<:AbstractFloat}, - is_update::Bool=true, -) - resize_adaptor!(adaptor, size(θ)) - push!(adaptor, θ) - is_update && update!(adaptor) - return nothing -end - -function adapt!( - adaptor::MassMatrixAdaptor, - z::PhasePoint, - α::AbstractScalarOrVec{<:AbstractFloat}, + z_or_theta::PositionOrPhasePoint, + ::AbstractScalarOrVec{<:AbstractFloat}, is_update::Bool=true, ) - resize_adaptor!(adaptor, size(z.θ)) - push!(adaptor, z) + resize_adaptor!(adaptor, size(get_position(z_or_theta))) + push!(adaptor, z_or_theta) is_update && update!(adaptor) return nothing end -Base.push!(a::MassMatrixAdaptor, z::PhasePoint) = push!(a, z.θ) +Base.push!(a::MassMatrixAdaptor, z_or_theta::PositionOrPhasePoint) = push!(a, get_position(z_or_theta)) ## Unit mass matrix adaptor @@ -53,16 +41,7 @@ getM⁻¹(::UnitMassMatrix{T}) where {T} = LinearAlgebra.UniformScaling{T}(one(T function adapt!( ::UnitMassMatrix, - ::AbstractVecOrMat{<:AbstractFloat}, - ::AbstractScalarOrVec{<:AbstractFloat}, - is_update::Bool=true, -) - return nothing -end - -function adapt!( - ::UnitMassMatrix, - ::PhasePoint, + ::PositionOrPhasePoint, ::AbstractScalarOrVec{<:AbstractFloat}, is_update::Bool=true, ) @@ -70,7 +49,6 @@ function adapt!( end ## Diagonal mass matrix adaptor - abstract type DiagMatrixEstimator{T} <: MassMatrixAdaptor end getM⁻¹(ve::DiagMatrixEstimator) = ve.var @@ -93,7 +71,7 @@ NaiveVar{T}(sz::Tuple{Int,Int}) where {T<:AbstractFloat} = NaiveVar(Vector{Matri NaiveVar(sz::Union{Tuple{Int},Tuple{Int,Int}}) = NaiveVar{Float64}(sz) -Base.push!(nv::NaiveVar, s::AbstractVecOrMat) = push!(nv.S, s) +Base.push!(nv::NaiveVar, s::AbstractVecOrMat{<:AbstractFloat}) = push!(nv.S, s) reset!(nv::NaiveVar) = resize!(nv.S, 0) @@ -158,7 +136,7 @@ function reset!(wv::WelfordVar{T}) where {T<:AbstractFloat} return nothing end -function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T} +function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T<:AbstractFloat} wv.n += 1 (; δ, μ, M, n) = wv n = T(n) @@ -176,8 +154,13 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat} return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5)) end -## Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475 +""" +Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475 +Expected to converge faster and to a better mass matrix than WelfordVar. + +Can be initialized via NutpieVar(sz) where sz is either a `Tuple{Int}` or a `Tuple{Int,Int}`. +""" mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T} position_estimator::WelfordVar{T,E,V} gradient_estimator::WelfordVar{T,E,V} @@ -232,6 +215,8 @@ function reset!(nv::NutpieVar) reset!(nv.gradient_estimator) end +Base.push!(::NutpieVar, x::AbstractVecOrMat{<:AbstractFloat}) = error("`NutpieVar` adaptation requires position and gradient information!") + function Base.push!(nv::NutpieVar, z::PhasePoint) nv.n += 1 push!(nv.position_estimator, z.θ) @@ -266,7 +251,7 @@ end NaiveCov{T}(sz::Tuple{Int}) where {T<:AbstractFloat} = NaiveCov(Vector{Vector{T}}()) -Base.push!(nc::NaiveCov, s::AbstractVector) = push!(nc.S, s) +Base.push!(nc::NaiveCov, s::AbstractVector{<:AbstractFloat}) = push!(nc.S, s) reset!(nc::NaiveCov{T}) where {T} = resize!(nc.S, 0) @@ -316,7 +301,7 @@ function reset!(wc::WelfordCov{T}) where {T<:AbstractFloat} return nothing end -function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T} +function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T<:AbstractFloat} wc.n += 1 (; δ, μ, n, M) = wc n = T(n) diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl index ca53421c..931e741a 100644 --- a/src/adaptation/stan_adaptor.jl +++ b/src/adaptation/stan_adaptor.jl @@ -136,45 +136,20 @@ is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits function adapt!( tp::StanHMCAdaptor, - θ::AbstractVecOrMat{<:AbstractFloat}, + z_or_theta::PositionOrPhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) tp.state.i += 1 - adapt!(tp.ssa, θ, α) + adapt!(tp.ssa, z_or_theta, α) - resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary. + resize_adaptor!(tp.pc, size(get_position(z_or_theta))) # Resize pre-conditioner if necessary. # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp if is_in_window(tp) # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window. is_update_M⁻¹ = is_window_end(tp) - adapt!(tp.pc, θ, α, is_update_M⁻¹) - end - - if is_window_end(tp) - reset!(tp.ssa) - reset!(tp.pc) - end -end - - -function adapt!( - tp::StanHMCAdaptor, - z::PhasePoint, - α::AbstractScalarOrVec{<:AbstractFloat}, -) - tp.state.i += 1 - - adapt!(tp.ssa, z.θ, α) - - resize_adaptor!(tp.pc, size(z.θ)) # Resize pre-conditioner if necessary. - - # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp - if is_in_window(tp) - # We accumlate stats from θ online and only trigger the update of M⁻¹ in the end of window. - is_update_M⁻¹ = is_window_end(tp) - adapt!(tp.pc, z, α, is_update_M⁻¹) + adapt!(tp.pc, z_or_theta, α, is_update_M⁻¹) end if is_window_end(tp) diff --git a/src/adaptation/stepsize.jl b/src/adaptation/stepsize.jl index 2afbb651..cacb463d 100644 --- a/src/adaptation/stepsize.jl +++ b/src/adaptation/stepsize.jl @@ -174,7 +174,7 @@ end # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/stepsize_adaptation.hpp # Note: This function is not merged with `adapt!` to empahsize the fact that # step size adaptation is not dependent on `θ`. -# Note 2: `da.state` and `α` support vectorised HMC but should do so together. +# Note 2: `da.state` and `α` support vectorised HMC but should do so together. function adapt_stepsize!( da::NesterovDualAveraging{T}, α::AbstractScalarOrVec{T} ) where {T<:AbstractFloat} @@ -211,7 +211,7 @@ end function adapt!( da::NesterovDualAveraging, - θ::AbstractVecOrMat{<:AbstractFloat}, + ::PositionOrPhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) adapt_stepsize!(da, α) diff --git a/src/sampler.jl b/src/sampler.jl index 15d96b4e..1b282383 100644 --- a/src/sampler.jl +++ b/src/sampler.jl @@ -60,11 +60,11 @@ end function Adaptation.adapt!( h::Hamiltonian, κ::AbstractMCMCKernel, - adaptor::Adaptation.NoAdaptation, - i::Int, - n_adapts::Int, - θ::AbstractVecOrMat{<:AbstractFloat}, - α::AbstractScalarOrVec{<:AbstractFloat}, + ::Adaptation.NoAdaptation, + ::Int, + ::Int, + ::PositionOrPhasePoint, + ::AbstractScalarOrVec{<:AbstractFloat}, ) return h, κ, false end @@ -75,40 +75,18 @@ function Adaptation.adapt!( adaptor::AbstractAdaptor, i::Int, n_adapts::Int, - θ::AbstractVecOrMat{<:AbstractFloat}, - α::AbstractScalarOrVec{<:AbstractFloat}, -) - isadapted = false - if i <= n_adapts - i == 1 && Adaptation.initialize!(adaptor, n_adapts) - adapt!(adaptor, θ, α) - i == n_adapts && finalize!(adaptor) - h = update(h, adaptor) - κ = update(κ, adaptor) - isadapted = true - end - return h, κ, isadapted -end - -function Adaptation.adapt!( - h::Hamiltonian, - κ::AbstractMCMCKernel, - adaptor::AbstractAdaptor, - i::Int, - n_adapts::Int, - z::PhasePoint, + z_or_theta::PositionOrPhasePoint, α::AbstractScalarOrVec{<:AbstractFloat}, ) - isadapted = false - if i <= n_adapts + adapt = i <= n_adapts + if adapt i == 1 && Adaptation.initialize!(adaptor, n_adapts) - adapt!(adaptor, z, α) + adapt!(adaptor, z_or_theta, α) i == n_adapts && finalize!(adaptor) h = update(h, adaptor) κ = update(κ, adaptor) - isadapted = true end - return h, κ, isadapted + return h, κ, adapt end """ @@ -169,7 +147,7 @@ end progress::Bool=false ) Sample `n_samples` samples using the proposal `κ` under Hamiltonian `h`. -- The randomness is controlled by `rng`. +- The randomness is controlled by `rng`. - If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used. - The initial point is given by `θ`. - The adaptor is set by `adaptor`, for which the default is no adaptation. diff --git a/test/adaptation.jl b/test/adaptation.jl index 553ba32e..518b4fd6 100644 --- a/test/adaptation.jl +++ b/test/adaptation.jl @@ -34,15 +34,23 @@ function runnuts_nutpie(ℓπ, metric::DiagEuclideanMetric; n_samples=10_000) κ = AdvancedHMC.make_kernel(nuts, integrator) # Constructing like this until we've settled on a different interface adaptor = AdvancedHMC.StanHMCAdaptor( - AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), + AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)), AdvancedHMC.StepSizeAdaptor(nuts.δ, integrator) ) samples, stats = sample(h, κ, θ_init, n_samples, adaptor, n_adapts; verbose=false) return (samples=samples, stats=stats, adaptor=adaptor) end +""" +Computes the condition number of a covariance matrix `cov::AbstractMatrix` after preconditioning with the (diagonal) mass matrix estimated in `a::DiagMatrixEstimator`. + +This is a simple but serviceable proxy for eventual sampling efficiency, but see also https://arxiv.org/abs/1905.09813 for a more involved estimate. + +(A lower number generally means that the estimated mass matrix is better). +""" preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Diagonal(a.var)) \ cov / sqrt(Diagonal(a.var))) @testset "Adaptation" begin + Random.seed!(1) # Check that the estimated variance is approximately correct. @testset "Online v.s. naive v.s. true var/cov estimation" begin D = 10 @@ -159,9 +167,8 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia @testset "Adapted mass v.s. true variance" begin D = 10 n_tests = 5 - @testset "DiagEuclideanMetric" begin + @testset "'Diagonal' MvNormal target" begin for _ in 1:n_tests - Random.seed!(1) # Random variance σ² = 1 .+ abs.(randn(D)) @@ -183,7 +190,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia end end - @testset "DenseEuclideanMetric" begin + @testset "'Dense' MvNormal target" begin n_nutpie_superior = 0 for _ in 1:n_tests # Random covariance @@ -197,16 +204,16 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2 # For this target, Nutpie will NOT converge towards the true variances, even after infinite draws. - # HOWEVER, it will asymptotically (but also generally more quickly than Stan) + # HOWEVER, it will asymptotically (but also generally more quickly than Stan) # find the best preconditioner for the target. - # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing. + # As these are statistical algorithms, superiority is not always guaranteed, hence this way of testing. res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D)) n_nutpie_superior += preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ) res = runnuts(ℓπ, DenseEuclideanMetric(D)) @test res.adaptor.pc.cov ≈ Σ rtol = 0.25 end - @test n_nutpie_superior > n_tests / 2 + @test n_nutpie_superior > 1 + n_tests / 2 end end From 76a13737e422686ef75224c5a1e44ad0e0e58c84 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 26 Nov 2025 10:30:32 +0100 Subject: [PATCH 09/14] improve docstring for NutpieVar --- src/adaptation/massmatrix.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index 74269647..f68997d4 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -155,17 +155,28 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat} end """ + NutpieVar + Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475 -Expected to converge faster and to a better mass matrix than WelfordVar. +Expected to converge faster and to a better mass matrix than [`WelfordVar`](@ref), for which it is a drop-in replacement. + +Can be initialized via `NutpieVar(sz)` where `sz` is either a `Tuple{Int}` or a `Tuple{Int,Int}`. -Can be initialized via NutpieVar(sz) where sz is either a `Tuple{Int}` or a `Tuple{Int,Int}`. +# Fields + +$(FIELDS) """ mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <: DiagMatrixEstimator{T} + "Online variance estimator of the posterior positions." position_estimator::WelfordVar{T,E,V} + "Online variance estimator of the posterior gradients." gradient_estimator::WelfordVar{T,E,V} + "The number of observations collected so far." n::Int + "The minimal number of observations after which the estimate of the variances can be updated." n_min::Int + "The estimated variances - initialized to ones, updated after calling [`update!`](@ref) if `n > n_min`." var::V function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V} return new{eltype(E),E,V}( @@ -225,9 +236,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint) end # Ref: https://github.com/pymc-devs/nutpie -function get_estimation(nv::NutpieVar) - return sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) -end +get_estimation(nv::NutpieVar) = sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) ## Dense mass matrix adaptor From 45a191522e68871fa987832d184997e744e5502a Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 26 Nov 2025 10:32:11 +0100 Subject: [PATCH 10/14] remove superfluous white space --- src/adaptation/massmatrix.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index f68997d4..bfcef144 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -236,7 +236,7 @@ function Base.push!(nv::NutpieVar, z::PhasePoint) end # Ref: https://github.com/pymc-devs/nutpie -get_estimation(nv::NutpieVar) = sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) +get_estimation(nv::NutpieVar) = sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator)) ## Dense mass matrix adaptor From 3d47e8a26037f8b99fdf55a2600ca13757ac6ae4 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 26 Nov 2025 11:04:26 +0100 Subject: [PATCH 11/14] fix JET tests --- src/integrator.jl | 20 +++++++++----------- src/riemannian/integrator.jl | 6 +++--- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/integrator.jl b/src/integrator.jl index 004028e1..449154c5 100644 --- a/src/integrator.jl +++ b/src/integrator.jl @@ -89,14 +89,14 @@ Leapfrog integrator with randomly "jittered" step size `ϵ` for every trajectory $(TYPEDFIELDS) # Description -This is the same as `LeapFrog`(@ref) but with a "jittered" step size. This means -that at the beginning of each trajectory we sample a step size `ϵ` by adding or -subtracting from the nominal/base step size `ϵ0` some random proportion of `ϵ0`, +This is the same as `LeapFrog`(@ref) but with a "jittered" step size. This means +that at the beginning of each trajectory we sample a step size `ϵ` by adding or +subtracting from the nominal/base step size `ϵ0` some random proportion of `ϵ0`, with the proportion specified by `jitter`, i.e. `ϵ = ϵ0 - jitter * ϵ0 * rand()`. p Jittering might help alleviate issues related to poor interactions with a fixed step size: -- In regions with high "curvature" the current choice of step size might mean over-shoot - leading to almost all steps being rejected. Randomly sampling the step size at the +- In regions with high "curvature" the current choice of step size might mean over-shoot + leading to almost all steps being rejected. Randomly sampling the step size at the beginning of the trajectories can therefore increase the probability of escaping such high-curvature regions. - Exact periodicity of the simulated trajectories might occur, i.e. you might be so @@ -168,7 +168,7 @@ $(TYPEDFIELDS) # Description -Tempering can potentially allow greater exploration of the posterior, e.g. +Tempering can potentially allow greater exploration of the posterior, e.g. in a multi-modal posterior jumps between the modes can be more likely to occur. """ struct TemperedLeapfrog{FT<:AbstractFloat,T<:AbstractScalarOrVec{FT}} <: AbstractLeapfrog{T} @@ -226,9 +226,7 @@ function step( ϵ = fwd ? step_size(lf) : -step_size(lf) ϵ = ϵ' - if FullTraj - res = Vector{P}(undef, n_steps) - end + res = FullTraj ? Vector{P}(undef, n_steps) : nothing (; θ, r) = z (; value, gradient) = z.ℓπ @@ -248,12 +246,12 @@ function step( # Create a new phase point by caching the logdensity and gradient z = phasepoint(h, θ, r; ℓπ=DualValue(value, gradient)) # Update result - if FullTraj + if !isnothing(res) res[i] = z end if !isfinite(z) # Remove undef - if FullTraj + if !isnothing(res) resize!(res, i) end break diff --git a/src/riemannian/integrator.jl b/src/riemannian/integrator.jl index 6ce59476..a6d2de5f 100644 --- a/src/riemannian/integrator.jl +++ b/src/riemannian/integrator.jl @@ -8,7 +8,7 @@ Generalized leapfrog integrator with fixed step size `ϵ`. $(TYPEDFIELDS) -## References +## References 1. Girolami, Mark, and Ben Calderhead. "Riemann manifold Langevin and Hamiltonian Monte Carlo methods." Journal of the Royal Statistical Society Series B: Statistical Methodology 73, no. 2 (2011): 123-214. """ @@ -29,7 +29,7 @@ end # TODO(Kai) make sure vectorization works # TODO(Kai) check if tempering is valid -# TODO(Kai) abstract out the 3 main steps and merge with `step` in `integrator.jl` +# TODO(Kai) abstract out the 3 main steps and merge with `step` in `integrator.jl` function step( lf::GeneralizedLeapfrog{T}, h::Hamiltonian, @@ -59,7 +59,7 @@ function step( #r = temper(lf, r, (i=i, is_half=true), n_steps) # eq (16) of Girolami & Calderhead (2011) r_half = r_init - local cache + local cache = nothing for j in 1:(lf.n) # Reuse cache for the first iteration if j == 1 From 5888c0f4d01dbf43f9afb44d4cc8eb54e8d77dbb Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Thu, 27 Nov 2025 10:38:41 +0100 Subject: [PATCH 12/14] add entry to history, bump version --- HISTORY.md | 7 +++++++ Project.toml | 2 +- docs/src/api.md | 14 +++++++------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 038968ef..a9daf473 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,12 @@ # AdvancedHMC Changelog +## 0.8.4 + + - Introduces an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), + currently to be initialized for a `metric` of type `DiagEuclideanMetric` + via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))` + until a new interface is introduced in an upcoming breaking release to specify the method of adaptation. + ## 0.8.0 - To make an MCMC transtion from phasepoint `z` using trajectory `τ`(or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)`(if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`). diff --git a/Project.toml b/Project.toml index 8c32e813..f38dda02 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "AdvancedHMC" uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" -version = "0.8.3" +version = "0.8.4" [deps] AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001" diff --git a/docs/src/api.md b/docs/src/api.md index c09026ec..a588929a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -31,15 +31,15 @@ where `ϵ` is the step size of leapfrog integration. ### Adaptor (`adaptor`) - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)` - + + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric` - + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), + + There is an experimental way to improve the *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)), currently to be initialized for a `metric` of type `DiagEuclideanMetric` - via `mma = AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹))` - until a new interface is introduced to specify the method of adaptation. + via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))` + until a new interface is introduced in an upcoming breaking release to specify the method of adaptation. - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)` - + + It uses Nesterov's dual averaging with `δ` as the target acceptance rate. - Combine the two above *naively*: `NaiveHMCAdaptor(mma, ssa)` - Combine the first two using Stan's windowed adaptation: `StanHMCAdaptor(mma, ssa)` @@ -64,12 +64,12 @@ sample( Draw `n_samples` samples using the kernel `κ` under the Hamiltonian system `h` - The randomness is controlled by `rng`. - + + If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used. - The initial point is given by `θ`. - The adaptor is set by `adaptor`, for which the default is no adaptation. - + + It will perform `n_adapts` steps of adaptation, for which the default is `1_000` or 10% of `n_samples`, whichever is lower. - `drop_warmup` specifies whether to drop samples. - `verbose` controls the verbosity. From ca680bbb491ffae38ce897f172be8eaaa7ef1eae Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Thu, 27 Nov 2025 11:38:33 +0100 Subject: [PATCH 13/14] fix NutpieVar docstring --- src/adaptation/massmatrix.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl index bfcef144..13f360e3 100644 --- a/src/adaptation/massmatrix.jl +++ b/src/adaptation/massmatrix.jl @@ -157,7 +157,7 @@ end """ NutpieVar -Nutpie-style diagonal mass matrix estimator (using positions and gradients) - not exported yet due to https://github.com/TuringLang/AdvancedHMC.jl/issues/475 +Nutpie-style diagonal mass matrix estimator (using positions and gradients). Expected to converge faster and to a better mass matrix than [`WelfordVar`](@ref), for which it is a drop-in replacement. From e82cc19bade5d4d561811a426d465ba63fa5a2bc Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Thu, 27 Nov 2025 12:03:49 +0100 Subject: [PATCH 14/14] increase number of tests for mass matrix adaptation --- test/adaptation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/adaptation.jl b/test/adaptation.jl index 518b4fd6..df72c159 100644 --- a/test/adaptation.jl +++ b/test/adaptation.jl @@ -166,7 +166,7 @@ preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Dia @testset "Adapted mass v.s. true variance" begin D = 10 - n_tests = 5 + n_tests = 10 @testset "'Diagonal' MvNormal target" begin for _ in 1:n_tests