diff --git a/HISTORY.md b/HISTORY.md
index 038968ef..a9daf473 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,12 @@
 # AdvancedHMC Changelog

+## 0.8.4
+
+ - Introduces an experimental way to improve *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)).
+   Until a new interface for specifying the adaptation method is introduced in an upcoming breaking release,
+   it is initialized for a `metric` of type `DiagEuclideanMetric`
+   via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`.
+
 ## 0.8.0

 - To make an MCMC transition from phasepoint `z` using trajectory `τ` (or HMCKernel `κ`) under Hamiltonian `h`, use `transition(h, τ, z)` or `transition(rng, h, τ, z)` (if using HMCKernel, use `transition(h, κ, z)` or `transition(rng, h, κ, z)`).
diff --git a/Project.toml b/Project.toml
index 8c32e813..f38dda02 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "AdvancedHMC"
 uuid = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d"
-version = "0.8.3"
+version = "0.8.4"

 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
diff --git a/docs/src/api.md b/docs/src/api.md
index 54b5939d..a588929a 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -31,11 +31,15 @@ where `ϵ` is the step size of leapfrog integration.
 ### Adaptor (`adaptor`)

   - Adapt the mass matrix `metric` of the Hamiltonian dynamics: `mma = MassMatrixAdaptor(metric)`
-    
+
       + This is lowered to `UnitMassMatrix`, `WelfordVar` or `WelfordCov` based on the type of the mass matrix `metric`
+      + There is an experimental way to improve *diagonal* mass matrix adaptation using gradient information (similar to [nutpie](https://github.com/pymc-devs/nutpie)).
+        Until a new interface for specifying the adaptation method is introduced in an upcoming breaking release,
+        it is initialized for a `metric` of type `DiagEuclideanMetric`
+        via `mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))`.
   - Adapt the step size of the leapfrog integrator `integrator`: `ssa = StepSizeAdaptor(δ, integrator)`
-    
+
       + It uses Nesterov's dual averaging with `δ` as the target acceptance rate.
   - Combine the two above *naively*: `NaiveHMCAdaptor(mma, ssa)`
   - Combine the first two using Stan's windowed adaptation: `StanHMCAdaptor(mma, ssa)`
@@ -60,12 +64,12 @@ sample(

 Draw `n_samples` samples using the kernel `κ` under the Hamiltonian system `h`.

   - The randomness is controlled by `rng`.
-    
+
       + If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used.
   - The initial point is given by `θ`.
   - The adaptor is set by `adaptor`, for which the default is no adaptation.
-    
+
       + It will perform `n_adapts` steps of adaptation, for which the default is `1_000` or 10% of `n_samples`, whichever is lower.
   - `drop_warmup` specifies whether to drop the warm-up (adaptation) samples.
   - `verbose` controls the verbosity.
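A minimal end-to-end sketch of the construction documented above, assuming the README-style `LogDensityProblems` setup: the standard-normal target and all variable names here are illustrative, and only the `mma` line differs from stock AdvancedHMC usage.

```julia
using AdvancedHMC, ForwardDiff, LogDensityProblems

# Illustrative standard-normal target via the LogDensityProblems interface.
struct LogTargetDensity
    dim::Int
end
LogDensityProblems.logdensity(::LogTargetDensity, θ) = -sum(abs2, θ) / 2
LogDensityProblems.dimension(p::LogTargetDensity) = p.dim
function LogDensityProblems.capabilities(::Type{LogTargetDensity})
    return LogDensityProblems.LogDensityOrder{0}()
end

D = 10
ℓπ = LogTargetDensity(D)
θ₀ = rand(D)
n_samples, n_adapts = 2_000, 1_000

metric = DiagEuclideanMetric(D)
h = Hamiltonian(metric, ℓπ, ForwardDiff)
integrator = Leapfrog(find_good_stepsize(h, θ₀))
κ = HMCKernel(Trajectory{MultinomialTS}(integrator, GeneralisedNoUTurn()))

# The experimental gradient-based adaptor, in place of `MassMatrixAdaptor(metric)`:
mma = AdvancedHMC.NutpieVar(size(metric); var=copy(metric.M⁻¹))
adaptor = StanHMCAdaptor(mma, StepSizeAdaptor(0.8, integrator))

samples, stats = sample(h, κ, θ₀, n_samples, adaptor, n_adapts)
```

Inside `StanHMCAdaptor` the estimator then acts as a drop-in replacement for `WelfordVar`, which is exactly how the `runnuts_nutpie` helper in the test changes below constructs it.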
diff --git a/src/AdvancedHMC.jl b/src/AdvancedHMC.jl
index b25710d5..5db7644c 100644
--- a/src/AdvancedHMC.jl
+++ b/src/AdvancedHMC.jl
@@ -72,7 +72,7 @@ export find_good_eps
 include("adaptation/Adaptation.jl")
 using .Adaptation
 import .Adaptation:
-    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation
+    StepSizeAdaptor, MassMatrixAdaptor, StanHMCAdaptor, NesterovDualAveraging, NoAdaptation, PositionOrPhasePoint

 # Helpers for initializing adaptors via AHMC structs

@@ -114,6 +114,7 @@ export StepSizeAdaptor,
     MassMatrixAdaptor,
     UnitMassMatrix,
     WelfordVar,
+    NutpieVar,
     WelfordCov,
     NaiveHMCAdaptor,
     StanHMCAdaptor,
diff --git a/src/abstractmcmc.jl b/src/abstractmcmc.jl
index 413e9de6..f6ce4000 100644
--- a/src/abstractmcmc.jl
+++ b/src/abstractmcmc.jl
@@ -196,7 +196,7 @@ function AbstractMCMC.step(

     # Adapt h and spl.
     tstat = stat(t)
-    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate)
+    h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate)
     tstat = merge(tstat, (is_adapt=isadapted,))

     # Compute next transition and state.
diff --git a/src/adaptation/Adaptation.jl b/src/adaptation/Adaptation.jl
index 4f2fde83..10a9a980 100644
--- a/src/adaptation/Adaptation.jl
+++ b/src/adaptation/Adaptation.jl
@@ -4,13 +4,13 @@ export Adaptation
 using LinearAlgebra: LinearAlgebra
 using Statistics: Statistics

-using ..AdvancedHMC: AbstractScalarOrVec
+using ..AdvancedHMC: AbstractScalarOrVec, PhasePoint

 using DocStringExtensions

 """
 $(TYPEDEF)

-Abstract type for HMC adaptors. 
+Abstract type for HMC adaptors.
 """
 abstract type AbstractAdaptor end
@@ -21,12 +21,17 @@ function initialize! end
 function finalize! end

 export AbstractAdaptor, adapt!, initialize!, finalize!, reset!, getϵ, getM⁻¹

+get_position(x::PhasePoint) = x.θ
+get_position(x::AbstractVecOrMat{<:AbstractFloat}) = x
+const PositionOrPhasePoint = Union{AbstractVecOrMat{<:AbstractFloat},PhasePoint}
+
 struct NoAdaptation <: AbstractAdaptor end
 export NoAdaptation

 include("stepsize.jl")
 export StepSizeAdaptor, NesterovDualAveraging
+
 include("massmatrix.jl")
-export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, WelfordCov
+export MassMatrixAdaptor, UnitMassMatrix, WelfordVar, NutpieVar, WelfordCov
@@ -47,18 +52,14 @@ getϵ(ca::NaiveHMCAdaptor) = getϵ(ca.ssa)

 # TODO: implement consensus adaptor
 function adapt!(
     nca::NaiveHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
-    adapt!(nca.ssa, θ, α)
-    adapt!(nca.pc, θ, α)
-    return nothing
-end
-function reset!(aca::NaiveHMCAdaptor)
-    reset!(aca.ssa)
-    reset!(aca.pc)
+    adapt!(nca.ssa, z_or_theta, α)
+    adapt!(nca.pc, z_or_theta, α)
     return nothing
 end
+
 initialize!(adaptor::NaiveHMCAdaptor, n_adapts::Int) = nothing
 finalize!(aca::NaiveHMCAdaptor) = finalize!(aca.ssa)
diff --git a/src/adaptation/massmatrix.jl b/src/adaptation/massmatrix.jl
index 105d3bae..13f360e3 100644
--- a/src/adaptation/massmatrix.jl
+++ b/src/adaptation/massmatrix.jl
@@ -9,16 +9,18 @@ finalize!(::MassMatrixAdaptor) = nothing

 function adapt!(
     adaptor::MassMatrixAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
-    α::AbstractScalarOrVec{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
+    ::AbstractScalarOrVec{<:AbstractFloat},
     is_update::Bool=true,
 )
-    resize_adaptor!(adaptor, size(θ))
-    push!(adaptor, θ)
+    resize_adaptor!(adaptor, size(get_position(z_or_theta)))
+    push!(adaptor, z_or_theta)
     is_update && update!(adaptor)
     return nothing
 end

+Base.push!(a::MassMatrixAdaptor, z_or_theta::PositionOrPhasePoint) = push!(a, get_position(z_or_theta))
+
 ## Unit mass matrix adaptor

 struct UnitMassMatrix{T<:AbstractFloat} <: MassMatrixAdaptor end
@@ -39,7 +41,7 @@ getM⁻¹(::UnitMassMatrix{T}) where {T} = LinearAlgebra.UniformScaling{T}(one(T

 function adapt!(
     ::UnitMassMatrix,
-    ::AbstractVecOrMat{<:AbstractFloat},
+    ::PositionOrPhasePoint,
     ::AbstractScalarOrVec{<:AbstractFloat},
     is_update::Bool=true,
 )
@@ -47,7 +49,6 @@ function adapt!(
 end

 ## Diagonal mass matrix adaptor
-
 abstract type DiagMatrixEstimator{T} <: MassMatrixAdaptor end

 getM⁻¹(ve::DiagMatrixEstimator) = ve.var
@@ -70,7 +71,7 @@ NaiveVar{T}(sz::Tuple{Int,Int}) where {T<:AbstractFloat} = NaiveVar(Vector{Matri

 NaiveVar(sz::Union{Tuple{Int},Tuple{Int,Int}}) = NaiveVar{Float64}(sz)

-Base.push!(nv::NaiveVar, s::AbstractVecOrMat) = push!(nv.S, s)
+Base.push!(nv::NaiveVar, s::AbstractVecOrMat{<:AbstractFloat}) = push!(nv.S, s)

 reset!(nv::NaiveVar) = resize!(nv.S, 0)
@@ -135,7 +136,7 @@ function reset!(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return nothing
 end

-function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T}
+function Base.push!(wv::WelfordVar, s::AbstractVecOrMat{T}) where {T<:AbstractFloat}
     wv.n += 1
     (; δ, μ, M, n) = wv
     n = T(n)
@@ -153,6 +154,90 @@ function get_estimation(wv::WelfordVar{T}) where {T<:AbstractFloat}
     return n / ((n + 5) * (n - 1)) * M .+ ϵ * (5 / (n + 5))
 end

+"""
+    NutpieVar
+
+Nutpie-style diagonal mass matrix estimator (using positions and gradients).
+
+Expected to converge faster, and to a better mass matrix, than [`WelfordVar`](@ref), for which it is a drop-in replacement.
+
+Can be initialized via `NutpieVar(sz)`, where `sz` is either a `Tuple{Int}` or a `Tuple{Int,Int}`.
+
+# Fields
+
+$(FIELDS)
+"""
+mutable struct NutpieVar{T<:AbstractFloat,E<:AbstractVecOrMat{T},V<:AbstractVecOrMat{T}} <:
+               DiagMatrixEstimator{T}
+    "Online variance estimator of the posterior positions."
+    position_estimator::WelfordVar{T,E,V}
+    "Online variance estimator of the posterior gradients."
+    gradient_estimator::WelfordVar{T,E,V}
+    "The number of observations collected so far."
+    n::Int
+    "The minimal number of observations after which the estimate of the variances can be updated."
+    n_min::Int
+    "The estimated variances, initialized to ones and updated after calling [`update!`](@ref) once `n > n_min`."
+    var::V
+    function NutpieVar(n::Int, n_min::Int, μ::E, M::E, δ::E, var::V) where {E,V}
+        return new{eltype(E),E,V}(
+            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)),
+            WelfordVar(n, n_min, copy(μ), copy(M), copy(δ), copy(var)),
+            n, n_min, var,
+        )
+    end
+end
+
+function Base.show(io::IO, ::NutpieVar{T}) where {T}
+    return print(io, "NutpieVar{", T, "} adaptor")
+end
+
+function NutpieVar{T}(
+    sz::Union{Tuple{Int},Tuple{Int,Int}}=(2,); n_min::Int=10, var=ones(T, sz)
+) where {T<:AbstractFloat}
+    return NutpieVar(0, n_min, zeros(T, sz), zeros(T, sz), zeros(T, sz), var)
+end
+
+function NutpieVar(sz::Union{Tuple{Int},Tuple{Int,Int}}; kwargs...)
+    return NutpieVar{Float64}(sz; kwargs...)
+end
+
+function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int,Int}) where {T<:AbstractFloat}
+    if size_θ != size(nv.var)
+        @assert nv.n == 0 "Cannot resize a var estimator when it contains samples."
+        resize_adaptor!(nv.position_estimator, size_θ)
+        resize_adaptor!(nv.gradient_estimator, size_θ)
+        nv.var = ones(T, size_θ)
+    end
+end
+
+function resize_adaptor!(nv::NutpieVar{T}, size_θ::Tuple{Int}) where {T<:AbstractFloat}
+    length_θ = first(size_θ)
+    if length_θ != size(nv.var, 1)
+        @assert nv.n == 0 "Cannot resize a var estimator when it contains samples."
+        resize_adaptor!(nv.position_estimator, size_θ)
+        resize_adaptor!(nv.gradient_estimator, size_θ)
+        fill!(resize!(nv.var, length_θ), T(1))
+    end
+end
+
+function reset!(nv::NutpieVar)
+    nv.n = 0
+    reset!(nv.position_estimator)
+    reset!(nv.gradient_estimator)
+end
+
+Base.push!(::NutpieVar, x::AbstractVecOrMat{<:AbstractFloat}) = error("`NutpieVar` adaptation requires position and gradient information!")
+
+function Base.push!(nv::NutpieVar, z::PhasePoint)
+    nv.n += 1
+    push!(nv.position_estimator, z.θ)
+    push!(nv.gradient_estimator, z.ℓπ.gradient)
+    return nothing
+end
+
+# Ref: https://github.com/pymc-devs/nutpie
+get_estimation(nv::NutpieVar) = sqrt.(get_estimation(nv.position_estimator) ./ get_estimation(nv.gradient_estimator))
+
 ## Dense mass matrix adaptor

 abstract type DenseMatrixEstimator{T} <: MassMatrixAdaptor end
@@ -175,7 +260,7 @@ end

 NaiveCov{T}(sz::Tuple{Int}) where {T<:AbstractFloat} = NaiveCov(Vector{Vector{T}}())

-Base.push!(nc::NaiveCov, s::AbstractVector) = push!(nc.S, s)
+Base.push!(nc::NaiveCov, s::AbstractVector{<:AbstractFloat}) = push!(nc.S, s)

 reset!(nc::NaiveCov{T}) where {T} = resize!(nc.S, 0)
@@ -225,7 +310,7 @@ function reset!(wc::WelfordCov{T}) where {T<:AbstractFloat}
     return nothing
 end

-function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T}
+function Base.push!(wc::WelfordCov, s::AbstractVector{T}) where {T<:AbstractFloat}
     wc.n += 1
     (; δ, μ, n, M) = wc
     n = T(n)
diff --git a/src/adaptation/stan_adaptor.jl b/src/adaptation/stan_adaptor.jl
index b36a2259..931e741a 100644
--- a/src/adaptation/stan_adaptor.jl
+++ b/src/adaptation/stan_adaptor.jl
@@ -136,20 +136,20 @@ is_window_end(a::StanHMCAdaptor) = a.state.i in a.state.window_splits

 function adapt!(
     tp::StanHMCAdaptor,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     tp.state.i += 1

-    adapt!(tp.ssa, θ, α)
+    adapt!(tp.ssa, z_or_theta, α)

-    resize_adaptor!(tp.pc, size(θ)) # Resize pre-conditioner if necessary.
+    resize_adaptor!(tp.pc, size(get_position(z_or_theta))) # Resize pre-conditioner if necessary.

     # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
     if is_in_window(tp)
         # We accumulate stats from θ online and only trigger the update of M⁻¹ at the end of the window.
         is_update_M⁻¹ = is_window_end(tp)
-        adapt!(tp.pc, θ, α, is_update_M⁻¹)
+        adapt!(tp.pc, z_or_theta, α, is_update_M⁻¹)
     end

     if is_window_end(tp)
diff --git a/src/adaptation/stepsize.jl b/src/adaptation/stepsize.jl
index 2afbb651..cacb463d 100644
--- a/src/adaptation/stepsize.jl
+++ b/src/adaptation/stepsize.jl
@@ -174,7 +174,7 @@ end
 # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/stepsize_adaptation.hpp
 # Note: This function is not merged with `adapt!` to emphasize the fact that
 #       step size adaptation is not dependent on `θ`.
-# Note 2: `da.state` and `α` support vectorised HMC but should do so together. 
+# Note 2: `da.state` and `α` support vectorised HMC but should do so together.
 function adapt_stepsize!(
     da::NesterovDualAveraging{T}, α::AbstractScalarOrVec{T}
 ) where {T<:AbstractFloat}
@@ -211,7 +211,7 @@ end

 function adapt!(
     da::NesterovDualAveraging,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    ::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
     adapt_stepsize!(da, α)
diff --git a/src/sampler.jl b/src/sampler.jl
index 3e477ba3..1b282383 100644
--- a/src/sampler.jl
+++ b/src/sampler.jl
@@ -60,11 +60,11 @@ end
 function Adaptation.adapt!(
     h::Hamiltonian,
     κ::AbstractMCMCKernel,
-    adaptor::Adaptation.NoAdaptation,
-    i::Int,
-    n_adapts::Int,
-    θ::AbstractVecOrMat{<:AbstractFloat},
-    α::AbstractScalarOrVec{<:AbstractFloat},
+    ::Adaptation.NoAdaptation,
+    ::Int,
+    ::Int,
+    ::PositionOrPhasePoint,
+    ::AbstractScalarOrVec{<:AbstractFloat},
 )
     return h, κ, false
 end
@@ -75,19 +75,18 @@ function Adaptation.adapt!(
     adaptor::AbstractAdaptor,
     i::Int,
     n_adapts::Int,
-    θ::AbstractVecOrMat{<:AbstractFloat},
+    z_or_theta::PositionOrPhasePoint,
     α::AbstractScalarOrVec{<:AbstractFloat},
 )
-    isadapted = false
-    if i <= n_adapts
+    adapt = i <= n_adapts
+    if adapt
         i == 1 && Adaptation.initialize!(adaptor, n_adapts)
-        adapt!(adaptor, θ, α)
+        adapt!(adaptor, z_or_theta, α)
         i == n_adapts && finalize!(adaptor)
         h = update(h, adaptor)
         κ = update(κ, adaptor)
-        isadapted = true
     end
-    return h, κ, isadapted
+    return h, κ, adapt
 end

 """
@@ -148,7 +147,7 @@ end
     progress::Bool=false
 )
 Sample `n_samples` samples using the proposal `κ` under Hamiltonian `h`.
-- The randomness is controlled by `rng`. 
+- The randomness is controlled by `rng`.
 - If `rng` is not provided, the default random number generator (`Random.default_rng()`) will be used.
 - The initial point is given by `θ`.
 - The adaptor is set by `adaptor`, for which the default is no adaptation.
@@ -185,7 +184,7 @@ function sample(
         t = transition(rng, h, κ, t.z)
         # Adapt h and κ; the adaptor is the only thing mutated here
         tstat = stat(t)
-        h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z.θ, tstat.acceptance_rate)
+        h, κ, isadapted = adapt!(h, κ, adaptor, i, n_adapts, t.z, tstat.acceptance_rate)
         if isadapted
             num_divergent_transitions_during_adaption += tstat.numerical_error
         else
diff --git a/test/adaptation.jl b/test/adaptation.jl
index 346423ea..df72c159 100644
--- a/test/adaptation.jl
+++ b/test/adaptation.jl
@@ -1,6 +1,8 @@
 using ReTest, LinearAlgebra, Distributions, AdvancedHMC, Random, ForwardDiff
+using AdvancedHMC:
+    PhasePoint, DualValue
 using AdvancedHMC.Adaptation:
-    WelfordVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, get_estimation, reset!
+    DiagMatrixEstimator, WelfordVar, NutpieVar, NaiveVar, WelfordCov, NaiveCov, get_estimation, reset!
 function runnuts(ℓπ, metric; n_samples=10_000)
     D = size(metric, 1)
@@ -18,7 +20,37 @@
     return (samples=samples, stats=stats, adaptor=adaptor)
 end

+# Temporary function until we've settled on a different interface
+function runnuts_nutpie(ℓπ, metric::DiagEuclideanMetric; n_samples=10_000)
+    D = size(metric, 1)
+    n_adapts = 5_000
+    θ_init = rand(D)
+    rng = MersenneTwister(0)
+    nuts = NUTS(0.8)
+    h = Hamiltonian(metric, ℓπ, ForwardDiff)
+    step_size = AdvancedHMC.make_step_size(rng, nuts, h, θ_init)
+    integrator = AdvancedHMC.make_integrator(nuts, step_size)
+    κ = AdvancedHMC.make_kernel(nuts, integrator)
+    # Constructing like this until we've settled on a different interface
+    adaptor = AdvancedHMC.StanHMCAdaptor(
+        AdvancedHMC.Adaptation.NutpieVar(size(metric); var=copy(metric.M⁻¹)),
+        AdvancedHMC.StepSizeAdaptor(nuts.δ, integrator)
+    )
+    samples, stats = sample(h, κ, θ_init, n_samples, adaptor, n_adapts; verbose=false)
+    return (samples=samples, stats=stats, adaptor=adaptor)
+end
+
+"""
+Computes the condition number of a covariance matrix `cov::AbstractMatrix` after preconditioning with the (diagonal) mass matrix estimated in `a::DiagMatrixEstimator`.
+
+This is a simple but serviceable proxy for eventual sampling efficiency; see also https://arxiv.org/abs/1905.09813 for a more involved estimate.
+
+(A lower number generally means a better estimated mass matrix.)
+"""
+preconditioned_cond(a::DiagMatrixEstimator, cov::AbstractMatrix) = cond(sqrt(Diagonal(a.var)) \ cov / sqrt(Diagonal(a.var)))
+
 @testset "Adaptation" begin
+    Random.seed!(1)
     # Check that the estimated variance is approximately correct.
     @testset "Online v.s. naive v.s. true var/cov estimation" begin
         D = 10
@@ -60,15 +92,32 @@ end
 @testset "MassMatrixAdaptor constructors" begin
     θ = [0.0, 0.0, 0.0, 0.0]
+    z = PhasePoint(
+        θ, θ, DualValue(0.0, θ), DualValue(0.0, θ)
+    )
     pc1 = MassMatrixAdaptor(UnitEuclideanMetric) # default dim = 2
     pc2 = MassMatrixAdaptor(DiagEuclideanMetric)
+    # Constructing like this until we've settled on a different interface
+    pc2_nutpie = NutpieVar{Float64}((2,))
     pc3 = MassMatrixAdaptor(DenseEuclideanMetric)

-    # Var adaptor dimention should be increased to length(θ) from 2
+    # Var adaptor dimension should be increased to length(θ) from 2
     AdvancedHMC.adapt!(pc1, θ, 1.0)
     AdvancedHMC.adapt!(pc2, θ, 1.0)
+    AdvancedHMC.adapt!(pc2_nutpie, z, 1.0)
     AdvancedHMC.adapt!(pc3, θ, 1.0)
     @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ))
+    @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ))
+    @test AdvancedHMC.Adaptation.getM⁻¹(pc3) ==
+        LinearAlgebra.diagm(0 => ones(length(θ)))
+
+    # Making sure "all" MassMatrixAdaptors support getting a PhasePoint instead of a Vector
+    AdvancedHMC.adapt!(pc1, z, 1.0)
+    AdvancedHMC.adapt!(pc2, z, 1.0)
+    AdvancedHMC.adapt!(pc2_nutpie, z, 1.0)
+    AdvancedHMC.adapt!(pc3, z, 1.0)
+    @test AdvancedHMC.Adaptation.getM⁻¹(pc2) == ones(length(θ))
+    @test AdvancedHMC.Adaptation.getM⁻¹(pc2_nutpie) == ones(length(θ))
     @test AdvancedHMC.Adaptation.getM⁻¹(pc3) ==
         LinearAlgebra.diagm(0 => ones(length(θ)))
 end
@@ -82,10 +131,14 @@ end
     adaptor2 = StanHMCAdaptor(
         MassMatrixAdaptor(DiagEuclideanMetric), NesterovDualAveraging(0.8, 0.5)
     )
+    # Constructing like this until we've settled on a different interface
+    adaptor2_nutpie = StanHMCAdaptor(
+        NutpieVar{Float64}((2,)), NesterovDualAveraging(0.8, 0.5)
+    )
     adaptor3 = StanHMCAdaptor(
         MassMatrixAdaptor(DenseEuclideanMetric), NesterovDualAveraging(0.8, 0.5)
     )
-    for a in [adaptor1, adaptor2, adaptor3]
+    for a in [adaptor1, adaptor2, adaptor2_nutpie, adaptor3]
         AdvancedHMC.initialize!(a, 1_000)
         @test a.state.window_start == 76
         @test a.state.window_end == 950
         AdvancedHMC.adapt!(a, θ, 1.0)
     end
     @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2) == ones(length(θ))
+    @test AdvancedHMC.Adaptation.getM⁻¹(adaptor2_nutpie) == ones(length(θ))
     @test AdvancedHMC.Adaptation.getM⁻¹(adaptor3) ==
         LinearAlgebra.diagm(0 => ones(length(θ)))

@@ -112,26 +166,32 @@ end
 @testset "Adapted mass v.s. true variance" begin
     D = 10
-    n_tests = 5
-    @testset "DiagEuclideanMetric" begin
+    n_tests = 10
+    @testset "'Diagonal' MvNormal target" begin
         for _ in 1:n_tests
-            Random.seed!(1)
             # Random variance
             σ² = 1 .+ abs.(randn(D))
+            Σ = Diagonal(σ²)

             # Diagonal Gaussian
-            ℓπ = LogDensityDistribution(MvNormal(Diagonal(σ²)))
+            ℓπ = LogDensityDistribution(MvNormal(Σ))

             res = runnuts(ℓπ, DiagEuclideanMetric(D))
             @test res.adaptor.pc.var ≈ σ² rtol = 0.2

+            # For this target, Nutpie (without regularization) will arrive at the true variances after two draws.
+            res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
+            @test res_nutpie.adaptor.pc.var ≈ σ² rtol = 0.2
+            @test preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
+
             res = runnuts(ℓπ, DenseEuclideanMetric(D))
             @test res.adaptor.pc.cov ≈ Diagonal(σ²) rtol = 0.25
         end
     end

-    @testset "DenseEuclideanMetric" begin
+    @testset "'Dense' MvNormal target" begin
+        n_nutpie_superior = 0
         for _ in 1:n_tests
             # Random covariance
             m = randn(D, D)
@@ -143,9 +203,17 @@ end
             res = runnuts(ℓπ, DiagEuclideanMetric(D))
             @test res.adaptor.pc.var ≈ diag(Σ) rtol = 0.2

+            # For this target, Nutpie will NOT converge towards the true variances, even with infinitely many draws.
+            # HOWEVER, it will asymptotically (and generally more quickly than Stan's adaptation)
+            # find the best diagonal preconditioner for the target.
+            # As these are stochastic algorithms, superiority is not guaranteed on every run, hence this way of testing.
+            res_nutpie = runnuts_nutpie(ℓπ, DiagEuclideanMetric(D))
+            n_nutpie_superior +=
+                preconditioned_cond(res_nutpie.adaptor.pc, Σ) < preconditioned_cond(res.adaptor.pc, Σ)
+
             res = runnuts(ℓπ, DenseEuclideanMetric(D))
             @test res.adaptor.pc.cov ≈ Σ rtol = 0.25
         end
+        @test n_nutpie_superior > 1 + n_tests / 2
     end
 end
@@ -156,6 +224,10 @@ end
         res = runnuts(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1)
         @test res.adaptor.pc.var == mass_init

+        mass_init = fill(0.5, D)
+        res = runnuts_nutpie(ℓπ, DiagEuclideanMetric(mass_init); n_samples=1)
+        @test res.adaptor.pc.var == mass_init
+
         mass_init = diagm(0 => fill(0.5, D))
         res = runnuts(ℓπ, DenseEuclideanMetric(mass_init); n_samples=1)
        @test res.adaptor.pc.cov == mass_init
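A closing note on the estimator itself: the `get_estimation` rule added in `massmatrix.jl` is `sqrt.(var(positions) ./ var(gradients))`. For a Gaussian target with diagonal covariance, `Var(θᵢ) = σᵢ²` while the score `∂ᵢ log p(θ) = -θᵢ / σᵢ²` has variance `1 / σᵢ²`, so the rule returns `σᵢ²` exactly, which is what the 'Diagonal' MvNormal test above relies on. A standalone numerical sketch in plain Julia (independent of the package; all names are illustrative):

```julia
using Statistics, Random

Random.seed!(1)
D, N = 10, 100_000
σ² = 1 .+ abs.(randn(D))  # random per-dimension variances

# Draws from N(0, Diagonal(σ²)) and the corresponding scores ∇logp(θ) = -θ ./ σ².
θs = sqrt.(σ²) .* randn(D, N)
∇s = -θs ./ σ²

# Nutpie-style diagonal estimate: sqrt of (position variance ./ gradient variance).
est = vec(sqrt.(var(θs; dims=2) ./ var(∇s; dims=2)))

# For this target the estimate matches the true variances up to Monte Carlo error.
@assert isapprox(est, σ²; rtol=0.05)
```

Per the test comments above, for dense covariances this ratio does not recover the marginal variances; instead it tends toward a well-conditioned diagonal preconditioner, which is why the 'Dense' test compares `preconditioned_cond` values rather than variances.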