From 3c066957b6a90122a0005f0292ddfd44da73229c Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 15:44:35 -0500
Subject: [PATCH 1/8] add history entry, increment version

---
 HISTORY.md   | 8 +++++++-
 Project.toml | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 13f2995e..81d582ba 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,6 @@
-# Release 0.5.1
+# Release 0.6
 
+## New Algorithms
 This update adds new variational inference algorithms in light of the flexibility added in the v0.5 update.
 Specifically, the following measure-space optimization algorithms have been added:
 
@@ -7,6 +8,11 @@ Specifically, the following measure-space optimization algorithms have been adde
 - `KLMinNaturalGradDescent`
 - `KLMinSqrtNaturalGradDescent`
 
+## Interface Change
+The objective value returned by `estimate_objective` is now the value to be *minimized* by the algorithm.
+For instance, for ELBO maximization algorithms, `estimate_objective` will return the negative ELBO.
+
+## Behavior Change
 In addition, `KLMinRepGradDescent`, `KLMinRepGradProxDescent`, and `KLMinScoreGradDescent` will now throw a `RuntimeException` if the objective value estimated at each step turns out to be degenerate (`Inf` or `NaN`).
 Previously, the algorithms ran until `max_iter` even if the optimization run had failed.
 # Release 0.5
diff --git a/Project.toml b/Project.toml
index a7ff8a27..437f5216 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "AdvancedVI"
 uuid = "b5ca4192-6429-45e5-a2d9-87aec30a685c"
-version = "0.5.1"
+version = "0.6"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

From 4a3f8c7ef8263dba1f8adaa98173aba14dc6f4a8 Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 15:55:43 -0500
Subject: [PATCH 2/8] fix estimate_objective to return the objective to be minimized

---
 src/AdvancedVI.jl                             | 2 +-
 src/algorithms/abstractobjective.jl           | 9 ++-------
 src/algorithms/common.jl                      | 2 +-
 src/algorithms/klminnaturalgraddescent.jl     | 2 +-
 src/algorithms/klminsqrtnaturalgraddescent.jl | 2 +-
 src/algorithms/klminwassfwdbwd.jl             | 2 +-
 src/algorithms/repgradelbo.jl                 | 2 +-
 src/algorithms/scoregradelbo.jl               | 2 +-
 8 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/AdvancedVI.jl b/src/AdvancedVI.jl
index 7d57e32a..ff765005 100644
--- a/src/AdvancedVI.jl
+++ b/src/AdvancedVI.jl
@@ -258,7 +258,7 @@ output(::AbstractVariationalAlgorithm, ::Any) = nothing
 """
     estimate_objective([rng,] alg, q, prob; kwargs...)
 
-Estimate the variational objective associated with the algorithm `alg` targeting `prob` with respect to the variational approximation `q`.
+Estimate the variational objective to be minimized by the algorithm `alg` when approximating the target `prob` with the variational approximation `q`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/abstractobjective.jl b/src/algorithms/abstractobjective.jl
index d125c3be..65316c7e 100644
--- a/src/algorithms/abstractobjective.jl
+++ b/src/algorithms/abstractobjective.jl
@@ -3,12 +3,7 @@
 """
     AbstractVariationalObjective
 
-Abstract type for the VI algorithms supported by `AdvancedVI`.
-
-# Implementations
-To be supported by `AdvancedVI`, a VI algorithm must implement `AbstractVariationalObjective` and `estimate_objective`.
-Also, it should provide gradients by implementing the function `estimate_gradient`.
-If the estimator is stateful, it can implement `init` to initialize the state.
+Abstract type for a variational objective to be minimized by a variational algorithm.
 """
 abstract type AbstractVariationalObjective end
 
@@ -42,7 +37,7 @@ end
 """
     estimate_objective([rng,] obj, q, prob; kwargs...)
 
-Estimate the variational objective `obj` targeting `prob` with respect to the variational approximation `q`.
+Estimate the minimization objective `obj` for the variational approximation `q` targeting `prob`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/common.jl b/src/algorithms/common.jl
index f86198c0..0b99ff0d 100644
--- a/src/algorithms/common.jl
+++ b/src/algorithms/common.jl
@@ -11,7 +11,7 @@ const ParamSpaceSGD = Union{
 """
     estimate_objective([rng,] alg, q, prob; n_samples, entropy)
 
-Estimate the ELBO of the variational approximation `q` against the target log-density `prob`.
+Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/klminnaturalgraddescent.jl b/src/algorithms/klminnaturalgraddescent.jl
index e47be81c..4d2d8627 100644
--- a/src/algorithms/klminnaturalgraddescent.jl
+++ b/src/algorithms/klminnaturalgraddescent.jl
@@ -154,7 +154,7 @@ end
 """
     estimate_objective([rng,] alg, q, prob; n_samples)
 
-Estimate the ELBO of the variational approximation `q` against the target log-density `prob`.
+Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/klminsqrtnaturalgraddescent.jl b/src/algorithms/klminsqrtnaturalgraddescent.jl
index a8965baa..5df07112 100644
--- a/src/algorithms/klminsqrtnaturalgraddescent.jl
+++ b/src/algorithms/klminsqrtnaturalgraddescent.jl
@@ -129,7 +129,7 @@ end
 """
     estimate_objective([rng,] alg, q, prob; n_samples)
 
-Estimate the ELBO of the variational approximation `q` against the target log-density `prob`.
+Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/klminwassfwdbwd.jl b/src/algorithms/klminwassfwdbwd.jl
index 570321ac..ec81a5b4 100644
--- a/src/algorithms/klminwassfwdbwd.jl
+++ b/src/algorithms/klminwassfwdbwd.jl
@@ -124,7 +124,7 @@ end
 """
     estimate_objective([rng,] alg, q, prob; n_samples)
 
-Estimate the ELBO of the variational approximation `q` against the target log-density `prob`.
+Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`.
 
 # Arguments
 - `rng::Random.AbstractRNG`: Random number generator.
diff --git a/src/algorithms/repgradelbo.jl b/src/algorithms/repgradelbo.jl
index cd0fbf1b..ee2fccb8 100644
--- a/src/algorithms/repgradelbo.jl
+++ b/src/algorithms/repgradelbo.jl
@@ -114,7 +114,7 @@ function estimate_objective(
 )
     samples, entropy = reparam_with_entropy(rng, q, q, n_samples, obj.entropy)
     energy = estimate_energy_with_samples(prob, samples)
-    return energy + entropy
+    return -(energy + entropy)
 end
 
 function estimate_objective(obj::RepGradELBO, q, prob; n_samples::Int=obj.n_samples)
diff --git a/src/algorithms/scoregradelbo.jl b/src/algorithms/scoregradelbo.jl
index 18573d6d..920be6cd 100644
--- a/src/algorithms/scoregradelbo.jl
+++ b/src/algorithms/scoregradelbo.jl
@@ -61,7 +61,7 @@ function estimate_objective(
     samples = rand(rng, q, n_samples)
     ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples))
     ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples))
-    return mean(ℓπ - ℓq)
+    return -mean(ℓπ - ℓq)
 end
 
 function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_samples)

From f8c785c7b87e9cbf6164dc957f0d12af2a9b6147 Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 15:56:33 -0500
Subject: [PATCH 3/8] update tutorials to incorporate the flipped sign of the ELBO

---
 docs/src/tutorials/basic.md       | 2 +-
 docs/src/tutorials/subsampling.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/tutorials/basic.md b/docs/src/tutorials/basic.md
index 139e7d63..8998b078 100644
--- a/docs/src/tutorials/basic.md
+++ b/docs/src/tutorials/basic.md
@@ -232,7 +232,7 @@ function callback(; iteration, averaged_params, restructure, kwargs...)
 
         # Higher fidelity estimate of the ELBO on the averaged parameters
         n_samples = 256
-        elbo_callback = estimate_objective(alg, q_avg, model; n_samples)
+        elbo_callback = -estimate_objective(alg, q_avg, model; n_samples)
 
         (elbo_callback=elbo_callback, accuracy=acc)
     else
diff --git a/docs/src/tutorials/subsampling.md b/docs/src/tutorials/subsampling.md
index afa2d31d..1816efb4 100644
--- a/docs/src/tutorials/subsampling.md
+++ b/docs/src/tutorials/subsampling.md
@@ -213,7 +213,7 @@ function callback(; iteration, averaged_params, restructure, kwargs...)
 
         # Higher fidelity estimate of the ELBO on the averaged parameters
         n_samples = 256
-        elbo_callback = estimate_objective(alg_full, q_avg, model; n_samples)
+        elbo_callback = -estimate_objective(alg_full, q_avg, model; n_samples)
 
         (elbo_callback=elbo_callback, accuracy=acc, time_elapsed=time() - time_begin)
     else

From 9340817df09baf029d2c505435fc79c20fed8b7d Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 15:57:08 -0500
Subject: [PATCH 4/8] update documentation to clarify that the objective is to be minimized

---
 docs/src/general.md                     | 2 +-
 docs/src/klminnaturalgraddescent.md     | 2 +-
 docs/src/klminsqrtnaturalgraddescent.md | 2 +-
 docs/src/klminwassfwdbwd.md             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/src/general.md b/docs/src/general.md
index acc51898..26bcdc69 100644
--- a/docs/src/general.md
+++ b/docs/src/general.md
@@ -17,7 +17,7 @@ Therefore, please refer to the documentation of each different algorithm for a d
 
 ## [Monitoring the Objective Value](@id estimate_objective)
 
-Furthermore, each algorithm has an associated variational objective.
+Furthermore, each algorithm has an associated variational objective subject to *minimization*. (By convention, all objectives are minimized, never maximized.)
 The progress made by each optimization algorithm can be diagnosed by monitoring the variational objective value.
 This can be done by calling the following method.
 
diff --git a/docs/src/klminnaturalgraddescent.md b/docs/src/klminnaturalgraddescent.md
index 011e3b80..bb49203a 100644
--- a/docs/src/klminnaturalgraddescent.md
+++ b/docs/src/klminnaturalgraddescent.md
@@ -12,7 +12,7 @@ Since `KLMinNaturalGradDescent` is a measure-space algorithm, its use is restric
 KLMinNaturalGradDescent
 ```
 
-The associated objective value, which is the ELBO, can be estimated through the following:
+The associated objective value can be estimated through the following:
 
 ```@docs; canonical=false
 estimate_objective(
diff --git a/docs/src/klminsqrtnaturalgraddescent.md b/docs/src/klminsqrtnaturalgraddescent.md
index 1d5c45b7..9f3aa1a3 100644
--- a/docs/src/klminsqrtnaturalgraddescent.md
+++ b/docs/src/klminsqrtnaturalgraddescent.md
@@ -13,7 +13,7 @@ Since `KLMinSqrtNaturalGradDescent` is a measure-space algorithm, its use is res
 KLMinSqrtNaturalGradDescent
 ```
 
-The associated objective value, which is the ELBO, can be estimated through the following:
+The associated objective value can be estimated through the following:
 
 ```@docs; canonical=false
 estimate_objective(
diff --git a/docs/src/klminwassfwdbwd.md b/docs/src/klminwassfwdbwd.md
index 4c1911ff..e5e917fe 100644
--- a/docs/src/klminwassfwdbwd.md
+++ b/docs/src/klminwassfwdbwd.md
@@ -10,7 +10,7 @@ Since `KLMinWassFwdBwd` is a measure-space algorithm, its use is restricted to f
 KLMinWassFwdBwd
 ```
 
-The associated objective value, which is the ELBO, can be estimated through the following:
+The associated objective value can be estimated through the following:
 
 ```@docs; canonical=false
 estimate_objective(

From 55015c735e718a1ac5b1fbcf3e6f74ca81b55d0a Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 16:00:35 -0500
Subject: [PATCH 5/8] bump AdvancedVI version in subprojects

---
 bench/Project.toml | 2 +-
 docs/Project.toml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bench/Project.toml b/bench/Project.toml
index 875cd853..0ca4fc68 100644
--- a/bench/Project.toml
+++ b/bench/Project.toml
@@ -20,7 +20,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 ADTypes = "1"
-AdvancedVI = "0.5, 0.4"
+AdvancedVI = "0.6"
 BenchmarkTools = "1"
 Bijectors = "0.13, 0.14, 0.15"
 Distributions = "0.25.111"
diff --git a/docs/Project.toml b/docs/Project.toml
index 9f126806..084e8744 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -25,7 +25,7 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 [compat]
 ADTypes = "1"
 Accessors = "0.1"
-AdvancedVI = "0.5, 0.4"
+AdvancedVI = "0.6"
 Bijectors = "0.13.6, 0.14, 0.15"
 DataFrames = "1"
 DifferentiationInterface = "0.7"

From 49ebbbda71c3b478f299e1594291782e84d86ca1 Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 16:09:43 -0500
Subject: [PATCH 6/8] run formatter

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 HISTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/HISTORY.md b/HISTORY.md
index 81d582ba..e70364d2 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,7 @@
 # Release 0.6
 
 ## New Algorithms
+
 This update adds new variational inference algorithms in light of the flexibility added in the v0.5 update.
 Specifically, the following measure-space optimization algorithms have been added:
 

From 5c33aa62ab8fbbf4e67bf27ba720ebecfaf73f29 Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 16:09:51 -0500
Subject: [PATCH 7/8] run formatter

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 HISTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/HISTORY.md b/HISTORY.md
index e70364d2..418c5e1a 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -10,6 +10,7 @@ Specifically, the following measure-space optimization algorithms have been adde
 - `KLMinSqrtNaturalGradDescent`
 
 ## Interface Change
+
 The objective value returned by `estimate_objective` is now the value to be *minimized* by the algorithm.
 For instance, for ELBO maximization algorithms, `estimate_objective` will return the negative ELBO.
 

From b14bb9f49907b2560b9b183d7933aaf47710ecae Mon Sep 17 00:00:00 2001
From: Kyurae Kim
Date: Fri, 14 Nov 2025 16:09:56 -0500
Subject: [PATCH 8/8] run formatter

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 HISTORY.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/HISTORY.md b/HISTORY.md
index 418c5e1a..d07bf48f 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -15,6 +15,7 @@ The objective value returned by `estimate_objective` is now the value to be *min
 For instance, for ELBO maximization algorithms, `estimate_objective` will return the negative ELBO.
 
 ## Behavior Change
+
 In addition, `KLMinRepGradDescent`, `KLMinRepGradProxDescent`, and `KLMinScoreGradDescent` will now throw a `RuntimeException` if the objective value estimated at each step turns out to be degenerate (`Inf` or `NaN`).
 Previously, the algorithms ran until `max_iter` even if the optimization run had failed.
 # Release 0.5
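
A minimal sketch of the new sign convention, for downstream users of this series. The helper below is hypothetical (it is not part of the patches); `alg`, `q`, and `model` stand in for an algorithm, a variational approximation, and a target constructed as in the AdvancedVI tutorials. Only `estimate_objective(alg, q, model; n_samples)` and its flipped sign come from the patches above.

```julia
using AdvancedVI

# Hypothetical helper: recover the ELBO under the v0.6 convention, where
# `estimate_objective` returns the value the algorithm *minimizes*, i.e.,
# the negative ELBO for ELBO-maximization algorithms.
function elbo_v06(alg, q, model; n_samples::Int=256)
    return -estimate_objective(alg, q, model; n_samples)
end
```

Code written against v0.5, where `estimate_objective` returned the ELBO directly, must negate the result when upgrading; this is exactly the adjustment applied to the tutorial callbacks in PATCH 3.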