diff --git a/HISTORY.md b/HISTORY.md index 13f2995e0..d07bf48fd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,6 @@ -# Release 0.5.1 +# Release 0.6 + +## New Algorithms This update adds new variational inference algorithms in light of the flexibility added in the v0.5 update. Specifically, the following measure-space optimization algorithms have been added: @@ -7,6 +9,13 @@ Specifically, the following measure-space optimization algorithms have been adde - `KLMinNaturalGradDescent` - `KLMinSqrtNaturalGradDescent` +## Interface Change + +The objective value returned by `estimate_objective` is now the value to be *minimized* by the algorithm. +For instance, for ELBO maximization algorithms, `estimate_objective` will return the negative ELBO. + +## Behavior Change + In addition, `KLMinRepGradDescent`, `KLMinRepGradProxDescent`, `KLMinScoreGradDescent` will now throw a `RuntimeException` if the objective value estimated at each step turns out to be degenerate (`Inf` or `NaN`). Previously, the algorithms ran until `max_iter` even if the optimization run has failed. 
# Release 0.5 diff --git a/Project.toml b/Project.toml index a7ff8a27a..437f5216d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "AdvancedVI" uuid = "b5ca4192-6429-45e5-a2d9-87aec30a685c" -version = "0.5.1" +version = "0.6" [deps] ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b" diff --git a/bench/Project.toml b/bench/Project.toml index 875cd853e..0ca4fc686 100644 --- a/bench/Project.toml +++ b/bench/Project.toml @@ -20,7 +20,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] ADTypes = "1" -AdvancedVI = "0.5, 0.4" +AdvancedVI = "0.6" BenchmarkTools = "1" Bijectors = "0.13, 0.14, 0.15" Distributions = "0.25.111" diff --git a/docs/Project.toml b/docs/Project.toml index 9f126806b..084e8744f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -25,7 +25,7 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" [compat] ADTypes = "1" Accessors = "0.1" -AdvancedVI = "0.5, 0.4" +AdvancedVI = "0.6" Bijectors = "0.13.6, 0.14, 0.15" DataFrames = "1" DifferentiationInterface = "0.7" diff --git a/docs/src/general.md b/docs/src/general.md index acc518988..26bcdc694 100644 --- a/docs/src/general.md +++ b/docs/src/general.md @@ -17,7 +17,7 @@ Therefore, please refer to the documentation of each different algorithm for a d ## [Monitoring the Objective Value](@id estimate_objective) -Furthermore, each algorithm has an associated variational objective. +Furthermore, each algorithm has an associated variational objective subject to *minimization*. (By convention, we assume all objectives are minimized but never maximized.) The progress made by each optimization algorithm can be diagnosed by monitoring the variational objective value. This can be done by calling the following method. 
diff --git a/docs/src/klminnaturalgraddescent.md b/docs/src/klminnaturalgraddescent.md index 011e3b804..bb49203a2 100644 --- a/docs/src/klminnaturalgraddescent.md +++ b/docs/src/klminnaturalgraddescent.md @@ -12,7 +12,7 @@ Since `KLMinNaturalGradDescent` is a measure-space algorithm, its use is restric KLMinNaturalGradDescent ``` -The associated objective value, which is the ELBO, can be estimated through the following: +The associated objective value can be estimated through the following: ```@docs; canonical=false estimate_objective( diff --git a/docs/src/klminsqrtnaturalgraddescent.md b/docs/src/klminsqrtnaturalgraddescent.md index 1d5c45b7f..9f3aa1a3b 100644 --- a/docs/src/klminsqrtnaturalgraddescent.md +++ b/docs/src/klminsqrtnaturalgraddescent.md @@ -13,7 +13,7 @@ Since `KLMinSqrtNaturalGradDescent` is a measure-space algorithm, its use is res KLMinSqrtNaturalGradDescent ``` -The associated objective value, which is the ELBO, can be estimated through the following: +The associated objective value can be estimated through the following: ```@docs; canonical=false estimate_objective( diff --git a/docs/src/klminwassfwdbwd.md b/docs/src/klminwassfwdbwd.md index 4c1911fff..e5e917fec 100644 --- a/docs/src/klminwassfwdbwd.md +++ b/docs/src/klminwassfwdbwd.md @@ -10,7 +10,7 @@ Since `KLMinWassFwdBwd` is a measure-space algorithm, its use is restricted to f KLMinWassFwdBwd ``` -The associated objective value, which is the ELBO, can be estimated through the following: +The associated objective value can be estimated through the following: ```@docs; canonical=false estimate_objective( diff --git a/docs/src/tutorials/basic.md b/docs/src/tutorials/basic.md index 139e7d63d..8998b0789 100644 --- a/docs/src/tutorials/basic.md +++ b/docs/src/tutorials/basic.md @@ -232,7 +232,7 @@ function callback(; iteration, averaged_params, restructure, kwargs...) 
# Higher fidelity estimate of the ELBO on the averaged parameters n_samples = 256 - elbo_callback = estimate_objective(alg, q_avg, model; n_samples) + elbo_callback = -estimate_objective(alg, q_avg, model; n_samples) (elbo_callback=elbo_callback, accuracy=acc) else diff --git a/docs/src/tutorials/subsampling.md b/docs/src/tutorials/subsampling.md index afa2d31d4..1816efb44 100644 --- a/docs/src/tutorials/subsampling.md +++ b/docs/src/tutorials/subsampling.md @@ -213,7 +213,7 @@ function callback(; iteration, averaged_params, restructure, kwargs...) # Higher fidelity estimate of the ELBO on the averaged parameters n_samples = 256 - elbo_callback = estimate_objective(alg_full, q_avg, model; n_samples) + elbo_callback = -estimate_objective(alg_full, q_avg, model; n_samples) (elbo_callback=elbo_callback, accuracy=acc, time_elapsed=time() - time_begin) else diff --git a/src/AdvancedVI.jl b/src/AdvancedVI.jl index 7d57e32a0..ff7650054 100644 --- a/src/AdvancedVI.jl +++ b/src/AdvancedVI.jl @@ -258,7 +258,7 @@ output(::AbstractVariationalAlgorithm, ::Any) = nothing """ estimate_objective([rng,] alg, q, prob; kwargs...) -Estimate the variational objective associated with the algorithm `alg` targeting `prob` with respect to the variational approximation `q`. +Estimate the variational objective to be minimized by the algorithm `alg` for approximating the target `prob` with the variational approximation `q`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. diff --git a/src/algorithms/abstractobjective.jl b/src/algorithms/abstractobjective.jl index d125c3be8..65316c7e7 100644 --- a/src/algorithms/abstractobjective.jl +++ b/src/algorithms/abstractobjective.jl @@ -3,12 +3,7 @@ """ AbstractVariationalObjective -Abstract type for the VI algorithms supported by `AdvancedVI`. - -# Implementations -To be supported by `AdvancedVI`, a VI algorithm must implement `AbstractVariationalObjective` and `estimate_objective`. 
-Also, it should provide gradients by implementing the function `estimate_gradient`. -If the estimator is stateful, it can implement `init` to initialize the state. +Abstract type for a variational objective to be optimized by some variational algorithm. """ abstract type AbstractVariationalObjective end @@ -42,7 +37,7 @@ end """ estimate_objective([rng,] obj, q, prob; kwargs...) -Estimate the variational objective `obj` targeting `prob` with respect to the variational approximation `q`. +Estimate the minimization objective `obj` of the variational approximation `q` targeting `prob`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. diff --git a/src/algorithms/common.jl b/src/algorithms/common.jl index f86198c06..0b99ff0d0 100644 --- a/src/algorithms/common.jl +++ b/src/algorithms/common.jl @@ -11,7 +11,7 @@ const ParamSpaceSGD = Union{ """ estimate_objective([rng,] alg, q, prob; n_samples, entropy) -Estimate the ELBO of the variational approximation `q` against the target log-density `prob`. +Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. diff --git a/src/algorithms/klminnaturalgraddescent.jl b/src/algorithms/klminnaturalgraddescent.jl index e47be81cf..4d2d86276 100644 --- a/src/algorithms/klminnaturalgraddescent.jl +++ b/src/algorithms/klminnaturalgraddescent.jl @@ -154,7 +154,7 @@ end """ estimate_objective([rng,] alg, q, prob; n_samples) -Estimate the ELBO of the variational approximation `q` against the target log-density `prob`. +Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. 
diff --git a/src/algorithms/klminsqrtnaturalgraddescent.jl b/src/algorithms/klminsqrtnaturalgraddescent.jl index a8965baaf..5df07112a 100644 --- a/src/algorithms/klminsqrtnaturalgraddescent.jl +++ b/src/algorithms/klminsqrtnaturalgraddescent.jl @@ -129,7 +129,7 @@ end """ estimate_objective([rng,] alg, q, prob; n_samples) -Estimate the ELBO of the variational approximation `q` against the target log-density `prob`. +Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. diff --git a/src/algorithms/klminwassfwdbwd.jl b/src/algorithms/klminwassfwdbwd.jl index 570321ac6..ec81a5b47 100644 --- a/src/algorithms/klminwassfwdbwd.jl +++ b/src/algorithms/klminwassfwdbwd.jl @@ -124,7 +124,7 @@ end """ estimate_objective([rng,] alg, q, prob; n_samples) -Estimate the ELBO of the variational approximation `q` against the target log-density `prob`. +Estimate the negative ELBO of the variational approximation `q` against the target log-density `prob`. # Arguments - `rng::Random.AbstractRNG`: Random number generator. 
diff --git a/src/algorithms/repgradelbo.jl b/src/algorithms/repgradelbo.jl index cd0fbf1b2..ee2fccb8a 100644 --- a/src/algorithms/repgradelbo.jl +++ b/src/algorithms/repgradelbo.jl @@ -114,7 +114,7 @@ function estimate_objective( ) samples, entropy = reparam_with_entropy(rng, q, q, n_samples, obj.entropy) energy = estimate_energy_with_samples(prob, samples) - return energy + entropy + return -(energy + entropy) end function estimate_objective(obj::RepGradELBO, q, prob; n_samples::Int=obj.n_samples) diff --git a/src/algorithms/scoregradelbo.jl b/src/algorithms/scoregradelbo.jl index 18573d6d6..920be6cdf 100644 --- a/src/algorithms/scoregradelbo.jl +++ b/src/algorithms/scoregradelbo.jl @@ -61,7 +61,7 @@ function estimate_objective( samples = rand(rng, q, n_samples) ℓπ = map(Base.Fix1(LogDensityProblems.logdensity, prob), eachsample(samples)) ℓq = logpdf.(Ref(q), AdvancedVI.eachsample(samples)) - return mean(ℓπ - ℓq) + return -mean(ℓπ - ℓq) end function estimate_objective(obj::ScoreGradELBO, q, prob; n_samples::Int=obj.n_samples)