diff --git a/test/algorithms/klminrepgraddescent.jl b/test/algorithms/klminrepgraddescent.jl new file mode 100644 index 000000000..784ed4ac0 --- /dev/null +++ b/test/algorithms/klminrepgraddescent.jl @@ -0,0 +1,195 @@ + +@testset "KLMinRepGradDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinRepGradDescent(AD; n_samples, operator=ClipScale()) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + T = 10 + callback(; iteration, kwargs...) = (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinRepGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + + @testset "STL variance reduction" begin + @testset for n_montecarlo in [1, 10] + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + params, re = Optimisers.destructure(q_true) + obj = RepGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()) + out = DiffResults.DiffResult(zero(eltype(params)), similar(params)) + + aux = ( + rng=Random.default_rng(), + obj=obj, + problem=model, + restructure=re, + q_stop=q_true, + adtype=AD, + ) + AdvancedVI._value_and_gradient!( + AdvancedVI.estimate_repgradelbo_ad_forward, out, AD, params, aux + ) + grad = DiffResults.gradient(out) + @test norm(grad) ≈ 0 atol = 1e-5 + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradDescent(AD; n_samples=10, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy)" for entropy in + [ClosedFormEntropy(), StickingTheLandingEntropy()] + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; 
model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradDescent(AD; entropy, optimizer, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + operator = ClipScale() + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinRepGradDescent(AD; n_samples=10, operator) + alg_sub = KLMinRepGradDescent(AD; n_samples=10, subsampling, operator) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradDescent( + AD; n_samples=10, subsampling, operator=ClipScale() + ) + + q, _, _ = optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradDescent( + AD; n_samples=10, subsampling, optimizer, operator=ClipScale() + ) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminrepgraddescent_bijectors.jl b/test/algorithms/klminrepgraddescent_bijectors.jl new file mode 100644 index 000000000..0d26a309a --- /dev/null +++ b/test/algorithms/klminrepgraddescent_bijectors.jl @@ -0,0 +1,94 @@ + +@testset "KLMinRepGradDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = 
Bijectors.transformed(q_true_unconstr, binv) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinRepGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradDescent(AD; n_samples=10, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy)" for entropy in + [ClosedFormEntropy(), StickingTheLandingEntropy()] + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradDescent(AD; entropy, optimizer, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminrepgradproxdescent.jl b/test/algorithms/klminrepgradproxdescent.jl new file mode 100644 index 000000000..76372757c --- /dev/null +++ b/test/algorithms/klminrepgradproxdescent.jl @@ -0,0 +1,161 @@ + +@testset "KLMinRepGradProxDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinRepGradProxDescent(AD; n_samples) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinRepGradProxDescent(AD) + T = 10 + callback(; iteration, kwargs...) 
= (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinRepGradProxDescent(AD) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinRepGradProxDescent(AD) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradProxDescent(AD; n_samples=10) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy_zerograd)" for entropy_zerograd in [ + ClosedFormEntropyZeroGradient(), StickingTheLandingEntropyZeroGradient() + ] + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradProxDescent(AD; optimizer, entropy_zerograd) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinRepGradProxDescent(AD; n_samples=10) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, subsampling) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, subsampling) + + q, _, _ 
= optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, optimizer, subsampling) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminrepgradproxdescent_bijectors.jl b/test/algorithms/klminrepgradproxdescent_bijectors.jl new file mode 100644 index 000000000..d2714b5aa --- /dev/null +++ b/test/algorithms/klminrepgradproxdescent_bijectors.jl @@ -0,0 +1,89 @@ + +@testset "KLMinRepGradProxDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinRepGradProxDescent(AD) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = Bijectors.transformed(q_true_unconstr, binv) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinRepGradProxDescent(AD) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradProxDescent(AD; n_samples=10) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy_zerograd)" for entropy_zerograd in [ + ClosedFormEntropyZeroGradient(), StickingTheLandingEntropyZeroGradient() + ] + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, 
is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradProxDescent(AD; n_samples=10, optimizer, entropy_zerograd) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminscoregraddescent.jl b/test/algorithms/klminscoregraddescent.jl new file mode 100644 index 000000000..9a307d570 --- /dev/null +++ b/test/algorithms/klminscoregraddescent.jl @@ -0,0 +1,171 @@ + +@testset "KLMinScoreGradDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinScoreGradDescent(AD; n_samples, operator=ClipScale()) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + T = 10 + callback(; iteration, kwargs...) = (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinScoreGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence" begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = 
KLMinScoreGradDescent(AD; n_samples=100, optimizer, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + operator = ClipScale() + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinScoreGradDescent(AD; n_samples=10, operator) + alg_sub = KLMinScoreGradDescent(AD; n_samples=10, subsampling, operator) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinScoreGradDescent( + AD; n_samples=10, subsampling, operator=ClipScale() + ) + + q, _, _ = optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinScoreGradDescent( + AD; n_samples=100, optimizer, subsampling, operator=ClipScale() + ) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminscoregraddescent_bijectors.jl b/test/algorithms/klminscoregraddescent_bijectors.jl new file mode 100644 index 000000000..782d60616 --- /dev/null +++ b/test/algorithms/klminscoregraddescent_bijectors.jl @@ -0,0 +1,86 @@ + +@testset "KLMinScoreGradDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = Bijectors.transformed(q_true_unconstr, binv) + + obj_est = 
estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence" begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinScoreGradDescent(AD; n_samples=100, optimizer, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminwassfwdbwd.jl b/test/algorithms/klminwassfwdbwd.jl index 3832017b7..b878d629f 100644 --- a/test/algorithms/klminwassfwdbwd.jl +++ b/test/algorithms/klminwassfwdbwd.jl @@ -86,7 +86,7 @@ Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) Δλ = sum(abs2, q_avg.location - μ_true) + sum(abs2, q_avg.scale - L_true) - @test Δλ ≤ 0.1*Δλ0 + @test Δλ ≤ Δλ0/2 end @testset "subsampling" begin @@ -145,14 +145,14 @@ T = 1000 batchsize = 1 subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) - alg_sub = KLMinWassFwdBwd(; n_samples=10, stepsize=1e-2, subsampling) + alg_sub = KLMinWassFwdBwd(; n_samples=10, stepsize=1e-3, subsampling) q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) - @test Δλ ≤ 0.1*Δλ0 + @test Δλ ≤ Δλ0/2 end end end diff --git a/test/algorithms/repgradelbo.jl b/test/algorithms/repgradelbo.jl deleted file mode 100644 index f7f54209b..000000000 --- a/test/algorithms/repgradelbo.jl +++ /dev/null @@ -1,97 +0,0 @@ - -@testset "interface RepGradELBO" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - q0 = 
MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) - q0_trans = Bijectors.transformed(q0, identity) - - @testset "basic" begin - @testset for n_montecarlo in [1, 10] - alg = KLMinRepGradDescent( - AD; - n_samples=n_montecarlo, - operator=ClipScale(), - averager=PolynomialAveraging(), - ) - - _, info, _ = optimize(rng, alg, 10, model, q0; show_progress=false) - @test isfinite(last(info).elbo) - - _, info, _ = optimize(rng, alg, 10, model, q0_trans; show_progress=false) - @test isfinite(last(info).elbo) - end - end - - @testset "estimate_objective" begin - q_true = MeanFieldGaussian(μ_true, L_true) - - @testset for alg in [KLMinRepGradDescent(AD), KLMinRepGradProxDescent(AD)] - obj_est = estimate_objective(rng, alg, q_true, model) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=1) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=3) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=10^5) - @test obj_est ≈ 0 atol=1e-2 - end - end - - @testset "warn MvLocationScale with IdentityOperator" begin - @test_warn "IdentityOperator" begin - alg = KLMinRepGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0; show_progress=false) - end - @test_warn "IdentityOperator" begin - alg = KLMinRepGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0_trans; show_progress=false) - end - end - - obj = RepGradELBO(10) - rng = StableRNG(seed) - elbo_ref = estimate_objective(rng, obj, q0, model; n_samples=10^4) - - @testset "determinism" begin - rng = StableRNG(seed) - elbo = estimate_objective(rng, obj, q0, model; n_samples=10^4) - @test elbo == elbo_ref - end - - @testset "default_rng" begin - elbo = estimate_objective(obj, q0, model; n_samples=10^4) - @test elbo ≈ elbo_ref rtol = 0.1 - end -end - -@testset "interface RepGradELBO STL variance reduction" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - @testset for n_montecarlo in [1, 10] - q_true = MeanFieldGaussian( - Vector{eltype(μ_true)}(μ_true), Diagonal(Vector{eltype(L_true)}(diag(L_true))) - ) - params, re = Optimisers.destructure(q_true) - obj = RepGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()) - out = DiffResults.DiffResult(zero(eltype(params)), similar(params)) - - aux = (rng=rng, obj=obj, problem=model, restructure=re, q_stop=q_true, adtype=AD) - AdvancedVI._value_and_gradient!( - AdvancedVI.estimate_repgradelbo_ad_forward, out, AD, params, aux - ) - grad = DiffResults.gradient(out) - @test norm(grad) ≈ 0 atol = 1e-5 - end -end diff --git a/test/algorithms/repgradelbo_locationscale.jl b/test/algorithms/repgradelbo_locationscale.jl deleted file mode 100644 index eb6d26fb9..000000000 --- a/test/algorithms/repgradelbo_locationscale.jl +++ /dev/null @@ -1,60 +0,0 @@ -@testset "inference RepGradELBO VILocationScale" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => normal_meanfield, :Normal => normal_fullrank), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => RepGradELBO(10), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropy()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, 
is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradDescent(AD; optimizer=Descent(η), operator=ClipScale()) - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. - contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_locationscale_bijectors.jl b/test/algorithms/repgradelbo_locationscale_bijectors.jl deleted file mode 100644 index 0a398993d..000000000 --- a/test/algorithms/repgradelbo_locationscale_bijectors.jl +++ /dev/null @@ -1,68 +0,0 @@ -@testset "inference RepGradELBO VILocationScale Bijectors" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => RepGradELBO(10), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropy()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradDescent(AD; optimizer=Descent(η), operator=ClipScale()) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_proximal_locationscale.jl b/test/algorithms/repgradelbo_proximal_locationscale.jl deleted file mode 100644 index 1eddac5f1..000000000 --- a/test/algorithms/repgradelbo_proximal_locationscale.jl +++ /dev/null @@ -1,61 +0,0 @@ -@testset "inference RepGradELBO Proximal VILocationScale" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => normal_meanfield, :Normal => normal_fullrank), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => - RepGradELBO(10; entropy=ClosedFormEntropyZeroGradient()), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropyZeroGradient()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - T = 1000 - η = 1e-3 - alg = KLMinRepGradProxDescent(AD; optimizer=Descent(η)) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl b/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl deleted file mode 100644 index 2875c1c72..000000000 --- a/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl +++ /dev/null @@ -1,69 +0,0 @@ -@testset "inference RepGradELBO Proximal VILocationScale Bijectors" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => - RepGradELBO(10; entropy=ClosedFormEntropyZeroGradient()), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropyZeroGradient()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradProxDescent(AD; optimizer=Descent(η)) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/scoregradelbo.jl b/test/algorithms/scoregradelbo.jl deleted file mode 100644 index 5e4acb073..000000000 --- a/test/algorithms/scoregradelbo.jl +++ /dev/null @@ -1,68 +0,0 @@ -@testset "interface ScoreGradELBO" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) - q0_trans = Bijectors.transformed(q0, identity) - - @testset "basic" begin - @testset for n_montecarlo in [1, 10] - alg = KLMinScoreGradDescent( - AD; n_samples=n_montecarlo, operator=ClipScale(), optimizer=Descent(1e-5) - ) - - _, info, _ = optimize(rng, alg, 10, model, q0; show_progress=false) - @assert isfinite(last(info).elbo) - - _, info, _ = optimize(rng, alg, 10, model, q0_trans; show_progress=false) - @assert isfinite(last(info).elbo) - end - end - - @testset "estimate_objective" begin - q_true = MeanFieldGaussian(μ_true, L_true) - alg = KLMinScoreGradDescent(AD) - - obj_est = estimate_objective(rng, alg, q_true, model) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=1) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=3) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=10^5) - @test obj_est ≈ 0 atol = 1e-2 - end - - @testset "warn MvLocationScale with IdentityOperator" begin - @test_warn "IdentityOperator" begin - alg = KLMinScoreGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0; show_progress=false) - end - @test_warn "IdentityOperator" begin - alg = KLMinScoreGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0_trans; show_progress=false) - end - end - - obj = ScoreGradELBO(10) - rng = StableRNG(seed) - elbo_ref = estimate_objective(rng, obj, q0, model; n_samples=10^4) - - @testset "determinism" begin - rng = StableRNG(seed) - elbo = estimate_objective(rng, obj, q0, model; n_samples=10^4) - @test elbo == elbo_ref - end - - @testset "default_rng" begin - elbo = estimate_objective(obj, q0, model; n_samples=10^4) - @test elbo ≈ elbo_ref rtol = 0.2 - end -end diff --git a/test/algorithms/scoregradelbo_locationscale.jl b/test/algorithms/scoregradelbo_locationscale.jl deleted file mode 100644 index fc567b4c9..000000000 --- a/test/algorithms/scoregradelbo_locationscale.jl +++ /dev/null @@ -1,56 +0,0 @@ -@testset "inference ScoreGradELBO VILocationScale" begin - @testset "$(modelname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => 
normal_meanfield, :Normal => normal_fullrank) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-4 - opt = Optimisers.Descent(η) - alg = KLMinScoreGradDescent(AD; n_samples=10, optimizer=opt, operator=ClipScale()) - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. - contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ ≈ μ_repl rtol = 1e-3 - @test L ≈ L_repl rtol = 1e-3 - end - end -end diff --git a/test/algorithms/scoregradelbo_locationscale_bijectors.jl b/test/algorithms/scoregradelbo_locationscale_bijectors.jl deleted file mode 100644 index bfc0da73c..000000000 --- a/test/algorithms/scoregradelbo_locationscale_bijectors.jl +++ /dev/null @@ -1,64 +0,0 @@ -@testset "inference ScoreGradELBO VILocationScale Bijectors" begin - @testset "$(modelname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-4 - opt = Optimisers.Descent(η) - alg = KLMinScoreGradDescent(AD; n_samples=10, optimizer=opt, operator=ClipScale()) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ ≈ μ_repl rtol = 1e-3 - @test L ≈ L_repl rtol = 1e-3 - end - end -end diff --git a/test/algorithms/subsampledobj.jl b/test/general/subsampledobj.jl similarity index 100% rename from test/algorithms/subsampledobj.jl rename to test/general/subsampledobj.jl diff --git a/test/runtests.jl b/test/runtests.jl index ab67247b3..b4d14cb4b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -72,14 +72,12 @@ if GROUP == "All" || GROUP == "AD" # Tests that need to check correctness of the integration with AD backends include("general/ad.jl") include("general/mixedad_logdensity.jl") + include("general/subsampledobj.jl") - include("algorithms/subsampledobj.jl") - include("algorithms/repgradelbo.jl") - include("algorithms/scoregradelbo.jl") - include("algorithms/repgradelbo_locationscale.jl") - include("algorithms/repgradelbo_locationscale_bijectors.jl") - include("algorithms/repgradelbo_proximal_locationscale.jl") - include("algorithms/repgradelbo_proximal_locationscale_bijectors.jl") - include("algorithms/scoregradelbo_locationscale.jl") - include("algorithms/scoregradelbo_locationscale_bijectors.jl") + include("algorithms/klminrepgraddescent.jl") + include("algorithms/klminscoregraddescent.jl") + include("algorithms/klminrepgradproxdescent.jl") + include("algorithms/klminrepgraddescent_bijectors.jl") + include("algorithms/klminrepgradproxdescent_bijectors.jl") + include("algorithms/klminscoregraddescent_bijectors.jl") end
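The reorganized includes above are gated by the GROUP variable in test/runtests.jl. As a rough local-usage sketch (assuming GROUP is read from the environment, e.g. GROUP = get(ENV, "GROUP", "All"); that definition is not part of this diff), the AD-dependent algorithm suites can be run on their own:

    # Sketch only: assumes test/runtests.jl reads GROUP from the environment,
    # which this patch does not show.
    using Pkg
    ENV["GROUP"] = "AD"      # select the AD-dependent algorithm tests included above
    Pkg.test("AdvancedVI")   # the test subprocess inherits ENV, so GROUP reaches runtests.jl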