diff --git a/test/algorithms/klminrepgraddescent.jl b/test/algorithms/klminrepgraddescent.jl new file mode 100644 index 000000000..784ed4ac0 --- /dev/null +++ b/test/algorithms/klminrepgraddescent.jl @@ -0,0 +1,195 @@ + +@testset "KLMinRepGradDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinRepGradDescent(AD; n_samples, operator=ClipScale()) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + T = 10 + callback(; iteration, kwargs...) = (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinRepGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + + @testset "STL variance reduction" begin + @testset for n_montecarlo in [1, 10] + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + params, re = Optimisers.destructure(q_true) + obj = RepGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()) + out = DiffResults.DiffResult(zero(eltype(params)), similar(params)) + + aux = ( + rng=Random.default_rng(), + obj=obj, + problem=model, + restructure=re, + q_stop=q_true, + adtype=AD, + ) + AdvancedVI._value_and_gradient!( + AdvancedVI.estimate_repgradelbo_ad_forward, out, AD, params, aux + ) + grad = DiffResults.gradient(out) + @test norm(grad) ≈ 0 atol = 1e-5 + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradDescent(AD; n_samples=10, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy)" for entropy in + [ClosedFormEntropy(), StickingTheLandingEntropy()] + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; 
model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradDescent(AD; entropy, optimizer, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + operator = ClipScale() + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinRepGradDescent(AD; n_samples=10, operator) + alg_sub = KLMinRepGradDescent(AD; n_samples=10, subsampling, operator) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradDescent( + AD; n_samples=10, subsampling, operator=ClipScale() + ) + + q, _, _ = optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradDescent( + AD; n_samples=10, subsampling, optimizer, operator=ClipScale() + ) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminrepgraddescent_bijectors.jl b/test/algorithms/klminrepgraddescent_bijectors.jl new file mode 100644 index 000000000..0d26a309a --- /dev/null +++ b/test/algorithms/klminrepgraddescent_bijectors.jl @@ -0,0 +1,94 @@ + +@testset "KLMinRepGradDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = 
Bijectors.transformed(q_true_unconstr, binv) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinRepGradDescent(AD; operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinRepGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradDescent(AD; n_samples=10, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy)" for entropy in + [ClosedFormEntropy(), StickingTheLandingEntropy()] + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradDescent(AD; entropy, optimizer, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminrepgradproxdescent.jl b/test/algorithms/klminrepgradproxdescent.jl new file mode 100644 index 000000000..76372757c --- /dev/null +++ b/test/algorithms/klminrepgradproxdescent.jl @@ -0,0 +1,161 @@ + +@testset "KLMinRepGradProxDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinRepGradProxDescent(AD; n_samples) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinRepGradProxDescent(AD) + T = 10 + callback(; iteration, kwargs...) 
= (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinRepGradProxDescent(AD) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinRepGradProxDescent(AD) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradProxDescent(AD; n_samples=10) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy_zerograd)" for entropy_zerograd in [ + ClosedFormEntropyZeroGradient(), StickingTheLandingEntropyZeroGradient() + ] + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradProxDescent(AD; optimizer, entropy_zerograd) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinRepGradProxDescent(AD; n_samples=10) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, subsampling) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, subsampling) + + q, _, _ 
= optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinRepGradProxDescent(AD; n_samples=10, optimizer, subsampling) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminrepgradproxdescent_bijectors.jl b/test/algorithms/klminrepgradproxdescent_bijectors.jl new file mode 100644 index 000000000..d2714b5aa --- /dev/null +++ b/test/algorithms/klminrepgradproxdescent_bijectors.jl @@ -0,0 +1,89 @@ + +@testset "KLMinRepGradProxDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinRepGradProxDescent(AD) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = Bijectors.transformed(q_true_unconstr, binv) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinRepGradProxDescent(AD) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinRepGradProxDescent(AD; n_samples=10) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence $(entropy_zerograd)" for entropy_zerograd in [ + ClosedFormEntropyZeroGradient(), StickingTheLandingEntropyZeroGradient() + ] + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, 
is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinRepGradProxDescent(AD; n_samples=10, optimizer, entropy_zerograd) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminscoregraddescent.jl b/test/algorithms/klminscoregraddescent.jl new file mode 100644 index 000000000..9a307d570 --- /dev/null +++ b/test/algorithms/klminscoregraddescent.jl @@ -0,0 +1,171 @@ + +@testset "KLMinScoreGradDescent" begin + begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + @testset "basic n_samples=$(n_samples)" for n_samples in [1, 10] + alg = KLMinScoreGradDescent(AD; n_samples, operator=ClipScale()) + T = 1 + optimize(alg, T, model, q0; show_progress=PROGRESS) + end + + @testset "callback" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + T = 10 + callback(; iteration, kwargs...) = (iteration_check=iteration,) + _, info, _ = optimize(alg, T, model, q0; callback, show_progress=PROGRESS) + @test [i.iteration_check for i in info] == 1:T + end + + @testset "estimate_objective" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q_true = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + + obj_est = estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-3 + end + + @testset "determinism" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.location + L = q_out.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.location + L_repl = q_out.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "warn MvLocationScale with IdentityOperator" begin + @test_warn "IdentityOperator" begin + alg′ = KLMinScoreGradDescent(AD; operator=IdentityOperator()) + optimize(alg′, 1, model, q0; show_progress=false) + end + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.location) == realtype + @test eltype(q_out.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence" begin + modelstats = normal_meanfield(Random.default_rng(), Float64) + (; model, μ_true, L_true, is_meanfield) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = 
KLMinScoreGradDescent(AD; n_samples=100, optimizer, operator=ClipScale()) + q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q_out.location - μ_true) + sum(abs2, q_out.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + + @testset "subsampling" begin + n_data = 8 + + @testset "estimate_objective batchsize=$(batchsize)" for batchsize in [1, 3, 4] + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + operator = ClipScale() + + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg = KLMinScoreGradDescent(AD; n_samples=10, operator) + alg_sub = KLMinScoreGradDescent(AD; n_samples=10, subsampling, operator) + + obj_full = estimate_objective(alg, q0, model; n_samples=10^5) + obj_sub = estimate_objective(alg_sub, q0, model; n_samples=10^5) + @test obj_full ≈ obj_sub rtol=0.1 + end + + @testset "determinism" begin + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 10 + batchsize = 3 + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinScoreGradDescent( + AD; n_samples=10, subsampling, operator=ClipScale() + ) + + q, _, _ = optimize(rng, alg_sub, T, model, q0; show_progress=PROGRESS) + μ = q.location + L = q.scale + + rng_repl = StableRNG(seed) + q, _, _ = optimize(rng_repl, alg_sub, T, model, q0; show_progress=PROGRESS) + μ_repl = q.location + L_repl = q.scale + @test μ == μ_repl + @test L == L_repl + end + + @testset "convergence" begin + modelstats = subsamplednormal(Random.default_rng(), n_data) + (; model, n_dims, μ_true, L_true) = modelstats + + L0 = LowerTriangular(Matrix{Float64}(I, n_dims, n_dims)) + q0 = FullRankGaussian(zeros(Float64, n_dims), L0) + + T = 1000 + batchsize = 1 + optimizer = Descent(1e-3) + subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) + alg_sub = KLMinScoreGradDescent( + AD; n_samples=100, optimizer, subsampling, operator=ClipScale() + ) + + q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) + Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end + end +end diff --git a/test/algorithms/klminscoregraddescent_bijectors.jl b/test/algorithms/klminscoregraddescent_bijectors.jl new file mode 100644 index 000000000..782d60616 --- /dev/null +++ b/test/algorithms/klminscoregraddescent_bijectors.jl @@ -0,0 +1,86 @@ + +@testset "KLMinScoreGradDescent with Bijectors" begin + begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + b = Bijectors.bijector(model) + binv = inverse(b) + + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + @testset "estimate_objective" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q_true_unconstr = MeanFieldGaussian(Vector(μ_true), Diagonal(L_true)) + q_true = Bijectors.transformed(q_true_unconstr, binv) + + obj_est = 
estimate_objective(alg, q_true, model) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=1) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=3) + @test isfinite(obj_est) + + obj_est = estimate_objective(alg, q_true, model; n_samples=10^5) + @test obj_est ≈ 0 atol=1e-2 + end + + @testset "determinism" begin + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + + seed = (0x38bef07cf9cc549d) + rng = StableRNG(seed) + T = 10 + + q_out, _, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) + μ = q_out.dist.location + L = q_out.dist.scale + + rng_repl = StableRNG(seed) + q_out, _, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) + μ_repl = q_out.dist.location + L_repl = q_out.dist.scale + @test μ == μ_repl + @test L == L_repl + end + end + + @testset "type stability realtype=$(realtype)" for realtype in [Float32, Float64] + modelstats = normallognormal_meanfield(Random.default_rng(), realtype) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1 + alg = KLMinScoreGradDescent(AD; n_samples=10, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian( + zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims)) + ) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, info, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + @test eltype(q_out.dist.location) == realtype + @test eltype(q_out.dist.scale) == realtype + @test typeof(first(info).elbo) == realtype + end + + @testset "convergence" begin + modelstats = normallognormal_meanfield(Random.default_rng(), Float64) + (; model, n_dims, μ_true, L_true) = modelstats + + T = 1000 + optimizer = Descent(1e-3) + alg = KLMinScoreGradDescent(AD; n_samples=100, optimizer, operator=ClipScale()) + q0_unconstr = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) + q0 = Bijectors.transformed(q0_unconstr, binv) + + q_out, _, _ = optimize(alg, T, model, q0; show_progress=PROGRESS) + + Δλ0 = sum(abs2, q0.dist.location - μ_true) + sum(abs2, q0.dist.scale - L_true) + Δλ = sum(abs2, q_out.dist.location - μ_true) + sum(abs2, q_out.dist.scale - L_true) + + @test Δλ ≤ Δλ0/2 + end +end diff --git a/test/algorithms/klminwassfwdbwd.jl b/test/algorithms/klminwassfwdbwd.jl index 3832017b7..b878d629f 100644 --- a/test/algorithms/klminwassfwdbwd.jl +++ b/test/algorithms/klminwassfwdbwd.jl @@ -86,7 +86,7 @@ Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) Δλ = sum(abs2, q_avg.location - μ_true) + sum(abs2, q_avg.scale - L_true) - @test Δλ ≤ 0.1*Δλ0 + @test Δλ ≤ Δλ0/2 end @testset "subsampling" begin @@ -145,14 +145,14 @@ T = 1000 batchsize = 1 subsampling = ReshufflingBatchSubsampling(1:n_data, batchsize) - alg_sub = KLMinWassFwdBwd(; n_samples=10, stepsize=1e-2, subsampling) + alg_sub = KLMinWassFwdBwd(; n_samples=10, stepsize=1e-3, subsampling) q, stats, _ = optimize(alg_sub, T, model, q0; show_progress=PROGRESS) Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) Δλ = sum(abs2, q.location - μ_true) + sum(abs2, q.scale - L_true) - @test Δλ ≤ 0.1*Δλ0 + @test Δλ ≤ Δλ0/2 end end end diff --git a/test/algorithms/repgradelbo.jl b/test/algorithms/repgradelbo.jl deleted file mode 100644 index f7f54209b..000000000 --- a/test/algorithms/repgradelbo.jl +++ /dev/null @@ -1,97 +0,0 @@ - -@testset "interface RepGradELBO" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - q0 = 
MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) - q0_trans = Bijectors.transformed(q0, identity) - - @testset "basic" begin - @testset for n_montecarlo in [1, 10] - alg = KLMinRepGradDescent( - AD; - n_samples=n_montecarlo, - operator=ClipScale(), - averager=PolynomialAveraging(), - ) - - _, info, _ = optimize(rng, alg, 10, model, q0; show_progress=false) - @test isfinite(last(info).elbo) - - _, info, _ = optimize(rng, alg, 10, model, q0_trans; show_progress=false) - @test isfinite(last(info).elbo) - end - end - - @testset "estimate_objective" begin - q_true = MeanFieldGaussian(μ_true, L_true) - - @testset for alg in [KLMinRepGradDescent(AD), KLMinRepGradProxDescent(AD)] - obj_est = estimate_objective(rng, alg, q_true, model) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=1) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=3) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=10^5) - @test obj_est ≈ 0 atol=1e-2 - end - end - - @testset "warn MvLocationScale with IdentityOperator" begin - @test_warn "IdentityOperator" begin - alg = KLMinRepGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0; show_progress=false) - end - @test_warn "IdentityOperator" begin - alg = KLMinRepGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0_trans; show_progress=false) - end - end - - obj = RepGradELBO(10) - rng = StableRNG(seed) - elbo_ref = estimate_objective(rng, obj, q0, model; n_samples=10^4) - - @testset "determinism" begin - rng = StableRNG(seed) - elbo = estimate_objective(rng, obj, q0, model; n_samples=10^4) - @test elbo == elbo_ref - end - - @testset "default_rng" begin - elbo = estimate_objective(obj, q0, model; n_samples=10^4) - @test elbo ≈ elbo_ref rtol = 0.1 - end -end - -@testset "interface RepGradELBO STL variance reduction" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - @testset for n_montecarlo in [1, 10] - q_true = MeanFieldGaussian( - Vector{eltype(μ_true)}(μ_true), Diagonal(Vector{eltype(L_true)}(diag(L_true))) - ) - params, re = Optimisers.destructure(q_true) - obj = RepGradELBO(n_montecarlo; entropy=StickingTheLandingEntropy()) - out = DiffResults.DiffResult(zero(eltype(params)), similar(params)) - - aux = (rng=rng, obj=obj, problem=model, restructure=re, q_stop=q_true, adtype=AD) - AdvancedVI._value_and_gradient!( - AdvancedVI.estimate_repgradelbo_ad_forward, out, AD, params, aux - ) - grad = DiffResults.gradient(out) - @test norm(grad) ≈ 0 atol = 1e-5 - end -end diff --git a/test/algorithms/repgradelbo_locationscale.jl b/test/algorithms/repgradelbo_locationscale.jl deleted file mode 100644 index eb6d26fb9..000000000 --- a/test/algorithms/repgradelbo_locationscale.jl +++ /dev/null @@ -1,60 +0,0 @@ -@testset "inference RepGradELBO VILocationScale" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => normal_meanfield, :Normal => normal_fullrank), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => RepGradELBO(10), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropy()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, 
is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradDescent(AD; optimizer=Descent(η), operator=ClipScale()) - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. - contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_locationscale_bijectors.jl b/test/algorithms/repgradelbo_locationscale_bijectors.jl deleted file mode 100644 index 0a398993d..000000000 --- a/test/algorithms/repgradelbo_locationscale_bijectors.jl +++ /dev/null @@ -1,68 +0,0 @@ -@testset "inference RepGradELBO VILocationScale Bijectors" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => RepGradELBO(10), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropy()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradDescent(AD; optimizer=Descent(η), operator=ClipScale()) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_proximal_locationscale.jl b/test/algorithms/repgradelbo_proximal_locationscale.jl deleted file mode 100644 index 1eddac5f1..000000000 --- a/test/algorithms/repgradelbo_proximal_locationscale.jl +++ /dev/null @@ -1,61 +0,0 @@ -@testset "inference RepGradELBO Proximal VILocationScale" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => normal_meanfield, :Normal => normal_fullrank), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => - RepGradELBO(10; entropy=ClosedFormEntropyZeroGradient()), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropyZeroGradient()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - T = 1000 - η = 1e-3 - alg = KLMinRepGradProxDescent(AD; optimizer=Descent(η)) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl b/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl deleted file mode 100644 index 2875c1c72..000000000 --- a/test/algorithms/repgradelbo_proximal_locationscale_bijectors.jl +++ /dev/null @@ -1,69 +0,0 @@ -@testset "inference RepGradELBO Proximal VILocationScale Bijectors" begin - @testset "$(modelname) $(objname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield), - (objname, objective) in Dict( - :RepGradELBOClosedFormEntropy => - RepGradELBO(10; entropy=ClosedFormEntropyZeroGradient()), - :RepGradELBOStickingTheLanding => - RepGradELBO(10; entropy=StickingTheLandingEntropyZeroGradient()), - ) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-3 - alg = KLMinRepGradProxDescent(AD; optimizer=Descent(η)) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ == μ_repl - @test L == L_repl - end - end -end diff --git a/test/algorithms/scoregradelbo.jl b/test/algorithms/scoregradelbo.jl deleted file mode 100644 index 5e4acb073..000000000 --- a/test/algorithms/scoregradelbo.jl +++ /dev/null @@ -1,68 +0,0 @@ -@testset "interface ScoreGradELBO" begin - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = normal_meanfield(rng, Float64) - - (; model, μ_true, L_true, n_dims, is_meanfield) = modelstats - - q0 = MeanFieldGaussian(zeros(n_dims), Diagonal(ones(n_dims))) - q0_trans = Bijectors.transformed(q0, identity) - - @testset "basic" begin - @testset for n_montecarlo in [1, 10] - alg = KLMinScoreGradDescent( - AD; n_samples=n_montecarlo, operator=ClipScale(), optimizer=Descent(1e-5) - ) - - _, info, _ = optimize(rng, alg, 10, model, q0; show_progress=false) - @assert isfinite(last(info).elbo) - - _, info, _ = optimize(rng, alg, 10, model, q0_trans; show_progress=false) - @assert isfinite(last(info).elbo) - end - end - - @testset "estimate_objective" begin - q_true = MeanFieldGaussian(μ_true, L_true) - alg = KLMinScoreGradDescent(AD) - - obj_est = estimate_objective(rng, alg, q_true, model) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=1) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=3) - @test isfinite(obj_est) - - obj_est = estimate_objective(rng, alg, q_true, model; n_samples=10^5) - @test obj_est ≈ 0 atol = 1e-2 - end - - @testset "warn MvLocationScale with IdentityOperator" begin - @test_warn "IdentityOperator" begin - alg = KLMinScoreGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0; show_progress=false) - end - @test_warn "IdentityOperator" begin - alg = KLMinScoreGradDescent(AD; operator=IdentityOperator()) - optimize(rng, alg, 1, model, q0_trans; show_progress=false) - end - end - - obj = ScoreGradELBO(10) - rng = StableRNG(seed) - elbo_ref = estimate_objective(rng, obj, q0, model; n_samples=10^4) - - @testset "determinism" begin - rng = StableRNG(seed) - elbo = estimate_objective(rng, obj, q0, model; n_samples=10^4) - @test elbo == elbo_ref - end - - @testset "default_rng" begin - elbo = estimate_objective(obj, q0, model; n_samples=10^4) - @test elbo ≈ elbo_ref rtol = 0.2 - end -end diff --git a/test/algorithms/scoregradelbo_locationscale.jl b/test/algorithms/scoregradelbo_locationscale.jl deleted file mode 100644 index fc567b4c9..000000000 --- a/test/algorithms/scoregradelbo_locationscale.jl +++ /dev/null @@ -1,56 +0,0 @@ -@testset "inference ScoreGradELBO VILocationScale" begin - @testset "$(modelname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:Normal => 
normal_meanfield, :Normal => normal_fullrank) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-4 - opt = Optimisers.Descent(η) - alg = KLMinScoreGradDescent(AD; n_samples=10, optimizer=opt, operator=ClipScale()) - - q0 = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. - contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, q0.location - μ_true) + sum(abs2, q0.scale - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - - μ = q_avg.location - L = q_avg.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0; show_progress=PROGRESS) - μ = q_avg.location - L = q_avg.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize(rng_repl, alg, T, model, q0; show_progress=PROGRESS) - μ_repl = q_avg.location - L_repl = q_avg.scale - @test μ ≈ μ_repl rtol = 1e-3 - @test L ≈ L_repl rtol = 1e-3 - end - end -end diff --git a/test/algorithms/scoregradelbo_locationscale_bijectors.jl b/test/algorithms/scoregradelbo_locationscale_bijectors.jl deleted file mode 100644 index bfc0da73c..000000000 --- a/test/algorithms/scoregradelbo_locationscale_bijectors.jl +++ /dev/null @@ -1,64 +0,0 @@ -@testset "inference ScoreGradELBO VILocationScale Bijectors" begin - @testset "$(modelname) $(realtype)" for realtype in [Float64, Float32], - (modelname, modelconstr) in - Dict(:NormalLogNormalMeanField => normallognormal_meanfield) - - seed = (0x38bef07cf9cc549d) - rng = StableRNG(seed) - - modelstats = modelconstr(rng, realtype) - (; model, μ_true, L_true, n_dims, strong_convexity, is_meanfield) = modelstats - - T = 1000 - η = 1e-4 - opt = Optimisers.Descent(η) - alg = KLMinScoreGradDescent(AD; n_samples=10, optimizer=opt, operator=ClipScale()) - - b = Bijectors.bijector(model) - b⁻¹ = inverse(b) - μ0 = Zeros(realtype, n_dims) - L0 = Diagonal(Ones(realtype, n_dims)) - - q0_η = if is_meanfield - MeanFieldGaussian(zeros(realtype, n_dims), Diagonal(ones(realtype, n_dims))) - else - L0 = LowerTriangular(Matrix{realtype}(I, n_dims, n_dims)) - FullRankGaussian(zeros(realtype, n_dims), L0) - end - q0_z = Bijectors.transformed(q0_η, b⁻¹) - - # For small enough η, the error of SGD, Δλ, is bounded as - # Δλ ≤ ρ^T Δλ0 + O(η), - # where ρ = 1 - ημ, μ is the strong convexity constant. 
- contraction_rate = 1 - η * strong_convexity - - @testset "convergence" begin - Δλ0 = sum(abs2, μ0 - μ_true) + sum(abs2, L0 - L_true) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - - μ = q_avg.dist.location - L = q_avg.dist.scale - Δλ = sum(abs2, μ - μ_true) + sum(abs2, L - L_true) - - @test Δλ ≤ contraction_rate^(T / 2) * Δλ0 - @test eltype(μ) == eltype(μ_true) - @test eltype(L) == eltype(L_true) - end - - @testset "determinism" begin - rng = StableRNG(seed) - q_avg, stats, _ = optimize(rng, alg, T, model, q0_z; show_progress=PROGRESS) - μ = q_avg.dist.location - L = q_avg.dist.scale - - rng_repl = StableRNG(seed) - q_avg, stats, _ = optimize( - rng_repl, alg, T, model, q0_z; show_progress=PROGRESS - ) - μ_repl = q_avg.dist.location - L_repl = q_avg.dist.scale - @test μ ≈ μ_repl rtol = 1e-3 - @test L ≈ L_repl rtol = 1e-3 - end - end -end diff --git a/test/algorithms/subsampledobj.jl b/test/general/subsampledobj.jl similarity index 100% rename from test/algorithms/subsampledobj.jl rename to test/general/subsampledobj.jl diff --git a/test/runtests.jl b/test/runtests.jl index ab67247b3..b4d14cb4b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -72,14 +72,12 @@ if GROUP == "All" || GROUP == "AD" # Tests that need to check correctness of the integration with AD backends include("general/ad.jl") include("general/mixedad_logdensity.jl") + include("general/subsampledobj.jl") - include("algorithms/subsampledobj.jl") - include("algorithms/repgradelbo.jl") - include("algorithms/scoregradelbo.jl") - include("algorithms/repgradelbo_locationscale.jl") - include("algorithms/repgradelbo_locationscale_bijectors.jl") - include("algorithms/repgradelbo_proximal_locationscale.jl") - include("algorithms/repgradelbo_proximal_locationscale_bijectors.jl") - include("algorithms/scoregradelbo_locationscale.jl") - include("algorithms/scoregradelbo_locationscale_bijectors.jl") + include("algorithms/klminrepgraddescent.jl") + include("algorithms/klminscoregraddescent.jl") + include("algorithms/klminrepgradproxdescent.jl") + include("algorithms/klminrepgraddescent_bijectors.jl") + include("algorithms/klminrepgradproxdescent_bijectors.jl") + include("algorithms/klminscoregraddescent_bijectors.jl") end
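The reorganized includes above are gated by the GROUP variable in test/runtests.jl. As a rough local-usage sketch (assuming GROUP is read from the environment, e.g. GROUP = get(ENV, "GROUP", "All"); that definition is not part of this diff), the AD-dependent algorithm suites can be run on their own:

    # Sketch only: assumes test/runtests.jl reads GROUP from the environment,
    # which this patch does not show.
    using Pkg
    ENV["GROUP"] = "AD"      # select the AD-dependent algorithm tests included above
    Pkg.test("AdvancedVI")   # the test subprocess inherits ENV, so GROUP reaches runtests.jl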