Extend ALM and BCFW documentation and add Multi-Threading #463

Merged · 10 commits · May 21, 2024
9 changes: 7 additions & 2 deletions docs/src/reference/3_backend.md
@@ -55,11 +55,11 @@ FrankWolfe.TrackingGradient
FrankWolfe.TrackingLMO
```

Also see the example "Tracking number of calls to different oracles".
Also see the example [Tracking, counters and custom callbacks for Frank Wolfe](@ref).

## Update order for block-coordinate methods

Block-coordinate methods can be run with different update order. All update order are subtypes of [`FrankWolfe.BlockCoordinateUpdateOrder`](@ref). They have to implement the method [`FrankWolfe.select_update_indices`](@ref) which select which blocks to update in what order.
Block-coordinate methods can be run with different update orders. All update orders are subtypes of [`FrankWolfe.BlockCoordinateUpdateOrder`](@ref). They have to implement the method [`FrankWolfe.select_update_indices`](@ref) which selects which blocks to update in what order.
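As an illustration, a minimal custom order could look like the following sketch; it assumes the `select_update_indices` signature used in the new block-coordinate example added in this PR, and `ReverseCyclicOrder` is a hypothetical name:

```julia
using FrankWolfe

# Hypothetical example: update every block in each iteration, but in reverse order.
struct ReverseCyclicOrder <: FrankWolfe.BlockCoordinateUpdateOrder end

function FrankWolfe.select_update_indices(::ReverseCyclicOrder, state::FrankWolfe.CallbackState, dual_gaps)
    # Return the block indices in the order they should be updated.
    return collect(reverse(1:length(state.lmo.lmos)))
end
```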

```@docs
FrankWolfe.BlockCoordinateUpdateOrder
@@ -80,6 +80,11 @@ FrankWolfe.FrankWolfeStep
FrankWolfe.BPCGStep
```
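These steps are selected through the `update_step` keyword of [`FrankWolfe.block_coordinate_frank_wolfe`](@ref), either as a single step for all blocks or as a tuple with one step per block, as done in the new example of this PR. A rough sketch, with `f`, `grad!`, `prod_lmo` and `x0` standing in for a concrete problem:

```julia
# Same update step for every block:
FrankWolfe.block_coordinate_frank_wolfe(f, grad!, prod_lmo, x0; update_step=FrankWolfe.BPCGStep())

# One update step per block:
FrankWolfe.block_coordinate_frank_wolfe(
    f,
    grad!,
    prod_lmo,
    x0;
    update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()),
)
```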

## Block vector
```@docs
FrankWolfe.BlockVector
```
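A small usage sketch; the single-argument constructor is the one exercised in the tests of this PR, and the block sizes are then taken from the blocks themselves:

```julia
using FrankWolfe
using LinearAlgebra

# Build a block vector directly from its blocks.
v = FrankWolfe.BlockVector([ones(2, 2), 3 * ones(3, 3)])

v.blocks[1]    # access an individual block
w = v / 2      # scalar multiplication and division act block-wise
dot(v, w)      # inner product across all blocks
```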

## Index

```@index
70 changes: 0 additions & 70 deletions examples/block_coordinate_frank_wolfe.jl

This file was deleted.

11 changes: 7 additions & 4 deletions examples/docs_10_alternating_methods.jl
@@ -10,7 +10,8 @@
# ```math
# Q = [-1,0]^n ~.
# ```
# The goal is to find either a point in the intersection, `` x \in P \cap Q``, or a pair of points, ``(x_P, x_Q) \in P \times Q``, which attains minimal distance between ``P`` and ``Q``,
# The goal is to find a point that lies in both ``P`` and ``Q``. We do this by reformulating the problem first.
# Instead of finding a point in the intersection ``P \cap Q``, we search for a pair of points, ``(x_P, x_Q)``, in the Cartesian product ``P \times Q``, which attains the minimal distance between ``P`` and ``Q``,
# ```math
# \|x_P - x_Q\|_2 = \min_{(x,y) \in P \times Q} \|x - y \|_2 ~.
# ```
@@ -19,7 +20,8 @@ using FrankWolfe
include("../examples/plot_utils.jl")

# ## Setting up objective, gradient and linear minimization oracles
# Since we only consider the feasibility problem the objective function as well as the gradient are zero.
# Alternating Linear Minimization (ALM) allows for an additional objective, so that one can optimize over the intersection of sets instead of only finding feasible points.
# Since this example only considers the feasibility problem, we set both the objective function and the gradient to zero.
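The corresponding definitions are collapsed in this diff; a zero objective with an in-place gradient can be written, for instance, as the following sketch:

```julia
# Pure feasibility problem: constant zero objective and zero gradient.
f(x) = 0

function grad!(storage, x)
    storage .= 0
    return storage
end
```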

n = 20

@@ -41,8 +43,9 @@ target_tolerance = 1e-6
trajectories = [];

# ## Running Alternating Linear Minimization
# We run Alternating Linear Minimization (ALM) with [`FrankWolfe.block_coordinate_frank_wolfe`](@ref).
# This method allows three different update orders, `FullUpdate`, `CyclicUpdate` and `Stochasticupdate`.
# The method [`FrankWolfe.alternating_linear_minimization`](@ref) is not a Frank-Wolfe method itself. It is a wrapper translating a problem over the intersection of multiple sets into a problem over the product space.
# ALM can be called with any FW method. The default choice, though, is [`FrankWolfe.block_coordinate_frank_wolfe`](@ref), as it allows the blocks to be updated separately.
# There are three different update orders implemented: `FullUpdate`, `CyclicUpdate` and `StochasticUpdate`.
# Accordingly, both blocks are updated either simultaneously, sequentially, or in random order.

for order in [FrankWolfe.FullUpdate(), FrankWolfe.CyclicUpdate(), FrankWolfe.StochasticUpdate()]
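The body of this loop is collapsed in the diff; a sketch of how the ALM wrapper is typically invoked inside it, modeled on the call signature visible in the updated tests further down. `lmo_P` and `lmo_Q` are placeholder names for the two oracles of this example, and it is assumed that extra keyword arguments such as `update_order` are forwarded to the inner Frank-Wolfe method:

```julia
x, v, primal, dual_gap, squared_dist, traj_data = FrankWolfe.alternating_linear_minimization(
    FrankWolfe.block_coordinate_frank_wolfe,  # inner method run on the product space
    f,                                        # zero objective from above
    grad!,                                    # zero gradient from above
    (lmo_P, lmo_Q),                           # one LMO per set
    ones(n),                                  # starting point
    update_order=order,
    trajectory=true,
)
push!(trajectories, traj_data)
```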
92 changes: 92 additions & 0 deletions examples/docs_11_block_coordinate_fw.jl
@@ -0,0 +1,92 @@
# # Block-Coordinate Frank-Wolfe and Block-Vectors

# In this example, we demonstrate the usage of [`FrankWolfe.block_coordinate_frank_wolfe`](@ref) and [`FrankWolfe.BlockVector`](@ref).
# We consider the problem of minimizing the squared Euclidean distance between two sets.
# We compare different update orders and different update steps.

# ## Import and setup
# We first import the necessary packages and include the code for plotting the results.
using FrankWolfe
using LinearAlgebra

include("plot_utils.jl")

# Next, we define the objective function and its gradient. The iterates `x` are instances of the [`FrankWolfe.BlockVector`](@ref) type.
# The different blocks of the vector can be accessed via the `blocks` field.

f(x) = dot(x.blocks[1] - x.blocks[2], x.blocks[1] - x.blocks[2])

function grad!(storage, x)
@. storage.blocks = [x.blocks[1] - x.blocks[2], x.blocks[2] - x.blocks[1]]
end

# In our example we consider the probability simplex and an L-infinity norm ball as the feasible sets.
n = 100
lmo1 = FrankWolfe.ScaledBoundLInfNormBall(-ones(n), zeros(n))
lmo2 = FrankWolfe.ProbabilitySimplexOracle(1.0)
prod_lmo = FrankWolfe.ProductLMO((lmo1, lmo2))

# We initialize the starting point `x0` as a [`FrankWolfe.BlockVector`](@ref) with two blocks.
# The two other arguments are the block sizes and the overall number of entries.
x0 = FrankWolfe.BlockVector([-ones(n), [i == 1 ? 1 : 0 for i in 1:n]], [(n,), (n,)], 2 * n);


# ## Running block-coordinate Frank-Wolfe with different update-orders

# In a first step, we compare different update orders. There are three different update orders implemented:
# [`FrankWolfe.FullUpdate`](@ref), [`FrankWolfe.CyclicUpdate`](@ref) and [`FrankWolfe.StochasticUpdate`](@ref).
# To create a custom [`FrankWolfe.BlockCoordinateUpdateOrder`](@ref), one needs to implement the function `select_update_indices`.
struct CustomOrder <: FrankWolfe.BlockCoordinateUpdateOrder end

function FrankWolfe.select_update_indices(::CustomOrder, state::FrankWolfe.CallbackState, dual_gaps)
return [rand() < 1 / n ? 1 : 2 for _ in 1:length(state.lmo.lmos)]
end

# We run the block-coordinate Frank-Wolfe method with the different update orders and store the trajectories.

trajectories = []

for order in [
FrankWolfe.FullUpdate(),
FrankWolfe.CyclicUpdate(),
FrankWolfe.StochasticUpdate(),
CustomOrder(),
]

_, _, _, _, traj_data = FrankWolfe.block_coordinate_frank_wolfe(
f,
grad!,
prod_lmo,
x0;
verbose=true,
trajectory=true,
update_order=order,
)
push!(trajectories, traj_data)
end
# ### Plotting the results
labels = ["Full update", "Cyclic order", "Stochastic order", "Custom order"]
plot_trajectories(trajectories, labels, xscalelog=true)

# ## Running BCFW with different update methods
# As a second step, we compare different update steps. We consider the [`FrankWolfe.BPCGStep`](@ref) and the [`FrankWolfe.FrankWolfeStep`](@ref).
# One can either pass a tuple of [`FrankWolfe.UpdateStep`](@ref) objects to define the update procedure for each block individually, or pass a single update step so that all blocks use the same procedure.

trajectories = []

for us in [(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), (FrankWolfe.FrankWolfeStep(), FrankWolfe.BPCGStep()), FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()]

_, _, _, _, traj_data = FrankWolfe.block_coordinate_frank_wolfe(
f,
grad!,
prod_lmo,
x0;
verbose=true,
trajectory=true,
update_step=us,
)
push!(trajectories, traj_data)
end
# ### Plotting the results
labels = ["BPCG FW", "FW BPCG", "BPCG", "FW"]
plot_trajectories(trajectories, labels, xscalelog=true)
24 changes: 12 additions & 12 deletions src/alternating_methods.jl
@@ -69,34 +69,34 @@ function alternating_linear_minimization(
return storage.blocks = gradf.blocks + t
end

infeasibility(x) = sum(
dist2(x) = sum(
fast_dot(x.blocks[i] - x.blocks[j], x.blocks[i] - x.blocks[j]) for i in 1:N for j in 1:i-1
)

f_bc(x) = sum(f(x.blocks[i]) for i in 1:N) + lambda * infeasibility(x)
f_bc(x) = sum(f(x.blocks[i]) for i in 1:N) + lambda * dist2(x)

infeasibilities = []
dist2_data = []
if trajectory
function make_infeasibitly_callback(callback)
return function callback_infeasibility(state, args...)
push!(infeasibilities, infeasibility(state.x))
function make_dist2_callback(callback)
return function callback_dist2(state, args...)
push!(dist2_data, dist2(state.x))
if callback === nothing
return true
end
return callback(state, args...)
end
end

callback = make_infeasibitly_callback(callback)
callback = make_dist2_callback(callback)
end

if verbose
println("\nAlternating Linear Minimization (ALM).")
print("LAMBDA: $lambda")

format_string = "%14e\n"
headers = ("Infeas",)
format_state(state, args...) = (Float64(infeasibility(state.x)),)
headers = ("Dist2",)
format_state(state, args...) = (Float64(dist2(state.x)),)

callback = make_print_callback_extension(
callback,
@@ -120,10 +120,10 @@
)

if trajectory
traj_data = [(t...,infeasibilities[i]) for (i,t) in enumerate(traj_data)]
return x, v, primal, dual_gap, infeasibility(x), traj_data
traj_data = [(t...,dist2_data[i]) for (i,t) in enumerate(traj_data)]
return x, v, primal, dual_gap, dist2(x), traj_data
else
return x, v, primal, dual_gap, infeasibility(x), traj_data
return x, v, primal, dual_gap, dist2(x), traj_data
end
end

6 changes: 3 additions & 3 deletions src/block_coordinate_algorithms.jl
@@ -195,7 +195,7 @@ function update_iterate(
# (proof follows with minor modification when estimating the step)
#println("Gaps: ", local_gap, " ", dual_gap)
#println(fast_dot(gradient, x), " ", fast_dot(gradient, v))
if t > 1 && local_gap ≥ dual_gap / s.lazy_tolerance
if t > 1 && local_gap > dual_gap / s.lazy_tolerance
d = muladd_memory_mode(memory_mode, d, a, v_local)
vertex_taken = v_local
gamma_max = a_lambda
@@ -461,7 +461,7 @@ function block_coordinate_frank_wolfe(
first_iter = false

xold = copy(x)
for i in update_indices
Threads.@threads for i in update_indices

function extend(y)
bv = copy(xold)
@@ -476,7 +476,7 @@
@. storage = big_storage.blocks[i]
end

dual_gaps[i], v, d, gamma, tt = update_iterate(
dual_gaps[i], v.blocks[i], d, gamma, tt = update_iterate(
update_step[i],
x.blocks[i],
lmo.lmos[i],
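The block updates above are now run under `Threads.@threads`, so the parallelism only takes effect when Julia is started with more than one thread, e.g. via `julia --threads=4` or the `JULIA_NUM_THREADS` environment variable; a quick check:

```julia
using Base.Threads

# Number of threads available to Threads.@threads; with 1, the block updates still run sequentially.
println(nthreads())
```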
14 changes: 14 additions & 0 deletions src/block_oracles.jl
@@ -1,3 +1,9 @@
"""
BlockVector{T, MT <: AbstractArray{T}, ST <: Tuple} <: AbstractVector{T}

Represents a vector consisting of blocks. `T` is the element type of the vector, `MT` is the type of the underlying data array, and `ST` is the type of the tuple representing the sizes of each block.
Each block can be accessed with the `blocks` field, and the sizes of the blocks are stored in the `block_sizes` field.
"""
mutable struct BlockVector{T, MT <: AbstractArray{T}, ST <: Tuple} <: AbstractVector{T}
blocks::Vector{MT}
block_sizes::Vector{ST}
@@ -129,6 +135,14 @@ end

Base.:*(v::BlockVector, s::Number) = s * v

function Base.:/(v::BlockVector, s::Number)
return BlockVector(
v.blocks ./ s,
copy(v.block_sizes),
v.tot_size,
)
end

function LinearAlgebra.dot(v1::BlockVector{T1}, v2::BlockVector{T2}) where {T1, T2}
if size(v1) != size(v2) || length(v1.block_sizes) != length(v2.block_sizes)
throw(DimensionMismatch("$(length(v1)) != $(length(v2))"))
8 changes: 7 additions & 1 deletion test/alternating_methods_tests.jl
@@ -96,14 +96,20 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n))
@test abs(x.blocks[2][1]) < 1e-6

# test the edge case with a zero vector as direction for the step size computation
x, _, _, _, _ = FrankWolfe.alternating_linear_minimization(
x, v, primal, dual_gap, traj_data = FrankWolfe.alternating_linear_minimization(
FrankWolfe.block_coordinate_frank_wolfe,
x -> 0,
(storage, x) -> zero(x),
(lmo1, lmo3),
ones(n),
verbose=true,
line_search=FrankWolfe.Shortstep(2),
callback=(state, args...) -> println(state),
)
println(x)
println(v)
println(primal)
println(dual_gap)

@test norm(x.blocks[1] - zeros(n)) < 1e-6
@test norm(x.blocks[2] - ones(n)) < 1e-6
7 changes: 7 additions & 0 deletions test/blocks.jl
@@ -39,11 +39,18 @@ using SparseArrays
arr4 = 2 * arr3
@test arr4.blocks[1] == 6 * ones(2, 2)
arr5 = FrankWolfe.BlockVector([6 * ones(2, 2), 2 * ones(3, 3)],)
arr6 = arr3/2
arr7 = arr3 * 2
@test arr6.blocks[1] == 1.5 * ones(2, 2)
@test isequal(arr4, arr7)
@test isequal(arr4, arr4)
@test isequal(arr4, arr5)
@test !isequal(arr3, arr4)
@test !isequal(arr2, FrankWolfe.BlockVector([zeros(2, 2)]))

arr8 = FrankWolfe.BlockVector([ones(2, 2), ones(2, 2)])
@test_throws DimensionMismatch dot(arr3, arr8)

end

@testset "FrankWolfe array methods" begin