[WIP] BCG enhancements #121

Merged (20 commits) on Mar 12, 2021
145 changes: 98 additions & 47 deletions examples/blended_cg.jl
@@ -2,9 +2,11 @@ import FrankWolfe
using LinearAlgebra
using Random
using DoubleFloats
using FrankWolfe
using SparseArrays

n = Int(1e4)
k = 3000
n = 1000
k = 10000

s = rand(1:100)
@info "Seed $s"
@@ -13,83 +15,134 @@ s = rand(1:100)
s = 41
Random.seed!(s)

xpi = rand(n);
total = sum(xpi);
const xp = xpi # ./ total;

f(x) = norm(x - xp)^2
matrix = rand(n,n)
hessian = transpose(matrix) * matrix
linear = rand(n)
f(x) = dot(linear, x) + 0.5*transpose(x) * hessian * x
function grad!(storage, x)
@. storage = 2 * (x - xp)
storage .= linear + hessian * x
end
L = eigmax(hessian)
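# The objective above is the quadratic f(x) = ⟨linear, x⟩ + ½ xᵀ hessian x, so its gradient
# is linear + hessian * x and eigmax(hessian) is a valid smoothness constant for ∇f.
# A minimal sanity-check sketch, assuming only the definitions above (not required for the run):
let x_test = randn(n), y_test = randn(n)
    gx = similar(x_test)
    gy = similar(y_test)
    grad!(gx, x_test)
    grad!(gy, y_test)
    @assert gx ≈ linear + hessian * x_test                       # gradient of the quadratic
    @assert norm(gx - gy) <= L * norm(x_test - y_test) + 1e-8    # L-smoothness of the gradient
end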

# better for memory consumption as we do coordinate-wise ops
# Run over the probability simplex
lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))
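# On the probability simplex {v ≥ 0, sum(v) = τ}, minimizing ⟨g, v⟩ puts all mass on the
# coordinate with the smallest entry of g. A stand-alone sketch of such an oracle, for
# illustration only (the example uses the library's ProbabilitySimplexOracle above):
function simplex_extreme_point_sketch(g::AbstractVector, τ=1.0)
    v = zeros(length(g))
    v[argmin(g)] = τ
    return v
end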

function cf(x, xp)
return @. norm(x - xp)^2
end

function cgrad!(storage, x, xp)
return @. storage = 2 * (x - xp)
end
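# The three BCG runs below share the same objective, LMO, and starting point and differ
# only in their keyword arguments: the first passes the explicit hessian together with
# accelerated = true, the second passes the hessian with accelerated = false, and the
# third passes no hessian at all. Their trajectories are compared in the plot further down.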
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_accel_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = true,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

# this LMO might produce numerical instabilities to demonstrate the recovery feature
const lmo = FrankWolfe.KSparseLMO(100, 1.0)
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = false,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

# Full upgrade of the LMO (and hence the optimization) to Double64.
# The same LMO with Double64 is much more numerically robust and costs relatively little in speed.
# const lmo = FrankWolfe.KSparseLMO(100, Double64(1.0))
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_convex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
emphasis=FrankWolfe.memory,
L=L,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

# As above, but now with BigFloat.
# The same LMO with BigFloat is even more robust but much slower.
# const lmo = FrankWolfe.KSparseLMO(100, big"1.0")
data = [trajectoryBCG_accel_simplex, trajectoryBCG_simplex, trajectoryBCG_convex]
label = ["BCG (accel simplex)", "BCG (simplex)", "BCG (convex)"]
FrankWolfe.plot_trajectories(data, label)

# other oracles to test / experiment with
# const lmo = FrankWolfe.LpNormLMO{Float64,1}(1.0)
# const lmo = FrankWolfe.ProbabilitySimplexOracle(Double64(1.0));
# const lmo = FrankWolfe.UnitSimplexOracle(1.0);

const x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))

FrankWolfe.benchmark_oracles(x -> cf(x, xp), (str, x) -> cgrad!(str, x, xp), ()->randn(n), lmo; k=100)
matrix = rand(n,n)
hessian = transpose(matrix) * matrix
linear = rand(n)
f(x) = dot(linear, x) + 0.5*transpose(x) * hessian * x
function grad!(storage, x)
storage .= linear + hessian * x
end
L = eigmax(hessian)

# We copy x00 here and below because the algorithms write back into the variables to save memory.
# Since we do multiple runs from the same initial point, we do not want that here.
# Run over the K-sparse polytope
lmo = FrankWolfe.KSparseLMO(100, 100.0)
x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))
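# The extreme points of the K-sparse polytope with parameters (K, τ) have exactly K nonzero
# entries of magnitude τ, so linear minimization picks the K largest-magnitude entries of the
# gradient and opposes their signs. A stand-alone sketch under that assumption, for
# illustration only (the example uses the library's KSparseLMO above); the same three BCG
# variants as before are then rerun over this feasible region.
function ksparse_extreme_point_sketch(g::AbstractVector, K::Int, τ)
    v = zeros(length(g))
    idx = partialsortperm(abs.(g), 1:K, rev=true)   # indices of the K largest |g_i|
    v[idx] .= -τ .* sign.(g[idx])                   # oppose the gradient sign (ties at 0 not handled)
    return v
end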

x0 = deepcopy(x00)

@time x, v, primal, dual_gap, trajectorySs = FrankWolfe.fw(
x, v, primal, dual_gap, trajectoryBCG_accel_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.shortstep,
L=2,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = true,
verbose=true,
trajectory=true,
);
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

x0 = deepcopy(x00)

@time x, v, primal, dual_gap, trajectoryAda = FrankWolfe.afw(
x, v, primal, dual_gap, trajectoryBCG_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
L=100,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = false,
verbose=true,
trajectory=true,
);
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

x0 = deepcopy(x00)

x, v, primal, dual_gap, trajectoryBCG = FrankWolfe.bcg(
x, v, primal, dual_gap, trajectoryBCG_convex = FrankWolfe.bcg(
f,
grad!,
lmo,
@@ -98,15 +151,13 @@ x, v, primal, dual_gap, trajectoryBCG = FrankWolfe.bcg(
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
emphasis=FrankWolfe.memory,
L=2,
L=L,
verbose=true,
trajectory=true,
Ktolerance=1.00,
goodstep_tolerance=0.95,
weight_purge_threshold=1e-10,
)

data = [trajectorySs, trajectoryAda, trajectoryBCG]
label = ["short step", "AFW", "BCG"]

data = [trajectoryBCG_accel_simplex, trajectoryBCG_simplex, trajectoryBCG_convex]
label = ["BCG (accel simplex)", "BCG (simplex)", "BCG (convex)"]
FrankWolfe.plot_trajectories(data, label)