[WIP] BCG enhancements #121

Merged · 20 commits · Mar 12, 2021
Changes from 8 commits
151 changes: 101 additions & 50 deletions examples/blended_cg.jl
@@ -2,9 +2,11 @@ import FrankWolfe
using LinearAlgebra
using Random
using DoubleFloats
using FrankWolfe
using SparseArrays

n = Int(1e4)
k = 3000
n = Int(1000)
k = 10000

s = rand(1:100)
@info "Seed $s"
@@ -13,83 +15,134 @@ s = rand(1:100)
s = 41
Random.seed!(s)

xpi = rand(n);
total = sum(xpi);
const xp = xpi # ./ total;

f(x) = norm(x - xp)^2
"""
[Review comment · Member] What is the triple quote for?

[Reply · Collaborator, author] This is a typo; I was commenting out the first test because I only wanted to check that the second test would work!
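
For reference, a minimal sketch (not part of this diff) of how a block is usually disabled in Julia; `first_test` is a hypothetical placeholder:

# A block comment is skipped by the parser entirely:
#=
x, v = first_test()
=#

# Triple quotes instead create a String literal (whose $ interpolations still
# execute), which is then evaluated and discarded, so they are not a true comment:
"""
x, v = first_test()
"""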

matrix = rand(n,n)
hessian = transpose(matrix) * matrix
linear = rand(n)
f(x) = dot(linear, x) + 0.5*transpose(x) * hessian * x
function grad!(storage, x)
@. storage = 2 * (x - xp)
end

# better for memory consumption as we do coordinate-wise ops

function cf(x, xp)
return @. norm(x - xp)^2
storage .= linear + hessian * x
end
L = eigmax(hessian)
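# Since f(x) = dot(linear, x) + x' * hessian * x / 2 with hessian = matrix' * matrix
# symmetric positive semidefinite, grad f(x) = linear + hessian * x, and the
# gradient's Lipschitz constant is the largest eigenvalue of the Hessian,
# hence L = eigmax(hessian).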

function cgrad!(storage, x, xp)
return @. storage = 2 * (x - xp)
end
# Run over the probability simplex
lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))

# this LMO might produce numerical instabilities to demonstrate the recovery feature
const lmo = FrankWolfe.KSparseLMO(100, 1.0)
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_accel_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = true,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)
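# A gloss on the keywords above (my reading, not library documentation):
# `hessian` supplies the exact Hessian of the quadratic so the inner descent
# steps can exploit it, `accelerated = true` selects the accelerated variant of
# the descent over the active set, `Ktolerance` appears to set the tolerance
# factor used when choosing between a Frank-Wolfe step and an active-set step,
# and `weight_purge_threshold` drops active-set atoms whose convex weights
# fall below 1e-10.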

# full upgrade of the LMO (and hence the optimization) to Double64.
# the same LMO with Double64 is much more numerically robust and costs relatively little in speed.
# const lmo = FrankWolfe.KSparseLMO(100, Double64(1.0))
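# (Background, not from this diff: Double64 from DoubleFloats.jl stores a value
# as an unevaluated sum of two Float64s, roughly doubling the precision at a
# modest cost, which is why the same LMO becomes far more robust.)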
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = false,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

# as above but now with BigFloat
# the same LMO here with BigFloat: even more robust but much slower
# const lmo = FrankWolfe.KSparseLMO(100, big"1.0")
x0 = deepcopy(x00)
x, v, primal, dual_gap, trajectoryBCG_convex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
emphasis=FrankWolfe.memory,
L=L,
verbose=true,
trajectory=true,
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

# other oracles to test / experiment with
# const lmo = FrankWolfe.LpNormLMO{Float64,1}(1.0)
# const lmo = FrankWolfe.ProbabilitySimplexOracle(Double64(1.0));
# const lmo = FrankWolfe.UnitSimplexOracle(1.0);
data = [trajectoryBCG_accel_simplex, trajectoryBCG_simplex, trajectoryBCG_convex]
label = ["BCG (accel simplex)", "BCG (simplex)", "BCG (convex)"]
FrankWolfe.plot_trajectories(data, label)
"""

const x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))

FrankWolfe.benchmark_oracles(x -> cf(x, xp), (str, x) -> cgrad!(str, x, xp), ()->randn(n), lmo; k=100)
matrix = rand(n,n)
hessian = transpose(matrix) * matrix
linear = rand(n)
f(x) = dot(linear, x) + 0.5*transpose(x) * hessian * x
function grad!(storage, x)
storage .= linear + hessian * x
end
L = eigmax(hessian)

# copying x00 here and below because the algorithms write back into their input variables to save memory;
# as we do multiple runs from the same initial point, we do not want that here.
# Run over the K-sparse polytope
lmo = FrankWolfe.KSparseLMO(100, 100.0)
x00 = FrankWolfe.compute_extreme_point(lmo, zeros(n))
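# The K-sparse polytope with K = 100 and radius tau = 100.0 is the convex hull
# of vectors with at most K nonzero entries, each bounded by tau in absolute
# value; equivalently, the intersection of the l1 ball of radius K * tau with
# the l-infinity ball of radius tau.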

x0 = deepcopy(x00)

@time x, v, primal, dual_gap, trajectorySs = FrankWolfe.fw(
x, v, primal, dual_gap, trajectoryBCG_accel_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.shortstep,
L=2,
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = true,
verbose=true,
trajectory=true,
);
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)

x0 = deepcopy(x00)

@time x, v, primal, dual_gap, trajectoryAda = FrankWolfe.afw(
x, v, primal, dual_gap, trajectoryBCG_simplex = FrankWolfe.bcg(
f,
grad!,
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.adaptive,
L=100,
print_iter=k / 10,
hessian = hessian,
emphasis=FrankWolfe.memory,
L=L,
accelerated = false,
verbose=true,
trajectory=true,
);
Ktolerance=1.00,
weight_purge_threshold=1e-10,
)
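# Identical setup to the previous run except accelerated = false, so comparing
# trajectoryBCG_accel_simplex with trajectoryBCG_simplex isolates the effect of
# acceleration in the inner descent.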

x0 = deepcopy(x00)

x, v, primal, dual_gap, trajectoryBCG = FrankWolfe.bcg(
x, v, primal, dual_gap, trajectoryBCG_convex = FrankWolfe.bcg(
f,
grad!,
lmo,
Expand All @@ -98,15 +151,13 @@ x, v, primal, dual_gap, trajectoryBCG = FrankWolfe.bcg(
line_search=FrankWolfe.adaptive,
print_iter=k / 10,
emphasis=FrankWolfe.memory,
L=2,
L=L,
verbose=true,
trajectory=true,
Ktolerance=1.00,
goodstep_tolerance=0.95,
weight_purge_threshold=1e-10,
)
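# No hessian is passed in this run, so bcg presumably falls back to its generic
# routine for smooth convex objectives, matching the "BCG (convex)" label below.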

data = [trajectorySs, trajectoryAda, trajectoryBCG]
label = ["short step", "AFW", "BCG"]

FrankWolfe.plot_trajectories(data, label)
data = [trajectoryBCG_accel_simplex, trajectoryBCG_simplex, trajectoryBCG_convex]
label = ["BCG (accel simplex)", "BCG (simplex)", "BCG (convex)"]
FrankWolfe.plot_trajectories(data, label)
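# plot_trajectories draws the three runs side by side; each entry of `data` is
# paired with the entry of `label` at the same position.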