Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: ['1.5']
julia-version: ['1.6']
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v2
Expand Down
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Hyperopt"
uuid = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
author = ["Fredrik Bagge Carlson <baggepinnen@gmail.com>"]
version = "0.4.4"
version = "0.5.0"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand All @@ -14,14 +14,17 @@ RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
ThreadPools = "b189fb0b-2eb5-4ed4-bc0c-d34c51242431"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
MultiKDE = "77543b7f-bd95-4024-91c1-46775346e0e7"

[compat]
Juno = "0.7, 0.8"
LatinHypercubeSampling = "1.2"
MacroTools = "0.5"
MultiKDE = "0.1"
RecipesBase = "0.7, 0.8, 1.0"
ThreadPools = "1, 2"
julia = "1.5"
julia = "1.6"

[extras]
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Expand Down
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ a (simple) working example using `Hyperband` and Optim is
using Optim
f(a;c=10) = sum(@. 100 + (a-3)^2 + (c-100)^2)
hohb = @hyperopt for i=18, sampler=Hyperband(R=50, η=3, inner=RandomSampler()), a = LinRange(1,5,1800), c = exp10.(LinRange(-1,3,1800))
if !(state === nothing)
if state !== nothing
a,c = state
end
res = Optim.optimize(x->f(x[1],c=x[2]), [a,c], SimulatedAnnealing(), Optim.Options(f_calls_limit=i))
Expand All @@ -194,7 +194,7 @@ hohb = @hyperopt for i=18, sampler=Hyperband(R=50, η=3, inner=RandomSampler()),
algorithm = [SimulatedAnnealing(), ParticleSwarm(), NelderMead(), BFGS(), NewtonTrustRegion()],
a = LinRange(1,5,1800),
c = exp10.(LinRange(-1,3,1800))
if !(state === nothing)
if state !== nothing
x0,algorithm = state
else
x0 = [a,c]
Expand Down Expand Up @@ -225,6 +225,31 @@ candidates = (a=LinRange(1,5,300), c=exp10.(LinRange(-1,3,300))) # A vector of v
hohb = hyperband(objective, candidates; R=50, η=3, threads=true)
```

## BOHB
[BOHB: Robust and Efficient Hyperparameter Optimization at Scale](https://arxiv.org/abs/1807.01774) refines Hyperband by replacing the random sampler with a Bayesian-optimization-based sampler. To use it, simply set the `inner` sampler of `Hyperband` to `BOHB(dims=[<dims>...])`.

### Example
```julia
using Optim
hb = @hyperopt for i=18, sampler=Hyperband(R=50, η=3, inner=RandomSampler()), a = LinRange(1,5,800), c = exp10.(LinRange(-1,3,1800))
if state !== nothing
a,c = state
end
res = Optim.optimize(x->f(x[1],c=x[2]), [a,c], NelderMead(), Optim.Options(f_calls_limit=i))
Optim.minimum(res), Optim.minimizer(res)
end

# Using BOHB with same setting, remember to specify dimension types!
bohb = @hyperopt for i=18, sampler=Hyperband(R=50, η=3, inner=BOHB(dims=[Hyperopt.Continuous(), Hyperopt.Continuous()])), a = LinRange(1,5,800), c = exp10.(LinRange(-1,3,1800))
if state !== nothing
a,c = state
end
res = Optim.optimize(x->f(x[1],c=x[2]), [a,c], NelderMead(), Optim.Options(f_calls_limit=i))
Optim.minimum(res), Optim.minimizer(res)
end
```


# Parallel execution
- The macro `@phyperopt` works in the same way as `@hyperopt` but distributes all computation on available workers. The usual caveats apply, code must be loaded on all workers etc.
- The macro `@thyperopt` uses `ThreadPools.tmap` to evaluate the objective on all available threads. Beware of high memory consumption if your objective allocates a lot of memory.
13 changes: 12 additions & 1 deletion src/Hyperopt.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module Hyperopt

export Hyperoptimizer, @hyperopt, @phyperopt, @thyperopt, printmin, printmax
export RandomSampler, BlueNoiseSampler, LHSampler, CLHSampler, Continuous, Categorical, Hyperband, hyperband
export RandomSampler, BlueNoiseSampler, LHSampler, CLHSampler, hyperband, Hyperband, BOHB, Continuous, Categorical, UnorderedCategorical

using Base.Threads: threadid, nthreads
using LinearAlgebra, Statistics, Random
Expand All @@ -12,9 +12,20 @@ using RecipesBase
using Distributed
using LatinHypercubeSampling
using ThreadPools
using Distributions: Normal, truncated
using MultiKDE

const HO_RNG = [MersenneTwister(rand(1:1000)) for _ in 1:nthreads()]

const DimensionType = LHCDimension

# # Types of dimensions
# const CategoricalDim = Categorical
# const ContinuousDim = Continuous
# Dimension type for a categorical hyperparameter (presumably one whose levels
# have no meaningful order, as the name suggests — TODO confirm).
# `levels` is the number of categories; it is forwarded to
# `MultiKDE.UnorderedCategoricalDim` when the KDEs for BOHB are constructed.
struct UnorderedCategorical <: DimensionType
levels::Int64
end

abstract type Sampler end
Base.@kwdef mutable struct Hyperoptimizer{S<:Sampler, F}
iterations::Int
Expand Down
182 changes: 176 additions & 6 deletions src/samplers.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
"""
Sample a value for each parameter uniformly at random from the candidate vectors. Log-uniform sampling is available by providing a log-spaced candidate vector.
"""
struct RandomSampler <: Sampler end
Expand Down Expand Up @@ -64,7 +64,7 @@ Base.@kwdef mutable struct Hyperband <: Sampler
R
η = 3
minimum = (Inf,)
inner = RandomSampler()
inner::Sampler = RandomSampler()
end
Hyperband(R) = Hyperband(R=R)

Expand All @@ -87,7 +87,7 @@ end

function hyperband(ho::Hyperoptimizer{Hyperband}; threads=false)
hb = ho.sampler
R,η = hb.R, hb.η
R, η = hb.R, hb.η
hb.minimum = (Inf,)
smax = floor(Int, log(η,R))
B = (smax + 1)*R # B is budget
Expand Down Expand Up @@ -127,8 +127,9 @@ function successive_halving(ho, n, r=1, s=round(Int, log(hb.η, n)); threads=fal

append!(ho.history, T)
append!(ho.results, L)

# Find top K candidates
if hb.inner isa BOHB
update_observations(ho, rᵢ, T, L)
end
perm = sortperm(L)
besti = perm[1]
if L[besti] < minimum[1]
Expand Down Expand Up @@ -157,4 +158,173 @@ function hyperband(f, candidates; R, η=3, inner = RandomSampler(), threads=fals
)
hyperband(ho; threads)
ho
end
end

# BOHB ====================================================================
# Acknowledgement: The code structure follows the official implementation of BOHB in ['HpBandSter'](https://github.com/automl/HpBandSter)
#
# Copyright of HpBandSter:
#
# Copyright (c) 2017-2018, ML4AAD
# All rights reserved.

# One evaluated configuration kept by BOHB: the sampled hyperparameter values,
# how many of them there are, and the loss they produced.
mutable struct ObservationsRecord
    dim::Int                            # number of entries in `observation`
    observation::Union{Vector, Tuple}   # sampled hyperparameter values
    loss::Real                          # loss recorded for `observation`
end

# Convenience constructor: `dim` is taken from the length of `observation`.
ObservationsRecord(observation, loss) =
    ObservationsRecord(length(observation), observation, loss)

"""
    BOHB(; dims=nothing, N_min=nothing, ρ=1/3, q=0.15, N_s=64, bw_factor=3, min_bandwidth=1e-3, ...)

BOHB sampler, used as the `inner` sampler of `Hyperband`.
All variable names refer to symbols in the [paper](https://arxiv.org/pdf/1807.01774v1.pdf).

User-facing hyperparameters:
- `dims`: Dimension type of each hyperparameter (e.g. `Continuous()`, `Categorical(n)`, `UnorderedCategorical(n)`)
- `N_min`: Minimum number of points to build a model. If left as `nothing`, it is set to `length(ho.candidates) + 1` the first time observations are recorded
- `ρ`: Fraction of random samples
- `q`: Fraction of best observations to build l and g
- `N_s`: Sample batch number (number of proposals scored per draw)
- `bw_factor`: Bandwidth factor
- `min_bandwidth`: Lower bound on the KDE bandwidths (not in the paper, used in the official implementation)
- `random_sampler`: Sampler used for the random draws

Internal state, updated while the optimization runs:
- `D`: Evaluated observations, keyed by budget
- `max_valid_budget`: Maximum budget i that |D_{i}| is big enough to fit a model
- `N_b`: |D_{max_valid_budget}|
- `KDE_good`: KDE consists of "good observations", see BOHB paper
- `KDE_bad`: KDE consists of "bad observations", see BOHB paper
"""
Base.@kwdef mutable struct BOHB <: Sampler
dims::Union{Vector{DimensionType}, Nothing}=nothing
# hyperparameters for BOHB
N_min::Union{Int, Nothing} = nothing
ρ::AbstractFloat = 1/3
q::AbstractFloat = 0.15
N_s::Int = 64
bw_factor::Real = 3
"minimum bandwidth: this parameter doesn't occur in the paper but used in the official implementation"
min_bandwidth::Real = 1e-3
"Random sampler used for random sampling in BOHB algorithm"
random_sampler::RandomSampler = RandomSampler()
# Context data (filled in during optimization, not meant to be set by the user)
## Current observations, stored in a Dict whose values are the observation arrays used to fit the KDEs
## key of D: a real number representing the budget
## value of D: a vector of ObservationsRecord, all the records of the corresponding budget
D::Dict{Real, Vector{ObservationsRecord}} = Dict{Real, Vector{ObservationsRecord}}()
"Current maximum budget that |D_{b}| > N_{min}+2, means it is valid for fit KDEs"
max_valid_budget::Union{Number, Nothing} = nothing
"|D| of max_valid_budget"
N_b::Union{Int, Nothing} = nothing
## Good and bad kernel density estimators; `nothing` until enough observations exist
KDE_good::Union{MultiKDE.KDEMulti, Nothing} = nothing
KDE_bad::Union{MultiKDE.KDEMulti, Nothing} = nothing
end

# Functor form of the BOHB sampler: propose the next configuration to evaluate.
function (s::BOHB)(ho, iter)
    # Fall back to the random sampler with probability ρ, and also whenever no
    # budget has gathered enough observations yet (max_valid_budget is nothing),
    # because the KDEs needed for model-based sampling do not exist in that case.
    if rand() < s.ρ || s.max_valid_budget === nothing
        return s.random_sampler(ho, iter)
    end

    # Draw N_s proposals from the "good" KDE, then keep the one with the
    # highest l(x)/g(x) score.
    proposals = [sample_potential_hyperparam(s.KDE_good, s.min_bandwidth, s.bw_factor) for _ in 1:s.N_s]
    ratios = map(proposal -> score(proposal, s.KDE_good, s.KDE_bad), proposals)
    winner = argmax(ratios)
    return [proposals[winner]]
end

# Score of a proposal, l(x)/g(x): density under the "good" KDE divided by the
# density under the "bad" KDE (line 6 of Algorithm 2 in the paper).
function score(sample::Vector, KDE_good::MultiKDE.KDEMulti, KDE_bad::MultiKDE.KDEMulti)
    l = pdf(KDE_good, sample)
    g = pdf(KDE_bad, sample)
    return l / g
end

# Record the configurations evaluated at budget `rᵢ` together with their
# losses, and refit the KDEs once that budget has enough observations.
function update_observations(ho::Hyperoptimizer{Hyperband}, rᵢ, observations, losses)
    bohb = ho.sampler.inner

    # The history handed over by hyperband stores each configuration in
    # reversed order, so flip every entry back before recording it.
    configs = reverse.(observations)

    # Fetch the record list for this budget, creating it on first use.
    bucket = get!(() -> ObservationsRecord[], bohb.D, rᵢ)
    for (config, loss) in zip(configs, losses)
        push!(bucket, ObservationsRecord(config, loss))
    end
    n_records = length(bucket)

    # Default N_min: one more than the number of hyperparameters.
    if bohb.N_min === nothing
        bohb.N_min = length(ho.candidates) + 1
    end

    # Only refit when this budget has more than N_min + 2 records and is at
    # least as large as the budget the current model was fitted on.
    n_records > bohb.N_min + 2 || return nothing
    (bohb.max_valid_budget === nothing || rᵢ >= bohb.max_valid_budget) || return nothing

    bohb.max_valid_budget, bohb.N_b = rᵢ, n_records
    update_KDEs(ho)
end

# Refit the "good" and "bad" KDEs of the BOHB sampler from the records stored
# for `max_valid_budget`.
#
# The split sizes follow Eqs. (2) and (3) in the paper:
#   N_bl = max(N_min, floor(q * N_b))   -> the N_bl lowest-loss records build l(x)
#   N_bg = max(N_min, N_b - N_bl)       -> the N_bg highest-loss records build g(x)
function update_KDEs(ho::Hyperoptimizer{Hyperband})
    bohb = ho.sampler.inner
    records = bohb.D[bohb.max_valid_budget]
    N_bl = max(bohb.N_min, floor(Int, bohb.q * bohb.N_b))
    N_bg = max(bohb.N_min, bohb.N_b - N_bl)
    # Ascending by loss: best records first, worst records last.
    sort_idx = sortperm(records, by = d -> d.loss)
    idx_N_bl = sort_idx[begin:N_bl]
    # Take the N_bg worst records from the tail. The earlier slice
    # `reverse(sort_idx)[N_bg:end]` picked the lowest-loss records instead, so
    # the "bad" KDE was fitted on almost the same points as the "good" one.
    idx_N_bg = sort_idx[(end - N_bg + 1):end]
    bohb.KDE_good = KDEMulti(bohb.dims, records[idx_N_bl], bohb.min_bandwidth, ho.candidates)
    bohb.KDE_bad = KDEMulti(bohb.dims, records[idx_N_bg], bohb.min_bandwidth, ho.candidates)
end

# sample from KDEMulti
# Draw one candidate configuration by picking a random stored observation of
# `kde` and perturbing it dimension by dimension. Returns a `Vector` holding one
# value per dimension.
# - `min_bandwidth`: lower bound applied to every per-dimension bandwidth
# - `bw_factor`: multiplier applied to the bandwidth of continuous dimensions
function sample_potential_hyperparam(kde::MultiKDE.KDEMulti, min_bandwidth, bw_factor)
# random index along the second axis of `kde.mat_observations`
idx = rand(1:size(kde.mat_observations)[2])
# value at that index from every entry of `kde.observations`
# (presumably one entry per dimension — verify against MultiKDE)
param = [kde.observations[i][idx] for i in 1:length(kde.observations)]
sample = Vector()
for (_i, _param, dim_type, _kde) in zip(1:length(kde.dims), param, kde.dims, kde.KDEs)
bw = max(_kde.bandwidth, min_bandwidth)
local ele
if dim_type isa MultiKDE.ContinuousDim
# continuous: widen the bandwidth and draw from a truncated normal centred on `_param`
bw = bw*bw_factor
# NOTE(review): Distributions.jl `truncated(d, l, u)` takes the bounds in the
# units of the variable itself, whereas `-_param/bw` and `(1-_param)/bw` look like
# the standardized bounds scipy's `truncnorm` expects for data scaled to [0, 1].
# Confirm that these are the intended limits for unscaled candidates.
ele = rand(truncated(Normal(_param, bw), -_param/bw, (1-_param)/bw))
elseif dim_type isa Union{MultiKDE.CategoricalDim, MultiKDE.UnorderedCategoricalDim}
# categorical: keep `_param` with probability 1-bw, otherwise pick a level uniformly.
# `dim_type.levels` is served by the `Base.getproperty` method defined later in this
# file, which reads the `level` field.
ele = rand() < (1-bw) ? _param : rand(1:dim_type.levels)
else
error(string("Dim type ", string(dim_type), " not supported. "))
end
# for dimensions flagged in `kde.mapped`, translate the drawn value through the
# lookup stored for this dimension's KDE
if kde.mapped[_i]
ele = kde.index_to_unordered[_kde][ele]
end
push!(sample, ele)
end
sample
end

# Constructor extensions and adapters for MultiKDE.jl
# Lookup table from each Hyperopt dimension type to the MultiKDE dimension type
# that is instantiated in its place when a `KDEMulti` is built.
const DIMENSION_TYPE = Dict(Categorical=>MultiKDE.CategoricalDim, Continuous=>MultiKDE.ContinuousDim, UnorderedCategorical=>MultiKDE.UnorderedCategoricalDim)

# Build a `KDEMulti` from BOHB observation records and clamp every
# per-dimension bandwidth from below at `min_bandwidth`.
# All records must share the same `dim`.
function MultiKDE.KDEMulti(dim_types::Vector{DimensionType}, records::Vector{ObservationsRecord}, min_bandwidth::Real, candidates::Tuple)
    expected_dim = records[1].dim
    observations = Vector{Vector}()
    for record in records
        @assert record.dim == expected_dim "All observations need to be same dimension. "
        values = record.observation
        # Tuples are turned into vectors; vectors are stored as they are.
        push!(observations, values isa Tuple ? [v for v in values] : values)
    end

    multi_kde = KDEMulti(dim_types, observations, candidates)
    # Enforce the minimum bandwidth on each univariate KDE.
    for univariate in multi_kde.KDEs
        univariate.bandwidth = max(univariate.bandwidth, min_bandwidth)
    end
    return multi_kde
end

# Adapter: translate Hyperopt dimension types into their MultiKDE counterparts
# (via `DIMENSION_TYPE`) and delegate to MultiKDE's own constructor.
function MultiKDE.KDEMulti(dims::Vector{DimensionType}, observations::Vector, candidates::Tuple)
    kde_dims = MultiKDE.DimensionType[]
    for dim in dims
        target = DIMENSION_TYPE[typeof(dim)]
        # Continuous dimensions take no arguments; the categorical kinds need
        # the number of levels.
        converted = target === MultiKDE.ContinuousDim ? target() : target(dim.levels)
        push!(kde_dims, converted)
    end
    return MultiKDE.KDEMulti(kde_dims, observations, candidates)
end

function Base.getproperty(dim_type::Union{MultiKDE.CategoricalDim, MultiKDE.UnorderedCategoricalDim}, v::Symbol)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this method do? It simply calls getfield no matter what argument is supplied?

if v === :levels
return getfield(dim_type, :level)
end
getfield(dim_type, v)
end
14 changes: 10 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ f(a,b=true;c=10) = sum(@. 100 + (a-3)^2 + (b ? 10 : 20) + (c-100)^2) # This func
@test length(hol.results) == 200


hocl = @hyperopt for i=100, sampler=CLHSampler(dims=[Continuous(),Categorical(2),Continuous()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
# hocl = @hyperopt for i=100, sampler=CLHSampler(dims=[Hyperopt.ContinuousDim(),Hyperopt.CategoricalDim(2),Hyperopt.ContinuousDim()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
hocl = @hyperopt for i=100, sampler=CLHSampler(dims=[Hyperopt.Continuous(),Hyperopt.Categorical(2),Hyperopt.Continuous()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the Hyperopt. here? these names are exported by Hyperopt

# println(i, "\t", a, "\t", b, "\t", c)
f(a,b,c=c)
end
@test minimum(hocl) < 300
@hyperopt for i=100, ho = hocl, sampler=CLHSampler(dims=[Continuous(),Categorical(2),Continuous()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
# @hyperopt for i=100, ho = hocl, sampler=CLHSampler(dims=[Hyperopt.ContinuousDim(),Hyperopt.CategoricalDim(2),Hyperopt.ContinuousDim()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can these comments be cleared up?

@hyperopt for i=100, ho = hocl, sampler=CLHSampler(dims=[Hyperopt.Continuous(),Hyperopt.Categorical(2),Hyperopt.Continuous()]), a = LinRange(1,5,100), b = [true, false], c = exp10.(LinRange(-1,3,100))
# println(i, "\t", a, "\t", b, "\t", c)
f(a,b,c=c)
end
Expand All @@ -76,7 +78,8 @@ f(a,b=true;c=10) = sum(@. 100 + (a-3)^2 + (b ? 10 : 20) + (c-100)^2) # This func
f(a,b,c=c)
end

@test_throws ArgumentError @hyperopt for i=100, sampler=CLHSampler(dims=[Continuous(),Categorical(2),Continuous()]), a = LinRange(1,5,99), b = [true, false], c = exp10.(LinRange(-1,3,100))
# @test_throws ArgumentError @hyperopt for i=100, sampler=CLHSampler(dims=[Hyperopt.ContinuousDim(),Hyperopt.CategoricalDim(2),Hyperopt.ContinuousDim()]), a = LinRange(1,5,99), b = [true, false], c = exp10.(LinRange(-1,3,100))
# `a` has 99 candidates while 100 iterations are requested; this differs from the
# passing CLHSampler cases only in that length and is expected to raise an
# ArgumentError. The dimension types are spelled exactly as in those cases
# (`Hyperopt.Categorical`, not `Hyperopt.CategoricalDim`) so that the candidate
# length is the only thing that can trigger the error.
@test_throws ArgumentError @hyperopt for i=100, sampler=CLHSampler(dims=[Hyperopt.Continuous(),Hyperopt.Categorical(2),Hyperopt.Continuous()]), a = LinRange(1,5,99), b = [true, false], c = exp10.(LinRange(-1,3,100))
    # println(i, "\t", a, "\t", b, "\t", c)
    f(a,b,c=c)
end
Expand Down Expand Up @@ -272,5 +275,8 @@ f(a,b=true;c=10) = sum(@. 100 + (a-3)^2 + (b ? 10 : 20) + (c-100)^2) # This func


end

@testset "BOHB" begin
@info "Testing BOHB"
include("test_BOHB.jl")
end
end
Loading