Merge pull request #32 from alan-turing-institute/grid-refactor
For a 0.3.2 release
ablaom committed Apr 4, 2020
2 parents 9dcfa02 + 941fdc2 commit 6363115
Showing 7 changed files with 148 additions and 73 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MLJTuning"
uuid = "03970b2e-30c4-11ea-3135-d1576263f10f"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.3.1"
version = "0.3.2"

[deps]
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
62 changes: 33 additions & 29 deletions README.md
@@ -291,38 +291,42 @@ Grid(; goal=nothing, resolution=10, shuffle=true,

Generally new types are defined for each class of range object a
tuning strategy would like to handle, and the tuning strategy
functions to be implemented are dispatched on these types. Here are
the range objects supported by `Grid`:

- one-dimensional `NumericRange` or `NominalRange` objects (of
abstract type `ParamRange`) provided by MLJBase.

- a tuple `(p, r)` where `p` is one of the above range objects, and
`r` a resolution to override the default `resolution` of the
strategy

- vectors of objects of the above form, e.g., `[r1, (r2, 5), r3]`
where `r1` and `r2` are `NumericRange` objects and `r3` a
`NominalRange` object.

Both `NumericRange` and `NominalRange` are constructed with the
`MLJBase` extension to the `range` function. Use the `iterator` and
`sampler` methods to convert ranges into one-dimensional grids, or into
objects for random sampling, respectively. See the docstrings for details.

Recall that `NominalRange` has a `values` field, while `NumericRange`
has the fields `upper`, `lower`, `scale`, `unit` and `origin`. The
`unit` field specifies a preferred length scale, while `origin` specifies a
preferred "central value". These default to `(upper - lower)/2` and
`(upper + lower)/2`, respectively, in the bounded case (neither `upper
= Inf` nor `lower = -Inf`). The fields `origin` and `unit` are used in
generating grids or fitting probability distributions to unbounded
ranges.
functions to be implemented are dispatched on these types. It is
recommended that every tuning strategy support at least these types:

- one-dimensional ranges `r`, where `r` is a `MLJBase.ParamRange` instance

- (optional) pairs of the form `(r, data)`, where `data` is metadata,
such as a resolution in a grid search, or a distribution in a random
search

- abstract vectors whose elements are of the above form

Recall that `ParamRange` has two concrete subtypes `NumericRange` and
`NominalRange`, whose instances are constructed with the `MLJBase`
extension to the `range` function.
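
For concreteness, here is a minimal sketch of constructing such ranges;
the `ToyModel` is hypothetical and defined only for illustration:

```julia
using MLJBase

mutable struct ToyModel <: Deterministic  # hypothetical model
    K::Int
    kernel::Char
end
model = ToyModel(5, 'c')

r1 = range(model, :K, lower=1, upper=100, scale=:log10)  # a NumericRange
r2 = range(model, :kernel, values=['c', 'd'])            # a NominalRange

# the three forms listed above:
r1            # a bare one-dimensional range
(r1, 5)       # a range paired with metadata (here, a grid resolution)
[(r1, 5), r2] # an abstract vector of such objects
```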

Note in particular that a `NominalRange` has a `values` field, while
`NumericRange` has the fields `upper`, `lower`, `scale`, `unit` and
`origin`. The `unit` field specifies a preferred length scale, while
`origin` specifies a preferred "central value". These default to `(upper -
lower)/2` and `(upper + lower)/2`, respectively, in the bounded case
(neither `upper = Inf` nor `lower = -Inf`). The fields `origin` and
`unit` are used in generating grids or fitting probability
distributions to unbounded ranges.
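
For example (a sketch reusing the hypothetical `ToyModel` above):

```julia
r = range(model, :K, lower=1, upper=9)
r.unit    # defaults to (9 - 1)/2 == 4.0
r.origin  # defaults to (9 + 1)/2 == 5.0
```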

A `ParamRange` object is always associated with the name of a
hyperparameter (a field of the prototype in the context of tuning)
which is recorded in its `field` attribute, but for composite models
this might be a "nested name", such as `:(atom.max_depth)`.
which is recorded in its `field` attribute, a `Symbol`, but for
composite models this might be an `Expr`, such as
`:(atom.max_depth)`.

Use the `iterator` and `sampler` methods to convert ranges into
one-dimensional grids, or into objects for random sampling,
respectively. See the
[tuning
section](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#API-1)
of the MLJ manual or doc-strings for more on these methods and the
`Grid` and `RandomSearch` implementations.


#### The `result` method: For building each entry of the history
17 changes: 7 additions & 10 deletions src/range_methods.jl
@@ -31,21 +31,18 @@ the results. Otherwise models are ordered, with the first
hyperparameter referenced cycling fastest.
"""
grid(rng::AbstractRNG, prototype::Model, ranges, resolutions) =
shuffle(rng, grid(prototype, ranges, resolutions))
grid(rng::AbstractRNG, prototype::Model, fields, iterators) =
shuffle(rng, grid(prototype, fields, iterators))

function grid(prototype::Model, ranges, resolutions)

iterators = broadcast(iterator, ranges, resolutions)
function grid(prototype::Model, fields, iterators)

A = MLJBase.unwind(iterators...)

N = size(A, 1)
map(1:N) do i
clone = deepcopy(prototype)
for k in eachindex(ranges)
field = ranges[k].field
recursive_setproperty!(clone, field, A[i,k])
for k in eachindex(fields)
recursive_setproperty!(clone, fields[k], A[i,k])
end
clone
end
@@ -57,8 +54,8 @@ end
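# Aside (a hedged sketch, not part of this diff): `MLJBase.unwind` builds
# the Cartesian product over the supplied iterators, one row per grid
# point, with the first iterator cycling fastest. For example,
#
#     MLJBase.unwind([1, 2], ['a', 'b', 'c'])
#
# returns a 6×2 matrix with rows (1,'a'), (2,'a'), (1,'b'), (2,'b'),
# (1,'c'), (2,'c').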
"""
process_grid_range(user_specified_range, resolution, verbosity)
Utility to convert a user-specified range (see [`Grid`](@ref)) into a
pair of tuples `(ranges, resolutions)`.
Convert a user-specified range (see [`Grid`](@ref)) into a tuple of
tuples `(ranges, resolutions)`.
For example, if `r1`, `r2` are `NumericRange`s and `s` is a
`NominalRange` with 5 values, then we have:
71 changes: 59 additions & 12 deletions src/strategies/grid.jl
@@ -13,14 +13,18 @@ A single one-dimensional range or vector of one-dimensional ranges
can be specified. Specifically, in `Grid` search, the `range` field
of a `TunedModel` instance can be:
- A single one-dimensional range (i.e., `ParamRange` object) `r`, or a pair of
the form `(r, res)` where `res` specifies a resolution to override
the default `resolution`.
- A single one-dimensional range - i.e., `ParamRange` object - `r`, or a
pair of the form `(r, res)` where `res` specifies a resolution to
override the default `resolution`.
- Any vector of objects of the above form
Two elements of a `range` vector may share the same `field` attribute,
with the effect that their grids are combined, as in Example 3 below.
`ParamRange` objects are constructed using the `range` method.
Example 1:
range(model, :hyper1, lower=1, origin=2, unit=1)
@@ -31,6 +35,12 @@ Example 2:
range(model, :hyper2, lower=2, upper=4),
range(model, :hyper3, values=[:ball, :tree])]
Example 3:
# a range generating the grid `[1, 2, 10, 20, 30]` for `:hyper1`:
[range(model, :hyper1, values=[1, 2]),
(range(model, :hyper1, lower=10, upper=30), 3)]
Note: All the `field` values of the `ParamRange` objects (`:hyper1`,
`:hyper2`, `:hyper3` in the preceding examples) must refer to field
names of a single model (the `model` specified during `TunedModel`
@@ -44,7 +54,8 @@ cases all `values` of each specified `NominalRange` are exhausted. If
`goal` is specified, then all resolutions are ignored, and a global
resolution is applied to the `NumericRange` objects that maximizes the
number of grid points, subject to the restriction that this not exceed
`goal`. Otherwise the default `resolution` and any parameter-specific
`goal`. (This assumes no field appears twice in the `range` vector.)
Otherwise the default `resolution` and any parameter-specific
resolutions apply.
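For example (an illustrative sketch of the arithmetic, not the
implementation): with `goal=90`, one `NominalRange` of 3 values and two
`NumericRange`s, the applied global resolution is the largest `res`
satisfying `3*res^2 <= goal`:

    goal = 90; Π = 3                # Π = product of NominalRange lengths
    res = floor(Int, sqrt(goal/Π))  # res == 5, giving 3 * 25 == 75 <= 90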
In all cases the models generated are shuffled using `rng`, unless
@@ -68,6 +79,8 @@ Grid(; goal=nothing, resolution=10, shuffle=true,
isnumeric(::Any) = false
isnumeric(::NumericRange) = true

# To replace resolutions for numeric ranges with goal-adjusted ones if
# a goal is specified:
adjusted_resolutions(::Nothing, ranges, resolutions) = resolutions
function adjusted_resolutions(goal, ranges, resolutions)
# get the product Π of the lengths of the NominalRanges:
@@ -85,19 +98,50 @@ function adjusted_resolutions(goal, ranges, resolutions)
end
end

# For deciding the scale when a field is duplicated:
_merge(s1, s2) = (s1 == :none ? s2 : s1)
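# For example (cf. the tests): _merge(:log, :linear) == :log, while
# _merge(:none, sin) == sin.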

function fields_iterators_and_scales(ranges, resolutions)

# following could have non-unique entries:
fields = map(r -> r.field, ranges)

iterator_given_field = Dict{Union{Symbol,Expr},Vector}()
scale_given_field = Dict{Union{Symbol,Expr},Any}()
for i in eachindex(ranges)
fld = fields[i]
r = ranges[i]
if haskey(iterator_given_field, fld)
iterator_given_field[fld] =
vcat(iterator_given_field[fld], iterator(r, resolutions[i]))
scale_given_field[fld] =
_merge(scale_given_field[fld], scale(r))
else
iterator_given_field[fld] = iterator(r, resolutions[i])
scale_given_field[fld] = scale(r)
end
end
fields = unique(fields)
iterators = map(fld->iterator_given_field[fld], fields)
scales = map(fld->scale_given_field[fld], fields)

return fields, iterators, scales

end
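# Example (mirroring this PR's tests): if `r1` and `rr1` are ranges over
# :(model1.lambda), and `r2` is a range over :K, then
#
#     fields_iterators_and_scales((r1, r2, rr1), (2, 3, nothing))
#
# returns fields == [:(model1.lambda), :K], with the two iterators for
# :(model1.lambda) concatenated and one merged scale per unique field.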

function setup(tuning::Grid, model, user_range, verbosity)
ranges, resolutions =
process_grid_range(user_range, tuning.resolution, verbosity)
resolutions = adjusted_resolutions(tuning.goal, ranges, resolutions)

fields = map(r -> r.field, ranges)
resolutions = adjusted_resolutions(tuning.goal, ranges, resolutions)

parameter_scales = scale.(ranges)
fields, iterators, parameter_scales =
fields_iterators_and_scales(ranges, resolutions)

if tuning.shuffle
models = grid(tuning.rng, model, ranges, resolutions)
models = grid(tuning.rng, model, fields, iterators)
else
models = grid(model, ranges, resolutions)
models = grid(model, fields, iterators)
end

state = (models=models,
@@ -126,12 +170,15 @@ function tuning_report(tuning::Grid, history, state)
end

function default_n(tuning::Grid, user_range)

ranges, resolutions =
process_grid_range(user_range, tuning.resolution, -1)

resolutions = adjusted_resolutions(tuning.goal, ranges, resolutions)
len(t::Tuple{NumericRange,Integer}) = length(iterator(t[1], t[2]))
len(t::Tuple{NominalRange,Integer}) = t[2]
return prod(len.(zip(ranges, resolutions)))

fields, iterators, parameter_scales =
fields_iterators_and_scales(ranges, resolutions)
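
# e.g., with iterators == [[20.0, 31.0, 0.0, 1.0], [1, 3, 11]] (as in the
# example above) the default is 4 * 3 == 12 models: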

return prod(length.(iterators))

end
2 changes: 1 addition & 1 deletion src/strategies/random_search.jl
@@ -66,7 +66,7 @@ distribution types | for fitting to ranges of this type
# uniform sampling of :(atom.λ) from [0, 1] without defining a NumericRange:
struct MySampler end
Base.rand(rng::Random.AbstractRNG, ::MySampler) = rand(rng)
range3 = (:(atom.λ), MySampler(), range1)
range3 = (:(atom.λ), MySampler())
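# The pair above directs the strategy to generate values for :(atom.λ)
# by calling `rand(rng, MySampler())`. A quick check (hedged sketch):
import Random
rand(Random.MersenneTwister(123), MySampler()) # some Float64 in [0, 1)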
### Algorithm
15 changes: 11 additions & 4 deletions test/range_methods.jl
@@ -53,10 +53,15 @@ r2 = range(super_model, :K, lower=1, upper=10, scale=:log10)

@testset "models from cartesian range and resolutions" begin

f1 = r1.field
f2 = r2.field
itr1 = iterator(r1, nothing)
itr2 = iterator(r2, 7)

# with method:
m1 = MLJTuning.grid(super_model, [r1, r2], [nothing, 7])
m1r = MLJTuning.grid(MersenneTwister(123), super_model, [r1, r2],
[nothing, 7])
m1 = MLJTuning.grid(super_model, [f1, f2], [itr1, itr2])
m1r = MLJTuning.grid(MersenneTwister(123), super_model, [f1, f2],
[itr1, itr2])

# generate all models by hand:
models1 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), dummy_model),
@@ -76,8 +81,10 @@ r2 = range(super_model, :K, lower=1, upper=10, scale=:log10)
@test m1r != models1
@test _issubset(models1, m1r) && _issubset(m1, models1)

itr1 = iterator(r1, 1)

# with method:
m2 = MLJTuning.grid(super_model, [r1, r2], [1, 7])
m2 = MLJTuning.grid(super_model, [f1, f2], [itr1, itr2])

# generate all models by hand:
models2 = [SuperModel(1, DummyModel(1.2, 9.5, 'c'), dummy_model),
52 changes: 36 additions & 16 deletions test/strategies/grid.jl
@@ -34,8 +34,26 @@ super_model = SuperModel(4, dummy_model, deepcopy(dummy_model))

s = range(super_model, :(model1.kernel), values=['c', 'd'])
r1 = range(super_model, :(model1.lambda), lower=20, upper=31)
rr1 = range(super_model, :(model1.lambda), values=[0.0, 1.0])
r2 = range(super_model, :K, lower=1, upper=11, scale=:log10)

@testset "scale merge" begin
@test MLJTuning._merge(sin, cos) == sin
@test MLJTuning._merge(:none, sin) == sin
@test MLJTuning._merge(sin, :none) == sin
@test MLJTuning._merge(:log, :linear) == :log
end

@testset "extracting fields and iterators" begin
ranges = (r1, r2, rr1)
resolutions = (2, 3, nothing)
fields, iterators, scales =
MLJTuning.fields_iterators_and_scales(ranges, resolutions)
@test fields == [:(model1.lambda), :K]
@test iterators == [[20.0, 31.0, 0.0, 1.0], [1, 3, 11]]
@test scales == [:linear, :log10]
end

@testset "setup, default_n" begin
user_range = [r1, (r2, 3), s]

@@ -107,7 +125,6 @@ r2 = range(super_model, :K, lower=1, upper=11, scale=:log10)

end


@testset "2-parameter tune, with nesting" begin

sel = FeatureSelector()
@@ -225,22 +242,25 @@ end

end

@testset "field duplicated" begin
N = 100
X = (x = rand(3N), );
y = categorical(rand("abc", 3N));

# ## LEARNING CURVE

# @testset "learning curves" begin
# atom = FooBarRegressor()
# ensemble = EnsembleModel(atom=atom, n=50, rng=1)
# mach = machine(ensemble, X, y)
# r_lambda = range(ensemble, :(atom.lambda),
# lower=0.0001, upper=0.1, scale=:log10)
# curve = MLJ.learning_curve!(mach; range=r_lambda)
# atom.lambda=0.3
# r_n = range(ensemble, :n, lower=10, upper=100)
# curve2 = MLJ.learning_curve!(mach; range=r_n)
# curve3 = learning_curve(ensemble, X, y; range=r_n)
# @test curve2.measurements ≈ curve3.measurements
# end
model = KNNClassifier()
r1 = range(model, :K, values=[2, 3, 4, 5])
r2 = range(model, :K, lower=10, upper=50, scale=:log)
r3 = range(model, :leafsize, values=[10, 11])
tuning = Grid(resolution=2, shuffle=false)
tuned_model = TunedModel(model=model,
tuning=tuning, measure=BrierScore(),
resampling=Holdout(fraction_train=2/3),
range=[r1, r2, r3])
mach = fit!(machine(tuned_model, X, y))
Kvalues = map(m->m.K, first.(report(mach).history))
once = [2, 3, 4, 5, 10, 50]
@test Kvalues == vcat(once, once)
end

end # module
true
