Patch release 0.5.3 (#332)
- [x] (**Enhancement**) Overload `mean`, `mode` and `median` for `Nodes` (#288)
tlienart committed Nov 13, 2019
1 parent d03bd36 commit f251001
Showing 16 changed files with 209 additions and 137 deletions.
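The headline change (#288) overloads statistical reductions for learning-network nodes. A hedged sketch of what that enables — `node` is MLJ's lazy-node constructor, but the exact signatures below are assumptions, not the committed implementation:

```julia
using MLJ
import Statistics
import Distributions

# Sketch (assumed signatures): a reduction applied to a node returns a
# new node that performs the reduction lazily when the network is called.
Statistics.mean(n::MLJ.AbstractNode)   = node(Statistics.mean, n)
Statistics.median(n::MLJ.AbstractNode) = node(Statistics.median, n)
Distributions.mode(n::MLJ.AbstractNode) = node(Distributions.mode, n)
```

With overloads of this shape in place, an expression like `mean(predict(mach, Xs))` inside a learning network yields a callable node rather than an eagerly computed value.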
5 changes: 2 additions & 3 deletions .travis.yml
@@ -2,12 +2,10 @@
language: julia
os:
- linux
- osx
julia:
- 1.0
- 1.1
- 1.2
- 1.3
- nightly
notifications:
email: false
@@ -26,7 +24,8 @@ after_success:
## uncomment following lines to deploy documentation
jobs:
include:
- stage: "Documentation"
- if: branch = master
stage: "Documentation"
julia: 1.2
os: linux
# disable global before_script in order not to install Compose twice
34 changes: 17 additions & 17 deletions Project.toml
@@ -1,12 +1,13 @@
name = "MLJ"
uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.5.2"
version = "0.5.3"

tlienart (Author, Collaborator) commented on Nov 13, 2019:

@JuliaRegistrator register

Release note:

  • (Enhancement) Overload mean, mode and median for Nodes (#288)

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -20,31 +21,30 @@ PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "<0.5.3, 0.7"
ComputationalResources = "0.3"
Distributions = "0.21.3"
DocStringExtensions = "0.8.1"
MLJBase = "0.7.3"
MLJModels = "0.5.4"
OrderedCollections = "1.1"
PrettyTables = "0.6"
ProgressMeter = "1.1.0"
RecipesBase = "0.7.0"
Requires = "^0.5.2"
ScientificTypes = "0.2.2"
StatsBase = "0.32"
Tables = "<0.1.19, 0.2"
CategoricalArrays = "^0.7"
ComputationalResources = "^0.3"
Distributions = "^0.21"
DocStringExtensions = "^0.8"
MLJBase = "^0.7"
MLJModels = "^0.5"
OrderedCollections = "^1.1"
PrettyTables = "^0.6"
ProgressMeter = "^1.1"
RecipesBase = "^0.7"
ScientificTypes = "^0.2"
StatsBase = "^0.32"
Tables = "^0.2"
julia = "1"

[extras]
DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
@@ -54,4 +54,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"

[targets]
test = ["DecisionTree", "LossFunctions", "MLJBase", "MultivariateStats", "NearestNeighbors", "RDatasets", "Test", "UnicodePlots"]
test = ["DecisionTree", "DelimitedFiles", "LossFunctions", "MLJBase", "MultivariateStats", "NearestNeighbors", "RDatasets", "Test", "UnicodePlots"]
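The rewritten `[compat]` section above adopts Pkg's caret (`^`) convention uniformly: `^0.21` admits versions in `[0.21.0, 0.22.0)`, while `^1.1` admits `[1.1.0, 2.0.0)` — the left-most non-zero component is pinned. A stdlib-only sketch of that rule (the helper names here are ours, not Pkg's):

```julia
# Upper bound implied by a caret specifier: bump the left-most
# non-zero component of the base version.
function caret_upper(v::VersionNumber)
    v.major > 0 && return VersionNumber(v.major + 1, 0, 0)
    v.minor > 0 && return VersionNumber(0, v.minor + 1, 0)
    return VersionNumber(0, 0, v.patch + 1)
end

caret_compatible(base, v) = base <= v < caret_upper(base)

@assert caret_compatible(v"0.21", v"0.21.3")   # Distributions 0.21.3 matches ^0.21
@assert !caret_compatible(v"0.21", v"0.22.0")  # but 0.22 does not
@assert caret_compatible(v"1.1", v"1.4.0")     # ProgressMeter 1.4 matches ^1.1
```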
4 changes: 2 additions & 2 deletions README.md
@@ -2,7 +2,7 @@

A pure Julia machine learning framework.

[MLJ News](https://github.com/alan-turing-institute/MLJ.jl/blob/master/docs/src/NEWS.md) for MLJ and its satellite packages, [MLJBase](https://github.com/alan-turing-institute/MLJModels.jl), [MLJModels](https://github.com/alan-turing-institute/MLJModels.jl) and [ScientificTypes](https://github.com/alan-turing-institute/ScientificTypes.jl) | [MLJ Cheatsheet](docs/src/mlj_cheatsheet.md)
[MLJ News](https://github.com/alan-turing-institute/MLJ.jl/blob/master/docs/src/NEWS.md) for MLJ and its satellite packages, [MLJBase](https://github.com/alan-turing-institute/MLJBase.jl), [MLJModels](https://github.com/alan-turing-institute/MLJModels.jl) and [ScientificTypes](https://github.com/alan-turing-institute/ScientificTypes.jl) | [MLJ Cheatsheet](docs/src/mlj_cheatsheet.md)

## `join!(MLJ, YourModel)`

@@ -16,7 +16,7 @@ crucially on:
The MLJ model interface is now relatively stable and
[well-documented](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/),
and the core team is happy to respond to [issue requests](https://github.com/alan-turing-institute/MLJ.jl/issues) for
assistance. Please click [here](CONTRIBUTE.md) for more details on
assistance. Please click [here](CONTRIBUTING.md) for more details on
contributing.

MLJ is presently supported by a small Alan Turing Institute grant and is looking for new funding sources to grow and maintain the project.
6 changes: 6 additions & 0 deletions docs/src/NEWS.md
@@ -14,8 +14,14 @@ News for MLJ and its satellite packages: [MLJBase](https://github.com/alan-turin
[ScientificTypes](https://github.com/alan-turing-institute/ScientificTypes.jl/releases) (mainly for developers)




## News

*Note:* New patch releases are no longer being announced below. Refer to the
links above for complete release notes.


### 30 Oct 2019

- MLJModels 0.5.3 released.
19 changes: 12 additions & 7 deletions docs/src/adding_models_for_general_use.md
@@ -498,13 +498,12 @@ The above remarks continue to hold unchanged for the case of multivariate
targets. For example, if we declare

```julia
target_scitype(SomeSupervisedModel) = AbstractVector{<:Tuple{Continuous,Count}}
target_scitype(SomeSupervisedModel) = Table(Continuous)
```

then each element of `y` will be a tuple of type
`Tuple{AbstractFloat,Integer}`. For predicting variable length
sequences of, say, binary values (`CategoricalValue`s or
`CategoricalString`s with some common size-two pool) we declare
For predicting variable length sequences of, say, binary values
(`CategoricalValue`s or `CategoricalString`s with some common size-two
pool) we declare

```julia
target_scitype(SomeSupervisedModel) = AbstractVector{<:NTuple{<:Binary}}
@@ -543,7 +542,9 @@ MLJBase.package_uuid(::Type{<:DecisionTreeClassifier}) = "7806a523-6efd-50cb-b5f
MLJBase.package_url(::Type{<:DecisionTreeClassifier}) = "https://github.com/bensadeghi/DecisionTree.jl"
MLJBase.is_pure_julia(::Type{<:DecisionTreeClassifier}) = true
```

Alternatively these traits can also be declared using `MLJBase.metadata_pkg` and `MLJBase.metadata_model` helper functions as:

```julia
MLJBase.metadata_pkg(DecisionTreeClassifier,name="DecisionTree",
uuid="7806a523-6efd-50cb-b5f6-3fa6f1930dbb",
@@ -554,7 +555,8 @@ MLJBase.metadata_model(DecisionTreeClassifier,
input=MLJBase.Table(MLJBase.Continuous),
target=AbstractVector{<:MLJBase.Finite},
path="MLJModels.DecisionTree_.DecisionTreeClassifier")
```
```

You can test all your declarations of traits by calling `MLJBase.info_dict(SomeModel)`.
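The declared traits can then be inspected in one place. A hedged usage sketch — the exact dictionary keys returned are assumptions, to be checked against the MLJBase version in use:

```julia
import MLJBase

# Collect every declared trait for the model in a single dictionary:
d = MLJBase.info_dict(DecisionTreeClassifier)

d[:name]           # expected to echo the declared package/model name
d[:is_pure_julia]  # expected: true, per the declaration above
```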


@@ -602,7 +604,10 @@ method.
TODO

This is basically the same but with no target `y` appearing in the
signatures, and no `target_scitype` trait to declare.
signatures, and no `target_scitype` trait to declare. Instead, one
declares an `output_scitype` trait, and in place of a `predict` method
one implements a `transform` operation and, optionally, an
`inverse_transform` operation.
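To make that contract concrete, here is a minimal univariate scaler sketched against the interface as described — the precise `fit`/`transform` signatures are assumptions, to be verified against the MLJBase documentation:

```julia
import MLJBase
import Statistics

mutable struct UnivariateScaler <: MLJBase.Unsupervised end

# fit learns the scaling parameters; returns (fitresult, cache, report)
function MLJBase.fit(::UnivariateScaler, verbosity::Int, x)
    fitresult = (Statistics.mean(x), Statistics.std(x))
    return fitresult, nothing, nothing
end

# transform standardizes; inverse_transform undoes it
MLJBase.transform(::UnivariateScaler, (μ, σ), x) = (x .- μ) ./ σ
MLJBase.inverse_transform(::UnivariateScaler, (μ, σ), z) = z .* σ .+ μ
```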


### Convenience methods
2 changes: 2 additions & 0 deletions docs/src/common_mlj_workflows.md
@@ -27,6 +27,8 @@ y, X = unpack(channing,
first(X, 4)
```

*Note:* Before Julia 1.2, replace `!=(:Time)` with `col -> col != :Time`.
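The note above works because, from Julia 1.2, `!=(x)` partially applies the comparison and returns a one-argument predicate. A stdlib-only illustration:

```julia
f = !=(:Time)            # Julia ≥ 1.2: a one-argument predicate
@assert f(:Weight)
@assert !f(:Time)

g = col -> col != :Time  # pre-1.2 spelling; behaves identically
@assert g(:Weight) == f(:Weight)
```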

```@example workflows
y[1:4]
```
10 changes: 5 additions & 5 deletions docs/src/evaluating_model_performance.md
@@ -106,11 +106,11 @@ CV
To define your own resampling strategy, make relevant parameters of
your strategy the fields of a new type `MyResamplingStrategy <:
MLJ.ResamplingStrategy`, and implement
`MLJ.train_test_pairs(my_strategy::MyStragegy, rows, X, y)`, a method
which will take a vector of indices `rows` and return a vector `[(t1,
e1), (t2, e2), ... (tk, ek)]` of train/test pairs of row indices
selected from `rows`. Here `X`, `y` are the input and target data
(ignored in simple strategies, such as `Holdout` and `CV`).
`MLJ.train_test_pairs(my_strategy::MyResamplingStrategy, rows, X, y)`,
a method which will take a vector of indices `rows` and return a
vector `[(t1, e1), (t2, e2), ... (tk, ek)]` of train/test pairs of row
indices selected from `rows`. Here `X`, `y` are the input and target
data (ignored in simple strategies, such as `Holdout` and `CV`).
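As a worked illustration of that recipe — the strategy name and its field are hypothetical; only `ResamplingStrategy` and `train_test_pairs` come from MLJ:

```julia
import MLJ

# Hypothetical strategy: hold out every k-th row for testing.
struct EveryKth <: MLJ.ResamplingStrategy
    k::Int
end

function MLJ.train_test_pairs(strategy::EveryKth, rows, X, y)
    test  = rows[1:strategy.k:end]   # every k-th index goes to the test set
    train = setdiff(rows, test)
    return [(train, test)]           # a single train/test pair
end
```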

Here is the code for the `Holdout` strategy as an example:

44 changes: 21 additions & 23 deletions src/MLJ.jl
@@ -66,29 +66,35 @@ export models, localmodels, @load, load, info,
using MLJBase
using MLJModels

# these are defined in MLJBase
export load_boston, load_ames, load_iris
export load_reduced_ames
export load_crabs

# to be extended:
import MLJBase: fit, update, clean!,
predict, predict_mean, predict_median, predict_mode,
transform, inverse_transform, se, evaluate, fitted_params,
show_as_constructed, params
import MLJModels: models

using Requires
import Pkg.TOML
import Pkg
using OrderedCollections
import Pkg, Pkg.TOML
using Tables, OrderedCollections

using CategoricalArrays
import Distributions: pdf, mode
import Distributions
import StatsBase
import Distributions: pdf, mode
import Statistics, StatsBase, LinearAlgebra, Random
import Random: AbstractRNG, MersenneTwister

using ProgressMeter
import Tables

import PrettyTables
import Random
using ScientificTypes
import ScientificTypes

using ComputationalResources
import ComputationalResources: CPUProcesses
using ComputationalResources: CPUProcesses

const DEFAULT_RESOURCE = Ref{AbstractResource}(CPU1())

# convenience packages
@@ -99,9 +105,6 @@ import Base: ==, getindex, setindex!
import StatsBase.fit!

# from Standard Library:
using Statistics
using LinearAlgebra
using Random
import Distributed: @distributed, nworkers, pmap
using RecipesBase # for plotting

@@ -118,7 +121,6 @@ const CategoricalElement = Union{CategoricalString,CategoricalValue}
toml = Pkg.TOML.parsefile(joinpath(dirname(dirname(pathof(MLJ))), "Project.toml"))
const MLJ_VERSION = toml["version"]


## INCLUDES

include("utilities.jl") # general purpose utilities
@@ -127,6 +129,11 @@ include("networks.jl") # for building learning networks
include("composites.jl") # composite models & exporting learning networks
include("pipelines.jl") # pipelines (exported linear learning networks)
include("operations.jl") # syntactic sugar for operations (predict, etc)

if VERSION ≥ v"1.3.0-"
include("arrows.jl")
end

include("resampling.jl") # resampling strategies and model evaluation
include("parameters.jl") # hyperparameter ranges and grid generation
include("tuning.jl")
@@ -136,13 +143,4 @@ include("tasks.jl") # enhancements to MLJBase task interface
include("scitypes.jl") # extensions to ScientificTypes.scitype
include("plotrecipes.jl")


## INCLUDES FOR OPTIONAL DEPENDENCIES

function __init__()
@require(CSV="336ed68f-0bac-5ca0-87d4-7b16caf5d00b",
include("datasets_requires.jl"))
end


end # module
24 changes: 24 additions & 0 deletions src/arrows.jl
@@ -0,0 +1,24 @@
# Syntactic sugar for arrow syntax
# we need version ≥ 1.3 in order to make use of multiple dispatch
# over abstract types


# This allows implicit: data |> machine
(mach::AbstractMachine{<:Unsupervised})(data) = transform(mach, data)
(mach::AbstractMachine{<:Supervised})(data) = predict(mach, data)
(mach::AbstractMachine)(data::AbstractMatrix) = data |> table |> mach

# This allows implicit: data |> Unsupervised
(m::Unsupervised)(data::AbstractNode) = data |> machine(m, data)
(m::Unsupervised)(data) = source(data) |> m
(m::Unsupervised)(data::AbstractMatrix) = data |> table |> m

# This allows implicit: data |> Supervised
(m::Supervised)(data::NTuple{2,AbstractNode}) = data[1] |> machine(m, data...)
(m::Supervised)(data::Tuple{AbstractNode,Any}) = (data[1], source(data[2], kind=:target)) |> m
(m::Supervised)(data::Tuple) = (source(data[1]), data[2]) |> m
(m::Supervised)(data::Tuple{AbstractMatrix,Any}) = (data[1] |> table, data[2]) |> m

# This allows implicit: data |> inverse_transform(node)
inverse_transform(node::Node{<:NodalMachine{<:Unsupervised}}) =
data -> inverse_transform(node.machine, data)
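A hedged usage sketch of the sugar defined above (requires Julia ≥ 1.3; the variable names and training workflow are illustrative, not taken from the commit):

```julia
using MLJ

# Assuming X (a table) and y (a vector) are already in scope, and
# `model` is some Supervised model instance:
n = (X, y) |> model   # wraps the data in source nodes, binds a
                      # machine, and returns the prediction node
fit!(n)               # train the underlying machine
n()                   # call the node to obtain predictions (sketch)
```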
10 changes: 0 additions & 10 deletions src/datasets_requires.jl

This file was deleted.

20 changes: 0 additions & 20 deletions src/operations.jl
@@ -85,25 +85,5 @@ function fitted_params(machine::AbstractMachine)
end
end


# Syntactic sugar for pipe syntax
# we need version ≥ 1.3 in order to make use of multiple dispatch
# over abstract types
if VERSION ≥ v"1.3.0-"

(mach::AbstractMachine{<:Unsupervised})(data) = transform(mach, data)
(mach::AbstractMachine{<:Supervised})(data) = predict(mach, data)

(m::Unsupervised)(data::AbstractNode) = data |> machine(m, data)
(m::Unsupervised)(data) = source(data) |> m

(m::Supervised)(data::NTuple{2,AbstractNode}) = data[1] |> machine(m, data...)
(m::Supervised)(data::Tuple) = source.(data) |> m

inverse_transform(node::Node{<:NodalMachine{<:Unsupervised}}) =
data->inverse_transform(node.machine, data)
end # version ≥ 1.3

# Syntactic sugar to directly access hyperparameters
getindex(n::Node{<:NodalMachine{<:Model}}, s::Symbol) = getproperty(n.machine.model, s)
setindex!(n::Node{<:NodalMachine{<:Model}}, v, s::Symbol) = setproperty!(n.machine.model, s, v)
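A short usage sketch of the indexing sugar above — `n` is any node backed by a machine whose model has, say, a `K` field (names illustrative):

```julia
# Read a hyperparameter through the node:
n[:K]       # ≡ getproperty(n.machine.model, :K)

# Write it back, mutating the underlying model:
n[:K] = 5   # ≡ setproperty!(n.machine.model, :K, 5)
```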

1 comment on commit f251001

@JuliaRegistrator commented:

Registration pull request created: JuliaRegistries/General/5368

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.5.3 -m "<description of version>" f251001a915731f28799567e054007d61e077d8c
git push origin v0.5.3
