In [1]:
import StatsBase: predict
import Base: getindex
import MLBase: Kfold
using MLMetrics
using SparseRegression


Use "abstract type AvgMode end" instead.

Use "abstract type AbstractBinary end" instead.

Use "abstract type AbstractMultiClass end" instead.

Use "MultiClass{T,N}(...) where {T,N}" instead.


In [2]:
function FakeData(N,d)
    n_obs = 100
    x = randn((n_obs,d))
    y = sum(2*x,2)
    
    hcat(x,y)
end

FakeData (generic function with 1 method)

In [3]:
immutable Task 
    task_type::String
    target::Int
    features::Array{Int}
end

function Task(;task_type="regression", target=nothing, data=nothing)
    if target == nothing || data == nothing
        throw("Requires target and data to be set")
    end
    
    features = size(data,2)
    features = deleteat!( collect(1:features), target)
    
    Task(task_type, target, features)
end

immutable Learner
    name::String
    parameters::Union{Void,Dict{Any}}
    Learner(learner::String) = new(learner, Dict())
    Learner(learner::String, parameters::Dict{Any}) = new(learner, parameters)
end

immutable Resampling
    method::String
    iterations::Int
    Resampling() = new("KFold", 3)
end

abstract type Parameter end

immutable DiscreteParameter <: Parameter 
    name::String
    values::Array{Any}
    DiscreteParameter(;name=nothing,values=nothing) = new(name, values)
end

immutable ContinuousParameter <: Parameter
    name::String
    lower::Real
    upper::Real
    transform::Function
    ContinuousParameter(;name=nothing, lower=nothing, upper=nothing, transform=nothing) = new(name, lower, upper, transform)
end


immutable ParametersSet
   parameters::Array{Parameter}
end

getindex(p::ParametersSet, i::Int64) = p.parameters[i]

immutable MLRModel{T}
    model::T
    parameters
end

In [4]:
#### ABSTRACT FUNCTIONS ####

function MLRModel(learner::Learner, task::Task, data)
    # Calls function with name "makeModelname"
    f_name = learner.name
    f_name = "make" * titlecase(f_name)
    
    f = getfield(Main, Symbol(f_name))
    f(learner, task, data)
end    

function learnᵧ(learner::Learner, task::Task, data)
    modelᵧ = MLRModel(learner, task, data)
    learnᵧ!(modelᵧ, learner=learner, task=task, data=data)
    modelᵧ
end

learnᵧ (generic function with 1 method)

In [50]:
### TRANSITION ###
function makeRidge(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        push!(parameters, get_λ(learner.parameters, data))
        model = SModel(data[:, task.features], data[:, task.target], L2DistLoss(), L2Penalty(), parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

function makeGlm(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        if get(learner.parameters, "λ", false) !== false
            # Add λ
            push!(parameters, get_λ(learner.parameters, task))
        end
        if get(learner.parameters, "penalty", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["penalty"])
        end
        if get(learner.parameters, "loss", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["loss"])
        end
        model = SModel(data[:, task.features], data[:, task.target], parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

makeGlm (generic function with 1 method)

In [6]:
#### MODEL WRAPPERS ####
using SparseRegression

function get_λ(parameters, task)
    if get(parameters, "λ", false) == false
        lambda = fill(0.0, task.features)
    elseif typeof(parameters["λ"]) <: Real
        lambda = fill(parameters["λ"], length(task.features) )
    elseif typeof(parameters["λ"]) <: Vector{Float64}
        lambda = copy(parameters["λ"])
    end
    lambda
end


function predictᵧ(modelᵧ::MLRModel{<:SModel}; data=data, task=task)
    predict(modelᵧ.model, data[:, task.features])
end

function learnᵧ!(modelᵧ::MLRModel{<:SModel}; learner=nothing::Learner, data=nothing::Matrix{Real}, task=nothing::Task)
    learn!(modelᵧ.model)
end

learnᵧ! (generic function with 1 method)

In [60]:
function update_parameters!(array, range) 
    array[1] += 1
    for i in 1:length(array)
        println("Array: $(array[i]) range: $(range[i])")
        if array[i] > range[i][end]
            array[i+1] += 1
            array[i] = range[i][1]
        end
    end
end

function parameters_dictionary(ps::ParametersSet, array, discrete_dictionary)
    dict = Dict()
    for i in 1:length(array)
        if typeof(ps[i]) <: ContinuousParameter
            dict[ps[i].name] = ps[i].transform( convert(Float64, array[i]) )
        else
            dict[ps[i].name] = discrete_dictionary[ps[i].name][array[i]]
        end
    end
    dict
end

function get_samples(sampler::Resampling, n_obs::Int64)
    trainᵢ = []
    testᵢ = []
    if sampler.method == "KFold"
        kfold = Kfold(n_obs, sampler.iterations)
        for train in kfold
            push!(trainᵢ, collect(train))
            push!(testᵢ, setdiff(1:n_obs, trainᵢ[end]))
        end
    end  
    trainᵢ, testᵢ
end

function tune(;learner=nothing::Learner, task=nothing::Task, data=nothing::Matrix{Real}, 
                parameters_set=nothing::ParametersSet, sampler=Resampling()::Resampling, 
                measure=nothing::Function)
    
    n_parameters = length(parameters_set.parameters)
    n_obs        = size(data,1)
        
    parameters_array = Array{Any}(n_parameters)
    parameters_range = Array{Tuple}(n_parameters)
    
    # For discrete parameters, the range is set to 1:n_discrete_values
    # The discrete dictionary variable allows to connect this range to 
    # the actual discrete value
    discrete_dictionary = Dict()
    
    total_parameters = 1
    
    # Prepare parameters
    for i in 1:n_parameters
        if typeof(parameters_set[i]) <: ContinuousParameter 
            lower = parameters_set[i].lower
            upper = parameters_set[i].upper
            parameters_array[i] = 1
            parameters_range[i] = Tuple(lower:upper)
            params = length(lower:upper)
        else
            parameters_array[i] = 1
            parameters_range[i] = Tuple(1:length(parameters_set[i].values))
            discrete_dictionary[parameters_set[i].name] = parameters_set[i].values
            params = length(parameters_set[i].values)
        end
        total_parameters *= params
    end
    
    println("Array $parameters_array")
    println("Ranges $parameters_range")
    println("Dict $discrete_dictionary")
    
    
    # Loop over parameters
    for i in 1:total_parameters
        # Set new parametersparameters_set[i].values
        update_parameters!(parameters_array, parameters_range) 
        pd = parameters_dictionary(parameters_set, parameters_array, discrete_dictionary)

        # Update learner with new parameters
        lrn = Learner(learner.name, pd)
                
        # Get training/testing samples
        trainⱼ, testⱼ = get_samples(sampler, n_obs)
        for j in 1:length(trainⱼ)  
            modelᵧ = learnᵧ(lrn, task, data[trainⱼ[j], :])
            preds = predictᵧ(modelᵧ, data=data[testⱼ[j],:], task=task)
            
            error = mean_squared_error( data[testⱼ[j], task.target], preds)
#             println("    --> Error: $error")
#             println(modelᵧ.model)
        end
        
    end
end

tune (generic function with 1 method)

In [59]:
ps = ParametersSet([
    ContinuousParameter(
        name = "λ",
        lower = -4,
        upper = 1,
        transform = x->10^x
    )
        ,
    DiscreteParameter(
        name = "penalty",
        values = [L1Penalty(), L2Penalty()]
    )
])

data = FakeData(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn = Learner("glm")

tune(learner=lrn, task=task, data=data, parameters_set=ps, measure=mean_squared_error)

Array Any[-4, 1]
Ranges Tuple[(-4, -3, -2, -1, 0, 1), (1, 2)]
Dict Dict{Any,Any}(Pair{Any,Any}("penalty", Any[L1Penalty, L2Penalty]))
Array: -3 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: -2 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: -1 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 0 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 1 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 2 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 3 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 4 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 5 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 6 range: (-4, -3, -2, -1, 0, 1)
Array: 1 range: (1, 2)
Array: 7 range: (-4, -3, -2, -1, 0, 1)
Array: 2 range: (1, 2)
Array: 2 range: (-4, -3, -2, -1, 0, 1)
Array: 2 range: (1, 2)


[1m[36mINFO: [39m[22m[36mConverged after 24 iterations: [1.99868, 1.99844, 1.99908]
[39m[1m[36mINFO: [39m[22m[36mConverged after 17 iterations: [1.99882, 1.99893, 1.99917]
[39m[1m[36mINFO: [39m[22m[36mConverged after 24 iterations: [1.99834, 1.99885, 1.9991]
[39m[1m[36mINFO: [39m[22m[36mConverged after 20 iterations: [1.98659, 1.98937, 1.99282]
[39m[1m[36mINFO: [39m[22m[36mConverged after 21 iterations: [1.98556, 1.98685, 1.99]
[39m[1m[36mINFO: [39m[22m[36mConverged after 24 iterations: [1.98704, 1.98595, 1.99077]
[39m[1m[36mINFO: [39m[22m[36mConverged after 25 iterations: [1.84061, 1.84735, 1.90877]
[39m[1m[36mINFO: [39m[22m[36mConverged after 22 iterations: [1.86553, 1.87288, 1.92484]
[39m[1m[36mINFO: [39m[22m[36mConverged after 18 iterations: [1.87888, 1.90028, 1.90343]
[39m[1m[36mINFO: [39m[22m[36mConverged after 19 iterations: [0.637052, 0.691663, 1.1139]
[39m[1m[36mINFO: [39m[22m[36mConverged after 24 iterations: [0

In [9]:
data = FakeData(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn  = Learner("ridge")

train = 1:80
test  = 81:100


modelᵧ = learnᵧ(lrn, task, data[train,:])
pred = predictᵧ(modelᵧ, data=data[test,:], task=task)

mean_squared_error(data[test,task.target],pred)

[1m[36mINFO: [39m[22m[36mSweep finished
[39m

0.13596084571572437

In [10]:
2*data[2,1]+2*data[2,2]+2*data[2,3]+3

2.0171995843927104

In [11]:
mean_squared_error([1,1,1,1,1,1], [2,2,2,2,2,2])

1.0