In [19]:
import StatsBase: predict
import Base: getindex
import MLBase: Kfold
using MLMetrics
using SparseRegression

In [55]:
function FakeData(N,d)
    n_obs = 100
    x = randn((n_obs,d))
    y = sum(2*x,2)
    
    hcat(x,y)
end

FakeData (generic function with 1 method)

In [3]:
immutable Task 
    task_type::String
    target::Int
    features::Array{Int}
end

function Task(;task_type="regression", target=nothing, data=nothing)
    if target == nothing || data == nothing
        throw("Requires target and data to be set")
    end
    
    features = size(data,2)
    features = deleteat!( collect(1:features), target)
    
    Task(task_type, target, features)
end

immutable Learner
    name::String
    parameters::Union{Void,Dict{String, Float64}}
    Learner(learner::String) = new(learner, Dict())
    Learner(learner::String, parameters::Dict{Any}) = new(learner, parameters)
end

immutable Resampling
    method::String
    iterations::Int
    Resampling() = new("KFold", 3)
end

abstract type Parameter end

immutable DiscreteParameter <: Parameter 
    name::String
    values::Array{Any}
end

immutable ContinuousParameter <: Parameter
    name::String
    lower::Real
    upper::Real
    transform::Function
    ContinuousParameter(;name=nothing, lower=nothing, upper=nothing, transform=nothing) = new(name, lower, upper, transform)
end


immutable ParametersSet
   parameters::Array{Parameter}
end

getindex(p::ParametersSet, i::Int64) = p.parameters[i]

immutable MLRModel{T}
    model::T
    parameters
end

In [4]:
#### ABSTRACT FUNCTIONS ####

function MLRModel(learner::Learner, task::Task, data)
    if learner.name == "ridge"
        makeRidge(learner, task, data)
    end
end    

function learnᵧ(learner::Learner, task::Task, data)
    modelᵧ = MLRModel(learner, task, data)
    learnᵧ!(modelᵧ, learner=learner, task=task, data=data)
    modelᵧ
end

learnᵧ (generic function with 1 method)

In [77]:
### TRANSITION ###
function makeRidge(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        push!(parameters, get_λ(learner.parameters, data))
        model = SModel(data[:, task.features], data[:, task.target], L2DistLoss(), L2Penalty(), parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

function makeGLM(learner::Learner, task::Task, data)
    if isempty(learner.parameters)
        model = SModel(data[:, task.features], data[:, task.target])
    else
        parameters = []
        if get(learner.parameters, "λ", false) !== false
            # Add λ
            push!(parameters, get_λ(learner.parameters, task))
        end
        if get(learner.parameters, "penalty", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["penalty"])
        end
        if get(learner.parameters, "loss", false) !== false
            # Add penalty
            push!(parameters, learner.parameters["loss"])
        end
        model = SModel(data[:, task.features], data[:, task.target], parameters...)
    end
    MLRModel(model, copy(learner.parameters))
end

makeGLM (generic function with 1 method)

In [35]:
#### MODEL WRAPPERS ####
using SparseRegression

function get_λ(parameters, task)
    if get(parameters, "λ", false) == false
        lambda = fill(0.0, task.features)
    elseif typeof(parameters["λ"]) <: Real
        lambda = fill(parameters["λ"], task.features)
    elseif typeof(parameters["λ"]) <: Vector{Float64}
        lambda = copy(parameters["λ"])
    end
    lambda
end


function predictᵧ(modelᵧ::MLRModel{<:SModel}; data=data, task=task)
    predict(modelᵧ.model, data[:, task.features])
end

function learnᵧ!(modelᵧ::MLRModel{<:SModel}; learner=nothing::Learner, data=nothing::Matrix{Real}, task=nothing::Task)
    learn!(modelᵧ.model)
end

learnᵧ! (generic function with 1 method)

In [70]:
function update_parameters!(array, range) 
    array[1] += 1
    for i in 1:length(array)
        if array[i] > range[i][2]
            array[i+1] += 1
            array[i] = range[i][1]
        end
    end
end

function parameters_dictionary(ps::ParametersSet, array)
    dict = Dict()
    for i in 1:length(array)
        if typeof(ps[i]) <: ContinuousParameter
           dict[ps[i].name] = ps[i].transform( array[i] )
        else
            dict[ps[i].name] = array[i]
        end
    end
    dict
end

function get_samples(sampler::Resampling, n_obs::Int64)
    trainᵢ = []
    testᵢ = []
    if sampler.method == "KFold"
        kfold = Kfold(n_obs, sampler.iterations)
        for train in kfold
            push!(trainᵢ, collect(train))
            push!(testᵢ, setdiff(1:n_obs, trainᵢ[end]))
        end
    end  
    trainᵢ, testᵢ
end

function tune(;learner=nothing::Learner, task=nothing::Task, data=nothing::Matrix{Real}, 
                parameters_set=nothing::ParametersSet, sampler=Resampling()::Resampling, measure=nothing::Function)
    
    n_parameters = length(parameters_set.parameters)
    n_obs        = size(data,1)
        
    parameters_array = Array{Any}(n_parameters)
    parameters_range = Array{Tuple{Int64,Int64}}(n_parameters)
    
    total_parameters = 0
    
    # Prepare parameters
    for i in 1:n_parameters
        lower = parameters_set[i].lower
        upper = parameters_set[i].upper
        parameters_array[i] = lower
        parameters_range[i] = (lower,upper)
        total_parameters += abs(upper-lower)+1
    end
    
    # Loop over parameters
    for i in 1:total_parameters
        println("Cross validating $parameters_array")
        # Set new parameters
        update_parameters!(parameters_array, parameters_range) 
        pd = parameters_dictionary(parameters_set, parameters_array)

        # Update learner with new parameters
        lrn = Learner(learner.name, pd)
                
        # Get training/testing samples
        trainⱼ, testⱼ = get_samples(sampler, n_obs)
        for j in 1:length(trainⱼ)  
            modelᵧ = learnᵧ(lrn, task, data[trainⱼ[j], :])
            preds = predictᵧ(modelᵧ, data=data[testⱼ[j],:], task=task)
            
            error = mean_squared_error( data[testⱼ[j], task.target], preds)
            println("    --> Error: $error")
        end
        
    end
end

tune (generic function with 1 method)

In [72]:
ps = ParametersSet([
    ContinuousParameter(
            name = "λ",
            lower = -4,
            upper = 2,
            transform = x->10^x
        )
])

data = FakeData(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn = Learner("ridge")

tune(learner=lrn, task=task, data=data, parameters_set=ps, measure=mean_squared_error)

Cross validating [-4.0]
    --> Error: 1.3526313161973208e-5
    --> Error: 7.75962176600907e-6
    --> Error: 1.2207078965633208e-5
Cross validating [-3.0]
    --> Error: 0.000950516329265247
    --> Error: 0.0007535299298411125
    --> Error: 0.001264545500673288
Cross validating [-2.0]
    --> Error: 0.08939166215261694
    --> Error: 0.09594166292127079
    --> Error: 0.06848532121127057
Cross validating [-1.0]
    --> Error: 3.488083251593377
    --> Error: 4.741811380118283
    --> Error: 1.7981541778982075
Cross validating [0.0]
    --> Error: 7.536280902880949
    --> Error: 14.099075005027823
    --> Error: 14.209249657578672
Cross validating [1.0]
    --> Error: 17.93676220360404
    --> Error: 13.548942334354585
    --> Error: 12.44335647801738


[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m[1m[36mINFO: [39m[22m[36mSweep finished
[39m

In [73]:
data = FakeData(1000,3)

task = Task(task_type="regression", target=4, data=data)
lrn  = Learner("ridge")

train = 1:80
test  = 81:100


modelᵧ = learnᵧ(lrn, task, data[train,:])
pred = predictᵧ(modelᵧ, data=data[test,:], task=task)

mean_squared_error(data[test,task.target],pred)

[1m[36mINFO: [39m[22m[36mSweep finished
[39m

0.09265461914856005

In [33]:
2*data[2,1]+2*data[2,2]+2*data[2,3]+3

4.399200183091448

In [42]:
mean_squared_error([1,1,1,1,1,1], [2,2,2,2,2,2])

1.0