In [118]:
using MLJ
using MLJModels, Statistics, PrettyPrinting
using ProgressMeter

include("src/modelHDSparse.jl")
include("src/libEncoding.jl")



getNextNodes (generic function with 1 method)

In [12]:
using Pkg; Pkg.activate("."); Pkg.instantiate()

[32m[1mActivating[22m[39m environment at `~/WORK/MLJ/HDComputing.jl/Project.toml`


### Lib

In [128]:

# Keys of the feature container `X`, collected into a vector.
irisKeys = collect(keys(X))

# Build the record for observation `i`: a Dict mapping every key (converted to a String)
# to the `i`-th value stored under that key in `X`.
getData = i -> Dict([string(col) => X[col][i] for col in irisKeys])


# coarse coding
"""
    mapPos2Indices(x, xMin, xMax, nGridlat)

Return, as an `Int64`, the grid index of position `x` when the span from `xMin` to `xMax`
is divided into `nGridlat` cells. The scaled position is rounded up with `ceil`, so
`x == xMin` gives 0 and `x == xMax` gives `nGridlat`.
"""
function mapPos2Indices(x, xMin, xMax, nGridlat)
    # approximation on the top left of grid
    scaled = nGridlat * (x - xMin) / (xMax - xMin)
    return Int64(ceil(scaled))
end


# Coarse-code the value `x` of feature `feat`: take `x` together with `gridNb` neighbouring
# positions on each side, spaced one grid cell ((fMax - fMin) / gridResolution) apart, map
# every position to its grid index with `mapPos2Indices`, and return the pair
# `feat => indices` where the indices are rendered as strings.
getCoarseEvent = (x, feat, fMin, fMax, gridResolution, gridNb) -> begin
    cellWidth = (fMax - fMin) / gridResolution
    positions = [x + offset * cellWidth for offset in -gridNb:gridNb]
    cells = [mapPos2Indices(p, fMin, fMax, gridResolution) for p in positions]
    feat => [string(c) for c in cells]
end


"""
    dataModel(dD)

Coarse-code every entry of the record `dD`.

Each entry of `dD` is read as `(name, value)`. The name is converted to a `Symbol`, its
`:min` and `:max` are looked up in the global `dicBoundaries`, and the value is passed to
`getCoarseEvent` together with the globals `nGridResolution` and `gridNb`. The per-entry
results are concatenated with `vcat` and returned.
"""
function dataModel(dD)
    # Turn one (name, value) entry into its coarse-coded event.
    function encodeEntry(entry)
        feat = Symbol(entry[1])
        value = entry[2]
        lower = dicBoundaries[feat][:min]
        upper = dicBoundaries[feat][:max]
        return getCoarseEvent(value, feat, lower, upper, nGridResolution, gridNb)
    end

    events = [encodeEntry(entry) for entry in collect(dD)]
    return vcat(events...)
end



sparse2Dense (generic function with 1 method)

### Data Preprocessing
##### Data is continuous, so we embed it within a scheme approximating distance or similarity

In [129]:
# Load the iris data through MLJ's `@load_iris`; `X` holds the feature columns and `y` is
# used later as the training target.
X, y = @load_iris;

# One record per observation, built by `getData`.
data = map(getData, 1:length(X.petal_length))


# Value range of every column of `X`, stored as key => Dict(:max => ..., :min => ...).
dicBoundaries = Dict([col => Dict(:max => maximum(X[col]), :min => minimum(X[col]))
                      for col in keys(X)]);


### SDM model

In [2]:

include("../src/modelHDSparse.jl")

# Shared size: used as the `:N` entry of every feature's encoding settings and as the
# length of the dense vectors produced by `sparse2Dense`.
n = 100000

# We build SDRs dictionary "as you go"
dicSDMs = Dict()

wTxt = 1
wTag = 1
wTarget = 13

# Per-feature encoding settings; every feature uses the same `:N` and `:W`.
dicModelEncoding = Dict([feat => Dict(:N => n, :W => 1)
                         for feat in (:sepal_length, :petal_width,
                                      :petal_length, :sepal_width)])


"""
    encoderHD(dicSDMs, dicModelEncoding, dicData)

Encode one coarse-coded record into a single result via `HDSparse.superposition`.

For every `feature => tokens` pair in `dicData`, each token is encoded with
`HDSparse.encodeOnTheFly` under the key `"<feature>_<token>"`, using that feature's
settings from `dicModelEncoding`. The per-token encodings of a feature are concatenated
with `vcat`, and the per-feature results are handed to `HDSparse.superposition`, whose
return value is returned.

# Arguments
- `dicSDMs`: dictionary passed through to `HDSparse.encodeOnTheFly` (presumably filled
  with new codes as they are first requested; confirm against `HDSparse`).
- `dicModelEncoding`: per-feature encoding settings, keyed by feature name.
- `dicData`: vector of `feature => tokens` pairs, e.g. as produced by `dataModel`.
"""
function encoderHD(dicSDMs,
                   dicModelEncoding::Dict{Symbol,Dict{Symbol,Int64}},
                   dicData::Array{Pair{Symbol,Array{String,1}},1})
    perFeature = map(dicData) do (feat, tokens)
        encodings = [HDSparse.encodeOnTheFly(dicSDMs, dicModelEncoding[feat],
                                             string(feat, "_", tok))
                     for tok in tokens]
        vcat(encodings...)
    end
    return HDSparse.superposition(perFeature)
end


"""
    sparse2Dense(sparseVec; len = n)

Utility to convert into classifiable format.

Return a dense `Vector{Float64}` of length `len` that holds `1.0` at every index listed
in `sparseVec.nzind` and `0.0` everywhere else.

# Arguments
- `sparseVec`: any object exposing an `nzind` field that iterates integer indices.

# Keywords
- `len`: length of the returned vector. Defaults to the global `n`, which is the length
  this function previously used unconditionally.

# Throws
- `BoundsError` if an index in `sparseVec.nzind` lies outside `1:len`.
"""
function sparse2Dense(sparseVec; len = n)
    dense = zeros(len)
    for idx in sparseVec.nzind
        dense[idx] = 1
    end
    return dense
end


sparse2Dense (generic function with 1 method)

### Encoding data

In [133]:
# Coarse-coded events of the first record.
vv = dataModel(data[1])
# For every record: coarse-code it, encode it with `encoderHD`, then convert the result
# to a dense vector. `@showprogress` displays a progress bar while mapping.
dataSDM = @showprogress map(
    d -> sparse2Dense(encoderHD(dicSDMs, dicModelEncoding, dataModel(d))),
    data);

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


### Epic fail with MLJ

In [138]:
# Bring the DecisionTreeClassifier model type into scope and build an instance with its
# default settings.
@load DecisionTreeClassifier
tree_model = DecisionTreeClassifier()
# Bind the model to the encoded features and the labels.
# NOTE(review): `dataSDM` is a Vector of dense vectors (one per record), not a table.
# Presumably this is what triggers the warning printed below and the ArgumentError raised
# by `fit!` in the next cell — confirm against the input requirements of the MLJ version
# in use.
tree = machine(tree_model, dataSDM, y)

└ @ MLJ /root/.julia/packages/MLJ/LDDzK/src/machines.jl:54


[34mMachine{DecisionTreeClassifier} @ 3…33[39m


In [139]:
# Split the row indices of `y` into two shuffled parts using a fraction of 0.7.
# NOTE(review): no rng/seed is passed, so the split presumably differs between runs —
# confirm if reproducibility matters.
train, test = partition(eachindex(y), 0.7, shuffle=true)
# Train the machine on the `train` rows only.
fit!(tree, rows=train)

┌ Info: Training Machine{DecisionTreeClassifier} @ 3…33.
└ @ MLJ /root/.julia/packages/MLJ/LDDzK/src/machines.jl:172


ArgumentError: ArgumentError: 