# Application of HD computing to a multinomial, metric dataset

In [86]:

using MLJ
using MLJModels, Statistics, PrettyPrinting
using ProgressMeter
using Dates
using SparseArrays


include("../src/manifoldLearning.jl")
include("../src/coarseEncoding.jl")
#include("../src/libEncoding.jl")


encodeDataset2SparseMat (generic function with 1 method)

In [87]:

# Load the iris data via `@load_iris`: `X` holds the feature columns (accessed
# by name below, e.g. `X.petal_length`) and `y` the species labels (compared
# against "setosa" below).
X, y = @load_iris;


### Model merging metric features into one large vector

In [88]:

# Feature names of the table; they are handed to the coarse encoder as its dimensions.
dimensions = collect(keys(X))
nLattices = 5

# Per-feature encoder settings, keyed by feature name: the observed value range
# and a fixed resolution of 0.5 for every feature.
minimas = Dict(d => minimum(X[d]) for d in dimensions)
maximas = Dict(d => maximum(X[d]) for d in dimensions)
resolution = Dict(d => 0.5 for d in dimensions)

# Build the encoder and its lattice, timing each step.
@time coarseEncoder = CoarseEncoder(dimensions, nLattices, maximas, minimas, resolution)
@time lattice = generateLattice(coarseEncoder);


  0.011441 seconds (4.58 k allocations: 220.979 KiB)
  0.183622 seconds (188.71 k allocations: 9.303 MiB)


In [89]:

# Number of samples, taken from the length of one feature column.
l = length(X.petal_length)

# One Dict per sample, mapping each feature name to that sample's value.
data2Encode = [Dict(d => X[d][i] for d in dimensions) for i in 1:l]

# Encode all samples with the coarse encoder; the result is indexed per sample
# as `sparseHDM[i, :]` in the classification cells.
@time sparseHDM = encodeDataset2SparseMat(data2Encode, coarseEncoder, lattice);


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


  0.230788 seconds (275.73 k allocations: 13.984 MiB)


In [90]:

"""
    sparse2Dense(hdvec::SparseVector)

Return a dense `Vector{Float64}` with the same length as `hdvec` that holds
`1.0` at every stored index of `hdvec` and `0.0` everywhere else.

Only the pattern of stored indices is used; the stored values themselves are
ignored, so the result is a binary indicator vector.
"""
function sparse2Dense(hdvec::SparseVector)
    dense = zeros(length(hdvec))
    # The stored indices of a SparseVector are unique, so one indexed
    # assignment marks them all without allocating an intermediate array.
    dense[SparseArrays.nonzeroinds(hdvec)] .= 1
    return dense
end


sparse2Dense (generic function with 1 method)

In [95]:

# Select the Python interpreter for PyCall and rebuild PyCall.
# NOTE(review): the recorded output of this cell shows sklearn being loaded
# from the Conda environment under ~/.julia/conda rather than /usr/bin/python3
# — confirm that this setting actually takes effect.
ENV["PYTHON"] = "/usr/bin/python3"
using Pkg
# NOTE(review): this rebuilds PyCall every time the cell runs; presumably only
# needed once after changing ENV["PYTHON"] — confirm.
Pkg.build("PyCall")

using PyCall

# scikit-learn submodules imported through PyCall. SVM, LINMODEL and
# model_selection are used by the classification cells; NB and METRICS are not
# referenced in the visible cells.
NB = pyimport("sklearn.naive_bayes")
SVM = pyimport("sklearn.svm")
LINMODEL = pyimport("sklearn.linear_model")
METRICS = pyimport("sklearn.metrics")
model_selection = pyimport("sklearn.model_selection")


[32m[1m   Building[22m[39m Conda ─→ `~/.julia/packages/Conda/3rPhK/deps/build.log`
[32m[1m   Building[22m[39m PyCall → `~/.julia/packages/PyCall/zqDXB/deps/build.log`


PyObject <module 'sklearn.model_selection' from '/home/jair/.julia/conda/3/lib/python3.7/site-packages/sklearn/model_selection/__init__.py'>

In [97]:

# One dense vector per encoded sample.
data = [sparse2Dense(sparseHDM[i, :]) for i in 1:l]
# Binary target: is the sample labelled "setosa"?
labels = map(species -> species == "setosa", y)

my_clf = SVM.SVC(kernel="linear", C=1)

# Per-fold scores from 5-fold cross-validation. With no `scoring` argument
# scikit-learn uses the estimator's default scorer, which for classifiers is
# accuracy (not precision).
model_selection.cross_val_score(my_clf, data, labels, cv=5)


5-element Array{Float64,1}:
 1.0
 0.9
 0.9666666666666667
 0.9666666666666667
 0.9666666666666667

In [98]:

# One dense vector per encoded sample.
data = [sparse2Dense(sparseHDM[i, :]) for i in 1:l]
# Binary target: is the sample labelled "setosa"?
labels = map(species -> species == "setosa", y)

my_clf = LINMODEL.LogisticRegression(random_state=0)

# Per-fold scores from 5-fold cross-validation. With no `scoring` argument
# scikit-learn uses the estimator's default scorer, which for classifiers is
# accuracy (not precision).
model_selection.cross_val_score(my_clf, data, labels, cv=5)


5-element Array{Float64,1}:
 0.9666666666666667
 0.8333333333333334
 0.9
 0.9
 0.9

### Baseline: standard features, without encoding

In [75]:

X, y = @load_iris
l = length(y)

# Raw feature vectors (no HD encoding): one vector per sample, with the values
# ordered as in `dimensions`.
data = [[X[d][i] for d in dimensions] for i in 1:l];
# Binary target: is the sample labelled "setosa"?
labels = map(species -> species == "setosa", y)

my_clf = SVM.SVC(kernel="linear", C=1)

# Per-fold scores from 5-fold cross-validation. With no `scoring` argument
# scikit-learn uses the estimator's default scorer, which for classifiers is
# accuracy (not precision).
model_selection.cross_val_score(my_clf, data, labels, cv=5)


5-element Array{Float64,1}:
 1.0
 1.0
 1.0
 1.0
 1.0

In [76]:

# Logistic regression with a fixed random_state, evaluated on the `data` and
# `labels` globals as they currently stand in the session.
my_clf = LINMODEL.LogisticRegression(random_state=0)

# Per-fold scores from 5-fold cross-validation. With no `scoring` argument
# scikit-learn uses the estimator's default scorer, which for classifiers is
# accuracy (not precision).
model_selection.cross_val_score(my_clf, data, labels, cv=5)


5-element Array{Float64,1}:
 1.0
 1.0
 1.0
 1.0
 1.0