Examples of using the Mondrian tree and Mondrian forest classifiers (the batch,
i.e. non-online, versions).

In [1]:
include("Mondrian_Forest_Classifier.jl")

FakedataClassif (generic function with 2 methods)

In [15]:
using MLBase, Plots
plotly()  # select the Plotly backend for Plots

# Generate a toy classification problem. The upper-case pair (X, Y) is what the
# cells below train on; the lower-case pair (x, y) is what they evaluate on.
# NOTE(review): the arguments look like (n_train, n_features, n_test) — confirm
# against FakedataClassif in Mondrian_Forest_Classifier.jl.
(X, Y, x, y) = FakedataClassif(1000, 2, 1000)

# Shift both label vectors up by one (the plot below selects labels 1 and 2).
Y = Y .+ 1
y = y .+ 1

# Only data with exactly two columns is drawn: one colour per label value.
if size(X, 2) == 2
    scatter(X[Y .== 1, 1], X[Y .== 1, 2]; color="red")
    scatter!(X[Y .== 2, 1], X[Y .== 2, 2]; color="green")
end

# Mondrian tree classifier

In [16]:
# Fit a single Mondrian tree on (X, Y), then print the fraction of correctly
# classified points on (X, Y) and on (x, y).
MT = Mondrian_Tree()
train!(MT, X, Y, 1e6)  # NOTE(review): 1e6 is presumably the λ parameter — confirm

pred = predict!(MT, X)
println("Train Accuracy")
println(correctrate(Y, convert(Vector{Int}, pred)))

pred = predict!(MT, x)
println("Accuracy")
println(correctrate(y, convert(Vector{Int}, pred)))

Train Accuracy
0.966
Accuracy
0.962


In [18]:
# Per-row probability vectors from the tree for the first ten rows of x.
predict_proba!(MT, x[1:10, :])

10-element Array{Any,1}:
 [1.0, 0.0]
 [0.0, 1.0]
 [1.0, 0.0]
 [0.0, 1.0]
 [0.0, 1.0]
 [1.0, 0.0]
 [1.0, 0.0]
 [1.0, 0.0]
 [1.0, 0.0]
 [1.0, 0.0]

# Mondrian Forest classifier

On an equivalent problem, the Python implementation used for comparison needs $\sim 27 - 33$ seconds of runtime and
$\sim 276-619$ MiB of memory allocations, and reaches an accuracy of about $0.8-0.92$.

See the [Python comparison](comparisons/python_comp.ipynb) and the plots below.


![alt text](comparisons/n_tree_scale.png)
![alt text](comparisons/n_data_scale.png)

In [19]:
# Fit a forest built with Mondrian_Forest_Classifier(100) on (X, Y), timing the
# training call, then print the fraction of correctly classified points on
# (X, Y) and on (x, y).
MF = Mondrian_Forest_Classifier(100)
@time train!(MF, X, Y, 1e9)

pred = predict!(MF, X)
println("Train Accuracy")
println(correctrate(Y, convert(Vector{Int}, pred)))

pred = predict!(MF, x)
println("Accuracy")
println(correctrate(y, convert(Vector{Int}, pred)))

  1.204428 seconds (11.62 M allocations: 354.683 MiB, 5.61% gc time)
Train Accuracy
0.998
Accuracy
0.992


In [20]:
# Per-row probability vectors from the forest for the first ten rows of x.
predict_proba!(MF, x[1:10, :])

10-element Array{Array{Float64,1},1}:
 [0.956088, 0.0439123]
 [0.0611665, 0.938833]
 [0.918693, 0.0813066]
 [0.151121, 0.848879] 
 [0.106611, 0.893389] 
 [0.745457, 0.254543] 
 [0.5167, 0.4833]     
 [0.548808, 0.451192] 
 [0.882253, 0.117747] 
 [0.839362, 0.160638] 

# MLBase Gridtune

In [24]:
# Regenerate the data for the grid search, passing d = 5 as the second argument,
# and shift both label vectors up by one as in the earlier cell.
# NOTE(review): d is presumably the number of features — confirm against
# FakedataClassif.
d = 5
(X, Y, x, y) = FakedataClassif(1000, d, 100)
Y = Y .+ 1
y = y .+ 1;

In [25]:
"""
    estfun(n_trees, λ)

Build a `Mondrian_Forest_Classifier(n_trees)`, train it on the global `X`, `Y`
with `λ` as the fourth argument of `train!`, and return the trained forest.
Used as the estimation callback for `gridtune`.
"""
function estfun(n_trees, λ)
    forest = Mondrian_Forest_Classifier(n_trees)
    train!(forest, X, Y, λ)
    return forest
end

"""
    evalfun(MF::Mondrian_Forest_Classifier)

Score a trained forest: return `correctrate` of its predictions on the global
`x` against the global labels `y`. Used as the evaluation callback for
`gridtune`.
"""
evalfun(MF::Mondrian_Forest_Classifier) = correctrate(y, predict!(MF, x))

# Search the n_trees × λ grid: estfun builds and trains a model for a parameter
# combination and evalfun scores it; verbose=true prints each score.
best_model, best_config, best_score = gridtune(
    estfun,
    evalfun,
    ("n_trees", [10, 100, 250, 500]),
    ("λ", [1e1, 1e2, 1e4, 1e8, 1e16]);
    verbose=true
)
println("\nBest Configuration: ", best_config)
println("\nBest score: ", best_score)

[n_trees=10, λ=10.0] => 0.85
[n_trees=100, λ=10.0] => 0.91
[n_trees=250, λ=10.0] => 0.95
[n_trees=500, λ=10.0] => 0.92
[n_trees=10, λ=100.0] => 0.8
[n_trees=100, λ=100.0] => 0.97
[n_trees=250, λ=100.0] => 0.89
[n_trees=500, λ=100.0] => 0.9
[n_trees=10, λ=10000.0] => 0.85
[n_trees=100, λ=10000.0] => 0.93
[n_trees=250, λ=10000.0] => 0.89
[n_trees=500, λ=10000.0] => 0.93
[n_trees=10, λ=1.0e8] => 0.91
[n_trees=100, λ=1.0e8] => 0.93
[n_trees=250, λ=1.0e8] => 0.9
[n_trees=500, λ=1.0e8] => 0.93
[n_trees=10, λ=1.0e16] => 0.83
[n_trees=100, λ=1.0e16] => 0.94
[n_trees=250, λ=1.0e16] => 0.94
[n_trees=500, λ=1.0e16] => 0.92

Best Configuration: (100, 100.0)

Best score: 0.97
