Examples of using the Mondrian forest and tree classifiers (not online 
versions).

In [1]:
include("Mondrian_Forest_Classifier.jl")

FakedataClassif (generic function with 2 methods)

In [13]:
using MLBase
using Plots
plotly()

X, Y, x, y = FakedataClassif(1000,2,1000);
Y = Y.+1;
y = y.+1;

if (size(X,2)==2
    scatter(X[Y.==1,1],X[Y.==1,2], color="red")
    scatter!(X[Y.==2,1],X[Y.==2,2],color="green")
end

# Mondrian tree classifier

In [78]:
MT = Mondrian_Tree()
train!(MT,X,Y,1e6)
pred=predict!(MT, X)
println("Train Accuracy")
println(correctrate(Y,convert(Array{Int,1},pred)))
pred=predict!(MT, x)
println("Accuracy") 
println(correctrate(y,convert(Array{Int,1},pred)))

Train Accuracy
0.625
Accuracy
0.54


In [72]:
length(MT.leaves)

52

In [73]:
predict_proba!(MT,x[1:10,:])

10-element Array{Any,1}:
 [1.0, 0.0]          
 [0.0, 1.0]          
 [1.0, 0.0]          
 [0.217391, 0.782609]
 [1.0, 0.0]          
 [0.0, 1.0]          
 [1.0, 0.0]          
 [0.0, 1.0]          
 [1.0, 0.0]          
 [1.0, 0.0]          

# Mondrian Forest classifier

Python has $\sim 27 - 33$ seconds runtime, $\sim 276-619$ MiB of memory allocations, and about $0.8-0.92$ accuracy on an
equivalent problem

See [python comparison](comparisons/python_comp.ipynb) and the plots below. S


![alt text](comparisons/n_tree_scale.png)
![alt text](comparisons/n_data_scale.png)

In [74]:
MF = Mondrian_Forest_Classifier(100)
@time train!(MF, X, Y, 1e9)
pred=predict!(MF, X);
println("Train Accuracy")
println(correctrate(Y,convert(Array{Int,1},pred)))
pred=predict!(MF, x)
println("Accuracy") 
println(correctrate(y,convert(Array{Int,1},pred)))

  1.327526 seconds (12.94 M allocations: 412.542 MiB, 5.03% gc time)
Train Accuracy
0.999
Accuracy
0.96


In [26]:
predict_proba!(MF,x[1:10,:])

10-element Array{Array{Float64,1},1}:
 [0.118568, 0.881432]
 [0.530019, 0.469981]
 [0.495267, 0.504733]
 [0.650166, 0.349834]
 [0.206734, 0.793266]
 [0.611845, 0.388155]
 [0.592886, 0.407114]
 [0.230515, 0.769485]
 [0.346378, 0.653622]
 [0.339732, 0.660268]

# MLBase Gridtune

In [29]:
d=5
X, Y = FakedataClassif(1000,d);
Y = Y.+1;
x,y = FakedataClassif(100,d);
y=y.+1;

In [30]:
function estfun(n_trees,λ)
    MF = Mondrian_Forest_Classifier(n_trees)
    train!(MF,X,Y,λ)
    return MF
end

function evalfun(MF::Mondrian_Forest_Classifier)
    return correctrate(y, predict!(MF,x))
end

best_model, best_config, best_score = gridtune(estfun, 
                                               evalfun, 
                                               ("n_trees",[10,100,250,500]),
                                               ("λ",[1e1,1e2,1e4,1e8,1e16]),
                                               verbose=true
)
println("\nBest Configuration: ", best_config)
println("\nBest score: ", best_score)

[n_trees=10, λ=10.0] => 0.26
[n_trees=100, λ=10.0] => 0.29
[n_trees=250, λ=10.0] => 0.27
[n_trees=500, λ=10.0] => 0.26
[n_trees=10, λ=100.0] => 0.31
[n_trees=100, λ=100.0] => 0.29
[n_trees=250, λ=100.0] => 0.3
[n_trees=500, λ=100.0] => 0.28
[n_trees=10, λ=10000.0] => 0.22
[n_trees=100, λ=10000.0] => 0.26
[n_trees=250, λ=10000.0] => 0.27
[n_trees=500, λ=10000.0] => 0.29
[n_trees=10, λ=1.0e8] => 0.34
[n_trees=100, λ=1.0e8] => 0.26
[n_trees=250, λ=1.0e8] => 0.3
[n_trees=500, λ=1.0e8] => 0.28
[n_trees=10, λ=1.0e16] => 0.36
[n_trees=100, λ=1.0e16] => 0.28


LoadError: [91mInterruptException:[39m