Examples of using the Mondrian forest and Mondrian tree classifiers (the
non-online versions).

In [1]:
include("Mondrian_Forest_Classifier.jl")

predict_proba! (generic function with 2 methods)

In [2]:
using MLBase
"""
    Fakedata(n, dim)

Generate a synthetic binary classification problem.

Draws an `n`×`dim` standard-normal feature matrix `x` and derives one 0/1
label per row by thresholding, at 0.5, the row sums of
`exp.(x) / (1 .+ exp.(x))`.

Returns the tuple `(features, labels)`: a `Float64` matrix of size
`(n, dim)` and a `Vector{Int}` of length `n` with entries in `{0, 1}`.
"""
function Fakedata(n, dim)
    x = randn(n, dim)
    ex = exp.(x)
    # `1 .+ ex` replaces the scalar-plus-array form `1 + ex`, which is
    # deprecated on Julia 0.6 and an error on later versions.
    # NOTE(review): `/` between two matrices is matrix right division, not the
    # elementwise `./`, so `ratio` is n×n. It is kept unchanged because the
    # outputs recorded in this notebook were produced with it -- confirm
    # whether an elementwise logistic function was intended.
    ratio = ex / (1 .+ ex)
    # Row sums written without `sum(A, 2)` / `sum(A, dims=2)` so the same code
    # runs on both Julia 0.6 and Julia 1.x.
    rowsums = [sum(ratio[i, :]) for i in 1:size(ratio, 1)]
    y = Int[s > 0.5 ? 1 : 0 for s in rowsums]
    return 1.0 * x, y
end

# Number of features shared by the training and hold-out sets.
d = 5

# Training set: 1000 samples. Fakedata yields 0/1 labels; adding 1 moves them
# to 1/2 (presumably because the classifiers expect 1-based class labels --
# TODO confirm).
X, Y = Fakedata(1000, d)
Y = Y .+ 1

# Hold-out set: 100 samples, labels shifted the same way.
x, y = Fakedata(100, d)
y = y .+ 1;

# Mondrian tree classifier

In [3]:
# Fit a single Mondrian tree on the training set, then report its accuracy on
# the training data and on the hold-out data.
# NOTE(review): the last argument of `train!` is presumably the Mondrian
# lifetime λ (the cross-validation cell passes `λ` in this position) -- confirm.
MT = Mondrian_Tree()
train!(MT, X, Y, 1e3)

# `correctrate` is given integer label vectors, hence the conversion.
pred = predict!(MT, X)
println("Train Accuracy")
println(correctrate(Y, convert(Vector{Int}, pred)))

pred = predict!(MT, x)
println("Accuracy")
println(correctrate(y, convert(Vector{Int}, pred)))

Train Accuracy
0.69
Accuracy
0.61


In [4]:
# Sanity check: print the sum of `Gₚ` for every leaf of the trained tree.
# In the recorded output each sum is 1.0 up to floating-point rounding
# (presumably `Gₚ` holds the leaf's class probabilities -- TODO confirm).
for leaf in MT.leaves
    println(sum(leaf.Gₚ))
end

0.9999999999999999
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.9999999999999999
0.9999999999999999
1.0
1.0
1.0
1.0
1.0


In [8]:
# Refit a fresh Mondrian tree and display `predict_proba!` for every training
# row; the recorded output shows one two-element vector per row.
MT = Mondrian_Tree()
train!(MT, X, Y, 1e3)
pred = predict_proba!(MT, X)

1000-element Array{Any,1}:
 [0.0, 1.0]           
 [0.6, 0.4]           
 [0.0666667, 0.933333]
 [0.0, 1.0]           
 [1.0, 0.0]           
 [0.769231, 0.230769] 
 [1.0, 0.0]           
 [0.774194, 0.225806] 
 [0.7, 0.3]           
 [0.0, 1.0]           
 [0.571429, 0.428571] 
 [0.774194, 0.225806] 
 [1.0, 0.0]           
 ⋮                    
 [0.916667, 0.0833333]
 [0.333333, 0.666667] 
 [1.0, 0.0]           
 [0.0, 1.0]           
 [1.0, 0.0]           
 [0.0, 1.0]           
 [0.565217, 0.434783] 
 [0.15, 0.85]         
 [0.25, 0.75]         
 [0.0, 1.0]           
 [0.774194, 0.225806] 
 [0.0, 1.0]           

# Mondrian Forest classifier

On an equivalent problem, the Python implementation takes $\sim 27 - 33$ seconds, makes $\sim 276-619$ MiB of memory
allocations, and reaches an accuracy of about $0.8-0.92$.

See [python comparison](comparisons/python_comp.ipynb) and the plots below.


![alt text](comparisons/n_tree_scale.png)
![alt text](comparisons/n_data_scale.png)

In [9]:
# Fit a forest of 100 Mondrian trees (timing the training call), then report
# its accuracy on the training data and on the hold-out data.
MF = Mondrian_Forest_Classifier(100)
@time train!(MF, X, Y, 1e9)

# `correctrate` is given integer label vectors, hence the conversion.
pred = predict!(MF, X);
println("Train Accuracy")
println(correctrate(Y, convert(Vector{Int}, pred)))

pred = predict!(MF, x)
println("Accuracy")
println(correctrate(y, convert(Vector{Int}, pred)))

  2.376377 seconds (21.76 M allocations: 615.014 MiB, 4.48% gc time)
Train Accuracy
0.913
Accuracy
0.79


In [10]:
# Display `predict_proba!` of the trained forest for every training row.
# NOTE(review): the output recorded for this cell ends in a MethodError
# (cannot `convert` an Array{Float64,1} to Float64); the cause is not visible
# in this notebook -- investigate in Mondrian_Forest_Classifier.jl.
predict_proba!(MF, X)

Any[[0.222222, 0.777778], [1.0, 0.0], [0.0, 1.0], [0.222222, 0.777778], [0.55, 0.45], [0.606838, 0.393162], [0.946429, 0.0535714], [0.833333, 0.166667], [1.0, 0.0], [0.222222, 0.777778], [0.884615, 0.115385], [0.888889, 0.111111], [0.222222, 0.777778], [1.0, 0.0], [0.222222, 0.777778], [0.288889, 0.711111], [0.0, 1.0], [0.222222, 0.777778], [0.606838, 0.393162], [0.606838, 0.393162], [0.6, 0.4], [1.0, 0.0], [0.5, 0.5], [0.0, 1.0], [0.285714, 0.714286], [0.222222, 0.777778], [0.555556, 0.444444], [0.222222, 0.777778], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.666667, 0.333333], [1.0, 0.0], [0.222222, 0.777778], [0.884615, 0.115385], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.884615, 0.115385], [0.888889, 0.111111], [0.946429, 0.0535714], [0.606838, 0.393162], [1.0, 0.0], [0.606838, 0.393162], [0.288889, 0.711111], [0.6, 0.4], [0.606838, 0.393162], [1.0, 0.0], [0.222222, 0.777778], [0.666667, 0.333333], [0.222222, 0.777778], [0.606838, 0.393162], [1.0, 0.0], [0.946429, 0.0535714], [0.666667, 0.

 0.777778], [0.946429, 0.0535714], [1.0, 0.0], [0.5, 0.5], [0.222222, 0.777778], [1.0, 0.0], [0.928571, 0.0714286], [0.6, 0.4], [0.214286, 0.785714], [0.606838, 0.393162], [0.666667, 0.333333], [0.222222, 0.777778], [0.55, 0.45], [0.222222, 0.777778], [0.0, 1.0], [0.375, 0.625], [0.222222, 0.777778], [0.666667, 0.333333], [0.946429, 0.0535714], [0.0, 1.0], [0.0, 1.0], [0.214286, 0.785714], [0.222222, 0.777778], [0.222222, 0.777778], [0.55, 0.45], [1.0, 0.0], [0.606838, 0.393162], [1.0, 0.0], [0.555556, 0.444444], [1.0, 0.0], [0.714286, 0.285714], [0.606838, 0.393162], [0.375, 0.625], [1.0, 0.0], [0.946429, 0.0535714], [0.222222, 0.777778], [0.222222, 0.777778], [1.0, 0.0], [0.666667, 0.333333], [0.288889, 0.711111], [0.5, 0.5], [0.222222, 0.777778], [0.214286, 0.785714], [0.0, 1.0], [0.946429, 0.0535714], [0.5, 0.5], [0.666667, 0.333333], [0.166667, 0.833333], [1.0, 0.0], [1.0, 0.0], [0.222222, 0.777778], [0.666667, 0.333333], [1.0, 0.0], [0.0909091, 0.909091], [0.222222, 0.777778], [0

LoadError: MethodError: Cannot `convert` an object of type Array{Float64,1} to an object of type Float64
This may have arisen from a call to the constructor Float64(...),
since type constructors fall back to convert methods.

# Cross validation

In [26]:
# Larger data set for the parameter search: 10000 samples with `d` features.
# Fakedata yields 0/1 labels; adding 1 moves them to 1/2.
d = 5
X, Y = Fakedata(10000, d)
Y = Y .+ 1;

In [92]:
# Hold-out split used by the grid search below.
# NOTE(review): the original cell defined no split; a contiguous 90/10 split
# is assumed here -- adjust if a different evaluation protocol was intended.
n_train = floor(Int, 0.9 * size(X, 1))
train_inds = 1:n_train
test_inds = (n_train + 1):size(X, 1)

"""
    estfun(train_inds, n_trees, λ)

Train a `Mondrian_Forest_Classifier` with `n_trees` trees on the rows
`train_inds` of the global `X` / `Y`, passing `λ` as the last argument of
`train!`, and return the trained forest.
"""
function estfun(train_inds, n_trees, λ)
    MF = Mondrian_Forest_Classifier(n_trees)
    train!(MF, X[train_inds, :], Y[train_inds], λ)
    return MF
end

# Backward-compatible one-argument form: as in the original definition,
# `n_trees` and `λ` are read from global scope.
estfun(train_inds) = estfun(train_inds, n_trees, λ)

"""
    evalfun(MF, test_inds)

Return `correctrate` of the forest `MF` on the rows `test_inds` of the global
`X` / `Y`.
"""
function evalfun(MF::Mondrian_Forest_Classifier, test_inds)
    # Same integer conversion the other accuracy computations in this notebook
    # apply before calling `correctrate`.
    pred = convert(Array{Int,1}, predict!(MF, X[test_inds, :]))
    return correctrate(Y[test_inds], pred)
end

# `gridtune` calls the estimator as `estfun(params...)` and the evaluator as
# `evalfun(model)`, so the index-based functions above are adapted with
# closures that fix the train / test rows.
best_model, best_config, best_score = gridtune(
    (n_trees, λ) -> estfun(train_inds, n_trees, λ),
    MF -> evalfun(MF, test_inds),
    ("n_trees", [10, 100, 250, 500]),
    ("λ", [1e1, 1e2, 1e4, 1e8, 1e16]),
    verbose=true
)
println("\nBest Configuration: ", best_config)
println("\nBest score: ", best_score)

[n_trees=10, λ=10.0] => 0.915
[n_trees=100, λ=10.0] => 0.936
[n_trees=250, λ=10.0] => 0.943
[n_trees=500, λ=10.0] => 0.947
[n_trees=10, λ=100.0] => 0.9
[n_trees=100, λ=100.0] => 0.944
[n_trees=250, λ=100.0] => 0.954
[n_trees=500, λ=100.0] => 0.955
[n_trees=10, λ=10000.0] => 0.833
[n_trees=100, λ=10000.0] => 0.942
[n_trees=250, λ=10000.0] => 0.952
[n_trees=500, λ=10000.0] => 0.941
[n_trees=10, λ=1.0e8] => 0.87
[n_trees=100, λ=1.0e8] => 0.928
[n_trees=250, λ=1.0e8] => 0.951
[n_trees=500, λ=1.0e8] => 0.927
[n_trees=10, λ=1.0e16] => 0.897
[n_trees=100, λ=1.0e16] => 0.941
[n_trees=250, λ=1.0e16] => 0.934
[n_trees=500, λ=1.0e16] => 0.941

Best Configuration: (500, 100.0)

Best score: 0.955
