Examples of using the Mondrian tree and Mondrian forest classifiers (the
non-online versions).

In [1]:
include("Mondrian_Forest_Classifier.jl")

predict_proba! (generic function with 2 methods)

In [28]:
using MLBase
"""
    Fakedata(n, dim)

Generate a synthetic binary-classification data set.

Draws an `n × dim` matrix of standard-normal features and derives one label per
row by comparing a row sum against 0.5.

# Returns
- `x`: the `n × dim` `Float64` feature matrix.
- `y`: a length-`n` `Vector{Int}` of labels, each 0 or 1.
"""
function Fakedata(n, dim)
    x = randn(n, dim)
    expx = exp.(x)
    # NOTE(review): `/` between two matrices is matrix right-division, not the
    # element-wise `./`, so `ratio` is n × n rather than an element-wise logistic
    # transform of `x`. It is kept unchanged so the labels keep the distribution
    # the recorded outputs were presumably produced with — confirm whether `./`
    # was intended.
    ratio = expx / (1 .+ expx)
    # `sum(A, 2)` and `1 + A` are errors on Julia >= 1.0; use the `dims` keyword
    # and a broadcast `.+` instead.
    y = sum(ratio; dims=2) .> 0.5
    return x, Int.(vec(y))
end

# Training sample: 1000 rows with d features. Fakedata yields 0/1 labels; they
# are shifted to 1/2 (presumably because the classifiers expect 1-based class
# labels — confirm).
d = 5
X, Y = Fakedata(1000, d)
Y = Y .+ 1

# Independent, smaller sample used for the out-of-sample accuracy checks.
x, y = Fakedata(100, d)
y = y .+ 1;

# Mondrian tree classifier

In [29]:
# Fit a single Mondrian tree on the training sample, then report its accuracy
# on the training sample (X, Y) and on the separate sample (x, y).
# The last argument of train! is presumably the same λ parameter that the
# grid-search cell tunes — confirm.
MT = Mondrian_Tree()
train!(MT, X, Y, 1e3)

pred = predict!(MT, X)
println("Train Accuracy")
train_labels = convert(Vector{Int}, pred)  # correctrate is given integer label vectors
println(correctrate(Y, train_labels))

pred = predict!(MT, x)
println("Accuracy")
test_labels = convert(Vector{Int}, pred)
println(correctrate(y, test_labels))

Train Accuracy
0.721
Accuracy
0.75


In [34]:
# Class-probability vectors from the single tree for the first 10 rows of x
# (the cell displays one vector per row).
predict_proba!(MT,x[1:10,:])

10-element Array{Any,1}:
 [0.601036, 0.398964]
 [0.328767, 0.671233]
 [1.0, 0.0]          
 [0.25, 0.75]        
 [0.328767, 0.671233]
 [0.6875, 0.3125]    
 [0.601036, 0.398964]
 [0.25, 0.75]        
 [0.166667, 0.833333]
 [0.601036, 0.398964]

# Mondrian Forest classifier

For comparison, a Python implementation takes $\sim 27 - 33$ seconds, allocates $\sim 276-619$ MiB of memory, and reaches an accuracy of about $0.8-0.92$ on an
equivalent problem.

See the [Python comparison](comparisons/python_comp.ipynb) notebook and the plots below.


![alt text](comparisons/n_tree_scale.png)
![alt text](comparisons/n_data_scale.png)

In [31]:
# Fit a forest of 10 Mondrian trees on the training sample (timing the fit),
# then report its accuracy on the training sample (X, Y) and on the separate
# sample (x, y).
MF = Mondrian_Forest_Classifier(10)
@time train!(MF, X, Y, 1e3)

pred = predict!(MF, X)
println("Train Accuracy")
train_labels = convert(Vector{Int}, pred)  # correctrate is given integer label vectors
println(correctrate(Y, train_labels))

pred = predict!(MF, x)
println("Accuracy")
test_labels = convert(Vector{Int}, pred)
println(correctrate(y, test_labels))

  0.265484 seconds (2.41 M allocations: 69.843 MiB, 4.10% gc time)
Train Accuracy
0.885
Accuracy
0.83


In [106]:
# Class-probability vectors from the forest for the first 10 rows of x
# (the cell displays one vector per row).
predict_proba!(MF,x[1:10,:])

10-element Array{Array{Float64,1},1}:
 [0.513914, 0.486086]
 [0.6111, 0.3889]    
 [0.796923, 0.203077]
 [0.491248, 0.508752]
 [0.591178, 0.408822]
 [0.680126, 0.319874]
 [0.342672, 0.657328]
 [0.748841, 0.251159]
 [0.135424, 0.864576]
 [0.580112, 0.419888]

# Cross validation

In [26]:
# Larger sample (10000 rows, d features) for the hyper-parameter search below.
# As before, the 0/1 labels from Fakedata are shifted to 1/2.
d = 5
X, Y = Fakedata(10000, d)
Y = Y .+ 1;

In [92]:
# Grid search over the number of trees and λ using MLBase's `gridtune`.
#
# `gridtune` calls `estfun(params...)` with one value per tuned parameter, in
# the order the (name, values) tuples are listed, and `evalfun(model)` with the
# fitted model only. Both callbacks therefore take the hyper-parameters / the
# model directly; the index-taking form (`estfun(train_inds)`,
# `evalfun(model, test_inds)`) is the calling convention of `cross_validate`.

# Fixed hold-out split: the first 90% of the rows train every candidate model,
# the remaining 10% score it. The rows of X are independent random draws, so a
# contiguous split needs no shuffling.
n_obs = size(X, 1)
n_train = div(9 * n_obs, 10)
train_inds = 1:n_train
test_inds = (n_train + 1):n_obs

"""
    estfun(n_trees, λ)

Train a `Mondrian_Forest_Classifier` with `n_trees` trees on the training rows
of the global `X`/`Y`, passing `λ` as the last argument of `train!`. Return the
fitted forest.
"""
function estfun(n_trees, λ)
    MF = Mondrian_Forest_Classifier(n_trees)
    train!(MF, X[train_inds, :], Y[train_inds], λ)
    return MF
end

"""
    evalfun(MF::Mondrian_Forest_Classifier)

Return the fraction of held-out rows of the global `X`/`Y` that `MF` labels
correctly.
"""
function evalfun(MF::Mondrian_Forest_Classifier)
    pred = predict!(MF, X[test_inds, :])
    # Same conversion as the other accuracy cells: correctrate is given integer
    # label vectors.
    return correctrate(Y[test_inds], convert(Array{Int,1}, pred))
end

# The default ordering of `gridtune` keeps the configuration with the highest
# score, which is what we want for an accuracy.
best_model, best_config, best_score = gridtune(
    estfun,
    evalfun,
    ("n_trees", [10, 100, 250, 500]),
    ("λ", [1e1, 1e2, 1e4, 1e8, 1e16]);
    verbose=true
)
println("\nBest Configuration: ", best_config)
println("\nBest score: ", best_score)

[n_trees=10, λ=10.0] => 0.915
[n_trees=100, λ=10.0] => 0.936
[n_trees=250, λ=10.0] => 0.943
[n_trees=500, λ=10.0] => 0.947
[n_trees=10, λ=100.0] => 0.9
[n_trees=100, λ=100.0] => 0.944
[n_trees=250, λ=100.0] => 0.954
[n_trees=500, λ=100.0] => 0.955
[n_trees=10, λ=10000.0] => 0.833
[n_trees=100, λ=10000.0] => 0.942
[n_trees=250, λ=10000.0] => 0.952
[n_trees=500, λ=10000.0] => 0.941
[n_trees=10, λ=1.0e8] => 0.87
[n_trees=100, λ=1.0e8] => 0.928
[n_trees=250, λ=1.0e8] => 0.951
[n_trees=500, λ=1.0e8] => 0.927
[n_trees=10, λ=1.0e16] => 0.897
[n_trees=100, λ=1.0e16] => 0.941
[n_trees=250, λ=1.0e16] => 0.934
[n_trees=500, λ=1.0e16] => 0.941

Best Configuration: (500, 100.0)

Best score: 0.955
