In [36]:
from sklearn import datasets
from sklearn.utils.validation import check_random_state
from sklearn.model_selection import StratifiedKFold, cross_val_score
from rgf.sklearn import RGFClassifier

# Load the iris dataset and shuffle its samples with a seeded RandomState (seed 0).
iris = datasets.load_iris()
rng = check_random_state(0)
perm = rng.permutation(iris.target.size)
# Apply the same permutation to features and labels so the rows stay aligned.
iris.data = iris.data[perm]
iris.target = iris.target[perm]

# Regularized Greedy Forest classifier configuration.
# NOTE(review): verbose=True makes each fit/predict write its full log into
# the cell output; consider verbose=False before sharing the notebook.
rgf = RGFClassifier(max_leaf=1400,
                    algorithm="RGF",
                    test_interval=100,
                    loss="Log",
                    l2=0.2,
                    sl2=0.07,
                    n_iter=7,
                    n_tree_search=1,
                    min_samples_leaf=8,
                    learning_rate=0.7,
                    verbose=True)

n_folds = 3

# Stratified k-fold cross-validation; yields one score per fold.
rgf_scores = cross_val_score(rgf,
                             iris.data,
                             iris.target,
                             cv=StratifiedKFold(n_folds))

# Average over the scores actually returned rather than dividing by n_folds,
# so the result stays correct if the cv splitter is changed independently.
rgf_score = rgf_scores.mean()
print('RGF Classifier score: {0:.5f}'.format(rgf_score))

"train": 
   algorithm=RGF
   train_x_fn=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.train.data.x
   train_y_fn=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.train.data.y
   Log:ON
   model_fn_prefix=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.model
--------------------
Sat Apr 14 06:32:37 2018: Reading training data ... 
Sat Apr 14 06:32:37 2018: Start ... #train=99
--------------------
Forest-level: 
   loss=Log
   max_leaf_forest=1400
   max_tree=700
   opt_interval=100
   test_interval=100
   num_tree_search=1
   Verbose:ON
   memory_policy=Generous
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization: 
   loss=Log
   num_iteration_opt=7
   reg_L2=0.2
   opt_stepsize=0.7
   max_delta=1
Tree-level: min_pop=8
Node split: reg_L2=0.07
--------------------
Sat Apr 14 06:32:37 2018: Calling optimizer with 30 trees and 100 leaves
Sat Apr 14 06:32:37 2018: Writing model: seq#=1
Sat Apr 14 06:32:37 2018: Calling optimizer with 60


None
"predict": 
   model_fn=/tmp/rgf/952c58d6-ad8f-4cd9-99d5-12fc21e25aee118.model-14
   test_x_fn=/tmp/rgf/952c58d6-ad8f-4cd9-99d5-12fc21e25aee118.test.data.x
   prediction_fn=/tmp/rgf/952c58d6-ad8f-4cd9-99d5-12fc21e25aee118.predictions.txt
   Log:ON
--------------------
Sat Apr 14 06:32:37 2018: Reading test data ... 
Sat Apr 14 06:32:37 2018: Predicting ... 
elapsed: 0.000604
/tmp/rgf/952c58d6-ad8f-4cd9-99d5-12fc21e25aee118.predictions.txt: /tmp/rgf/952c58d6-ad8f-4cd9-99d5-12fc21e25aee118.model-14,#leaf=1400,#tree=700
Sat Apr 14 06:32:37 2018: Done ... 

None
"predict": 
   model_fn=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.model-14
   test_x_fn=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.test.data.x
   prediction_fn=/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.predictions.txt
   Log:ON
--------------------
Sat Apr 14 06:32:37 2018: Reading test data ... 
Sat Apr 14 06:32:37 2018: Predicting ... 
elapsed: 0.000662
/tmp/rgf/93b693a6-3add-4152-81e8-256b6fe3f712119.pr


None
"train": 
   algorithm=RGF
   train_x_fn=/tmp/rgf/e7a0affd-8d64-4f1c-a107-09540481e0b7121.train.data.x
   train_y_fn=/tmp/rgf/e7a0affd-8d64-4f1c-a107-09540481e0b7121.train.data.y
   Log:ON
   model_fn_prefix=/tmp/rgf/e7a0affd-8d64-4f1c-a107-09540481e0b7121.model
--------------------
Sat Apr 14 06:32:37 2018: Reading training data ... 
Sat Apr 14 06:32:37 2018: Start ... #train=99
--------------------
Forest-level: 
   loss=Log
   max_leaf_forest=1400
   max_tree=700
   opt_interval=100
   test_interval=100
   num_tree_search=1
   Verbose:ON
   memory_policy=Generous
-------------
Training data: 4x99, nonzero_ratio=1; managed as dense data.
-------------
Optimization: 
   loss=Log
   num_iteration_opt=7
   reg_L2=0.2
   opt_stepsize=0.7
   max_delta=1
Tree-level: min_pop=8
Node split: reg_L2=0.07
--------------------
Sat Apr 14 06:32:37 2018: Calling optimizer with 50 trees and 100 leaves
Sat Apr 14 06:32:37 2018: Writing model: seq#=1
Sat Apr 14 06:32:37 2018: Calling optimizer w


None
"train": 
   algorithm=RGF
   train_x_fn=/tmp/rgf/bedebed5-8db4-412b-bc15-3985709a2b01126.train.data.x
   train_y_fn=/tmp/rgf/bedebed5-8db4-412b-bc15-3985709a2b01126.train.data.y
   Log:ON
   model_fn_prefix=/tmp/rgf/bedebed5-8db4-412b-bc15-3985709a2b01126.model
--------------------
Sat Apr 14 06:32:38 2018: Reading training data ... 
Sat Apr 14 06:32:38 2018: Start ... #train=102
--------------------
Forest-level: 
   loss=Log
   max_leaf_forest=1400
   max_tree=700
   opt_interval=100
   test_interval=100
   num_tree_search=1
   Verbose:ON
   memory_policy=Generous
-------------
Training data: 4x102, nonzero_ratio=1; managed as dense data.
-------------
Optimization: 
   loss=Log
   num_iteration_opt=7
   reg_L2=0.2
   opt_stepsize=0.7
   max_delta=1
Tree-level: min_pop=8
Node split: reg_L2=0.07
--------------------
Sat Apr 14 06:32:38 2018: Calling optimizer with 44 trees and 100 leaves
Sat Apr 14 06:32:38 2018: Writing model: seq#=1
Sat Apr 14 06:32:38 2018: Calling optimizer

In [40]:
# Load a fresh copy of the iris dataset and show the returned object as the
# cell output (its repr includes the full 'data' array).
# NOTE(review): this prints the whole dataset; a slice such as
# datasets.load_iris().data[:5] would keep the output short — confirm intent.
datasets.load_iris()

 'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  