Skip to content
Permalink
Browse files

trees: Default to using float datatype

Start of making it configurable through convert()
  • Loading branch information...
jonnor committed Jun 7, 2019
1 parent dad4d4f commit 2c1fd91d7dabb860ee662fe6cd8317c8d066c31c
Showing with 34 additions and 62 deletions.
  1. +4 −5 bindings/eml_trees.cpp
  2. +2 −2 emlearn/cgen.py
  3. +3 −3 emlearn/convert.py
  4. +5 −7 emlearn/eml_trees.h
  5. +20 −40 emlearn/trees.py
  6. +0 −2 examples/digits.py
  7. +0 −3 test/test_trees.py
@@ -16,7 +16,7 @@ class TreesClassifier {
EmlTrees forest;

public:
TreesClassifier(std::vector<EmlTreesValue> node_data, std::vector<int32_t> _roots)
TreesClassifier(std::vector<float> node_data, std::vector<int32_t> _roots)
: roots(_roots)
{
// TODO: take model coefficients as a Numpy array (perf)
@@ -45,7 +45,7 @@ class TreesClassifier {


py::array_t<int32_t>
predict(py::array_t<int32_t, py::array::c_style | py::array::forcecast> in) {
predict(py::array_t<float, py::array::c_style | py::array::forcecast> in) {
if (in.ndim() != 2) {
throw std::runtime_error("predict input must have dimensions 2");
}
@@ -57,8 +57,7 @@ class TreesClassifier {
//auto s = in.unchecked();
auto r = classes.mutable_unchecked<1>();
for (int i=0; i<n_samples; i++) {
//const int32_t *v = s.data(i);
const int32_t *v = in.data(i);
const float *v = in.data(i);
const int32_t p = eml_trees_predict(&forest, v, n_features);
if (p < 0) {
const std::string msg = eml_trees_errors[-p];
@@ -76,7 +75,7 @@ PYBIND11_MODULE(eml_trees, m) {
m.doc() = "Tree-based machine learning classifiers for embedded devices";

py::class_<TreesClassifier>(m, "Classifier")
.def(py::init<std::vector<EmlTreesValue>, std::vector<int32_t>>())
.def(py::init<std::vector<float>, std::vector<int32_t>>())
.def("predict", &TreesClassifier::predict);
}

@@ -21,7 +21,7 @@ def constant(val, dtype='float'):
"3.14f"
"""
if dtype == 'float':
return "{:f}f".format(val)
return "{:.6f}f".format(val)
else:
return str(val)

@@ -41,7 +41,7 @@ def array_declare(name, size, dtype='float', modifiers='static const',
>>> from emlearn import cgen
>>> cgen.array_declare("initialized", 3, dtype='int', modifiers='const')
"const float initialized[3] = { 1, 2, 3 };"
"const int initialized[3] = { 1, 2, 3 };"
"""

init = ''
@@ -3,15 +3,15 @@
from . import net
from . import bayes

def convert(estimator, kind=None, method='pymodule'):
def convert(estimator, kind=None, method='pymodule', dtype='float'):
"""Main entrypoint for converting a model"""

if kind is None:
kind = type(estimator).__name__

# Uname instead of instance to avoid hard dependency on the libraries
# Use name instead of instance to avoid hard dependency on the libraries
if kind in ['RandomForestClassifier', 'ExtraTreesClassifier', 'DecisionTreeClassifier']:
return trees.Wrapper(estimator, method)
return trees.Wrapper(estimator, method, dtype=dtype)
elif kind == 'MLPClassifier':
return net.convert_sklearn_mlp(estimator, method)
elif kind == 'Sequential':
@@ -4,11 +4,9 @@

#include <stdint.h>

typedef int32_t EmlTreesValue;

typedef struct _EmlTreesNode {
int8_t feature;
EmlTreesValue value;
float value;
int16_t left;
int16_t right;
} EmlTreesNode;
@@ -44,22 +42,22 @@ const char *eml_trees_errors[EmlTreesErrorLength+1] = {
#endif

static int32_t
eml_trees_predict_tree(const EmlTrees *forest, int32_t tree_root, const EmlTreesValue *features, int8_t features_length) {
eml_trees_predict_tree(const EmlTrees *forest, int32_t tree_root, const float *features, int8_t features_length) {
int32_t node_idx = tree_root;

// TODO: see if using a pointer node instead of indirect addressing using node_idx improves perf
while (forest->nodes[node_idx].feature >= 0) {
const int8_t feature = forest->nodes[node_idx].feature;
const EmlTreesValue value = features[feature];
const EmlTreesValue point = forest->nodes[node_idx].value;
const float value = features[feature];
const float point = forest->nodes[node_idx].value;
//printf("node %d feature %d. %d < %d\n", node_idx, feature, value, point);
node_idx = (value < point) ? forest->nodes[node_idx].left : forest->nodes[node_idx].right;
}
return forest->nodes[node_idx].value;
}

int32_t
eml_trees_predict(const EmlTrees *forest, const EmlTreesValue *features, int8_t features_length) {
eml_trees_predict(const EmlTrees *forest, const float *features, int8_t features_length) {

//printf("features %d\n", features_length);
//printf("trees %d\n", forest->n_trees);
@@ -24,7 +24,7 @@ def flatten_tree(tree):
cls = numpy.argmax(value[0])
n = [ -1, cls, -1, -1 ] # leaf
else:
n = [ feature, int(th), left, right ]
n = [ feature, th, left, right ]

flat.append(n)

@@ -195,23 +195,11 @@ def node(n):
nodes_length = len(flat)
nodes = "EmlTreesNode {nodes_name}[{nodes_length}] = {{\n {nodes_structs} \n}};".format(**locals());

# FIXME: remove when having native support for floats
wrapper_func = """
int32_t eml_trees_predict_float(const EmlTrees *model, const float *features, int32_t features_length) {{
// Exposes a float based interface to outside.
int32_t features_int[features_length];
for (int32_t i=0; i < features_length; i++) {{
features_int[i] = features[i];
}}
return eml_trees_predict(model, features_int, features_length);
}}
""".format(**{})

out = nodes + wrapper_func
out = nodes

return out

def generate_c_inlined(forest, name):
def generate_c_inlined(forest, name, dtype='float'):
nodes, roots = forest

def is_leaf(n):
@@ -225,6 +213,8 @@ def class_value(n):
n_classes = max(class_values)+1
tree_names = [ name + '_tree_{}'.format(i) for i,_ in enumerate(roots) ]

ctype = dtype

indent = 2
def c_leaf(n, depth):
return (depth*indent * ' ') + "return {};".format(n[1])
@@ -248,20 +238,21 @@ def c_node(nid, depth):
return c_internal(n, depth+1)

def tree_func(name, root):
return """static inline int32_t {function_name}(const EmlTreesValue *features, int32_t features_length) {{
return """static inline int32_t {function_name}(const {ctype} *features, int32_t features_length) {{
{code}
}}
""".format(**{
'function_name': name,
'code': c_node(root, 0),
'ctype': ctype,
})

def tree_vote(name):
return '_class = {}(features, features_length); votes[_class] += 1;'.format(name)

tree_votes = [ tree_vote(n) for n in tree_names ]

forest_func = """int32_t {function_name}(const EmlTreesValue *features, int32_t features_length) {{
forest_func = """int32_t {function_name}(const {ctype} *features, int32_t features_length) {{
int32_t votes[{n_classes}] = {{0,}};
int32_t _class = -1;
@@ -282,26 +273,15 @@ def tree_vote(name):
""".format(**{
'function_name': name,
'n_classes': n_classes,
'tree_predictions': '\n '.join(tree_votes)
'tree_predictions': '\n '.join(tree_votes),
'ctype': ctype,
})

# FIXME: remove in favor of 'native' float support
wrapper_func = """
int32_t {function_name}_float(const float *features, int32_t features_length) {{
// Exposes a float based interface to outside.
int32_t features_int[features_length];
for (int32_t i=0; i < features_length; i++) {{
features_int[i] = features[i];
}}
return {function_name}(features_int, features_length);
}}
""".format(**{'function_name': name})

tree_funcs = [tree_func(n, r) for n,r in zip(tree_names, roots)]

return '\n\n'.join(tree_funcs + [forest_func] + [wrapper_func])
return '\n\n'.join(tree_funcs + [forest_func])

def generate_c_forest(forest, name='myclassifier'):
def generate_c_forest(forest, name='myclassifier', dtype='float'):
nodes, roots = forest

nodes_name = name+'_nodes'
@@ -313,8 +293,6 @@ def generate_c_forest(forest, name='myclassifier'):
tree_roots_values = ', '.join(str(t) for t in roots)
tree_roots = 'int32_t {tree_roots_name}[{tree_roots_length}] = {{ {tree_roots_values} }};'.format(**locals())

# TODO: generate a float wrapper

forest_struct = """EmlTrees {name} = {{
{nodes_length},
{nodes_name},
@@ -328,15 +306,17 @@ def generate_c_forest(forest, name='myclassifier'):
#include <eml_trees.h>
"""

inline = generate_c_inlined(forest, name+'_predict')
inline = generate_c_inlined(forest, name+'_predict', dtype=dtype)

return '\n\n'.join([head, nodes_c, tree_roots, forest_struct, inline])




class Wrapper:
def __init__(self, estimator, classifier):
def __init__(self, estimator, classifier, dtype='float'):

self.dtype = dtype

if hasattr(estimator, 'estimators_'):
trees = [ e.tree_ for e in estimator.estimators_]
@@ -353,19 +333,19 @@ def __init__(self, estimator, classifier):
node_data = []
for node in nodes:
assert len(node) == 4
node_data += node # [int(v) for v in node]
node_data += node
assert len(node_data) % 4 == 0

self.classifier_ = eml_trees.Classifier(node_data, roots)

elif classifier == 'loadable':
name = 'mytree'
func = 'eml_trees_predict_float(&{}, values, length)'.format(name)
func = 'eml_trees_predict(&{}, values, length)'.format(name)
code = self.save(name=name)
self.classifier_ = common.CompiledClassifier(code, name=name, call=func)
elif classifier == 'inline':
name = 'myinlinetree'
func = '{}_predict_float(values, length)'.format(name)
func = '{}_predict(values, length)'.format(name)
code = self.save(name=name)
self.classifier_ = common.CompiledClassifier(code, name=name, call=func)
else:
@@ -382,7 +362,7 @@ def save(self, name=None, file=None):
else:
name = os.path.splitext(os.path.basename(file))[0]

code = generate_c_forest(self.forest_, name)
code = generate_c_forest(self.forest_, name, dtype=self.dtype)
if file:
with open(file, 'w') as f:
f.write(code)
@@ -9,8 +9,6 @@
rnd = 11
digits = datasets.load_digits()
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target, random_state=rnd)
Xtrain = (Xtrain * 2**16).astype(numpy.int32)
Xtest = (Xtest * 2**16).astype(numpy.int32)

print('Loading digits dataset. 8x8=64 features')

@@ -31,8 +31,6 @@ def test_trees_sklearn_predict(data, model, method):
X, y = DATASETS[data]
estimator = MODELS[model]

X = (X * 2**16).astype(int) # currently only integers supported

estimator.fit(X, y)
cmodel = emlearn.convert(estimator, method=method)

@@ -64,7 +62,6 @@ def test_deduplicate_single_tree():
def test_trees_to_dot():
X, Y = datasets.make_classification(n_classes=2, n_samples=10, random_state=1)
model = RandomForestClassifier(n_estimators=3, max_depth=5, random_state=1)
X = (X * 2**16).astype(int) # convert to integer
model.fit(X, Y)

trees = emlearn.convert(model)

0 comments on commit 2c1fd91

Please sign in to comment.
You can’t perform that action at this time.