Skip to content

Commit

Permalink
Tc/feature/remove dist stream when not supported (#17)
Browse files Browse the repository at this point in the history
* do not generate code for distributed/streaming modes unless needed
* moving streaming and distributed args to pre-computed mechanism
  • Loading branch information
fschlimb committed Nov 23, 2018
1 parent 540de64 commit 2133108
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 192 deletions.
7 changes: 0 additions & 7 deletions conda-recipe-nodist/bld.bat

This file was deleted.

11 changes: 0 additions & 11 deletions conda-recipe-nodist/build.sh

This file was deleted.

41 changes: 0 additions & 41 deletions conda-recipe-nodist/meta.yaml

This file was deleted.

22 changes: 0 additions & 22 deletions conda-recipe-nodist/run_test.py

This file was deleted.

2 changes: 1 addition & 1 deletion examples/linear_regression_spmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# Now let's do some prediction
# It runs only on a single node
if d4p.my_procid() == 0:
predict_algo = d4p.linear_regression_prediction(distributed=True)
predict_algo = d4p.linear_regression_prediction()
# read test data (with same #features)
pdata = loadtxt("./data/distributed/linear_regression_test.csv", delimiter=',', usecols=range(10))
# now predict using the model from the training above
Expand Down
2 changes: 1 addition & 1 deletion examples/naive_bayes_spmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
infile = "./data/batch/naivebayes_train_dense.csv"

# Configure a training object (20 classes)
talgo = d4p.multinomial_naive_bayes_training(20)
talgo = d4p.multinomial_naive_bayes_training(20, distributed=True)

# Read data. Let's use 20 features per observation
data = loadtxt(infile, delimiter=',', usecols=range(20))
Expand Down
2 changes: 1 addition & 1 deletion examples/ridge_regression_spmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# Now let's do some prediction
# It runs only on a single node
if d4p.my_procid() == 0:
predict_algo = d4p.ridge_regression_prediction(distributed=True)
predict_algo = d4p.ridge_regression_prediction()
# read test data (with same #features)
pdata = loadtxt("./data/distributed/linear_regression_test.csv", delimiter=',', usecols=range(10))
# now predict using the model from the training above
Expand Down
24 changes: 13 additions & 11 deletions examples/run_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,19 @@
else:
logdir = jp(exdir, '_results', 'intel64')

def get_exe_cmd(ex, nodist, nostream):
    """Return the shell command used to run example script *ex*, or None to skip it.

    Selection rules (post-commit behavior):
      - names ending in 'batch.py' or 'stream.py' always run with plain Python;
      - names ending in 'streaming.py' run with plain Python unless *nostream*;
      - names ending in 'spmd.py' run under MPI with 4 ranks unless *nodist*;
      - anything else is skipped (returns None).
    """
    # Plain single-process invocation, quoted for paths with spaces.
    plain = '"' + sys.executable + '" "' + ex + '"'
    if ex.endswith(('batch.py', 'stream.py')):
        return plain
    if not nostream and ex.endswith('streaming.py'):
        return plain
    if not nodist and ex.endswith('spmd.py'):
        # IS_WIN is a module-level flag; mpiexec needs -localonly on Windows.
        launcher = 'mpiexec -localonly -n 4 ' if IS_WIN else 'mpirun -n 4 '
        return launcher + plain
    return None

def run_all(nodist=False):
def run_all(nodist=False, nostream=False):
success = 0
n = 0
if not os.path.exists(logdir):
Expand All @@ -56,7 +58,7 @@ def run_all(nodist=False):
logfn = jp(logdir, script.replace('.py', '.res'))
with open(logfn, 'w') as logfile:
print('\n##### ' + jp(dirpath, script))
execute_string = get_exe_cmd(jp(dirpath, script), nodist)
execute_string = get_exe_cmd(jp(dirpath, script), nodist, nostream)
if execute_string:
proc = subprocess.Popen(execute_string if IS_WIN else ['/bin/bash', '-c', execute_string],
stdout=subprocess.PIPE,
Expand All @@ -83,4 +85,4 @@ def run_all(nodist=False):
return 0

if __name__ == '__main__':
sys.exit(run_all('nodist' in sys.argv))
sys.exit(run_all('nodist' in sys.argv, 'nostream' in sys.argv))
79 changes: 79 additions & 0 deletions generator/format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#*******************************************************************************
# Copyright 2014-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#******************************************************************************/

from collections import namedtuple, defaultdict

# Python-side default values per C++ type; any type not listed maps to None.
pydefaults = defaultdict(lambda: None)
pydefaults.update({'double': 'NaN64',
                   'float': 'NaN32',
                   'int': '-1',
                   'long': '-1',
                   'size_t': '-1',
                   'bool': 'False',
                   #'std::string' : '""',
                   'std::string &' : '""',
                   })

# C++-side default values per type; unlisted types default to NULL.
cppdefaults = defaultdict(lambda: 'NULL')
cppdefaults.update({'bool': 'false',})


# A pre-formatted variable: every field is a ready-to-paste string for one
# specific code-generation context (C++ decl, Cython decl, member init, ...).
FmtVar = namedtuple('formatted_variable',
                    ['name',          # variable's name
                     'typ',           # type
                     'default',       # default value
                     'arg_cpp',       # use as argument to a C++ function call from C++
                     'arg_cy',        # use as argument to a C++ function call in cython
                     'arg_py',        # use as argument to a Python function call in cython
                     'decl_dflt_cpp', # use as var declaration in C++ with default value
                     'decl_cpp',      # use as var declaration in C++ without default
                     'decl_dflt_cy',  # use as C++ member/var declaration in Cython with default
                     'decl_dflt_py',  # use as Python member/var declaration in Cython with default
                     'decl_member',   # use as member declaration in C++
                     'arg_member',    # use as member used in C++
                     'init_member',   # initializer for member var
                     ])
# All fields default to the empty string so an "empty" FmtVar formats to nothing.
FmtVar.__new__.__defaults__ = ('',) * len(FmtVar._fields)

def mk_var(decl=''):
    """Parse a C++-style declaration string into a FmtVar.

    *decl* looks like 'const double * x = 1.0'; const/ref/pointer markers and
    an optional '= default' suffix are recognized. The presence of '=' (not its
    value) selects the language-specific defaults from pydefaults/cppdefaults.
    An empty *decl* yields an all-empty FmtVar.
    """
    if decl == '':
        return FmtVar()
    # Split off a default marker (rightmost '=') and then 'type name'.
    d = decl.rsplit('=', 1)
    t, name = d[0].strip().rsplit(' ', 1)
    const = 'const ' if 'const' in t else ''
    # '&'/'*' may be attached to either the type or the name.
    ref = '&' if any('&' in x for x in [t, name]) else ''
    ptr = '*' if any('*' in x for x in [t, name]) else ''
    name = name.replace('&', '').replace('*', '').strip()
    typ = t.replace('const', '').replace('&', '').replace('*', '').strip()
    pydefault = ' = {}'.format(pydefaults[typ]) if len(d) > 1 else ''
    cppdefault = ' = {}'.format(cppdefaults[typ]) if len(d) > 1 else ''
    assert(' ' not in typ), 'Error in parsing variable "{}"'.format(decl)

    # A reference suppresses the pointer marker in the formatted declarations.
    return FmtVar(name          = name,
                  typ           = typ,
                  arg_cpp       = name,
                  arg_cy        = name,
                  arg_py        = name,
                  decl_dflt_cpp = '{}{}{} {}{}'.format(const, typ, ref if ref != '' else ptr, name, cppdefault),
                  decl_cpp      = '{}{}{} {}'.format(const, typ, ref if ref != '' else ptr, name),
                  decl_dflt_cy  = '{}{} {}{}'.format(typ, ref if ref != '' else ptr, name, pydefault),
                  decl_dflt_py  = '{} {}{}'.format(typ, name, pydefault),
                  decl_member   = '{}{} _{}'.format(typ, ref if ref != '' else ptr, name),
                  arg_member    = '_{}'.format(name),
                  init_member   = '_{0}({0})'.format(name),
                  )


26 changes: 16 additions & 10 deletions generator/gen_daal4py.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .parse import parse_header
from .wrappers import required, ignore, defaults, specialized, has_dist, ifaces, no_warn, no_constructor, fallbacks, add_setup, enum_maps
from .wrapper_gen import wrapper_gen, typemap_wrapper_template
from .format import mk_var

try:
basestring
Expand Down Expand Up @@ -532,7 +533,7 @@ def order_iargs(self, tmp_input_args, tmp_iargs_decl, tmp_iargs_call):


###############################################################################
def prepare_hlwrapper(self, ns, mode, func):
def prepare_hlwrapper(self, ns, mode, func, no_dist, no_stream):
"""
Prepare data structures for generating high level wrappers.
Expand Down Expand Up @@ -725,11 +726,15 @@ def prepare_hlwrapper(self, ns, mode, func):
'create': no_constructor[fcls] if fcls in no_constructor else '',
'add_setup': True if ns in add_setup else False,
}
if ns in has_dist:
if not no_dist and ns in has_dist:
retjp['dist'] = has_dist[ns]
if 'Online' in self.namespace_dict[ns].classes and not ns.endswith('pca'):
retjp['streaming'] = True

retjp['distributed'] = mk_var('bool distributed=false')
else:
retjp['distributed'] = mk_var()
if not no_stream and 'Online' in self.namespace_dict[ns].classes and not ns.endswith('pca'):
retjp['streaming'] = mk_var('bool streaming=false')
else:
retjp['streaming'] = mk_var()
return {ns + '::' + mode : retjp}


Expand Down Expand Up @@ -775,7 +780,7 @@ def prepare_model_hierachy(self, cfg):
cfg[ns]['model_typemap']['derived'] = model_hierarchy[m]


def hlapi(self, algo_patterns):
def hlapi(self, algo_patterns, no_dist=False, no_stream=False):
"""
Generate high level wrappers for namespaces listed in algo_patterns (or all).
Expand Down Expand Up @@ -816,7 +821,7 @@ def hlapi(self, algo_patterns):
func = '_'.join(nn[1:])
else:
func = '_'.join(nn)
algoconfig.update(self.prepare_hlwrapper(ns, 'Batch', func))
algoconfig.update(self.prepare_hlwrapper(ns, 'Batch', func, no_dist, no_stream))

self.prepare_model_hierachy(algoconfig)

Expand Down Expand Up @@ -861,7 +866,7 @@ def hlapi(self, algo_patterns):
hlargs[ns] = tmp[7]

hds = wg.gen_headers()
fts = wg.gen_footers()
fts = wg.gen_footers(no_dist, no_stream)

pyx_end += fts[1]
# we add a comment with tables providing parameters for each algorithm
Expand All @@ -884,7 +889,7 @@ def hlapi(self, algo_patterns):
###############################################################################
###############################################################################

def gen_daal4py(daalroot, outdir, version, warn_all=False):
def gen_daal4py(daalroot, outdir, version, warn_all=False, no_dist=False, no_stream=False):
global no_warn
if warn_all:
no_warn = {}
Expand All @@ -893,6 +898,7 @@ def gen_daal4py(daalroot, outdir, version, warn_all=False):
"Path/$DAALROOT '"+ipath+"' doesn't seem host DAAL headers. Please provide correct daalroot."
iface = cython_interface(ipath)
iface.read()
print('Generating sources')
cpp_h, cpp_cpp, pyx_file = iface.hlapi(['kmeans',
'pca',
'svd',
Expand All @@ -914,7 +920,7 @@ def gen_daal4py(daalroot, outdir, version, warn_all=False):
'distance',
'cholesky',
'kdtree_knn_classification',
])
], no_dist, no_stream)
# 'ridge_regression', parametertype is a template without any need
with open(jp(outdir, 'daal4py_cpp.h'), 'w') as f:
f.write(cpp_h)
Expand Down
Loading

0 comments on commit 2133108

Please sign in to comment.