
Commit

Merge pull request #1 from numba/master
Sync with original
John-Ted authored Oct 27, 2020
2 parents 3a682bd + 986c7d1 commit e464068
Showing 8 changed files with 103 additions and 38 deletions.
13 changes: 9 additions & 4 deletions buildscripts/condarecipe.local/meta.yaml
@@ -17,6 +17,9 @@ build:
# optional dependency: required only when omp is chosen as the backend for
# the threading layer
- lib/libiomp5.dylib # [osx]
ignore_run_exports:
# tbb-devel triggers a hard run dependency on tbb via run_exports, but tbb is only optional here.
- tbb # [not (armv6l or armv7l or aarch64 or linux32)]

requirements:
# build and run dependencies are duplicated to avoid setuptools issues
@@ -33,8 +36,9 @@ requirements:
- setuptools
# On channel https://anaconda.org/numba/
- llvmlite >=0.35.0dev0,<0.35
# TBB devel version is to match TBB libs
- tbb-devel >=2019.5 # [not (armv6l or armv7l or aarch64 or linux32)]
# TBB devel version is to match TBB libs.
# 2020.3 is the last version with the "old" ABI
- tbb-devel >=2019.5,<=2020.3 # [not (armv6l or armv7l or aarch64 or linux32)]
run:
- python >=3.6
- numpy >=1.15
@@ -43,9 +47,10 @@ requirements:
- llvmlite >=0.35.0dev0,<0.35
run_constrained:
# If TBB is present it must be at least version 2019.5 from Anaconda due to
# build flag issues triggering UB. It must also be <=2020.0 due to ABI
# build flag issues triggering UB. It must also be <=2020.3 due to ABI
# changes in TBB, see https://github.com/numba/numba/pull/6096.
- tbb >=2019.5,<=2020.0 # [not (armv6l or armv7l or aarch64 or linux32)]
# 2020.3 is the last version with the "old" ABI
- tbb >=2019.5,<=2020.3 # [not (armv6l or armv7l or aarch64 or linux32)]
# avoid confusion from openblas bugs
- libopenblas !=0.3.6 # [x86_64]
# CUDA 9.0 or later is required for CUDA support
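The run_constrained pin above only limits which TBB conda may install alongside Numba; whether TBB is actually used is decided at runtime when the threading layer is selected. A rough, illustrative check (not part of this change; assumes numba is installed with a parallel-capable setup):

import numpy as np
from numba import njit, prange, threading_layer

@njit(parallel=True)
def total(a):
    acc = 0.0
    for i in prange(a.size):
        acc += a[i]
    return acc

total(np.ones(100))        # a parallel function must run before querying
print(threading_layer())   # e.g. 'tbb' when a compatible TBB is present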
59 changes: 46 additions & 13 deletions numba/core/codegen.py
@@ -95,9 +95,10 @@ def __init__(self, cres, name, py_func, **kwargs):
self.dot = ll.get_function_cfg(fn)
self.kwargs = kwargs

def pretty_printer(self, filename=None, view=None, highlight=True,
interleave=False, strip_ir=False, show_key=True,
fontsize=10):
def pretty_printer(self, filename=None, view=None, render_format=None,
highlight=True,
interleave=False, strip_ir=False, show_key=True,
fontsize=10):
"""
"Pretty" prints the DOT graph of the CFG.
For explanation of the parameters see the docstring for
@@ -118,7 +119,9 @@ def pretty_printer(self, filename=None, view=None, highlight=True,
returns=_default,
raises=_default,
meminfo=_default,
branches=_default)
branches=_default,
llvm_intrin_calls=_default,
function_calls=_default,)
_interleave = SimpleNamespace(python=_default, lineinfo=_default)

def parse_config(_config, kwarg):
@@ -163,6 +166,8 @@ def parse_config(_config, kwarg):
cs['raise'] = 'lightpink'
cs['meminfo'] = 'lightseagreen'
cs['return'] = 'purple'
cs['llvm_intrin_calls'] = 'rosybrown'
cs['function_calls'] = 'tomato'

# Get the raw dot format information from LLVM and the LLVM IR
fn = self.cres.get_function(self.name)
@@ -191,7 +196,16 @@ def init_digraph(name, fname, fontsize):
# name and fname are arbitrary graph and file names, they appear in
# some rendering formats, the fontsize determines the output
# fontsize.
f = gv.Digraph(name, fname)

# truncate massive mangled names used as file names, as they cause an
# OSError when trying to render to pdf
cmax = 200
if len(fname) > cmax:
wstr = (f'CFG output filename "{fname}" exceeds maximum '
f'supported length, it will be truncated.')
warnings.warn(wstr, NumbaInvalidConfigWarning)
fname = fname[:cmax]
f = gv.Digraph(name, filename=fname)
f.attr(rankdir='TB')
f.attr('node', shape='none', fontsize='%s' % str(fontsize))
return f
@@ -217,8 +231,10 @@ def init_digraph(name, fname, fontsize):
nrt_incref = re.compile(r"@NRT_incref\b")
nrt_decref = re.compile(r"@NRT_decref\b")
nrt_meminfo = re.compile("@NRT_MemInfo")
ll_raise = re.compile("ret i32 1,")
ll_return = re.compile("ret i32 [^1],")
ll_intrin_calls = re.compile(r".*call.*@llvm\..*")
ll_function_call = re.compile(r".*call.*@.*")
ll_raise = re.compile("ret i32.*\!ret_is_raise.*")
ll_return = re.compile("ret i32 [^1],?.*")

# wrapper function for line wrapping LLVM lines
def wrap(s):
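The two new patterns feed the extra highlight categories added above (llvm_intrin_calls and function_calls). A small illustration with made-up IR lines (not from the patch); the intrinsic pattern is the narrower of the two, which is why it is tested first in the highlighting chain further down:

import re

ll_intrin_calls = re.compile(r".*call.*@llvm\..*")
ll_function_call = re.compile(r".*call.*@.*")

intrin_line = 'call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i1 false)'
plain_call = '%r = call i8* @NRT_MemInfo_alloc_safe(i64 %size)'

assert ll_intrin_calls.search(intrin_line)      # highlighted as an LLVM intrinsic call
assert not ll_intrin_calls.search(plain_call)   # not an intrinsic ...
assert ll_function_call.search(plain_call)      # ... but still a function call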
@@ -227,12 +243,23 @@ def wrap(s):
# function to fix (sometimes escaped for DOT!) LLVM IR etc that needs to
# be HTML escaped
def clean(s):
# Grab first 300 chars only, 1. this should be enough to identify
# the token and it keeps names short. 2. graphviz/dot has a maximum
# buffer size near 585?!, with additional transforms it's hard to
# know if this would be exceeded. 3. hash of the token string is
# written into the rendering to permit exact identification against
# e.g. LLVM IR dump if necessary.
n = 300
if len(s) > n:
hs = str(hash(s))
s = '{}...<hash={}>'.format(s[:n], hs)
s = html.escape(s) # deals with &, < and >
s = s.replace('\\{', "&#123;")
s = s.replace('\\}', "&#125;")
s = s.replace('\\', "&#92;")
s = s.replace('%', "&#37;")
return s.replace('!', "&#33;")
s = s.replace('!', "&#33;")
return s

# These hold the node and edge ids from the raw dot information. They
# are used later to wire up a new DiGraph that has the same structure
@@ -399,6 +426,10 @@ def metadata_interleave(l, new_lines):
colour = cs['raise']
elif _highlight.returns and ll_return.search(l):
colour = cs['return']
elif _highlight.llvm_intrin_calls and ll_intrin_calls.search(l):
colour = cs['llvm_intrin_calls']
elif _highlight.function_calls and ll_function_call.search(l):
colour = cs['function_calls']
else:
colour = cs['default']

@@ -457,21 +488,23 @@ def metadata_interleave(l, new_lines):

# Render if required
if filename is not None or view is not None:
f.render(filename=filename, view=view, format='pdf')
f.render(filename=filename, view=view, format=render_format)

# Else pipe out a SVG
return f.pipe(format='svg')

def display(self, filename=None, view=False):
def display(self, filename=None, format='pdf', view=False):
"""
Plot the CFG. In IPython notebook, the return image object can be
inlined.
The *filename* option can be set to a specific path for the rendered
output to write to. If *view* option is True, the plot is opened by
the system default application for the image format (PDF).
the system default application for the image format (PDF). *format* can
be any valid format string accepted by graphviz, default is 'pdf'.
"""
rawbyt = self.pretty_printer(filename=filename, view=view, **self.kwargs)
rawbyt = self.pretty_printer(filename=filename, view=view,
render_format=format, **self.kwargs)
return rawbyt.decode('utf-8')

def _repr_svg_(self):
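A hedged usage sketch of the new render format handling (not from the patch; assumes the usual Dispatcher.inspect_cfg entry point and a working graphviz install):

from numba import njit

@njit
def add(a, b):
    return a + b

add(1, 2)  # compile a concrete signature first
cfg = add.inspect_cfg(add.signatures[0])
cfg.display(filename='add_cfg', format='svg', view=False)  # should write add_cfg.svg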
@@ -1071,7 +1104,7 @@ def _init(self, llvm_module):
self._data_layout = str(self._target_data)
self._mpm_cheap = self._module_pass_manager(loop_vectorize=False,
slp_vectorize=False,
opt=1)
opt=0)
self._mpm_full = self._module_pass_manager()

self._engine.set_object_cache(self._library_class._object_compiled_hook,
3 changes: 3 additions & 0 deletions numba/cuda/api.py
@@ -129,6 +129,9 @@ def managed_array(shape, dtype=np.float, strides=None, order='C', stream=0,
Allocate a np.ndarray with a buffer that is managed.
Similar to np.empty().
Managed memory is supported on Linux, and is considered experimental on
Windows.
:param attach_global: A flag indicating whether to attach globally. Global
attachment implies that the memory is accessible from
any stream on any device. If ``False``, attachment is
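A hedged usage sketch of the documented behaviour (assumes a CUDA-capable Linux machine; on Windows this is experimental, as noted above):

import numpy as np
from numba import cuda

# attach_global=True (the default) makes the managed buffer visible to any
# stream on any device; the array is accessible from host and device code.
arr = cuda.managed_array(1000, dtype=np.float64, attach_global=True)
arr[:] = 0.0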
7 changes: 1 addition & 6 deletions numba/cuda/tests/cudadrv/test_managed_alloc.py
@@ -1,5 +1,4 @@
import numpy as np
import platform
from ctypes import byref, c_size_t
from numba.cuda.cudadrv.driver import device_memset, driver
from numba import cuda
@@ -9,6 +8,7 @@


@skip_on_cudasim('CUDA Driver API unsupported in the simulator')
@linux_only
class TestManagedAlloc(ContextResettingTestCase):

def get_total_gpu_memory(self):
@@ -57,12 +57,8 @@ def test_managed_alloc_driver_undersubscribe(self):
def test_managed_alloc_driver_oversubscribe(self):
msg = "Oversubscription of managed memory unsupported prior to CC 6.0"
self.skip_if_cc_major_lt(6, msg)
if platform.system() != "Linux":
msg = "Oversubscription of managed memory only supported on Linux"
self.skipTest(msg)
self._test_managed_alloc_driver(2.0)

@linux_only
def test_managed_alloc_driver_host_attach(self):
msg = "Host attached managed memory is not accessible prior to CC 6.0"
self.skip_if_cc_major_lt(6, msg)
@@ -115,7 +111,6 @@ def kernel(x):
def test_managed_array_attach_global(self):
self._test_managed_array()

@linux_only
def test_managed_array_attach_host(self):
self._test_managed_array()
msg = "Host attached managed memory is not accessible prior to CC 6.0"
11 changes: 11 additions & 0 deletions numba/parfors/array_analysis.py
@@ -2064,6 +2064,17 @@ def _analyze_op_arrayexpr(self, scope, equiv_set, expr):
)

def _analyze_op_build_tuple(self, scope, equiv_set, expr):
# For the moment, we can't do anything with tuples that
# contain whole arrays (as opposed to array dimensions).
# Return None to say we won't track this tuple if any
# part of it is an array.
for x in expr.items:
if (
isinstance(x, ir.Var)
and isinstance(self.typemap[x.name], types.ArrayCompatible)
):
return None

consts = []
for var in expr.items:
x = guard(find_const, self.func_ir, var)
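Illustrative only: the kind of user code this guards against (issue #6399, exercised by the new parfors test below), where a built tuple holds whole arrays rather than dimension sizes:

import numpy as np
from numba import njit

@njit(parallel=True)
def concat_tuple(a):
    S = (a,) + (a, a)   # tuple of arrays: not tracked by array analysis
    return S[0].sum()

concat_tuple(np.ones((3, 3)))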
20 changes: 12 additions & 8 deletions numba/tests/test_dyn_array.py
@@ -2,6 +2,7 @@
import sys
import numpy as np
import random
import re
import threading
import gc

@@ -636,8 +637,9 @@ def func(n, dt):

excstr = str(raises.exception)
self.assertIn('No match', excstr)
self.assertIn('{}({}, unicode_type)'.format(pyfunc.__name__,
np.intp.__name__), excstr)
restr = r'\b{}\(int.*?, unicode_type\)\B'
regex = re.compile(restr.format(pyfunc.__name__))
self.assertRegex(excstr, regex)

def test_2d(self):
pyfunc = self.pyfunc
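A rough illustration of the looser matching used here and in the tests below (the exception text and the 'ones' name are made up): the regex accepts whatever integer width the failing signature reports instead of requiring the exact np.intp spelling:

import re

excstr = "No match ... ones(int64, unicode_type) ..."   # made-up exception text
assert re.search(r'\bones\(int.*?, unicode_type\)\B', excstr)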
@@ -732,8 +734,9 @@ def func(n, fv, dt):

excstr = str(raises.exception)
self.assertIn('No match', excstr)
self.assertIn('full(UniTuple({} x 1), float64, unicode_type)'.format(
np.intp.__name__), excstr)
restr = r'\bfull\(UniTuple\(int.*? x 1\), float64, unicode_type\)\B'
regex = re.compile(restr)
self.assertRegex(excstr, regex)

def test_2d(self):
def func(m, n):
@@ -1025,8 +1028,8 @@ def func(n, dt):

excstr = str(raises.exception)
self.assertIn('No match', excstr)
self.assertIn('identity({}, unicode_type)'.format(np.intp.__name__),
excstr)
regex = re.compile(r'\bidentity\(int.*?, unicode_type\)\B')
self.assertRegex(excstr, regex)


class TestNdEye(BaseTest):
@@ -1272,8 +1275,9 @@ def func(arg, dt):

excstr = str(raises.exception)
self.assertIn('No match', excstr)
self.assertIn('array(UniTuple({} x 2), dtype=unicode_type)'.format(
np.intp.__name__), excstr)
restr = r'\barray\(UniTuple\(int.*? x 2\), dtype=unicode_type\)\B'
regex = re.compile(restr)
self.assertRegex(excstr, regex)

def test_2d(self):
def pyfunc(arg):
10 changes: 10 additions & 0 deletions numba/tests/test_parfors.py
@@ -1722,6 +1722,16 @@ def test_impl(a):
x = np.array([1, 1])
self.check(test_impl, x)

@skip_parfors_unsupported
def test_array_tuple_concat(self):
# issue6399
def test_impl(a):
S = (a,) + (a, a)
return S[0].sum()

x = np.ones((3,3))
self.check(test_impl, x)


class TestParforsLeaks(MemoryLeakMixin, TestParforsBase):
def check(self, pyfunc, *args, **kwargs):
18 changes: 11 additions & 7 deletions numba/tests/test_vectorization.py
@@ -1,7 +1,10 @@
import platform
import numpy as np
from numba import njit, types
from unittest import TestCase
from numba import types
from unittest import TestCase, skipIf
from numba.tests.support import override_env_config
from numba.core.compiler import compile_isolated, Flags
from numba.core.cpu_options import FastMathOptions

_DEBUG = False
if _DEBUG:
@@ -10,18 +13,19 @@
llvm.set_option("", "--debug-only=loop-vectorize")


@skipIf(platform.machine() != 'x86_64', 'x86_64 only test')
class TestVectorization(TestCase):
"""
Tests to assert that code which should vectorize does indeed vectorize
"""
def gen_ir(self, func, args_tuple, **flags):
def gen_ir(self, func, args_tuple, fastmath=False):
with override_env_config(
"NUMBA_CPU_NAME", "skylake-avx512"
), override_env_config("NUMBA_CPU_FEATURES", ""):
jobj = njit(**flags)(func)
jobj.compile(args_tuple)
ol = jobj.overloads[jobj.signatures[0]]
return ol.library.get_llvm_str()
_flags = Flags()
_flags.set('fastmath', FastMathOptions(fastmath))
jitted = compile_isolated(func, args_tuple, flags=_flags)
return jitted.library.get_llvm_str()

def test_nditer_loop(self):
# see https://github.com/numba/numba/issues/5033
