From 88e701658aae2044445848a290827993d9e2d165 Mon Sep 17 00:00:00 2001
From: Graham Markall
Date: Fri, 11 Sep 2020 17:39:48 +0100
Subject: [PATCH] [WIP] lazily compile in kernels

---
Reviewer notes. These sit after the three-dash separator, so they are
commentary only and are not part of the commit message or of the diff.

 * _Kernel.__init__ loses @global_compiler_lock and no longer calls
   self.compile(cc) for the current device's compute capability; the
   last statement it keeps from this hunk is `self.definitions = {}`.
 * @global_compiler_lock moves onto compile(). compile(cc) still stores
   the new _KernelDefinition in self.definitions[cc], and now also
   returns it so callers can use the result directly.
 * The _func, entry_name, call_helper and signature properties look up
   the definition for the current device's compute capability and call
   compile(cc) when nothing is cached for it yet. The same four-line
   lookup is repeated in each property.
 * _type_annotation takes the first cached definition, whichever compute
   capability it was built for, and compiles for the current device only
   when self.definitions is empty.
 * _reduce_states only gains a FIXME. It reads entry_name, argument_types
   (which goes through signature), _func and call_helper, each of which
   can now trigger a compile for the current device. Presumably that is
   what still needs reworking -- TODO confirm with the author.
 * NOTE(review): each property checks self.definitions before compile()
   takes global_compiler_lock, so two threads making the first access at
   the same time look like they could both compile the same kernel.
   Confirm whether that matters before dropping the [WIP] tag.

 numba/cuda/compiler.py | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py
index cfee7c4090b..65370b4425d 100644
--- a/numba/cuda/compiler.py
+++ b/numba/cuda/compiler.py
@@ -510,7 +510,6 @@ class _Kernel(serialize.ReduceMixin):
     object launches the kernel on the device.
     '''
 
-    @global_compiler_lock
     def __init__(self, py_func, argtypes, link, debug=False, inline=False,
                  fastmath=False, extensions=None, max_registers=None, opt=True):
         super().__init__()
@@ -527,9 +526,7 @@ def __init__(self, py_func, argtypes, link, debug=False, inline=False,
 
         self.definitions = {}
 
-        cc = get_current_device().compute_capability
-        self.compile(cc)
-
+    @global_compiler_lock
     def compile(self, cc):
 
         cres = compile_cuda(self.py_func, types.void, self.argtypes,
@@ -559,7 +556,7 @@ def compile(self, cc):
         cufunc = CachedCUFunction(name, ptx, self.link, self.max_registers)
 
         # populate members
-        self.definitions[cc] = _KernelDefinition(
+        defn = _KernelDefinition(
             entry_name=name,
             signature=signature,
             type_annotation=type_annotation,
@@ -567,6 +564,9 @@ def compile(self, cc):
             call_helper=call_helper
         )
 
+        self.definitions[cc] = defn
+        return defn
+
     @property
     def argument_types(self):
         return tuple(self.signature.args)
@@ -599,6 +599,7 @@ def _reduce_states(self):
         Thread, block and shared memory configuration are serialized.
         Stream information is discarded.
         """
+        # FIXME: this needs fixing.
         return dict(name=self.entry_name, argtypes=self.argument_types,
                     cufunc=self._func, link=self.link, debug=self.debug,
                     call_helper=self.call_helper, extensions=self.extensions)
@@ -616,26 +617,44 @@ def __call__(self, *args, **kwargs):
     @property
     def _func(self):
         cc = get_current_device().compute_capability
-        return self.definitions[cc].func
+        defn = self.definitions.get(cc, None)
+        if defn is None:
+            defn = self.compile(cc)
+        return defn.func
 
     @property
     def _type_annotation(self):
-        return next(iter(self.definitions.values())).type_annotation
+        try:
+            defn = next(iter(self.definitions.values()))
+        except StopIteration:
+            cc = get_current_device().compute_capability
+            defn = self.compile(cc)
+
+        return defn.type_annotation
 
     @property
     def entry_name(self):
         cc = get_current_device().compute_capability
-        return self.definitions[cc].entry_name
+        defn = self.definitions.get(cc, None)
+        if defn is None:
+            defn = self.compile(cc)
+        return defn.entry_name
 
     @property
     def call_helper(self):
         cc = get_current_device().compute_capability
-        return self.definitions[cc].call_helper
+        defn = self.definitions.get(cc, None)
+        if defn is None:
+            defn = self.compile(cc)
+        return defn.call_helper
 
     @property
     def signature(self):
         cc = get_current_device().compute_capability
-        return self.definitions[cc].signature
+        defn = self.definitions.get(cc, None)
+        if defn is None:
+            defn = self.compile(cc)
+        return defn.signature
 
     def bind(self):
         """