In [1]:

# imports
import os
import sys
import types
import json
import base64

# figure size/format
fig_width = 7
fig_height = 5
fig_format = 'retina'
fig_dpi = 96
interactivity = ''
is_shiny = False
is_dashboard = False
plotly_connected = True

# matplotlib defaults / format
try:
  import matplotlib.pyplot as plt
  plt.rcParams['figure.figsize'] = (fig_width, fig_height)
  plt.rcParams['figure.dpi'] = fig_dpi
  plt.rcParams['savefig.dpi'] = "figure"
  from IPython.display import set_matplotlib_formats
  set_matplotlib_formats(fig_format)
except Exception:
  pass

# plotly use connected mode
try:
  import plotly.io as pio
  if plotly_connected:
    pio.renderers.default = "notebook_connected"
  else:
    pio.renderers.default = "notebook"
  for template in pio.templates.keys():
    pio.templates[template].layout.margin = dict(t=30,r=0,b=0,l=0)
except Exception:
  pass

# disable itables paging for dashboards
if is_dashboard:
  try:
    from itables import options
    options.dom = 'fiBrtlp'
    options.maxBytes = 1024 * 1024
    options.language = dict(info = "Showing _TOTAL_ entries")
    options.classes = "display nowrap compact"
    options.paging = False
    options.searching = True
    options.ordering = True
    options.info = True
    options.lengthChange = False
    options.autoWidth = False
    options.responsive = True
    options.keys = True
    options.buttons = []
  except Exception:
    pass
  
  try:
    import altair as alt
    # By default, dashboards will have container sized
    # vega visualizations which allows them to flow reasonably
    theme_sentinel = '_quarto-dashboard-internal'
    def make_theme(name):
        nonTheme = alt.themes._plugins[name]    
        def patch_theme(*args, **kwargs):
            existingTheme = nonTheme()
            if 'height' not in existingTheme:
              existingTheme['height'] = 'container'
            if 'width' not in existingTheme:
              existingTheme['width'] = 'container'

            if 'config' not in existingTheme:
              existingTheme['config'] = dict()
            
            # Configure the default font sizes
            title_font_size = 15
            header_font_size = 13
            axis_font_size = 12
            legend_font_size = 12
            mark_font_size = 12
            tooltip = False

            config = existingTheme['config']

            # The Axis
            if 'axis' not in config:
              config['axis'] = dict()
            axis = config['axis']
            if 'labelFontSize' not in axis:
              axis['labelFontSize'] = axis_font_size
            if 'titleFontSize' not in axis:
              axis['titleFontSize'] = axis_font_size  

            # The legend
            if 'legend' not in config:
              config['legend'] = dict()
            legend = config['legend']
            if 'labelFontSize' not in legend:
              legend['labelFontSize'] = legend_font_size
            if 'titleFontSize' not in legend:
              legend['titleFontSize'] = legend_font_size  

            # The header
            if 'header' not in config:
              config['header'] = dict()
            header = config['header']
            if 'labelFontSize' not in header:
              header['labelFontSize'] = header_font_size
            if 'titleFontSize' not in header:
              header['titleFontSize'] = header_font_size    

            # Title
            if 'title' not in config:
              config['title'] = dict()
            title = config['title']
            if 'fontSize' not in title:
              title['fontSize'] = title_font_size

            # Marks
            if 'mark' not in config:
              config['mark'] = dict()
            mark = config['mark']
            if 'fontSize' not in mark:
              mark['fontSize'] = mark_font_size

            # Mark tooltips
            if tooltip and 'tooltip' not in mark:
              mark['tooltip'] = dict(content="encoding")

            return existingTheme
            
        return patch_theme

    # We can only do this once per session
    if theme_sentinel not in alt.themes.names():
      for name in alt.themes.names():
        alt.themes.register(name, make_theme(name))
      
      # register a sentinel theme so we only do this once
      alt.themes.register(theme_sentinel, make_theme('default'))
      alt.themes.enable('default')

  except Exception:
    pass

# enable pandas latex repr when targeting pdfs
try:
  import pandas as pd
  if fig_format == 'pdf':
    pd.set_option('display.latex.repr', True)
except Exception:
  pass

# interactivity
if interactivity:
  from IPython.core.interactiveshell import InteractiveShell
  InteractiveShell.ast_node_interactivity = interactivity

# NOTE: the kernel_deps code is repeated in the cleanup.py file
# (we can't easily share this code b/c of the way it is run).
# If you edit this code also edit the same code in cleanup.py!

# output kernel dependencies
kernel_deps = dict()
for module in list(sys.modules.values()):
  # Some modules play games with sys.modules (e.g. email/__init__.py
  # in the standard library), and occasionally this can cause strange
  # failures in getattr.  Just ignore anything that's not an ordinary
  # module.
  if not isinstance(module, types.ModuleType):
    continue
  path = getattr(module, "__file__", None)
  if not path:
    continue
  if path.endswith(".pyc") or path.endswith(".pyo"):
    path = path[:-1]
  if not os.path.exists(path):
    continue
  kernel_deps[path] = os.stat(path).st_mtime
print(json.dumps(kernel_deps))

# set run_path if requested
run_path = 'L2hvbWUvZnJhZGF2L0Rldi9QeXRob24vbWlhc2hzLTItYWR2YW5jZWQtcHJvZ3JhbW1pbmctcGFyYWxsZWwtY29tcHV0aW5nLTIwMjQtMjAyNS9Db3Vyc2VzL1NvbHV0aW9ucw=='
if run_path:
  # hex-decode the path
  run_path = base64.b64decode(run_path.encode("utf-8")).decode("utf-8")
  os.chdir(run_path)

# reset state
%reset

# shiny
# Checking for shiny by using False directly because we're after the %reset. We don't want
# to set a variable that stays in global scope.
if False:
  try:
    import htmltools as _htmltools
    import ast as _ast

    _htmltools.html_dependency_render_mode = "json"

    # This decorator will be added to all function definitions
    def _display_if_has_repr_html(x):
      try:
        # IPython 7.14 preferred import
        from IPython.display import display, HTML
      except:
        from IPython.core.display import display, HTML

      if hasattr(x, '_repr_html_'):
        display(HTML(x._repr_html_()))
      return x

    # ideally we would undo the call to ast_transformers.append
    # at the end of this block whenver an error occurs, we do 
    # this for now as it will only be a problem if the user 
    # switches from shiny to not-shiny mode (and even then likely
    # won't matter)
    import builtins
    builtins._display_if_has_repr_html = _display_if_has_repr_html

    class _FunctionDefReprHtml(_ast.NodeTransformer):
      def visit_FunctionDef(self, node):
        node.decorator_list.insert(
          0,
          _ast.Name(id="_display_if_has_repr_html", ctx=_ast.Load())
        )
        return node

      def visit_AsyncFunctionDef(self, node):
        node.decorator_list.insert(
          0,
          _ast.Name(id="_display_if_has_repr_html", ctx=_ast.Load())
        )
        return node

    ip = get_ipython()
    ip.ast_transformers.append(_FunctionDefReprHtml())

  except:
    pass

def ojs_define(**kwargs):
  import json
  try:
    # IPython 7.14 preferred import
    from IPython.display import display, HTML
  except:
    from IPython.core.display import display, HTML

  # do some minor magic for convenience when handling pandas
  # dataframes
  def convert(v):
    try:
      import pandas as pd
    except ModuleNotFoundError: # don't do the magic when pandas is not available
      return v
    if type(v) == pd.Series:
      v = pd.DataFrame(v)
    if type(v) == pd.DataFrame:
      j = json.loads(v.T.to_json(orient='split'))
      return dict((k,v) for (k,v) in zip(j["index"], j["data"]))
    else:
      return v

  v = dict(contents=list(dict(name=key, value=convert(value)) for (key, value) in kwargs.items()))
  display(HTML('<script type="ojs-define">' + json.dumps(v) + '</script>'), metadata=dict(ojs_define = True))
globals()["ojs_define"] = ojs_define
# globals()["__spec__"] = None



In [2]:
import numpy as np
from numba import jit

In [3]:
def find_instr(func, keyword, sig=0, limit=5):
    count = 0
    for l in func.inspect_asm(func.signatures[sig]).split('\n'):
        if keyword in l:
            count += 1
            print(l)
            if count >= limit:
                break
    if count == 0:
        print('No instructions found')

In [4]:
@jit(nopython=True)
def sqdiff(x, y):
    out = np.empty_like(x)
    for i in range(x.shape[0]):
        out[i] = (x[i] - y[i])**2
    return out

In [5]:
x32 = np.linspace(1, 2, 10000, dtype=np.float32)
y32 = np.linspace(2, 3, 10000, dtype=np.float32)
sqdiff(x32, y32)

array([1.        , 0.99999976, 1.        , ..., 1.        , 1.0000002 ,
       1.        ], dtype=float32)

In [6]:
x64 = x32.astype(np.float64)
y64 = y32.astype(np.float64)
sqdiff(x64, y64)

array([1.        , 0.99999976, 1.        , ..., 1.        , 1.00000024,
       1.        ])

In [7]:
sqdiff.signatures

[(Array(float32, 1, 'C', False, aligned=True),
  Array(float32, 1, 'C', False, aligned=True)),
 (Array(float64, 1, 'C', False, aligned=True),
  Array(float64, 1, 'C', False, aligned=True))]

In [8]:
%timeit sqdiff(x32, y32)
%timeit sqdiff(x64, y64)

1.94 μs ± 9.66 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


2.98 μs ± 17.5 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [9]:
print('float32:')
find_instr(sqdiff, keyword='subp', sig=0)
print('---\nfloat64:')
find_instr(sqdiff, keyword='subp', sig=1)

float32:
	vsubps	(%rax,%rdi,4), %ymm0, %ymm0
	vsubps	32(%rax,%rdi,4), %ymm1, %ymm1
	vsubps	64(%rax,%rdi,4), %ymm2, %ymm2
	vsubps	96(%rax,%rdi,4), %ymm3, %ymm3
	vsubps	128(%rax,%rdi,4), %ymm0, %ymm0
---
float64:
	vsubpd	(%rax,%rdi,8), %ymm0, %ymm0
	vsubpd	32(%rax,%rdi,8), %ymm1, %ymm1
	vsubpd	64(%rax,%rdi,8), %ymm2, %ymm2
	vsubpd	96(%rax,%rdi,8), %ymm3, %ymm3
	vsubpd	128(%rax,%rdi,8), %ymm0, %ymm0


In [10]:
@jit(nopython=True)
def frac_diff1(x, y):
    out = np.empty_like(x)
    for i in range(x.shape[0]):
        out[i] = 2 * (x[i] - y[i]) / (x[i] + y[i])
    return out

In [11]:
frac_diff1(x32, y32)

array([-0.6666667 , -0.66662216, -0.66657776, ..., -0.400032  ,
       -0.40001604, -0.4       ], dtype=float32)

In [12]:
find_instr(frac_diff1, keyword='subp', sig=0)

No instructions found


In [13]:
@jit(nopython=True, error_model='numpy')
def frac_diff2(x, y):
    out = np.empty_like(x)
    for i in range(x.shape[0]):
        out[i] = 2 * (x[i] - y[i]) / (x[i] + y[i])
    return out

In [14]:
frac_diff2(x32, y32)

array([-0.6666667 , -0.66662216, -0.66657776, ..., -0.400032  ,
       -0.40001604, -0.4       ], dtype=float32)

In [15]:
find_instr(frac_diff2, keyword='subp', sig=0)

	vsubps	%ymm1, %ymm0, %ymm2
	vsubps	%ymm1, %ymm0, %ymm2
	vsubps	%ymm1, %ymm0, %ymm2


In [16]:
%timeit frac_diff2(x32, y32)
%timeit frac_diff2(x64, y64)

6.76 μs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


6.45 μs ± 9.79 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [17]:
frac_diff2.inspect_types(pretty=True)


The pretty_annotate functionality is experimental and might change API



0
label 0
"x = arg(0, name=x) :: array(float32, 1d, C)"
"y = arg(1, name=y) :: array(float32, 1d, C)"
$4load_global.0 = global(np: <module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>) :: Module(<module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>)
"$14load_attr.2 = getattr(value=$4load_global.0, attr=empty_like) :: Function(<built-in function empty_like>)"
del $4load_global.0
"out = call $14load_attr.2(x, func=$14load_attr.2, args=[Var(x, 2090011437.py:1)], kws=(), vararg=None, varkwarg=None, target=None) :: (Array(float32, 1, 'C', False, aligned=True), omitted(default=None)) -> array(float32, 1d, C)"
del $14load_attr.2
$46load_global.5 = global(range: <class 'range'>) :: Function(<class 'range'>)
"$58load_attr.8 = getattr(value=x, attr=shape) :: UniTuple(int64 x 1)"

0
label 0
"x = arg(0, name=x) :: array(float32, 1d, C)"
"y = arg(1, name=y) :: array(float32, 1d, C)"

0
$4load_global.0 = global(np: <module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>) :: Module(<module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>)
"$14load_attr.2 = getattr(value=$4load_global.0, attr=empty_like) :: Function(<built-in function empty_like>)"
del $4load_global.0
"out = call $14load_attr.2(x, func=$14load_attr.2, args=[Var(x, 2090011437.py:1)], kws=(), vararg=None, varkwarg=None, target=None) :: (Array(float32, 1, 'C', False, aligned=True), omitted(default=None)) -> array(float32, 1d, C)"
del $14load_attr.2

0
$46load_global.5 = global(range: <class 'range'>) :: Function(<class 'range'>)
"$58load_attr.8 = getattr(value=x, attr=shape) :: UniTuple(int64 x 1)"
"$const78.9 = const(int, 0) :: Literal[int](0)"
"$80binary_subscr.10 = static_getitem(value=$58load_attr.8, index=0, index_var=$const78.9, fn=<built-in function getitem>) :: int64"
del $const78.9
del $58load_attr.8
"$84call.11 = call $46load_global.5($80binary_subscr.10, func=$46load_global.5, args=[Var($80binary_subscr.10, 2090011437.py:4)], kws=(), vararg=None, varkwarg=None, target=None) :: (int64,) -> range_state_int64"
del $80binary_subscr.10
del $46load_global.5
$92get_iter.12 = getiter(value=$84call.11) :: range_iter_int64

0
"$const100.2 = const(int, 2) :: Literal[int](2)"
"$106binary_subscr.5 = getitem(value=x, index=i, fn=<built-in function getitem>) :: float32"
"$114binary_subscr.8 = getitem(value=y, index=i, fn=<built-in function getitem>) :: float32"
$binop_sub118.9 = $106binary_subscr.5 - $114binary_subscr.8 :: float32
del $114binary_subscr.8
del $106binary_subscr.5
$binop_mul122.10 = $const100.2 * $binop_sub118.9 :: float64
del $const100.2
del $binop_sub118.9
"$130binary_subscr.13 = getitem(value=x, index=i, fn=<built-in function getitem>) :: float32"

0
label 160
del y
del x
del $phi98.1
del $phi94.0
del $94for_iter.3
"$164return_value.3 = cast(value=out) :: array(float32, 1d, C)"
del out
return $164return_value.3

0
label 0
"x = arg(0, name=x) :: array(float64, 1d, C)"
"y = arg(1, name=y) :: array(float64, 1d, C)"
$4load_global.0 = global(np: <module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>) :: Module(<module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>)
"$14load_attr.2 = getattr(value=$4load_global.0, attr=empty_like) :: Function(<built-in function empty_like>)"
del $4load_global.0
"out = call $14load_attr.2(x, func=$14load_attr.2, args=[Var(x, 2090011437.py:1)], kws=(), vararg=None, varkwarg=None, target=None) :: (Array(float64, 1, 'C', False, aligned=True), omitted(default=None)) -> array(float64, 1d, C)"
del $14load_attr.2
$46load_global.5 = global(range: <class 'range'>) :: Function(<class 'range'>)
"$58load_attr.8 = getattr(value=x, attr=shape) :: UniTuple(int64 x 1)"

0
label 0
"x = arg(0, name=x) :: array(float64, 1d, C)"
"y = arg(1, name=y) :: array(float64, 1d, C)"

0
$4load_global.0 = global(np: <module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>) :: Module(<module 'numpy' from '/home/fradav/.micromamba/envs/miashs-hpc/lib/python3.12/site-packages/numpy/__init__.py'>)
"$14load_attr.2 = getattr(value=$4load_global.0, attr=empty_like) :: Function(<built-in function empty_like>)"
del $4load_global.0
"out = call $14load_attr.2(x, func=$14load_attr.2, args=[Var(x, 2090011437.py:1)], kws=(), vararg=None, varkwarg=None, target=None) :: (Array(float64, 1, 'C', False, aligned=True), omitted(default=None)) -> array(float64, 1d, C)"
del $14load_attr.2

0
$46load_global.5 = global(range: <class 'range'>) :: Function(<class 'range'>)
"$58load_attr.8 = getattr(value=x, attr=shape) :: UniTuple(int64 x 1)"
"$const78.9 = const(int, 0) :: Literal[int](0)"
"$80binary_subscr.10 = static_getitem(value=$58load_attr.8, index=0, index_var=$const78.9, fn=<built-in function getitem>) :: int64"
del $const78.9
del $58load_attr.8
"$84call.11 = call $46load_global.5($80binary_subscr.10, func=$46load_global.5, args=[Var($80binary_subscr.10, 2090011437.py:4)], kws=(), vararg=None, varkwarg=None, target=None) :: (int64,) -> range_state_int64"
del $80binary_subscr.10
del $46load_global.5
$92get_iter.12 = getiter(value=$84call.11) :: range_iter_int64

0
"$const100.2 = const(int, 2) :: Literal[int](2)"
"$106binary_subscr.5 = getitem(value=x, index=i, fn=<built-in function getitem>) :: float64"
"$114binary_subscr.8 = getitem(value=y, index=i, fn=<built-in function getitem>) :: float64"
$binop_sub118.9 = $106binary_subscr.5 - $114binary_subscr.8 :: float64
del $114binary_subscr.8
del $106binary_subscr.5
$binop_mul122.10 = $const100.2 * $binop_sub118.9 :: float64
del $const100.2
del $binop_sub118.9
"$130binary_subscr.13 = getitem(value=x, index=i, fn=<built-in function getitem>) :: float64"

0
label 160
del y
del x
del $phi98.1
del $phi94.0
del $94for_iter.3
"$164return_value.3 = cast(value=out) :: array(float64, 1d, C)"
del out
return $164return_value.3


In [18]:
@jit(nopython=True, error_model='numpy')
def frac_diff3(x, y):
    out = np.empty_like(x)
    dt = x.dtype # Cast the constant using the dtype of the input
    for i in range(x.shape[0]):
        # Could also use np.float32(2) to always use same type, regardless of input
        out[i] = dt.type(2) * (x[i] - y[i]) / (x[i] + y[i])
    return out

In [19]:
frac_diff3(x32, y32)

array([-0.6666667 , -0.66662216, -0.66657776, ..., -0.400032  ,
       -0.40001604, -0.4       ], dtype=float32)

In [20]:
%timeit frac_diff3(x32, y32)
%timeit frac_diff3(x64, y64)

2.62 μs ± 7.41 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


6.48 μs ± 17.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [21]:
@jit(nopython=True)
def do_sum(A):
    acc = 0.
    # without fastmath, this loop must accumulate in strict order
    for x in A:
        acc += x**2
    return acc

@jit(nopython=True, fastmath=True)
def do_sum_fast(A):
    acc = 0.
    # with fastmath, the reduction can be vectorized as floating point
    # reassociation is permitted.
    for x in A:
        acc += x**2
    return acc

In [22]:
do_sum(x32)
find_instr(do_sum, keyword='mulp')  # look for vector multiplication

No instructions found


In [23]:
do_sum_fast(x32)
find_instr(do_sum_fast, keyword='mulp')

	vmulps	%xmm4, %xmm4, %xmm4
	vmulps	%xmm5, %xmm5, %xmm5
	vmulps	%xmm6, %xmm6, %xmm6
	vmulps	%xmm7, %xmm7, %xmm7


In [24]:
%timeit do_sum(x32)
%timeit do_sum_fast(x32)

11.1 μs ± 27.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


5.03 μs ± 5.33 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [25]:
! conda list icc_rt

zsh:1: command not found: conda


In [26]:
SQRT_2PI = np.sqrt(2 * np.pi)

@jit(nopython=True, error_model='numpy', fastmath=True)
def kde(x, means, widths):
    '''Compute value of gaussian kernel density estimate.
    
    x - location of evaluation
    means - array of kernel means
    widths - array of kernel widths
    '''
    n = means.shape[0]
    acc = 0.
    for i in range(n):
        acc += np.exp( -0.5 * ((x - means[i]) / widths[i])**2 ) / widths[i]
    return acc / SQRT_2PI / n

In [27]:
# The distribution we are approximating is flat between -1 and 1, so we expect a KDE value of ~0.5 everywhere
means = np.random.uniform(-1, 1, size=10000)
# These widths are not selected in any reasonable way.  Consult your local statistician before approximating a PDF.
widths = np.random.uniform(0.1, 0.3, size=10000)

kde(0.4, means, widths)

0.4926645301770076

In [28]:
find_instr(kde, 'subp')

	vsubpd	(%rbx,%rsi,8), %ymm3, %ymm0
	vsubpd	32(%rbx,%rsi,8), %ymm3, %ymm1
	vsubpd	64(%rbx,%rsi,8), %ymm3, %ymm2
	vsubpd	96(%rbx,%rsi,8), %ymm3, %ymm3


In [29]:
find_instr(kde, keyword='svml')

	movabsq	$__svml_exp4, %rdi


In [30]:
%timeit kde(0.4, means, widths)

17.9 μs ± 712 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [31]:
slow_kde = jit(nopython=True)(kde.py_func)

slow_kde(0.4, means, widths)

0.4926645301770067

In [32]:
find_instr(slow_kde, keyword='subp')
print('---')
find_instr(slow_kde, keyword='svml')

No instructions found
---
No instructions found


In [33]:
%timeit kde(0.4, means, widths)
%timeit slow_kde(0.4, means, widths)

18.2 μs ± 14.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


82.2 μs ± 78.4 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [34]:
def numpy_kde(x, means, widths):
    acc = (np.exp( -0.5 * ((x - means) / widths)**2 ) / widths).mean()
    # .mean() already divides by n
    return acc / SQRT_2PI

In [35]:
numpy_kde(0.4, means, widths)

np.float64(0.4926645301770076)

In [36]:
%timeit numpy_kde(0.4, means, widths)

36.9 μs ± 143 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [37]:
numba_numpy_kde = jit(nopython=True, error_model='numpy', fastmath=True)(numpy_kde)

numba_numpy_kde(0.4, means, widths)

0.4926645301770071

In [38]:
find_instr(numba_numpy_kde, keyword='subp')
print('---')
find_instr(numba_numpy_kde, keyword='svml')

	vsubpd	(%r14,%rsi,8), %ymm8, %ymm0
	vsubpd	(%r14,%rsi,8), %ymm8, %ymm0
---
	movabsq	$__svml_exp4, %rdi
	movabsq	$__svml_exp4, %rdi
	movabsq	$__svml_exp4, %rdi
	movabsq	$__svml_exp4, %rax


In [39]:
%timeit kde(0.4, means, widths)
%timeit numba_numpy_kde(0.4, means, widths)
%timeit numpy_kde(0.4, means, widths)

17.8 μs ± 835 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


22.1 μs ± 24 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


37 μs ± 35.7 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
