# UDF comparision

In [51]:
import math
import iarray as ia
import numpy as np
from time import time
from iarray.udf import Array, jit, float64, int64
import numba as nb

max_num_threads = 8
nrep = 5

In [52]:
#str_expr = "sin(x)"
str_expr = "x"

@jit
def poly_udf(y: Array(float64, 1), x: Array(float64, 1)) -> int64:
    n = x.shape[0]
    for i in range(n):
        #s = math.sin(x[i])
        #a = math.atan(x[i])
        #a = math.atan2(x[i], 0.5)  # try this!
        #y[i] = s * a  # try this!
        #y[i] = math.sin(x[i])# * math.atan(x[i])  # try combining several ufuncs
        #y[i] = math.sin(x[i])  # try this!
        y[i] = x[i]

    return 0

@nb.njit(parallel=True)
def poly_numba(x):
    y = np.empty(x.shape, x.dtype)
    for i in nb.prange(len(x)):
        #s = math.sin(x[i])
        #a = math.atan(x[i])
        #a = math.atan2(x[i], 0.5)
        #y[i] = s * a
        #y[i] = math.sin(x[i])# * math.atan(x[i])
        y[i] = x[i]
    return y


In [53]:
# Define array params
shape = [10 * 1000 * 1000]
chunkshape = [1 * 1000 * 1000]
blockshape = [8 * 1000]
dtshape = ia.dtshape(shape)
size = int(np.prod(shape))
nthreads = 6
clevel = 5

In [54]:
# iarray UDF
bstorage = ia.StorageProperties("blosc", chunkshape, blockshape)
kwargs = dict(nthreads=nthreads, clevel=clevel, storage=bstorage)
a1 = ia.linspace(dtshape, 0, 10, **kwargs)
expr = poly_udf.create_expr([a1], dtshape, method="auto", **kwargs)
t0 = time()
b1 = expr.eval()
t1 = time()
print("Time to evaluate expression with iarray.udf:", round(t1 - t0, 3))

Time to evaluate expression with iarray.udf: 0.018


In [55]:
# iarray juggernaut
expr = ia.Expr(**kwargs)
expr.bind("x", a1)
expr.bind_out_properties(dtshape, storage=bstorage)
expr.compile(str_expr)
t0 = time()
b2 = expr.eval()
t1 = time()
print("Time to evaluate expression with iarray (juggernaut):", round(t1 - t0, 3))

Time to evaluate expression with iarray (juggernaut): 0.017


In [56]:
# numba
a1 = np.linspace(0, 10, size).reshape(shape)
nb.set_num_threads(nthreads)
t0 = time()
np3 = poly_numba(a1)
t1 = time()
print("Time to evaluate expression with numba:", round(t1 - t0, 3))

Time to evaluate expression with numba: 0.276


In [57]:
# Compare results.  The regular juggernaut works.
np2 = ia.iarray2numpy(b2)
np.testing.assert_almost_equal(np2, np3, decimal=5)

In [58]:
# The UDF result fails
np1 = ia.iarray2numpy(b1)
np.testing.assert_almost_equal(np1, np3, decimal=5)


Time to evaluate expression with numba: 0.262


In [7]:
# Compare results.  The regular juggernaut works.
np2 = ia.iarray2numpy(b2)
np.testing.assert_almost_equal(np2, np3, decimal=5)

In [8]:
# The UDF result fails
np1 = ia.iarray2numpy(b1)
np.testing.assert_almost_equal(np1, np3, decimal=5)


AssertionError: 
Arrays are not almost equal to 5 decimals

x and y nan location mismatch:
 x: array([0., 0., 0., ..., 0., 0., 0.])
 y: array([0.e+00, 1.e-06, 2.e-06, ..., 1.e+01, 1.e+01, 1.e+01])