In [1]:
import numpy as np
import minterpy as mp
import matplotlib.pyplot as plt

from utils_base import eval_newton_driver_base
from utils_numba import eval_newton_driver_numba_cpu
from utils_numba_par import eval_newton_driver_numba_cpu_par
from utils_numba_cuda import eval_newton_driver_numba_gpu

In [2]:
def runge(xx):
    """Evaluate the multi-dimensional Runge function row by row.

    Parameters
    ----------
    xx : numpy.ndarray
        Two-dimensional array; each row is one evaluation point and each
        column one coordinate.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with ``1 / (1 + sum_j xx[i, j]**2)`` for every
        row ``i`` of ``xx``.
    """
    squared_radius = (xx * xx).sum(axis=1)
    return 1 / (squared_radius + 1)

In [3]:
# Number of input dimensions; used both to build the multi-index set and to
# size the random test points.
spatial_dimension = 6

In [4]:
# Build the multi-index set (polynomial degree 8, lp-degree 2.0) in
# `spatial_dimension` dimensions and display its size as the cell output.
mi = mp.MultiIndexSet.from_degree(spatial_dimension=spatial_dimension, poly_degree=8, lp_degree=2.0)
len(mi)

43774

In [5]:
# Interpolation grid built from the multi-index set.
grd = mp.Grid(mi)
# Lagrange coefficients are the function values at the grid's unisolvent nodes.
lag_coeffs = runge(grd.unisolvent_nodes)  # Evaluate the function at the interpolating points
lag_poly = mp.LagrangePolynomial(mi, lag_coeffs)
# Construct the Lagrange-to-Newton transformation and call it immediately to
# obtain the polynomial whose coefficients/grid feed the evaluation drivers.
nwt_poly = mp.LagrangeToNewton(lag_poly)()

In [33]:
# One million test points drawn uniformly from [-1, 1)^spatial_dimension.
# A locally seeded generator makes the cell idempotent: re-running it (or the
# whole notebook) always yields the same points, so correctness checks and
# timings of the different drivers are comparable across runs.
xx_test = np.random.default_rng(seed=42).uniform(
    low=-1.0,
    high=1.0,
    size=(1000000, spatial_dimension),
)

In [7]:
# Pull the raw arrays out of the Newton polynomial once; every driver below
# receives these three arrays (plus the test points) as plain arguments.
nwt_coeffs, exponents, gen_points = (
    nwt_poly.coeffs,
    nwt_poly.multi_index.exponents,
    nwt_poly.grid.generating_points,
)

## Base implementation

In [12]:
# Evaluate the polynomial at the test points with the base driver; the result
# is the baseline the Numba CPU driver is compared against.
yy_base = eval_newton_driver_base(
    xx_test,
    nwt_coeffs,
    exponents,
    gen_points,
)

In [10]:
# Reference values from minterpy itself: calling the polynomial object
# evaluates it at the given points. `yy_poly` was previously never assigned in
# this notebook, so this cell failed with NameError on a fresh kernel.
# NOTE(review): this reference evaluation may be slow for the full test set.
yy_poly = nwt_poly(xx_test)
assert np.allclose(yy_poly, yy_base)

In [13]:
%%timeit
# Time the base driver on the full test set.
eval_newton_driver_base(xx_test, nwt_coeffs, exponents, gen_points)

501 ms ± 55 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Numba implementation

In [11]:
# Same evaluation through the Numba CPU driver; the result is also the
# reference for the parallel and GPU drivers further down.
yy_numba = eval_newton_driver_numba_cpu(
    xx_test,
    nwt_coeffs,
    exponents,
    gen_points,
)

In [16]:
# The Numba CPU result must match the base result element-wise within
# NumPy's default tolerances.
assert np.isclose(yy_base, yy_numba).all()

In [12]:
%%timeit
# Time the Numba CPU driver on the full test set.
eval_newton_driver_numba_cpu(xx_test, nwt_coeffs, exponents, gen_points)

4.07 s ± 23.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Numba parallel implementation

In [34]:
# Same evaluation through the parallel Numba CPU driver.
yy_numba_cpu_par = eval_newton_driver_numba_cpu_par(
    xx_test,
    nwt_coeffs,
    exponents,
    gen_points,
)

In [14]:
# The parallel result must match the Numba CPU result element-wise within
# NumPy's default tolerances.
assert np.isclose(yy_numba, yy_numba_cpu_par).all()

In [15]:
%%timeit
# Time the parallel Numba CPU driver on the full test set.
eval_newton_driver_numba_cpu_par(xx_test, nwt_coeffs, exponents, gen_points)

438 ms ± 24.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Numba GPU implementation

In [35]:
# Same evaluation through the Numba GPU driver, called without an explicit
# `threads_per_block`.
yy_numba_gpu = eval_newton_driver_numba_gpu(
    xx_test,
    nwt_coeffs,
    exponents,
    gen_points,
)

In [26]:
# The GPU result must match the Numba CPU result element-wise within
# NumPy's default tolerances.
assert np.isclose(yy_numba, yy_numba_gpu).all()

In [27]:
%%timeit
# Time the Numba GPU driver on the full test set, leaving `threads_per_block`
# unspecified.
eval_newton_driver_numba_gpu(xx_test, nwt_coeffs, exponents, gen_points)

121 ms ± 1.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [37]:
%%timeit
# Same GPU timing, but with an explicit `threads_per_block=512` to compare
# against the run above that omits the argument.
eval_newton_driver_numba_gpu(xx_test, nwt_coeffs, exponents, gen_points, threads_per_block=512)

7.55 s ± 118 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
