In [1]:
import numpy as np
import pandas as pd
from functools import lru_cache
import numba

In [10]:
size = 1000_000
n = 100

df = pd.DataFrame(
    {
        "a": np.random.randn(size),
        "b": np.random.randn(size),
        "N": np.random.randint(n, size, (size)),
        "x": "x",
    }
)
df

Unnamed: 0,a,b,N,x
0,-0.108667,-0.294316,516793,x
1,0.319296,0.173228,329032,x
2,-0.023702,0.899143,65128,x
3,1.508781,-2.080241,799069,x
4,-0.260146,0.526553,315095,x
...,...,...,...,...
999995,0.854311,1.326949,255008,x
999996,0.320445,-2.987504,507077,x
999997,-0.139886,1.478656,739823,x
999998,-2.305677,-1.780381,12218,x


In [11]:
# numba.set_num_threads(8)

@numba.jit
def f(x):
    return x * (x - 1)


@numba.jit
def integrate_f(a, b, N):
    s = 0
    dx = (b - a) / N

    for i in range(N):
        s += f(a + i * dx)
    
    return s * dx

In [None]:
%%timeit 
result = df.apply(lambda x: integrate_f(x["a"], x["b"], x["N"]), axis=1)
# print(result)

In [15]:

# numba.set_num_threads(8)

@numba.jit(nopython=True, fastmath=True)
def f_plain(x):
    return x * (x - 1)

@numba.jit(nopython=True, fastmath=True)
def integrate_f_numba(a, b, N):
    s = 0
    dx = (b - a) / N
    for i in range(N):
        s += f_plain(a + i * dx)
    return s * dx


@numba.jit(nopython=True, fastmath=True)
def apply_integrate_f_numba(col_a, col_b, col_N):
    n = len(col_N)
    result = np.empty(n, dtype="float64")
    assert len(col_a) == len(col_b) == n
    for i in range(n):
        result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i])
    return result


def compute_numba(df):
    result = apply_integrate_f_numba(
        df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy()
    )
    return pd.Series(result, index=df.index, name="result")

In [16]:
%%timeit 
result = compute_numba(df)

4min 20s ± 30 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
