In [1]:
import pandas as pd
import numpy as np
import numba
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Benchmark input: a dense float matrix of uniform samples in [0, 1).
# A fixed seed makes every "Restart Kernel -> Run All" time the exact same data.
SEED = 0
N_ROWS, N_COLS = 100000, 5

X = np.random.default_rng(SEED).random((N_ROWS, N_COLS))
# Column labels c_0 .. c_{N_COLS-1}; derived from N_COLS so they cannot drift
# out of sync with the array width.
d_X = pd.DataFrame(X, columns=[f'c_{i}' for i in range(N_COLS)])
d_X.shape

(100000, 5)

In [3]:
# Rolling view over d_X with a fixed window size of 5; shared by every
# roll.apply(...) benchmark below.
roll = d_X.rolling(window=5)

In [4]:
def moving_avg(x):
    """Return the mean of a single window.

    Parameters
    ----------
    x : object exposing ``.mean()``
        The window contents; in this notebook it is supplied by
        ``roll.apply`` (a Series or an ndarray depending on ``raw``).

    Returns
    -------
    Whatever ``x.mean()`` yields for that window.
    """
    window_mean = x.mean()
    return window_mean

In [5]:
# Baseline: default raw=False, so pandas hands each window to moving_avg
# as a pandas Series. Timed once (-n 1 -r 1) because a single run is already slow.
%timeit -n 1 -r 1 roll.apply(moving_avg)

43.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [6]:
# raw=True: each window reaches moving_avg as a NumPy ndarray instead of a
# Series, avoiding per-window Series construction.
%timeit -n 1 -r 1 roll.apply(moving_avg, raw=True)

2.93 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [7]:
#Numba
%timeit -n 1 -r 1 roll.apply(moving_avg, engine='numba', raw=True)

992 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
#Numba multithreads
numba.set_num_threads(4)
%timeit -n 1 -r 1 roll.apply(moving_avg, engine='numba', raw=True, engine_kwargs={"parallel": True})

817 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [9]:
# Explicit engine='cython' with the default raw=False: windows are still passed
# as Series, so this is expected to track the Series baseline above.
%timeit -n 1 -r 1 roll.apply(moving_avg, engine='cython')

43.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [10]:
# Explicit engine='cython' combined with raw=True: windows are passed as ndarrays.
%timeit -n 1 -r 1 roll.apply(moving_avg, engine='cython', raw=True)

2.94 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [11]:
def softmax(logit):
    """Return the position of the largest softmax probability in ``logit``.

    Despite the name, this returns a class label (the argmax of the softmax
    distribution), not the probability vector itself.

    Parameters
    ----------
    logit : numpy.ndarray or pandas.Series
        One-dimensional collection of (finite) scores for a single sample.

    Returns
    -------
    int
        Zero-based position of the highest-probability entry.
    """
    # Softmax is invariant to adding a constant to every logit. Subtracting the
    # maximum keeps every exponent <= 0, so np.exp can no longer overflow to
    # inf (which previously produced inf/inf = NaN and a wrong argmax).
    shifted = logit - logit.max()
    e_x = np.exp(shifted)
    result = e_x / e_x.sum()
    return result.argmax()

# Row-wise DataFrame.apply (axis=1) with the default raw=False: each row is
# handed to softmax as a pandas Series.
%timeit -n 1 -r 1 d_X.apply(softmax, axis=1)

23.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [12]:
# Same row-wise apply, but raw=True passes each row as a NumPy ndarray.
%timeit -n 1 -r 1 d_X.apply(softmax, axis=1, raw=True)

724 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [13]:
@numba.njit
def softmax(logit):
    """Numba-compiled argmax-of-softmax for a single row of scores.

    This rebinds the name ``softmax`` that an earlier cell defined as a plain
    Python function; cells executed after this one use the compiled version.

    Parameters
    ----------
    logit : numpy.ndarray
        One-dimensional array of (finite) scores for a single sample.

    Returns
    -------
    int
        Zero-based position of the highest-probability entry.
    """
    # Shift by the maximum before exponentiating: softmax is unchanged by a
    # constant offset, and exponents <= 0 cannot overflow to inf (which would
    # otherwise give inf/inf = NaN probabilities and a wrong argmax).
    shifted = logit - logit.max()
    e_x = np.exp(shifted)
    result = e_x / e_x.sum()
    return result.argmax()

In [14]:
@numba.njit
def apply_softmax(arr):
    """Label every row of ``arr`` with ``softmax`` and collect the results.

    Parameters
    ----------
    arr : numpy.ndarray
        Array whose iteration yields one row of scores at a time (the notebook
        passes ``d_X.to_numpy()``).

    Returns
    -------
    list
        One label per row, in row order.
    """
    return [softmax(scores) for scores in arr]

%timeit -n 1 -r 1 apply_softmax(d_X.to_numpy())

495 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
