In [1]:
import pandas as pd
import numpy as np
from numba import njit

In [2]:
@njit
def ewma(x, halflife):
    """
    Exponentially weighted moving average along the first axis of ``x``.

    Each output element is the weighted mean of the current and all earlier
    observations, where an observation ``k`` steps in the past carries weight
    ``decay ** k`` and ``decay = 0.5 ** (1 / halflife)``. The weights are
    normalised by their running sum.

    Parameters
    ----------
    x : array
        Input observations, indexed along axis 0. Values are expected to be
        finite.
    halflife : number
        Number of steps after which an observation's weight has halved.
        Must be finite and strictly greater than zero.

    Returns
    -------
    array of float64 with the same shape as ``x``.

    Raises
    ------
    AssertionError
        If ``halflife`` is not finite or is not strictly positive.

    >>> ewma(np.arange(5), halflife=2)
    array([ 0.        ,  0.58578644,  1.22654092,  1.91911977,  2.65947261])
    """
    assert np.isfinite(halflife) and 0 < halflife

    decay = np.exp(np.log(0.5) / halflife)
    smoothed = np.empty_like(x, dtype=np.float64)
    n_obs = smoothed.shape[0]

    # Nothing to average for an empty input.
    if n_obs == 0:
        return smoothed

    # The first observation is its own average; its weight is decay ** 0.
    smoothed[0] = x[0]
    weight_total = 1.0

    for step in range(1, n_obs):
        # Extending the window by one step adds the weight of its oldest
        # observation, decay ** step, to the normaliser.
        next_total = weight_total + np.power(decay, step)
        # smoothed[step - 1] * weight_total recovers the previous weighted
        # sum; age it by one step, add the new observation, renormalise.
        smoothed[step] = (decay * smoothed[step - 1] * weight_total + x[step]) / next_total
        weight_total = next_total

    return smoothed

In [3]:
# Sample data for the pandas-equivalence check. A locally seeded generator
# keeps the check reproducible across kernel restarts without touching the
# global NumPy random state.
x = np.random.RandomState(0).rand(100, 100)

In [4]:
# Sanity check: ewma() must reproduce pandas' halflife-based EWM mean.
pd.testing.assert_frame_equal(
    left=pd.DataFrame(x).ewm(halflife=10).mean(),
    right=pd.DataFrame(ewma(x, halflife=10)),
)

In [5]:
# Benchmark array shapes as (rows, columns); each family grows by powers of ten.
wide_shapes = [(10, 10 ** power) for power in range(2, 7)]
square_shapes = [(10 ** power,) * 2 for power in range(2, 5)]
tall_shapes = [(10 ** power, 10) for power in range(2, 7)]

In [6]:
# Benchmark pandas' ewm(halflife=10).mean() against ewma() for every shape.
# One (shape, pandas_best, ewma_best) tuple is collected per shape.
results = []
for shape in wide_shapes + square_shapes + tall_shapes:
    arr = np.random.rand(*shape)
    # Build the DataFrame outside the timed statement so that only
    # ewm().mean() itself is measured.
    arr_as_df = pd.DataFrame(arr)
    # `-o` makes %timeit return its result object instead of only printing it.
    pd_timing = %timeit -o arr_as_df.ewm(halflife=10).mean()
    new_timing = %timeit -o ewma(arr, halflife=10)
    # Keep the `.best` figure of each timing result for the comparison table.
    results.append((shape, pd_timing.best, new_timing.best))

1.48 ms ± 37.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
3.63 µs ± 73.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
8.14 ms ± 625 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
11.7 µs ± 291 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
70.6 ms ± 1.87 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
99 µs ± 2.27 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
692 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.24 ms ± 54.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
7.07 s ± 275 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
39.1 ms ± 345 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.69 ms ± 6.17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
31.3 µs ± 381 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
78 ms ± 3.26 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
2.32 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9

In [7]:
# Tabulate the benchmark and add the pandas / ewma time ratio, rounded to a
# whole number, as the speed-up multiple.
results_as_df = pd.DataFrame.from_records(
    results,
    columns=['shape', 'pandas time (s)', 'fastats time (s)'],
).assign(
    time_diff_multiple=lambda frame: (
        frame['pandas time (s)'] / frame['fastats time (s)']
    ).round()
)
results_as_df

Unnamed: 0,shape,pandas time (s),fastats time (s),time_diff_multiple
0,"(10, 100)",0.001445,4e-06,410.0
1,"(10, 1000)",0.007467,1.1e-05,677.0
2,"(10, 10000)",0.067716,9.5e-05,710.0
3,"(10, 100000)",0.673922,0.003158,213.0
4,"(10, 1000000)",6.861648,0.03873,177.0
5,"(100, 100)",0.00168,3.1e-05,54.0
6,"(1000, 1000)",0.07351,0.002116,35.0
7,"(10000, 10000)",9.222459,0.386896,24.0
8,"(100, 10)",0.000802,1.7e-05,46.0
9,"(1000, 10)",0.001307,0.000166,8.0
