In [1]:
import pandas as pd
import numpy as np
from numba import njit

In [2]:
@njit
def ewma(x, halflife):
    """Exponentially weighted moving average down the rows of a 2-D array.

    Each column is smoothed independently along axis 0. Row ``t`` of the
    result is the weighted mean of rows ``0..t`` of ``x``, where the weight
    of row ``t - k`` is ``decay ** k`` and ``decay = 0.5 ** (1 / halflife)``.
    For NaN-free input this is the quantity the notebook compares against
    ``pd.DataFrame(x).ewm(halflife=halflife).mean()``.

    Parameters
    ----------
    x : 2-D array
        Input values; indexed as ``x[i, :]``, so it must have exactly two
        dimensions. Rows are the axis being smoothed.
    halflife : float
        Number of rows after which a weight has decayed to one half.
        Must be strictly positive.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with the same shape as ``x``.

    Raises
    ------
    ValueError
        If ``halflife <= 0``.

    Notes
    -----
    NaNs are not special-cased: a NaN in a column propagates to every later
    row of that column, so results may differ from pandas on data with
    missing values.
    """
    # A non-positive halflife gives a decay factor >= 1 (or a division by
    # zero), i.e. weights that grow with age; reject it up front.
    if halflife <= 0:
        raise ValueError("halflife must satisfy: halflife > 0")

    decay_coefficient = np.exp(np.log(0.5) / halflife)
    out = np.empty_like(x, dtype=np.float64)

    n_rows = out.shape[0]
    if n_rows == 0:
        return out

    # The first row is its own average; its weight (decay ** 0) seeds the sum.
    out[0, :] = x[0, :]
    sum_prior = 1.0

    for i in range(1, n_rows):
        # Running sum of weights: decay**0 + decay**1 + ... + decay**i.
        sum_i = sum_prior + np.power(decay_coefficient, i)
        # out[i - 1] * sum_prior recovers the previous weighted numerator;
        # age it by one step, add the new observation, then renormalise.
        out[i, :] = (decay_coefficient * out[i - 1, :] * sum_prior + x[i, :]) / sum_i
        sum_prior = sum_i

    return out

In [3]:
# Sample matrix for the equivalence check against pandas. Seeded so the check
# sees the same data on every Restart & Run All.
x = np.random.default_rng(42).random((100, 100))

In [4]:
# Sanity check: the numba implementation must reproduce pandas' own
# halflife-based exponentially weighted mean on the sample matrix.
pd.testing.assert_frame_equal(
    pd.DataFrame(x).ewm(halflife=10).mean(),
    pd.DataFrame(ewma(x, halflife=10)),
)

In [5]:
# Benchmark grid of (rows, columns) shapes, each family growing by powers of ten.
# Few rows, many columns: 10 x 10^2 ... 10 x 10^6.
wide_shapes = [(10, 10 ** exponent) for exponent in (2, 3, 4, 5, 6)]
# Equal rows and columns: 10^2 x 10^2 ... 10^4 x 10^4.
square_shapes = [(10 ** exponent,) * 2 for exponent in (2, 3, 4)]
# Many rows, few columns: 10^2 x 10 ... 10^6 x 10.
tall_shapes = [(10 ** exponent, 10) for exponent in (2, 3, 4, 5, 6)]

In [6]:
# Time pandas' ewm mean against the numba `ewma` on every benchmark shape.
# Each record appended to `results` is (shape, best pandas time, best ewma time).
results = []
for shape in wide_shapes + square_shapes + tall_shapes:
    # Fresh random matrix of the requested shape; the same data feeds both timings.
    arr = np.random.rand(*shape)
    # Build the DataFrame outside the timed statement so only ewm().mean() is measured.
    arr_as_df = pd.DataFrame(arr)
    # `-o` makes %timeit hand back its result object, from which `.best` is read below.
    pd_timing = %timeit -o arr_as_df.ewm(halflife=10).mean()
    new_timing = %timeit -o ewma(arr, halflife=10)
    results.append((shape, pd_timing.best, new_timing.best))

921 µs ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
2.65 µs ± 28.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
4.37 ms ± 30.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.43 µs ± 75 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
40.5 ms ± 823 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
76.2 µs ± 284 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
396 ms ± 2.05 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
3.04 ms ± 43.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.05 s ± 54.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
57.4 ms ± 510 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.16 ms ± 15 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
22.9 µs ± 52.7 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
58.4 ms ± 394 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
3.11 ms ± 47.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Tabulate the benchmark records and append how many times slower pandas was
# than the numba implementation, rounded to a whole multiple.
results_as_df = (
    pd.DataFrame.from_records(
        results,
        columns=['shape', 'pandas time (s)', 'fastats time (s)'],
    )
    .assign(
        time_diff_multiple=lambda timings: (
            timings['pandas time (s)'] / timings['fastats time (s)']
        ).round()
    )
)
results_as_df

Unnamed: 0,shape,pandas time (s),fastats time (s),time_diff_multiple
0,"(10, 100)",0.000908,3e-06,347.0
1,"(10, 1000)",0.004336,9e-06,465.0
2,"(10, 10000)",0.039539,7.6e-05,522.0
3,"(10, 100000)",0.393719,0.002988,132.0
4,"(10, 1000000)",3.987798,0.05697,70.0
5,"(100, 100)",0.001145,2.3e-05,50.0
6,"(1000, 1000)",0.057874,0.003051,19.0
7,"(10000, 10000)",7.645597,0.342471,22.0
8,"(100, 10)",0.000574,1.3e-05,43.0
9,"(1000, 10)",0.00093,0.000131,7.0
