In [1]:
import pandas as pd
import numpy as np
from numba import njit

In [2]:
@njit
def ewma(x, halflife):
    """
    Exponentially Weighted Moving Average along the first axis of ``x``.

    ``out[i]`` is the weighted mean of ``x[0] .. x[i]`` in which the weight of
    an observation halves every ``halflife`` steps back, i.e. ``x[i - k]`` is
    weighted by ``decay ** k`` with ``decay = 0.5 ** (1 / halflife)``.

    The values in ``x`` are expected to be finite. ``halflife`` must be a
    finite, strictly positive number; this is enforced by an assertion.

    Returns a float64 array with the same shape as ``x``.

    >>> ewma(np.arange(5), halflife=2)
    array([ 0.        ,  0.58578644,  1.22654092,  1.91911977,  2.65947261])
    """
    assert np.isfinite(halflife) and 0 < halflife

    decay_coefficient = np.exp(np.log(0.5) / halflife)
    out = np.empty_like(x, dtype=np.float64)

    # Both accumulators start as floats before the loop so their type never
    # changes and they are always defined, whatever the length of x.
    oldest_weight = 1.0  # weight currently carried by x[0]: decay ** i
    sum_prior = 1.0      # sum of all weights used for the previous output

    for i in range(out.shape[0]):
        if i == 0:
            # A single observation is its own average.
            out[i] = x[i]
        else:
            # Running product instead of a fresh power at every step.
            oldest_weight *= decay_coefficient
            sum_i = sum_prior + oldest_weight
            # out[i - 1] * sum_prior recovers the previous weighted sum; age it
            # by one step, add the new observation and renormalise.
            out[i] = (decay_coefficient * out[i - 1] * sum_prior + x[i]) / sum_i
            sum_prior = sum_i

    return out

In [3]:
@njit
def ewma_2d(x, halflife):
    """
    Exponentially weighted moving average down the rows of a 2-D array.

    Each column is averaged independently: ``out[i, j]`` is the weighted mean
    of ``x[0, j] .. x[i, j]`` where ``x[i - k, j]`` carries the weight
    ``decay ** k`` and ``decay = 0.5 ** (1 / halflife)``.

    ``x`` must be two-dimensional and ``halflife`` a finite, strictly positive
    number; both conditions are enforced with assertions.

    Returns a float64 array with the same shape as ``x``.
    """
    assert x.ndim == 2
    assert np.isfinite(halflife) and halflife > 0

    decay_coefficient = np.exp(np.log(0.5) / halflife)
    out = np.empty_like(x, dtype=np.float64)

    n_rows = out.shape[0]
    n_cols = x.shape[1]
    if n_rows == 0:
        # Nothing to average; hand back the empty result as is.
        return out

    # The first row is its own average and has total weight 1.
    out[0, :] = x[0, :]
    weight_total = 1
    oldest_weight = 1

    for row in range(1, n_rows):
        # Weight now carried by the very first observation: decay ** row.
        oldest_weight = oldest_weight * decay_coefficient
        new_total = weight_total + oldest_weight

        for col in range(n_cols):
            # Previous average times its total weight is the previous weighted
            # sum; age it by one step before adding the new value.
            aged_sum = decay_coefficient * out[row - 1, col] * weight_total
            out[row, col] = (aged_sum + x[row, col]) / new_total

        weight_total = new_total

    return out

In [4]:
# Test data for the correctness checks. A locally seeded generator makes the
# comparison against pandas reproducible without touching NumPy's global RNG.
x = np.random.RandomState(0).rand(100, 100)

In [5]:
# Correctness check: ewma must agree with pandas' ewm(halflife=10).mean() on x.
pd.testing.assert_frame_equal(
    left=pd.DataFrame(x).ewm(halflife=10).mean(),
    right=pd.DataFrame(ewma(x, halflife=10)),
)

In [6]:
# Correctness check: ewma_2d must agree with pandas' ewm(halflife=10).mean() on x.
pd.testing.assert_frame_equal(
    left=pd.DataFrame(x).ewm(halflife=10).mean(),
    right=pd.DataFrame(ewma_2d(x, halflife=10)),
)

In [7]:
# Array shapes used by the benchmarks below, grouped by aspect ratio:
# 10 rows with a growing second dimension, square, and a growing first
# dimension with 10 columns.
wide_shapes = [(10, 10 ** exponent) for exponent in (2, 3, 4)]
square_shapes = [(10 ** exponent,) * 2 for exponent in (2, 3)]
tall_shapes = [(10 ** exponent, 10) for exponent in (2, 3, 4)]

## EWMA

In [8]:
results = []
for idx, shape in enumerate(wide_shapes + square_shapes + tall_shapes):
    print('{0} of {1}'.format(idx + 1, len(wide_shapes + square_shapes + tall_shapes)))
    arr = np.random.rand(*shape)
    arr_as_df = pd.DataFrame(arr)
    pd_timing = %timeit -o -q arr_as_df.ewm(halflife=10).mean()
    new_timing = %timeit -o -q ewma(arr, halflife=10)
    results.append((shape, pd_timing.best, new_timing.best))

1 of 8
2 of 8
3 of 8
4 of 8
5 of 8
6 of 8
7 of 8
8 of 8


In [9]:
# One row per benchmarked shape; time_diff_multiple is the pandas time divided
# by the ewma time, rounded to a whole number.
results_as_df = pd.DataFrame.from_records(
    results,
    columns=['shape', 'pandas time (s)', 'fastats time (s)'],
)
results_as_df['time_diff_multiple'] = round(
    results_as_df['pandas time (s)'] / results_as_df['fastats time (s)']
)
results_as_df

Unnamed: 0,shape,pandas time (s),fastats time (s),time_diff_multiple
0,"(10, 100)",0.00257,7e-06,357.0
1,"(10, 1000)",0.011441,1.7e-05,678.0
2,"(10, 10000)",0.102917,0.000207,498.0
3,"(100, 100)",0.003098,6.4e-05,49.0
4,"(1000, 1000)",0.10426,0.007915,13.0
5,"(100, 10)",0.001542,3.8e-05,41.0
6,"(1000, 10)",0.002636,0.00035,8.0
7,"(10000, 10)",0.013061,0.003192,4.0


## EWMA_2D

In [10]:
results = []
for idx, shape in enumerate(wide_shapes + square_shapes + tall_shapes):
    print('{0} of {1}'.format(idx + 1, len(wide_shapes + square_shapes + tall_shapes)))
    arr = np.random.rand(*shape)
    arr_as_df = pd.DataFrame(arr)
    pd_timing = %timeit -o -q arr_as_df.ewm(halflife=10).mean()
    new_timing = %timeit -o -q ewma_2d(arr, halflife=10)
    results.append((shape, pd_timing.best, new_timing.best))

1 of 8
2 of 8
3 of 8
4 of 8
5 of 8
6 of 8
7 of 8
8 of 8


In [12]:
# One row per benchmarked shape; time_diff_multiple is the pandas time divided
# by the ewma_2d time, rounded to a whole number.
results_as_df_2d = pd.DataFrame.from_records(
    results,
    columns=['shape', 'pandas time (s)', 'fastats time (s)'],
)
results_as_df_2d['time_diff_multiple'] = round(
    results_as_df_2d['pandas time (s)'] / results_as_df_2d['fastats time (s)']
)
results_as_df_2d

Unnamed: 0,shape,pandas time (s),fastats time (s),time_diff_multiple
0,"(10, 100)",0.002565,3e-06,813.0
1,"(10, 1000)",0.012412,1.5e-05,833.0
2,"(10, 10000)",0.119828,8.8e-05,1364.0
3,"(100, 100)",0.003023,1.6e-05,187.0
4,"(1000, 1000)",0.164218,0.006928,24.0
5,"(100, 10)",0.001673,4e-06,396.0
6,"(1000, 10)",0.002702,2.6e-05,105.0
7,"(10000, 10)",0.014882,0.000228,65.0
