In [None]:
#hide
%load_ext autoreload
%autoreload 2

# Window ops

> Naive and fast implementations of common window operations.

This library is intended as an alternative to `pd.Series.rolling` and `pd.Series.expanding`. It gains a speedup by using numba-optimized functions that operate on numpy arrays and by skipping input checks. There are also online classes for more efficient updates of window statistics.

## Install

`pip install window-ops`

## How to use

For transformations that map `n_samples` -> `n_samples`, you can use `{[seasonal_](rolling|expanding)}_{(mean|max|min|std)}` on an array.

#### Benchmarks

In [None]:
#hide
import random
import time
from functools import partial

import numpy as np
import pandas as pd
from IPython.display import Markdown

import window_ops
from window_ops.ewm import *
from window_ops.expanding import *
from window_ops.online import *
from window_ops.rolling import *

In [None]:
# length of the benchmark input array
n_samples = 1_000
# window used by the rolling operations
window_size = 8
# season length used by the seasonal operations
season_length = 7
# how many times each function is called while it is being timed
execute_times = 1_000

In [None]:
#hide
# Seed both random number generators so the benchmark input is reproducible.
random.seed(0)
np.random.seed(0)

# The same random data in two forms: a numpy array and a pandas Series.
y = np.random.rand(n_samples)
ys = pd.Series(y)

# Label every position with its offset inside the season and group the
# Series by that label for the seasonal pandas computations.
groups = np.mod(np.arange(n_samples), season_length)
grouped_y = ys.groupby(groups)

In [None]:
#hide
# Benchmark every (kind, op) combination with both implementations.
# While the loop runs, `times[impl]['<kind>_<op>']` holds the total elapsed
# seconds of `execute_times` calls; the last line turns that into a DataFrame
# of average milliseconds per call.
times = {'window_ops': {}, 'pandas': {}}
ops = ('mean', 'max', 'min', 'std')
for kind in ('rolling', 'expanding', 'seasonal_rolling', 'seasonal_expanding'):
    for op in ops:
        # Build the two callables to compare. The window_ops function is looked
        # up by name ('<kind>_<op>') in the matching module; the pandas lambdas
        # read `op` when they are called, which only happens further down in
        # this same iteration.
        functions = {}
        if kind == 'rolling':
            functions['window_ops'] = partial(getattr(window_ops.rolling, f'{kind}_{op}'), window_size=window_size)
            functions['pandas'] = lambda y: y.rolling(window_size, min_periods=window_size).agg(op)
        elif kind == 'expanding':
            functions['window_ops'] = getattr(window_ops.expanding, f'{kind}_{op}')
            functions['pandas'] = lambda y: y.expanding().agg(op)
        elif kind == 'seasonal_rolling':
            functions['window_ops'] = partial(getattr(window_ops.rolling, f'{kind}_{op}'), 
                                              season_length=season_length, window_size=window_size)
            # The pandas counterpart applies the window within each group of `grouped_y`.
            functions['pandas'] = lambda grouped_y: grouped_y.transform(lambda x: x.rolling(window_size).agg(op))
        elif kind == 'seasonal_expanding':
            functions['window_ops'] = partial(getattr(window_ops.expanding, f'{kind}_{op}'), season_length=season_length)
            functions['pandas'] = lambda grouped_y: grouped_y.transform(lambda x: x.expanding().agg(op))
        else:
            raise ValueError(kind)
            
        results = {}            
        for impl in ('window_ops', 'pandas'):
            # window_ops gets the raw numpy array; pandas gets the Series, or the
            # grouped Series for the seasonal kinds.
            if impl == 'window_ops':
                x = y
            else:
                x = grouped_y if kind.startswith('seasonal') else ys
            func = functions[impl]
            # Untimed first call: its output is kept for the equality check below,
            # and any first-call overhead stays out of the measurement
            # (presumably numba compilation for window_ops -- TODO confirm).
            results[impl] = func(x)
            start = time.perf_counter()
            for _ in range(execute_times):
                func(x)
            times[impl][f'{kind}_{op}'] = time.perf_counter() - start
        # Both implementations must agree; NaNs in matching positions count as equal.
        assert np.allclose(results['window_ops'], results['pandas'], equal_nan=True)
# Total seconds -> average milliseconds per call.
times = pd.DataFrame(times) * 1_000 / execute_times

In [None]:
#hide
def display_dataframe(df, fmt):
    """Render `df` as a Markdown table with every cell formatted by `fmt`.

    Parameters
    ----------
    df : pd.DataFrame
        Table to display.
    fmt : str
        Format string applied to each cell through `fmt.format`, e.g. '{:.2f}'.

    Returns
    -------
    IPython.display.Markdown
        The table produced by `DataFrame.to_markdown`, wrapped for rich display.
    """
    # Format column by column with `Series.map`; `DataFrame.applymap` is
    # deprecated in recent pandas releases.
    formatted = df.apply(lambda col: col.map(fmt.format))
    # The cells are already strings. Stop tabulate from parsing them back into
    # numbers (which turns '0.00' into '0' and '0.20' into '0.2'), and
    # right-align the value columns explicitly because string columns are
    # left-aligned by default. The first alignment entry is for the index.
    alignment = ('left',) + ('right',) * formatted.shape[1]
    return Markdown(formatted.to_markdown(disable_numparse=True, colalign=alignment))

Average time per call, in milliseconds.

In [None]:
# Show the timings with two decimal places.
display_dataframe(times, fmt='{:.2f}')

|                         |   window_ops |   pandas |
|:------------------------|-------------:|---------:|
| rolling_mean            |         0    |     0.19 |
| rolling_max             |         0.01 |     0.2  |
| rolling_min             |         0.01 |     0.2  |
| rolling_std             |         0.01 |     0.22 |
| expanding_mean          |         0    |     0.13 |
| expanding_max           |         0.01 |     0.14 |
| expanding_min           |         0.01 |     0.13 |
| expanding_std           |         0.01 |     0.15 |
| seasonal_rolling_mean   |         0.01 |     2.99 |
| seasonal_rolling_max    |         0.02 |     2.68 |
| seasonal_rolling_min    |         0.02 |     2.64 |
| seasonal_rolling_std    |         0.02 |     2.84 |
| seasonal_expanding_mean |         0.02 |     2.36 |
| seasonal_expanding_max  |         0.02 |     2.19 |
| seasonal_expanding_min  |         0.02 |     2.24 |
| seasonal_expanding_std  |         0.02 |     2.43 |

In [None]:
#hide
# Ratio of the pandas time to the window_ops time for every operation,
# stored as a one-column frame so it can be displayed as a table.
speedups = (times['pandas'] / times['window_ops']).to_frame('times faster')

In [None]:
# Show the speedups formatted with no decimal places.
display_dataframe(speedups, fmt='{:.0f}')

|                         |   times faster |
|:------------------------|---------------:|
| rolling_mean            |             89 |
| rolling_max             |             16 |
| rolling_min             |             15 |
| rolling_std             |             38 |
| expanding_mean          |             55 |
| expanding_max           |             11 |
| expanding_min           |             10 |
| expanding_std           |             17 |
| seasonal_rolling_mean   |            204 |
| seasonal_rolling_max    |            110 |
| seasonal_rolling_min    |            111 |
| seasonal_rolling_std    |            161 |
| seasonal_expanding_mean |            156 |
| seasonal_expanding_max  |            127 |
| seasonal_expanding_min  |            130 |
| seasonal_expanding_std  |            119 |