In [None]:
#hide
%load_ext autoreload
%autoreload 2

# Window ops

> Naive and fast implementations of common window operations.

This library is intended to be used as an alternative to `pd.Series.rolling` and `pd.Series.expanding`. It gains a speedup by using numba-optimized functions that operate on numpy arrays and by avoiding input checks. There are also online classes for more efficient updates of window statistics.

## Install

`pip install window-ops`

## How to use

For a transformation `n_samples` -> `n_samples` (an output array of the same length as the input), you can use `{[seasonal_](rolling|expanding)}_{(mean|max|min|std)}` on an array.

#### Benchmarks

In [None]:
#hide
import random
import time
from functools import partial

import numpy as np
import pandas as pd
from IPython.display import Markdown

import window_ops
from window_ops.ewm import *
from window_ops.expanding import *
from window_ops.online import *
from window_ops.rolling import *

In [None]:
n_samples = 1_000  # number of elements in the benchmarked array
window_size = 8  # window length for the rolling operations
season_length = 7  # season length for the seasonal operations
execute_times = 1_000  # number of timed calls per function (reported times are averaged over them)

In [None]:
#hide
# Seed both RNGs before sampling so the benchmark input is reproducible.
np.random.seed(0)
random.seed(0)
y = np.random.rand(n_samples)  # raw numpy array; this is what the window_ops functions receive
ys = pd.Series(y)  # same data as a Series; this is what the non-seasonal pandas calls receive
# Label each position with its index modulo `season_length`; grouping by these labels
# gives the pandas counterpart of the seasonal operations.
groups = np.arange(n_samples) % season_length
grouped_y = ys.groupby(groups)

In [None]:
#hide
# Benchmark every window_ops transform against its pandas equivalent.
# While the loop runs, `times[impl][name]` holds the total seconds spent on
# `execute_times` calls; the last line converts it to average milliseconds per call.
times = {'window_ops': {}, 'pandas': {}}
ops = ('mean', 'max', 'min', 'std')
for kind in ('rolling', 'expanding', 'seasonal_rolling', 'seasonal_expanding'):
    for op in ops:
        name = f'{kind}_{op}'
        # The pandas lambdas below read `op` from the enclosing loop; that is safe
        # because each one is called before the loop advances to the next `op`.
        functions = {}
        if kind == 'rolling':
            functions['window_ops'] = partial(getattr(window_ops.rolling, name), window_size=window_size)
            functions['pandas'] = lambda y: y.rolling(window_size, min_periods=window_size).agg(op)
        elif kind == 'expanding':
            functions['window_ops'] = getattr(window_ops.expanding, name)
            functions['pandas'] = lambda y: y.expanding().agg(op)
        elif kind == 'seasonal_rolling':
            functions['window_ops'] = partial(getattr(window_ops.rolling, name),
                                              season_length=season_length, window_size=window_size)
            functions['pandas'] = lambda grouped_y: grouped_y.transform(lambda x: x.rolling(window_size).agg(op))
        elif kind == 'seasonal_expanding':
            functions['window_ops'] = partial(getattr(window_ops.expanding, name), season_length=season_length)
            functions['pandas'] = lambda grouped_y: grouped_y.transform(lambda x: x.expanding().agg(op))
        else:
            raise ValueError(kind)

        results = {}
        for impl in ('window_ops', 'pandas'):
            # window_ops works on the raw array; pandas gets the Series, or the
            # season-grouped Series for the seasonal variants.
            if impl == 'window_ops':
                x = y
            else:
                x = grouped_y if kind.startswith('seasonal') else ys
            func = functions[impl]
            # Untimed first call: its output feeds the equality check below, and any
            # one-off first-call cost (presumably numba compilation for window_ops)
            # stays out of the measurement.
            results[impl] = func(x)
            # perf_counter is a monotonic, high-resolution clock meant for measuring
            # intervals; time.time() follows the wall clock, which can be adjusted
            # mid-run and is coarse on some platforms.
            start = time.perf_counter()
            for _ in range(execute_times):
                func(x)
            times[impl][name] = time.perf_counter() - start
        # Both implementations must agree (NaNs in matching positions count as equal).
        assert np.allclose(results['window_ops'], results['pandas'], equal_nan=True), \
            f'{name}: window_ops and pandas results differ'
times = pd.DataFrame(times) * 1_000 / execute_times

Average times in milliseconds.

In [None]:
#hide
def display_dataframe(df):
    """Render `df` as a Markdown table with floats shown to two decimal places.

    The formatting is delegated to the table renderer through `floatfmt`
    instead of pre-formatting each cell into a string. Pre-formatted numeric
    strings get parsed back into numbers by the renderer, which dropped
    trailing zeros ("0.00" was shown as "0", "3.80" as "3.8"). This also
    avoids `DataFrame.applymap`, which is deprecated in recent pandas.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to display.

    Returns
    -------
    IPython.display.Markdown
        Displayable object wrapping the Markdown table.
    """
    return Markdown(df.to_markdown(floatfmt='.2f'))

In [None]:
# Average time per call, in milliseconds, for each operation and implementation.
display_dataframe(times)

|                         |   window_ops |   pandas |
|:------------------------|-------------:|---------:|
| rolling_mean            |         0    |     0.33 |
| rolling_max             |         0.01 |     0.37 |
| rolling_min             |         0.01 |     0.32 |
| rolling_std             |         0.01 |     0.33 |
| expanding_mean          |         0    |     0.31 |
| expanding_max           |         0.01 |     0.32 |
| expanding_min           |         0.01 |     0.32 |
| expanding_std           |         0.01 |     0.33 |
| seasonal_rolling_mean   |         0.01 |     3.78 |
| seasonal_rolling_max    |         0.03 |     3.85 |
| seasonal_rolling_min    |         0.02 |     3.8  |
| seasonal_rolling_std    |         0.02 |     3.93 |
| seasonal_expanding_mean |         0.01 |     3.73 |
| seasonal_expanding_max  |         0.24 |     3.86 |
| seasonal_expanding_min  |         0.22 |     3.83 |
| seasonal_expanding_std  |         0.02 |     3.86 |

In [None]:
#hide
# How many times faster window_ops is than pandas for each operation:
# the pandas time divided by the window_ops time, as a one-column frame.
speedups = times['pandas'].div(times['window_ops']).to_frame('times faster')

In [None]:
# Ratio of the pandas time to the window_ops time for each operation.
display_dataframe(speedups)

|                         |   times faster |
|:------------------------|---------------:|
| rolling_mean            |         122.05 |
| rolling_max             |          29.85 |
| rolling_min             |          23.45 |
| rolling_std             |          64.31 |
| expanding_mean          |         104.72 |
| expanding_max           |          22.17 |
| expanding_min           |          23.35 |
| expanding_std           |          40.69 |
| seasonal_rolling_mean   |         275.68 |
| seasonal_rolling_max    |         148.73 |
| seasonal_rolling_min    |         154.22 |
| seasonal_rolling_std    |         229.91 |
| seasonal_expanding_mean |         267.48 |
| seasonal_expanding_max  |          16.25 |
| seasonal_expanding_min  |          17.34 |
| seasonal_expanding_std  |         212.16 |