In [1]:
import pandas as pd
import numpy as np
%config Completer.use_jedi=False

In [2]:
# Baseline example: an integer Series holding 0..4 on the default integer index.
s = pd.Series(range(5))
s

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [3]:
# Sum each pair of consecutive observations; row 0 has no complete
# 2-row window, so its result is NaN.
s.rolling(window=2).sum()

0    NaN
1    1.0
2    3.0
3    5.0
4    7.0
dtype: float64

In [5]:
# Same values as before, now indexed by five consecutive daily timestamps.
s = pd.Series(range(5), index=pd.date_range('2020-01-01', periods=5, freq='1D'))
s

2020-01-01    0
2020-01-02    1
2020-01-03    2
2020-01-04    3
2020-01-05    4
Freq: D, dtype: int64

In [10]:
# An offset string sizes the window in time ('3D' = three days) rather than
# in rows; the recorded output has no leading NaN, unlike the window=2 case.
s.rolling(window='3D').sum()

2020-01-01    0.0
2020-01-02    1.0
2020-01-03    3.0
2020-01-04    6.0
2020-01-05    9.0
Freq: D, dtype: float64

In [11]:
# Two interleaved groups ('a' and 'b') with a running integer in column B.
group_labels = ['a', 'b', 'a', 'b', 'a']
df = pd.DataFrame({'A': group_labels, 'B': range(len(group_labels))})
df

Unnamed: 0,A,B
0,a,0
1,b,1
2,a,2
3,b,3
4,a,4


In [12]:
# Running (expanding) sum of B computed separately within each group of A;
# the result is indexed by (A, original row label).
df.groupby('A').expanding().sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,0.0
a,2,2.0
a,4,6.0
b,1,1.0
b,3,4.0


In [13]:
# Plain per-group total, for comparison: each value equals the last
# expanding sum of its group.
df.groupby('A').sum()

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
a,6
b,4


In [14]:
# Series with missing values, used below to show how min_periods treats NaN.
s = pd.Series([np.nan, 1, 2, np.nan, np.nan, 3])
s

0    NaN
1    1.0
2    2.0
3    NaN
4    NaN
5    3.0
dtype: float64

In [15]:
# min_periods=1: a value is produced as soon as the window holds one non-NaN
# observation; row 0 stays NaN because its window contains only NaN.
s.rolling(window=3, min_periods=1).sum()

0    NaN
1    1.0
2    3.0
3    3.0
4    2.0
5    3.0
dtype: float64

In [16]:
# min_periods=2: windows holding fewer than two non-NaN observations yield NaN
# (rows 0, 1, 4 and 5 in the recorded output).
s.rolling(window=3, min_periods=2).sum()

0    NaN
1    NaN
2    3.0
3    3.0
4    NaN
5    NaN
dtype: float64

In [18]:
# min_periods=None: the recorded output is NaN everywhere, because no complete
# 3-row window of this series is free of NaN.
# NOTE(review): this relies on the pandas default of min_periods == window
# size for integer windows - confirm against the pandas docs.
s.rolling(window=3, min_periods=None).sum()

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
dtype: float64

## Rolling window
---

In [19]:
# Irregularly spaced dates: Jan 2 is missing and there is a long gap before Jan 29.
times = ['2020-01-01', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-29']
s = pd.Series(range(5), index=pd.DatetimeIndex(times))
s

2020-01-01    0
2020-01-03    1
2020-01-04    2
2020-01-05    3
2020-01-29    4
dtype: int64

In [20]:
# Window of 2 observations: it counts rows, so the gaps between dates are
# ignored (Jan 29 is still summed with Jan 5).
s.rolling(window=2).sum()

2020-01-01    NaN
2020-01-03    1.0
2020-01-04    3.0
2020-01-05    5.0
2020-01-29    7.0
dtype: float64

In [21]:
# Window covering 2 days' worth of observations: Jan 29 has no neighbour
# within 2 days, so its sum is just its own value.
s.rolling(window='2D').sum()

2020-01-01    0.0
2020-01-03    1.0
2020-01-04    3.0
2020-01-05    5.0
2020-01-29    4.0
dtype: float64

## Centering windows
---

In [22]:
# Default (trailing) window: each label gets the mean of itself and the four
# preceding rows, so the first four results are NaN.
s = pd.Series(range(10))
s.rolling(window=5).mean()

0    NaN
1    NaN
2    NaN
3    NaN
4    2.0
5    3.0
6    4.0
7    5.0
8    6.0
9    7.0
dtype: float64

In [23]:
# center=True puts each label in the middle of its window, so the NaN rows
# are split between the start and the end of the result.
s.rolling(window=5, center=True).mean()

0    NaN
1    NaN
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    NaN
9    NaN
dtype: float64

## Rolling window endpoints
---

In [24]:
# Five timestamps one second apart, except for a two-second gap before :06.
stamps = [
    "20130101 09:00:01",
    "20130101 09:00:02",
    "20130101 09:00:03",
    "20130101 09:00:04",
    "20130101 09:00:06",
]
df = pd.DataFrame({"x": 1}, index=[pd.Timestamp(stamp) for stamp in stamps])
df

Unnamed: 0,x
2013-01-01 09:00:01,1
2013-01-01 09:00:02,1
2013-01-01 09:00:03,1
2013-01-01 09:00:04,1
2013-01-01 09:00:06,1


In [27]:
# One result column per endpoint rule; "right" is the default.
for closed in ("right", "both", "left", "neither"):
    df[closed] = df.rolling("2s", closed=closed).x.sum()

In [28]:
# Side-by-side view of the 2-second rolling sum of x under each `closed` rule.
df

Unnamed: 0,x,right,both,left,neither
2013-01-01 09:00:01,1,1.0,1.0,,
2013-01-01 09:00:02,1,2.0,2.0,1.0,1.0
2013-01-01 09:00:03,1,2.0,3.0,2.0,1.0
2013-01-01 09:00:04,1,2.0,3.0,2.0,1.0
2013-01-01 09:00:06,1,1.0,2.0,1.0,


## Custom window rolling
---

In [29]:
# Per-row flags read by CustomIndexer (defined in the next cell):
# True -> expanding window, False -> fixed-size window.
use_expanding = [True, False, True, False, True]
df = pd.DataFrame({"values": range(5)})
df

Unnamed: 0,values
0,0
1,1
2,2
3,3
4,4


In [30]:
from pandas.api.indexers import BaseIndexer
class CustomIndexer(BaseIndexer):
    """Indexer that chooses, row by row, between an expanding and a fixed window.

    Two attributes are read by ``get_window_bounds`` and are expected to be
    supplied as keyword arguments at construction time:

    * ``use_expanding`` - indexable by row position; a truthy entry selects
      an expanding window for that row.
    * ``window_size`` - length of the forward-looking window used for rows
      whose ``use_expanding`` entry is falsy.
    """

    def get_window_bounds(self, num_values, min_periods, center, closed, step=None):
        """Compute the half-open ``[start, end)`` bounds of every row's window.

        Parameters
        ----------
        num_values : int
            Number of rows to produce bounds for.
        min_periods, center, closed
            Accepted to satisfy the indexer interface; not used here.
        step : optional
            Accepted so that pandas versions which pass ``step`` as a keyword
            can call this method; not used here.

        Returns
        -------
        tuple of two ``np.int64`` arrays of length ``num_values``
            ``(start, end)`` positions for each row.
        """
        start = np.empty(num_values, dtype=np.int64)
        end = np.empty(num_values, dtype=np.int64)
        for i in range(num_values):
            if self.use_expanding[i]:
                # Expanding window: everything from the first row through row i.
                start[i] = 0
                end[i] = i + 1
            else:
                # Fixed window starting at row i; clamp the end so the window
                # never reaches past the last available row.
                start[i] = i
                end[i] = min(i + self.window_size, num_values)
        return start, end
# window_size and use_expanding are passed as keyword arguments; the class
# reads them back as self.window_size and self.use_expanding.
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
df.rolling(indexer).sum()

Unnamed: 0,values
0,0.0
1,1.0
2,3.0
3,3.0
4,10.0


## Rolling apply
---

In [31]:
def mad(x):
    """Return the mean absolute deviation of ``x`` about its own mean."""
    centre = x.mean()
    deviations = np.fabs(x - centre)
    return deviations.mean()
s = pd.Series(range(10))

# Apply mad to every 4-row window.
# NOTE(review): per the pandas docs raw=True hands each window to the
# function as a NumPy array - confirm for the pandas version in use.
s.rolling(window=4).apply(mad, raw=True)

0    NaN
1    NaN
2    NaN
3    1.0
4    1.0
5    1.0
6    1.0
7    1.0
8    1.0
9    1.0
dtype: float64

In [32]:
# Same computation with raw=False; the recorded result is identical.
# NOTE(review): per the pandas docs raw=False hands each window to the
# function as a Series - confirm for the pandas version in use.
s.rolling(window=4).apply(mad, raw=False)

0    NaN
1    NaN
2    NaN
3    1.0
4    1.0
5    1.0
6    1.0
7    1.0
8    1.0
9    1.0
dtype: float64

## Binary window functions
---

In [35]:
# Random 10x4 frame on a daily index starting 2020-01-01.
# NOTE(review): no random seed is set in the visible cells, so the printed
# values presumably differ on every run - confirm, and consider seeding.
df = pd.DataFrame(
        np.random.randn(10, 4),
        index=pd.date_range("2020-01-01", periods=10),
        columns=["A", "B", "C", "D"],
    )
print(df)
# Replace each column by its running total.
df = df.cumsum()
print(df)
# Keep only the first four rows for the correlation example.
df2 = df[:4]
print(df2)
# Rolling 2-row correlation of every column with column B.
df2.rolling(window=2).corr(df2["B"])

                   A         B         C         D
2020-01-01  1.489678 -1.309462 -1.062893  1.469829
2020-01-02  0.553684  0.157114 -0.422946  0.846427
2020-01-03 -0.103848  0.561674  0.102911 -1.858137
2020-01-04 -1.231929  1.755637 -0.827609 -0.197673
2020-01-05  1.251455  0.269165  0.632590  0.104955
2020-01-06 -0.412423  2.180769  3.280583  2.022628
2020-01-07 -1.475718  1.195374 -0.752726  0.563125
2020-01-08  1.969305  0.184828  0.446919  2.313013
2020-01-09  0.531595  1.033206  0.176782  0.417005
2020-01-10 -0.810489  1.356864  1.297758 -1.859441
                   A         B         C         D
2020-01-01  1.489678 -1.309462 -1.062893  1.469829
2020-01-02  2.043362 -1.152348 -1.485839  2.316256
2020-01-03  1.939514 -0.590674 -1.382928  0.458119
2020-01-04  0.707585  1.164963 -2.210537  0.260445
2020-01-05  1.959040  1.434127 -1.577948  0.365400
2020-01-06  1.546618  3.614896  1.702635  2.388028
2020-01-07  0.070900  4.810270  0.949909  2.951153
2020-01-08  2.040204  4.995098 

Unnamed: 0,A,B,C,D
2020-01-01,,,,
2020-01-02,1.0,1.0,-1.0,1.0
2020-01-03,-1.0,1.0,1.0,-1.0
2020-01-04,-1.0,1.0,-1.0,-1.0
