## Rolling Window in Pandas

In [2]:
import numpy as np
import pandas as pd

In [35]:
# Five consecutive integers (0-4) on the default integer index.
s = pd.Series(data=range(5))
s

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [38]:
# Sum over a sliding window of 3 observations; positions without 3 values yet are NaN.
s.rolling(3).sum()

0    NaN
1    NaN
2    3.0
3    6.0
4    9.0
dtype: float64

In [39]:
# Toy frame: alternating group label in "A", running integers in "B".
df = pd.DataFrame({'A': list('ababa'), 'B': range(5)})

In [34]:
# Expanding (cumulative) sum of the remaining columns within each group of "A".
df.groupby(by='A').expanding().sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,0.0
a,2,2.0
a,4,6.0
b,1,1.0
b,3,4.0


In [15]:
# Same five integers, now indexed by five consecutive calendar days.
s = pd.Series(
    data=range(5),
    index=pd.date_range(start='2020-01-01', periods=5, freq='1D'),
)

In [19]:
# Display the series so its index is visible before applying a time-based window.
s

2020-01-01    0
2020-01-02    1
2020-01-03    2
2020-01-04    3
2020-01-05    4
Freq: D, dtype: int64

In [16]:
# Window given as a time span ('2D' = two days) instead of a number of observations.
s.rolling('2D').sum()

2020-01-01    0.0
2020-01-02    1.0
2020-01-03    3.0
2020-01-04    5.0
2020-01-05    7.0
Freq: D, dtype: float64

In [40]:
# Toy frame: alternating group label in "A", running integers in "B".
df = pd.DataFrame({'A': list('ababa'), 'B': range(5)})
# Expanding (cumulative) sum of "B" within each group of "A".
df.groupby(by='A').expanding().sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,0.0
a,2,2.0
a,4,6.0
b,1,1.0
b,3,4.0


In [41]:
def weighted_mean(x):
    """Weighted mean of the first two columns, using column 2 as the weights.

    Parameters
    ----------
    x : np.ndarray
        2-D array with at least three columns. Columns 0 and 1 hold the
        values, column 2 holds one weight per row.
        NOTE(review): presumably meant for
        ``rolling(..., method="table").apply(weighted_mean, raw=True)`` -
        confirm against the intended caller.

    Returns
    -------
    np.ndarray
        Array of shape ``(1, x.shape[1])``. The first two entries are the
        weighted means of columns 0 and 1; every other entry stays at 1.0.

    Notes
    -----
    If the weights sum to zero the division yields NaN/inf, as NumPy does
    for any division by zero; no extra handling is added here.
    """
    # Keep the weights 2-D, shape (n, 1), so each ROW is scaled by its own
    # weight. A 1-D ``x[:, 2]`` broadcasts along the columns instead, which
    # gives wrong numbers for 2 rows and raises for more than 2 rows.
    weights = x[:, 2:3]
    arr = np.ones((1, x.shape[1]))
    arr[:, :2] = (x[:, :2] * weights).sum(axis=0) / weights.sum()
    return arr


In [26]:
# Four rows, three numeric columns (labelled 0, 1, 2 by default).
df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])

# Batch exponentially weighted mean over all four rows.
# NOTE: this is not the last expression in the cell, so Jupyter does not display it.
df.ewm(0.5).mean()

# Online EWM object built from the first two rows only.
# NOTE(review): per the pandas docs, online() needs the optional numba
# dependency - confirm it is installed in this environment.
online_ewm = df.head(2).ewm(0.5).online()

# First call, without `update`. Result is not displayed (not the last expression).
# NOTE(review): presumably this call must come before any mean(update=...) - verify in the pandas docs.
online_ewm.mean()

# Feed only the last row (index 3) as new data; row 2 is never passed to the online object.
online_ewm.mean(update=df.tail(1))

Unnamed: 0,0,1,2
0,1.0,2.0,0.6
1,1.75,2.75,0.45
2,2.615385,3.615385,0.276923
3,3.55,4.55,0.5625


In [None]:
 times = ['2020-01-01', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-29']
s = pd.Series(range(5), index=pd.DatetimeIndex(times))

# Window with 2 observations
s.rolling(window=2).sum()

# Window with 2 days worth of observations
s.rolling(window='2D').sum()



## Centering Window

In [None]:
s = pd.Series(range(10))

# Default (trailing) window: the result is labelled at the window's last row.
mean_trailing = s.rolling(window=5).mean()
# Centered window: the result is labelled at the window's middle row.
mean_centered = s.rolling(window=5, center=True).mean()

# Only the last expression of a cell is displayed, so show both results
# side by side instead of discarding the trailing one.
pd.DataFrame({"trailing": mean_trailing, "centered": mean_centered})



In [None]:
# One integer column on a daily index of five dates starting at "2020".
df = pd.DataFrame(
    data={"A": list(range(5))},
    index=pd.date_range(start="2020", periods=5, freq="1D"),
)


In [None]:
# Two-day time-based window with centering explicitly switched off.
df.rolling(window="2D", center=False).mean()

In [None]:
# Same two-day window, this time with centering switched on.
df.rolling(window="2D", center=True).mean()

In [28]:
# Constant column "x" on second-resolution timestamps; note the jump from :04 to :06.
df = pd.DataFrame(
    {"x": 1},
    index=[
        pd.Timestamp(stamp)
        for stamp in (
            "20130101 09:00:01",
            "20130101 09:00:02",
            "20130101 09:00:03",
            "20130101 09:00:04",
            "20130101 09:00:06",
        )
    ],
)

In [29]:
# Display the frame before the window columns are added.
df

Unnamed: 0,x
2013-01-01 09:00:01,1
2013-01-01 09:00:02,1
2013-01-01 09:00:03,1
2013-01-01 09:00:04,1
2013-01-01 09:00:06,1


In [None]:
# One result column per `closed` option of a 2-second window over "x";
# "right" is the default.
for closed_side in ("right", "both", "left", "neither"):
    df[closed_side] = df.rolling("2s", closed=closed_side)["x"].sum()
df


In [None]:
# Rolling Apply

In [None]:
def mad(x):
    """Return the mean absolute deviation of ``x`` around its own mean."""
    deviations = x - x.mean()
    return np.fabs(deviations).mean()

# Apply `mad` to every window of 4 observations; raw=True is forwarded to apply().
s = pd.Series(data=range(10))
s.rolling(4).apply(func=mad, raw=True)

In [None]:
# Seeded generator so the random data is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# 10 daily rows x 4 columns of standard-normal draws.
df = pd.DataFrame(
    rng.standard_normal((10, 4)),
    index=pd.date_range("2020-01-01", periods=10),
    columns=["A", "B", "C", "D"],
)

In [None]:
# Replace each column by its running total.
# NOTE: this rebinds `df`, so re-running the cell accumulates a second time.
df = df.cumsum()
# Rolling 2-row correlation of every column with column "B", on the first four rows only.
df2 = df.iloc[:4]
df2.rolling(window=2).corr(other=df2["B"])