In [None]:
'''
数据的累计统计分析

   适用于Series和DataFrame类型的常用累计统计分析函数(默认按照axis = 0轴进行计算)
   
       函数                             说明
       .cumsum()                        依次给出前1,2，...，n个数的和
       .cumprod()                       依次给出前1,2，...，n个数的积
       .cummax()                        依次给出前1,2，...，n个数的最大值
       .cummin()                        依次给出前1,2，...，n个数的最小值
       
   适用于Series和DataFrame类型的滚动计算函数（滚动计算即对相邻w个数进行计算，而并非计算累计值）
   
       函数                             说明
       .rolling(w).sum()                依次计算相邻w个元素的和
       .rolling(w).mean()               依次计算相邻w个元素的算术平均值
       .rolling(w).var()                依次计算相邻w个元素的方差
       .rolling(w).std()                依次计算相邻w个元素的标准差
       .rolling(w).min()                依次计算相邻w个元素的最小值
       .rolling(w).max()                依次计算相邻w个元素的最大值
'''

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Demo frame: the integers 0..19 laid out row by row as 4 rows x 5 columns,
# with a deliberately unsorted row index ('c', 'a', 'd', 'b').
df = pd.DataFrame(data=np.arange(20).reshape(4, -1), index=list("cadb"))
df

Unnamed: 0,0,1,2,3,4
c,0,1,2,3,4
a,5,6,7,8,9
d,10,11,12,13,14
b,15,16,17,18,19


In [4]:
# Cumulative sum: running total down the rows of each column
# (axis=0 is the default, spelled out here for clarity).
df.cumsum(axis=0)

Unnamed: 0,0,1,2,3,4
c,0,1,2,3,4
a,5,7,9,11,13
d,15,18,21,24,27
b,30,34,38,42,46


In [7]:
# Cumulative sum along each row, accumulating left to right across the columns.
df.cumsum(axis="columns")

Unnamed: 0,0,1,2,3,4
c,0,1,3,6,10
a,5,11,18,26,35
d,10,21,33,46,60
b,15,31,48,66,85


In [10]:
# Cumulative product down the rows of each column.
# Column 0 stays at 0 throughout because its first value is 0.
df.cumprod(axis=0)

Unnamed: 0,0,1,2,3,4
c,0,1,2,3,4
a,0,6,14,24,36
d,0,66,168,312,504
b,0,1056,2856,5616,9576


In [13]:
# Running maximum of the first n rows in each column.
# Every column of df is increasing, so the result equals df itself.
df.cummax(axis=0)

Unnamed: 0,0,1,2,3,4
c,0,1,2,3,4
a,5,6,7,8,9
d,10,11,12,13,14
b,15,16,17,18,19


In [14]:
# Running minimum of the first n rows in each column.
# Every column of df is increasing, so each row repeats the first row.
df.cummin(axis=0)

Unnamed: 0,0,1,2,3,4
c,0,1,2,3,4
a,0,1,2,3,4
d,0,1,2,3,4
b,0,1,2,3,4


In [15]:
# Sum of every two adjacent rows (rolling window of size 2).
# The first row has no predecessor, so its result is NaN.
df.rolling(window=2).sum()

Unnamed: 0,0,1,2,3,4
c,,,,,
a,5.0,7.0,9.0,11.0,13.0
d,15.0,17.0,19.0,21.0,23.0
b,25.0,27.0,29.0,31.0,33.0


In [17]:
# Arithmetic mean of every two adjacent rows (rolling window of size 2).
df.rolling(window=2).mean()

Unnamed: 0,0,1,2,3,4
c,,,,,
a,2.5,3.5,4.5,5.5,6.5
d,7.5,8.5,9.5,10.5,11.5
b,12.5,13.5,14.5,15.5,16.5


In [18]:
# Variance of every two adjacent rows (rolling window of size 2).
# Adjacent rows always differ by 5, so every complete window yields 12.5.
df.rolling(window=2).var()

Unnamed: 0,0,1,2,3,4
c,,,,,
a,12.5,12.5,12.5,12.5,12.5
d,12.5,12.5,12.5,12.5,12.5
b,12.5,12.5,12.5,12.5,12.5


In [19]:
# Standard deviation of every two adjacent rows (rolling window of size 2).
df.rolling(window=2).std()

Unnamed: 0,0,1,2,3,4
c,,,,,
a,3.535534,3.535534,3.535534,3.535534,3.535534
d,3.535534,3.535534,3.535534,3.535534,3.535534
b,3.535534,3.535534,3.535534,3.535534,3.535534


In [22]:
# Minimum of every two adjacent rows (rolling window of size 2).
# For the maximum of adjacent rows, use df.rolling(2).max() instead.
df.rolling(window=2).min()

Unnamed: 0,0,1,2,3,4
c,,,,,
a,0.0,1.0,2.0,3.0,4.0
d,5.0,6.0,7.0,8.0,9.0
b,10.0,11.0,12.0,13.0,14.0
