In [1]:
import pandas as pd
import numpy as np
import akshare as ak
from numpy_ext import rolling_apply

In [2]:
# Fetch minute-level bars (period='1') for symbol 'sh000001' through akshare.
df = ak.stock_zh_a_minute(symbol='sh000001', period='1')
# Convert every column except 'day' from strings to numbers.
# Only the non-'day' columns are converted explicitly instead of relying on
# pd.to_numeric(errors='ignore'): that option is deprecated in recent pandas
# and would also silently leave a malformed price/volume column as strings.
numeric_columns = [name for name in df.columns if name != 'day']
df = df.assign(**{name: pd.to_numeric(df[name]) for name in numeric_columns})

In [3]:
# Display the fetched frame (rich repr) to inspect its columns and row range.
df

Unnamed: 0,day,open,high,low,close,volume
0,2022-07-20 11:23:00,3301.781,3301.876,3300.838,3301.756,73882300
1,2022-07-20 11:24:00,3301.921,3302.014,3300.916,3301.312,68880500
2,2022-07-20 11:25:00,3301.267,3301.582,3300.829,3301.205,64910100
3,2022-07-20 11:26:00,3301.789,3302.333,3301.431,3301.686,62111600
4,2022-07-20 11:27:00,3301.815,3301.815,3301.075,3301.211,61409100
...,...,...,...,...,...,...
29995,2023-01-20 14:55:00,3262.308,3262.309,3261.467,3261.991,216202300
29996,2023-01-20 14:56:00,3262.316,3262.492,3261.679,3262.407,227037500
29997,2023-01-20 14:57:00,3262.022,3262.337,3261.556,3261.948,235980300
29998,2023-01-20 14:58:00,3261.840,3262.260,3261.840,3262.260,16865600


In [4]:
# "Better volatility": squared ratio of standard deviation to mean over a window.
def better_volatility_numpy_ext(o, h, l, c):
    """Return ``(std / mean) ** 2`` computed over all values of the four inputs.

    Args:
        o, h, l, c: Equal-length windows of values (named after the open,
            high, low and close columns they are called with). They are
            pooled into one array, e.g. 4 x 5 = 20 values for a 5-row window.

    Returns:
        The squared ratio of the pooled standard deviation (NumPy default,
        ddof=0) to the pooled mean.
    """
    # Pool the four windows into a single array so the statistics cover all of them.
    pooled = np.array([o, h, l, c])
    spread = pooled.std()
    level = pooled.mean()
    return (spread / level) ** 2

In [5]:
# Roll a 5-row window over the four OHLC columns together; the function receives
# the current window of each column as a separate positional argument.
better_volatility_using_numpy_ext = rolling_apply(
    better_volatility_numpy_ext,
    5,
    *(df[column] for column in ('open', 'high', 'low', 'close')),
)

In [8]:
# Show the first 10 results; the first 4 entries are NaN because the 5-row window is not full yet.
better_volatility_using_numpy_ext[:10]

array([           nan,            nan,            nan,            nan,
       1.56392876e-08, 1.39366604e-08, 1.13857554e-08, 1.03195484e-08,
       1.82236140e-08, 2.76023282e-08])

In [9]:
# "Better volatility" for a whole window passed in as a single array.
def better_volatility_pandas(data):
    """Return ``(std / mean) ** 2`` over every value in ``data``.

    Args:
        data: Array holding one rolling window; all of its elements are
            pooled (e.g. 5 rows x 4 columns = 20 values).

    Returns:
        The squared ratio of the standard deviation (NumPy default, ddof=0)
        to the mean of ``data``.
    """
    # Ratio of dispersion to level over the whole window.
    std_over_mean = np.std(data) / np.mean(data)
    return std_over_mean ** 2

In [10]:
# Table-wise rolling: method='table' hands each 5-row window of all four OHLC
# columns to the function as one 2-D array (pandas supports this only together
# with engine='numba'). The single result is repeated in every output column.
better_volatility_using_pandas = (
    df[['open', 'high', 'low', 'close']]
    .rolling(window=5, method='table')
    .apply(better_volatility_pandas, raw=True, engine='numba')
)

In [11]:
# Display the table-wise result; every column carries the same value per row.
better_volatility_using_pandas

Unnamed: 0,open,high,low,close
0,,,,
1,,,,
2,,,,
3,,,,
4,1.563929e-08,1.563929e-08,1.563929e-08,1.563929e-08
...,...,...,...,...
29995,1.310551e-08,1.310551e-08,1.310551e-08,1.310551e-08
29996,1.468630e-08,1.468630e-08,1.468630e-08,1.468630e-08
29997,1.075377e-08,1.075377e-08,1.075377e-08,1.075377e-08
29998,8.314668e-09,8.314668e-09,8.314668e-09,8.314668e-09


这一条命令的输出是好几列完全相同的数据（输出的形状与参与计算的 open、high、low、close 四列一致，每一列都是同一个结果），而我们只需要其中 1 列，所以需要单独取出第一列。

In [12]:
# All columns hold the same numbers, so keep only the first one as a NumPy array.
# Note: this rebinds the name from a DataFrame to an ndarray, so this cell cannot
# be re-run without first re-running the rolling computation.
better_volatility_using_pandas = better_volatility_using_pandas.iloc[:, 0].to_numpy()

In [14]:
# Show the first 10 values for comparison with the numpy_ext result.
better_volatility_using_pandas[:10]

array([           nan,            nan,            nan,            nan,
       1.56392876e-08, 1.39366604e-08, 1.13857554e-08, 1.03195484e-08,
       1.82236140e-08, 2.76023282e-08])

In [15]:
# Manual check on the first 5-row window; the printed value matches the first
# non-NaN entry of both rolling results.
first_window = df.iloc[:5][['open', 'high', 'low', 'close']].values
std = first_window.std()
mean = first_window.mean()
better_volatility_using_numpy = (std / mean) ** 2
print(better_volatility_using_numpy)

1.5639287606963217e-08
