In [4]:
# 加载模块
from datetime import datetime

# 统计分析相关
import pandas as pd
from statsmodels.api import OLS
from statsmodels.tsa.stattools import coint

# 绘图分析相关
import plotly.graph_objects as go

# 读取数据相关
from vnpy.trader.database import database_manager
from vnpy.trader.utility import extract_vt_symbol
from vnpy.trader.constant import Interval

In [2]:
# 定义函数
def load_symbol_data(vt_symbol, start, end):
    """
    Load 1-minute bars for one contract and return its close prices.

    Parameters
    ----------
    vt_symbol : str
        Identifier passed to ``extract_vt_symbol``, which yields the
        ``(symbol, exchange)`` pair used for the database query
        (e.g. "IF2006.CFFEX").
    start, end : str
        Query bounds written as "YYYYMMDD"; parsed with
        ``datetime.strptime(..., "%Y%m%d")``.

    Returns
    -------
    pandas.Series
        ``bar.close_price`` of every returned bar, indexed by
        ``bar.datetime``, in the order the database returned them.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` does not match the "%Y%m%d" format.
    """
    date_format = "%Y%m%d"
    symbol, exchange = extract_vt_symbol(vt_symbol)

    bars = database_manager.load_bar_data(
        symbol,
        exchange,
        Interval.MINUTE,
        datetime.strptime(start, date_format),
        datetime.strptime(end, date_format),
    )

    # Walk the query result once, keeping (timestamp, close) pairs so the
    # index and the values are guaranteed to stay in step.
    points = [(bar.datetime, bar.close_price) for bar in bars]
    timestamps = [stamp for stamp, _ in points]
    closes = [price for _, price in points]

    return pd.Series(closes, index=timestamps)


def load_portfolio_data(vt_symbols, start, end):
    """
    Build a close-price table with one column per contract.

    Each entry of ``vt_symbols`` is loaded with ``load_symbol_data`` using
    the same ``start`` / ``end`` strings ("YYYYMMDD") and stored in a column
    named after the symbol.

    Note: the series are attached by column assignment, so pandas aligns
    every later series to the index the frame already has. Timestamps where
    a symbol has no bar show up as NaN in that symbol's column.

    Returns
    -------
    pandas.DataFrame
        Columns are the given ``vt_symbols`` in iteration order.
    """
    portfolio = pd.DataFrame()
    for name in vt_symbols:
        portfolio[name] = load_symbol_data(name, start, end)
    return portfolio

In [7]:
# Quick check of the loader on two index-futures contracts; the returned
# frame is the cell's last expression, so the notebook renders it as a table.
load_portfolio_data(
    vt_symbols=["IF2006.CFFEX", "IF2009.CFFEX"],
    start="20200101",
    end="20200621",
)

Unnamed: 0,IF2006.CFFEX,IF2009.CFFEX
2020-01-02 09:30:00,4162.0,
2020-01-02 09:31:00,4161.8,
2020-01-02 09:32:00,4168.0,
2020-01-02 09:33:00,4167.0,
2020-01-02 09:34:00,4157.8,
2020-01-02 09:35:00,4159.2,
2020-01-02 09:36:00,4164.6,
2020-01-02 09:37:00,4167.0,
2020-01-02 09:38:00,4164.0,
2020-01-02 09:39:00,4166.2,


In [None]:
# Load the close prices of the pair to analyse.
# Later cells treat vt_symbols[0] and vt_symbols[-1] as the two legs.
vt_symbols = ["XBTUSD.BITMEX", "btcusdt.BINANCE"]
start, end = "20200101", "20200630"

df = load_portfolio_data(vt_symbols, start, end)
print(df)

In [None]:
# Plot the raw price series: one line per symbol on a shared figure.
# No x values are given, so each trace is drawn against its row position.
fig = go.Figure()
for name in vt_symbols:
    fig.add_trace(go.Scatter(y=df[name], mode='lines', name=name))

fig.show()

In [None]:
# Run the regression: first symbol's price on the last symbol's price.
# Only the price column is passed as regressor (no constant column), so the
# model has a single slope coefficient and no intercept term.
# missing="drop" discards rows where either price is NaN. The price table can
# hold NaN wherever one symbol has no bar at a timestamp, and the default
# missing="none" performs no NaN checking before fitting.
result = OLS(df[vt_symbols[0]], df[vt_symbols[-1]], missing="drop").fit()
print(result.summary())

In [None]:
# Plot the spread between the two legs over time.
# NOTE: the coefficient is fixed at 1 here; it is not read from the
# regression result, so this is the residual of that fit only if the fitted
# coefficient is 1. Change hedge_ratio by hand to use a different ratio.
hedge_ratio = 1
first_leg = df[vt_symbols[0]]
last_leg = df[vt_symbols[-1]]
df["spread"] = first_leg - hedge_ratio * last_leg

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df["spread"], mode='lines', name="Spread"))

fig.show()

In [None]:
# Run the cointegration test on the first and last symbols' prices.
# coint() has no missing-value option, and the price table can hold NaN
# wherever one symbol has no bar at a timestamp, so restrict both series to
# the timestamps where both prices are present before testing.
first_leg = df[vt_symbols[0]]
last_leg = df[vt_symbols[-1]]
both_present = first_leg.notna() & last_leg.notna()

score, pvalue, _ = coint(first_leg[both_present], last_leg[both_present])
print(f"协整分析的p-value为：{pvalue}")

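## A p-value below 0.05 rejects the null hypothesis of no cointegration at the 5% level, i.e. it is statistical evidence of a cointegrating relationship, but this is very rare in practice. The spread as a whole still shows a lot of drift in its mean; however, as long as it oscillates and reverts often enough, trading it can be profitable even when cointegration is not established.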