In [4]:
# 加载模块
from datetime import datetime

# 统计分析相关
import pandas as pd
from statsmodels.api import OLS
from statsmodels.tsa.stattools import coint

# 绘图分析相关
import plotly.graph_objects as go

# 读取数据相关
from vnpy.trader.database import database_manager
from vnpy.trader.utility import extract_vt_symbol
from vnpy.trader.constant import Interval

In [2]:
# 定义函数
def load_symbol_data(vt_symbol, start, end, interval=None):
    """
    Load the close prices of one contract from the database as a Series.

    Args:
        vt_symbol: Contract identifier understood by ``extract_vt_symbol``,
            e.g. "IF2006.CFFEX".
        start: Start of the query window, formatted as "%Y%m%d".
        end: End of the query window, formatted as "%Y%m%d".
        interval: Bar interval to query. ``None`` (the default) selects
            ``Interval.MINUTE``, which used to be hard-coded.

    Returns:
        pd.Series of ``bar.close_price`` indexed by ``bar.datetime``, in the
        order the database returned the bars.

    Raises:
        ValueError: If ``start`` or ``end`` does not match "%Y%m%d"
            (raised by ``datetime.strptime``).
    """
    symbol, exchange = extract_vt_symbol(vt_symbol)
    start = datetime.strptime(start, "%Y%m%d")
    # NOTE(review): `end` is parsed to 00:00:00 of that day; whether bars dated
    # on the end day itself are returned depends on load_bar_data -- confirm.
    end = datetime.strptime(end, "%Y%m%d")

    # Resolve the default inside the body so the signature does not need
    # `Interval` to be importable at definition time.
    if interval is None:
        interval = Interval.MINUTE

    data = database_manager.load_bar_data(symbol, exchange, interval, start, end)

    # Single pass over the result, so this also works if `data` is a one-shot iterable.
    dt_list = []
    close_list = []
    for bar in data:
        dt_list.append(bar.datetime)
        close_list.append(bar.close_price)

    return pd.Series(close_list, index=dt_list)


def load_portfolio_data(vt_symbols, start, end):
    """
    Build a DataFrame holding one close-price column per symbol.

    Each symbol is loaded with ``load_symbol_data`` and stored under its
    vt_symbol as the column name. Columns are assigned one at a time, so
    pandas aligns every later series to the index the frame took from the
    first (non-empty) series: timestamps absent from that index are not
    added, and timestamps a later symbol lacks show up as NaN.

    Args:
        vt_symbols: Iterable of contract identifiers, e.g. "IF2006.CFFEX".
        start: Start of the query window, formatted as "%Y%m%d".
        end: End of the query window, formatted as "%Y%m%d".

    Returns:
        pd.DataFrame with one column per entry of ``vt_symbols``; an empty
        frame when ``vt_symbols`` is empty.
    """
    portfolio = pd.DataFrame()
    for name in vt_symbols:
        portfolio[name] = load_symbol_data(name, start, end)
    return portfolio

In [7]:
# Load two IF contracts for 20200101..20200621 and display the combined
# close-price frame (the bare expression is rendered as the cell output).
load_portfolio_data(["IF2006.CFFEX", "IF2009.CFFEX"], "20200101", "20200621")

Unnamed: 0,IF2006.CFFEX,IF2009.CFFEX
2020-01-02 09:30:00,4162.0,
2020-01-02 09:31:00,4161.8,
2020-01-02 09:32:00,4168.0,
2020-01-02 09:33:00,4167.0,
2020-01-02 09:34:00,4157.8,
2020-01-02 09:35:00,4159.2,
2020-01-02 09:36:00,4164.6,
2020-01-02 09:37:00,4167.0,
2020-01-02 09:38:00,4164.0,
2020-01-02 09:39:00,4166.2,


In [35]:
# Load the close prices of both legs and keep only the timestamps
# at which every symbol has a value.
vt_symbols = ["IH2006.CFFEX", "IF2006.CFFEX"]
start, end = "20200201", "20200531"

df = load_portfolio_data(vt_symbols, start, end).dropna()
print(df)

                     IH2006.CFFEX  IF2006.CFFEX
2020-02-03 09:30:00        2698.8        3662.8
2020-02-03 09:31:00        2700.6        3660.8
2020-02-03 09:32:00        2707.2        3659.0
2020-02-03 09:33:00        2701.4        3680.6
2020-02-03 09:34:00        2700.0        3691.2
2020-02-03 09:35:00        2701.0        3688.2
2020-02-03 09:36:00        2700.0        3684.0
2020-02-03 09:37:00        2696.6        3681.6
2020-02-03 09:38:00        2692.6        3677.8
2020-02-03 09:39:00        2694.2        3679.4
2020-02-03 09:40:00        2700.4        3680.2
2020-02-03 09:41:00        2710.0        3691.4
2020-02-03 09:42:00        2721.4        3700.8
2020-02-03 09:43:00        2730.2        3718.8
2020-02-03 09:44:00        2734.6        3714.2
2020-02-03 09:45:00        2749.8        3725.0
2020-02-03 09:46:00        2739.8        3707.8
2020-02-03 09:47:00        2731.2        3711.6
2020-02-03 09:48:00        2733.6        3717.2
2020-02-03 09:49:00        2739.6       

In [36]:
# Plot the raw close prices of every symbol on a single figure
fig = go.Figure()

for vt_symbol in vt_symbols:
    # Only `y` is supplied, so each trace is drawn against the bar position
    # (0, 1, 2, ...) rather than against the timestamp index.
    line = go.Scatter(y=df[vt_symbol], mode='lines', name=vt_symbol)
    fig.add_trace(line)

# Give the figure a title and axis labels so it can be read on its own.
fig.update_layout(
    title="Close prices",
    xaxis_title="Bar number",
    yaxis_title="Close price",
)

fig.show()

In [37]:
# Drop any rows that still contain NaN. The data-loading cell already calls
# dropna(), so on a top-to-bottom run this leaves the frame unchanged.
df = df.dropna()

In [38]:
# Regress the first symbol's price on the last symbol's price.
# NOTE(review): no constant column is added to the regressor, so this fits a
# line through the origin (the summary reports "uncentered" R-squared) --
# confirm that omitting the intercept is intended.
result = OLS(endog=df[vt_symbols[0]], exog=df[vt_symbols[-1]]).fit()
print(result.summary())

                                 OLS Regression Results                                
Dep. Variable:           IH2006.CFFEX   R-squared (uncentered):                   1.000
Model:                            OLS   Adj. R-squared (uncentered):              1.000
Method:                 Least Squares   F-statistic:                          3.098e+08
Date:                Sun, 21 Jun 2020   Prob (F-statistic):                        0.00
Time:                        15:41:26   Log-Likelihood:                         -87659.
No. Observations:               19440   AIC:                                  1.753e+05
Df Residuals:                   19439   BIC:                                  1.753e+05
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------

In [39]:
# Show the single fitted coefficient (the hedge ratio used for the spread).
# The regressor is a pandas Series, so `params` is indexed by its label;
# read it by position with .iloc instead of relying on the deprecated
# "integer key means position" fallback of `params[0]`.
result.params.iloc[0]

0.7230747224772707

In [40]:
# Plot the regression residual, i.e. the spread between the two legs.
# spread = first symbol - fitted coefficient * last symbol.
# The coefficient is read with .iloc because `params` is a label-indexed
# Series; `params[0]` would rely on the deprecated positional fallback.
df["spread"] = df[vt_symbols[0]] - result.params.iloc[0] * df[vt_symbols[-1]]

fig = go.Figure()
# Only `y` is supplied, so the spread is drawn against the bar position.
line = go.Scatter(y=df["spread"], mode='lines', name="Spread")
fig.add_trace(line)

# Give the figure a title and axis labels so it can be read on its own.
fig.update_layout(
    title=f"Spread: {vt_symbols[0]} - coefficient * {vt_symbols[-1]}",
    xaxis_title="Bar number",
    yaxis_title="Spread",
)

fig.show()

In [41]:
# Run the statsmodels cointegration test on the two price series.
# coint() returns the test statistic, its p-value and the critical values
# (discarded here as `_`); the null hypothesis is "no cointegration".
score, pvalue, _ = coint(df[vt_symbols[0]], df[vt_symbols[-1]])
# The printed message reads "p-value of the cointegration analysis: <value>".
print(f"协整分析的p-value为：{pvalue}")

协整分析的p-value为：0.09639652504642005


## p-value 如果小于 0.05，则可以在 5% 的显著性水平下拒绝“不存在协整关系”的原假设，即有较强的统计证据支持协整关系（上面的检验结果约为 0.096，未达到这一标准），但这在实践中非常少见。价差整体上还是存在大量的均值偏移情况，但只要震荡回归的次数足够多，即使不满足协整也有可能通过交易盈利。