In [2]:
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR
import matplotlib.pyplot as plt
from data import fut_list, fut_read, stock_read
from util import adf_test, data_generator, significant, johansen_cointegration_test

# Config

In [3]:
fut = 'CU9999.XSGE'
feature = 'ChangeRatio'

# DATA
* 数据基础已经全部存在data里了。只需进一步处理得到想要的时间序列。

In [4]:
fut_df = fut_read(fut)
stock_df = stock_read(fut)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fut['ChangeRatio'].iloc[0] = 0


### 如果想要定制特征，处理fut_df \ stock_df即可。

# ADF验证是否平稳

In [5]:
adf_list = []
for idx, df in enumerate(stock_df):
   adf_list.append(adf_test(df[feature]))
adf_df = pd.concat(adf_list, axis=1)   

Test Statistic                  -33.507513
p-value                           0.000000
#Lags Used                        0.000000
Number of Observations Used    1189.000000
Critical Value (1%)              -3.435862
Critical Value (5%)              -2.863974
Critical Value (10%)             -2.568066
dtype: float64
Test Statistic                -1.370201e+01
p-value                        1.284425e-25
#Lags Used                     7.000000e+00
Number of Observations Used    1.186000e+03
Critical Value (1%)           -3.435876e+00
Critical Value (5%)           -2.863980e+00
Critical Value (10%)          -2.568069e+00
dtype: float64
Test Statistic                -1.434570e+01
p-value                        1.041917e-26
#Lags Used                     7.000000e+00
Number of Observations Used    1.176000e+03
Critical Value (1%)           -3.435923e+00
Critical Value (5%)           -2.864001e+00
Critical Value (10%)          -2.568080e+00
dtype: float64
Test Statistic                  -33.48

In [5]:
adf_df

Unnamed: 0,0,1,2,3,4
Test Statistic,-33.507513,-13.70201,-14.3457,-33.486238,-34.303758
p-value,0.0,1.284425e-25,1.041917e-26,0.0,0.0
#Lags Used,0.0,7.0,7.0,0.0,0.0
Number of Observations Used,1189.0,1186.0,1176.0,1193.0,1193.0
Critical Value (1%),-3.435862,-3.435876,-3.435923,-3.435843,-3.435843
Critical Value (5%),-2.863974,-2.86398,-2.864001,-2.863966,-2.863966
Critical Value (10%),-2.568066,-2.568069,-2.56808,-2.568061,-2.568061


对于changeratio而言，是平稳的

# 根据模型找出各个stock的联动显著时间段
* 由于只有VAR，故不再对模型选择进行分支。
* 由于p=0.05基本找不到显著的时间段，改成了0.10
* y是股票的y。第一个p是const的，第二个是期货前一时刻特征的p，第三个是股票前一时刻的特征的p。

In [6]:
sig_col = []
for idx, df in enumerate(stock_df):
    data = data_generator(fut_df, df, feature)
    sig_col.append(significant(data))
    

下面sig为list。list 0-4表明对应的stock。

比如list[0]中，就存了期股联动显著的时间切片。list中第一个数字是时间切片编号，第二个是三个p。p的解释如上

In [7]:
sig_col

[[[243, array([0.01900249, 0.08025969, 0.07651239])],
  [244, array([0.00802221, 0.03367894, 0.08705076])],
  [245, array([0.00516198, 0.02302983, 0.09857396])],
  [246, array([0.0027278 , 0.01661509, 0.09190853])],
  [247, array([0.0027793 , 0.01518037, 0.07193898])],
  [248, array([0.00186688, 0.02301486, 0.08227843])],
  [453, array([0.08585224, 0.08576275, 0.01895921])],
  [460, array([0.04912781, 0.05750771, 0.03928519])],
  [461, array([0.05073228, 0.065799  , 0.04595558])],
  [462, array([0.03719201, 0.05737676, 0.04374542])]],
 [[144, array([0.08462929, 0.0465453 , 0.03118692])],
  [248, array([0.00461322, 0.0197691 , 0.08287854])],
  [249, array([0.00187695, 0.01028218, 0.07665436])],
  [250, array([0.00112932, 0.00855091, 0.08571575])],
  [251, array([0.00065519, 0.00820359, 0.09171993])],
  [355, array([0.01853959, 0.07489875, 0.03481428])]],
 [[355, array([0.03795469, 0.06809477, 0.01351023])],
  [781, array([0.06248968, 0.04750179, 0.09777602])],
  [782, array([0.05183563,

sig_col 即为显著的index

还有更进一步的算最佳lead lag,不过都是1，所以似乎不重要了,写在下面。可以尝试下冲击

```python
lag_order = model.select_order(15)
print(f"Selected lag order: {lag_order.selected_orders['aic']}")
model_fitted = model.fit(maxlags=lag_order.selected_orders['aic'])
print(model_fitted.summary())

irf = model_fitted.irf(10) # 10期冲击响应
irf.plot(orth=True) # 正交化冲击响应图
plt.show()

```

# 协整检验
* 只用检验上面的data数据（就是一列是期货数据，一列是股票数据）

!! 只看一次，所以只用了一个break

In [9]:
# print('Results of Johansen Cointegration Test:')
# print(f"Test statistic: {result.lr1}")
# print(f"Critical values: {result.cvt}")
# print(f"Eigenstatistics: {result.lr2}")
# print(f"Eigenvalues: {result.eig}")
Test_statistic = []
Critical_values = []
Eigenstatistic = []
Eigenvalues = []
for idx, df in enumerate(stock_df):
    data = data_generator(fut_df, df, feature)
    test_result = johansen_cointegration_test(data)
    Test_statistic.append(test_result.lr1)
    Critical_values.append(test_result.cvt)
    Eigenstatistic.append(test_result.lr2)
    Eigenvalues.append(test_result.eig)
    
    
# 可以很方便的改成df

# ATE（暂时失败）

In [4]:
import numpy as np
from causalinference import CausalModel

# 生成示例数据
np.random.seed(0)
n = 1000
A = np.random.normal(loc=0, scale=1, size=n)
B = 2 * A + np.random.normal(loc=0, scale=1, size=n)

# 由于示例中没有协变量X，我们可以创建一个全为1的数组作为占位符
# 这样做是为了满足CausalModel的参数要求
X = np.ones((n, 1))

# 创建因果推断模型
# 注意：这里我们直接传递Y（结果变量B）、D（处理变量A）和X（协变量）
causal_model = CausalModel(Y=B, D=A, X=X)

# 进行因果推断分析
causal_model.est_via_ols(adj=1)
print(causal_model.estimates)

# 获取平均因果效应（ATE）
ate = causal_model.estimates['ols']['ate']
print("Average Treatment Effect (ATE):", ate)


ValueError: Too few treated units: N_t < K+1