In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import chi2, f
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Raw inputs. Every monthly RESSET workbook lives under its own stock-code folder.
# NOTE: the former `encoding='GB2312'` argument is dropped on purpose: `encoding` is
# not a parameter of `pd.read_excel`, and pandas >= 1.0 rejects it with a TypeError.
MONTHLY_XLS_TEMPLATE = './stock_data/月度数据/{code}/{code}RESSET_MRESSTK_1.xls'


def _read_monthly(code, usecols):
    """Read the given column positions from the monthly workbook of stock `code`."""
    return pd.read_excel(MONTHLY_XLS_TEMPLATE.format(code=code), usecols=usecols)


# Columns 1 and 2 of each monthly workbook (later labelled date / close).
stock_data000400 = _read_monthly('000400', [1, 2])
stock_data000600 = _read_monthly('000600', [1, 2])
stock_data000800 = _read_monthly('000800', [1, 2])
stock_data000900 = _read_monthly('000900', [1, 2])
stock_data600000 = _read_monthly('600000', [1, 2])
stock_data600100 = _read_monthly('600100', [1, 2])
stock_data600200 = _read_monthly('600200', [1, 2])
stock_data600600 = _read_monthly('600600', [1, 2])
# Daily Shanghai Composite index (columns 1 and 5 of the daily workbook).
index_data = pd.read_excel('./stock_data/日度数据/000001_index/000001RESSET_IDXQTTN_1.xls', usecols=[1, 5])
# Monthly risk-free return: columns 1 and 3 of the 600000 monthly workbook.
rf_data = _read_monthly('600000', [1, 3])

In [3]:
# The monthly Shanghai Composite series has to be derived:
# inner-join the index data onto the dates of one stock's data.
def monthIndex(stock_data,index_data):
    """Align index closes to the dates present in a stock frame and compute log returns.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Exactly two columns, read positionally as (date, stock close).
    index_data : pandas.DataFrame
        Exactly two columns, read positionally as (date, index close).

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` (strings formatted ``'%Y-%m'``), ``'index_close'`` and
        ``'return'`` (difference of consecutive log index closes; the first row is
        NaN), ordered by date. Only dates found in both inputs are kept.

    Notes
    -----
    Both inputs are left untouched: the positional renaming is applied to copies.
    """
    # Work on copies so the caller's column labels are not silently rewritten.
    stock = stock_data.copy()
    stock.columns = ['date', 'stock_close']
    index = index_data.copy()
    index.columns = ['date', 'index_close']

    merged = pd.merge(left=stock,
                      right=index[['date', 'index_close']],
                      on='date',
                      how='inner',
                      sort=True)

    # Explicit copy: the new columns are written to an independent frame,
    # not to a slice of `merged`.
    monthly = merged[['date', 'index_close']].copy()
    log_close = np.log(monthly['index_close'])
    monthly['return'] = log_close - log_close.shift(periods=1)
    monthly['date'] = pd.to_datetime(monthly['date']).dt.strftime('%Y-%m')
    return monthly

In [4]:
# Monthly Shanghai Composite series, aligned to the dates of stock 600000.
# This has to run while stock_data600000 still holds its original dates: the stock
# processing step rewrites them as '%Y-%m' strings.
index_data_cal = monthIndex(stock_data=stock_data600000, index_data=index_data)

In [5]:
# Stock data processing helper
def stockDataProcess(data):
    """Return a processed copy of a two-column (date, close) stock frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Exactly two columns, read positionally as (date, close).

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` (strings formatted ``'%Y-%m'``), ``'close'`` and
        ``'return'`` (difference of consecutive log closes; the first row is NaN).

    Notes
    -----
    The input frame is not modified; callers should use the returned frame.
    """
    processed = data.copy()  # keep the caller's frame intact
    processed.columns = ['date', 'close']
    processed['date'] = pd.to_datetime(processed['date']).dt.strftime('%Y-%m')
    log_close = np.log(processed['close'])
    processed['return'] = log_close - log_close.shift(periods=1)
    return processed

In [6]:
# Risk-free return data processing helper
def rfDataProcess(rfData):
    """Return a processed copy of a two-column (date, risk-free return) frame.

    Parameters
    ----------
    rfData : pandas.DataFrame
        Exactly two columns, read positionally as (date, risk-free return).

    Returns
    -------
    pandas.DataFrame
        Columns ``'date'`` (strings formatted ``'%Y-%m'``) and ``'rfreturn'``
        (values unchanged).

    Notes
    -----
    The input frame is not modified; callers should use the returned frame.
    """
    processed = rfData.copy()  # keep the caller's frame intact
    processed.columns = ['date', 'rfreturn']
    processed['date'] = pd.to_datetime(processed['date']).dt.strftime('%Y-%m')
    return processed

In [7]:
# Run every stock frame through stockDataProcess, in the same order as before,
# and rebind each name to the processed result.
(
    stock_data000400,
    stock_data000600,
    stock_data000800,
    stock_data000900,
    stock_data600000,
    stock_data600100,
    stock_data600200,
    stock_data600600,
) = (
    stockDataProcess(raw_frame)
    for raw_frame in (
        stock_data000400,
        stock_data000600,
        stock_data000800,
        stock_data000900,
        stock_data600000,
        stock_data600100,
        stock_data600200,
        stock_data600600,
    )
)

In [8]:
# Join the index returns and all stock returns on 'date'.
# Each 'return' column is renamed to a unique label before joining. Merging several
# frames that all carry a column called 'return' makes pandas generate repeated
# 'return_x' / 'return_y' suffixes, which pandas 2.x rejects with a MergeError.
# The resulting column order (date, index, then the stocks in this order) is the
# same positional order the original chain of merges produced.
return_frames = [
    ('index', index_data_cal),
    ('stk000400', stock_data000400),
    ('stk000600', stock_data000600),
    ('stk000800', stock_data000800),
    ('stk000900', stock_data000900),
    ('stk600000', stock_data600000),
    ('stk600100', stock_data600100),
    ('stk600200', stock_data600200),
    ('stk600600', stock_data600600),
]

data_matrix = None
for label, frame in return_frames:
    labelled = frame[['date', 'return']].rename(columns={'return': label})
    if data_matrix is None:
        data_matrix = labelled
    else:
        data_matrix = pd.merge(left=data_matrix,
                               right=labelled,
                               on='date',
                               how='inner',
                               sort=True)  # sort=True keeps the merged frame ordered by date

In [9]:
# Normalise the monthly risk-free return frame (column labels and '%Y-%m' dates).
rf_data = rfDataProcess(rfData=rf_data)

In [10]:
# Append the monthly risk-free return by inner-joining on 'date'.
rf_columns = rf_data[['date', 'rfreturn']]
data_matrix = data_matrix.merge(right=rf_columns, on='date', how='inner', sort=True)

In [11]:
# Positional relabelling: date, market index, one column per stock code, risk-free rate.
stock_codes = ['000400', '000600', '000800', '000900', '600000', '600100', '600200', '600600']
data_matrix.columns = ['date', 'index'] + ['stk' + code for code in stock_codes] + ['rf']

In [12]:
# Clean the fully merged table: discard every row that contains at least one NaN
# (the first row of each return series is NaN because of the one-period shift).
data_matrix.dropna(axis=0, how='any', inplace=True)

In [13]:
# Convert every return column to an excess return by subtracting the 'rf' column.
# NOTE: this overwrites the columns in place, so running the cell twice subtracts rf twice.
for return_col in ['index', 'stk000400', 'stk000600', 'stk000800', 'stk000900',
                   'stk600000', 'stk600100', 'stk600200', 'stk600600']:
    data_matrix[return_col] = data_matrix[return_col] - data_matrix['rf']

In [14]:
# Market (index) excess returns and the matrix of stock excess returns.
ret_ind = data_matrix['index'].values
ret_stocks = data_matrix[['stk000400', 'stk000600', 'stk000800', 'stk000900', 'stk600000', 'stk600100','stk600200','stk600600']].values
T = len(ret_ind)  # number of observations
N = ret_stocks.shape[1]  # number of stocks, derived from the data instead of a hard-coded 8
mu_market = np.mean(ret_ind)  # sample mean of the market excess return
# Sample variance of the market excess return with divisor T (despite the name, not a standard deviation).
sigma_market = np.sum((ret_ind-mu_market)**2)/T
ret_stocks[:5]  # preview only; avoids dumping the whole array

In [15]:
# Unrestricted model: regress every stock on a constant and the market return
# via the normal equations (all stocks at once).
x = np.ones((T, 2))
x[:, 1] = ret_ind  # column 0 stays at 1 (intercept), column 1 is the market return
y = ret_stocks
x_t = x.T
xTx = x_t.dot(x)
xTy = x_t.dot(y)
AB_hat = np.linalg.inv(xTx).dot(xTy)  # row 0: intercepts, row 1: slopes
ALPHA, BETA = AB_hat
print(ALPHA)
RESD = y - x.dot(AB_hat)  # residuals, one column per stock
COV = RESD.T.dot(RESD)/T  # residual covariance matrix with divisor T
invCOV = np.linalg.inv(COV)

[-0.0034918  -0.00465423 -0.0007631  -0.00572357 -0.00703279 -0.01146913
 -0.00498504  0.00678179]


In [16]:
# Restricted model: same regression without the constant term (intercept fixed at zero).
xr = np.array(ret_ind, dtype=float).reshape(T, 1)  # single regressor: the market return
yr = ret_stocks
xr_t = xr.T
xrTxr = xr_t.dot(xr)
xrTyr = xr_t.dot(yr)
ABr_hat = np.linalg.inv(xrTxr).dot(xrTyr)  # one slope per stock
RESDr = yr - xr.dot(ABr_hat)  # restricted residuals
COVr = RESDr.T.dot(RESDr)/T  # restricted residual covariance matrix with divisor T
invCOVr = np.linalg.inv(COVr)

In [17]:
# Wald test on the intercepts: one statistic compared with chi2(N),
# one rescaled statistic compared with F(N, T-N-1).
trans_ALPHA = ALPHA.reshape(-1, 1).copy()  # ALPHA as a column vector
market_adjust = 1/(1+mu_market**2/sigma_market)
alpha_quad = np.dot(np.dot(ALPHA, invCOV), trans_ALPHA)  # quadratic form alpha' * invCOV * alpha, shape (1,)
SWchi2 = T*market_adjust*alpha_quad
SWF = (T-N-1)/N*market_adjust*alpha_quad
pvalue_Wchi2 = 1 - chi2.cdf(SWchi2[0], N)
pvalue_WF = 1 - f.cdf(SWF[0], N, T-N-1)
print(pvalue_Wchi2)
print(pvalue_WF)

0.3760675518105223
0.4052484860424572


In [18]:
# Likelihood-ratio test: T times the gap between the log-determinants of the
# restricted and unrestricted residual covariance matrices, compared with chi2(N).
logdet_restricted = np.log(np.linalg.det(COVr))
logdet_unrestricted = np.log(np.linalg.det(COV))
SLRchi2 = T*(logdet_restricted - logdet_unrestricted)
pvalue_SLRchi2 = 1 - chi2.cdf(SLRchi2, N)
print(pvalue_SLRchi2)

0.3880397040946635


In [21]:
# Lagrange-multiplier test, step 1: score vector built from the restricted residuals.
# Shapes come from the data (reshape(-1, 1)) instead of a hard-coded 8, so the cell
# keeps working if the list of stocks changes.
resid_sum = np.sum(RESDr, axis=0).reshape(-1, 1)  # per-stock sum of restricted residuals
salpha = np.dot(invCOVr, resid_sum)  # intercept part of the score
resid_cross = np.dot(ret_ind, RESDr)  # per-stock sum of market return times residual
sbeta = np.dot(invCOVr, resid_cross).reshape(-1, 1)  # slope part of the score
score = np.concatenate((salpha, sbeta), axis=0)  # stacked column: intercept part, then slope part
print(score)

[[-1.07714950e+00]
 [-9.43601671e+01]
 [ 1.08689269e+01]
 [-1.84216624e+02]
 [-3.34137157e+02]
 [-2.24450761e+02]
 [-2.39223935e+01]
 [ 2.68091188e+02]
 [ 1.26593432e-14]
 [ 3.31277570e-14]
 [ 4.24523459e-14]
 [-3.52044166e-14]
 [ 2.89989165e-14]
 [ 1.74039590e-14]
 [-3.66885616e-15]
 [ 5.98122515e-15]]


In [22]:
# Lagrange-multiplier test, step 2: assemble the 2x2 block matrix Minfo from invCOVr
# scaled by T, sum(x) and sum(x^2), then form score' * inv(Minfo) * score and
# compare it with chi2(N).
sum_market = np.sum(ret_ind)
sum_market_sq = np.sum(ret_ind**2)
Minfo = np.block([
    [invCOVr*T, invCOVr*sum_market],
    [invCOVr*sum_market, invCOVr*sum_market_sq],
])
SLMchi2 = score.T.dot(np.linalg.inv(Minfo)).dot(score)  # shape (1, 1)
pvalue_SLMchi2 = 1-chi2.cdf(SLMchi2[0][0], N)
print(pvalue_SLMchi2)

0.3999946727163184
