In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import tushare as ts
import scipy as sp
# Load the tushare API token from a local CSV file and register it with tushare.
tk = pd.read_csv('tk.csv')
# NOTE(review): tk.iloc[0] is the first row as a Series, not a scalar string --
# confirm ts.set_token accepts that; tk.iloc[0, 0] would pass the bare cell value.
ts.set_token(tk.iloc[0])

In [None]:
# Create the tushare pro client (presumably authenticated with the token registered earlier).
pro = ts.pro_api()
# Download daily bars for 000002.SZ between 2017-09-01 and 2018-10-11.
df1 = pro.daily(ts_code = '000002.SZ', start_date = '20170901', end_date = '20181011')

In [None]:
# One-sample t-test on simulated data.
# Seed NumPy directly: `scipy.random` was only an alias of `numpy.random` and is
# not available in current SciPy. norm.rvs draws from NumPy's global RNG by
# default, so this keeps the sample reproducible.
np.random.seed(1235)
x=sp.stats.norm.rvs(size=10000)
# sp.stats.norm.rvs draws random variates from the standard normal distribution.
print("T-value P-value (two-tail)")
print(sp.stats.ttest_1samp(x,0.5))
print(sp.stats.ttest_1samp(x,0))
# The first test's null hypothesis is mean == 0.5, the second's is mean == 0.
# The first null is rejected; the second cannot be rejected.

In [None]:
# Simple returns between adjacent rows of df1's close column:
# (close[i] - close[i+1]) / close[i+1].
# NOTE(review): this is a forward-in-time return only if df1 is ordered
# newest-first -- confirm the row order returned by pro.daily.
close_1 = df1['close'].values

# np.diff yields close[i+1] - close[i]; negating it gives the numerator above.
ret_1 = -np.diff(close_1) / close_1[1:]

In [None]:
# One-sample t-test of the null hypothesis that the mean of ret_1 is zero.
print("T-value P-value (two-tail)")
# The notebook imports scipy as `sp`; a bare `stats` name is never defined,
# so the test must be reached through sp.stats.
print(sp.stats.ttest_1samp(ret_1,0))

In [None]:
# 检验方差是否相等
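# Test whether the return variances of two stocks over a period are equal. sp.stats.bartlett() applies Bartlett's test,
# whose null hypothesis is that all input samples have equal variances (it is a different test from the classical F-test).
# The function returns the test statistic and the p-value.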
def ret_f(ticker,start,end):
    """Return simple returns between adjacent daily closes of ``ticker``.

    Downloads daily bars for ``ticker`` between ``start`` and ``end`` (the
    callers in this notebook pass 'YYYYMMDD' strings) through the
    notebook-level ``pro`` client, then computes ``close[i] / close[i+1] - 1``
    for every adjacent pair of rows. The result has one element fewer than
    the number of downloaded rows.

    NOTE(review): this is a forward-in-time return only if ``pro.daily``
    returns rows newest-first -- confirm.
    """
    closes = pro.daily(ts_code=ticker, start_date=start, end_date=end)['close'].values
    lead, lag = closes[:-1], closes[1:]
    return lead / lag - 1
# Sample window shared by both stocks.
start, end = '20170901', '20181011'
# Return series of the two stocks being compared.
s1, s2 = (ret_f(symbol, start, end) for symbol in ('000001.SZ', '000002.SZ'))
# Bartlett's test of equal variances; prints the statistic and the p-value.
print(sp.stats.bartlett(s1, s2))

In [None]:
# 根据结果，可以拒绝零假设

In [None]:
# 检验一月效应
# 一月效应指的是一月份的回报率不同于其他月份
# Build per-month return sums for 000002.SZ as input to the January-effect test.
start = '20090901'
end = '20181011'
df1 = pro.daily(ts_code = '000002.SZ', start_date = start, end_date = end)
# Take the returns from the same frame that supplies the dates. The previous
# version computed returns for 000001.SZ but labelled them with 000002.SZ's
# trade dates, so the month labels did not belong to the returns being grouped.
close_px = df1['close'].values
s1 = close_px[:-1] / close_px[1:] - 1
# Return i is close[i] / close[i+1] - 1, so it is labelled with row i's YYYYMM.
yyyymm = df1['trade_date'].str[:6].tolist()[:len(s1)]
y = pd.DataFrame(s1, yyyymm, columns=['ret_monthly'])
# Sum the daily returns within each month. reset_index turns the unnamed month
# key into a column called 'index', which the next cell filters on.
ret_monthly = y.groupby(y.index).sum().reset_index()

In [None]:
# The 'index' column holds YYYYMM strings; characters 4 onward are the month.
is_january = ret_monthly['index'].str[4:] == '01'
ret_Jan = ret_monthly[is_january]
ret_others = ret_monthly[~is_january]
# Bartlett's test compares the variance of January returns with that of the other months.
jan_values = ret_Jan['ret_monthly'].values
other_values = ret_others['ret_monthly'].values
print(sp.stats.bartlett(jan_values, other_values))

In [None]:
# 根据结果，可以认为万科A的回报率不存在一月效应，但是这个结果仅仅针对万科一只股票，不应该一概而论，可以通过这种方法测试其他股票

In [None]:
# 基于52周最高价和最低价的交易策略
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Position of the latest close inside the trailing one-year high/low range.
ticker='000002.SZ'
enddate=datetime.now()
begdate=enddate-relativedelta(years=1)
# Format the dates as YYYYMMDD with strftime instead of slicing str(datetime).
start = begdate.strftime('%Y%m%d')
end = enddate.strftime('%Y%m%d')

# Use `ticker` rather than repeating the code, so changing it above takes effect.
p = pro.daily(ts_code = ticker, start_date = start, end_date = end)[['trade_date', 'close']]
# First returned row: its trade_date and close are reported as "Today" / "Price".
x=p.iloc[0]
# High and low are taken over the remaining rows (the first row is excluded).
y=p['close'].values[1:]
high=max(y)
low=min(y)
print("    Today,    Price     High   Low,  % from low ")
# x is labelled by column name ('trade_date', 'close'), so use .iloc for
# positional access instead of integer keys on a string-labelled Series.
today, price = x.iloc[0], x.iloc[-1]
print(today,'   ', price,'   ', high,' ', low,'  ',  round((price-low)/(high-low)*100,2))

In [None]:
# 滚动式估算市场风险系数
def ret_f(ticker,start,end):
    """Return simple returns between adjacent daily closes of ``ticker``.

    This redefines the identical helper declared earlier in the notebook.
    It downloads daily bars for ``ticker`` between ``start`` and ``end``
    through the notebook-level ``pro`` client and computes
    ``close[i] / close[i+1] - 1`` for every adjacent pair of rows.

    NOTE(review): this is a forward-in-time return only if ``pro.daily``
    returns rows newest-first -- confirm.
    """
    closes = pro.daily(ts_code=ticker, start_date=start, end_date=end)['close'].values
    lead, lag = closes[:-1], closes[1:]
    return lead / lag - 1
# Rolling OLS of 000001.SZ returns (y) on 000002.SZ returns (x) plus a constant.
start = '20170901'
end = '20181011'
s1=ret_f('000001.SZ',start,end)
s2=ret_f('000002.SZ',start,end)

y0=list(s1)
x0=sm.add_constant(list(s2))   # column 0 = constant, column 1 = s2
m = []                         # slope estimate of each window
window = 30
# A series of length n has n - window + 1 full windows. The +1 keeps the last
# one, which range(len(y0) - window) silently dropped.
for i in range(len(y0)-window+1):
    y = y0[i: i+window]
    x = x0[i: i+window]
    model = sm.OLS(y, x).fit()
    # params[0] is the intercept, params[1] the slope on s2.
    m.append(model.params[1])

In [None]:
import matplotlib.pyplot as plt
# Plot the rolling slope estimates, with labels so the figure stands alone.
plt.plot(m)
plt.title('Rolling OLS slope (window = ' + str(window) + ' observations)')
plt.xlabel('Window index')
plt.ylabel('Slope')
plt.show()

In [None]:
# Introduction to Value at Risk (VaR) -- see the slides.
# Parametric (normal) VaR of holding 50 shares of 000001.SZ for 10 days.
n_shares=50                      # input 1: number of shares held
confidence_level=0.99            # input 2: confidence level
n_days=10                        # input 3: holding period in days
z=sp.stats.norm.ppf(confidence_level)
# z is the standard-normal quantile at the 99% confidence level.
ticker='000001.SZ'
start = '20170901'
end = '20181011'
p = pro.daily(ts_code = ticker, start_date = start, end_date = end)[['trade_date', 'close']]
# Derive the returns from the frame just downloaded, so the position value and
# the volatility always refer to `ticker`. The previous version re-downloaded a
# hard-coded '000001.SZ' and ignored `ticker`.
closes = p['close'].values
s1 = closes[:-1]/closes[1:]-1
# Position value = shares * close of the first returned row.
position=n_shares * p['close'].iloc[0]
# Scale the daily volatility to the holding period with sqrt(n_days).
VaR = position*z*np.std(s1)*np.sqrt(n_days)
print("Holding=",position, "VaR=", round(VaR,4), "in ", n_days, "Days")

In [None]:
# 这就表示，今天的股票价值是493元，未来10天在99%置信水平的在险价值为79.46元，其实风险挺大的

In [None]:
# Building an efficient frontier.
# First, estimate the variance-covariance matrix.
# An assumed return matrix: one row per period, one column per stock.
ret=np.array([[0.1,0.2],[0.10,0.1071],[-0.02,0.25],[0.012,0.028],[0.06,0.262],[0.14,0.115]])
print('收益矩阵')
print(ret)

# np.cov demeans each column and divides by n-1. The raw product ret.T * ret
# is an uncentred, unscaled cross-product and is not a covariance matrix.
covar_=np.cov(ret, rowvar=False)
# Weights of the two stocks.
weight=np.array([0.4,0.6])

print ("权重",weight)
# Portfolio variance w' * Sigma * w.
port_var = weight @ covar_ @ weight
print(port_var)

In [None]:
# 其次，优化
from scipy.optimize import minimize
def y_f(x):
    """Demo objective 3 + 2*x**2."""
    return (3+2*x**2)
x0=100
# Nelder-Mead's absolute tolerance on x is the 'xatol' option. 'xtol' is not a
# current option name, so the requested 1e-8 tolerance would not be applied.
# 'disp': True prints the convergence summary.
res = minimize(y_f, x0, method='nelder-mead', options={'xatol':1e-8,'disp': True})

print(res.x)

In [None]:
# 表示该函数最小值是3，当x取值为0时取得

In [None]:
# Build an efficient frontier from three stocks.
# Most of what follows is formula code that can be reused as is; to draw a
# frontier for other inputs, usually only steps 1 and 2 need to change.
# (These two lines were bare prose in a code cell, which is a SyntaxError.)
# Step 1: input area
start = '20180905'
end = '20180930'
stocks = ['000001', '000002', '000004']
n_stock = len(stocks)          # derived, so it cannot drift from `stocks`

# Step 2: Generate a return matrix R
y = pd.DataFrame()
for stock_code in stocks:
    # Column name is the bare code; the query adds the '.SZ' exchange suffix.
    y[stock_code] = ret_f(stock_code + '.SZ',start,end)
R = np.array(y)                # one row per period, one column per stock
    
def objFunction(W, R, target_ret):
    """Portfolio standard deviation plus a penalty for missing the target return.

    W          -- portfolio weights, one per column of R
    R          -- return matrix, one row per period and one column per stock
    target_ret -- portfolio mean return the optimiser should hit

    Returns sqrt(W * cov(R) * W') + 2000 * |W . mean(R) - target_ret|.
    """
    mean_by_stock = np.mean(R, axis=0)
    covariance = np.cov(R.T)                       # variance-covariance matrix
    portfolio_mean = np.dot(W, mean_by_stock)
    portfolio_var = np.dot(W, covariance).dot(W.T)
    # The heavy weight on the shortfall pushes the optimiser towards the target mean.
    shortfall_penalty = 2000 * abs(portfolio_mean - target_ret)
    return np.sqrt(portfolio_var) + shortfall_penalty

# Step 3: estimate the optimal portfolio for each target return
out_mean,out_std,out_weight=[],[],[]
stockMean=np.mean(R,axis=0)
# The start point, bounds and constraint do not depend on the target return,
# so build them once instead of on every iteration.
W = np.ones([n_stock])/n_stock                        # start from equal weights
b_ = [(0,1) for i in range(n_stock)]                  # bounds, here no short selling
c_ = ({'type':'eq', 'fun': lambda W: sum(W)-1. })     # weights must sum to 1
# Sweep target returns between the lowest and highest single-stock mean.
for r in np.linspace(np.min(stockMean), np.max(stockMean), num=100):
    result=sp.optimize.minimize(objFunction,W,(R,r),method='SLSQP',constraints=c_, bounds=b_)
    if not result.success:
        # RuntimeError, not BaseException: BaseException is the root of
        # KeyboardInterrupt/SystemExit and is not meant to be raised directly.
        raise RuntimeError(result.message)
    out_mean.append(round(r,4))                       # not too many decimal places
    # Standard deviation of the realised portfolio returns at the optimal weights.
    std_=round(np.std(np.sum(R*result.x,axis=1)),6)
    out_std.append(std_)
    out_weight.append(result.x)
# Step 4: plot the efficient frontier (risk on x, target return on y)
plt.title('Efficient Frontier')
# Label fixes: "porfolio" typo, a stray closing parenthesis, and "stock are".
plt.xlabel('Standard Deviation of the portfolio (Risk)')
plt.ylabel('Return of the portfolio')
# Annotate the figure with the inputs so it is self-describing.
plt.figtext(0.5,0.75,str(n_stock)+' stocks are used: ')
plt.figtext(0.5,0.7,' '+str(stocks))
plt.figtext(0.5,0.65,'Time period: '+str(start)+' ------ '+str(end))
plt.plot(out_std,out_mean,'--')
plt.show()

In [None]:
# 其实通过上面这个例子可以看出，基础函数是最重要的，看似一个很复杂的代码，但是里面都是基础的东西，只不过因为一些公式的复杂度，导致基础的东西
# 层层嵌套。看似复杂，其实最复杂的公式的东西都是不需要改动的，很多都可以直接套用，所以，那些最基础的一定要多练习

In [None]:
# 插值法简介
# A short series with a two-element gap between 2 and 6.
x = pd.Series([1, 2, np.nan, np.nan, 6])
# interpolate() is a pandas method, so the data must be a Series or DataFrame.
# With its default arguments it fills the gap linearly.
x_filled = x.interpolate()
print(x_filled)

In [None]:
# 假如有一个债券的收益率数据，知道期限分别为2年，3年，5年，10年的债券收益率，通过线性插值算法，大致估算其他期限的收益率
# Nine equally spaced slots; the four known yields sit at positions 0, 1, 3
# and 8, and the NaN slots are the points to estimate.
x = pd.Series([
    0.29, 0.57, np.nan,
    1.34, np.nan, np.nan,
    np.nan, np.nan, 2.7,
])
# Default interpolation is linear between the neighbouring known values.
y = x.interpolate()
print(y)

In [None]:
# 输出数据
# 3x2 frame of uniform random numbers in [0, 1), used as sample data to export.
random_values = np.random.rand(3, 2)
save = pd.DataFrame(data=random_values, columns=list('ab'))

In [None]:
# Bare expression: the notebook renders the frame as this cell's output.
save

In [None]:
# Write the frame to a CSV file in the working directory.
save.to_csv('sa.csv')
# Write the same frame to an Excel workbook.
save.to_excel('sa.xlsx')

In [None]:
# 关于talib，技术分析用的一个包，安装过程可以百度一下
# 技术分析使用这个包可以省去很多步骤，比如计算macd，ma，布林线等

In [None]:
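# Example: compute the ROCR indicator for a stock.
# ROC (Price Rate of Change) is (today's close - close N days ago) / close N days ago, i.e. the N-day percentage change.
# ROCR expresses the same change as a ratio, close / close N days ago; ROCR100, used below, is that ratio times 100.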
# 重置
import numpy as np
import pandas as pd
import talib as ta
from pyexcel_xls import get_data
# 对于读取多个子表格的excel时，这个模块很方便
import matplotlib.pyplot as plt
import warnings
# Ignore warnings: install an "ignore" filter for all warning categories.
# NOTE(review): this also hides deprecation warnings raised by the cells below.
warnings.filterwarnings("ignore")

In [None]:
# 读取文件
# Read the whole workbook with pyexcel_xls.get_data.
df1 = get_data('sz50.xlsx')
# Per the original author's note, get_data returns a dict keyed by sheet name,
# so the keys give the list of sheets (one per stock, presumably) for later use.
code = list(df1.keys())

In [None]:
# Load every sheet, keep the 2017-01-03 .. 2017-11-20 rows, and gather the
# 'close' columns into one frame (rows = dates, columns = sheet names).
data_dict = {}
for i in code:
    # `sheetname=` was renamed to `sheet_name=` and the old spelling was removed.
    data = pd.read_excel('sz50.xlsx', sheet_name=i, index_col= 'datetime')
    data_dict[i] = data.loc['2017-01-03': '2017-11-20']

# pd.Panel and the .ix indexer no longer exist in pandas. PN.ix[:, :, 'close']
# selected the 'close' column of every item as a dates x items frame, which is
# rebuilt here directly from the per-sheet frames. The intermediate PN object
# is therefore no longer created.
data_rocr = pd.DataFrame({i: data_dict[i]['close'] for i in code}, columns=code)

In [None]:
# Bare expression: the notebook renders the close-price frame as this cell's output.
data_rocr

In [None]:
# Use talib to compute the 5-period ROCR100 of every stock and collect the
# results in a DataFrame with one column per stock.
# Drop stocks with no close prices at all in the window (mutates data_rocr in place).
data_rocr.dropna(how='all', axis=1, inplace=True)
code = list(data_rocr)          # remaining column names
rocr100_5 = pd.DataFrame({
    symbol: ta.ROCR100(data_rocr[symbol], 5)
    for symbol in code
})

print(rocr100_5)

In [None]:
# Plot ROCR100 for the first five stocks on one figure.
plt.figure(figsize=(15, 7))
# .ix has been removed from pandas; .iloc is the positional equivalent of the
# integer column selection used here. Capping at the column count avoids an
# IndexError when fewer than five stocks remain.
n_lines = min(5, rocr100_5.shape[1])
for col_pos in range(n_lines):
    plt.plot(rocr100_5.iloc[:, col_pos])
# Legend entries are the column names, in the same order as the lines.
plt.legend(list(rocr100_5)[:n_lines])
plt.show()