In [3]:
pip install jqdatasdk

Collecting jqdatasdk
  Downloading jqdatasdk-1.8.11-py3-none-any.whl (158 kB)
[K     |████████████████████████████████| 158 kB 263 kB/s eta 0:00:01
[?25hCollecting thriftpy2>=0.3.9
  Downloading thriftpy2-0.4.16.tar.gz (643 kB)
[K     |████████████████████████████████| 643 kB 1.9 MB/s eta 0:00:01
Collecting pymysql>=0.7.6
  Downloading PyMySQL-1.0.2-py3-none-any.whl (43 kB)
[K     |████████████████████████████████| 43 kB 2.7 MB/s eta 0:00:01
Building wheels for collected packages: thriftpy2
  Building wheel for thriftpy2 (setup.py) ... [?25ldone
[?25h  Created wheel for thriftpy2: filename=thriftpy2-0.4.16-cp38-cp38-macosx_10_9_x86_64.whl size=490212 sha256=6b7b5f044f5a9270f74fb2d513c25865b6d15a29bd810328a9213b27742c0f4a
  Stored in directory: /Users/jeansha/Library/Caches/pip/wheels/a2/62/fd/0862bb8a1775cdea40a14af28ae2b490c9c32555df3361f590
Successfully built thriftpy2
Installing collected packages: thriftpy2, pymysql, jqdatasdk
Successfully installed jqdatasdk-1.8.11 pymysql-1

In [6]:
import os
from getpass import getpass

from jqdatasdk import *

# Credentials must never be stored in the notebook: anything typed here ends
# up in version control and in every shared copy of the file.
# The account is the phone number used at registration; the password is the
# JoinQuant website login password.
# Set JQDATA_USER / JQDATA_PASSWORD in the environment, or answer the prompts.
jq_user = os.environ.get('JQDATA_USER') or input('JQData account (phone number): ')
jq_password = os.environ.get('JQDATA_PASSWORD') or getpass('JQData password: ')
auth(jq_user, jq_password)
# Do not keep the secret alive in the kernel namespace.
del jq_password

auth success 


In [12]:
import jqdatasdk
from jqdatasdk import *
#import jqfactor
#from jqfactor import *
import datetime as dt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels import regression

In [14]:
#Initialise 
def initialize(context):
    """Strategy entry point: configure the backtest and register the monthly hooks.

    Sets the benchmark and engine options, seeds the strategy parameters on
    the global object ``g`` and schedules ``before_market_open`` and
    ``market_open`` with ``run_monthly`` (``monthday=-1``).

    Args:
        context: strategy context passed in by the backtest engine (unused here).
    """
    # Benchmark: a 50/50 blend of 000002.XSHG and 399107.XSHE.
    set_benchmark({'000002.XSHG': 0.5, '399107.XSHE': 0.5})
    # Dynamic price adjustment mode (real prices).
    set_option('use_real_price', True)
    # Drop logs from the order API family that are below error level.
    log.set_level('order', 'error')
    # Enable the avoid-future-data mode.
    set_option('avoid_future_data', True)

    # Strategy parameters live on the global object g.
    g.group = 1
    g.method = 'FF5'

    # Factor computation runs before the open, rebalancing at the open.
    schedule = (
        (before_market_open, 'before_open'),
        (market_open, 'open'),
    )
    for callback, when in schedule:
        run_monthly(callback, monthday = -1, time = when, reference_security = '399300.XSHE')


In [15]:
def before_market_open(context):
    if g.method=='BP':
        g.asc=False
    else:
        g.asc=True
    set_slip_fee(context)
    #获取沪深300成分股
    all_stocks = get_index_stocks('000300.XSHG')
    feasible_stocks = set_feasible_stocks(context, all_stocks)
    if g.method == 'CAPM':
        factor = hetero_factor(feasible_stocks, context.current_dt)
    elif g.method == 'FF3':
        factor = FF3(feasible_stocks, context.current_dt)
    elif g.method == 'CARHART':
        factor = CARHART(feasible_stocks, context.current_dt)
    elif g.method == 'FF5':
        factor = FF5(feasible_stocks, context.current_dt)
    elif g.method == 'circulating_market_cap':
        q = query(valuation.circulating_market_cap, valuation.code).filter(valuation.code.in_(feasible_stocks))
        factor = get_fundamentals(q, context.current_dt)
        factor.index = factor['code'].tolist()
        del factor['code']
        factor.columns = ['score']
    elif g.method == 'BP':
        q = query(1.0 / valuation.pb_ratio, valuation.code).filter(valuation.code.in_(feasible_stocks))
        factor = get_fundamentals(q, context.current_dt)
        factor.index = factor['code'].tolist()
        del factor['code']
        factor.columns = ['score']
        factor = factor.loc[factor['score'] > 0]
    #排序
    factor = factor.sort('score', ascending = g.asc)

    n = int(len(factor)/10)
    #分组取样
    if g.group == 10:
        g.tobuy_list = factor.index[(g.group - 1) * n :]
    else:
        g.tobuy_list = factor.index[(g.group - 1) * n : g.group * n]

In [16]:
def set_feasible_stocks(context,stocklist):
    """Filter a stock list down to the names that can be traded today.

    Removes, in order: stocks flagged by ``get_extras('is_st', ...)``, stocks
    whose display name contains ``'*'``, stocks rejected by
    ``delete_new(..., n=91)``, and stocks whose latest daily ``paused`` value
    is non-zero.

    Args:
        context: strategy context; ``context.current_dt`` is the as-of date.
        stocklist: iterable of stock codes.

    Returns:
        list: the surviving stock codes, in their original order.
    """
    as_of = context.current_dt

    # Drop ST stocks, then *ST stocks (display name carries an asterisk).
    st_data = get_extras('is_st', stocklist, count=1, end_date=as_of)
    stocklist = [
        stock for stock in stocklist
        if not st_data[stock].iloc[0]
        and '*' not in get_security_info(stock).display_name
    ]

    # Drop recently listed stocks.
    stocklist = delete_new(stocklist, as_of, n = 91)

    # Drop suspended stocks, judged on the latest daily bar.
    paused = get_price(stocklist, end_date = as_of, count = 1, frequency = 'daily', fields = 'paused')['paused']
    # The original returned the undefined name `stockList` (NameError).
    return [stock for stock in stocklist if paused[stock].iloc[0] == 0]

In [17]:
def delete_new(stocks, beginDate, n = 365):
    """Keep only stocks listed more than ``n`` calendar days before ``beginDate``.

    Args:
        stocks: iterable of stock codes.
        beginDate: reference point; a ``datetime`` (or subclass) or a plain
            ``date`` is accepted.
        n: minimum listing age in calendar days.

    Returns:
        list: codes whose ``get_security_info(code).start_date`` is strictly
        earlier than ``beginDate - n days``, in input order.
    """
    # The cutoff does not depend on the stock, so compute it once.
    cutoff = beginDate - dt.timedelta(days = n)
    # start_date is compared against a date, so strip any time component.
    if isinstance(cutoff, dt.datetime):
        cutoff = cutoff.date()
    return [stock for stock in stocks if get_security_info(stock).start_date < cutoff]

In [18]:
def set_slip_fee(context):
    """Configure slippage and commission for the current backtest date.

    Slippage is fixed at 0. Commission depends on whether
    ``context.current_dt`` is after 2013-01-01.

    Args:
        context: strategy context providing ``current_dt``.
    """
    # Zero slippage.
    set_slippage(FixedSlippage(0))

    # The file only has `import datetime as dt`; the original referenced the
    # undefined name `datetime` and shadowed `dt` with a local variable.
    now = context.current_dt
    if now > dt.datetime(2013, 1, 1):
        commission = PerTrade(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    else:
        commission = PerTrade(buy_cost=0.003, sell_cost=0.004, min_cost=5)
    set_commission(commission)

In [19]:
def market_open(context):
    """Rebalance to an equal-weight portfolio of ``g.tobuy_list``.

    Sells every held stock that is not a target, then moves each target to
    ``total_value / len(targets)``, and finally prints the number of long
    positions.

    Args:
        context: strategy context providing ``portfolio.long_positions`` and
            ``portfolio.total_value``.
    """
    targets = list(g.tobuy_list)
    target_set = set(targets)

    # Sell first. Iterate over a snapshot of the keys: closing a position may
    # remove it from the mapping, which would break iteration over the live
    # object.
    for stock in list(context.portfolio.long_positions):
        if stock not in target_set:
            order_target_value(stock, 0)

    # Then buy. Total value is read after the sells, as before; the per-stock
    # value is the same for every target, so compute it once.
    if targets:
        value = context.portfolio.total_value / len(targets)
        for stock in targets:
            order_target_value(stock, value)

    # Report the number of positions held this period.
    print(len(context.portfolio.long_positions))

In [20]:
def hetero_factor(stocks, end_date, rf = 0.04):
    """Score each stock by its annualised residual volatility against a market proxy.

    The market proxy is the circulating-market-cap weighted daily return of
    ``stocks``, using caps as of the window start. Each stock's excess return
    is regressed on the proxy's excess return through ``linreg``.

    Args:
        stocks: list of stock codes.
        end_date: last day of the estimation window.
        rf: annual risk-free rate; ``rf / 252`` is subtracted from daily returns.

    Returns:
        pandas.DataFrame: one ``score`` column indexed by stock code; rows
        with a NaN score are dropped.
    """
    # Window start: first date returned by get_tradeday_list for this end date.
    start_date = list(get_tradeday_list(start = None, end = end_date, frequency = 'month', count = 24).date)[0]
    quote = get_price(stocks, start_date = start_date, end_date = end_date, fields=['close'])['close']
    ret = quote.pct_change().dropna(how = 'all')

    # Market proxy weights from circulating market cap.
    q = query(valuation.circulating_market_cap, valuation.code).filter(valuation.code.in_(stocks))
    caps = get_fundamentals(q, start_date)
    caps.index = caps['code']
    caps = caps['circulating_market_cap']
    # Align weights to the return columns by stock code. The original
    # multiplied them positionally, which silently mixes up weights when the
    # row order differs and fails when fundamentals lack some stocks.
    weights = caps.reindex(ret.columns).fillna(0.0)
    weights = weights / weights.sum()

    ret_b = pd.DataFrame(np.dot(ret, weights), index = ret.index)

    # One OLS per stock; linreg returns the residual volatility.
    daily_rf = rf / 252
    scores = {stock: linreg(ret_b - daily_rf, ret[stock] - daily_rf) for stock in ret.columns}

    hetero = pd.DataFrame({'score': pd.Series(scores, dtype = float)})
    return hetero.dropna()

In [21]:
def FF3(stocks, end_date, rf = 0.04):
    """Score each stock by residual volatility from a three-factor regression.

    Factors (daily, weighted by circulating market cap as of the window start):
    RM (market excess return), SMB (small minus big cap tercile) and HML
    (high minus low book-to-price tercile). Each stock's excess return is
    regressed on the factors through ``linreg``.

    Args:
        stocks: list of stock codes.
        end_date: last day of the estimation window.
        rf: annual risk-free rate; ``rf / 252`` is used as the daily rate.

    Returns:
        pandas.DataFrame: one ``score`` column indexed by ``stocks``.
    """
    LoS = len(stocks)
    # Tercile size. The original `LoS/3` is a float in Python 3 and cannot be
    # used as a slice bound.
    third = LoS // 3

    q = query(
        valuation.code,
        valuation.circulating_market_cap,
        (balance.total_owner_equities/valuation.circulating_market_cap/100000000.0).label("BP"),
    ).filter(
        valuation.code.in_(stocks)
    )

    # NOTE(review): get_tradeday_list applies `count` to daily bars, so the
    # window starts roughly 24 trading days back, not 24 months.
    start_date = list(get_tradeday_list(start = None, end = end_date, frequency = 'month', count = 24).date)[0]
    df = get_fundamentals(q, start_date)
    df.index = df['code']
    del df['code']

    # Characteristic portfolios. DataFrame.sort no longer exists in pandas.
    by_cap = df.sort_values('circulating_market_cap').index.tolist()
    by_bp = df.sort_values('BP').index.tolist()
    S, B = by_cap[:third], by_cap[LoS - third:]
    L, H = by_bp[:third], by_bp[LoS - third:]

    caps = df['circulating_market_cap']

    # Daily returns over the sample window; missing returns count as 0.
    prices = get_price(stocks, start_date = start_date, end_date = end_date, fields=['close'])['close']
    returns = prices.pct_change().dropna(how = 'all').fillna(0)

    def value_weighted(members, weights):
        # Cap-weighted daily return of the given portfolio.
        w = weights.loc[members]
        return np.dot(returns[members], w / w.sum())

    SMB = list(value_weighted(S, caps) - value_weighted(B, caps))
    HML = list(value_weighted(H, caps) - value_weighted(L, caps))

    # Market proxy: the whole pool, cap weighted. Price data can cover stocks
    # with no fundamentals row; reindex yields NaN for them (plain .loc raises
    # on missing labels in current pandas) and they get the mean cap.
    market_caps = caps.reindex(returns.columns).fillna(caps.mean())
    RM = list(np.dot(returns, market_caps / market_caps.sum()) - rf/252)

    # Defensive length alignment. The original called DataFrame-style .drop
    # on plain lists, which would have raised if this branch ever ran.
    surplus = len(SMB) - len(RM)
    if surplus > 0:
        SMB, HML = SMB[surplus:], HML[surplus:]

    X = pd.DataFrame({"RM": RM, "SMB": SMB, "HML": HML})

    # One regression per stock; the score is the residual volatility.
    daily_rf = rf / 252
    t_scores = [linreg(X, returns[code] - daily_rf) for code in stocks]
    return pd.DataFrame({'score': t_scores}, index = stocks)

In [22]:
def linreg(X,Y):
    """Regress ``Y`` on ``X`` (plus an intercept) and return the annualised residual volatility.

    Args:
        X: regressors, array-like; one row per observation.
        Y: dependent variable, array-like with one value per observation.

    Returns:
        float: standard deviation of the OLS residuals multiplied by
        ``sqrt(252)``.
    """
    # `array` and `sqrt` were never imported in this notebook; use the numpy
    # functions through the existing `np` alias.
    design = sm.add_constant(np.array(X))
    target = np.array(Y)
    results = sm.OLS(target, design).fit()

    return results.resid.std() * np.sqrt(252)

In [23]:
def CARHART(stocks, end_date, rf = 0.04):
    """Score each stock by residual volatility from a four-factor regression.

    Factors (daily, cap weighted): RM, SMB, HML as in the three-factor model,
    plus MOM, built here as prior-period losers minus winners. Each stock's
    excess return is regressed on the factors through ``linreg``.

    Args:
        stocks: list of stock codes.
        end_date: last day of the estimation window.
        rf: annual risk-free rate; ``rf / 252`` is used as the daily rate.

    Returns:
        pandas.DataFrame: one ``score`` column indexed by ``stocks``.
    """
    LoS = len(stocks)
    # Tercile size. The original `LoS/3` is a float in Python 3 and cannot be
    # used as a slice bound.
    third = LoS // 3

    q = query(
        valuation.code,
        valuation.circulating_market_cap,
        (balance.total_owner_equities/valuation.circulating_market_cap/100000000.0).label("BP"),
    ).filter(
        valuation.code.in_(stocks)
    )

    # NOTE(review): get_tradeday_list applies `count` to daily bars, so the
    # window starts roughly 24 trading days back, not 24 months.
    start_date = list(get_tradeday_list(start = None, end = end_date, frequency = 'month', count = 24).date)[0]
    df = get_fundamentals(q, start_date)
    df.index = df['code']
    del df['code']

    # Characteristic portfolios. DataFrame.sort no longer exists in pandas.
    by_cap = df.sort_values('circulating_market_cap').index.tolist()
    by_bp = df.sort_values('BP').index.tolist()
    S, B = by_cap[:third], by_cap[LoS - third:]
    L, H = by_bp[:third], by_bp[LoS - third:]

    caps = df['circulating_market_cap']

    # Daily returns over the sample window; missing returns count as 0.
    prices = get_price(stocks, start_date = start_date, end_date = end_date, fields=['close'])['close']
    returns = prices.pct_change().dropna(how = 'all').fillna(0)

    # Momentum (reversal) sort: total return over the period before the
    # sample window. The original also called replace(0, nan) without using
    # the result; that no-op is dropped.
    prior_date = list(get_tradeday_list(start = None, end = start_date, frequency = 'month', count = 24).date)[0]
    prior_prices = get_price(stocks, start_date = prior_date, end_date = start_date, fields=['close'])['close']
    momentum = (prior_prices.iloc[-1, :] / prior_prices.iloc[0, :] - 1).to_frame('ret').fillna(0)
    by_momentum = momentum.sort_values('ret').index.tolist()
    LO, W = by_momentum[:third], by_momentum[LoS - third:]

    def value_weighted(members, weights):
        # Cap-weighted daily return of the given portfolio.
        w = weights.loc[members]
        return np.dot(returns[members], w / w.sum())

    SMB = list(value_weighted(S, caps) - value_weighted(B, caps))
    HML = list(value_weighted(H, caps) - value_weighted(L, caps))

    # Price data can cover stocks with no fundamentals row; reindex yields NaN
    # for them (plain .loc raises on missing labels in current pandas) and
    # they get the mean cap.
    market_caps = caps.reindex(returns.columns).fillna(caps.mean())
    MOM = list(value_weighted(LO, market_caps) - value_weighted(W, market_caps))
    # Market proxy: the whole pool, cap weighted.
    RM = list(np.dot(returns, market_caps / market_caps.sum()) - rf/252)

    # Defensive length alignment. The original called DataFrame-style .drop
    # on plain lists, which would have raised if this branch ever ran.
    surplus = len(SMB) - len(RM)
    if surplus > 0:
        SMB, HML, MOM = SMB[surplus:], HML[surplus:], MOM[surplus:]

    X = pd.DataFrame({"RM": RM, "SMB": SMB, "HML": HML, "MOM": MOM})

    # One regression per stock; the score is the residual volatility.
    daily_rf = rf / 252
    t_scores = [linreg(X, returns[code] - daily_rf) for code in stocks]
    return pd.DataFrame({'score': t_scores}, index = stocks)

In [24]:
def FF5(stocks, end_date, rf = 0.04):
    """Score each stock by residual volatility from a five-factor regression.

    Factors (daily, cap weighted): RM, SMB, HML, RMW (high minus low ROE
    tercile) and CMA (low minus high asset-growth tercile). Each stock's
    excess return is regressed on the factors through ``linreg``.

    Args:
        stocks: list of stock codes.
        end_date: last day of the estimation window.
        rf: annual risk-free rate; ``rf / 252`` is used as the daily rate.

    Returns:
        pandas.DataFrame: one ``score`` column indexed by ``stocks``.
    """
    LoS = len(stocks)
    # Tercile size. The original `LoS/3` is a float in Python 3 and cannot be
    # used as a slice bound.
    third = LoS // 3

    q = query(
        valuation.code,
        valuation.circulating_market_cap,
        (balance.total_owner_equities/valuation.circulating_market_cap/100000000.0).label("BP"),
        indicator.roe,
        balance.total_assets.label("Inv")
    ).filter(
        valuation.code.in_(stocks)
    )

    # NOTE(review): get_tradeday_list applies `count` to daily bars, so the
    # window starts roughly 24 trading days back, not 24 months.
    start_date = list(get_tradeday_list(start = None, end = end_date, frequency = 'month', count = 24).date)[0]
    df = get_fundamentals(q, start_date)
    df.index = df['code']
    del df['code']

    # Investment = log growth of total assets against one year earlier.
    # The file imports the module as `dt`; `datetime` is not a bound name.
    ldf = get_fundamentals(q, start_date - dt.timedelta(365))
    if len(ldf) == 0:
        # No prior-year data at all: treat Inv as 0 for now (log(1)).
        prior_assets = df["Inv"]
    else:
        # Index by code so the division aligns per stock. The original left
        # the default integer index, which made every ratio NaN.
        prior_assets = ldf.set_index('code')["Inv"]
    # Stocks without a prior-year value get the same neutral 0.
    df["Inv"] = np.log(df["Inv"] / prior_assets).fillna(0)

    def terciles(column):
        # Bottom and top third of the pool when sorted by `column`.
        ranked = df.sort_values(column).index.tolist()
        return ranked[:third], ranked[LoS - third:]

    S, B = terciles('circulating_market_cap')
    L, H = terciles('BP')
    W, R = terciles('roe')
    C, A = terciles('Inv')

    caps = df['circulating_market_cap']

    # Daily returns over the sample window; missing returns count as 0.
    prices = get_price(stocks, start_date = start_date, end_date = end_date, fields=['close'])['close']
    returns = prices.pct_change().dropna(how = 'all').fillna(0)

    def value_weighted(members):
        # Cap-weighted daily return of the given portfolio.
        w = caps.loc[members]
        return np.dot(returns[members], w / w.sum())

    SMB = list(value_weighted(S) - value_weighted(B))
    HML = list(value_weighted(H) - value_weighted(L))
    RMW = list(value_weighted(R) - value_weighted(W))
    CMA = list(value_weighted(C) - value_weighted(A))

    # Market proxy: the whole pool, cap weighted. Price data can cover stocks
    # with no fundamentals row; reindex yields NaN for them (plain .loc raises
    # on missing labels in current pandas) and they get the mean cap.
    market_caps = caps.reindex(returns.columns).fillna(caps.mean())
    RM = list(np.dot(returns, market_caps / market_caps.sum()) - rf/252)

    # Defensive length alignment. The original called DataFrame-style .drop
    # on plain lists, which would have raised if this branch ever ran.
    surplus = len(SMB) - len(RM)
    if surplus > 0:
        SMB, HML, RMW, CMA = SMB[surplus:], HML[surplus:], RMW[surplus:], CMA[surplus:]

    X = pd.DataFrame({"RM": RM, "SMB": SMB, "HML": HML, "RMW": RMW, "CMA": CMA})

    # One regression per stock; the score is the residual volatility.
    daily_rf = rf / 252
    t_scores = [linreg(X, returns[code] - daily_rf) for code in stocks]
    return pd.DataFrame({'score': t_scores}, index = stocks)

In [29]:
def get_tradeday_list(start,end,frequency=None,count=None):
    """Return trading days taken from the bars of 000001.XSHG.

    Args:
        start: first day of the range; ignored when ``count`` is given.
        end: last day of the range.
        frequency: ``None`` or ``'day'`` returns every bar date; ``'month'``
            the last bar of each month; ``'quarter'`` the first bar of
            January/April/July/October; ``'halfyear'`` the first bar of
            January/July.
        count: when given, fetch this many bars ending at ``end``. The count
            is applied to the bars before any frequency filtering, so
            ``frequency='month', count=24`` covers 24 bars, not 24 months.

    Returns:
        The index of the selected bars.

    Raises:
        ValueError: if ``frequency`` is not one of the supported values
            (previously the function returned ``None`` silently).
    """
    if count is not None:
        bars = get_price('000001.XSHG', end_date=end, count=count)
    else:
        bars = get_price('000001.XSHG', start_date=start, end_date=end)

    if frequency is None or frequency == 'day':
        return bars.index

    # Month numbers (as they appear in the date string) that open each period.
    opening_months = {
        'quarter': ('01', '04', '07', '10'),
        'halfyear': ('01', '07'),
    }
    if frequency != 'month' and frequency not in opening_months:
        raise ValueError('unsupported frequency: %r' % (frequency,))

    # 'YYYY-MM' key used to pick one bar per month.
    bars['year-month'] = [str(day)[0:7] for day in bars.index]
    if frequency == 'month':
        return bars.drop_duplicates('year-month', keep = 'last').index

    bars['month'] = [str(day)[5:7] for day in bars.index]
    bars = bars[bars['month'].isin(opening_months[frequency])]
    return bars.drop_duplicates('year-month').index

In [26]:
def after_market_close(context):
    """Post-close hook; intentionally does nothing.

    Args:
        context: strategy context (unused).
    """
    return None

In [30]:
# Analysis window and the month-end trading days inside it.
# NOTE(review): the start date's day is not zero-padded ('2021-01-1');
# confirm the data API parses it as 2021-01-01.
start_date = '2021-01-1'
end_date = '2021-12-31'
# `.date` is taken from the index returned by get_tradeday_list with
# frequency='month' (the last bar of each month).
period = get_tradeday_list(start = start_date, end = end_date, frequency = 'month').date

In [32]:
# NOTE(review): `global` at module level has no effect.
global factor
# Names of the scoring methods.
factors = ['CAPM', 'FF3', 'CARHART', 'FF5', 'circulating_market_cap', 'BP']
# Obtain the IC test results (slow).
# NOTE(review): the loop body only rebinds `factor`; no IC computation is
# visible here, so after the loop `factor` is simply the last name in the list.
for f in factors:
    factor = f

In [34]:
# Collects the value returned by each create_backtest call.
created_bt_ids = []
# Scoring methods to backtest, used as the `method` extra of each run.
# NOTE(review): 'BP' appears in `factors` but not here — confirm that is intended.
methods = ['circulating_market_cap', 'CAPM', 'FF3', 'CARHART', 'FF5']


In [36]:
# Submit one backtest per (method, group) pair and record what
# create_backtest returns.
# NOTE(review): neither `code` nor `create_backtest` is defined anywhere in
# this notebook; the recorded output of this cell is a NameError for
# `create_backtest`. Presumably both come from the hosting research
# environment — confirm before running.
for method in methods:
    # Backtest the first and the tenth group.
    for i in [1, 10]:
        algorithm_id = "2baff207b49a1191c300e78a4d6d7c75" # replace with your own strategy ID
        # Passed to the backtest as extras: the group and method for this run.
        extra_vars = {'group': i, 'method': method}

        params = {
                    "algorithm_id": algorithm_id,
                    "start_date": "2005-02-27",
                    "end_date": "2015-08-01",
                    "frequency": "day",
                    "initial_cash": "30000000",
                    "initial_positions": None,
                    "extras": extra_vars, 
                    "name" : method + ' group:' + str(i) 
                    }
        created_bt_ids.append(create_backtest(code = code, **params))

NameError: name 'create_backtest' is not defined