* 请在环境变量中设置`DB_URI`指向数据库

In [1]:
%matplotlib inline

import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *

# Draw every figure in this notebook with matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')

In [2]:
"""
Back test parameter settings
"""

# --- Schedule ----------------------------------------------------------------
# Rebalance dates are generated on the 'china.sse' calendar between the two
# end points; `horizon` is derived from the same frequency string.
start_date = '2020-01-02'
end_date = '2020-02-21'
frequency = '10b'
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
horizon = map_freq(frequency)

# --- Universe and benchmark --------------------------------------------------
universe_name = 'hs300'
universe = Universe(universe_name)
benchmark_code = 300

# --- Risk handling and optimiser ---------------------------------------------
method = 'risk_neutral'
neutralize_risk = industry_styles   # columns the factor is neutralised against
industry_name = 'sw'
industry_level = 1
# Each stock weight may sit at most this far from its benchmark weight
# (see lbound / ubound in factor_analysis).
weight_gap = 0.01

# --- Constraint bounds (consumed by the constraints cell) --------------------
industry_lower = 1.0
industry_upper = 1.0
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0

# --- Execution and data access -----------------------------------------------
executor = NaiveExecutor()
# The connection string must be supplied through the DB_URI environment
# variable; a missing variable raises KeyError here.
data_source = os.environ['DB_URI']
engine = SqlEngine(data_source)

In [3]:
"""
Constraints settings
"""

industry_names = industry_list(industry_name, industry_level)
# Constrained exposures: three style factors plus every industry except the
# last entry of the list.
constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names[:-1]
# Two extra pseudo-exposures are appended: 'benchmark' and 'total'.
total_risk_names = constraint_risk + ['benchmark', 'total']


def _bound_rule(risk_name):
    """Return ``(boundary type, lower, upper)`` for one constrained exposure."""
    if risk_name == 'benchmark':
        return BoundaryType.RELATIVE, benchmark_total_lower, benchmark_total_upper
    if risk_name in {'SIZE', 'SIZENL', 'BETA'}:
        # Style factors are pinned to an absolute bound of exactly zero.
        return BoundaryType.ABSOLUTE, 0.0, 0.0
    # Everything else (the industries and 'total') uses the industry bounds.
    return BoundaryType.RELATIVE, industry_lower, industry_upper


_bound_rules = [_bound_rule(risk_name) for risk_name in total_risk_names]
b_type = [rule[0] for rule in _bound_rules]
l_val = [rule[1] for rule in _bound_rules]
u_val = [rule[2] for rule in _bound_rules]

bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)

In [4]:
def factor_analysis(engine, factor_name, universe, benchmark_code, positive):
    """Back-test a single factor as a constrained, benchmark-relative portfolio.

    For every date that has factor data, the factor expression
    ``CSRes(LAST(factor_name), 'EARNYILD')`` (negated when ``positive`` is
    false) is passed through ``factor_processing``, scored by a
    ``ConstLinearModel`` with weight 1 and converted into target weights by
    ``er_portfolio_analysis`` under the notebook-level ``bounds``.

    Besides its arguments the function reads these notebook globals:
    ``start_date``, ``end_date``, ``horizon``, ``ref_dates``, ``frequency``,
    ``industry_name``, ``industry_level``, ``neutralize_risk``,
    ``constraint_risk``, ``total_risk_names``, ``bounds``, ``weight_gap`` and
    ``method``.

    Parameters
    ----------
    engine
        Data engine providing ``fetch_dx_return_index_range``,
        ``fetch_dx_return_range``, ``fetch_industry_matrix_range`` and
        ``fetch_data_range``.
    factor_name
        Name of the factor to evaluate; also used to build the result label.
    universe
        Universe object handed to the engine's fetch methods.
    benchmark_code
        Benchmark index code used for index returns and benchmark weights.
    positive : bool
        ``True`` keeps the factor's sign, ``False`` flips it.

    Returns
    -------
    tuple
        ``(label, ret_df)`` where ``label`` is ``'<factor_name>_pos'`` or
        ``'<factor_name>_neg'`` and ``ret_df`` has the columns ``returns``,
        ``turn_over``, ``IC``, ``leverage`` and ``tc_cost``. One extra date is
        appended after the last rebalance date, all rows are shifted down by
        one, and the first row is set to zero.

    Raises
    ------
    KeyError
        From ``get_group`` / ``.loc`` when a date with factor data has no
        matching industry, stock-return or index-return rows.
    """
    # Use a fresh executor per back-test. The executor remembers the last
    # executed position (see set_current below), so a shared module-level
    # instance would measure this factor's first turnover against whatever
    # portfolio the previously analysed factor ended with.
    executor = NaiveExecutor()

    # ---- Data phase ---------------------------------------------------------
    index_return = engine.fetch_dx_return_index_range(benchmark_code,
                                                      start_date,
                                                      end_date,
                                                      horizon=horizon,
                                                      offset=1).set_index('trade_date')

    codes_return = engine.fetch_dx_return_range(universe,
                                                dates=ref_dates,
                                                horizon=horizon,
                                                offset=1,
                                                benchmark=benchmark_code)
    return_groups = codes_return.groupby('trade_date')

    industry_total = engine.fetch_industry_matrix_range(universe,
                                                        dates=ref_dates,
                                                        category=industry_name,
                                                        level=industry_level)
    industry_groups = industry_total.groupby('trade_date')

    # ---- Model phase: a single constant-weight linear model -----------------
    alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]
    simple_expression = CSRes(LAST(factor_name), 'EARNYILD') if positive else -CSRes(LAST(factor_name), 'EARNYILD')

    const_features = {alpha_name[0]: simple_expression}
    const_weights = {alpha_name[0]: 1.}

    const_model = ConstLinearModel(features=alpha_name,
                                   weights=const_weights)

    const_model_factor_data = engine.fetch_data_range(universe,
                                                      factors=const_features,
                                                      dates=ref_dates,
                                                      benchmark=benchmark_code)['factor'].dropna()

    rets = []
    turn_overs = []
    leverages = []
    ics = []
    index_dates = []
    factor_groups = const_model_factor_data.groupby('trade_date')

    # ---- Rebalance loop -----------------------------------------------------
    for date, data in factor_groups:
        index_dates.append(date)

        industry_matrix = industry_groups.get_group(date)
        total_data = data.fillna(data[alpha_name].median())
        total_data = pd.merge(total_data, industry_matrix, on=['code'])
        alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
        risk_exp = total_data[neutralize_risk].values.astype(float)
        benchmark_w = total_data.weight.values
        is_in_benchmark = (benchmark_w > 0.).astype(float).reshape(-1, 1)

        # Exposure matrix for the constraints: the constrained risk columns,
        # a benchmark-membership flag and a column of ones, in the same order
        # as total_risk_names.
        constraint_exp = total_data[constraint_risk].values
        risk_exp_expand = np.concatenate((constraint_exp,
                                          is_in_benchmark,
                                          np.ones_like(is_in_benchmark)), axis=1).astype(float)
        total_risk_exp = pd.DataFrame(risk_exp_expand, columns=total_risk_names)
        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)

        # Per-stock weight box: benchmark weight +/- weight_gap, floored at 0.
        lbound = np.maximum(0., benchmark_w - weight_gap)
        ubound = weight_gap + benchmark_w

        factor_values = factor_processing(total_data[alpha_name].values,
                                          pre_process=[winsorize_normal, standardize],
                                          risk_factors=risk_exp,
                                          post_process=[winsorize_normal, standardize])

        # Expected returns from the constant linear model.
        er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))

        alpha_logger.info('{0} full re-balance'.format(date))
        target_pos, _ = er_portfolio_analysis(er,
                                              total_data.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method=method,
                                              lbound=lbound,
                                              ubound=ubound)

        target_pos['code'] = total_data['code'].values

        turn_over, executed_pos = executor.execute(target_pos=target_pos)
        dx_returns = return_groups.get_group(date)

        # After the merges, weight_x is the executed portfolio weight and
        # weight_y the benchmark weight.
        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
        result = pd.merge(result, dx_returns, on=['code'])

        leverage = result.weight_x.abs().sum()

        # NOTE(review): stock dx is converted with exp(.) - 1 while the index
        # dx is subtracted as-is; if the index series is also a log return the
        # two conventions are mixed -- confirm against the engine.
        excess_return = np.exp(result.dx.values) - 1. - index_return.loc[date, 'dx']
        raw_weight = result.weight_x.values
        active_weight = raw_weight - result.weight_y.values
        ret = raw_weight @ excess_return
        risk_adjusted_ic = np.corrcoef(excess_return, active_weight)[0, 1]
        rets.append(np.log(1. + ret))
        ics.append(risk_adjusted_ic)
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverages.append(leverage)

        alpha_logger.info('{0} is finished'.format(date))

    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'IC': ics, 'leverage': leverages},
                          index=index_dates)

    # Append one more period end, then shift by one row so each row carries
    # the figures of the preceding rebalance; the first row becomes all zeros.
    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    # Transaction cost charged at 0.002 per unit of turnover.
    ret_df['tc_cost'] = ret_df.turn_over * 0.002

    return alpha_name[0], ret_df

def worker_func_positive(factor_name):
    """Run ``factor_analysis`` for ``factor_name`` with ``positive=True``.

    A new ``SqlEngine`` is built from ``data_source`` for this call instead of
    reusing the notebook-level engine. Returns whatever ``factor_analysis``
    returns.
    """
    # Kept as a function-local import, exactly as before.
    from alphamind.api import SqlEngine
    return factor_analysis(SqlEngine(data_source),
                           factor_name,
                           universe,
                           benchmark_code,
                           positive=True)


def worker_func_negative(factor_name):
    """Run ``factor_analysis`` for ``factor_name`` with ``positive=False``.

    A new ``SqlEngine`` is built from ``data_source`` for this call instead of
    reusing the notebook-level engine. Returns whatever ``factor_analysis``
    returns.
    """
    # Kept as a function-local import, exactly as before.
    from alphamind.api import SqlEngine
    return factor_analysis(SqlEngine(data_source),
                           factor_name,
                           universe,
                           benchmark_code,
                           positive=False)

In [5]:
# Factor names to back-test; add one entry per line.
factors = [
    "EMA5D",
    "EMV6D",
]

In [6]:
# Tip: put `%%time` on the first line of this cell to time the whole run.

# Every factor is analysed twice: once with its sign kept (res1) and once with
# it flipped (res2). Each entry is a (column label, per-period frame) pair.
res1 = list(map(worker_func_positive, factors))
res2 = list(map(worker_func_negative, factors))

factor_df = pd.DataFrame()
ic_df = pd.DataFrame()

# One column per run; the positive-direction runs come first, then the
# negative ones.
for f_name, res in res1 + res2:
    factor_df[f_name] = res['returns']
    ic_df[f_name] = res['IC']

2021-07-03 11:34:10,694 - ALPHA_MIND - INFO - 2020-01-02 00:00:00: 299
2021-07-03 11:34:12,912 - ALPHA_MIND - INFO - 2020-01-02 00:00:00 full re-balance
2021-07-03 11:34:12,940 - ALPHA_MIND - INFO - 2020-01-02 00:00:00 is finished
2021-07-03 11:34:12,946 - ALPHA_MIND - INFO - 2020-01-16 00:00:00: 300
2021-07-03 11:34:12,952 - ALPHA_MIND - INFO - 2020-01-16 00:00:00 full re-balance
2021-07-03 11:34:12,982 - ALPHA_MIND - INFO - 2020-01-16 00:00:00 is finished
2021-07-03 11:34:12,991 - ALPHA_MIND - INFO - 2020-02-07 00:00:00: 300
2021-07-03 11:34:12,999 - ALPHA_MIND - INFO - 2020-02-07 00:00:00 full re-balance
2021-07-03 11:34:13,032 - ALPHA_MIND - INFO - 2020-02-07 00:00:00 is finished
2021-07-03 11:34:13,039 - ALPHA_MIND - INFO - 2020-02-21 00:00:00: 300
2021-07-03 11:34:13,044 - ALPHA_MIND - INFO - 2020-02-21 00:00:00 full re-balance
2021-07-03 11:34:13,076 - ALPHA_MIND - INFO - 2020-02-21 00:00:00 is finished
2021-07-03 11:34:15,099 - ALPHA_MIND - INFO - 2020-01-02 00:00:00: 300
2021-

In [7]:
def _mean_std_t(frame):
    """Return per-column mean, std and t-statistic of ``frame``.

    The result has one row per column of ``frame`` and the columns ``mean``,
    ``std`` and ``t.``, where ``t. = mean / std * sqrt(n)``. ``n`` is the
    number of non-missing observations in that column, which is the same
    sample ``mean`` and ``std`` are computed on, since both skip NaN. Using
    the raw row count instead would overstate the t-statistic whenever a
    column contains NaN.
    """
    stats = frame.agg(['mean', 'std']).T
    stats['t.'] = stats['mean'] / stats['std'] * np.sqrt(frame.count())
    return stats


# Rows are taken as given, so any zero-filled rows in the input frames are
# part of the sample.
factor_res = _mean_std_t(factor_df)
ic_res = _mean_std_t(ic_df)

In [8]:
# Write the per-period series and their summary tables into one workbook
# named after the universe and benchmark, one sheet per table.
with pd.ExcelWriter(f'{universe_name}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:
    for sheet, frame in (('ret', factor_df),
                         ('ic', ic_df),
                         ('ret_stat', factor_res),
                         ('ic_stat', ic_res)):
        frame.to_excel(writer, sheet_name=sheet)