In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

In [2]:
# Back test parameter settings
start_date = '2011-01-01'
end_date = '2018-02-14'

freq = '10b'
industry_lower = 1.
industry_upper = 1.
neutralized_risk = industry_styles
industry_name = 'sw_adj'
industry_level = 1
turn_over_target_base = 0.3
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 0
horizon = map_freq(freq)
weight_gap = 0.01
universe = Universe("custom", ['zz800'])
data_source = 'postgres+psycopg2://postgres:we083826@192.168.0.102/alpha'
benchmark_code = 905

executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)

In [3]:
base1 = LAST('roe_q')
base2 = LAST('ep_q')

alpha_factors = {
    'f01': base1,
    'f02': base2,
    'f03': LAST('ILLIQUIDITY'),
    'f04': LAST('cfinc1_q'),
    'f05': LAST('CHV'),
    'f06': LAST('CFToAsset'),
    'f07': LAST('market_confidence_75d'),
    'f08': LAST('PB'),
    'f09': LAST('con_pe_rolling_order'),
    'f10': LAST('con_pb_rolling_order'),
}

weights = dict(f01=1.,
                f02=1.,
                f03=0.25,
                f04=0.25,
                f05=0.25,
                f06=0.25,
                f07=0.25,
                f08=-0.25,
                f09=-0.25,
                f10=-0.25)
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)

def train_worker(ref_date):
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[winsorize_normal, standardize],
                         warm_start=0,
                         data_source=data_source)
    
    return train_model(ref_date, alpha_model, data_meta)


def predict_worker(params):
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[winsorize_normal, standardize],
                         warm_start=0,
                         data_source=data_source)
    ref_date, model = params
    er = predict_by_model(ref_date, model, data_meta)
    return er

In [4]:
%%time

# training / predict on dask executor

from dask.distributed import Client
client = Client('192.168.0.102:8786')

tasks = client.map(predict_worker, [(d.strftime('%Y-%m-%d'), alpha_model) for d in ref_dates], pure=False)
predicts = client.gather(tasks)

Wall time: 6min 49s


In [5]:
# rebalance

industry_names = industry_list(industry_name, industry_level)
constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names
total_risk_names = constraint_risk + ['benchmark', 'total']

b_type = []
l_val = []
u_val = []

previous_pos = pd.DataFrame()
rets = []
turn_overs = []
leverags = []

for name in total_risk_names:
    if name == 'benchmark':
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(benchmark_total_lower)
        u_val.append(benchmark_total_upper)
    else:
        b_type.append(BoundaryType.ABSOLUTE)
        l_val.append(0.0)
        u_val.append(0.0)

bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)

industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name, level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]

for i, ref_date in enumerate(ref_dates):
    ref_date = ref_date.strftime('%Y-%m-%d')
    industry_matrix = industry_total[industry_total.trade_date == ref_date]
    benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
    risk_matrix = risk_total[risk_total.trade_date == ref_date]
    
    res = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
    res = pd.merge(res, risk_matrix, on=['code'])
    res = res.dropna()
    codes = res.code.values.tolist()
    
    benchmark_w = res.weight.values
    is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))
    
    total_risk_exp = np.concatenate([res[constraint_risk].values.astype(float),
                                     is_in_benchmark,
                                     np.ones_like(is_in_benchmark)],
                                    axis=1)
    total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
    constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)
    
    lbound = np.maximum(0., benchmark_w - weight_gap)  # np.zeros(len(total_data))
    ubound = weight_gap + benchmark_w
    
    if previous_pos.empty:
        current_position = None
        turn_over_target = None
    else:
        previous_pos.set_index('code', inplace=True)
        remained_pos = previous_pos.loc[codes]

        remained_pos.fillna(0., inplace=True)
        turn_over_target = turn_over_target_base
        current_position = remained_pos.weight.values
        
    er = predicts[i].loc[codes].values
    
    try:
        alpha_logger.info('{0} partial re-balance: {1}'.format(ref_date, len(er)))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              turn_over_target=turn_over_target,
                                              current_position=current_position,
                                              lbound=lbound,
                                              ubound=ubound)
    except ValueError:
        alpha_logger.info('{0} full re-balance: {1}'.format(ref_date, len(er)))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              lbound=lbound,
                                              ubound=ubound)
        
    target_pos['code'] = codes
    turn_over, executed_pos = executor.execute(target_pos=target_pos)

    executed_codes = executed_pos.code.tolist()
    dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)
    result = pd.merge(executed_pos, dx_returns, on=['code'])

    leverage = result.weight.abs().sum()

    ret = result.weight.values @ (np.exp(result.dx.values) - 1.)
    rets.append(np.log(1. + ret))
    executor.set_current(executed_pos)
    turn_overs.append(turn_over)
    leverags.append(leverage)

    previous_pos = executed_pos
    alpha_logger.info('{0} is finished'.format(ref_date))

2018-02-25 14:48:14,666 - ALPHA_MIND - INFO - 2011-01-04 partial re-balance: 799
2018-02-25 14:48:14,936 - ALPHA_MIND - INFO - 2011-01-04 is finished
2018-02-25 14:48:14,952 - ALPHA_MIND - INFO - 2011-01-18 partial re-balance: 799
2018-02-25 14:48:15,352 - ALPHA_MIND - INFO - 2011-01-18 is finished
2018-02-25 14:48:15,372 - ALPHA_MIND - INFO - 2011-02-01 partial re-balance: 798
2018-02-25 14:48:15,736 - ALPHA_MIND - INFO - 2011-02-01 is finished
2018-02-25 14:48:15,756 - ALPHA_MIND - INFO - 2011-02-22 partial re-balance: 797
2018-02-25 14:48:16,127 - ALPHA_MIND - INFO - 2011-02-22 is finished
2018-02-25 14:48:16,148 - ALPHA_MIND - INFO - 2011-03-08 partial re-balance: 798
2018-02-25 14:48:16,509 - ALPHA_MIND - INFO - 2011-03-08 is finished
2018-02-25 14:48:16,530 - ALPHA_MIND - INFO - 2011-03-22 partial re-balance: 798
2018-02-25 14:48:16,882 - ALPHA_MIND - INFO - 2011-03-22 is finished
2018-02-25 14:48:16,914 - ALPHA_MIND - INFO - 2011-04-07 partial re-balance: 798
2018-02-25 14:48:17

2018-02-25 14:48:37,149 - ALPHA_MIND - INFO - 2013-04-12 partial re-balance: 800
2018-02-25 14:48:37,516 - ALPHA_MIND - INFO - 2013-04-12 is finished
2018-02-25 14:48:37,535 - ALPHA_MIND - INFO - 2013-04-26 partial re-balance: 800
2018-02-25 14:48:37,960 - ALPHA_MIND - INFO - 2013-04-26 is finished
2018-02-25 14:48:37,981 - ALPHA_MIND - INFO - 2013-05-15 partial re-balance: 800
2018-02-25 14:48:38,379 - ALPHA_MIND - INFO - 2013-05-15 is finished
2018-02-25 14:48:38,406 - ALPHA_MIND - INFO - 2013-05-29 partial re-balance: 800
2018-02-25 14:48:38,778 - ALPHA_MIND - INFO - 2013-05-29 is finished
2018-02-25 14:48:38,799 - ALPHA_MIND - INFO - 2013-06-17 partial re-balance: 800
2018-02-25 14:48:39,181 - ALPHA_MIND - INFO - 2013-06-17 is finished
2018-02-25 14:48:39,200 - ALPHA_MIND - INFO - 2013-07-01 partial re-balance: 800
2018-02-25 14:48:39,568 - ALPHA_MIND - INFO - 2013-07-01 is finished
2018-02-25 14:48:39,591 - ALPHA_MIND - INFO - 2013-07-15 partial re-balance: 800
2018-02-25 14:48:40

2018-02-25 14:48:59,572 - ALPHA_MIND - INFO - 2015-07-16 partial re-balance: 800
2018-02-25 14:48:59,943 - ALPHA_MIND - INFO - 2015-07-16 is finished
2018-02-25 14:48:59,962 - ALPHA_MIND - INFO - 2015-07-30 partial re-balance: 800
2018-02-25 14:49:00,355 - ALPHA_MIND - INFO - 2015-07-30 is finished
2018-02-25 14:49:00,374 - ALPHA_MIND - INFO - 2015-08-13 partial re-balance: 800
2018-02-25 14:49:00,759 - ALPHA_MIND - INFO - 2015-08-13 is finished
2018-02-25 14:49:00,779 - ALPHA_MIND - INFO - 2015-08-27 partial re-balance: 800
2018-02-25 14:49:01,163 - ALPHA_MIND - INFO - 2015-08-27 is finished
2018-02-25 14:49:01,182 - ALPHA_MIND - INFO - 2015-09-14 partial re-balance: 800
2018-02-25 14:49:01,585 - ALPHA_MIND - INFO - 2015-09-14 is finished
2018-02-25 14:49:01,605 - ALPHA_MIND - INFO - 2015-09-28 partial re-balance: 800
2018-02-25 14:49:01,981 - ALPHA_MIND - INFO - 2015-09-28 is finished
2018-02-25 14:49:02,000 - ALPHA_MIND - INFO - 2015-10-19 partial re-balance: 800
2018-02-25 14:49:02

2018-02-25 14:49:22,212 - ALPHA_MIND - INFO - 2017-10-19 partial re-balance: 800
2018-02-25 14:49:22,594 - ALPHA_MIND - INFO - 2017-10-19 is finished
2018-02-25 14:49:22,614 - ALPHA_MIND - INFO - 2017-11-02 partial re-balance: 800
2018-02-25 14:49:23,003 - ALPHA_MIND - INFO - 2017-11-02 is finished
2018-02-25 14:49:23,023 - ALPHA_MIND - INFO - 2017-11-16 partial re-balance: 800
2018-02-25 14:49:23,442 - ALPHA_MIND - INFO - 2017-11-16 is finished
2018-02-25 14:49:23,462 - ALPHA_MIND - INFO - 2017-11-30 partial re-balance: 800
2018-02-25 14:49:23,854 - ALPHA_MIND - INFO - 2017-11-30 is finished
2018-02-25 14:49:23,875 - ALPHA_MIND - INFO - 2017-12-14 partial re-balance: 800
2018-02-25 14:49:24,276 - ALPHA_MIND - INFO - 2017-12-14 is finished
2018-02-25 14:49:24,281 - ALPHA_MIND - INFO - 2017-12-28 partial re-balance: 800
2018-02-25 14:49:24,679 - ALPHA_MIND - INFO - 2017-12-28 is finished
2018-02-25 14:49:24,711 - ALPHA_MIND - INFO - 2018-01-12 partial re-balance: 800
2018-02-25 14:49:25

IndexError: Out of bounds on buffer access (axis 0)

In [6]:
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=ref_dates)

# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
                                                  offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']

ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']

ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, 905),
                                             secondary_y='tc_cost')

ValueError: Shape of passed values is (3, 173), indices imply (3, 174)