In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
# NOTE(review): the two star imports below are presumably where the un-prefixed
# helpers used later in this notebook come from (e.g. LAST, SqlEngine,
# makeSchedule, Constraints); which package provides which name is not visible
# here -- consider switching to explicit imports.
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
# Use the 'fivethirtyeight' matplotlib style sheet for the figures drawn below.
plt.style.use('fivethirtyeight')

# Parameter Setting
----------------------

In [3]:
# Earlier configuration, kept for reference:
#factors = ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO']
#factor_weights = np.array([0.05, 0.3, 0.35, 0.075, 0.15, 0.05])

# Alpha factors requested from the data engine. Each key is the name the
# back-test loops use to select the factor column; each value is the
# expression handed to `engine.fetch_data_range`.
factors = alpha_factors = {
    'eps': LAST('eps_q'),
    'roe': LAST('roe_q'),
    'bdto': LAST('BDTO'),
    'cfinc1': LAST('CFinc1'),
    'chv': LAST('CHV'),
    'rvol': LAST('RVOL'),
    'val': LAST('VAL'),
    'grev': LAST('GREV'),
    'droeafternonorecurring': LAST('DROEAfterNonRecurring')
}

# One weight per factor, in the insertion order of `factors`. The back-test
# loops compute `processed_factors @ factor_weights`, so this vector must have
# exactly len(factors) entries; the previous empty array made that product
# fail with a shape mismatch.
# TODO: equal weighting is a placeholder -- replace with the intended weights.
factor_weights = np.full(len(factors), 1. / len(factors))

engine = SqlEngine()
universe = Universe('custom', ['zz500'])
benchmark_code = 905
# Columns read from the merged data as risk exposures (`neutralize_risk`) and
# as the exposures pinned to the benchmark by the constraints (`constraint_risk`).
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
freq = '1w'

# Value passed as `horizon=` to `engine.fetch_dx_return` for each supported
# rebalance tenor.
horizon_by_freq = {'1m': 21, '1w': 4, '2w': 8, '3w': 12, '1d': 0}
if freq not in horizon_by_freq:
    raise ValueError("Unrecognized freq: {0}".format(freq))
horizon = horizon_by_freq[freq]

dates = makeSchedule('2012-01-01', '2017-11-02', tenor=freq, calendar='china.sse', dateGenerationRule=DateGeneration.Backward)

In [None]:
# Fetch the configured factors for every date in `dates`. The benchmark id
# comes from the `benchmark_code` setting instead of a second hard-coded copy.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
# One group per trade date; the back-test loop below iterates over these.
factor_groups = factor_all_data.groupby('trade_date')

# Fixed Frequency Rebalance Strategy
-------------------------------------

In [None]:
# Back-test that rebuilds the portfolio on every date in `factor_groups`.
# Outputs: `rets` (one active return per date) and `turn_overs` (one turnover
# figure per date), both consumed by the summary cell that follows.
rets = []
turn_overs = []
previous_pos = pd.DataFrame()

# Explicit list of factor column names, in the same order as `factor_weights`.
# Indexing a DataFrame with the dict itself is not reliable across pandas
# versions.
factor_names = list(factors)

for date, data in factor_groups:
    codes = data.code.tolist()
    ref_date = date.strftime('%Y-%m-%d')
    returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon)
    # Rows with any missing value are dropped, which leaves gaps in the index.
    total_data = pd.merge(data, returns, on=['code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    industry = total_data.industry.values
    dx_return = total_data.dx.values
    benchmark_w = total_data.weight.values

    # Constrained exposures plus a column of ones named 'total'.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w
    # NOTE(review): these bounds are computed but never passed to the
    # portfolio construction call below -- confirm whether they should be.
    lbound = np.zeros(len(total_data))
    ubound = 0.01 + benchmark_w

    # Pin every constrained exposure to the benchmark's exposure
    # (lower bound == upper bound).
    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        constraint.set_constraints(name, lower_bound=target, upper_bound=target)

    er = factor_processing(total_data[factor_names].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    pos, _ = er_portfolio_analysis(er,
                                   industry,
                                   dx_return,
                                   constraint,
                                   False,
                                   benchmark_w)
    # Assign by position: `total_data` has index gaps after dropna(), so
    # assigning the Series itself would align on labels and mis-assign codes.
    pos['code'] = total_data['code'].values

    ret = (pos.weight - benchmark_w) @ dx_return
    rets.append(ret)

    if previous_pos.empty:
        turn_over = 0.
    else:
        # The outer join keeps names that appear on only one side; their
        # missing weight is treated as zero.
        pos_merged = pd.merge(pos, previous_pos, on=['code'], how='outer').fillna(0)
        turn_over = np.abs(pos_merged.weight_x - pos_merged.weight_y).sum()

    turn_overs.append(turn_over)

    previous_pos = pos

In [None]:
# Cost charged per unit of turnover.
tc_rate = 0.002
# First schedule date after the last rebalance date.
next_date = advanceDateByCalendar('china.sse', dates[-1], freq)

ret_df = pd.DataFrame(index=dates, data={'returns': rets, 'turn_over': turn_overs})
# Append one extra row, then move every value down one row, so the figure
# computed at one date is reported against the following date.
ret_df.loc[next_date] = 0.
ret_df = ret_df.shift(1)
# The shift leaves the first row empty; report zero there.
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = tc_rate * ret_df['turn_over']

In [None]:
# Cumulative active return, with cumulative transaction cost on a secondary axis.
plot_title = 'Fixed frequency rebalanced: {0}'.format(freq)
cumulative = ret_df[['returns', 'tc_cost']].cumsum()
cumulative.plot(figsize=(12, 6), title=plot_title, secondary_y='tc_cost')

# Targeted Turn Over Strategy
-------------------------------

In [None]:
# Re-run the data preparation at daily frequency for the targeted-turnover
# strategy. This rebinds `freq`, `horizon`, `dates`, `all_data`,
# `factor_all_data` and `factor_groups`.
freq = '1d'

# Resolve the horizon first, so an unsupported `freq` fails before the
# expensive data fetch rather than after it.
horizon_by_freq = {'1m': 21, '1w': 4, '2w': 8, '3w': 12, '1d': 0}
if freq not in horizon_by_freq:
    raise ValueError("Unrecognized freq: {0}".format(freq))
horizon = horizon_by_freq[freq]

dates = makeSchedule('2012-01-01', '2017-09-19', tenor=freq, calendar='china.sse', dateGenerationRule=DateGeneration.Backward)
# The benchmark id comes from the `benchmark_code` setting instead of a
# hard-coded copy.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
factor_groups = factor_all_data.groupby('trade_date')

In [None]:
# Turnover threshold for the loop below: when the turnover needed to move to
# the newly built portfolio is below this value, the previous weights are kept
# and the recorded turnover for that date is zero.
target_turn_over = 0.5

In [None]:
# Back-test that builds a candidate portfolio on every date but only trades
# into it when the required turnover reaches `target_turn_over`; otherwise the
# previous weights are carried forward.
# Outputs: `rets` and `turn_overs`, one entry per date in `factor_groups`.
rets = []
turn_overs = []
previous_pos = pd.DataFrame()

# Explicit list of factor column names, in the same order as `factor_weights`.
# Indexing a DataFrame with the dict itself is not reliable across pandas
# versions.
factor_names = list(factors)

for date, data in factor_groups:
    codes = data.code.tolist()
    ref_date = date.strftime('%Y-%m-%d')
    returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon)
    total_data = pd.merge(data, returns, on=['code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    industry = total_data.industry.values
    dx_return = total_data.dx.values
    benchmark_w = total_data.weight.values

    # Constrained exposures plus a column of ones named 'total'.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w
    # NOTE(review): these bounds are computed but never passed to the
    # portfolio construction call below -- confirm whether they should be.
    lbound = np.zeros(len(total_data))
    ubound = 0.01 + benchmark_w

    # Pin every constrained exposure to the benchmark's exposure
    # (lower bound == upper bound).
    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        constraint.set_constraints(name, lower_bound=target, upper_bound=target)

    er = factor_processing(total_data[factor_names].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    pos, _ = er_portfolio_analysis(er,
                                   industry,
                                   dx_return,
                                   constraint,
                                   False,
                                   benchmark_w)
    # Positional assignment: `total_data` has index gaps after dropna().
    pos['code'] = total_data['code'].astype(int).values

    if previous_pos.empty:
        turn_over = 0.
    else:
        # The left join keeps exactly today's rows, in today's order, so the
        # carried-forward weights stay aligned with `benchmark_w` / `dx_return`.
        pos_merged = pd.merge(pos, previous_pos, on=['code'], how='left').fillna(0)
        turn_over = np.abs(pos_merged.weight_x - pos_merged.weight_y).sum()
        # The left join drops names that were held before but are absent
        # today. Selling them is turnover too, so add their previous weight
        # (this matches the outer-join turnover used by the fixed-frequency
        # strategy).
        exited = ~previous_pos.code.isin(pos.code)
        turn_over += previous_pos.weight[exited].abs().sum()

        if turn_over < target_turn_over:
            # Not enough change to justify trading: keep the previous weights
            # (zero for names not held before) and record no turnover.
            turn_over = 0.
            pos = pos_merged[['weight_y', 'code']].rename(columns={'weight_y': 'weight'})

    turn_overs.append(turn_over)

    ret = (pos.weight - benchmark_w) @ dx_return
    rets.append(ret)

    previous_pos = pos.copy()
    print('{0} is finished'.format(date))

In [None]:
# Cost charged per unit of turnover.
tc_rate = 0.002
# First schedule date after the last monitored date.
next_date = advanceDateByCalendar('china.sse', dates[-1], freq)

ret_df = pd.DataFrame(index=dates, data={'returns': rets, 'turn_over': turn_overs})
# Append one extra row, then move every value down one row, so the figure
# computed at one date is reported against the following date.
ret_df.loc[next_date] = 0.
ret_df = ret_df.shift(1)
# The shift leaves the first row empty; report zero there.
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = tc_rate * ret_df['turn_over']

In [None]:
# Cumulative active return, with cumulative transaction cost on a secondary axis.
plot_title = 'Targeted turn over rebalanced: Monitored freq {0}, {1} target'.format(freq, target_turn_over)
cumulative = ret_df[['returns', 'tc_cost']].cumsum()
cumulative.plot(figsize=(12, 6), title=plot_title, secondary_y='tc_cost')