In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *
from PyFin.Math.Accumulators import MovingMaxDrawdown
plt.style.use('fivethirtyeight')

## 0. Parameter Settings
---------------------

In [None]:
# --- Data source, investable universe and benchmark ---
engine = SqlEngine()
universe = Universe('custom', ['zz500'])
benchmark_code = 905

# --- Risk columns ---
# Both lists hold the same columns: SIZE followed by every industry style.
neutralize_risk = ['SIZE', *industry_styles]
constraint_risk = ['SIZE', *industry_styles]

# Industry exposures are allowed to range between these multiples of the
# benchmark's own exposure (see the constraint set-up in the backtest loops).
industry_lower, industry_upper = 0.5, 1.5

# --- Backtest window and rebalance schedule ---
start_date, end_date = '2015-01-01', '2017-10-16'
freq = '1d'
horizon = map_freq(freq)
dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse',
                     dateGenerationRule=DateGeneration.Backward)

## 1. Current Production Strategy
-----------------------

* **Factor Group #1**
  * RVOL: 0.05
  * EPS: 0.3
  * DROEAfterNonRecurring: 0.35
  * DivP: 0.075
  * CFinc1: 0.15
  * BDTO: 0.05
    
    
* **Factor Group #2**
  * VAL: 0.034129344
  * RVOL: 0.015881607
  * ROEDiluted: 0.048765746
  * GREV: 0.042747382
  * EPS: -0.015900173
  * CHV: 0.019044573
  * CFinc1: -0.001792638
  * BDTO: 0.014277867

In [None]:
# Each factor is written next to its weight so the two lists cannot drift out
# of alignment; zip() transposes the pairs back into a name list and a weight list.
factor_group1, factor_weight1 = map(list, zip(
    ('RVOL', 0.05),
    ('EPS', 0.3),
    ('DROEAfterNonRecurring', 0.35),
    ('DivP', 0.075),
    ('CFinc1', 0.15),
    ('BDTO', 0.05),
))

factor_group2, factor_weight2 = map(list, zip(
    ('VAL', 0.034129344),
    ('RVOL', 0.015881607),
    ('ROEDiluted', 0.048765746),
    ('GREV', 0.042747382),
    ('EPS', -0.015900173),
    ('CHV', 0.019044573),
    ('CFinc1', -0.001792638),
    ('BDTO', 0.014277867),
))

In [None]:
# Unique factor names across both groups. sorted() gives a deterministic order;
# a bare list(set(...)) of strings can change order between kernel restarts
# because of string hash randomisation.
factors = sorted(set(factor_group1 + factor_group2))

In [None]:
# Pull factor data for every schedule date in one call. The benchmark comes
# from the `benchmark_code` setting instead of a repeated literal, so changing
# the setting in the parameter cell is enough.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
# One group per trade date; the backtest loops iterate over these groups.
factor_groups = factor_all_data.groupby('trade_date')

In [None]:
# Backtest of the production strategy (factor groups #1 and #2).
# Target portfolios are passed through a ThresholdExecutor configured with
# `turn_over_threshold`.
# NOTE(review): presumably the executor skips rebalances whose turnover is
# below the threshold -- confirm against the alphamind executor implementation.
rets = []
turn_overs = []
leverags = []
turn_over_threshold = 0.70
executor = ThresholdExecutor(turn_over_threshold=turn_over_threshold)
execution_pipeline = ExecutionPipeline(executors=[executor])

for date, data in factor_groups:
    # Rows with any missing value are excluded for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Exposure matrix = constrained risk columns + a column of ones. The ones
    # column ('total') makes the sum of weights a constrained quantity.
    risk_exp_expand = np.concatenate((total_data[constraint_risk].values,
                                      np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    # Benchmark exposure to every constrained column.
    risk_target = risk_exp_expand.T @ benchmark_w

    # No explicit per-asset weight bounds are passed to er_portfolio_analysis.
    constraint = Constraints(risk_exp_expand, risk_names)
    for idx, name in enumerate(risk_names):
        if name in ('total', 'SIZE'):
            # Match the benchmark exactly on total weight and SIZE.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx],
                                       upper_bound=risk_target[idx])
        else:
            # Remaining (industry) exposures may deviate within a band.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx] * industry_lower,
                                       upper_bound=risk_target[idx] * industry_upper)

    er1 = factor_processing(total_data[factor_group1].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight1

    er2 = factor_processing(total_data[factor_group2].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight2

    # Each group signal is scaled to unit standard deviation before summing.
    er = (er1 / er1.std() + er2 / er2.std())

    # Third positional argument (dx_return) is None: returns are not supplied here.
    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          None,
                                          constraint,
                                          False,
                                          benchmark_w)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    # Returns are requested as of the business day after `date`.
    next_date = advanceDateByCalendar('china.sse', date, '1b').strftime('%Y-%m-%d')
    dx_returns = engine.fetch_dx_return(next_date,
                                        executed_pos.code.tolist(),
                                        horizon=horizon)

    # After the merge: weight_x = executed weight, weight_y = benchmark weight.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])

    # Return of the active (portfolio minus benchmark) weights.
    ret = (result.weight_x - result.weight_y).values @ result.dx.values
    rets.append(ret)
    leverags.append(executed_pos.weight.abs().sum())
    turn_overs.append(turn_over)

In [None]:
# Per-date gross active return and turnover, indexed by the rebalance schedule.
ret_df1 = pd.DataFrame(data={'returns': rets, 'turn_over': turn_overs}, index=dates)
# Add one more row after the last schedule date, then push every value one
# row down; the first row is zero-filled.
ret_df1.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df1 = ret_df1.shift(periods=1)
ret_df1.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df1['tc_cost'] = ret_df1['turn_over'] * 0.002

In [None]:
# Cumulative gross return with cumulative transaction cost on the secondary axis.
ret_df1.loc[:, ['returns', 'tc_cost']].cumsum().plot(
    title='Threshold tc rebalanced: Monitored freq {0}, {1} tc'.format(freq, turn_over_threshold),
    figsize=(12, 6),
    secondary_y='tc_cost')

In [None]:
# Net series: gross active return minus transaction cost.
ret_atfer_tc = ret_df1['returns'] - ret_df1['tc_cost']

# Drawdown accumulator whose window covers the whole sample.
mmd = MovingMaxDrawdown(window=len(ret_atfer_tc))
for net_ret in ret_atfer_tc:
    mmd.push({'ret': net_ret})

# Mean per-period return scaled by 252 periods, in percent.
print("total return: ", ret_atfer_tc.mean() * 252 * 100)
# Mean / standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))
# Negated first element of the accumulator result, in percent.
print("mmd: ", -mmd.result()[0]*100)

## 2. Current Production Strategy with Target Turn Over
--------------------------------------------

In [None]:
# Backtest of the production strategy (factor groups #1 and #2) where the
# optimiser receives the previous holdings and a turnover target.
rets = []
turn_overs = []
leverags = []
turn_over_target_base = 0.04
executor = NaiveExecutor()
execution_pipeline = ExecutionPipeline(executors=[executor])
# Positions executed on the previous date; empty before the first rebalance.
previous_pos = pd.DataFrame()

for date, data in factor_groups:
    # Rows with any missing value are excluded for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values
    codes = total_data['code'].values

    # Exposure matrix = constrained risk columns + a column of ones. The ones
    # column ('total') makes the sum of weights a constrained quantity.
    risk_exp_expand = np.concatenate((total_data[constraint_risk].values,
                                      np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    # Benchmark exposure to every constrained column.
    risk_target = risk_exp_expand.T @ benchmark_w

    # No explicit per-asset weight bounds are passed to er_portfolio_analysis.
    constraint = Constraints(risk_exp_expand, risk_names)
    for idx, name in enumerate(risk_names):
        if name in ('total', 'SIZE'):
            # Match the benchmark exactly on total weight and SIZE.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx],
                                       upper_bound=risk_target[idx])
        else:
            # Remaining (industry) exposures may deviate within a band.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx] * industry_lower,
                                       upper_bound=risk_target[idx] * industry_upper)

    er1 = factor_processing(total_data[factor_group1].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight1

    er2 = factor_processing(total_data[factor_group2].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight2

    # Each group signal is scaled to unit standard deviation before summing.
    er = (er1 / er1.std() + er2 / er2.std())

    if previous_pos.empty:
        # First date: nothing is held yet, so no turnover target applies.
        current_position = None
        turn_over_target = None
    else:
        # Align the previous holdings to today's codes. reindex() yields NaN
        # for codes that were not held (filled with 0 here) instead of raising
        # KeyError like .loc[list] does, and it leaves `previous_pos` unmodified.
        current_position = (previous_pos.set_index('code')['weight']
                            .reindex(codes)
                            .fillna(0.)
                            .values)
        turn_over_target = turn_over_target_base

    try:
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              None,
                                              constraint,
                                              False,
                                              benchmark_w,
                                              current_position=current_position,
                                              turn_over_target=turn_over_target)
    except ValueError:
        # Deliberate fallback: when the turnover-constrained call raises
        # ValueError, rebalance without the turnover target and report the date.
        print('{0} full rebalance'.format(date))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              None,
                                              constraint,
                                              False,
                                              benchmark_w)

    target_pos['code'] = codes

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    # Returns are requested as of the business day after `date`.
    next_date = advanceDateByCalendar('china.sse', date, '1b').strftime('%Y-%m-%d')
    dx_returns = engine.fetch_dx_return(next_date,
                                        executed_pos.code.tolist(),
                                        horizon=horizon)

    # After the merge: weight_x = executed weight, weight_y = benchmark weight.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])

    # Return of the active (portfolio minus benchmark) weights.
    ret = (result.weight_x - result.weight_y).values @ result.dx.values
    rets.append(ret)
    turn_overs.append(turn_over)
    previous_pos = executed_pos

In [None]:
# Per-date gross active return and turnover, indexed by the rebalance schedule.
ret_df2 = pd.DataFrame(data={'returns': rets, 'turn_over': turn_overs}, index=dates)
# Add one more row after the last schedule date, then push every value one
# row down; the first row is zero-filled.
ret_df2.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df2 = ret_df2.shift(periods=1)
ret_df2.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df2['tc_cost'] = ret_df2['turn_over'] * 0.002

In [None]:
# Cumulative gross return with cumulative transaction cost on the secondary axis.
ret_df2.loc[:, ['returns', 'tc_cost']].cumsum().plot(
    title='Target turn over rebalanced: Rebalance freq {0}, {1} turnover_target'.format(freq, turn_over_target_base),
    figsize=(12, 6),
    secondary_y='tc_cost')

In [None]:
# Net series: gross active return minus transaction cost.
ret_atfer_tc = ret_df2['returns'] - ret_df2['tc_cost']

# Drawdown accumulator whose window covers the whole sample.
mmd = MovingMaxDrawdown(window=len(ret_atfer_tc))
for net_ret in ret_atfer_tc:
    mmd.push({'ret': net_ret})

# Mean per-period return scaled by 252 periods, in percent.
print("total return: ", ret_atfer_tc.mean() * 252 * 100)
# Mean / standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))
# Negated first element of the accumulator result, in percent.
print("mmd: ", -mmd.result()[0]*100)

## 3. Candidate Strategy
---------------------

* **Factor Group #1**
  * RVOL: 0.05
  * EPS: 0.3
  * DROEAfterNonRecurring: 0.35
  * DivP: 0.075
  * CFinc1: 0.15
  * BDTO: 0.05
    
    
* **Factor Group #2**
  * VAL: 0.034129344
  * RVOL: 0.015881607
  * ROEDiluted: 0.048765746
  * GREV: 0.042747382
  * EPS: -0.015900173
  * CHV: 0.019044573
  * CFinc1: -0.001792638
  * BDTO: 0.014277867


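* **Factor Group #3** (group weight 0.5)
  * con_pe_rolling: -0.6
  * con_pb_rolling: -0.6
  * con_eps: 0.6
  * con_target_price: 1.2


* **Factor Group #4** (group weight 0.2)
  * IVR: 1.0


* **Factor Group #5** (group weight 0.0, i.e. currently switched off)
  * BBIC: 1.0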

In [None]:
# Each factor is written next to its weight; zip() transposes the pairs into a
# name list and a weight list. `group_weightN` is the multiplier applied to
# group N's standardised signal when the groups are combined.
factor_group1, factor_weight1 = map(list, zip(
    ('RVOL', 0.05),
    ('EPS', 0.3),
    ('DROEAfterNonRecurring', 0.35),
    ('DivP', 0.075),
    ('CFinc1', 0.15),
    ('BDTO', 0.05),
))
group_weight1 = 1.

factor_group2, factor_weight2 = map(list, zip(
    ('VAL', 0.034129344),
    ('RVOL', 0.015881607),
    ('ROEDiluted', 0.048765746),
    ('GREV', 0.042747382),
    ('EPS', -0.015900173),
    ('CHV', 0.019044573),
    ('CFinc1', -0.001792638),
    ('BDTO', 0.014277867),
))
group_weight2 = 1.

factor_group3, factor_weight3 = map(list, zip(
    ('con_pe_rolling', -0.6),
    ('con_pb_rolling', -0.6),
    ('con_eps', 0.6),
    ('con_target_price', 1.2),
))
group_weight3 = 0.5

factor_group4, factor_weight4 = map(list, zip(('IVR', 1.)))
group_weight4 = 0.2

# A group weight of zero means this group adds nothing to the combined signal.
factor_group5, factor_weight5 = map(list, zip(('BBIC', 1.)))
group_weight5 = 0.

In [None]:
# Unique factor names across all five groups. sorted() gives a deterministic
# order; a bare list(set(...)) of strings can change order between kernel
# restarts because of string hash randomisation.
factors = sorted(set(factor_group1 + factor_group2 + factor_group3 + factor_group4 + factor_group5))
# The benchmark comes from the `benchmark_code` setting instead of a repeated literal.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
# One group per trade date; the backtest loops iterate over these groups.
factor_groups = factor_all_data.groupby('trade_date')

In [None]:
# Backtest of the candidate strategy (factor groups #1-#5).
# Target portfolios are passed through a ThresholdExecutor configured with
# `turn_over_threshold`.
# NOTE(review): presumably the executor skips rebalances whose turnover is
# below the threshold -- confirm against the alphamind executor implementation.
rets = []
turn_overs = []
leverags = []
turn_over_threshold = 0.70
executor = ThresholdExecutor(turn_over_threshold=turn_over_threshold)
execution_pipeline = ExecutionPipeline(executors=[executor])

for date, data in factor_groups:
    # Rows with any missing value are excluded for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Exposure matrix = constrained risk columns + a column of ones. The ones
    # column ('total') makes the sum of weights a constrained quantity.
    risk_exp_expand = np.concatenate((total_data[constraint_risk].values,
                                      np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    # Benchmark exposure to every constrained column.
    risk_target = risk_exp_expand.T @ benchmark_w

    # No explicit per-asset weight bounds are passed to er_portfolio_analysis.
    constraint = Constraints(risk_exp_expand, risk_names)
    for idx, name in enumerate(risk_names):
        if name in ('total', 'SIZE'):
            # Match the benchmark exactly on total weight and SIZE.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx],
                                       upper_bound=risk_target[idx])
        else:
            # Remaining (industry) exposures may deviate within a band.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx] * industry_lower,
                                       upper_bound=risk_target[idx] * industry_upper)

    er1 = factor_processing(total_data[factor_group1].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight1

    er2 = factor_processing(total_data[factor_group2].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight2

    er3 = factor_processing(total_data[factor_group3].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight3

    er4 = factor_processing(total_data[factor_group4].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight4

    # Group 5 is built from its own factor list (it was previously computed
    # from factor_group4, so the BBIC factor never reached the signal).
    er5 = factor_processing(total_data[factor_group5].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight5

    # Each group signal is scaled to unit standard deviation and then
    # multiplied by its group weight.
    er = (group_weight1 * er1 / er1.std() +
          group_weight2 * er2 / er2.std() +
          group_weight3 * er3 / er3.std() +
          group_weight4 * er4 / er4.std() +
          group_weight5 * er5 / er5.std())

    # Third positional argument (dx_return) is None: returns are not supplied here.
    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          None,
                                          constraint,
                                          False,
                                          benchmark_w)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    # Returns are requested as of the business day after `date`.
    next_date = advanceDateByCalendar('china.sse', date, '1b').strftime('%Y-%m-%d')
    dx_returns = engine.fetch_dx_return(next_date,
                                        executed_pos.code.tolist(),
                                        horizon=horizon)

    # After the merge: weight_x = executed weight, weight_y = benchmark weight.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])

    # Return of the active (portfolio minus benchmark) weights.
    ret = (result.weight_x - result.weight_y).values @ result.dx.values
    rets.append(ret)
    leverags.append(executed_pos.weight.abs().sum())
    turn_overs.append(turn_over)

In [None]:
# Per-date gross active return and turnover, indexed by the rebalance schedule.
ret_df3 = pd.DataFrame(data={'returns': rets, 'turn_over': turn_overs}, index=dates)
# Add one more row after the last schedule date, then push every value one
# row down; the first row is zero-filled.
ret_df3.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df3 = ret_df3.shift(periods=1)
ret_df3.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df3['tc_cost'] = ret_df3['turn_over'] * 0.002

In [None]:
# Cumulative gross return with cumulative transaction cost on the secondary axis.
ret_df3.loc[:, ['returns', 'tc_cost']].cumsum().plot(
    title='Threshold tc rebalanced: Monitored freq {0}, {1} tc'.format(freq, turn_over_threshold),
    figsize=(12, 6),
    secondary_y='tc_cost')

In [None]:
# Net series: gross active return minus transaction cost.
ret_atfer_tc = ret_df3['returns'] - ret_df3['tc_cost']

# Drawdown accumulator whose window covers the whole sample.
mmd = MovingMaxDrawdown(window=len(ret_atfer_tc))
for net_ret in ret_atfer_tc:
    mmd.push({'ret': net_ret})

# Mean per-period return scaled by 252 periods, in percent.
print("total return: ", ret_atfer_tc.mean() * 252 * 100)
# Mean / standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))
# Negated first element of the accumulator result, in percent.
print("mmd: ", -mmd.result()[0]*100)

## 4. Candidate Strategy with Target Turn Over
-----------------

In [None]:
# Backtest of the candidate strategy (factor groups #1-#5) where the optimiser
# receives the previous holdings and a turnover target.
rets = []
turn_overs = []
leverags = []
turn_over_target_base = 0.04
executor = NaiveExecutor()
execution_pipeline = ExecutionPipeline(executors=[executor])
# Positions executed on the previous date; empty before the first rebalance.
previous_pos = pd.DataFrame()

for date, data in factor_groups:
    # Rows with any missing value are excluded for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values
    codes = total_data['code'].values

    # Exposure matrix = constrained risk columns + a column of ones. The ones
    # column ('total') makes the sum of weights a constrained quantity.
    risk_exp_expand = np.concatenate((total_data[constraint_risk].values,
                                      np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    # Benchmark exposure to every constrained column.
    risk_target = risk_exp_expand.T @ benchmark_w

    # No explicit per-asset weight bounds are passed to er_portfolio_analysis.
    constraint = Constraints(risk_exp_expand, risk_names)
    for idx, name in enumerate(risk_names):
        if name in ('total', 'SIZE'):
            # Match the benchmark exactly on total weight and SIZE.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx],
                                       upper_bound=risk_target[idx])
        else:
            # Remaining (industry) exposures may deviate within a band.
            constraint.set_constraints(name,
                                       lower_bound=risk_target[idx] * industry_lower,
                                       upper_bound=risk_target[idx] * industry_upper)

    er1 = factor_processing(total_data[factor_group1].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight1

    er2 = factor_processing(total_data[factor_group2].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight2

    er3 = factor_processing(total_data[factor_group3].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight3

    er4 = factor_processing(total_data[factor_group4].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight4

    # Group 5 is built from its own factor list (it was previously computed
    # from factor_group4, so the BBIC factor never reached the signal).
    er5 = factor_processing(total_data[factor_group5].values,
                            pre_process=[winsorize_normal, standardize],
                            post_process=[winsorize_normal, standardize]) @ factor_weight5

    # Each group signal is scaled to unit standard deviation and then
    # multiplied by its group weight.
    er = (group_weight1 * er1 / er1.std() +
          group_weight2 * er2 / er2.std() +
          group_weight3 * er3 / er3.std() +
          group_weight4 * er4 / er4.std() +
          group_weight5 * er5 / er5.std())

    if previous_pos.empty:
        # First date: nothing is held yet, so no turnover target applies.
        current_position = None
        turn_over_target = None
    else:
        # Align the previous holdings to today's codes. reindex() yields NaN
        # for codes that were not held (filled with 0 here) instead of raising
        # KeyError like .loc[list] does, and it leaves `previous_pos` unmodified.
        current_position = (previous_pos.set_index('code')['weight']
                            .reindex(codes)
                            .fillna(0.)
                            .values)
        turn_over_target = turn_over_target_base

    try:
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              None,
                                              constraint,
                                              False,
                                              benchmark_w,
                                              current_position=current_position,
                                              turn_over_target=turn_over_target)
    except ValueError:
        # Deliberate fallback: when the turnover-constrained call raises
        # ValueError, rebalance without the turnover target and report the date.
        print('{0} full rebalance'.format(date))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              None,
                                              constraint,
                                              False,
                                              benchmark_w)

    target_pos['code'] = codes

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    # Returns are requested as of the business day after `date`.
    next_date = advanceDateByCalendar('china.sse', date, '1b').strftime('%Y-%m-%d')
    dx_returns = engine.fetch_dx_return(next_date,
                                        executed_pos.code.tolist(),
                                        horizon=horizon)

    # After the merge: weight_x = executed weight, weight_y = benchmark weight.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])

    # Return of the active (portfolio minus benchmark) weights.
    ret = (result.weight_x - result.weight_y).values @ result.dx.values
    rets.append(ret)
    turn_overs.append(turn_over)
    previous_pos = executed_pos

In [None]:
# Per-date gross active return and turnover, indexed by the rebalance schedule.
ret_df4 = pd.DataFrame(data={'returns': rets, 'turn_over': turn_overs}, index=dates)
# Add one more row after the last schedule date, then push every value one
# row down; the first row is zero-filled.
ret_df4.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df4 = ret_df4.shift(periods=1)
ret_df4.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df4['tc_cost'] = ret_df4['turn_over'] * 0.002

In [None]:
# Cumulative gross return with cumulative transaction cost on the secondary axis.
ret_df4.loc[:, ['returns', 'tc_cost']].cumsum().plot(
    title='Target turn over rebalanced: Rebalance freq {0}, {1} turnover_target'.format(freq, turn_over_target_base),
    figsize=(12, 6),
    secondary_y='tc_cost')

In [None]:
# Net series: gross active return minus transaction cost.
ret_atfer_tc = ret_df4['returns'] - ret_df4['tc_cost']

# Drawdown accumulator whose window covers the whole sample.
mmd = MovingMaxDrawdown(window=len(ret_atfer_tc))
for net_ret in ret_atfer_tc:
    mmd.push({'ret': net_ret})

# Mean per-period return scaled by 252 periods, in percent.
print("total return: ", ret_atfer_tc.mean() * 252 * 100)
# Mean / standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))
# Negated first element of the accumulator result, in percent.
print("mmd: ", -mmd.result()[0]*100)