In [None]:
%matplotlib inline
import os

import numpy as np
import pandas as pd
from alphamind.api import *
from PyFin.api import *
from matplotlib import pyplot as plt
plt.style.use('fivethirtyeight')

# Parameter Setting
----------------------

In [None]:
def _map_freq(freq):

    if freq == '1m':
        horizon = 21
    elif freq == '1w':
        horizon = 4
    elif freq == '2w':
        horizon = 8
    elif freq == '3w':
        horizon = 12
    elif freq == '1d':
        horizon = 0
    else:
        raise ValueError("Unrecognized freq: {0}".format(freq))
    return horizon

In [None]:
# Alpha factors and the weight applied to each one (same order as `factors`).
factors = ['VAL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO', 'IVR']
factor_weights = np.array([0.034129344,
                           0.048765746,
                           0.042747382,
                           -0.015900173,
                           0.019044573,
                           -0.001792638,
                           0.014277867,
                           0.04])
assert len(factors) == len(factor_weights), "each factor needs exactly one weight"

# The connection string contains credentials, so it is read from the environment
# rather than being stored in the notebook.
db_uri = os.environ.get('ALPHAMIND_DB_URI')
if not db_uri:
    raise RuntimeError("Set the ALPHAMIND_DB_URI environment variable to the database connection "
                       "string, e.g. postgres+psycopg2://<user>:<password>@<host>/alpha")
engine = SqlEngine(db_uri)

universe = Universe('custom', ['zz500'])
benchmark_code = 905

# Risk columns used by the backtests: SIZE plus every name in `industry_styles`.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles

start_date = '2012-01-01'
end_date = '2017-10-13'

# Multipliers applied to the benchmark exposure to build the lower / upper bound of
# every constraint other than 'SIZE' and 'total'.
industry_lower = 0.75
industry_upper = 1.25

# Rebalance schedule and the matching return horizon.
freq = '1w'
horizon = _map_freq(freq)
dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse',
                     dateGenerationRule=DateGeneration.Backward)

In [None]:
# Load the factor data for every schedule date. The benchmark comes from the
# `benchmark_code` setting instead of a repeated literal.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
# One group per trade date; the backtest loop iterates over these groups.
factor_groups = factor_all_data.groupby('trade_date')

# Naive Executor Strategy
---------------------------------

In [None]:
# Backtest with a NaiveExecutor. Collects one return, turnover and leverage value per
# trade date.
rets = []
turn_overs = []
leverags = []  # name kept as-is: other cells refer to it
executor = NaiveExecutor()

# No return series is handed to er_portfolio_analysis.
dx_return = None

for date, data in factor_groups:
    # Only rows without missing values are used for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Constraint exposures: the `constraint_risk` columns plus a column of ones whose
    # target ('total') is the sum of the benchmark weights.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w

    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        if name in ('total', 'SIZE'):
            # Pinned exactly to the benchmark exposure.
            constraint.set_constraints(name, lower_bound=target, upper_bound=target)
        else:
            # Every other exposure may move inside the configured band.
            constraint.set_constraints(name,
                                       lower_bound=target * industry_lower,
                                       upper_bound=target * industry_upper)

    # Expected return: processed factor values combined with the fixed factor weights.
    er = factor_processing(total_data[factors].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          dx_return,
                                          constraint,
                                          False,
                                          benchmark_w)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = executor.execute(target_pos=target_pos)

    executed_codes = executed_pos.code.tolist()
    dx_retuns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)

    # After the merges `weight_x` is the executed weight and `weight_y` the benchmark
    # weight; `dx` comes from fetch_dx_return.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_retuns, on=['code'])

    leverage = result.weight_x.abs().sum()

    # Active return: executed weights minus the benchmark rescaled to the same leverage.
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
    rets.append(ret)
    executor.set_current(executed_pos)
    turn_overs.append(turn_over)
    leverags.append(leverage)

In [None]:
# Per-period results of the naive backtest. The 'leverage' column uses the list collected
# by the loop (`leverags`); the scalar `leverage` only holds the last iteration's value.
ret_df1 = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=dates)
# Add one more row after the last schedule date, then shift everything down one row so
# each value sits on the date after the one it was computed on.
ret_df1.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df1 = ret_df1.shift(1)
ret_df1.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df1['tc_cost'] = ret_df1.turn_over * 0.002

In [None]:
# Cumulative return against cumulative transaction cost (cost on the secondary axis).
ret_df1[['returns', 'tc_cost']].cumsum().plot(
    figsize=(12, 6),
    title='Fixed frequency rebalanced: {0}'.format(freq),
    secondary_y='tc_cost',
)

In [None]:
# Return per period net of transaction cost.
ret_atfer_tc = ret_df1['returns'] - ret_df1['tc_cost']
# Mean over standard deviation, scaled by sqrt(52).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(52))

In [None]:
# 60-row rolling standard deviation of returns and leverage (leverage on the secondary axis).
(ret_df1[['returns', 'leverage']]
 .rolling(window=60)
 .std()
 .plot(figsize=(12, 6), title='rolling std', secondary_y='leverage'))

# Threshold Turn Over + Strategy
------------------------------------

In [None]:
# Switch to a daily schedule; `horizon` and `dates` are rebuilt to match.
freq = '1d'
horizon = _map_freq(freq)
dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse',
                     dateGenerationRule=DateGeneration.Backward)

In [None]:
# Reload the factor data for the current schedule. The benchmark comes from the
# `benchmark_code` setting instead of a repeated literal.
all_data = engine.fetch_data_range(universe, factors, dates=dates, benchmark=benchmark_code)
factor_all_data = all_data['factor']
# One group per trade date; the backtest loops iterate over these groups.
factor_groups = factor_all_data.groupby('trade_date')

In [None]:
# Backtest with a ThresholdExecutor wrapped in an ExecutionPipeline. Collects one return,
# turnover and leverage value per trade date.
rets = []
turn_overs = []
leverags = []  # name kept as-is: other cells refer to it
turn_over_threshold = 0.90
executor = ThresholdExecutor(turn_over_threshold=turn_over_threshold)
execution_pipeline = ExecutionPipeline(executors=[executor])

# No return series is handed to er_portfolio_analysis.
dx_return = None

for date, data in factor_groups:
    # Only rows without missing values are used for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Constraint exposures: the `constraint_risk` columns plus a column of ones whose
    # target ('total') is the sum of the benchmark weights.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w

    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        if name in ('total', 'SIZE'):
            # Pinned exactly to the benchmark exposure.
            constraint.set_constraints(name, lower_bound=target, upper_bound=target)
        else:
            # Every other exposure may move inside the configured band.
            constraint.set_constraints(name,
                                       lower_bound=target * industry_lower,
                                       upper_bound=target * industry_upper)

    # Expected return: processed factor values combined with the fixed factor weights.
    er = factor_processing(total_data[factors].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          dx_return,
                                          constraint,
                                          False,
                                          benchmark_w)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    executed_codes = executed_pos.code.tolist()
    dx_retuns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)

    # After the merges `weight_x` is the executed weight and `weight_y` the benchmark
    # weight; `dx` comes from fetch_dx_return.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_retuns, on=['code'])
    leverage = result.weight_x.abs().sum()

    # Active return: executed weights minus the benchmark rescaled to the same leverage.
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
    rets.append(ret)
    leverags.append(executed_pos.weight.abs().sum())
    turn_overs.append(turn_over)

In [None]:
# Per-period results of the threshold backtest.
ret_df2 = pd.DataFrame(dict(returns=rets, turn_over=turn_overs, leverage=leverags), index=dates)
# Add one more row after the last schedule date, then shift everything down one row so
# each value sits on the date after the one it was computed on.
ret_df2.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df2 = ret_df2.shift(periods=1)
ret_df2.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df2['tc_cost'] = 0.002 * ret_df2['turn_over']

In [None]:
# Cumulative return against cumulative transaction cost (cost on the secondary axis).
ret_df2[['returns', 'tc_cost']].cumsum().plot(
    figsize=(12, 6),
    title='Threshold tc rebalanced: Monitored freq {0}, {1} tc'.format(freq, turn_over_threshold),
    secondary_y='tc_cost',
)

In [None]:
# Return per period net of transaction cost.
ret_atfer_tc = ret_df2['returns'] - ret_df2['tc_cost']
# Mean over standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))

In [None]:
# 60-row rolling standard deviation of returns and leverage (leverage on the secondary axis).
(ret_df2[['returns', 'leverage']]
 .rolling(window=60)
 .std()
 .plot(figsize=(12, 6), title='rolling std', secondary_y='leverage'))

# Target Vol + Threshold Turn Over + Strategy
------------------------

In [None]:
# Backtest with a TargetVolExecutor followed by a ThresholdExecutor in one
# ExecutionPipeline. Collects one return, turnover and leverage value per trade date.
rets = []
turn_overs = []
leverags = []  # name kept as-is: other cells refer to it
target_vol = 0.002
turn_over_threshold = 0.70
window = 30
executor1 = TargetVolExecutor(window=window, target_vol=target_vol)
executor2 = ThresholdExecutor(turn_over_threshold=turn_over_threshold, is_relative=False)
execution_pipeline = ExecutionPipeline(executors=[executor1, executor2])

# No return series is handed to er_portfolio_analysis.
dx_return = None

for date, data in factor_groups:
    # Only rows without missing values are used for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Constraint exposures: the `constraint_risk` columns plus a column of ones whose
    # target ('total') is the sum of the benchmark weights.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w

    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        if name in ('total', 'SIZE'):
            # Pinned exactly to the benchmark exposure.
            constraint.set_constraints(name, lower_bound=target, upper_bound=target)
        else:
            # Every other exposure may move inside the configured band.
            constraint.set_constraints(name,
                                       lower_bound=target * industry_lower,
                                       upper_bound=target * industry_upper)

    # Expected return: processed factor values combined with the fixed factor weights.
    er = factor_processing(total_data[factors].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          dx_return,
                                          constraint,
                                          False,
                                          benchmark_w)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    executed_codes = executed_pos.code.tolist()
    dx_retuns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)

    # After the merges `weight_x` is the executed weight and `weight_y` the benchmark
    # weight; `dx` comes from fetch_dx_return.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_retuns, on=['code'])

    leverage = result.weight_x.abs().sum()

    # Active return: executed weights minus the benchmark rescaled to the same leverage.
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
    rets.append(ret)
    # Hand the realized return back to the pipeline before the next date is processed.
    execution_pipeline.update({'return': ret})
    turn_overs.append(turn_over)
    leverags.append(executed_pos.weight.abs().sum())
    # Per-date diagnostic: date, leverage and the first executor's m_vol result.
    print(date, leverage, execution_pipeline.executors[0].m_vol.result())

In [None]:
# Per-period results of the target-vol + threshold backtest.
ret_df3 = pd.DataFrame(dict(returns=rets, turn_over=turn_overs, leverage=leverags), index=dates)
# Add one more row after the last schedule date, then shift everything down one row so
# each value sits on the date after the one it was computed on.
ret_df3.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df3 = ret_df3.shift(periods=1)
ret_df3.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df3['tc_cost'] = 0.002 * ret_df3['turn_over']

In [None]:
# Cumulative return against cumulative transaction cost (cost on the secondary axis).
ret_df3[['returns', 'tc_cost']].cumsum().plot(
    figsize=(12, 6),
    title='Threshold tc + Target vol rebalanced: Monitored freq {0}, {1} tc, {2} vol target'.format(
        freq, turn_over_threshold, target_vol),
    secondary_y='tc_cost',
)

In [None]:
# 60-row rolling standard deviation of returns and leverage (leverage on the secondary axis).
(ret_df3[['returns', 'leverage']]
 .rolling(window=60)
 .std()
 .plot(figsize=(12, 6), title='rolling std', secondary_y='leverage'))

In [None]:
# Return per period net of transaction cost.
ret_atfer_tc = ret_df3['returns'] - ret_df3['tc_cost']
# Mean over standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))

In [None]:
# Show the last five rows of the target-vol results.
ret_df3.tail(n=5)

# Target Turn Over + Strategy
------------------------

In [None]:
# Backtest with a turnover target passed to er_portfolio_analysis, executed through a
# NaiveExecutor pipeline. Collects one return, turnover and leverage value per trade date.
rets = []
turn_overs = []
leverags = []  # name kept as-is: other cells refer to it
turn_over_target_base = 0.04
executor = NaiveExecutor()
execution_pipeline = ExecutionPipeline(executors=[executor])
previous_pos = pd.DataFrame()

# No return series is handed to er_portfolio_analysis.
dx_return = None

for date, data in factor_groups:
    # Only rows without missing values are used for this date.
    total_data = data.dropna()
    industry = total_data.industry.values
    benchmark_w = total_data.weight.values

    # Constraint exposures: the `constraint_risk` columns plus a column of ones whose
    # target ('total') is the sum of the benchmark weights.
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(total_data), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w

    constraint = Constraints(risk_exp_expand, risk_names)
    for name, target in zip(risk_names, risk_target):
        if name in ('total', 'SIZE'):
            # Pinned exactly to the benchmark exposure.
            constraint.set_constraints(name, lower_bound=target, upper_bound=target)
        else:
            # Every other exposure may move inside the configured band.
            constraint.set_constraints(name,
                                       lower_bound=target * industry_lower,
                                       upper_bound=target * industry_upper)

    # Expected return: processed factor values combined with the fixed factor weights.
    er = factor_processing(total_data[factors].values,
                           pre_process=[winsorize_normal, standardize],
                           post_process=[standardize]) @ factor_weights

    codes = total_data['code'].values

    if previous_pos.empty:
        # First date: there is no holding to measure turnover against.
        current_position = None
        turn_over_target = None
    else:
        # Align the previous holdings with today's codes. `reindex` yields NaN rows for
        # codes that were not held before (label lookup with `.loc` raises KeyError for
        # missing labels on current pandas); those rows become zero weights. Working on
        # a new frame also leaves the previously executed position untouched.
        remained_pos = previous_pos.set_index('code').reindex(codes).fillna(0.)
        turn_over_target = turn_over_target_base
        current_position = remained_pos.weight.values

    try:
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              dx_return,
                                              constraint,
                                              False,
                                              benchmark_w,
                                              current_position=current_position,
                                              turn_over_target=turn_over_target)
    except ValueError:
        # The turnover-constrained call failed: rerun without the turnover arguments.
        print('{0} full rebalance'.format(date))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry,
                                              dx_return,
                                              constraint,
                                              False,
                                              benchmark_w)

    target_pos['code'] = codes

    turn_over, executed_pos = execution_pipeline.execute(target_pos=target_pos)

    executed_codes = executed_pos.code.tolist()
    dx_retuns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)

    # After the merges `weight_x` is the executed weight and `weight_y` the benchmark
    # weight; `dx` comes from fetch_dx_return.
    result = pd.merge(executed_pos, total_data, on=['code'], how='inner')
    result = pd.merge(result, dx_retuns, on=['code'])
    leverage = result.weight_x.abs().sum()

    # Active return: executed weights minus the benchmark rescaled to the same leverage.
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
    rets.append(ret)
    leverags.append(executed_pos.weight.abs().sum())
    turn_overs.append(turn_over)
    previous_pos = executed_pos

In [None]:
# Per-period results of the target-turnover backtest.
ret_df4 = pd.DataFrame(dict(returns=rets, turn_over=turn_overs, leverage=leverags), index=dates)
# Add one more row after the last schedule date, then shift everything down one row so
# each value sits on the date after the one it was computed on.
ret_df4.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df4 = ret_df4.shift(periods=1)
ret_df4.iloc[0] = 0.
# Transaction cost: 0.002 per unit of turnover.
ret_df4['tc_cost'] = 0.002 * ret_df4['turn_over']

In [None]:
# Cumulative return against cumulative transaction cost (cost on the secondary axis).
# The title has two placeholders, so only the two matching values are passed to format().
ret_df4[['returns', 'tc_cost']].cumsum().plot(
    figsize=(12, 6),
    title='Target turn over rebalanced: Rebalance freq {0}, {1} turnover_target'.format(
        freq, turn_over_target_base),
    secondary_y='tc_cost',
)

In [None]:
# Return per period net of transaction cost.
ret_atfer_tc = ret_df4['returns'] - ret_df4['tc_cost']
# Mean over standard deviation, scaled by sqrt(252).
print("sharp: ", ret_atfer_tc.mean() / ret_atfer_tc.std() * np.sqrt(252))

In [None]:
# 60-row rolling standard deviation of returns and leverage (leverage on the secondary axis).
(ret_df4[['returns', 'leverage']]
 .rolling(window=60)
 .std()
 .plot(figsize=(12, 6), title='rolling std', secondary_y='leverage'))