In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *
from PyFin.Math.Accumulators.StatefulAccumulators import MovingAverage
from PyFin.Math.Accumulators.StatefulAccumulators import MovingSharp
from PyFin.Math.Accumulators.StatefulAccumulators import MovingMaxDrawdown

plt.style.use('ggplot')

In [47]:
"""
Back test parameter settings
"""

start_date = '2010-01-01'
end_date = '2018-02-27'

freq = '10b'
industry_lower = 0.5
industry_upper = 1.5
neutralized_risk = industry_styles
industry_name = 'sw_adj'
industry_level = 1
turn_over_target_base = 0.4
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 0
horizon = map_freq(freq)
universe = Universe("custom", ['zz800'])
data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
benchmark_code = 905

executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)

In [48]:
"""
Factor Model
"""

# alpha_factors = {
#     'f01': LAST('ep_q'),
#     'f02': LAST('roe_q'),
#     'f03': LAST('market_confidence_75d'),
#     'f04': LAST('DivP'),
#     'f05': LAST('val_q'),
#     'f06': LAST('con_np_rolling'),
#     'f07': LAST('GREV'),
#     'f08': LAST('con_pe_rolling_order'),
#     'f09': LAST('con_pb_rolling_order')
# }

# weights = dict(f01=1.,
#                f02=0.5,
#                f03=0.5,
#                f04=0.5,
#                f05=0.5,
#                f06=0.5,
#                f07=0.5,
#                f08=-0.5,
#                f09=-0.5)


alpha_factors = {
    'f01': LAST('ep_q'),
    'f02': LAST('roe_q'),
    'f03': LAST('market_confidence_25d'),
    'f04': LAST('ILLIQUIDITY'),
    'f05': LAST('cfinc1_q'),
    'f06': LAST('CFO2EV'),
    'f07': LAST('IVR'),
    'f08': LAST('con_pe_rolling_order'),
    'f09': LAST('con_pb_rolling_order'),
}

weights = dict(f01=1.,
               f02=0.5,
               f03=0.5,
               f04=0.5,
               f05=0.5,
               f06=0.5,
               f07=0.5,
               f08=-0.5,
               f09=-0.5)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)

def predict_worker(params):
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[winsorize_normal, standardize],
                         warm_start=0,
                         data_source=data_source)
    ref_date, model = params
    er = predict_by_model(ref_date, model, data_meta)
    return er

In [49]:
%%time

"""
Training Phase
"""

pass

Wall time: 0 ns


In [50]:
%%time

"""
Predicting Phase
"""

from dask.distributed import Client
client = Client('10.63.6.176:8786')

tasks = client.map(predict_worker, [(d.strftime('%Y-%m-%d'), alpha_model) for d in ref_dates], pure=False)
predicts = client.gather(tasks)
client.close()

Wall time: 7.17 s


In [51]:
"""
Shared Data
"""

industry_names = industry_list(industry_name, industry_level)
constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names
total_risk_names = constraint_risk + ['benchmark', 'total']

b_type = []
l_val = []
u_val = []

for name in total_risk_names:
    if name == 'benchmark':
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(benchmark_total_lower)
        u_val.append(benchmark_total_upper)
    elif name in {'SIZE', 'SIZENL', 'BETA', 'total'}:
        b_type.append(BoundaryType.ABSOLUTE)
        l_val.append(0.0)
        u_val.append(0.0)
    else:
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(industry_lower)
        u_val.append(industry_upper)
        
bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name, level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, offset=1).set_index('trade_date')

In [52]:
# rebalance

def create_scenario(weight_gap):

    previous_pos = pd.DataFrame()
    rets = []
    turn_overs = []
    leverags = []

    for i, ref_date in enumerate(ref_dates):
        ref_date = ref_date.strftime('%Y-%m-%d')
        industry_matrix = industry_total[industry_total.trade_date == ref_date]
        benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
        risk_matrix = risk_total[risk_total.trade_date == ref_date]

        res = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
        res = pd.merge(res, risk_matrix, on=['code'])
        res = res.dropna()
        codes = res.code.values.tolist()

        benchmark_w = res.weight.values
        is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))

        total_risk_exp = np.concatenate([res[constraint_risk].values.astype(float),
                                         is_in_benchmark,
                                         np.ones_like(is_in_benchmark)],
                                        axis=1)
        total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)

        lbound = np.maximum(0., benchmark_w - weight_gap)  # np.zeros(len(total_data))
        ubound = weight_gap + benchmark_w

        if previous_pos.empty:
            current_position = None
            turn_over_target = None
        else:
            previous_pos.set_index('code', inplace=True)
            remained_pos = previous_pos.loc[codes]

            remained_pos.fillna(0., inplace=True)
            turn_over_target = turn_over_target_base
            current_position = remained_pos.weight.values

        er = predicts[i].loc[codes].values

        try:
            target_pos, _ = er_portfolio_analysis(er,
                                                  industry_matrix.industry_name.values,
                                                  None,
                                                  constraints,
                                                  False,
                                                  benchmark_w,
                                                  method='risk_neutral',
                                                  turn_over_target=turn_over_target,
                                                  current_position=current_position,
                                                  lbound=lbound,
                                                  ubound=ubound)
        except ValueError:
            target_pos, _ = er_portfolio_analysis(er,
                                                  industry_matrix.industry_name.values,
                                                  None,
                                                  constraints,
                                                  False,
                                                  benchmark_w,
                                                  method='risk_neutral',
                                                  lbound=lbound,
                                                  ubound=ubound)

        target_pos['code'] = codes
        turn_over, executed_pos = executor.execute(target_pos=target_pos)

        executed_codes = executed_pos.code.tolist()
        dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)
        result = pd.merge(executed_pos, dx_returns, on=['code'])

        leverage = result.weight.abs().sum()

        ret = result.weight.values @ (np.exp(result.dx.values) - 1.)
        rets.append(np.log(1. + ret))
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverags.append(leverage)

        previous_pos = executed_pos
    
    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=ref_dates)

    # index return
    
    ret_df['index'] = index_return['dx']

    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['tc_cost'] = ret_df.turn_over * 0.002
    ret_df['ret_after_tc'] = ret_df['returns'] - ret_df['tc_cost'] - ret_df['index'] * ret_df['leverage']
    return ret_df

In [53]:
def create_report(ret_df, windows):
    sharp_calc = MovingSharp(windows)
    drawdown_calc = MovingMaxDrawdown(windows)
    max_drawdown_calc = MovingMaxDrawdown(len(ret_df))

    res_df = pd.DataFrame(columns=['daily_return', 'cum_ret', 'sharp', 'drawdown', 'max_drawn', 'leverage'])
    total_returns = 0.

    for i, ret in enumerate(ret_df['ret_after_tc']):
        date = ret_df.index[i]
        total_returns += ret
        sharp_calc.push({'ret': ret, 'riskFree': 0.})
        drawdown_calc.push({'ret': ret})
        max_drawdown_calc.push({'ret': ret})

        res_df.loc[date, 'daily_return'] = ret
        res_df.loc[date, 'cum_ret'] = total_returns
        res_df.loc[date, 'drawdown'] = drawdown_calc.result()[0]
        res_df.loc[date, 'max_drawn'] = max_drawdown_calc.result()[0]
        res_df.loc[date, 'leverage'] = ret_df.loc[date, 'leverage']

        if i < 5:
            res_df.loc[date, 'sharp'] = 0.
        else:
            res_df.loc[date, 'sharp'] = sharp_calc.result() * np.sqrt(windows)
    return res_df

In [54]:
weight_gaps = [0.005, 0.010, 0.015, 0.020]

with pd.ExcelWriter(f'{universe.base_universe[0]}_{benchmark_code}_perf.xlsx', engine='xlsxwriter') as writer:
    for i, weight_gap in enumerate(weight_gaps):
        ret_df = create_scenario(weight_gap)
        res_df = create_report(ret_df, 25)
        res_df.to_excel(writer, sheet_name=f'{i}')
        alpha_logger.info(f"{weight_gap} finished")

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
2018-03-05 10:26:22,348 - ALPHA_MIND - INFO - 0.005 finished
2018-03-05 10:27:18,069 - ALPHA_MIND - INFO - 0.01 finished
2018-03-05 10:28:13,730 - ALPHA_MIND - INFO - 0.015 finished
2018-03-05 10:29:09,414 - ALPHA_MIND - INFO - 0.02 finished


In [55]:
res_df

Unnamed: 0,daily_return,cum_ret,sharp,drawdown,max_drawn,leverage
2010-01-04,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000
2010-01-18,-0.015930,-0.015930,0.000000,-0.015930,-0.015930,1.00000
2010-02-01,-0.001602,-0.017532,0.000000,-0.017532,-0.017532,1.00000
2010-02-22,-0.007802,-0.025334,0.000000,-0.025334,-0.025334,0.99839
2010-03-08,0.008656,-0.016678,0.000000,-0.025334,-0.025334,0.99537
2010-03-22,-0.008940,-0.025618,-2.506659,-0.025618,-0.025618,0.99846
2010-04-06,-0.001602,-0.027219,-2.480001,-0.027219,-0.027219,0.99846
2010-04-20,-0.011132,-0.038351,-3.114281,-0.038351,-0.038351,0.99837
2010-05-05,0.003350,-0.035001,-2.527191,-0.038351,-0.038351,0.99837
2010-05-19,0.015650,-0.019351,-1.015388,-0.038351,-0.038351,0.99857


In [56]:
ret_df

Unnamed: 0,leverage,returns,turn_over,index,tc_cost,ret_after_tc
2010-01-04,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000
2010-01-18,1.00000,0.046003,1.927841,0.058077,0.003856,-0.015930
2010-02-01,1.00000,-0.116628,0.400000,-0.115825,0.000800,-0.001602
2010-02-22,0.99839,0.028905,0.420000,0.035925,0.000840,-0.007802
2010-03-08,0.99537,0.053336,0.400000,0.044084,0.000800,0.008656
2010-03-22,0.99846,-0.008443,0.400000,-0.000303,0.000800,-0.008940
2010-04-06,0.99846,0.042204,0.400000,0.043072,0.000800,-0.001602
2010-04-20,0.99837,-0.004134,0.400000,0.006209,0.000800,-0.011132
2010-05-05,0.99837,-0.097597,0.400000,-0.101913,0.000800,0.003350
2010-05-19,0.99857,-0.108175,0.400000,-0.124804,0.000800,0.015650
