In [1]:
import os, time, multiprocessing

import pandas as pd

from data_core import GenericTabularData

# from home_utils.home_utils import job_helper
import white_simv2_light.lighter as lt
from white_simv2_light.utils import utils

try:
    from utils.plotterHope import *
    import utils.pyHope as ph
except Exception as _e:
    import matplotlib.pyplot as plt
    print(_e)
    
# Cache flag shared with the processing cell: that cell calls `lighter.prepare()`
# only while this is False, keeps the prepared tables in notebook globals and
# then sets the flag to True, so later `Lighter` instances reuse those tables.
# Re-run this cell (resetting the flag) to force the shared inputs to be
# prepared again, e.g. after changing `market` or `date_dict`.
_cached = False

Data Core API 1.2.4.rc1: setting Data Core Env to Environment.Shanghai


In [2]:
# ---------------------------------------------------------------------------
# Run configuration (everything a reader might tune lives in this section)
# ---------------------------------------------------------------------------
# Number of leading daily rows dropped from every run before it is reported.
burnin = 10
# _cap   : denominator that turns PnL / traded notional into ratios.
# _n_dir : extra divisor applied to the total traded notional ("turnover").
_cap, _n_dir = 4e8, 4
# Label -> `orderDirection` code, used for the per-direction turnover columns.
_dir_dict = {
    "long": 1,
    "sell": -1,
    "short": -2,
}
# Switches for the optional outputs written per PO configuration.
_write_explanatory = True
_write_figures = True
# Account and host the experiment map is copied from with scp.
user_remote = "hopec"
login_node = "10.8.64.176"


#######
market = "hkg"
algo = "po"
exp_name = "{}_{}_400mls05".format(market, algo)
# The "%s" is filled with the zero-padded experiment number further down.
_pre_prefix = "4yi_0ms_e%s"
_pool, _ns = "strat_hope", "results_eq_{}".format(market)
# Filled with (market, exp_name, per-experiment prefix).
_prefix_stem = "/strat_hope/results_eq_%s/%s/%s/"
# Experiment numbers to process; the second set lists numbers to exclude.
_iterlist = sorted(set(range(1)) - set([]))
_use_old_po_search = False

date_dict = "/home/hope/sim/analysis/date_dict/hkg_eq_20210530_20230530_s13_b10.json"
_po_search = True
# PO configuration tags ("p000", ...); a single empty tag when PO search is off.
_iterlist_pocfg = [
    "p{}".format(str(_iter).zfill(3))
    for _iter in sorted(set(range(1)) - set([]))
] if _po_search else [""]

# market -> (benchmark index id, commission rate)
_market_params = {
    "hkg": (101200001, .00155),
    "twn": (101300001, .0018),  # avg
    "kor": (101400002, .0014),  # avg
}
# Fail immediately on an unknown market instead of hitting a NameError on
# `_univ_bm` / `comm` much later in the run.
if market not in _market_params:
    raise ValueError(
        "Unsupported market {!r}; expected one of {}".format(
            market,
            sorted(_market_params)
        )
    )
_univ_bm, comm = _market_params[market]

# Announce how many PO configurations are about to be processed.
lt.logging.info(
    "[%s processing] Total of %i PO Setups", exp_name, len(_iterlist_pocfg)
)
for _pocfg in _iterlist_pocfg:
    # Prefix template of this PO configuration: the "%s" inherited from
    # `_pre_prefix` is filled with the experiment number later on, and the
    # literal "{}" is left untouched by this notebook.
    sec_prefix = _pre_prefix + ("%s_{}" % _pocfg)

    lt.logging.info(
        "[%s processing] START %s\nexplanatory: %s\nfigures: %s\nTotal of %i Exec Experiments",
        exp_name, _pocfg, _write_explanatory, _write_figures, len(_iterlist),
    )
    # Start stamp; replaced by the elapsed time once the configuration is done.
    time_total = time.time()
    #######


    # Local directory receiving every artefact of this PO configuration.
    _dir_result = \
        "/home/hope/sim/analysis/results/{}/{}/{}".format(
            market,
            exp_name,
            _pocfg
        )
    os.makedirs(_dir_result, exist_ok = True)
    _dir_remote = \
        "/home/hopec/sim/{}".format(exp_name)

    # Best-effort copy of the remote experiment map next to the results.
    _scp_cmd = "scp {}@{}:{} {}".format(
        user_remote,
        login_node,
        os.path.join(
            _dir_remote,
            "experiment_map_{}_{}.json".format(market, exp_name)
        ),
        _dir_result
    )
    try:
        # os.system() reports a failed command through its return value and
        # does not raise, so the status has to be checked explicitly for the
        # warning below to ever be emitted.
        _scp_status = os.system(_scp_cmd)
        if _scp_status != 0:
            raise RuntimeError(
                "scp returned non-zero status {}".format(_scp_status)
            )
    except Exception as _e:
        # A missing experiment map is not fatal for the analysis itself.
        lt.logging.warning(
            "[pre_processing cell] Remote experiment setup file didn't copy; %s",
            _e
        )


    # One daily table per experiment is collected here and concatenated
    # after the loop.
    _d_df = []
    for _i in _iterlist:
        # Result prefix of this experiment: market / experiment name /
        # per-experiment prefix carrying the zero-padded experiment number.
        run_prefix = _prefix_stem % (
            market,
            exp_name,
            sec_prefix % str(_i).zfill(2),
        )

        lighter = lt.Lighter(
            market, run_prefix, date_dict,
            pool = _pool, ns = _ns, cap = _cap
        )
        if not _cached:
            # First pass: prepare the shared inputs once and keep references
            # to them in notebook globals for every later `Lighter`.
            lighter.prepare()

            _s_bar1m = lighter._s_bar1m
            _s_eod = lighter._s_eod
            _s_beta = lighter._s_beta
            _ddict_full = lighter._ddict_full

            # The benchmark index table is optional: on failure `_ret_idx`
            # stays None and the run continues without it.
            try:
                _ret_idx = GenericTabularData(
                    region = lighter._mkt,
                    asset = "idx",
                    dataset = "md_eod",
                    univ = [_univ_bm],
                    start_date = min(lighter._date_list),
                    end_date = max(lighter._date_list)
                ).as_data_frame()
                _ret_idx["date"] = _ret_idx["yyyymmdd"]

            except Exception as _e:
                # Catch `Exception` only (so Ctrl-C still interrupts the run)
                # and leave a trace of why the benchmark is unavailable.
                lt.logging.warning(
                    "[prepare cell] Benchmark index %s not loaded; continuing without it; %s",
                    _univ_bm,
                    _e
                )
                _ret_idx = None

            _cached = True

        else:
            # Later passes: hand the already prepared tables to the new
            # instance instead of preparing them again.
            lighter._s_bar1m = _s_bar1m
            lighter._s_eod = _s_eod
            lighter._s_beta = _s_beta
            lighter._ddict_full = _ddict_full

        #
        # ---- daily inventory table, loaded date by date in worker processes
        time0 = time.time()
        lt.logging.warning(
            "[_d_inventory cell] Estimated to take %.2f minutes",
            .025 * len(lighter._date_list) / 60
        )

        if lighter._d_inventory is not None:
            lt.logging.warning("[_d_inventory cell] Table exists; force reload.")

        def _func_to_map(_date):
            """Return the inventory details of one date from the current `lighter`."""
            return lighter._get_inventory_details(_date)

        with multiprocessing.Pool(8) as _mp_pool:
            _l_res = _mp_pool.map(_func_to_map, lighter._date_list)
            _mp_pool.close()
            _mp_pool.join()

        # Stack the per-date frames and order them chronologically.
        _df = pd.concat(_l_res).sort_values(by = ["date"]).reset_index(drop = True)
        lighter._d_inventory = _df
        lt.logging.info(
            "[_d_inventory cell] %i of %i days loaded; it took %.2f seconds.",
            lighter._d_inventory.date.nunique(),
            len(lighter._date_list),
            time.time() - time0
        )

        #
        # ---- order table, loaded date by date in worker processes
        time0 = time.time()
        lt.logging.warning(
            "[_m_orders cell] Estimated to take %.2f minutes",
            .076 * len(lighter._date_list) / 60
        )

        if lighter._m_orders is not None:
            lt.logging.warning("[_m_orders cell] Table exists; force reload.")

        def _func_to_map(_date):
            """Return the order details of one date, additionally grouped by order direction."""
            return lighter._get_order_details_fast(
                _date,
                extra_grouping = ["orderDirection"]
            )

        with multiprocessing.Pool(8) as _mp_pool:
            _l_res = _mp_pool.map(_func_to_map, lighter._date_list)
            _mp_pool.close()
            _mp_pool.join()

        # Stack the per-date frames and order them by date, then minute.
        _df = (
            pd.concat(_l_res)
            .sort_values(by = ["date", "minute"])
            .reset_index(drop = True)
        )
        lighter._m_orders = _df
        lt.logging.info(
            "[_m_orders cell] %i of %i days loaded; it took %.2f seconds.",
            lighter._m_orders.date.nunique(),
            len(lighter._date_list),
            time.time() - time0
        )

        #
        # ---- derive daily PnL, returns and turnover from the loaded tables
        time0 = time.time()
        lt.logging.warning(
            "[post_processing cell] Estimated to take less than 0.1 seconds."
        )
        _df = lighter._d_inventory.copy()
        _df_orders = lighter._m_orders.copy()

        # The input columns (lmv / smv / cfe / mfe, sod / eod) come from the
        # inventory table; their definitions are not restated here.
        _df["pnl_raw"] = (
            _df["eod_lmv"] + _df["eod_cfe"] - _df["sod_cfe"]
            - (_df["eod_smv"] + _df["eod_mfe"] - _df["sod_mfe"])
        )
        _df["ret_raw"] = _df["pnl_raw"] / _cap
        _df["pnl_holding"] = (
            _df["eod_lmv_holding"] - _df["sod_lmv_holding"]
            - (_df["eod_smv_holding"] - _df["sod_smv_holding"])
        )
        _df["ret_holding"] = _df["pnl_holding"] / _cap
        # Whatever part of the raw return is not explained by holdings.
        _df["ret_trading"] = _df["ret_raw"] - _df["ret_holding"]
        _df["eod_lmv_ratio"] = _df["eod_lmv"] / _cap
        _df["eod_smv_ratio"] = _df["eod_smv"] / _cap

        # Total turnover: daily traded notional divided by `_n_dir` and `_cap`.
        _tmp_to = (
            _df_orders.groupby(["date"])["tradeNotional_final"].sum()
            / _n_dir / _cap
        ).rename("turnover").reset_index()
        _df = pd.merge(_df, _tmp_to, on = ["date"], how = "left")
        # Dates without any order get a turnover of zero.
        _df["turnover"] = _df["turnover"].fillna(0.)

        # Per-direction turnover (divided by `_cap` only); best effort per
        # direction, a failing direction is logged and skipped.
        for _ikey, _idir in _dir_dict.items():
            _col_dir = "turnover_{}".format(_ikey)
            try:
                _orders_dir = _df_orders[_df_orders.orderDirection == _idir]
                _tmp_dir = (
                    _orders_dir.groupby(["date"])["tradeNotional_final"].sum()
                    / _cap
                ).rename(_col_dir).reset_index()
                _df = pd.merge(_df, _tmp_dir, on = ["date"], how = "left")
                _df[_col_dir] = _df[_col_dir].fillna(0.)
            except Exception as _e:
                lt.logging.warning(
                    "[post_processing cell] ERR processing dir %i; %s",
                    _idir,
                    _e
                )
        lt.logging.info(
            "[post_processing cell] %i of %i days loaded; it took %.2f seconds.",
            _df.date.nunique(),
            len(lighter._date_list),
            time.time() - time0
        )

        #
        # Tag the rows with the experiment number, drop the burn-in rows and
        # keep the remainder for the combined table.
        _df["run_num"] = _i
        _df_kept = _df.iloc[burnin:]
        lt.logging.info(
            "[iter%i DONE] RET %.2f bps / TO %.2f pct",
            _i,
            _df_kept["ret_raw"].mean() * 10000,
            _df_kept["turnover"].mean() * 100
        )
        _d_df.append(_df_kept)

    # Combine the per-experiment tables into one frame.
    _d_df = pd.concat(_d_df)

    # Daily results of every experiment, one CSV per PO configuration.
    _path_daily = os.path.join(_dir_result, "result_daily.csv")
    _d_df.to_csv(_path_daily, index = False)
    lt.logging.info("[%s logging] result_daily.csv", exp_name)
    # Number of distinct values per column and experiment, as a quick check.
    lt.logging.info("%s", _d_df.groupby(["run_num"]).nunique())

    # Summary table produced by the project helper from the daily frame,
    # the commission rate and the (possibly None) benchmark table.
    _summary = utils.summarize(_d_df, comm, _ret_idx = _ret_idx)
    _path_summary = os.path.join(_dir_result, "summary.csv")
    _summary.to_csv(_path_summary, index = False)
    lt.logging.info("[%s logging] summary.csv", exp_name)

    #
    if _write_explanatory:
        _dir_explanatory = os.path.join(_dir_result, "explanatory")
        os.makedirs(_dir_explanatory, exist_ok = True)

        for _i in _iterlist:
            # Rows of this experiment in chronological order.
            _df = (
                _d_df[_d_df.run_num == _i]
                .copy()
                .sort_values(by = ["date"])
                .reset_index(drop = True)
            )

            # Ratios are scaled back by `_cap` (and `_n_dir` for turnover);
            # `turnover_total * comm` is added to the raw and trading columns.
            _traded = _df["turnover"] * _n_dir * _cap
            _fees = _traded * comm
            _df_out = _df[["date"]].assign(
                turnover_total = _traded,
                ret_raw = _df["ret_raw"] * _cap + _fees,
                ret_trading = _df["ret_trading"] * _cap + _fees,
                ret_holding = _df["ret_holding"] * _cap,
            )

            _name_out = "day_df_{}_{}_exp{}.csv".format(
                market, algo, str(_i).zfill(2)
            )
            _df_out.to_csv(
                os.path.join(_dir_explanatory, _name_out),
                index = False
            )

        lt.logging.info(
            "[%s logging] explanatory/day_df_%s_%s_expXX.csv",
            exp_name, market, algo,
        )

    if _write_figures:
        # NOTE: the figures directory is stored under the name
        # `_dir_explanatory`; the name is kept unchanged so that anything
        # reading it after this cell still sees the same value.
        _dir_explanatory = \
            os.path.join(_dir_result, "figures")
        os.makedirs(_dir_explanatory, exist_ok = True)

        # (column, legend template) in plotting order; the order fixes the
        # colour each curve gets from the default cycle.
        _return_curves = [
            ("ret_raw", "Total Return Exp{}\nmean = {:.2f} bps"),
            ("ret_holding", "Holding Return Exp{}\nmean = {:.2f} bps"),
            ("ret_trading", "Trading Return Exp{}\nmean = {:.2f} bps"),
        ]
        # (column, legend template, extra plot keyword arguments)
        _turnover_curves = [
            ("turnover", "Average Turnover Exp{}\nmean = {:.2f} pct",
             {"color": "k", "zorder": 999, "lw": 3}),
            ("turnover_long", "LONG Turnover Exp{}\nmean = {:.2f} pct", {}),
            ("turnover_sell", "SELL Turnover Exp{}\nmean = {:.2f} pct", {}),
            ("turnover_short", "SHORT Turnover Exp{}\nmean = {:.2f} pct", {}),
        ]

        for _i in _iterlist:
            # Filter the rows of this experiment once instead of once per
            # plotted quantity.
            _run = _d_df[_d_df.run_num == _i]
            _tag = str(_i).zfill(2)

            # ---- cumulative return figure
            plt.figure()
            for _col, _label in _return_curves:
                plt.plot(
                    _run[_col].cumsum().values * 100,
                    label = _label.format(_tag, _run[_col].mean() * 10000)
                )

            plt.legend()

            # Date ticks are optional: `ph` may be unavailable when the
            # plotting helpers failed to import.
            try:
                ax = plt.gca()
                ph.plot_date_ticks(ax, _run.date.values, minTicks = 9)
            except Exception as _e:
                print(_e)

            plt.ylabel("Cumulative Return [%]")

            plt.savefig(
                os.path.join(
                    _dir_explanatory,
                    "return_e{}.png".format(_tag))
            )
            plt.close()

            # ---- turnover figure
            plt.figure()
            for _col, _label, _style in _turnover_curves:
                # Per-direction turnover columns are created on a best-effort
                # basis; skip a missing one instead of aborting the whole run
                # at the very last step.
                if _col not in _run.columns:
                    lt.logging.warning(
                        "[figures cell] Column %s missing for exp %s; curve skipped.",
                        _col,
                        _tag
                    )
                    continue
                plt.plot(
                    _run[_col].values * 100,
                    label = _label.format(_tag, _run[_col].mean() * 100),
                    **_style
                )

            plt.legend(ncol = 2)

            try:
                ax = plt.gca()
                ph.plot_date_ticks(ax, _run.date.values, minTicks = 9)
            except Exception as _e:
                print(_e)

            plt.ylabel("Turnover [%]")

            plt.savefig(
                os.path.join(
                    _dir_explanatory,
                    "turnover_e{}.png".format(_tag))
            )
            plt.close()

        lt.logging.info(
            "[%s logging] figures/return_eXX.png and figures/turnover_eXX.png",
            exp_name,
        )

    # Turn the start stamp into elapsed seconds and report it in minutes.
    time_total = time.time() - time_total
    lt.logging.info(
        "[%s processing] DONE in %.2f minutes", exp_name, time_total / 60
    )

2023-07-19 16:35:58 [Lighter] [INFO] [hkg_po_400mls05 processing] Total of 1 PO Setups
2023-07-19 16:35:58 [Lighter] [INFO] [hkg_po_400mls05 processing] START p000
explanatory: True
figures: True
Total of 1 Exec Experiments
2023-07-19 16:36:00 [Lighter] [INFO] [init] Loaded mkt hkg
2023-07-19 16:36:00 [Lighter] [INFO] [init] rprefix /strat_hope/results_eq_hkg/hkg_po_400mls05/4yi_0ms_e00p000_{}/ pool strat_hope ns results_eq_hkg
2023-07-19 16:36:00 [Lighter] [INFO] [init] date_dict /home/hope/sim/analysis/date_dict/hkg_eq_20210530_20230530_s13_b10.json
2023-07-19 16:36:00 [Lighter] [INFO] [init] A total of 492 days from 2021-05-31 to 2023-05-30
2023-07-19 16:36:15 [Lighter] [INFO] [prepare] s00 done; it took 14.79 seconds.
2023-07-19 16:36:29 [Lighter] [INFO] [prepare] s01 done; it took 14.17 seconds.
2023-07-19 16:36:43 [Lighter] [INFO] [prepare] s02 done; it took 13.80 seconds.
2023-07-19 16:36:55 [Lighter] [INFO] [prepare] s03 done; it took 12.33 seconds.
2023-07-19 16:37:06 [Lighter