# Description

- Initialize with returns, predictions, target volatility, and oos start date
- Evaluate portfolios generated from the predictions

# Imports

In [1]:
%load_ext autoreload
%autoreload 2

import logging

import core.config as cconfig
import core.dataflow_model.model_evaluator as modeval
import core.dataflow_model.model_plotter as modplot
import core.dataflow_model.utils as cdmu
import helpers.dbg as dbg
import helpers.introspection as hintro
import helpers.printing as hprint

In [2]:
# Configure logging for the notebook; use `logging.DEBUG` for more verbose output.
log_verbosity = logging.INFO
dbg.init_logger(verbosity=log_verbosity)

_LOG = logging.getLogger(__name__)

# System-signature logging is disabled (`env` is not imported in this notebook):
# _LOG.info("%s", env.get_system_signature()[0])

hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-e7715308-84ba-4edd-b1e4-1aeda980ba61.json'


# Notebook config

In [38]:
# Experiment archive to evaluate. Remote alternatives that were used before:
#   "s3://eglp-spm-sasm/experiments/experiment.RH2Ef.v1_9-all.5T.20210831-004747.run1.tgz"
#   "s3://alphamatic-data/experiments/..."
exp_dir = "./experiment.RH2Ef.v1_9-all.5T.20210831-004747.run1.tgz"

# Keyword arguments forwarded to the ModelEvaluator builder.
# `oos_start` (e.g. "2017-01-01") is currently left unset.
model_evaluator_kwargs = {
    "returns_col": "vwap_ret_0_vol_adj_clipped_2",
    "predictions_col": "vwap_ret_0_vol_adj_clipped_2_hat",
}

# Everything a reader might want to tune lives in this one nested dict.
eval_config_dict = {
    "exp_dir": exp_dir,
    "model_evaluator_kwargs": model_evaluator_kwargs,
    "bh_adj_threshold": 0.1,
    "resample_rule": "W",
    "mode": "ins",
    "target_volatility": 0.1,
}
eval_config = cconfig.get_config_from_nested_dict(eval_config_dict)

# Initialize ModelEvaluator and ModelPlotter

In [78]:
# Set up loading of the `result_bundle.pkl` artifacts, restricted to
# experiment indices 0..3.
num_selected = 4
selected_idxs = list(range(num_selected))
artifact_name = "result_bundle.pkl"
result_bundles = cdmu.yield_experiment_artifacts(
    eval_config["exp_dir"], artifact_name, selected_idxs=selected_idxs
)

In [83]:
# Pull the next artifact from the iterator, reporting process memory before
# and after the load.
# `get_memory_usage_as_str()` is used for printing, matching the other
# memory-measurement cell; `get_memory_usage()` is indexed there as a sequence
# of raw numbers, so it is not the human-readable form.
# NOTE: despite its name, `df` is not a DataFrame; later cells index it as
# `df[1]["result_df"]`.
# `next()` raises `StopIteration` once `result_bundles` is exhausted; re-run
# the loading cell to get a fresh iterator.
print("before:", dbg.get_memory_usage_as_str(None))
df = next(result_bundles)
print("after:", dbg.get_memory_usage_as_str(None))

before: rss=1.138GB vms=3.687GB mem_pct=2%


StopIteration: 

In [105]:
# Scratch state for the memory experiment: a counter and the list that keeps
# the copied frames referenced.
i, dfs = 0, list()

In [124]:
# Measure how much the process memory grows when one deep copy of `result_df`
# is created and kept.
bytes_per_gib = 1024 ** 3

rss_before = dbg.get_memory_usage()[0]
print("before", dbg.get_memory_usage_as_str(None))

# Appending to `dfs` keeps a reference to the copy, so it stays allocated for
# the second reading.
df_copy = result_df.copy(deep=True)
copy_size_gib = df_copy.memory_usage().sum() / bytes_per_gib
print("mem usage=", copy_size_gib)
dfs.append(df_copy)

rss_after = dbg.get_memory_usage()[0]
print("after", dbg.get_memory_usage_as_str(None))

rss_delta = rss_after - rss_before
print("mem_increase=", rss_delta)

before rss=3.863GB vms=6.412GB mem_pct=6%
0.17232084274291992
after rss=4.028GB vms=6.577GB mem_pct=6%
mem_increase= 0.16439437866210938


In [89]:
# Element 1 of the loaded artifact is a mapping; the model output frame is
# stored under its "result_df" key.
bundle = df[1]
result_df = bundle["result_df"]

In [90]:
# Total bytes pandas reports for `result_df` (shallow: `deep=True` is not
# requested here).
result_df.memory_usage().sum()

185028096

In [93]:
# Size of just the returns and predictions columns, as a formatted string.
ret_pred_cols = ["vwap_ret_0_vol_adj_clipped_2", "vwap_ret_0_vol_adj_clipped_2_hat"]
ret_pred_bytes = result_df[ret_pred_cols].memory_usage().sum()
hintro.format_size(ret_pred_bytes)

'24.1 MB'

In [61]:
import helpers.introspection as hintro

In [72]:
# Show which fields the loaded bundle carries, then the deep memory footprint
# (index included) of its result frame, in bytes.
loaded_bundle = df[1]
print(loaded_bundle.keys())

loaded_bundle["result_df"].memory_usage(index=True, deep=True).sum()

odict_keys(['config', 'result_nid', 'method', 'result_df', 'column_to_tags', 'info', 'payload', 'class'])


148228608

In [49]:
# Create a fresh artifact iterator for the evaluator.
# `result_bundles` is a one-shot iterator and the memory-profiling cells above
# already advance it with `next()`, so handing it over directly would make the
# evaluator silently miss (or see none of) the selected bundles.
evaluator_bundles = cdmu.yield_experiment_artifacts(
    eval_config["exp_dir"],
    "result_bundle.pkl",
    selected_idxs=selected_idxs,
)
# Build the ModelEvaluator.
evaluator = modeval.build_model_evaluator_from_result_bundles(
    evaluator_bundles,
    abort_on_error=False,
    **eval_config["model_evaluator_kwargs"].to_dict(),
)
# Build the ModelPlotter.
plotter = modplot.ModelPlotter(evaluator)

# Load artifacts 'result_bundle.pkl' from './experiment.RH2Ef.v1_9-all.5T.20210831-004747.run1.tgz'
While expanding './experiment.RH2Ef.v1_9-all.5T.20210831-004747.run1.tgz' dst dir './experiment.RH2Ef.v1_9-all.5T.run1/' already exists: skipping
Found 534 experiment subdirs in './experiment.RH2Ef.v1_9-all.5T.run1/'


Loading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Analysis

In [43]:
# Compute the per-model statistics table using the configured mode and
# target volatility, then render it.
stats_kwargs = {
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
pnl_stats = evaluator.calculate_stats(**stats_kwargs)
display(pnl_stats)

Calculating positions:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating PnL:   0%|          | 0/2 [00:00<?, ?it/s]

Calculating stats:   0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0,Unnamed: 1,0,1
finance,avg_turnover_(%),74.704251,103.068568
finance,turnover_frequency,<5 * Minutes>,<5 * Minutes>
finance,avg_holding_period,1.338612,0.970228
finance,holding_period_units,<5 * Minutes>,<5 * Minutes>
sampling,start_time,2009-01-22 10:30:00-05:00,2009-01-22 10:30:00-05:00
sampling,end_time,2017-01-04 15:50:00-05:00,2018-12-31 15:50:00-05:00
sampling,n_sampling_points,151648,189473
sampling,frequency,<5 * Minutes>,<5 * Minutes>
sampling,sampling_points_per_year,105156.125,105156.125
sampling,time_span_in_years,7.954049,9.942407


## Model selection

In [None]:
# Plot the multiple-tests adjustment at the configured threshold.
adjustment_kwargs = {
    "threshold": eval_config["bh_adj_threshold"],
    "mode": eval_config["mode"],
}
plotter.plot_multiple_tests_adjustment(**adjustment_kwargs)

In [None]:
# TODO(gp): Move this helper out of the notebook into a library module.
def _split_models_by_adj_pval(stats, threshold):
    """
    Partition the columns of `stats` by their `sr.adj_pval` value.

    :param stats: stats frame with a `sr.adj_pval` row under `signal_quality`
    :param threshold: columns whose `sr.adj_pval` is strictly below this value
        are selected
    :return: (boolean column mask, selected column labels, remaining column
        labels)
    """
    adj_pvals = stats.loc["signal_quality"].loc["sr.adj_pval"]
    mask = adj_pvals < threshold
    chosen = stats.loc[:, mask].columns.to_list()
    rejected = stats.loc[:, ~mask].columns.to_list()
    return mask, chosen, rejected


col_mask, selected, not_selected = _split_models_by_adj_pval(
    pnl_stats, eval_config["bh_adj_threshold"]
)

selection_summary = hprint.perc(len(selected), pnl_stats.shape[1])
print("num model selected=%s" % selection_summary)
print("model selected=%s" % selected)
print("model not selected=%s" % not_selected)

# Use `selected = None` to show all the models.

In [None]:
# Plot the PnL curves of the selected models at the configured resampling.
multiple_pnls_kwargs = {
    "keys": selected,
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
}
plotter.plot_multiple_pnls(**multiple_pnls_kwargs)

## Return correlation

In [None]:
# Correlation matrix computed on the "returns" series.
returns_corr_kwargs = {
    "series": "returns",
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
}
plotter.plot_correlation_matrix(**returns_corr_kwargs)

In [None]:
# Effective correlation rank computed on the "returns" series.
returns_rank_kwargs = {
    "series": "returns",
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
}
plotter.plot_effective_correlation_rank(**returns_rank_kwargs)

## Model correlation

In [None]:
# Correlation matrix computed on the "pnl" series.
pnl_corr_kwargs = {
    "series": "pnl",
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
}
plotter.plot_correlation_matrix(**pnl_corr_kwargs)

In [None]:
# Effective correlation rank computed on the "pnl" series.
pnl_rank_kwargs = {
    "series": "pnl",
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
}
plotter.plot_effective_correlation_rank(**pnl_rank_kwargs)

## Aggregate model

In [None]:
# Combine the selected models into one aggregate and show its statistics.
aggregate_kwargs = {
    "keys": selected,
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
aggregate_result = evaluator.aggregate_models(**aggregate_kwargs)
# The call returns three values: PnL, positions and the stats table.
pnl_srs, pos_srs, aggregate_stats = aggregate_result
display(aggregate_stats)

In [None]:
# Sharpe-ratio panel for the selected models.
sharpe_panel_kwargs = {
    "keys": selected,
    "mode": eval_config["mode"],
}
plotter.plot_sharpe_ratio_panel(**sharpe_panel_kwargs)

In [None]:
# Returns signal analysis for the selected models.
signal_analysis_kwargs = {
    "keys": selected,
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
plotter.plot_rets_signal_analysis(**signal_analysis_kwargs)

In [None]:
# Performance plot for the selected models.
performance_kwargs = {
    "keys": selected,
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
plotter.plot_performance(**performance_kwargs)

In [None]:
# Returns-and-volatility plot for the selected models.
rets_and_vol_kwargs = {
    "keys": selected,
    "resample_rule": eval_config["resample_rule"],
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
plotter.plot_rets_and_vol(**rets_and_vol_kwargs)

In [None]:
# Always raises AssertionError, so a top-to-bottom run stops at this cell and
# the cells below only execute when run by hand.
# NOTE(review): presumably a deliberate guard before the remaining plots; confirm.
assert 0

In [None]:
# Positions plot for the selected models.
positions_kwargs = {
    "keys": selected,
    "mode": eval_config["mode"],
    "target_volatility": eval_config["target_volatility"],
}
plotter.plot_positions(**positions_kwargs)

In [None]:
# Plot the returns and prediction for one or more models.
# `selected` may be `None` (the "show all the models" setting), and `None`
# cannot be sliced. In that case fall back to the full list of model keys,
# i.e. the columns of `pnl_stats` that `selected` is drawn from.
candidate_keys = (
    selected if selected is not None else pnl_stats.columns.to_list()
)
# Only the first model is plotted by default; widen the slice to plot more.
model_key = candidate_keys[:1]
plotter.plot_returns_and_predictions(
    keys=model_key,
    resample_rule=eval_config["resample_rule"],
    mode=eval_config["mode"],
)