# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import logging

import numpy as np
import pandas as pd
import seaborn as sns

import core.config as cconfig
import dataflow_model.incremental_single_name_model_evaluator as ime
import dataflow_model.model_evaluator as modeval
import dataflow_model.model_plotter as modplot
import dataflow_model.regression_analyzer as cdmra
import dataflow_model.stats_computer as csc
import dataflow_model.utils as cdmu
import core.plotting as cplot
import core.statistics as cstati
import helpers.hdbg as dbg
import helpers.hprint as hprint

In [None]:
# Initialize logging at INFO verbosity; swap in the DEBUG line below for more
# detail.
dbg.init_logger(verbosity=logging.INFO)
# dbg.init_logger(verbosity=logging.DEBUG)

# Logger for messages emitted from this notebook.
_LOG = logging.getLogger(__name__)

# NOTE(review): `env` is not among the imports of this notebook; import it
# before enabling the line below.
# _LOG.info("%s", env.get_system_signature()[0])

# Apply the notebook configuration from `helpers.hprint` (presumably display
# defaults -- confirm in that module).
hprint.config_notebook()

# Load regression dataframes

In [None]:
# Placeholder: fill in before running (presumably the directory holding the
# experiment results -- confirm against `cdmu.yield_experiment_artifacts`).
src_dir = ""
# Name of the pickled result bundle file to load.
file_name = "result_bundle.v2_0.pkl"

# NOTE(review): judging by its name, this returns a one-shot iterator of
# (key, artifact) pairs, so this cell must be re-run before iterating
# `fit_iter` a second time -- confirm.
fit_iter = cdmu.yield_experiment_artifacts(
    src_dir=src_dir,
    file_name=file_name,
    # No extra loading options are passed.
    load_rb_kwargs={},
)

In [None]:
# Pull the fit-coefficient table out of every artifact and stack the tables
# into a single object whose outermost index level is the artifact key.
fit_coeffs = pd.concat(
    {
        key: bundle.info["ml"]["predict"]["fit_coefficients"]
        for key, bundle in fit_iter
    }
)

In [None]:
# Preview the first rows of the stacked coefficients.
fit_coeffs.head()

# Multiple hypothesis testing (MHT)

In [None]:
# Select the "p_val_2s" column (presumably two-sided p-values -- confirm).
p_vals = fit_coeffs["p_val_2s"]

In [None]:
# Histogram of the p-values using 30 bins.
p_vals.hist(bins=30)

In [None]:
# Moment statistics of the "p_val_2s" column (see `cdmra.compute_moments` for
# the exact statistics returned).
cdmra.compute_moments(fit_coeffs, ["p_val_2s"])

In [None]:
# Estimate q-values from the p-values.
q_vals = cstati.estimate_q_values(p_vals)

In [None]:
# Histogram of the estimated q-values using 30 bins.
q_vals.hist(bins=30)

# Feature stats


In [None]:
# Moment statistics for each coefficient column of interest, shown inline.
feature_stats = cdmra.compute_moments(
    fit_coeffs,
    ["rho", "beta", "beta_z_scored", "turn"],
)
display(feature_stats)

In [None]:
# Pass the mean "rho" and mean "turn" statistics, together with a grid of
# values from 0 (inclusive) to 3 (exclusive) in steps of 0.1, to the smoothing
# helper.
# NOTE(review): what the grid parameterizes is defined by
# `cstati.apply_smoothing_parameters` -- confirm there.
sweep = cstati.apply_smoothing_parameters(
    feature_stats[("rho", "mean")],
    feature_stats[("turn", "mean")],
    np.arange(0, 3, 0.1)
)

In [None]:
# Statistic (column of `fit_coeffs`) to plot.
stat = "beta"
# Placeholder: name of the feature to select; fill in before running.
feature = ""
# Histogram (101 bins) of the statistic for that feature, selected on index
# level 1.
fit_coeffs[stat].xs(feature, level=1).hist(bins=101)

# Reweight

In [None]:
# Placeholders: fill in the feature columns and the target column before
# running.
feature_cols = []
target_col = ""

# Iterate over the experiment artifacts, requesting only the feature and
# target columns.
# NOTE(review): `load_rb_kwargs` is presumably forwarded to the result-bundle
# loader -- confirm in `cdmu.yield_experiment_artifacts`.
art_iter = cdmu.yield_experiment_artifacts(
    src_dir=src_dir,
    file_name=file_name,
    load_rb_kwargs={"columns": feature_cols + [target_col]},
)

def get_feature_weights(key: int) -> pd.Series:
    """
    Return the weights to apply to the feature columns for experiment `key`.

    This is a template hook: replace the body with the desired weighting
    scheme before running the reweighting cells. Until then it fails loudly
    instead of silently returning `None`.

    :param key: experiment key, as yielded alongside each artifact
    :return: weights; presumably indexed by feature column name so that they
        align with the feature dataframe they are multiplied into (confirm
        when implementing)
    :raises NotImplementedError: until a weighting scheme is filled in
    """
    raise NotImplementedError(
        "Fill in `get_feature_weights()` before running the reweighting cells"
    )

In [None]:
# Per-experiment results, keyed by experiment key.
sharpes = {}
turns = {}
daily_pnls = {}
# Running sum of PnL across experiments. The dtype is given explicitly because
# an empty `pd.Series()` without one emits a `DeprecationWarning` on pandas 1.x
# and defaults to `object` dtype on pandas 2.x, which can propagate into the
# accumulated series.
portfolio_pnl = pd.Series(dtype="float64")
for key, art in art_iter:
    features = art.result_df[feature_cols]
    # Weighted sum of the features; a row is NaN only if all of its terms are
    # NaN.
    prediction = (features * get_feature_weights(key)).sum(min_count=1, axis=1)
    turns[key] = cstati.compute_avg_turnover_and_holding_period(prediction)
    pnl = prediction * art.result_df[target_col]
    # `fill_value=0` lets index entries present in only one of the two series
    # contribute to the sum instead of turning it into NaN.
    portfolio_pnl = pnl.add(portfolio_pnl, fill_value=0)
    sharpes[key] = cstati.compute_annualized_sharpe_ratio(pnl)
    # Aggregate to business days; days with no data stay NaN.
    daily_pnls[key] = pnl.resample("B").sum(min_count=1)

In [None]:
# Mean across experiments of the per-experiment daily PnL series.
daily_pnl_xs = pd.DataFrame(daily_pnls).mean(axis=1)
# Portfolio PnL aggregated to business days; days with no data stay NaN.
daily_portfolio_pnl = portfolio_pnl.resample("B").sum(min_count=1)

In [None]:
# Plot the cumulative sum of the daily portfolio PnL.
daily_portfolio_pnl.cumsum().plot()

# Pair plots

In [None]:
# Keyword arguments common to both coefficient computations.
_coeff_kwargs = {
    "src_dir": src_dir,
    "file_name": file_name,
    "feature_cols": feature_cols,
    "target_col": target_col,
}

# Placeholder windows: both splits currently use `start=None` / `end=None`;
# set them per split to compare different periods.
split1 = cdmra.compute_coefficients(start=None, end=None, **_coeff_kwargs)

split2 = cdmra.compute_coefficients(start=None, end=None, **_coeff_kwargs)

In [None]:
# Placeholder: statistic (column of the split tables) to compare.
stat = ""
# Put the two splits side by side, keeping only index entries present in both.
_split_stats = pd.concat(
    [split1[stat].rename("split1"), split2[stat].rename("split2")],
    axis=1,
    join="inner",
)
sns.pairplot(_split_stats)

In [None]:
# Placeholders: statistic and feature to compare across the two splits.
stat = ""
feature = ""
# For each split, select the statistic for the chosen feature (index level 1),
# then put the splits side by side on their common index entries.
_split_feature_stats = pd.concat(
    [
        split[stat].xs(feature, level=1).rename(label)
        for label, split in (("split1", split1), ("split2", split2))
    ],
    axis=1,
    join="inner",
)
sns.pairplot(_split_feature_stats)