In [None]:
# Load the autoreload extension and reload all modules before each cell runs,
# so edits to imported libraries are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import datetime
import logging
import os

import pandas as pd

import core.config as cconfig
import core.finance as cofinanc
import core.plotting as coplotti
import dataflow.model as dtfmod
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hparquet as hparque
import helpers.hprint as hprint
import helpers.hsql as hsql

In [None]:
# Initialize logging at INFO verbosity and create the notebook-level logger.
hdbg.init_logger(verbosity=logging.INFO)
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature reported by `henv`.
system_signature = henv.get_system_signature()[0]
_LOG.info("%s", system_signature)

# Apply the notebook configuration provided by `hprint`.
hprint.config_notebook()

# Load tiled backtest

In [None]:
# Directory with the tiled backtest results and the name of the asset id
# column. NOTE(review): both values look like placeholders to be filled in
# before running the notebook.
tile_dict = dict(
    dir_name="/app/build_tile_configs.../tiled_results/",
    asset_id_col="",
)
tile_config = cconfig.get_config_from_nested_dict(tile_dict)

## Report tile stats

In [None]:
# Collate the metadata of the Parquet tiles found under the configured directory.
parquet_tile_analyzer = dtfmod.ParquetTileAnalyzer()
tile_dir_name = tile_config["dir_name"]
parquet_tile_metadata = parquet_tile_analyzer.collate_parquet_tile_metadata(
    tile_dir_name
)

In [None]:
# Compute tile metadata stats per asset id and show them as the cell output.
metadata_stats_by_asset_id = (
    parquet_tile_analyzer.compute_metadata_stats_by_asset_id(
        parquet_tile_metadata
    )
)
metadata_stats_by_asset_id

In [None]:
# Compute the universe size over time from the tile metadata and show it as
# the cell output.
universe_size_by_time = parquet_tile_analyzer.compute_universe_size_by_time(
    parquet_tile_metadata
)
universe_size_by_time

In [None]:
# The first level of the metadata index holds the values used as asset ids below.
asset_id_level = parquet_tile_metadata.index.levels[0]
asset_ids = asset_id_level.to_list()
display(asset_ids)

## Load a single-asset tile

In [None]:
# Build the tile iterator restricted to the first asset id and take only its
# first tile.
# NOTE(review): the trailing positional arguments `1` and `None` are passed
# through unchanged; presumably a batch size and a column selection — confirm
# against `hparque.yield_parquet_tiles_by_assets`.
single_asset_dir_name = tile_config["dir_name"]
first_asset_id_only = asset_ids[0:1]
single_asset_id_col = tile_config["asset_id_col"]
single_asset_tile_iter = hparque.yield_parquet_tiles_by_assets(
    single_asset_dir_name,
    first_asset_id_only,
    single_asset_id_col,
    1,
    None,
)
single_asset_tile = next(single_asset_tile_iter)

In [None]:
# Post-process the raw tile using the configured asset id column.
single_tile_asset_id_col = tile_config["asset_id_col"]
single_tile_df = dtfmod.process_parquet_read_df(
    single_asset_tile,
    single_tile_asset_id_col,
)

In [None]:
# Show the first level of the tile's column index.
single_tile_top_level_cols = single_tile_df.columns.levels[0]
single_tile_top_level_cols

In [None]:
# Preview the first three rows of the processed tile.
single_tile_head = single_tile_df.head(3)
single_tile_head

# Compute portfolio bar metrics

In [None]:
# Settings for the forecast evaluator: the three column names are passed to
# its constructor, the remaining entries to `annotate_forecasts()`.
fep_dict = dict(
    price_col="vwap",
    volatility_col="vwap.ret_0.vol",
    prediction_col="prediction",
    target_gmv=1e6,
    dollar_neutrality="gaussian_rank",
    quantization="nearest_lot",
    burn_in_bars=3,
)
fep_config = cconfig.get_config_from_nested_dict(fep_dict)

In [None]:
# Build the evaluator from the price, volatility and prediction column names,
# looked up in that order.
fep_col_names = [
    fep_config[key]
    for key in ("price_col", "volatility_col", "prediction_col")
]
fep = dtfmod.ForecastEvaluatorFromPrices(*fep_col_names)

In [None]:
# Date range of the backtest tiles to load.
backtest_start_date = datetime.date(2011, 1, 1)
backtest_end_date = datetime.date(2018, 12, 31)

# Iterator over processed tiles, restricted to the columns the forecast
# evaluator needs; `asset_ids=None` applies no asset filter argument.
backtest_df_iter = dtfmod.yield_processed_parquet_tiles_by_year(
    tile_config["dir_name"],
    backtest_start_date,
    backtest_end_date,
    tile_config["asset_id_col"],
    data_cols=fep.get_cols(),
    asset_ids=None,
)

In [None]:
# Annotate forecasts tile by tile and keep only the bar-metrics part of each
# result; the slices are concatenated into a single frame at the end.
# The slices are collected under their own name so that `bar_metrics` only ever
# refers to the final concatenated result.
bar_metrics_slices = []
for df in backtest_df_iter:
    _, bar_metrics_slice = fep.annotate_forecasts(
        df,
        target_gmv=fep_config["target_gmv"],
        dollar_neutrality=fep_config["dollar_neutrality"],
        quantization=fep_config["quantization"],
        burn_in_bars=fep_config["burn_in_bars"],
    )
    bar_metrics_slices.append(bar_metrics_slice)
# `backtest_df_iter` can only be consumed once: re-running this cell without
# re-creating it yields nothing, and `pd.concat([])` would fail with an
# unhelpful "No objects to concatenate" message.
if not bar_metrics_slices:
    raise ValueError(
        "`backtest_df_iter` yielded no tiles: re-run the cell that creates it "
        "(it is exhausted after one pass) or check the tile directory and dates"
    )
bar_metrics = pd.concat(bar_metrics_slices)

In [None]:
# Plot the portfolio stats from the bar metrics at frequency "B".
portfolio_plot_freq = "B"
coplotti.plot_portfolio_stats(bar_metrics, freq=portfolio_plot_freq)

# Compute aggregate portfolio stats

In [None]:
# Helper object used below to compute the aggregate portfolio stats.
stats_computer = dtfmod.StatsComputer()

In [None]:
# Compute portfolio stats from the bar metrics at frequency "B"; the call
# returns a pair, of which only the first element is displayed here.
portfolio_stats_freq = "B"
portfolio_stats_result = stats_computer.compute_portfolio_stats(
    bar_metrics, portfolio_stats_freq
)
portfolio_stats, daily_metrics = portfolio_stats_result
display(portfolio_stats)

# Overnight returns

In [None]:
# Connection settings for the database queried in this section.
# Each value is read from an environment variable when it is set, so that
# credentials do not have to be typed into (and committed with) the notebook;
# when a variable is unset the original placeholder value is used.
# NOTE(review): the environment variable names are a suggestion — align them
# with the deployment's conventions.
host = os.environ.get("DB_HOST", "")
dbname = os.environ.get("DB_NAME", "")
# The port is passed on as an integer, as in the original placeholder.
port = int(os.environ.get("DB_PORT", "1000"))
user = os.environ.get("DB_USER", "")
password = os.environ.get("DB_PASSWORD", "")
# Name of the table to query; placeholder to fill in.
table_name = ""
connection = hsql.get_connection(host, dbname, port, user, password)

In [None]:
# Query the selected columns for all asset ids over the date range.
# The notebook defines no `config` object, so the parameters are taken from
# `tile_config` and from an explicit date range instead.
# NOTE(review): the date range mirrors the one used to load the backtest tiles,
# and the table is assumed to use the same asset id column as the tiles —
# confirm both, and that `datetime.date` is the expected date type.
overnight_asset_id_col = tile_config["asset_id_col"]
overnight_start_date = datetime.date(2011, 1, 1)
overnight_end_date = datetime.date(2018, 12, 31)
query_results = cofinanc.query_by_assets_and_dates(
    connection,
    table_name,
    asset_ids=asset_ids,
    asset_id_col=overnight_asset_id_col,
    start_date=overnight_start_date,
    end_date=overnight_end_date,
    date_col="date",
    select_cols=["date", "open_", "close", "total_return", "prev_total_return"],
)

In [None]:
# Compute overnight returns from the query results.
# The notebook defines no `config` object, so the asset id column is read from
# `tile_config`.
# NOTE(review): assumes the queried table uses the same asset id column as the
# tiles — confirm.
overnight_returns = cofinanc.compute_overnight_returns(
    query_results,
    tile_config["asset_id_col"],
)

# Regression analysis

In [None]:
# Settings for the regression: target column, feature columns, feature lag and
# batch size, passed to `dtfmod.regress()` below.
regression_dict = dict(
    target_col="vwap.ret_0.vol_adj",
    feature_cols=[1, 2, 3, 4, 5, 6, "prediction"],
    feature_lag=2,
    batch_size=50,
)
regression_config = cconfig.get_config_from_nested_dict(regression_dict)

In [None]:
# Run the regression over the tiles. Positional arguments, in order: tile
# directory, asset id column, then the four regression settings.
regression_setting_keys = (
    "target_col",
    "feature_cols",
    "feature_lag",
    "batch_size",
)
regress_args = (
    tile_config["dir_name"],
    tile_config["asset_id_col"],
    *(regression_config[key] for key in regression_setting_keys),
)
coefficients, corr = dtfmod.regress(*regress_args)

In [None]:
# Preview the first three rows of the regression coefficients.
coefficients_head = coefficients.head(3)
coefficients_head

In [None]:
# Preview the first rows of the second regression output (`corr`).
corr_head = corr.head()
corr_head