In [None]:
# Reload edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
import logging
import os
from typing import List, Tuple

import pandas as pd

import core.config as cconfig
import core.plotting as coplotti
import dataflow.model as dtfmod
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hpandas as hpandas
import helpers.hprint as hprint
import oms as oms

In [None]:
# Initialize logging at INFO verbosity for the notebook.
hdbg.init_logger(verbosity=logging.INFO)

# Notebook-level logger used by the cells below.
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature returned by `henv`.
_LOG.info("%s", henv.get_system_signature()[0])

# Apply the project notebook configuration (see `hprint.config_notebook()`).
hprint.config_notebook()

In [None]:
# Time window (New York time) used to trim the forecasts and to load the
# portfolio artifacts.
# NOTE(review): `date` is 2022-08-31 while the prod run directory configured
# in a later cell is dated 20220919 -- confirm the window matches the run
# under analysis.
date = "2022-08-31"
start_timestamp = pd.Timestamp(date + " 10:15:00", tz="America/New_York")
_LOG.info("start_timestamp=%s", start_timestamp)
end_timestamp = pd.Timestamp(date + " 15:45:00", tz="America/New_York")
# Log the end of the window; this line used to log `start_timestamp` under the
# `end_timestamp` label.
_LOG.info("end_timestamp=%s", end_timestamp)

In [None]:
# Ad-hoc listing of a `process_forecasts` log dir.
# NOTE(review): this path (20220915, job.1002440809) is not the `prod_dir`
# configured in the next cell -- confirm whether this cell is still needed.
!ls /data/cf_production/20220915/job.1002440809/job-sasm_job-jobid-1002440809/user_executable_run_0-1000005405809/cf_prod_system_log_dir/process_forecasts

In [None]:
# Prod system log dirs that have been used with this notebook:
# /share/data/cf_production/20220919/job.1002450215/job-sasm_job-jobid-1002450215/user_executable_run_0-1000005484302/cf_prod_system_log_dir
# /share/data/cf_production/20220919/job.1002452903/user_executable_run_0-1000005489454/cf_prod_system_log_dir
# Exactly one path is active below; the other is kept commented out.
prod_dir = (
    # "/share/data/cf_production/20220919/job.1002450215/job-sasm_job-jobid-1002450215/user_executable_run_0-1000005484302/cf_prod_system_log_dir"
    "/share/data/cf_production/20220919/job.1002452903/user_executable_run_0-1000005489454/cf_prod_system_log_dir"
)
# Rewrite the `/share/data/` prefix to `/data/`.
prod_dir = prod_dir.replace("/share/data/", "/data/")
prod_portfolio_dir = os.path.join(prod_dir, "process_forecasts/portfolio")
prod_forecast_dir = os.path.join(prod_dir, "process_forecasts")
# Assert that the prod forecast dir exists before going further.
hdbg.dassert_dir_exists(prod_forecast_dir)

# Simulation log dir, with the same `process_forecasts` sub-layout as prod.
sim_dir = "/app/system_log_dir"
sim_portfolio_dir = os.path.join(sim_dir, "process_forecasts/portfolio")
sim_forecast_dir = os.path.join(sim_dir, "process_forecasts")
# Assert that the sim forecast dir exists before going further.
hdbg.dassert_dir_exists(sim_forecast_dir)

In [None]:
# Gather the notebook parameters (prod / sim dirs, bar frequency, time window)
# into a single dict.
dict_ = {
    "prod_forecast_dir": prod_forecast_dir,
    "sim_forecast_dir": sim_forecast_dir,
    "prod_portfolio_dir": prod_portfolio_dir,
    "sim_portfolio_dir": sim_portfolio_dir,
    "freq": "15T",
    "start_timestamp": start_timestamp,
    "end_timestamp": end_timestamp,
}
# Build the config object read by the cells below and show it.
config = cconfig.Config.from_dict(dict_)
display(config)

# Forecasts

## Load prod and sim forecasts

In [None]:
# Read the target positions logged under the prod forecast dir.
prod_forecast_df = oms.ForecastProcessor.read_logged_target_positions(
    config["prod_forecast_dir"]
)
hpandas.df_to_str(prod_forecast_df, log_level=logging.INFO)

In [None]:
# Read the target positions logged under the sim forecast dir.
sim_forecast_df = oms.ForecastProcessor.read_logged_target_positions(
    config["sim_forecast_dir"]
)
hpandas.df_to_str(sim_forecast_df, log_level=logging.INFO)

## Compute forecast prod delay

In [None]:
# Compute the delay of the prod forecasts for bars of size `freq`.
# NOTE(review): presumably the offset of each index timestamp from its bar
# boundary -- confirm against `oms.compute_delay`.
prod_forecast_delay = oms.compute_delay(prod_forecast_df, config["freq"])
hpandas.df_to_str(prod_forecast_delay, log_level=logging.INFO)

In [None]:
# Plot the prod forecast delay.
prod_forecast_delay.plot()

In [None]:
# Round both forecast indices to the bar frequency, then keep only the rows
# inside the configured time window. The window bounds are read from `config`,
# like the portfolio-loading cells do, instead of from the bare globals.
# NOTE(review): the prod index is rounded in place here, so the forecast delay
# cell presumably has to run before this one -- confirm.
prod_forecast_df.index = prod_forecast_df.index.round(config["freq"])
sim_forecast_df.index = sim_forecast_df.index.round(config["freq"])
prod_forecast_df = prod_forecast_df.loc[
    config["start_timestamp"] : config["end_timestamp"]
]
sim_forecast_df = sim_forecast_df.loc[
    config["start_timestamp"] : config["end_timestamp"]
]

## Compare forecast dataframes

In [None]:
# Correlate the prod forecasts with the sim forecasts.
# NOTE(review): the shape of the result is defined by
# `dtfmod.compute_correlations`; the next cell relies on it having a
# "prediction" column.
forecast_corrs = dtfmod.compute_correlations(prod_forecast_df, sim_forecast_df)
hpandas.df_to_str(forecast_corrs, precision=3, log_level=logging.INFO)

In [None]:
# Show the correlations sorted by the "prediction" column, highest first.
sort_col = "prediction"
hpandas.df_to_str(
    forecast_corrs.sort_values(sort_col, ascending=False),
    num_rows=10,
    precision=3,
    log_level=logging.INFO,
)

# Orders

## Load prod and sim orders

In [None]:
# Read the orders logged under the prod forecast dir.
prod_order_df = oms.ForecastProcessor.read_logged_orders(
    config["prod_forecast_dir"]
)
hpandas.df_to_str(prod_order_df, log_level=logging.INFO)

In [None]:
# Read the orders logged under the sim forecast dir.
sim_order_df = oms.ForecastProcessor.read_logged_orders(
    config["sim_forecast_dir"]
)
hpandas.df_to_str(sim_order_df, log_level=logging.INFO)

# Portfolios

## Load prod portfolio

In [None]:
# Load the prod portfolio and its stats for the configured window.
# Bar times are not normalized at load time (`normalize_bar_times=False`); the
# indices are rounded to `freq` in the "Normalize bar times" section, after
# the prod portfolio delay has been computed.
prod_portfolio_df, prod_portfolio_stats_df = oms.load_portfolio_artifacts(
    config["prod_portfolio_dir"],
    config["start_timestamp"],
    config["end_timestamp"],
    config["freq"],
    normalize_bar_times=False,
)

In [None]:
# Log the prod portfolio.
hpandas.df_to_str(prod_portfolio_df, log_level=logging.INFO)

In [None]:
# Log the prod portfolio stats.
hpandas.df_to_str(prod_portfolio_stats_df, log_level=logging.INFO)

## Load sim portfolio

In [None]:
# Load the sim portfolio and its stats with the same window, frequency and
# `normalize_bar_times=False` setting as the prod load.
sim_portfolio_df, sim_portfolio_stats_df = oms.load_portfolio_artifacts(
    config["sim_portfolio_dir"],
    config["start_timestamp"],
    config["end_timestamp"],
    config["freq"],
    normalize_bar_times=False,
)

In [None]:
# Log the sim portfolio.
hpandas.df_to_str(sim_portfolio_df, log_level=logging.INFO)

In [None]:
# Log the sim portfolio stats.
hpandas.df_to_str(sim_portfolio_stats_df, log_level=logging.INFO)

## Compute prod portfolio delay

In [None]:
# Compute the delay of the prod portfolio for bars of size `freq`, using the
# same helper as for the forecasts.
prod_portfolio_delay = oms.compute_delay(prod_portfolio_df, config["freq"])

In [None]:
# Log the prod portfolio delay.
hpandas.df_to_str(prod_portfolio_delay, log_level=logging.INFO)

In [None]:
# Plot the prod portfolio delay.
prod_portfolio_delay.plot()

In [None]:
# Log the mean and standard deviation of the prod portfolio delay.
_LOG.info("prod portfolio delay mean=%s", prod_portfolio_delay.mean())
_LOG.info("prod portfolio delay std=%s", prod_portfolio_delay.std())

## Normalize bar times

In [None]:
# Frames whose indices get rounded to the bar frequency.
dfs = [
    prod_portfolio_df,
    prod_portfolio_stats_df,
    sim_portfolio_df,
    sim_portfolio_stats_df,
]

In [None]:
# Round each index to `freq`. This mutates the four frames in place (the list
# holds references, not copies).
# NOTE(review): presumably this must run after the prod portfolio delay has
# been computed, since that step reads the unrounded index -- confirm.
for df in dfs:
    df.index = df.index.round(config["freq"])

## Compare portfolio stats

In [None]:
# Put the prod and sim portfolio stats side by side in one frame; the mapping
# keys ("prod", "sim") become the outer level of the column index.
portfolio_stats_dfs = pd.concat(
    {
        "prod": prod_portfolio_stats_df,
        "sim": sim_portfolio_stats_df,
    },
    axis=1,
)

In [None]:
# Log the combined prod / sim portfolio stats.
hpandas.df_to_str(portfolio_stats_dfs, log_level=logging.INFO)

In [None]:
# Plot the combined prod / sim portfolio stats.
coplotti.plot_portfolio_stats(portfolio_stats_dfs)

In [None]:
# Correlate the prod portfolio stats with the sim portfolio stats and display
# the result rounded to 3 decimals.
portfolio_stats_corrs = dtfmod.compute_correlations(
    prod_portfolio_stats_df, sim_portfolio_stats_df
)
display(portfolio_stats_corrs.round(3))

In [None]:
# Compute portfolio stats from the combined prod / sim frame; only the first
# element of the returned pair is displayed.
stats_computer = dtfmod.StatsComputer()
stats_sxs, _ = stats_computer.compute_portfolio_stats(
    portfolio_stats_dfs, config["freq"]
)
display(stats_sxs)

## Compare portfolios at the instrument level

In [None]:
# Correlate the prod portfolio with the sim portfolio.
# NOTE(review): the shape of the result is defined by
# `dtfmod.compute_correlations`; the next cell relies on it having a "pnl"
# column.
portfolio_corrs = dtfmod.compute_correlations(prod_portfolio_df, sim_portfolio_df)
hpandas.df_to_str(portfolio_corrs, precision=3, log_level=logging.INFO)

In [None]:
# Show the correlations sorted by the "pnl" column, highest first.
sort_col = "pnl"
hpandas.df_to_str(
    portfolio_corrs.sort_values(sort_col, ascending=False),
    num_rows=10,
    precision=3,
    log_level=logging.INFO,
)

In [None]:
# OMS

In [None]:
# Compute the shares traded from the prod portfolio and the prod orders.
# The bar frequency comes from `config`, so it stays in sync with the rest of
# the notebook instead of repeating the "15T" literal.
shares_df = oms.compute_shares_traded(
    prod_portfolio_df, prod_order_df, config["freq"]
)

In [None]:
# Show the top-level column groups of `shares_df`.
shares_df.columns.levels[0]

In [None]:
# Display one column group; the commented-out lines are other expressions
# kept for reference.
# shares_df["estimated_price_per_share"]
# shares_df["underfill"] / shares_df["order_share_target_as_int"]
shares_df["order_share_target_as_int"]

# System configs

In [None]:
# TODO(Paul): Clean up the system config handling.
def load_config_as_list(path):
    """
    Read a text file and return its content as a list of lines.

    :param path: path of the file to read
    :return: lines of the file, in file order, as yielded by iterating over
        the open file
    """
    with open(path) as config_file:
        # Iterating over a text file yields the same lines as `readlines()`.
        config_lines = list(config_file)
    _LOG.debug("Lines read=%d", len(config_lines))
    return config_lines

In [None]:
def diff_lines(
    list1: List[str], list2: List[str]
) -> Tuple[List[str], List[str]]:
    """
    Return the lines that appear in only one of the two inputs.

    Each output contains distinct lines, in order of first appearance in the
    corresponding input, so the result is reproducible across runs (a plain
    set difference comes back in arbitrary order).

    :param list1: first collection of lines
    :param list2: second collection of lines
    :return: (lines only in `list1`, lines only in `list2`)
    """
    # `dict.fromkeys()` drops duplicates while keeping first-appearance order.
    unique1 = list(dict.fromkeys(list1))
    unique2 = list(dict.fromkeys(list2))
    # Sets give constant-time membership checks.
    seen1 = set(unique1)
    seen2 = set(unique2)
    list1_only = [line for line in unique1 if line not in seen2]
    list2_only = [line for line in unique2 if line not in seen1]
    return list1_only, list2_only

In [None]:
# Load the input and output system configs saved in the prod and sim log
# dirs. Paths are built with `os.path.join()`, consistent with how the other
# directories in this notebook are assembled.
prod_system_config_output = load_config_as_list(
    os.path.join(prod_dir, "system_config.output.txt")
)
sim_system_config_output = load_config_as_list(
    os.path.join(sim_dir, "system_config.output.txt")
)
prod_system_config_input = load_config_as_list(
    os.path.join(prod_dir, "system_config.input.txt")
)
sim_system_config_input = load_config_as_list(
    os.path.join(sim_dir, "system_config.input.txt")
)

In [None]:
# Lines that appear in only one of the prod / sim output system configs.
# NOTE(review): the input system configs loaded in the previous cell are not
# compared in the visible part of this notebook -- confirm whether they
# should be.
prod_output_only, sim_output_only = diff_lines(
    prod_system_config_output, sim_system_config_output
)

In [None]:
# prod_output_only

In [None]:
# sim_output_only