In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import datetime
import logging

import numpy as np
import pandas as pd

import core.config as cconfig
import core.finance as cofinanc
import core.plotting as coplotti
import core.statistics as costatis
import dataflow.model as dtfmod
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hparquet as hparque
import helpers.hprint as hprint
import helpers.hsql as hsql

  import tqdm.autonotebook as tauton


In [2]:
# Set up logging at INFO verbosity and create the notebook-level logger.
hdbg.init_logger(verbosity=logging.INFO)
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature so the environment the
# notebook ran in is recorded alongside its outputs.
system_signature = henv.get_system_signature()
_LOG.info("%s", system_signature[0])

# Apply the project's notebook display configuration.
hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-034e4a75-d591-45e1-ae81-192afc167ce2.json'
>>ENV<<: is_inside_container=True: code_version=1.0.3, container_version=amp-1.0.3, is_inside_docker=True, is_inside_ci=False, CI_defined=True, CI=''
>>ENV<<: AM_AWS_PROFILE=True AM_ECR_BASE_PATH=True AM_S3_BUCKET=True AM_TELEGRAM_TOKEN=True AWS_ACCESS_KEY_ID=False AWS_DEFAULT_REGION=True AWS_SECRET_ACCESS_KEY=False GH_ACTION_ACCESS_TOKEN=True
[31m-----------------------------------------------------------------------------
This code is not in sync with the container:
code_version='1.0.3' != container_version='amp-1.0.3'
-----------------------------------------------------------------------------
You need to:
- merge origin/master into your branch with `invoke git_merge_master`
- pull the latest container with `invoke docker_pull`[0m
INFO  # Git
    branch_name='AmpTask2163_Implement_tiled_backtesting'
    hash='59a

# Load tiled backtest

In [3]:
# Notebook parameters. Empty strings / None are placeholders that must be
# filled in before the cells that consume them are run.
dict_ = {
    # Tiled backtest input handed to the tile analyzer / loaders below.
    "file_name": "",
    # Date range used for the DB query and the bar-metric computations.
    "start_date": datetime.date(2010, 1, 1),
    "end_date": datetime.date(2020, 12, 31),
    # Column names inside the tiles.
    "asset_id_col": "",
    "returns_col": "",
    "volatility_col": "",
    "prediction_col": "",
    # Inputs for the regression and predictor-mixing sections.
    "feature_cols": None,
    "feature_lag": 2,
    "target_col": "",
    # Portfolio construction settings forwarded to the bar-metric generators.
    "target_gmv": 1e6,
    "dollar_neutrality": "no_constraint",
    # Frequency used for the second portfolio-stats computation.
    "freq": "5T",
}
config = cconfig.get_config_from_nested_dict(dict_)

## Report tile stats

In [4]:
# Build the analyzer and collect metadata for the tiles referenced by the
# configured file name.
parquet_tile_analyzer = dtfmod.ParquetTileAnalyzer()
backtest_file_name = config["file_name"]
parquet_tile_metadata = parquet_tile_analyzer.collate_parquet_tile_metadata(
    backtest_file_name
)

In [5]:
# Per-asset summary of the tile metadata; the rendered table reports
# n_years, n_unique_months, n_files and size for each asset id.
parquet_tile_analyzer.compute_metadata_stats_by_asset_id(parquet_tile_metadata)

Unnamed: 0_level_0,n_years,n_unique_months,n_files,size
egid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10025,1,2,2,711.7 KB


In [6]:
# Per (year, month) summary of the tile metadata; the rendered table reports
# the number of asset ids and the size for each period.
parquet_tile_analyzer.compute_universe_size_by_time(parquet_tile_metadata)

Unnamed: 0_level_0,Unnamed: 1_level_0,n_asset_ids,size
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,1,1,360.3 KB
2020,2,1,351.4 KB


In [7]:
# Asset ids are taken from the first level of the metadata index.
asset_id_level = parquet_tile_metadata.index.levels[0]
asset_ids = asset_id_level.to_list()

## Load a single-asset tile

In [None]:
# Restrict the read to the first asset id only.
first_asset_ids = asset_ids[:1]
tile_iterator = hparque.yield_parquet_tiles_by_assets(
    config["file_name"],
    first_asset_ids,
    config["asset_id_col"],
    # NOTE(review): presumably the number of assets per tile -- TODO confirm.
    1,
    # NOTE(review): presumably "no column restriction" -- TODO confirm.
    None,
)
# Only the first yielded tile is needed here.
single_asset_tile = next(tile_iterator)

In [None]:
# Post-process the tile read in the previous cell. The input must be
# `single_asset_tile`: no `df` is defined anywhere in this notebook, so
# referencing it only works with leftover kernel state and raises NameError
# on Restart & Run All.
single_tile_df = dtfmod.process_parquet_read_df(
    single_asset_tile, config["asset_id_col"]
)

In [None]:
# List the columns of the processed single-asset tile.
single_tile_df.columns

In [None]:
# Preview the first three rows of the processed single-asset tile.
single_tile_df.head(3)

# Overnight returns

In [None]:
# Placeholder connection settings: fill these in locally before running.
# Do not commit real credentials to the notebook.
host, dbname, port = "", "", 1000
user, password = "", ""
# Table queried in the next cell.
table_name = ""
connection = hsql.get_connection(host, dbname, port, user, password)

In [None]:
# Columns to pull from the table for the overnight-return computation.
selected_cols = ["date", "open_", "close", "total_return", "prev_total_return"]

# Query the configured date range for every asset id found in the tiles.
query_results = cofinanc.query_by_assets_and_dates(
    connection,
    table_name,
    asset_ids=asset_ids,
    asset_id_col=config["asset_id_col"],
    start_date=config["start_date"],
    end_date=config["end_date"],
    date_col="date",
    select_cols=selected_cols,
)

In [None]:
# Derive overnight returns from the query results, keyed by the asset id column.
overnight_returns = cofinanc.compute_overnight_returns(
    query_results, config["asset_id_col"]
)

# Compute portfolio bar metrics

In [None]:
# Config entries forwarded positionally, in exactly this order.
bar_metrics_keys = (
    "file_name",
    "start_date",
    "end_date",
    "asset_id_col",
    "returns_col",
    "volatility_col",
    "prediction_col",
    "target_gmv",
    "dollar_neutrality",
)
# NOTE(review): `overnight_returns["overnight_returns"]` is deliberately not
# passed as a trailing argument (it was disabled); confirm whether it should
# be re-enabled.
bar_metrics = dtfmod.generate_bar_metrics(
    *(config[key] for key in bar_metrics_keys)
)

In [None]:
# Plot the portfolio stats of the bar metrics at "B" frequency.
coplotti.plot_portfolio_stats(bar_metrics, freq="B")

# Compute aggregate portfolio stats

In [None]:
# Single StatsComputer instance reused by all portfolio-stats cells below.
stats_computer = dtfmod.StatsComputer()

In [None]:
# Portfolio stats of the bar metrics at "B" frequency; the call returns a
# pair that is unpacked into the stats and the daily metrics.
daily_stats_result = stats_computer.compute_portfolio_stats(bar_metrics, "B")
portfolio_stats, daily_metrics = daily_stats_result
display(portfolio_stats)

In [None]:
# Same computation at the configured frequency; the second element of the
# returned pair is not needed here.
stats_at_freq_result = stats_computer.compute_portfolio_stats(
    bar_metrics, config["freq"]
)
portfolio_stats_at_freq, _ = stats_at_freq_result
display(portfolio_stats_at_freq)

# Regression analysis

In [None]:
# Guard for the regression section: both entries are empty/None placeholders
# in the config cell and must be set before the regression is run.
hdbg.dassert(config["target_col"])
hdbg.dassert(config["feature_cols"])

In [None]:
# Config entries forwarded positionally, in exactly this order.
regression_inputs = [
    config[key]
    for key in (
        "file_name",
        "asset_id_col",
        "target_col",
        "feature_cols",
        "feature_lag",
    )
]
# NOTE(review): the meaning of the trailing positional 50 is not visible in
# this notebook -- TODO confirm against `dtfmod.regress` and give it a name.
coefficients = dtfmod.regress(*regression_inputs, 50)

In [None]:
# Preview the first three rows of the regression output.
coefficients.head(3)

# Predictor mixing

In [None]:
# Guard for the mixing section: `feature_cols` is None in the config cell and
# must be set before the weights below can be built.
hdbg.dassert(config["feature_cols"])

In [None]:
features = config["feature_cols"]
n_features = len(features)
# Identity block: one column per feature, with weight 1 on that feature only.
weights = pd.DataFrame(
    np.identity(n_features), index=features, columns=features
)
# Extra column giving every feature a weight of 1.
weights["sum"] = 1
display(weights)

In [None]:
# Config entries that precede the weights in the positional argument list.
mix_leading_args = [
    config[key]
    for key in (
        "file_name",
        "start_date",
        "end_date",
        "asset_id_col",
        "returns_col",
        "volatility_col",
        "feature_cols",
    )
]
# Evaluate the weighted feature mixes with the configured portfolio settings.
mix_bar_metrics = dtfmod.load_mix_evaluate(
    *mix_leading_args,
    weights,
    config["target_gmv"],
    config["dollar_neutrality"],
)

In [None]:
# Portfolio stats of the mixed-predictor bar metrics at "B" frequency; the
# returned pair is unpacked into the stats and the daily metrics.
mix_stats_result = stats_computer.compute_portfolio_stats(mix_bar_metrics, "B")
mix_portfolio_stats, mix_daily_metrics = mix_stats_result
display(mix_portfolio_stats)

In [None]:
# Plot the portfolio stats of the mixed-predictor bar metrics at "B" frequency.
coplotti.plot_portfolio_stats(mix_bar_metrics, freq="B")