## Imports and System Setup

In [None]:
# Standard libraries
import os
import sys

# Third party libraries
import pandas as pd


def project_root_path() -> str:
    """Return the parent directory of the current working directory.

    The working directory is resolved with ``os.path.abspath("")``; the
    returned value is the directory one level above it.
    """
    notebook_dir = os.path.abspath("")
    parent_dir, _ = os.path.split(notebook_dir)
    return parent_dir


# Resolve the project root (parent of the working directory) once.
project_path = project_root_path()

print(f"Project root path: {project_path}")
print(f"Python version: {sys.version}")

# Make the project packages importable. The membership guard keeps sys.path
# free of duplicate entries when this cell is executed more than once.
if project_path not in sys.path:
    sys.path.append(project_path)


# Causality libraries
from analysis.backtest import signalbacktest_df
from analysis.compounding import *
from analysis.evaluate_pnl import *
from data.dataloader import *

## Configuration

In [None]:
# Root path handed to the data loader for every field:
data_root_path = "~/data/causality_benchmark_dataset/common/daily/usa/1500/20240124"

# Step between consecutive bar boundaries.
# Alternatives noted by the author: pd.offsets.Week(1), pd.offsets.MonthBegin(1)
backtest_offset = pd.offsets.BDay(1)

# Leaving 3+ years of out-of-sample period (alternative noted by the author: None):
backtest_end_ts = pd.Timestamp("2021-01-01")

## Daily Data Preparation

In [None]:
# DataFrames are indexed such that values in a row are observable on the date of the corresponding index.
# For prices this means prices of the given date, while for returns it means that the return is finishing on the index date.

# Close to Close (overnight+intraday) returns are adjusted for splits, dividends, mergers and acquisitions:
ret_cc_df = load_field_df("ret_cc", data_root_path, end_ts=backtest_end_ts)

# Can't trade the close auction based on the same close price.
# Let's generate **adjusted** close to next day 15:45 returns, which can be used to trade in the upcoming close auction.
# Close prices are un-adjusted:
price_close_df = load_field_df("close", data_root_path, end_ts=backtest_end_ts)
# Intraday prices are un-adjusted:
price_1545_df = load_field_df(
    "154500_close_5m", data_root_path, end_ts=backtest_end_ts
)

# Same day prices, no need for overnight adjustments:
ret_1545c_df = price_close_df / price_1545_df - 1.0

# Close to Close (cc) returns are already adjusted for overnight events, let's use those as a base line
# and remove the 15:45 -> close leg in log space.
# log1p / expm1 are used instead of log(1 + x) / exp(x) - 1 because they keep precision for small returns.
# NOTE(review): `np` is not imported in the visible cells; presumably it arrives via the star imports — confirm.
ret_c1545_df = np.expm1(np.log1p(ret_cc_df) - np.log1p(ret_1545c_df))

# Universe is generated using liquidity data of previous days, but it's meant to be the universe one day later, on the index date.
# Contains 1.0 on days when an asset is tradeable, and NaN otherwise:
universe_df = load_field_df("universe", data_root_path, shift=0, end_ts=backtest_end_ts)

# All data fields from disk have the same index and same columns:
daily_index = universe_df.index

print(f"Loaded {len(daily_index)} daily records and {len(universe_df.columns)} assets.")

## Data Preparation for Trading Bars

Variable names include "bar", in contrast to the daily data above.

In [None]:
# Establish bar_tss, serving as boundaries of trading periods

daily_step_offsets = (pd.offsets.Day(1), pd.offsets.BDay(1))

if backtest_offset in daily_step_offsets:
    # With a one-day step every date of the daily index is a bar boundary.
    bar_tss = list(daily_index)
else:
    last_daily_ts = daily_index[-1]
    current_ts = daily_index[0] - pd.offsets.Day(1) + backtest_offset
    # List of Timestamps:
    bar_tss = []
    while True:
        # Snap the boundary to the first entry of daily_index that is >= current_ts.
        bar_tss.append(daily_index[daily_index >= current_ts][0])
        current_ts += backtest_offset
        if not (current_ts <= last_daily_ts):
            break

    # Keep unique elements in the list, while staying sorted:
    bar_tss = sorted(set(bar_tss))

# Bar boundaries must not contain duplicates.
assert len(bar_tss) == len(set(bar_tss))

In [None]:
# Plot a bar plot with bars on the days which are in bar_tss:

# A set gives constant-time membership tests; testing against the bar_tss list
# would rescan the list for every date in daily_index.
bar_ts_set = set(bar_tss)
y_values = [(ts in bar_ts_set) * 1.0 for ts in daily_index]
x_values = daily_index
df = pd.DataFrame(
    y_values, index=[str(ts).split(" ")[0] for ts in daily_index], columns=["Trading"]
)

# Settings shared by both plots:
bar_plot_kwargs = dict(
    kind="bar",
    figsize=(20, 1),
    xlabel="Dates in Raw Data",
    ylabel="Trading Day Flag",
    legend=False,
)

# Bar plots of the first and the last 100 rows:
df.iloc[:100, :].plot(title="First 100 Days", **bar_plot_kwargs)
_ = df.iloc[-100:, :].plot(title="Last 100 Days", **bar_plot_kwargs)

In [None]:
# Downsample the daily returns to bars by compounding them between consecutive Timestamps in bar_tss.
# Upcoming and observable returns are aligned on the same index, such that signals can be traded causally
# using the returns of the same timestamp.
# For example the simplest possible backtest, placing $1 on each asset that lost in the period before, will look like:
# pnl_df = (observable_bar_ret_cc_df < 0.0) * upcoming_bar_ret_cc_df

upcoming_bar_ret_cc_df = compound_upcoming_bar_return_cc_df(ret_cc_df, bar_tss)
observable_bar_ret_cc_df = compound_observable_bar_return_cc_df(ret_cc_df, bar_tss)

# Universe flags restricted to the bar boundary dates:
bar_universe_df = universe_df.loc[bar_tss]

In [None]:
# Establish the data used in the trading strategy at Timestamps in bar_tss,
# available at the Timestamp in its index (observable_ prefix)

# Variables observable when trading on the bar_tss dates.
# The current 15:45 price can be used:
observable_ret_c1545_df = ret_c1545_df.loc[bar_tss, :]
observable_bar_ret_cc1545_df = compound_ret_df(
    observable_bar_ret_cc_df, ret_c1545_df.loc[bar_tss, :]
)

observable_bar_signal_dfdict = {
    # Return from last daily close to today's 15:45 price:
    "day_ret_c1545": observable_ret_c1545_df,
    # Returns of period bar until previous close, compounded with returns until 15:45 from previous close:
    "bar_ret_cc1545": observable_bar_ret_cc1545_df,
}

# Conveniently sparse windows to reduce correlation
rolling_windows = (1, 2, 3, 5, 21, 42, 63, 252 // 2, 252)

# Rolling sums of the observable bar returns, one signal per window:
for window_i in rolling_windows:
    rolling_sum_df = observable_bar_ret_cc_df.rolling(window=window_i).sum()
    observable_bar_signal_dfdict[f"bar_ret_cc_{window_i:03d}"] = rolling_sum_df.loc[
        bar_tss, :
    ]

# Squared rolling sums, added after all plain sums so the key order stays the same.
# NOTE(review): this squares the rolling sum; it is not a rolling sum of squared returns — confirm intent.
for window_i in rolling_windows:
    rolling_sum_df = observable_bar_ret_cc_df.rolling(window=window_i).sum()
    observable_bar_signal_dfdict[f"bar_ret^2_cc_{window_i:03d}"] = (
        rolling_sum_df.loc[bar_tss, :] ** 2.0
    )

In [None]:
# Create a backtest loop, to make trading decisions on each day in bar_tss

# for trade_ts in bar_tss:
#     print(f'Trading the close on {trade_ts}')

In [None]:
# Analyze Profit & Loss (P&L) of the trading signals

pnl_dict = {}

# Threshold forwarded to signalbacktest_df; change it here only.
enter_at_stdev = 2.0

for key, signal_df in observable_bar_signal_dfdict.items():
    # Multiply by the universe flags so that only entries flagged in bar_universe_df keep a signal value.
    signal_df = signal_df * bar_universe_df
    # Pass the configured threshold instead of repeating the literal, so the setting above takes effect.
    trade_ret_df = signalbacktest_df(
        signal_df, upcoming_bar_ret_cc_df, enter_at_stdev=enter_at_stdev
    )
    # Sum the per-asset results across columns into one series per signal.
    pnl_dict[key] = trade_ret_df.sum(axis=1)

# One column per signal:
strategy_pnl_df = pd.DataFrame(pnl_dict)

plot_pnl(strategy_pnl_df)
display(calculate_performance_df(strategy_pnl_df))