In [1]:
# Import all the necessary modules
import os
import sys
import time
import os, sys
# from .../research/notebooks -> go up two levels to repo root
repo_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mtick
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
import pandas_datareader as pdr
import math
import datetime as dt
from datetime import datetime, timezone
import itertools
import ast
import yfinance as yf
import seaborn as sn
import yaml
import requests
from pathlib import Path
from IPython.display import display, HTML
from strategy_signal.trend_following_signal import (
    apply_jupyter_fullscreen_css, get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol
)
from portfolio.strategy_performance import (calculate_sharpe_ratio, calculate_calmar_ratio, calculate_CAGR, calculate_risk_and_performance_metrics,
                                          calculate_compounded_cumulative_returns, estimate_fee_per_trade, rolling_sharpe_ratio)
from utils import coinbase_utils as cn
from portfolio import strategy_performance as perf
from sizing import position_sizing_binary_utils as size_bin
from sizing import position_sizing_continuous_utils as size_cont
from strategy_signal import trend_following_signal as tf
%matplotlib inline

In [2]:
import importlib
importlib.reload(cn)
importlib.reload(perf)
importlib.reload(tf)
importlib.reload(size_bin)
importlib.reload(size_cont)

<module 'sizing.position_sizing_continuous_utils' from '/Users/adheerchauhan/Documents/git/trend_following/sizing/position_sizing_continuous_utils.py'>

In [3]:
import warnings
warnings.filterwarnings('ignore')
pd.set_option('Display.max_rows', None)
pd.set_option('Display.max_columns',None)
apply_jupyter_fullscreen_css()

## Coinbase Utility Functions

In [5]:
def get_coinbase_historical_price_data(
    client,
    ticker,
    start_timestamp,
    end_timestamp,
    granularity="ONE_DAY",
    retries=3,
    delay=5,
):
    """
    Pull one window of candles through ``client.get_candles()`` and return them as a DataFrame.

    granularity examples:
      ONE_MINUTE, FIVE_MINUTE, FIFTEEN_MINUTE, THIRTY_MINUTE,
      ONE_HOUR, TWO_HOUR, FOUR_HOUR, SIX_HOUR, ONE_DAY

    Parameters
    ----------
    client : object
        Must expose ``get_candles(product_id=, start=, end=, granularity=)`` whose result has a
        ``.candles`` sequence of mappings keyed by start/low/high/open/close/volume.
    ticker : str
        Passed to the client as ``product_id``.
    start_timestamp, end_timestamp : int-like
        Window bounds in epoch seconds (cast with ``int()`` before the request).
    granularity : str
        Forwarded to the client unchanged; only "ONE_DAY" triggers date normalisation.
    retries : int
        Maximum number of attempts when a connection error is raised.
    delay : int or float
        Seconds to sleep between attempts.

    Returns
    -------
    pd.DataFrame
        Indexed by tz-naive ``date`` (sorted ascending) with float columns
        low, high, open, close, volume. When the API returns no candles, an empty frame
        with columns open, high, low, close, volume is returned instead.

    Raises
    ------
    Exception
        If every attempt fails with ``requests.exceptions.ConnectionError``.
    """
    empty_cols = ["open", "high", "low", "close", "volume"]
    last_error = None

    for attempt in range(1, retries + 1):
        # Only the network call sits inside the try: that is the sole source of connection errors
        try:
            candle_list = client.get_candles(
                product_id=ticker,
                start=int(start_timestamp),
                end=int(end_timestamp),
                granularity=granularity,
            ).candles
        except requests.exceptions.ConnectionError as e:
            last_error = e
            if attempt < retries:
                print(f"Connection error: {e}. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print(f"Connection error: {e}. No retries left.")
            continue

        if not candle_list:
            return pd.DataFrame(columns=empty_cols).rename_axis("date")

        df = pd.DataFrame(
            [
                {
                    "date": c["start"],  # epoch seconds
                    "low": float(c["low"]),
                    "high": float(c["high"]),
                    "open": float(c["open"]),
                    "close": float(c["close"]),
                    "volume": float(c["volume"]),
                }
                for c in candle_list
            ]
        )

        # epoch seconds -> tz-aware UTC -> drop tz (tz-naive UTC)
        s = pd.to_datetime(pd.to_numeric(df["date"], errors="coerce"), unit="s", utc=True).dt.tz_localize(None)

        # Only normalize for daily bars; keep intraday timestamps intact
        if granularity == "ONE_DAY":
            s = s.dt.normalize()

        df["date"] = s
        return df.set_index("date").sort_index().rename_axis("date")

    raise Exception("Max retries exceeded. Could not connect to Coinbase API.") from last_error


In [7]:
def save_historical_crypto_prices_from_coinbase(
    ticker,
    user_start_date=False,
    start_date=None,
    end_date=None,
    save_to_file=False,
    portfolio_name="Default",
    granularity="ONE_DAY",
):
    """
    Pull historical candles for a single ticker at the requested granularity.

    Coinbase candle endpoints cap the number of candles per call (commonly 300), so the
    requested range is fetched in consecutive chunks and stitched together.

    Parameters
    ----------
    ticker : str
        Product id, e.g. "BTC-USD".
    user_start_date : bool
        If True use ``start_date``; otherwise look the start date up in
        ``cn.coinbase_start_date_by_ticker_dict``.
    start_date : str or datetime-like, optional
        Required when ``user_start_date`` is True.
    end_date : str or datetime-like, optional
        End of the range. ``None`` means "now" (tz-naive UTC).
    save_to_file : bool
        Accepted for interface compatibility; this function does not write any file.
    portfolio_name : str
        Forwarded to ``cn.get_coinbase_rest_api_client``.
    granularity : str
        One of the keys of the granularity table below.

    Returns
    -------
    pd.DataFrame or None
        Candles indexed by ``date`` with duplicate timestamps removed (last one wins),
        or ``None`` when the ticker has no entry in the start-date dictionary.

    Raises
    ------
    ValueError
        For an unsupported granularity, or when ``user_start_date`` is True but
        ``start_date`` is missing.
    """
    # seconds per bar (used to size chunks and to step between them without gaps)
    granularity_to_seconds = {
        "ONE_MINUTE": 60,
        "FIVE_MINUTE": 300,
        "FIFTEEN_MINUTE": 900,
        "THIRTY_MINUTE": 1800,
        "ONE_HOUR": 3600,
        "TWO_HOUR": 7200,
        "FOUR_HOUR": 14400,
        "SIX_HOUR": 21600,
        "ONE_DAY": 86400,
    }
    bar_sec = granularity_to_seconds.get(granularity)
    if bar_sec is None:
        raise ValueError(f"Unsupported granularity: {granularity}")

    # Resolve the start date BEFORE converting: pd.Timestamp(None) is NaT, not None,
    # so a None-check after the conversion can never fire.
    if user_start_date:
        if start_date is None:
            raise ValueError("start_date must be provided when user_start_date=True")
    else:
        start_date = cn.coinbase_start_date_by_ticker_dict.get(ticker)
        if start_date is None:
            print(f"Start date for {ticker} is not included in the dictionary!")
            return None
    start_date = pd.Timestamp(start_date)

    # pd.Timestamp(None) is NaT, which makes every comparison False and would silently
    # return an empty frame; treat a missing end date as "up to now" instead.
    if end_date is None:
        end_date = pd.Timestamp.now(tz="UTC").tz_localize(None)
    else:
        end_date = pd.Timestamp(end_date)

    client = cn.get_coinbase_rest_api_client(portfolio_name=portfolio_name)

    # Keep the 6-week chunk used for ONE_DAY / FOUR_HOUR (6 weeks of 4h bars is 252 candles),
    # but shrink it for finer granularities so a chunk never spans more than the per-request cap.
    # NOTE(review): 300 is taken from the cap quoted in the docstring above; confirm against the Coinbase docs.
    max_candles_per_request = 300
    bar = pd.Timedelta(seconds=bar_sec)
    chunk_span = min(pd.Timedelta(weeks=6), bar * (max_candles_per_request - 1))

    chunk_start = start_date
    chunk_end = start_date

    dfs = []
    while chunk_end < end_date:
        chunk_end = min(chunk_start + chunk_span, end_date)

        df_chunk = get_coinbase_historical_price_data(
            client=client,
            ticker=ticker,
            start_timestamp=int(chunk_start.timestamp()),
            end_timestamp=int(chunk_end.timestamp()),
            granularity=granularity,
        )
        dfs.append(df_chunk)

        # advance by exactly one bar to avoid duplicates and avoid gaps
        chunk_start = chunk_end + bar
        if chunk_start > end_date:
            # the next bar would start after the requested range; avoid an inverted window
            break

    if not dfs:
        cols = ["open", "high", "low", "close", "volume"]
        return pd.DataFrame(columns=cols).rename_axis("date")

    df = pd.concat(dfs, axis=0)
    df = df[~df.index.duplicated(keep="last")].sort_index()

    return df


In [12]:
cn_ticker_list = cn.coinbase_start_date_by_ticker_dict

In [14]:
cn_ticker_list

{'BTC-USD': '2016-01-01',
 'ETH-USD': '2016-06-01',
 'SOL-USD': '2021-06-01',
 'ADA-USD': '2021-03-01',
 'AVAX-USD': '2021-09-01',
 'DOT-USD': '2021-06-01',
 'ATOM-USD': '2020-01-01',
 'LTC-USD': '2016-09-01',
 'XRP-USD': '2023-06-01',
 'ALGO-USD': '2019-08-01',
 'XLM-USD': '2019-02-01',
 'TON-USD': '2025-11-18',
 'NEAR-USD': '2022-09-01',
 'ICP-USD': '2021-05-10',
 'HBAR-USD': '2022-10-13',
 'SUI-USD': '2023-05-18',
 'CRO-USD': '2021-11-01',
 'APT-USD': '2022-10-19',
 'XTZ-USD': '2019-08-06',
 'EGLD-USD': '2022-12-07',
 'FIL-USD': '2020-12-09',
 'SEI-USD': '2023-08-15',
 'TIA-USD': '2023-11-01',
 'KAVA-USD': '2023-01-19',
 'ROSE-USD': '2022-04-26',
 'MATIC-USD': '2021-02-01',
 'SKL-USD': '2021-02-01',
 'OP-USD': '2022-06-01',
 'ARB-USD': '2023-03-23',
 'POL-USD': '2024-09-04',
 'IMX-USD': '2021-12-09',
 'STRK-USD': '2024-02-21',
 'BLAST-USD': '2024-06-26',
 'ZK-USD': '2024-09-25',
 'LRC-USD': '2020-09-15',
 'ZORA-USD': '2025-04-24',
 'METIS-USD': '2022-06-28',
 'STX-USD': '2022-01-20'

In [16]:
# Keep tickers whose dictionary start date is on or before 2022-04-01,
# skipping the explicitly excluded products.
exclude_list = ['USDT-USD','DAI-USD','USD1-USD','PAX-USD','MATIC-USD']
listing_cutoff = pd.Timestamp('2022-04-01').date()
ticker_list = []
for ticker, date in cn_ticker_list.items():
    listed_on = pd.Timestamp(date).date()
    if listed_on > listing_cutoff or ticker in exclude_list:
        continue
    ticker_list.append(ticker)

In [18]:
print(len(ticker_list))
ticker_list

40


['BTC-USD',
 'ETH-USD',
 'SOL-USD',
 'ADA-USD',
 'AVAX-USD',
 'DOT-USD',
 'ATOM-USD',
 'LTC-USD',
 'ALGO-USD',
 'XLM-USD',
 'ICP-USD',
 'CRO-USD',
 'XTZ-USD',
 'FIL-USD',
 'SKL-USD',
 'IMX-USD',
 'LRC-USD',
 'STX-USD',
 'DOGE-USD',
 'SHIB-USD',
 'LINK-USD',
 'FET-USD',
 'GRT-USD',
 'RNDR-USD',
 'OXT-USD',
 'AIOZ-USD',
 'DIA-USD',
 'KRL-USD',
 'UNI-USD',
 'AAVE-USD',
 'AMP-USD',
 'COMP-USD',
 'MKR-USD',
 'SNX-USD',
 'SUSHI-USD',
 'CRV-USD',
 'BAL-USD',
 '1INCH-USD',
 'MANA-USD',
 'REQ-USD']

In [80]:
def get_coinbase_price_data_for_ticker_list(start_date, end_date, ticker_list, fill_missing=True):
    """
    Download FOUR_HOUR candles for every ticker and align them on one complete 4-hour grid.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Range passed to ``save_historical_crypto_prices_from_coinbase`` for each ticker and
        used to build the expected bar grid.
    ticker_list : list of str
        Products to download; processed in batches of 10 (progress is printed per batch).
    fill_missing : bool
        If True, bars missing from the download are filled: close is forward-filled per
        ticker, open/high/low are set to that close and volume to 0.

    Returns
    -------
    pd.DataFrame
        Flat frame with columns ticker, date, the OHLCV columns and a boolean ``imputed``
        flag marking bars that were absent from the downloaded data. The grid is
        left-inclusive, so a bar stamped exactly at the ceiling of ``end_date`` is excluded.
        Missing bars before a ticker's first real bar have nothing to forward-fill from and
        keep NaN prices.
    """
    batch_size = 10
    n_tickers = len(ticker_list)

    df_dict_by_ticker = {}
    # Step through the list in fixed-size batches. Slicing already handles a short final
    # batch, so no special case is needed for any particular list length.
    for loop_start in range(0, n_tickers, batch_size):
        loop_end = min(loop_start + batch_size, n_tickers)
        batch = ticker_list[loop_start:loop_end]
        print(loop_start, loop_start, loop_end, batch)
        for t in batch:
            df_dict_by_ticker[t] = save_historical_crypto_prices_from_coinbase(
                ticker=t,
                user_start_date=True,
                start_date=start_date,
                end_date=end_date,
                portfolio_name="Default",
                granularity="FOUR_HOUR",
            )

    # One combined frame (MultiIndex: ticker, date)
    df_all = pd.concat(df_dict_by_ticker, names=["ticker", "date"]).sort_index()

    # --- build expected 4H grid ---
    bar_freq = "4h"
    start = pd.Timestamp(start_date).floor(bar_freq)
    end = pd.Timestamp(end_date).ceil(bar_freq)
    expected_dates = pd.date_range(start=start, end=end, freq=bar_freq, inclusive="left")

    full_index = pd.MultiIndex.from_product(
        [ticker_list, expected_dates],
        names=["ticker", "date"]
    )

    # --- align everything to full grid ---
    df_all = df_all.reindex(full_index)

    # --- flag missing bars (must happen before any filling) ---
    df_all["imputed"] = df_all["close"].isna()

    if fill_missing:
        # Forward-fill ONLY close per ticker (for alignment / valuation)
        df_all["close"] = df_all.groupby(level=0)["close"].ffill()

        # For imputed rows: set OHLC = close, volume=0
        m = df_all["imputed"]
        for col in ["open", "high", "low"]:
            df_all.loc[m, col] = df_all.loc[m, "close"]
        df_all.loc[m, "volume"] = 0.0

    # flat frame with ticker/date as regular columns
    df_all = df_all.reset_index()

    return df_all

In [82]:
%%time
start_date = "2022-04-01"
end_date   = "2024-12-31"
df_ticker_price_impute = get_coinbase_price_data_for_ticker_list(start_date, end_date, ticker_list, fill_missing=True)

0 0 10 ['BTC-USD', 'ETH-USD', 'SOL-USD', 'ADA-USD', 'AVAX-USD', 'DOT-USD', 'ATOM-USD', 'LTC-USD', 'ALGO-USD', 'XLM-USD']
10 10 20 ['ICP-USD', 'CRO-USD', 'XTZ-USD', 'FIL-USD', 'SKL-USD', 'IMX-USD', 'LRC-USD', 'STX-USD', 'DOGE-USD', 'SHIB-USD']
20 20 30 ['LINK-USD', 'FET-USD', 'GRT-USD', 'RNDR-USD', 'OXT-USD', 'AIOZ-USD', 'DIA-USD', 'KRL-USD', 'UNI-USD', 'AAVE-USD']
30 30 40 ['AMP-USD', 'COMP-USD', 'MKR-USD', 'SNX-USD', 'SUSHI-USD', 'CRV-USD', 'BAL-USD', '1INCH-USD', 'MANA-USD', 'REQ-USD']
CPU times: user 16.9 s, sys: 1.33 s, total: 18.2 s
Wall time: 2min


In [83]:
df_ticker_check = df_ticker_price_impute.groupby(['date']).agg({'ticker':'nunique'})
df_ticker_check[df_ticker_check['ticker'] < 40]

Unnamed: 0_level_0,ticker
date,Unnamed: 1_level_1


In [86]:
df_ticker_price[df_ticker_price['date'] == pd.Timestamp('2023-03-04 12:00:00')]

Unnamed: 0,ticker,date,low,high,open,close,volume
2025,1INCH-USD,2023-03-04 12:00:00,0.52,0.523,0.521,0.521,67556.04
8056,AAVE-USD,2023-03-04 12:00:00,75.75,77.06,76.71,75.77,5561.88
14087,ADA-USD,2023-03-04 12:00:00,0.3395,0.3423,0.3418,0.3397,2864558.0
20118,AIOZ-USD,2023-03-04 12:00:00,0.0335,0.034,0.0335,0.0336,141707.3
26149,ALGO-USD,2023-03-04 12:00:00,0.2289,0.2316,0.2311,0.2292,1265167.0
32180,AMP-USD,2023-03-04 12:00:00,0.00506,0.00532,0.00525,0.00529,44176270.0
38211,ATOM-USD,2023-03-04 12:00:00,11.888,12.022,11.989,11.898,26237.38
44242,AVAX-USD,2023-03-04 12:00:00,16.52,16.77,16.74,16.52,24594.52
50273,BAL-USD,2023-03-04 12:00:00,6.4,6.45,6.45,6.4,1007.224
56304,BTC-USD,2023-03-04 12:00:00,22300.8,22399.98,22351.49,22316.86,482.181


In [88]:
df_ticker_price[df_ticker_price['date'] == pd.Timestamp('2023-03-04 16:00:00')]

Unnamed: 0,ticker,date,low,high,open,close,volume
2026,1INCH-USD,2023-03-04 16:00:00,0.519,0.521,0.521,0.521,3642.54
8057,AAVE-USD,2023-03-04 16:00:00,75.62,76.04,75.76,75.74,921.979
14088,ADA-USD,2023-03-04 16:00:00,0.3397,0.3402,0.3397,0.3398,75265.12
20119,AIOZ-USD,2023-03-04 16:00:00,0.0335,0.0337,0.0336,0.0336,16784.0
26150,ALGO-USD,2023-03-04 16:00:00,0.2288,0.2298,0.2292,0.2292,52402.9
32181,AMP-USD,2023-03-04 16:00:00,0.0052,0.00536,0.00529,0.00522,12043730.0
38212,ATOM-USD,2023-03-04 16:00:00,11.874,11.943,11.892,11.943,2473.97
44243,AVAX-USD,2023-03-04 16:00:00,16.43,16.57,16.53,16.57,7001.243
50274,BAL-USD,2023-03-04 16:00:00,6.39,6.41,6.4,6.39,591.274
56305,BTC-USD,2023-03-04 16:00:00,22293.16,22345.17,22316.86,22330.27,31.41319


In [None]:
df_ticker_price[df_ticker_price['date'] == pd.Timestamp('2023-03-12 04:00:00')]

In [None]:
df_ticker_price.shape

In [None]:
## Save Files to Parquet for later use
df_ticker_price.to_parquet(
    f"/Users/adheerchauhan/Documents/git/trend_following/data_folder/coinbase_4_min_bar_data/coinbase_ohlcv_4min_{start_date}-{end_date}.parquet",
    index=False,
    compression="zstd",  # great balance of size + speed
)

In [40]:
## Read Parquet Files
df_ticker_price = pd.read_parquet(f"/Users/adheerchauhan/Documents/git/trend_following/data_folder/coinbase_4_min_bar_data/coinbase_ohlcv_4min_{start_date}-{end_date}.parquet")

In [42]:
df_ticker_price.head()

Unnamed: 0,ticker,date,low,high,open,close,volume
0,1INCH-USD,2022-04-01 00:00:00,1.67,1.792,1.777,1.708,193718.75
1,1INCH-USD,2022-04-01 04:00:00,1.707,1.756,1.709,1.755,64858.85
2,1INCH-USD,2022-04-01 08:00:00,1.738,1.772,1.757,1.747,43729.16
3,1INCH-USD,2022-04-01 12:00:00,1.71,1.831,1.744,1.816,149897.91
4,1INCH-USD,2022-04-01 16:00:00,1.809,1.885,1.819,1.88,167483.5


In [44]:
df_ticker_price.shape

(241229, 7)

In [46]:
df_ticker_price.groupby(['ticker']).size()

ticker
1INCH-USD    6031
AAVE-USD     6031
ADA-USD      6031
AIOZ-USD     6031
ALGO-USD     6031
AMP-USD      6031
ATOM-USD     6031
AVAX-USD     6031
BAL-USD      6031
BTC-USD      6031
COMP-USD     6031
CRO-USD      6031
CRV-USD      6031
DIA-USD      6031
DOGE-USD     6031
DOT-USD      6031
ETH-USD      6031
FET-USD      6031
FIL-USD      6031
GRT-USD      6031
ICP-USD      6031
IMX-USD      6031
KRL-USD      6020
LINK-USD     6031
LRC-USD      6031
LTC-USD      6031
MANA-USD     6031
MKR-USD      6031
OXT-USD      6031
REQ-USD      6031
RNDR-USD     6031
SHIB-USD     6031
SKL-USD      6031
SNX-USD      6031
SOL-USD      6031
STX-USD      6031
SUSHI-USD    6031
UNI-USD      6031
XLM-USD      6031
XTZ-USD      6031
dtype: int64

In [64]:
sorted(df_ticker_price.date.unique())

[Timestamp('2022-04-01 00:00:00'),
 Timestamp('2022-04-01 04:00:00'),
 Timestamp('2022-04-01 08:00:00'),
 Timestamp('2022-04-01 12:00:00'),
 Timestamp('2022-04-01 16:00:00'),
 Timestamp('2022-04-01 20:00:00'),
 Timestamp('2022-04-02 00:00:00'),
 Timestamp('2022-04-02 04:00:00'),
 Timestamp('2022-04-02 08:00:00'),
 Timestamp('2022-04-02 12:00:00'),
 Timestamp('2022-04-02 16:00:00'),
 Timestamp('2022-04-02 20:00:00'),
 Timestamp('2022-04-03 00:00:00'),
 Timestamp('2022-04-03 04:00:00'),
 Timestamp('2022-04-03 08:00:00'),
 Timestamp('2022-04-03 12:00:00'),
 Timestamp('2022-04-03 16:00:00'),
 Timestamp('2022-04-03 20:00:00'),
 Timestamp('2022-04-04 00:00:00'),
 Timestamp('2022-04-04 04:00:00'),
 Timestamp('2022-04-04 08:00:00'),
 Timestamp('2022-04-04 12:00:00'),
 Timestamp('2022-04-04 16:00:00'),
 Timestamp('2022-04-04 20:00:00'),
 Timestamp('2022-04-05 00:00:00'),
 Timestamp('2022-04-05 04:00:00'),
 Timestamp('2022-04-05 08:00:00'),
 Timestamp('2022-04-05 12:00:00'),
 Timestamp('2022-04-

In [66]:
expected_index = sorted(df_ticker_price.date.unique())#build_expected_index(start_date, end_date)

df_ticker_price_impute = reindex_and_flag(df_ticker_price, expected_index=expected_index)

In [68]:
df_ticker_price_impute.head(20)

Unnamed: 0,ticker,date,low,high,open,close,volume,imputed
2022-04-01 00:00:00,,NaT,,,,,,True
2022-04-01 04:00:00,,NaT,,,,,,True
2022-04-01 08:00:00,,NaT,,,,,,True
2022-04-01 12:00:00,,NaT,,,,,,True
2022-04-01 16:00:00,,NaT,,,,,,True
2022-04-01 20:00:00,,NaT,,,,,,True
2022-04-02 00:00:00,,NaT,,,,,,True
2022-04-02 04:00:00,,NaT,,,,,,True
2022-04-02 08:00:00,,NaT,,,,,,True
2022-04-02 12:00:00,,NaT,,,,,,True


In [54]:
df_ticker_price.shape

(241229, 7)

## Build Return Features

In [None]:
def calculate_z_score(df, return_col, date_col, z_score_col_name):
    """
    Add a cross-sectional z-score of ``return_col`` computed within each ``date_col`` group.

    The score is (value - group mean) / group std. A group std of exactly 0 is replaced by
    NaN so the division yields NaN instead of +/-inf. The column is written onto ``df``
    itself, and the same object is returned.
    """
    per_date = df.groupby([date_col])[return_col]
    cross_mean = per_date.transform('mean')
    cross_std = per_date.transform('std').replace(0, np.nan)

    df[z_score_col_name] = (df[return_col] - cross_mean) / cross_std
    return df
    

def build_return_features(df, min_z_score_ticker_count=20, fwd_return_period=3, winsorize_fwd_return=True, fwd_return_cap=0.50):
    """
    Build the lagged-return signal, its cross-sectional z-score and the forward-return label.

    Parameters
    ----------
    df : pd.DataFrame
        Needs columns ticker, date, open and close. The input frame is not modified.
    min_z_score_ticker_count : int
        Bars (dates) with fewer distinct tickers than this are dropped before z-scoring.
    fwd_return_period : int
        Number of rows (bars) to look ahead for the forward open-to-open log return.
    winsorize_fwd_return : bool
        If True, clip the forward return to +/- ``fwd_return_cap``.
    fwd_return_cap : float
        Absolute clip level for the forward return.

    Returns
    -------
    pd.DataFrame
        Rows ordered by (ticker, date) with added columns
        ``close_log_return_prev_4h``, ``close_log_return_z_score_prev_4h`` and
        ``fwd_open_log_return_{fwd_return_period * 4}h`` (the suffix assumes 4-hour bars).
    """
    # The per-ticker shifts below walk rows in order, so rows must be chronological within
    # each ticker. A stable sort leaves already-sorted input unchanged and returns a new frame.
    df_returns = df.sort_values(['ticker', 'date'], kind='mergesort')

    ## Get previous 4 hour returns: return of the last fully completed bar (close[t-1] / close[t-2])
    ticker_group_close = df_returns.groupby(['ticker'])['close']
    df_returns['close_log_return_prev_4h'] = np.log(ticker_group_close.shift(1) / ticker_group_close.shift(2))

    ## Require a minimum number of tickers to calculate Z-Score for a given bar
    ticker_count_by_date = df_returns.groupby(['date'])['ticker'].transform('nunique')
    # .copy() so the z-score column is written to an independent frame, not a slice
    df_returns = df_returns[ticker_count_by_date >= min_z_score_ticker_count].copy()

    ## Calculate cross-sectional Z-Score across all tickers per bar
    df_returns = calculate_z_score(df_returns, return_col='close_log_return_prev_4h', date_col='date', z_score_col_name='close_log_return_z_score_prev_4h')

    ## Get forward return for specified period (Open(T) to Open(T+H))
    # NOTE: the shift counts rows, so if bars were dropped above the horizon can span more than H bars
    fwd_col = f'fwd_open_log_return_{fwd_return_period * 4}h'
    df_returns[fwd_col] = np.log(df_returns.groupby(['ticker'])['open'].shift(-fwd_return_period) / df_returns['open'])

    ## Winsorize Forward Return to reduce data glitches
    if winsorize_fwd_return:
        df_returns[fwd_col] = df_returns[fwd_col].clip(-fwd_return_cap, fwd_return_cap)

    return df_returns

In [None]:
df_returns = build_return_features(df_ticker_price, min_z_score_ticker_count=20, fwd_return_period=3, winsorize_fwd_return=True, fwd_return_cap=0.50)

In [None]:
df_returns.shape

In [None]:
df_returns.head()

In [None]:
return_cols = ['close_log_return_prev_4h','fwd_open_log_return_12h']
df_signal = df_returns.dropna(subset=return_cols).copy()

In [None]:
## Analyze the Decile Performance by date for this signal
def cs_bucket(group, col, q=10):
    """Assign each row of one timestamp's group to an integer quantile bucket of ``col``.

    Buckets come from ``pd.qcut`` with ``q`` quantiles; duplicate bin edges are dropped,
    so fewer than ``q`` buckets may come back.
    """
    values = group[col]
    return pd.qcut(values, q=q, labels=False, duplicates="drop")

df_signal["quantile_bucket"] = df_signal.groupby("date", group_keys=False).apply(
    lambda g: cs_bucket(g, "close_log_return_z_score_prev_4h", q=10)
)

bucket_stats = (
    df_signal.dropna(subset=["quantile_bucket"])
    .groupby("quantile_bucket")["fwd_open_log_return_12h"]
    .agg(["mean", "std", "count"])
)
bucket_stats["t_stat"] = bucket_stats["mean"] / (bucket_stats["std"] / np.sqrt(bucket_stats["count"]))
## Denominator here is the Standard Error calculated as STD / sqrt(N). The t-stat calculates how many
## standard errors the observed mean is away from 0
## T-Stat assumes IID (which may not be the case) and n is large enough where the distribution is normal using Central Limit Theorem
bucket_stats


In [None]:
## Analyze the Quintile Performance by date for this signal
def cs_bucket(group, col, q=10):
    """Assign each row of one timestamp's group to an integer quantile bucket of ``col``.

    Buckets come from ``pd.qcut`` with ``q`` quantiles; duplicate bin edges are dropped,
    so fewer than ``q`` buckets may come back.
    """
    values = group[col]
    return pd.qcut(values, q=q, labels=False, duplicates="drop")

df_signal["quantile_bucket"] = df_signal.groupby("date", group_keys=False).apply(
    lambda g: cs_bucket(g, "close_log_return_z_score_prev_4h", q=5)
)

bucket_stats = (
    df_signal.dropna(subset=["quantile_bucket"])
    .groupby("quantile_bucket")["fwd_open_log_return_12h"]
    .agg(["mean", "std", "count"])
)
bucket_stats["t_stat"] = bucket_stats["mean"] / (bucket_stats["std"] / np.sqrt(bucket_stats["count"]))
## Denominator here is the Standard Error calculated as STD / sqrt(N). The t-stat calculates how many
## standard errors the observed mean is away from 0
## T-Stat assumes IID (which may not be the case) and n is large enough where the distribution is normal using Central Limit Theorem
bucket_stats


In [None]:
df_signal[df_signal.date == pd.Timestamp('2022-04-26 08:00:00')].sort_values('close_log_return_z_score_prev_4h')#.head()

In [None]:
## Information Coefficient: calculates the correlation between the signal and future returns
## This is usually calculated cross-sectionally at each timestamp and analyzed over time
def spearman_ic(group, signal="close_log_return_z_score_prev_4h", label="fwd_open_log_return_12h"):
    """Spearman rank correlation between ``signal`` and ``label`` within one group.

    Rows missing either value are ignored; NaN is returned when fewer than 10 complete
    pairs remain.
    """
    pairs = group[[signal, label]].dropna()
    has_enough_pairs = len(pairs) >= 10
    return pairs[signal].corr(pairs[label], method="spearman") if has_enough_pairs else np.nan

ic_ts = df_signal.groupby("date").apply(spearman_ic)
ic_ts.describe()
## Negative IC shows inverse correlation which is what we want
## When Z-Score is negative, the coins bounce back with positive forward returns

In [None]:
ic_ts.head()

In [None]:
# Per-timestamp Spearman IC; timestamps with too few pairs come back as NaN
ic_ts = df_signal.groupby("date").apply(spearman_ic)
ic_valid = ic_ts.dropna()

n = ic_ts.count()  # number of non-NaN ICs
ic_mean = ic_ts.mean()
ic_std = ic_ts.std(ddof=1)
ic_se = ic_std / np.sqrt(n) if n > 0 else np.nan
ic_t = ic_mean / ic_se if (n > 1 and ic_se != 0) else np.nan
# NaN never equals itself, so a membership test against (0, np.nan) cannot detect it; check explicitly
ic_ir = ic_mean / ic_std if (pd.notna(ic_std) and ic_std != 0) else np.nan
# Hit rate over the same non-NaN dates that `n` counts (NaN > 0 is False and would bias it down)
hit_rate = (ic_valid > 0).mean() if n > 0 else np.nan

## Calculate the IC at selected percentiles between the 1st and the 99th
pct = ic_valid.quantile([0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]) if n > 0 else pd.Series(dtype=float)

# 3) Print everything
print("=== Information Coefficient (Spearman Rank IC) Summary ===")
print(f"Dates (non-NaN):              {n}")
print(f"Mean IC:                      {ic_mean:.6f}")
print(f"Std IC:                       {ic_std:.6f}")
print(f"Std Error (mean):             {ic_se:.6f}")
print(f"t-stat (mean IC):             {ic_t:.3f}")
print(f"Information Ratio (mean/std): {ic_ir:.3f}")
print(f"Hit rate (IC > 0):            {hit_rate:.3%}")
print(f"Min / Max IC:                 {ic_ts.min():.6f} / {ic_ts.max():.6f}")
print("")
print("Percentiles:")
for q, v in pct.items():
    print(f"  p{int(q*100):02d}: {v:.6f}")


In [None]:
## Creating a naive equal-weighted strategy going long the bottom 20% of coins
# Create a flag for the bottom 20% of coins by Z-Score
q = 0.2  # bottom 20%
# Per timestamp: True where the z-score is at or below that timestamp's q-quantile.
# Rows with a NaN z-score compare False and are therefore never selected.
df_signal["naive_trade_signal"] = df_signal.groupby("date")["close_log_return_z_score_prev_4h"].transform(
    lambda s: s <= s.quantile(q)
)

# Equal weight among selected per timestamp
# A timestamp with zero selections gets a NaN count, so its weights fall back to 0.0 below
sel_count = df_signal.groupby("date")["naive_trade_signal"].transform("sum").replace(0, np.nan)
df_signal["weight"] = (df_signal["naive_trade_signal"] / sel_count).fillna(0.0)

# Calculate the forward return of the strategy going out to 12 hours after a naive position is taken
# NOTE: this is a weighted average of per-coin LOG returns, which approximates (but is not
# identical to) the log return of an equal-weighted basket.
basket_fwd = df_signal.groupby("date").apply(lambda g: float((g["weight"] * g["fwd_open_log_return_12h"]).sum()))
basket_fwd.name = "basket_fwd_12h"
basket_fwd.describe()


In [None]:
basket_fwd.head(20)

In [None]:
import numpy as np
import pandas as pd

df = df_returns.sort_values(["ticker","date"]).copy()

# --- Prior-bar OHLCV features (available at open_t) ---
g = df.groupby("ticker", group_keys=False)

df["volume_prev"]   = g["volume"].shift(1)
df["high_prev"]  = g["high"].shift(1)
df["low_prev"]   = g["low"].shift(1)
df["close_prev"] = g["close"].shift(1)
df["open_prev"]  = g["open"].shift(1)

# Range of prior bar (you can use /close_prev or /open_prev; pick one)
df["range_prev"] = (df["high_prev"] - df["low_prev"]) / df["close_prev"]

# Rolling z-scores per ticker (volume spike / range shock relative to its own history)
# Window ~ 10 days of 4h bars: 10*6 = 60
W = 60

def rolling_z(x, window=W):
    """Z-score of each value against its own trailing rolling window.

    The window includes the current observation and needs at least ``window // 2``
    observations; a rolling std of exactly 0 is mapped to NaN to avoid dividing by zero.
    """
    roller = x.rolling(window, min_periods=window // 2)
    centered = x - roller.mean()
    spread = roller.std().replace(0, np.nan)
    return centered / spread

df["volume_z_prev"]   = g["volume_prev"].apply(rolling_z)
df["range_z_prev"] = g["range_prev"].apply(rolling_z)

# Flags: high/low
# Thresholds are in z-score units (standard deviations above the ticker's own rolling mean).
VOL_Z_TH = 1.5
RNG_Z_TH = 1.5
# NOTE: a NaN z-score (e.g. before the rolling window has enough history) compares False,
# so those rows land in the "low" bucket rather than being excluded.
df["high_vol_spike"]   = df["volume_z_prev"]   >= VOL_Z_TH
df["high_range_shock"] = df["range_z_prev"] >= RNG_Z_TH


In [None]:
BTC = "BTC-USD"
btc = df[df["ticker"] == BTC].sort_values("date")[["date","close_prev"]].copy()

# BTC MA on prior close (choose window)
MA_W = 150  # ~25 days of 4h bars
btc["btc_ma"] = btc["close_prev"].rolling(MA_W, min_periods=MA_W//2).mean()

# A comparison against NaN evaluates to False, which would silently label warm-up bars
# as risk-off. Mask them so risk_on is genuinely missing until the MA is ready.
regime_ready = btc["btc_ma"].notna() & btc["close_prev"].notna()
btc["risk_on"] = (btc["close_prev"] > btc["btc_ma"]).where(regime_ready)
btc_regime = btc[["date","risk_on"]]

# Drop any risk_on left by a previous run of this cell so the merge does not
# produce risk_on_x / risk_on_y and the cell stays re-runnable.
df = df.drop(columns=["risk_on"], errors="ignore").merge(btc_regime, on="date", how="left")
# Early history (MA not ready) and dates without a BTC bar have NaN risk_on: drop them
df = df.dropna(subset=["risk_on"])
df["risk_on"] = df["risk_on"].astype(bool)


In [None]:
df.head(200)

In [None]:
q = 0.2
# Per timestamp: enter where the z-score is at or below that timestamp's q-quantile
# (rows with a NaN z-score compare False and are never entered).
df["enter"] = df.groupby("date")["close_log_return_z_score_prev_4h"].transform(lambda s: s <= s.quantile(q))

# Use only rows where we have a label
df_eval = df.dropna(subset=["fwd_open_log_return_12h", "close_log_return_z_score_prev_4h"]).copy()

# Evaluate only entries (recommended) - otherwise you're not measuring the strategy edge
df_trades = df_eval[df_eval["enter"]].copy()


In [None]:
def summarize(group, label="fwd_open_log_return_12h"):
    """Mean, sample std, count and t-stat of ``label`` for one group.

    Groups with fewer than 30 non-NaN observations report only the count; the other
    statistics are NaN. The t-stat is mean / (std / sqrt(n)) and is NaN unless std > 0.
    """
    sample = group[label].dropna()
    count = sample.size

    mean = std = t_stat = np.nan
    if count >= 30:
        mean = sample.mean()
        std = sample.std(ddof=1)
        t_stat = mean / (std / np.sqrt(count)) if std > 0 else np.nan

    return pd.Series({"mean": mean, "std": std, "count": count, "t_stat": t_stat})

# 1) Volume spike high vs low
vol_table = df_trades.groupby("high_vol_spike").apply(summarize)
vol_table.index = vol_table.index.map({False: "Low vol spike", True: "High vol spike"})

# 2) Range shock high vs low
rng_table = df_trades.groupby("high_range_shock").apply(summarize)
rng_table.index = rng_table.index.map({False: "Low range shock", True: "High range shock"})

# 3) BTC regime risk-on vs risk-off
reg_table = df_trades.groupby("risk_on").apply(summarize)
reg_table.index = reg_table.index.map({False: "Risk-off", True: "Risk-on"})

# Only a cell's final expression is rendered, so show all three tables explicitly
display(vol_table, rng_table, reg_table)


In [None]:
vol_table

In [None]:
rng_table

In [None]:
vol_x_reg = df_trades.groupby(["risk_on","high_vol_spike"]).apply(summarize).reset_index()
vol_x_reg["risk_on"] = vol_x_reg["risk_on"].map({False:"Risk-off", True:"Risk-on"})
vol_x_reg["high_vol_spike"] = vol_x_reg["high_vol_spike"].map({False:"Low vol spike", True:"High vol spike"})
vol_x_reg


In [None]:
three_way = df_trades.groupby(["risk_on","high_vol_spike","high_range_shock"]).apply(summarize).reset_index()
three_way["risk_on"] = three_way["risk_on"].map({False:"Risk-off", True:"Risk-on"})
three_way["high_vol_spike"] = three_way["high_vol_spike"].map({False:"Low vol spike", True:"High vol spike"})
three_way["high_range_shock"] = three_way["high_range_shock"].map({False:"Low range", True:"High range"})
three_way.sort_values(["risk_on","high_vol_spike","high_range_shock"])


In [None]:
df[df.ticker == 'AAVE-USD'].head(20)

In [None]:
df_signal[df_signal.date == pd.Timestamp('2022-04-01 08:00:00')].sort_values('close_log_return_z_score_prev_4h').head(10)

## Building a Strategy Backtest Engine

In [None]:
df_returns = build_return_features(df_ticker_price, min_z_score_ticker_count=20, fwd_return_period=3, winsorize_fwd_return=True, fwd_return_cap=0.50)
return_cols = ['close_log_return_prev_4h','fwd_open_log_return_12h']
df_signal = df_returns.dropna(subset=return_cols).copy()

In [None]:
df_returns.groupby(['date']).agg({'ticker':'nunique'})

In [None]:
df_returns[df_returns['date'] == pd.Timestamp('')]

In [None]:
## Break the Z-Scores per period into Deciles
def cs_bucket(group, col, q=10):
    """Assign each row of one timestamp's group to an integer quantile bucket of ``col``.

    Buckets come from ``pd.qcut`` with ``q`` quantiles; duplicate bin edges are dropped,
    so fewer than ``q`` buckets may come back.
    """
    values = group[col]
    return pd.qcut(values, q=q, labels=False, duplicates="drop")

df_signal["quantile_bucket"] = df_signal.groupby("date", group_keys=False).apply(
    lambda g: cs_bucket(g, "close_log_return_z_score_prev_4h", q=10)
)

## Identify bottom performing tickers per period
q = 0.2  # bottom 20%
df_signal["bottom_quintile_signal"] = df_signal.groupby("date")["close_log_return_z_score_prev_4h"].transform(
    lambda s: s <= s.quantile(q)
)

# compute signal on bar close, execute next bar open
df_signal = df_signal.sort_values(["ticker", "date"])
df_signal["trade_signal"] = df_signal["bottom_quintile_signal"].astype(float)
df_signal["trade_signal_exec"] = df_signal.groupby("ticker")["trade_signal"].shift(1).fillna(0.0)

# then use trade_signal_exec (not trade_signal) in open_new_vintage_positions


In [None]:
## Strategy Params
initial_capital = 1000
cash_buffer_percentage = 0.10
fwd_return_period = 3  # holding horizon in bars
# Fraction of capital allotted to each new entry so that positions opened over
# `fwd_return_period` consecutive bars add up to a full allocation.
daily_weight_allocation = 1 / fwd_return_period
# unique() returns dates in order of first appearance, which is not chronological when the
# frame is ordered by ticker; sort explicitly so first_period is really the earliest bar.
period_list = df_signal["date"].drop_duplicates().sort_values().tolist()
first_period = period_list[0]

In [None]:
first_period

In [None]:
df_signal['position_weight'] = 0.0
# df_signal['event_col'] = np.nan
df_signal['position_notional'] = 0.0
df_signal['position_size'] = 0.0
df_signal['holding_period_counter'] = 0.0
df_signal['vintage_id'] = np.nan
# df_signal['available_cash'] = 0.0
# df_signal['total_position_notional'] = 0.0
# df_signal['total_portfolio_value'] = 0.0
# df_signal['total_portfolio_value_upper_limit'] = 0.0

## Set the Available Capital for the first period
# first_period_cond = (df_signal.date == first_period)
# df_signal.loc[first_period_cond, 'available_cash'] = initial_capital
# df_signal.loc[first_period_cond, 'total_portfolio_value'] = initial_capital
# df_signal.loc[first_period_cond, 'total_portfolio_value_upper_limit'] = df_signal.loc[first_period_cond, 'total_portfolio_value'] * (1 - cash_buffer_percentage)

## Estimated T-Cost
transaction_cost_est = 0.001
passive_trade_rate = 0.05
est_fees = (transaction_cost_est + perf.estimate_fee_per_trade(passive_trade_rate))

In [None]:
## Initialized Position and Portfolio Dataframes
portfolio_columns = ['total_position_notional','available_cash','total_portfolio_value','total_portfolio_value_upper_limit']
df_daily_portfolio_summary = pd.DataFrame(columns=portfolio_columns)
df_daily_portfolio_summary.index.name = 'date'
df_daily_position_summary = pd.DataFrame(columns=df_signal.columns.tolist())

In [None]:
df_daily_portfolio_summary.head()

In [None]:
df_daily_position_summary.head()

In [None]:
est_fees

In [None]:
def estimate_fee_per_trade_w_enable_switch(passive_trade_rate=0.5, maker_fee=0.006, taker_fee=0.012, enable_fees=True):
    """Blended fee rate per trade: maker fee weighted by ``passive_trade_rate``, taker fee by the remainder.

    Returns 0.0 when ``enable_fees`` is False.
    """
    if enable_fees:
        maker_share = passive_trade_rate
        taker_share = (1 - passive_trade_rate)
        return (maker_fee * maker_share) + (taker_fee * taker_share)
    return 0.0

In [None]:
# -----------------------------
# NEW: Turnover helpers
# -----------------------------
# Per-period turnover columns maintained on the portfolio summary frame.
# The *_gross columns accumulate traded notional; the *_pct columns are ratios to portfolio value.
TURNOVER_COLS = [
    "buy_notional_gross",
    "sell_notional_gross",
    "gross_traded_notional",
    "turnover_pct",
    "turnover_pct_half",
    "buy_turnover_pct",
    "sell_turnover_pct",
]


def ensure_turnover_cols_in_df(df_portfolio: pd.DataFrame) -> pd.DataFrame:
    """Add any turnover column that is missing from ``df_portfolio``, filled with 0.0.

    Existing columns are left untouched. The frame is modified in place and
    also returned for call-chaining convenience.
    """
    absent = [name for name in TURNOVER_COLS if name not in df_portfolio.columns]
    for name in absent:
        df_portfolio[name] = 0.0

    return df_portfolio

def reset_turnover_for_period(df_portfolio: pd.DataFrame, period) -> pd.DataFrame:
    """Set every turnover column to 0.0 on the ``period`` row.

    Columns are written one at a time via ``.loc``, so a missing column or
    row is created by the assignment. The frame is modified in place and
    returned.
    """
    for turnover_col in TURNOVER_COLS:
        # One column per assignment keeps the original write order and dtype handling
        df_portfolio.loc[period, turnover_col] = 0.0
    return df_portfolio

def finalize_turnover_metrics(df_portfolio: pd.DataFrame, period) -> pd.DataFrame:
    """Convert the accumulated traded notionals for ``period`` into turnover ratios.

    Reads ``total_portfolio_value``, ``gross_traded_notional``,
    ``buy_notional_gross`` and ``sell_notional_gross`` from the ``period`` row
    and writes the four ``*_pct`` columns. When portfolio value is not strictly
    positive all four ratios are set to 0.0. The frame is modified in place and
    returned.
    """
    portfolio_value = float(df_portfolio.loc[period, "total_portfolio_value"])
    gross_traded = float(df_portfolio.loc[period, "gross_traded_notional"])
    bought = float(df_portfolio.loc[period, "buy_notional_gross"])
    sold = float(df_portfolio.loc[period, "sell_notional_gross"])

    if portfolio_value > 0:
        ratios = {
            "turnover_pct": gross_traded / portfolio_value,
            # "half" convention: a full buy+sell round trip counts once
            "turnover_pct_half": gross_traded / (2.0 * portfolio_value),
            "buy_turnover_pct": bought / portfolio_value,
            "sell_turnover_pct": sold / portfolio_value,
        }
    else:
        # No meaningful denominator: report zero turnover
        ratios = {
            "turnover_pct": 0.0,
            "turnover_pct_half": 0.0,
            "buy_turnover_pct": 0.0,
            "sell_turnover_pct": 0.0,
        }

    for column, value in ratios.items():
        df_portfolio.loc[period, column] = value

    return df_portfolio

In [None]:
def roll_portfolio_positions(df_portfolio: pd.DataFrame, current_period) -> pd.DataFrame:
    """Carry the last portfolio row forward into ``current_period``.

    The four core portfolio columns of the final row (by position) are copied
    onto the ``current_period`` row, which is created if absent, and the
    turnover columns for that row are then zeroed. The frame is modified in
    place and returned.
    """
    current_period = pd.Timestamp(current_period)

    carried_columns = [
        "total_position_notional",
        "available_cash",
        "total_portfolio_value",
        "total_portfolio_value_upper_limit",
    ]

    # Snapshot the most recent row before the new row is (possibly) appended
    carried_values = df_portfolio.iloc[-1][carried_columns].values

    # Create the target row first so the subsequent multi-column assignment has somewhere to land
    row_missing = current_period not in df_portfolio.index
    if row_missing:
        df_portfolio.loc[current_period, carried_columns] = pd.NA

    df_portfolio.loc[current_period, carried_columns] = carried_values

    ## A new period starts with no trades recorded against it
    reset_turnover_for_period(df_portfolio, period=current_period)

    return df_portfolio

In [None]:
def compute_deployable_cash(df_portfolio, period, cash_buffer_percentage):
    """Return the cash available for new positions after reserving the buffer.

    The reserve is ``total_portfolio_value * cash_buffer_percentage`` for the
    ``period`` row; the result is ``available_cash`` minus that reserve,
    floored at 0.0.
    """
    portfolio_value = float(df_portfolio.loc[period, "total_portfolio_value"])
    cash_on_hand = float(df_portfolio.loc[period, "available_cash"])

    # Cash that must stay un-invested
    cash_reserve = portfolio_value * cash_buffer_percentage

    return max(0.0, cash_on_hand - cash_reserve)


def compute_gross_to_deploy_target_per_vintage(df_portfolio, period, cash_buffer_percentage, fwd_return_period):
    """
    Gross notional to deploy into a single new vintage on this bar.

    Each bar targets roughly 1/fwd_return_period of the desired invested
    notional, capped by the cash that is actually free after the buffer:

        target_per_vintage = PV * (1 - buffer) / fwd_return_period
        deployable_cash    = max(0, available_cash - PV * buffer)
        gross_to_deploy    = min(target_per_vintage, deployable_cash)

    Returns the result as a plain float.
    """
    portfolio_value = float(df_portfolio.loc[period, "total_portfolio_value"])
    cash_on_hand = float(df_portfolio.loc[period, "available_cash"])

    # Cash free to spend once the buffer is set aside
    cash_reserve = portfolio_value * cash_buffer_percentage
    spendable = max(0.0, cash_on_hand - cash_reserve)

    # Equal slice of the fully-invested target for each overlapping vintage
    invested_target = portfolio_value * (1.0 - cash_buffer_percentage)
    per_vintage_target = invested_target / float(fwd_return_period)

    return float(min(per_vintage_target, spendable))


def open_new_vintage_positions(
    df_position, df_portfolio, df_signal, period,
    fwd_return_period, cash_buffer_percentage,
    transaction_cost_est, passive_trade_rate, enable_fees,
    vintage_name, signal_col='trade_signal_exec'
):
    """
    Open a new vintage at the ``period`` open for every name with a non-zero signal.

    The gross budget for the vintage comes from
    ``compute_gross_to_deploy_target_per_vintage`` and is split equally across
    the selected names. Each name's position notional is the gross allocation
    net of estimated costs; cash is debited by the gross amount.

    Parameters
    ----------
    df_position : pd.DataFrame
        Positions table; the new vintage rows are appended to it.
    df_portfolio : pd.DataFrame
        Portfolio summary indexed by period; the ``period`` row is updated in place.
    df_signal : pd.DataFrame
        Signal rows with at least ``date``, ``open`` and ``signal_col``. Not modified.
    period
        Bar at which the positions are opened (must be a row in ``df_portfolio``).
    fwd_return_period : int
        Number of overlapping vintages; sets the per-vintage budget.
    cash_buffer_percentage : float
        Fraction of portfolio value kept in cash.
    transaction_cost_est, passive_trade_rate, enable_fees
        Inputs to the per-trade cost estimate.
    vintage_name : str
        Label written to ``vintage_id`` on the new rows.
    signal_col : str
        Column whose non-zero values select names to buy.

    Returns
    -------
    (df_position, df_portfolio)
        Returned unchanged when nothing is selected or there is no budget.
    """
    ## Get Estimated T-Cost
    est_fees = (transaction_cost_est + estimate_fee_per_trade_w_enable_switch(passive_trade_rate=passive_trade_rate, enable_fees=enable_fees))

    ## Calculate cash budget available to be deployed for this vintage
    gross_to_deploy = compute_gross_to_deploy_target_per_vintage(
        df_portfolio=df_portfolio,
        period=period,
        cash_buffer_percentage=cash_buffer_percentage,
        fwd_return_period=fwd_return_period,
    )

    ## Select the current-period names with a live signal.
    ## Only the filtered slice is copied; the caller's signal frame is never mutated.
    signal_current_period_cond = (df_signal["date"] == period)
    non_zero_position_cond = (df_signal[signal_col].fillna(0).astype(float) != 0.0)
    picks = df_signal.loc[signal_current_period_cond & non_zero_position_cond].copy()

    ## A missing or non-positive open cannot be traded: dividing by it would book an
    ## inf/NaN position size while still debiting cash, so such names are skipped.
    tradable_open_cond = (pd.to_numeric(picks["open"], errors="coerce") > 0)
    picks = picks.loc[tradable_open_cond].copy()

    n = len(picks)
    if n == 0 or gross_to_deploy <= 0:
        return df_position, df_portfolio

    ## Update Trade Weights for current period
    # NOTE: informational only; sizing uses gross_to_deploy / n below.
    picks["position_weight"] = 1.0 / (fwd_return_period * n)

    ## Allocate the per-vintage gross budget equally across the selected names
    picks["trade_notional_gross"] = gross_to_deploy / float(n)

    ## Position value is the gross spend net of estimated costs, sized at the open
    picks["position_notional"] = picks["trade_notional_gross"] * (1 - est_fees)
    picks["position_size"] = picks["position_notional"] / picks["open"]
    picks["vintage_id"] = vintage_name

    ## Append the Current Period Signal Dataframe for the Vintage to the Daily Positions Dataframe
    df_position = pd.concat([df_position, picks], ignore_index=True)

    ## Update Portfolio Cash based on new positions (cash pays the gross amount, costs included)
    cash_usage = float(picks["trade_notional_gross"].sum())
    df_portfolio.loc[period, "available_cash"] = float(df_portfolio.loc[period, "available_cash"]) - cash_usage

    ## Turnover accounting (buys)
    df_portfolio = ensure_turnover_cols_in_df(df_portfolio)
    df_portfolio.loc[period, "buy_notional_gross"] = float(df_portfolio.loc[period, "buy_notional_gross"]) + cash_usage
    df_portfolio.loc[period, "gross_traded_notional"] = float(df_portfolio.loc[period, "gross_traded_notional"]) + cash_usage

    ## Update Portfolio Positions by Net Notional
    df_portfolio.loc[period, "total_position_notional"] = float(df_portfolio.loc[period, "total_position_notional"]) + float(picks["position_notional"].sum())
    df_portfolio.loc[period, "total_portfolio_value"] = float(df_portfolio.loc[period, "available_cash"]) + float(df_portfolio.loc[period, "total_position_notional"])
    df_portfolio.loc[period, "total_portfolio_value_upper_limit"] = float(df_portfolio.loc[period, "total_portfolio_value"]) * (1 - cash_buffer_percentage)

    return df_position, df_portfolio

In [None]:
# def open_new_vintage_positions(df_position, df_portfolio, df_signal, period, fwd_return_period, cash_buffer_percentage, transaction_cost_est, passive_trade_rate, vintage_name):

#     df_signal_working = df_signal.copy()
#     position_current_period_cond = (df_position['date'] == period)
#     signal_current_period_cond = (df_signal_working['date'] == period)

#     ## Get Estimated T-Cost
#     est_fees = (transaction_cost_est + perf.estimate_fee_per_trade(passive_trade_rate))
    
#     ## Get Portfolio Metrics
#     available_cash = df_portfolio.loc[period, 'available_cash']
#     total_portfolio_value_upper_limit = df_portfolio.loc[period, 'total_portfolio_value_upper_limit']

#     ## Update Trade Weights
#     non_zero_position_cond = (df_signal_working['trade_signal_exec'])
#     non_zero_position_count = df_signal_working.loc[signal_current_period_cond & non_zero_position_cond].shape[0]
#     df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'position_weight'] = (1 / (fwd_return_period * non_zero_position_count))

#     ## Calculate Trade Notional based on Weights
#     new_trade_notional = df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'position_weight'] * (total_portfolio_value_upper_limit)
#     net_trade_notional = new_trade_notional * (1 - est_fees)
#     df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'position_notional'] = net_trade_notional
#     df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'position_size'] = net_trade_notional / df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'open']
#     df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'vintage_id'] = vintage_name

#     ## Append the Current Period Signal Dataframe for the Vintage to the Daily Positions Dataframe
#     df_position = pd.concat([df_position, df_signal_working[signal_current_period_cond & non_zero_position_cond]], axis=0, ignore_index=True)

#     ## Update Portfolio Cash based on new positions
#     total_position_notional = df_signal_working.loc[signal_current_period_cond & non_zero_position_cond, 'position_notional'].sum()
#     cash_usage = new_trade_notional.sum()
#     # total_portfolio_value_upper_limit = total_portfolio_value_upper_limit - cash_usage
#     available_cash = available_cash - cash_usage

#     ## Update End of Day Portfolio & Cash Positions
#     df_portfolio.loc[period, 'available_cash'] = available_cash
#     df_portfolio.loc[period, 'total_position_notional'] = df_portfolio.loc[period, 'total_position_notional'] + total_position_notional
#     df_portfolio.loc[period, 'total_portfolio_value'] = (df_portfolio.loc[period, 'available_cash'] +
#                                                          df_portfolio.loc[period, 'total_position_notional'])
#     df_portfolio.loc[period, 'total_portfolio_value_upper_limit'] = df_portfolio.loc[period, 'total_portfolio_value'] * (1 - cash_buffer_percentage)

#     return df_position, df_portfolio

In [None]:
# def update_open_vintage_positions(df_position, df_portfolio, df_signal, current_period, prior_period, cash_buffer_percentage, vintage_name):

#     position_current_period_cond = (df_position['date'] == current_period)
#     position_prior_period_cond = (df_position['date'] == prior_period)
#     signal_current_period_cond = (df_signal['date'] == current_period)

#     ## Pulling current positions for vintage
#     vintage_cond = (df_position['vintage_id'] == vintage_name)
#     non_zero_tickers_prior_period = df_position.loc[position_prior_period_cond & vintage_cond]['ticker'].tolist()
#     df_signal_current_period = df_signal.loc[signal_current_period_cond & (df_signal['ticker'].isin(non_zero_tickers_prior_period))]

#     ## Updating the current positions for vintage with positions sizes from previous period
#     for ticker in non_zero_tickers_prior_period:
#         ticker_cond = (df_signal_current_period.ticker == ticker)
#         df_signal_current_period.loc[ticker_cond, 'position_size'] = df_position.loc[position_prior_period_cond & (df_position.ticker == ticker), 'position_size']
#         df_signal_current_period.loc[ticker_cond, 'position_weight'] = df_position.loc[position_prior_period_cond & (df_position.ticker == ticker), 'position_weight']

#     ## Marking the position sizes from current period to current periods open
#     df_signal_current_period['position_notional'] = df_signal_current_period['position_size'] * df_signal_current_period['open']
#     df_signal_current_period['vintage_id'] = vintage_name
#     df_position = pd.concat([df_position, df_signal_current_period], axis=0, ignore_index=True)

#     ## Update Portfolio Positions with new marks for vintage
#     df_portfolio.loc[current_period, 'total_position_notional'] = (df_portfolio.loc[current_period, 'total_position_notional'] +
#                                                                    df_position.loc[position_current_period_cond, 'position_notional'].sum())
#     df_portfolio.loc[current_period, 'total_portfolio_value'] = (df_portfolio.loc[current_period, 'total_position_notional'] +
#                                                                  df_portfolio.loc[current_period, 'available_cash'])
#     df_portfolio.loc[period, 'total_portfolio_value_upper_limit'] = df_portfolio.loc[period, 'total_portfolio_value'] * (1 - cash_buffer_percentage)

#     return df_position, df_portfolio

In [None]:
# def update_open_vintage_positions(df_position, df_portfolio, df_signal, current_period, prior_period, cash_buffer_percentage, vintage_name):

#     position_prior_period_cond = (df_position['date'] == prior_period)
#     signal_current_period_cond = (df_signal['date'] == current_period)

#     ## Pulling current positions for vintage
#     vintage_cond = (df_position['vintage_id'] == vintage_name)
#     non_zero_tickers_prior_period = df_position.loc[
#         position_prior_period_cond & vintage_cond, 'ticker'
#     ].tolist()
#     df_signal_current_period = df_signal.loc[
#         signal_current_period_cond & df_signal['ticker'].isin(non_zero_tickers_prior_period)
#     ].copy()

#     ## Updating the current positions for vintage with positions sizes from previous period
#     for ticker in non_zero_tickers_prior_period:
#         prior_rows = df_position.loc[position_prior_period_cond & (df_position['ticker'] == ticker) & (df_position['vintage_id'] == vintage_name)]
#         if prior_rows.empty:
#             continue
#         df_signal_current_period.loc[df_signal_current_period['ticker'] == ticker, 'position_size'] = prior_rows['position_size'].iloc[0]
#         df_signal_current_period.loc[df_signal_current_period['ticker'] == ticker, 'position_weight'] = prior_rows['position_weight'].iloc[0]

#     ## Marking the position sizes from current period to current periods open
#     df_signal_current_period['position_notional'] = df_signal_current_period['position_size'] * df_signal_current_period['open']
#     df_signal_current_period['vintage_id'] = vintage_name
#     df_position = pd.concat([df_position, df_signal_current_period], axis=0, ignore_index=True)

#     ## Update Holding Counter for Vintage in Current Period
#     prior_period_holding_counter_cond = (df_position['date'] == prior_period) & (df_position['vintage_id'] == vintage_name)
#     prior_period_holding_counter = df_position.loc[prior_period_holding_counter_cond, 'holding_period_counter'].values[0]
#     df_position.loc[(df_position['date'] == current_period) & (df_position['vintage_id'] == vintage_name), 'holding_period_counter'] = prior_period_holding_counter + 1
    
#     ## Update Portfolio Positions with new marks for vintage
#     prior_period_vintage_position_notional = df_position[(df_position['date'] == prior_period) & (df_position['vintage_id'] == vintage_name)]['position_notional'].sum()
#     added_notional = df_signal_current_period['position_notional'].sum()
#     df_portfolio.loc[current_period, 'total_position_notional'] = (
#         df_portfolio.loc[current_period, 'total_position_notional'] + (added_notional - prior_period_vintage_position_notional)
#     )
#     df_portfolio.loc[current_period, 'total_portfolio_value'] = (
#         df_portfolio.loc[current_period, 'total_position_notional'] + df_portfolio.loc[current_period, 'available_cash']
#     )
#     df_portfolio.loc[current_period, 'total_portfolio_value_upper_limit'] = (
#         df_portfolio.loc[current_period, 'total_portfolio_value'] * (1 - cash_buffer_percentage)
#     )

#     return df_position, df_portfolio


In [None]:
def update_open_vintage_positions(df_position, df_portfolio, df_signal, current_period, prior_period, cash_buffer_percentage, vintage_name):
    """
    Mark an open vintage to market from ``prior_period`` to the ``current_period`` open.

    Position sizes and weights held by ``vintage_name`` at ``prior_period`` are
    carried onto the matching ``current_period`` signal rows, re-valued at the
    current open, and appended to the positions table with the holding counter
    advanced by one. The portfolio's position notional moves by the change in
    this vintage's value; cash is untouched.

    Returns ``(df_position, df_portfolio)``; both are returned unchanged when the
    vintage held nothing at ``prior_period`` or no current signal rows exist
    for its tickers.
    """
    current_period = pd.Timestamp(current_period)
    prior_period = pd.Timestamp(prior_period)

    ## What the vintage held at the prior bar
    in_prior_period = (df_position["date"] == prior_period)
    in_vintage = (df_position["vintage_id"] == vintage_name)
    held = df_position.loc[in_prior_period & in_vintage].copy()
    if held.empty:
        return df_position, df_portfolio

    ## One row per ticker (guards against accidental duplicates)
    held = held.sort_values(["ticker"]).drop_duplicates(subset=["ticker"], keep="last")

    ## Current-bar signal rows for the held tickers
    is_current_bar = (df_signal["date"] == current_period)
    is_held_ticker = df_signal["ticker"].isin(held["ticker"].tolist())
    marked = df_signal.loc[is_current_bar & is_held_ticker].copy()
    if marked.empty:
        return df_position, df_portfolio

    ## Carry size and weight forward by ticker, dropping any ticker that failed to map
    held_by_ticker = held.set_index("ticker")[["position_size", "position_weight"]]
    for carried_field in ("position_size", "position_weight"):
        marked[carried_field] = marked["ticker"].map(held_by_ticker[carried_field])
    marked = marked.dropna(subset=["position_size", "position_weight"])

    ## Re-value at the current open and advance the holding counter
    marked["position_notional"] = marked["position_size"] * marked["open"]
    marked["vintage_id"] = vintage_name
    marked["holding_period_counter"] = held["holding_period_counter"].max() + 1

    df_position = pd.concat([df_position, marked], axis=0, ignore_index=True)

    ## Portfolio moves by this vintage's mark-to-market change only
    value_before = held["position_notional"].sum()
    value_now = marked["position_notional"].sum()
    mtm_change = value_now - value_before

    df_portfolio.loc[current_period, "total_position_notional"] = (
        df_portfolio.loc[current_period, "total_position_notional"] + mtm_change
    )
    df_portfolio.loc[current_period, "total_portfolio_value"] = (
        df_portfolio.loc[current_period, "available_cash"]
        + df_portfolio.loc[current_period, "total_position_notional"]
    )
    df_portfolio.loc[current_period, "total_portfolio_value_upper_limit"] = (
        df_portfolio.loc[current_period, "total_portfolio_value"] * (1 - cash_buffer_percentage)
    )

    return df_position, df_portfolio

In [None]:
# def exit_open_vintage_positions(df_position, df_portfolio, df_signal, current_period, prior_period, transaction_cost_est, passive_trade_rate, cash_buffer_percentage, vintage_name):

#     df_signal_working = df_signal.copy()
    
#     ## Filtering Conditions
#     position_current_period_cond = (df_position['date'] == current_period)
#     position_prior_period_cond = (df_position['date'] == prior_period)
#     signal_current_period_cond = (df_signal_working['date'] == current_period)

#     ## Estimated T-Cost
#     est_fees = (transaction_cost_est + perf.estimate_fee_per_trade(passive_trade_rate))

#     ## Pulling current positions for vintage
#     vintage_cond = (df_position['vintage_id'] == vintage_name)
#     non_zero_tickers_prior_period = df_position.loc[position_prior_period_cond & vintage_cond]['ticker'].tolist()
#     df_signal_current_period = df_signal_working.loc[signal_current_period_cond & (df_signal_working['ticker'].isin(non_zero_tickers_prior_period))]

#     ## Updating the current positions for vintage with positions sizes from previous period
#     position_vintage_cond = (df_position['vintage_id'] == vintage_name)
#     for ticker in non_zero_tickers_prior_period:
#         ticker_cond = (df_signal_current_period.ticker == ticker)
#         df_signal_current_period.loc[ticker_cond, 'position_size'] = df_position.loc[position_prior_period_cond & (df_position.ticker == ticker) & (position_vintage_cond), 'position_size']

#     ## Calculating the Exit notional net of T-Cost
#     df_signal_current_period['position_notional'] = df_signal_current_period['position_size'] * df_signal_current_period['open'] * (1 - est_fees)

#     ## Update Portfolio Positions with new marks for vintage
#     exit_net_position_notional = df_signal_current_period['position_notional'].sum()
#     df_portfolio.loc[current_period, 'total_position_notional'] = df_portfolio.loc[current_period, 'total_position_notional'] - exit_net_position_notional
#     df_portfolio.loc[current_period, 'available_cash'] = df_portfolio.loc[current_period, 'available_cash'] + exit_net_position_notional
#     df_portfolio.loc[current_period, 'total_portfolio_value'] = df_portfolio.loc[current_period, 'available_cash'] + df_portfolio.loc[current_period, 'total_position_notional']
#     df_portfolio.loc[current_period, 'total_portfolio_value_upper_limit'] = df_portfolio.loc[current_period, 'total_portfolio_value'] * (1 - cash_buffer_percentage)

#     ## Add Closed Position to Daily Position Summary
#     df_signal_current_period['position_notional'] = 0
#     df_signal_current_period['position_size'] = 0
#     df_signal_current_period['position_weight'] = 0
#     df_signal_current_period['vintage_id'] = vintage_name
#     df_signal_current_period['holding_period_counter'] = 0
#     df_position = pd.concat([df_position, df_signal_current_period], axis=0, ignore_index=True)

#     return df_position, df_portfolio    

In [None]:
def exit_open_vintage_positions(df_position, df_portfolio, df_signal, current_period, prior_period, transaction_cost_est, passive_trade_rate, enable_fees,
                                cash_buffer_percentage, vintage_name):
    """
    Close every open position of ``vintage_name`` at the ``current_period`` open.

    Position sizes are taken from the vintage's ``prior_period`` rows in
    ``df_position`` and priced at the current open from ``df_signal``. The
    portfolio first books the mark-to-market move since the prior bar, then
    removes the position notional and credits cash with the proceeds net of
    estimated costs. The gross sale amount is added to the sell/gross turnover
    columns, and zeroed "closed" rows are appended to the positions table as an
    audit trail.

    Parameters
    ----------
    df_position : pd.DataFrame
        Positions table; not modified in place (a new concatenated frame is returned).
    df_portfolio : pd.DataFrame
        Portfolio summary indexed by period; the ``current_period`` row is updated in place.
    df_signal : pd.DataFrame
        Signal rows with at least ``date``, ``ticker`` and ``open``. Not modified.
    current_period, prior_period
        Exit bar and the bar whose holdings are being exited; coerced to ``pd.Timestamp``.
    transaction_cost_est, passive_trade_rate, enable_fees
        Inputs to the per-trade cost estimate.
    cash_buffer_percentage : float
        Fraction of portfolio value kept in cash (used for the upper limit).
    vintage_name : str
        Vintage label to exit.

    Returns
    -------
    (df_position, df_portfolio)
        Returned unchanged when the vintage held nothing at ``prior_period`` or
        no current signal rows exist for its tickers.

    Raises
    ------
    KeyError
        If ``current_period`` is not a row of ``df_portfolio`` or the required
        portfolio columns are missing.
    """
    # Normalise periods the same way the roll/update helpers do, so lookups
    # against Timestamp-keyed frames behave identically across the pipeline.
    current_period = pd.Timestamp(current_period)
    prior_period = pd.Timestamp(prior_period)

    # --- Estimated total fees/slippage model ---
    est_fees = (transaction_cost_est + estimate_fee_per_trade_w_enable_switch(passive_trade_rate=passive_trade_rate, enable_fees=enable_fees))

    # --- Identify tickers that were open in the prior period for this vintage ---
    prior_vintage_cond = (df_position["date"] == prior_period) & (df_position["vintage_id"] == vintage_name)
    prior_vintage_positions = df_position.loc[prior_vintage_cond, ["ticker", "position_size", "position_notional"]].copy()

    if prior_vintage_positions.empty:
        # Nothing to exit; return unchanged
        return df_position, df_portfolio

    # If df_position contains several rows per ticker for the same date/vintage,
    # keep the last one in table order so the ticker -> size mapping is unique.
    prior_vintage_positions = (
        prior_vintage_positions
        .dropna(subset=["ticker"])
        .drop_duplicates(subset=["ticker"], keep="last")
    )

    non_zero_tickers_prior_period = prior_vintage_positions["ticker"].tolist()

    # --- Build exit frame for current period (only the slice is copied, so it can be mutated safely) ---
    mask_exit = (df_signal["date"] == current_period) & df_signal["ticker"].isin(non_zero_tickers_prior_period)
    df_signal_current_period = df_signal.loc[mask_exit].copy()

    if df_signal_current_period.empty:
        # No signal rows for these tickers at current_period; cannot mark/exit
        return df_position, df_portfolio

    # --- Map prior position sizes into current period rows ---
    ticker_to_size = prior_vintage_positions.set_index("ticker")["position_size"]
    df_signal_current_period["position_size"] = df_signal_current_period["ticker"].map(ticker_to_size)

    # Tickers that failed to map (should not happen) contribute nothing to the exit
    df_signal_current_period["position_size"] = df_signal_current_period["position_size"].fillna(0.0)

    # --- Value the positions at the current open (gross of fees) ---
    df_signal_current_period["position_notional"] = (
        df_signal_current_period["position_size"] * df_signal_current_period["open"]
    )

    prior_period_position_notional = float(prior_vintage_positions["position_notional"].sum())
    current_mtm_position_notional = float(df_signal_current_period["position_notional"].sum())
    # Cash actually received: sale proceeds net of estimated costs
    exit_net_position_notional = current_mtm_position_notional * (1 - est_fees)

    # --- Update portfolio accounting ---
    # Ensure the portfolio row and columns exist before writing to them
    if current_period not in df_portfolio.index:
        raise KeyError(f"current_period {current_period} not found in df_portfolio.index")
    if "total_position_notional" not in df_portfolio.columns or "available_cash" not in df_portfolio.columns:
        raise KeyError("df_portfolio missing required columns: total_position_notional, available_cash")

    ## Capture the Mark to Market from Previous Open to Current Open prior to exiting the position
    df_portfolio.loc[current_period, "total_position_notional"] = (
        df_portfolio.loc[current_period, "total_position_notional"] + (current_mtm_position_notional - prior_period_position_notional)
    )

    ## Capture the Bookkeeping related to Exiting the Vintage
    # Here the full marked notional leaves the position total, while cash is credited
    # with the notional net of transaction costs (the difference is the cost of the trade).
    df_portfolio.loc[current_period, "total_position_notional"] = (
        df_portfolio.loc[current_period, "total_position_notional"] - current_mtm_position_notional
    )
    df_portfolio.loc[current_period, "available_cash"] = (
        df_portfolio.loc[current_period, "available_cash"] + exit_net_position_notional
    )

    ## Capture the Turnover Accounting (sells are recorded gross of fees)
    df_portfolio = ensure_turnover_cols_in_df(df_portfolio)
    df_portfolio.loc[current_period, "sell_notional_gross"] = (
        float(df_portfolio.loc[current_period, "sell_notional_gross"]) + current_mtm_position_notional
    )
    df_portfolio.loc[current_period, "gross_traded_notional"] = (
        float(df_portfolio.loc[current_period, "gross_traded_notional"]) + current_mtm_position_notional
    )

    ## Re-calculate the Total Portfolio Value and Upper Limit based on the Updated Portfolio
    df_portfolio.loc[current_period, "total_portfolio_value"] = (
        df_portfolio.loc[current_period, "available_cash"] + df_portfolio.loc[current_period, "total_position_notional"]
    )
    df_portfolio.loc[current_period, "total_portfolio_value_upper_limit"] = (
        df_portfolio.loc[current_period, "total_portfolio_value"] * (1 - cash_buffer_percentage)
    )

    # --- Append "closed" rows to df_position for audit trail ---
    # Set post-exit state fields
    df_signal_current_period["position_notional"] = 0.0
    df_signal_current_period["position_size"] = 0.0
    df_signal_current_period["position_weight"] = 0.0
    df_signal_current_period["vintage_id"] = vintage_name
    df_signal_current_period["holding_period_counter"] = 0

    # Align to the positions table schema: add any column it has that the exit rows lack
    for col in df_position.columns:
        if col not in df_signal_current_period.columns:
            df_signal_current_period[col] = np.nan

    df_signal_current_period = df_signal_current_period[df_position.columns]

    df_position_out = pd.concat([df_position, df_signal_current_period], axis=0, ignore_index=True)

    return df_position_out, df_portfolio

In [None]:
df_daily_portfolio_summary.head()

In [None]:
df_daily_position_summary.head()

In [None]:
df_signal.head()

In [None]:
## Periods used while stepping through the first few bars by hand
first_period = pd.Timestamp('2022-04-01 08:00:00')
second_period = pd.Timestamp('2022-04-01 12:00:00')
third_period = pd.Timestamp('2022-04-01 16:00:00')

## Starting state: everything in cash, nothing invested
available_cash = initial_capital
total_portfolio_value = initial_capital
total_portfolio_value_upper_limit = total_portfolio_value * (1 - cash_buffer_percentage)

## Seed the portfolio ledger with the first-period row (one column per assignment)
df_daily_portfolio_summary.loc[first_period, 'total_position_notional'] = 0.0
df_daily_portfolio_summary.loc[first_period, 'available_cash'] = available_cash
df_daily_portfolio_summary.loc[first_period, 'total_portfolio_value'] = total_portfolio_value
df_daily_portfolio_summary.loc[first_period, 'total_portfolio_value_upper_limit'] = total_portfolio_value_upper_limit

In [None]:
df_daily_position_summary.head()

In [None]:
df_daily_portfolio_summary.head()

In [None]:
# for current_period in period_list:
period_idx = 0
current_period = period_list[period_idx]
## The first period has no predecessor. Indexing with `period_idx - 1` would be
## period_list[-1] (the LAST period), so use NaT to mean "no prior bar" instead.
prior_period = period_list[period_idx - 1] if period_idx > 0 else pd.NaT

## Filtering Conditions
signal_current_period_cond = (df_signal.date == current_period)
positions_current_period_cond = (df_daily_position_summary.date == current_period)
## Comparing against NaT is False for every row, i.e. no prior-period positions
positions_prior_period_cond = (df_daily_position_summary.date == prior_period)

## Work on a copy of the current bar's signal rows so df_signal stays untouched
df_signal_current_period = df_signal.loc[signal_current_period_cond].copy()

## Any bar after the first starts from the previous bar's portfolio state
if current_period > first_period:
    df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

In [None]:
df_signal_current_period.shape

In [None]:
df_signal_current_period

In [None]:
df_daily_position_summary

In [None]:
df_daily_portfolio_summary

In [None]:
## Bars after the first carry the previous portfolio state forward.
## (Function name fixed: it was misspelled and raised NameError on this branch.)
if current_period > first_period:
    print('Current Period > First Period')
    df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

if current_period == first_period:
    ## Open New Positions for Vintage 1
    ## enable_fees is a required argument of open_new_vintage_positions; True keeps exchange fees in the cost estimate
    df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(
        df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
        period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
        transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, enable_fees=True,
        vintage_name='Vintage_1')

    ## Start the holding counter at 1 for the rows just opened.
    ## The signal is converted to an explicit boolean mask (same test the open function uses)
    ## so the `&` below works whatever dtype the column ended up with after the concat.
    non_zero_position_cond = (df_daily_position_summary['trade_signal_exec'].fillna(0).astype(float) != 0.0)
    positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_1')
    positions_current_period_cond = (df_daily_position_summary.date == current_period)
    df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

In [None]:
df_daily_position_summary

In [None]:
df_signal[(df_signal['date'] == pd.Timestamp('2022-04-01 08:00:00')) & (df_signal['trade_signal_exec'])]#.head()

In [None]:
df_daily_position_summary['position_notional'].sum()

In [None]:
df_daily_portfolio_summary

In [None]:
print(current_period, prior_period)

In [None]:
# Manual walkthrough of the second entry of period_list (the eventual loop would be
# `for current_period in period_list:`).
current_period = period_list[1]
# The prior period is the entry immediately before the current one in period_list.
prior_period = period_list[period_list.index(current_period)-1]

## Filtering Conditions
# Boolean masks for the current/prior period. The two position masks are built against
# df_daily_position_summary as it exists right now; rebuild them if that frame is reassigned.
signal_current_period_cond = (df_signal.date == current_period)
positions_current_period_cond = (df_daily_position_summary.date == current_period)
positions_prior_period_cond = (df_daily_position_summary.date == prior_period)

# Independent copy of the signal rows for the current period.
df_signal_current_period = df_signal.loc[signal_current_period_cond].copy()

# Every period after the first starts by rolling the portfolio summary forward to the current period.
if current_period > first_period:
    print('Current Period > First Period')
    df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

In [None]:
df_signal_current_period

In [None]:
df_daily_portfolio_summary

In [None]:
df_daily_position_summary

In [None]:
## Update Positions from Vintage 1
df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
    current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_1')

## Open New Positions for Vintage 2
df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(
    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
    period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name='Vintage_2')

## Start the holding-period counter at 1 for the Vintage 2 rows opened in this period.
## The signal column is normalised to a strict boolean mask (NaN -> False, any non-zero -> True)
## so that a float/object column or missing values cannot break the .loc selection.
non_zero_position_cond = (df_daily_position_summary['trade_signal_exec'].fillna(0).astype(float) != 0)
positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_2')
positions_current_period_cond = (df_daily_position_summary.date == current_period)
df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

In [None]:
df_daily_position_summary

In [None]:
1385.101010 * 0.02683

In [None]:
df_signal[(df_signal.date == pd.Timestamp('2022-04-01 12:00:00')) & (df_signal['ticker'].isin(df_daily_position_summary[df_daily_position_summary['vintage_id'] == 'Vintage_1'].ticker.unique().tolist()))]#.shape

In [None]:
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_notional':'sum'})

In [None]:
298.478855+295.739452

In [None]:
df_daily_portfolio_summary

In [None]:
current_period = period_list[2]
# Derive the prior period from period_list (as the other walkthrough cells do) instead of
# relying on a separately defined `second_period` global being in the kernel and in sync.
prior_period = period_list[period_list.index(current_period)-1]

# Every period after the first starts by rolling the portfolio summary forward.
if current_period > first_period:
    df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

## Filtering Conditions
signal_current_period_cond = (df_signal.date == current_period)
positions_current_period_cond = (df_daily_position_summary.date == current_period)
positions_prior_period_cond = (df_daily_position_summary.date == prior_period)
df_signal_current_period = df_signal.loc[signal_current_period_cond].copy()

## Update Positions from Vintage 1
df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
    current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_1')

## Update Positions from Vintage 2
df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
    current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_2')

## Open New Positions for Vintage 3
df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(
    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
    period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name='Vintage_3')

## Start the holding-period counter at 1 for the Vintage 3 rows opened in this period.
## The signal column is normalised to a strict boolean mask (NaN -> False, any non-zero -> True)
## so that a float/object column or missing values cannot break the .loc selection.
non_zero_position_cond = (df_daily_position_summary['trade_signal_exec'].fillna(0).astype(float) != 0)
positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_3')
positions_current_period_cond = (df_daily_position_summary.date == current_period)
df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

In [None]:
df_daily_position_summary

In [None]:
df_signal[(df_signal.date == pd.Timestamp('2022-04-01 16:00:00')) & (df_signal['ticker'].isin(df_daily_position_summary[df_daily_position_summary['vintage_id'] == 'Vintage_2'].ticker.unique().tolist()))]#.shape

In [None]:
0.253011 * 155.2700

In [None]:
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_notional':'sum'})

In [None]:
df_daily_portfolio_summary

In [None]:
298.478855+295.739452

In [None]:
306.488152+315.659448+302.885062

In [None]:
df_daily_position_summary[df_daily_position_summary['date'] == pd.Timestamp('2022-04-01 16:00:00')]#['position_notional'].sum()

In [None]:
df_daily_position_summary.groupby(['ticker','vintage_id']).size()

In [None]:
df_signal_current_period.sort_values('close_log_return_z_score_prev_4h')

In [None]:
# Manual walkthrough of the fourth entry of period_list.
current_period = period_list[3]
# The prior period is the entry immediately before the current one in period_list.
prior_period = period_list[period_list.index(current_period)-1]

## Filtering Conditions
# The two position masks are built against df_daily_position_summary as it exists right now;
# rebuild them if that frame is reassigned before they are used.
signal_current_period_cond = (df_signal.date == current_period)
positions_current_period_cond = (df_daily_position_summary.date == current_period)
positions_prior_period_cond = (df_daily_position_summary.date == prior_period)
# Independent copy of the signal rows for the current period.
df_signal_current_period = df_signal.loc[signal_current_period_cond].copy()

# Every period after the first starts by rolling the portfolio summary forward to the current period.
if current_period > first_period:
    df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

In [None]:
df_daily_portfolio_summary

In [None]:
df_daily_position_summary

In [None]:
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_notional':'sum'})

In [None]:
current_period

In [None]:
vintage = 'Vintage_1'
vintage_cond = (df_daily_position_summary['vintage_id'] == vintage)

## Holding-period counter the vintage reached in the prior period.
## The prior-period mask is rebuilt here so it always matches the current position frame,
## and an empty / all-NaN selection is treated as "not held" (0) instead of raising IndexError.
positions_prior_period_cond = (df_daily_position_summary.date == prior_period)
prior_vals = df_daily_position_summary.loc[positions_prior_period_cond & vintage_cond, 'holding_period_counter']
if prior_vals.empty or pd.isna(prior_vals.max()):
    previous_period_holding_counter = 0
else:
    # Use the largest counter across the vintage's rows rather than whichever row happens to be first.
    previous_period_holding_counter = int(prior_vals.max())

## The vintage has been held for the full holding period -> close it out.
if previous_period_holding_counter >= fwd_return_period:
    ## Exit all open positions in current period
    df_daily_position_summary, df_daily_portfolio_summary = exit_open_vintage_positions(
        df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
        current_period=current_period, prior_period=prior_period, transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
        cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

In [None]:
df_daily_position_summary

In [None]:
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_notional':'sum'})

In [None]:
df_daily_portfolio_summary

In [None]:
df_daily_position_summary[df_daily_position_summary['vintage_id'] == 'Vintage_1'].sort_values(['ticker','date'])

In [None]:
## Decide, vintage by vintage, whether to exit, update or (re)open in the current period.
vintage_list = df_daily_position_summary['vintage_id'].unique().tolist()
for vintage in vintage_list:
    # Rows without a vintage label cannot be dispatched.
    if pd.isna(vintage):
        continue

    # Both masks are rebuilt on every iteration because df_daily_position_summary is
    # reassigned inside this loop; a mask built against an earlier frame would be stale.
    vintage_cond = (df_daily_position_summary['vintage_id'] == vintage)
    positions_prior_period_cond = (df_daily_position_summary.date == prior_period)

    ## Holding-period counter the vintage reached in the prior period
    ## (empty / all-NaN selection is treated as "not held" instead of raising IndexError).
    prior_vals = df_daily_position_summary.loc[positions_prior_period_cond & vintage_cond, 'holding_period_counter']
    if prior_vals.empty or pd.isna(prior_vals.max()):
        previous_period_holding_counter = 0
    else:
        previous_period_holding_counter = int(prior_vals.max())

    if previous_period_holding_counter >= fwd_return_period:
        ## Exit all open positions in current period
        df_daily_position_summary, df_daily_portfolio_summary = exit_open_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
            cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

    elif 1 <= previous_period_holding_counter <= (fwd_return_period - 1):
        ## Update Positions from Vintage
        df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

    else:
        ## Open New Positions for Vintage (counter was 0 in the prior period)
        df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
            transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name=vintage)

        ## Start the holding-period counter at 1 for the rows that were just opened.
        ## The signal column is normalised to a strict boolean mask (NaN -> False, any non-zero -> True).
        non_zero_position_cond = (df_daily_position_summary['trade_signal_exec'].fillna(0).astype(float) != 0)
        positions_vintage_cond = (df_daily_position_summary['vintage_id'] == vintage)
        positions_current_period_cond = (df_daily_position_summary.date == current_period)
        df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

In [None]:
df_daily_position_summary

In [None]:
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_notional':'sum'})

In [None]:
df_daily_portfolio_summary

In [None]:
320.687385+311.571976

In [None]:
def run_mean_reversion_backtest(df_signal, fwd_return_period, initial_capital, cash_buffer_percentage, transaction_cost_est, passive_trade_rate, enable_fees,
                                signal_col='trade_signal_exec', max_periods=1000):
    """
    Run the staggered-vintage mean reversion backtest over the periods found in ``df_signal``.

    Periods are the sorted unique values of ``df_signal.date``. ``period_list[0]`` is never
    traded; processing starts at ``period_list[1]``. The first three processed periods open
    'Vintage_1', 'Vintage_2' and 'Vintage_3' in turn, updating the vintages opened before them.
    From the fourth processed period on, every labelled vintage in the position frame is
    dispatched on the largest ``holding_period_counter`` it had in the prior period:

    * ``>= fwd_return_period``            -> exit the vintage,
    * ``1 .. fwd_return_period - 1``      -> update the vintage,
    * otherwise (0 / missing)             -> open the vintage again and set its counter to 1
      on rows whose ``signal_col`` value is non-zero.

    Turnover columns are ensured and finalised once per processed period.

    NOTE(review): the warm-up always seeds exactly three vintages regardless of
    ``fwd_return_period`` -- confirm this is intended for values other than 3.

    Parameters
    ----------
    df_signal : pandas.DataFrame
        Signal frame with at least a ``date`` column and ``signal_col``. It is copied, not mutated.
    fwd_return_period : int
        Holding-period threshold used by the exit / update dispatch. Must be >= 1.
    initial_capital : float
        Starting cash and starting total portfolio value.
    cash_buffer_percentage : float
        The starting upper limit is ``initial_capital * (1 - cash_buffer_percentage)``;
        also forwarded to the vintage helpers.
    transaction_cost_est, passive_trade_rate, enable_fees
        Forwarded unchanged to the open / exit vintage helpers.
    signal_col : str, default 'trade_signal_exec'
        Column whose non-zero values mark rows that receive a holding-period counter of 1 on open.
    max_periods : int or None, default 1000
        Only ``period_list[1:max_periods]`` is processed. The default keeps the historical
        cap; pass ``None`` to process every period.

    Returns
    -------
    tuple
        ``(df_position, df_portfolio)`` as built up by the vintage and turnover helpers.

    Raises
    ------
    ValueError
        If ``df_signal`` has fewer than 4 distinct periods or ``fwd_return_period`` is below 1.
    """
    ## Get a list of all the periods in the signal dataframe
    period_list = sorted(df_signal.date.unique())

    if len(period_list) < 4:
        raise ValueError("Need at least 4 periods in df_signal to run this backtest.")

    if fwd_return_period < 1:
        raise ValueError("fwd_return_period must be at least 1.")

    ## Initialize first positions in the signal dataframe (on a copy so the caller's frame is untouched)
    df_signal = df_signal.copy()
    df_signal['position_weight'] = 0.0
    df_signal['position_notional'] = 0.0
    df_signal['position_size'] = 0.0
    df_signal['holding_period_counter'] = 0.0
    df_signal['vintage_id'] = np.nan

    ## Initialized Position and Portfolio Dataframes
    portfolio_columns = [
        'total_position_notional',
        'available_cash',
        'total_portfolio_value',
        'total_portfolio_value_upper_limit',
        'buy_notional_gross',
        'sell_notional_gross',
        'gross_traded_notional',
        'turnover_pct',
        'turnover_pct_half',
        'buy_turnover_pct',
        'sell_turnover_pct'
    ]
    df_portfolio = pd.DataFrame(columns=portfolio_columns)
    df_portfolio.index.name = 'date'
    df_position = pd.DataFrame(columns=df_signal.columns.tolist())

    ## The first processed period is period_list[1]; the warm-up opens one new vintage per period
    first_period = period_list[1]
    n_warmup_vintages = 3

    ## Initialize Daily Portfolio Positions prior to processing positions
    df_portfolio.loc[first_period, 'total_position_notional'] = 0.0
    df_portfolio.loc[first_period, 'available_cash'] = float(initial_capital)
    df_portfolio.loc[first_period, 'total_portfolio_value'] = float(initial_capital)
    df_portfolio.loc[first_period, 'total_portfolio_value_upper_limit'] = float(initial_capital) * (1 - cash_buffer_percentage)

    ## Initialize Turnover Columns
    df_portfolio = reset_turnover_for_period(df_portfolio, period=first_period)

    def _open_vintage(df_position, df_portfolio, df_signal_current_period, current_period, vintage_name):
        # Open `vintage_name` for the period, then start its holding-period counter at 1 on non-zero-signal rows.
        df_position, df_portfolio = open_new_vintage_positions(
            df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_current_period,
            period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
            transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, enable_fees=enable_fees,
            vintage_name=vintage_name, signal_col=signal_col)
        non_zero_position_cond = (df_position[signal_col].fillna(0).astype(float) != 0)
        positions_vintage_cond = (df_position['vintage_id'] == vintage_name)
        positions_current_period_cond = (df_position.date == current_period)
        df_position.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1
        return df_position, df_portfolio

    def _update_vintage(df_position, df_portfolio, df_signal_current_period, current_period, prior_period, vintage_name):
        # Carry an already-open vintage from the prior period into the current one.
        return update_open_vintage_positions(
            df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage,
            vintage_name=vintage_name)

    for i, current_period in enumerate(period_list[1:max_periods], start=1):
        ## Progress marker every 100 processed periods (0, 100, 200, ...)
        if (i - 1) % 100 == 0:
            print(i - 1)
        prior_period = period_list[i - 1]

        ## Signal rows for the current period only
        df_signal_current_period = df_signal.loc[df_signal.date == current_period].copy()

        ## Every period after the first starts by rolling the portfolio forward
        if i > 1:
            df_portfolio = roll_portfolio_positions(df_portfolio, current_period=current_period)

        if i <= n_warmup_vintages:
            ## Warm-up: update the vintages opened in earlier periods, then open Vintage_<i>
            for open_idx in range(1, i):
                df_position, df_portfolio = _update_vintage(
                    df_position, df_portfolio, df_signal_current_period, current_period, prior_period,
                    vintage_name=f'Vintage_{open_idx}')
            df_position, df_portfolio = _open_vintage(
                df_position, df_portfolio, df_signal_current_period, current_period, vintage_name=f'Vintage_{i}')

        else:
            vintage_list = df_position['vintage_id'].unique().tolist()
            for vintage in vintage_list:
                if pd.isna(vintage):
                    continue

                vintage_cond = (df_position['vintage_id'] == vintage)
                ## Get the Holding Period Counter for the Vintage (empty / NaN counts as 0)
                prior_vals = df_position.loc[(df_position.date == prior_period) & vintage_cond, 'holding_period_counter']
                if prior_vals.empty:
                    previous_period_holding_counter = 0
                else:
                    previous_period_holding_counter = prior_vals.max()
                    if pd.isna(previous_period_holding_counter):
                        previous_period_holding_counter = 0
                    previous_period_holding_counter = int(previous_period_holding_counter)

                if previous_period_holding_counter >= fwd_return_period:
                    ## Exit all open positions in current period
                    df_position, df_portfolio = exit_open_vintage_positions(
                        df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_current_period,
                        current_period=current_period, prior_period=prior_period, transaction_cost_est=transaction_cost_est,
                        passive_trade_rate=passive_trade_rate, enable_fees=enable_fees,
                        cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

                elif 1 <= previous_period_holding_counter <= (fwd_return_period - 1):
                    ## Update Positions from Vintage
                    df_position, df_portfolio = _update_vintage(
                        df_position, df_portfolio, df_signal_current_period, current_period, prior_period,
                        vintage_name=vintage)

                else:
                    ## Open New Positions for Vintage (counter was 0 in the prior period)
                    df_position, df_portfolio = _open_vintage(
                        df_position, df_portfolio, df_signal_current_period, current_period, vintage_name=vintage)

        ## Compute Turnover Ratios once per bar for the updated portfolio
        df_portfolio = ensure_turnover_cols_in_df(df_portfolio)
        df_portfolio = finalize_turnover_metrics(df_portfolio, current_period)

    return df_position, df_portfolio

In [None]:
## Strategy Params
# Shared inputs for the run_mean_reversion_backtest scenario runs.
initial_capital = 1000  # starting cash and starting total portfolio value
cash_buffer_percentage = 0.10  # the starting upper limit is initial_capital * (1 - cash_buffer_percentage)
fwd_return_period = 3  # holding-period threshold: a vintage whose prior-period counter reaches it is exited
transaction_cost_est = 0.001  # forwarded to the open/exit vintage helpers
passive_trade_rate = 0.05  # forwarded to the open/exit vintage helpers; the plot titles label 0.05 as "5%"
enable_fees = True  # forwarded to the open/exit vintage helpers
# Leftovers from the manual walkthrough; the backtest function derives its own period list.
# daily_weight_allocation = 1 / fwd_return_period
# period_list = df_signal.date.unique().tolist()
# first_period = period_list[0]

In [None]:
## Backtest with Transaction Costs and Passive Trade Rate of 5%
df_daily_position_with_t_cost, df_daily_portfolio_with_t_cost = run_mean_reversion_backtest(df_signal, fwd_return_period=fwd_return_period, initial_capital=initial_capital,
                                                                                            cash_buffer_percentage=cash_buffer_percentage, transaction_cost_est=transaction_cost_est,
                                                                                            passive_trade_rate=passive_trade_rate, enable_fees=enable_fees, signal_col='trade_signal_exec')

In [None]:
df_daily_portfolio_with_t_cost['total_portfolio_value'].plot(figsize=(10,8), grid=True, title='Total Portfolio Value with Transaction Costs (0.001) & Passive Trade Rate of 5%')

In [None]:
df_daily_portfolio_with_t_cost['available_cash'].plot(figsize=(10,8), grid=True, title='Available Cash with Transaction Costs (0.001) & Passive Trade Rate of 5%')

In [None]:
df_daily_portfolio_with_t_cost[['turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Turnover Plots with Transaction Costs (0.001) & Passive Trade Rate of 5%', alpha=0.6
)

In [None]:
df_daily_portfolio_with_t_cost[['buy_turnover_pct','sell_turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Buy & Sell Turnover Plots with Transaction Costs (0.001) & Passive Trade Rate of 5%', alpha=0.6
)

In [None]:
pd.pivot_table(df_daily_position_with_t_cost, index=['date'], columns=['vintage_id'], values=['position_notional'],
               aggfunc={'position_notional':'sum'}).plot(figsize=(10,8), grid=True, title='Transaction Costs (0.001) & Passive Trade Rate of 5%')

In [None]:
pd.pivot_table(df_daily_position_with_t_cost, index=['date'], columns=['vintage_id'], values=['position_notional'],
               aggfunc={'position_notional':'sum'}).head(50)

In [None]:
## No Transaction Costs and Passive Trade Rate of 100%
df_daily_position_wo_t_cost_p_rate_100, df_daily_portfolio_wo_t_cost_p_rate_100 = run_mean_reversion_backtest(df_signal, fwd_return_period=fwd_return_period, initial_capital=initial_capital,
                                                                                                              cash_buffer_percentage=cash_buffer_percentage, transaction_cost_est=0,
                                                                                                              passive_trade_rate=1, enable_fees=enable_fees)

In [None]:
df_daily_portfolio_wo_t_cost_p_rate_100['total_portfolio_value'].plot(figsize=(10,8), grid=True, title='Total Portfolio Value with No Transaction Costs & Passive Trade Rate of 100%')

In [None]:
df_daily_portfolio_wo_t_cost_p_rate_100['available_cash'].plot(figsize=(10,8), grid=True, title='Available Cash Value with No Transaction Costs & Passive Trade Rate of 100%')

In [None]:
df_daily_portfolio_wo_t_cost_p_rate_100[['turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Turnover Plots with No Transaction Costs & Passive Trade Rate of 100%', alpha=0.6
)

In [None]:
df_daily_portfolio_wo_t_cost_p_rate_100[['buy_turnover_pct','sell_turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Buy & Sell Turnover Plots with No Transaction Costs & Passive Trade Rate of 100%', alpha=0.6
)

In [None]:
pd.pivot_table(df_daily_position_wo_t_cost_p_rate_100, index=['date'], columns=['vintage_id'], values=['position_notional'],
               aggfunc={'position_notional':'sum'}).plot(figsize=(10,8), grid=True, title='No Transaction Costs and Passive Trade Rate of 100%')

In [None]:
## No Transaction Costs or Exchange Fees
df_daily_position_wo_t_cost_or_exch_fees, df_daily_portfolio_wo_t_cost_or_exch_fees = run_mean_reversion_backtest(df_signal, fwd_return_period=fwd_return_period, initial_capital=initial_capital,
                                                                                                                  cash_buffer_percentage=cash_buffer_percentage, transaction_cost_est=0,
                                                                                                                  passive_trade_rate=1.0, enable_fees=False)

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees['total_portfolio_value'].plot(figsize=(10,8), grid=True, title='Total Portfolio Value with No Transaction Costs or Exchange Fees')

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees['available_cash'].plot(figsize=(10,8), grid=True, title='Available Cash with No Transaction Costs or Exchange Fees')

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees[['turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Turnover Plots with No Transaction Costs or Exchange Fees', alpha=0.6
)

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees[['buy_turnover_pct','sell_turnover_pct']].plot(
    figsize=(10,8), grid=True, title='Buy & Sell Turnover Plots with No Transaction Costs or Exchange Fees', alpha=0.6
)

In [None]:
pd.pivot_table(df_daily_position_wo_t_cost_or_exch_fees, index=['date'], columns=['vintage_id'], values=['position_notional'],
               aggfunc={'position_notional':'sum'}).plot(figsize=(10,8), grid=True, title='Daily Position Value with No Transaction Costs or Exchange Fees')

In [None]:
df_daily_portfolio_with_t_cost['total_portfolio_value'].plot(figsize=(10,8), grid=True, label='T-Cost (0.001) & Passive Trade Rate of 5%', legend=True)
df_daily_portfolio_wo_t_cost_or_exch_fees['total_portfolio_value'].plot(figsize=(10,8), grid=True, label='No T-Cost or Exchange Fees', legend=True)
df_daily_portfolio_wo_t_cost_p_rate_100['total_portfolio_value'].plot(figsize=(10,8), grid=True, label='No T-Cost & Passive Trade Rate of 100%', title='Portfolio Value', legend=True)

In [None]:
df_daily_portfolio_with_t_cost['available_cash'].plot(figsize=(10,8), grid=True, label='T-Cost (0.001) & Passive Trade Rate of 5%', legend=True)
df_daily_portfolio_wo_t_cost_or_exch_fees['available_cash'].plot(figsize=(10,8), grid=True, label='No T-Cost or Exchange Fees', legend=True)
df_daily_portfolio_wo_t_cost_p_rate_100['available_cash'].plot(figsize=(10,8), grid=True, label='No T-Cost & Passive Trade Rate of 100%', title='Available Cash', legend=True, alpha=0.6)

In [None]:
## Analyze the Quintile Performance by date for this signal
def cs_bucket(group, col, q=10):
    """
    Bucket the values of ``group[col]`` into quantiles.

    Intended to be applied per timestamp group so the bucketing is cross-sectional.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to bucket.
    col : str
        Column whose values are ranked into buckets.
    q : int, default 10
        Number of quantile buckets requested; duplicate bin edges are dropped, so
        fewer buckets can come back.

    Returns
    -------
    pandas.Series
        Integer bucket ids (0 = lowest), indexed like ``group``.
    """
    values = group[col]
    bucket_ids = pd.qcut(values, q=q, labels=False, duplicates="drop")
    return bucket_ids

# Bucket every row into one of 5 quantiles of the prior-4h z-score, separately for each date.
# Note: this adds a column to the shared df_signal frame in place.
df_signal["quantile_bucket"] = df_signal.groupby("date", group_keys=False).apply(
    lambda g: cs_bucket(g, "close_log_return_z_score_prev_4h", q=5)
)

# Mean / std / count of the forward 12h open log return per bucket, pooled over all dates;
# rows without a bucket are excluded.
bucket_stats = (
    df_signal.dropna(subset=["quantile_bucket"])
    .groupby("quantile_bucket")["fwd_open_log_return_12h"]
    .agg(["mean", "std", "count"])
)
bucket_stats["t_stat"] = bucket_stats["mean"] / (bucket_stats["std"] / np.sqrt(bucket_stats["count"]))
## The denominator is the standard error, computed as STD / sqrt(N). The t-stat measures how many
## standard errors the observed mean is away from 0.
## The t-stat assumes IID observations (which may not hold here) and an N large enough for the
## Central Limit Theorem to make the sample mean approximately normal.
bucket_stats


In [None]:
df_signal[df_signal['date'] == pd.Timestamp('2022-04-01 12:00:00')].sort_values('quantile_bucket')

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees['daily_portfolio_pct_return'] = df_daily_portfolio_wo_t_cost_or_exch_fees['total_portfolio_value'].pct_change()

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees['daily_portfolio_pct_return'].describe()

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees.head()

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees.tail()

In [None]:
df_daily_position_with_t_cost.iloc[0]

In [None]:
df_daily_position_wo_t_cost_p_rate_100.iloc[0]

In [None]:
df_daily_portfolio_wo_t_cost_or_exch_fees.iloc[0]

## Shadow Backtest

In [None]:
def run_mean_reversion_backtest_check(df_signal, fwd_return_period, initial_capital, cash_buffer_percentage,
                                      transaction_cost_est, passive_trade_rate, enable_fees,
                                      signal_col='trade_signal_exec'):
    """
    Run the vintage-based mean-reversion backtest bar by bar.

    Trading starts on the second period of ``df_signal``. One new vintage
    ('Vintage_1' .. 'Vintage_3') is opened on each of the first three trading
    bars; afterwards every known vintage is exited, updated or re-opened
    depending on the holding-period counter it carried on the PRIOR bar.

    Parameters
    ----------
    df_signal : DataFrame with at least a ``date`` column and ``signal_col``;
        remaining columns are whatever the position helpers expect.
    fwd_return_period : number of bars a vintage is held before it is exited.
    initial_capital : starting cash.
    cash_buffer_percentage : fraction of portfolio value kept out of the
        'total_portfolio_value_upper_limit' column.
    transaction_cost_est, passive_trade_rate, enable_fees : forwarded unchanged
        to the open/exit helpers.
    signal_col : column whose non-zero values mark the rows that were traded.

    Returns
    -------
    (df_position, df_portfolio) as built up by the helper functions.

    Raises
    ------
    ValueError if fewer than 4 periods are available or ``fwd_return_period < 1``.

    Notes
    -----
    ``prior_period`` is the bar immediately before ``current_period``. The
    earlier ``enumerate(..., start=1)`` / ``period_list[i - 1]`` combination
    resolved to the current bar itself, so the helpers were handed the wrong
    prior bar and the counter lookup in the steady-state branch came back empty.
    """
    ## Get a list of all the periods in the signal dataframe
    period_list = sorted(df_signal.date.unique())

    if len(period_list) < 4:
        raise ValueError("Need at least 4 periods in df_signal to run this backtest.")
    if fwd_return_period < 1:
        raise ValueError("fwd_return_period must be at least 1.")

    ## Work on a copy that carries the position-tracking columns
    df_signal = df_signal.copy()
    df_signal['position_weight'] = 0.0
    df_signal['position_notional'] = 0.0
    df_signal['position_size'] = 0.0
    df_signal['holding_period_counter'] = 0.0
    df_signal['vintage_id'] = np.nan

    ## Initialized Position and Portfolio Dataframes
    portfolio_columns = [
        'total_position_notional',
        'available_cash',
        'total_portfolio_value',
        'total_portfolio_value_upper_limit',
        'buy_notional_gross',
        'sell_notional_gross',
        'gross_traded_notional',
        'turnover_pct',
        'turnover_pct_half',
        'buy_turnover_pct',
        'sell_turnover_pct'
    ]
    df_portfolio = pd.DataFrame(columns=portfolio_columns)
    df_portfolio.index.name = 'date'
    df_position = pd.DataFrame(columns=df_signal.columns.tolist())

    ## Seed the portfolio on the first trading bar (second period in the data)
    first_period = period_list[1]
    df_portfolio.loc[first_period, 'total_position_notional'] = 0.0
    df_portfolio.loc[first_period, 'available_cash'] = float(initial_capital)
    df_portfolio.loc[first_period, 'total_portfolio_value'] = float(initial_capital)
    df_portfolio.loc[first_period, 'total_portfolio_value_upper_limit'] = float(initial_capital) * (1 - cash_buffer_percentage)

    ## Initialize Turnover Columns
    df_portfolio = reset_turnover_for_period(df_portfolio, period=first_period)

    ## Vintages opened on period_list[1], [2] and [3] respectively
    startup_vintages = ['Vintage_1', 'Vintage_2', 'Vintage_3']

    def _open_vintage(df_position, df_portfolio, df_signal_bar, period, vintage_name):
        """Open a vintage on ``period`` and start its holding counter at 1."""
        df_position, df_portfolio = open_new_vintage_positions(
            df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_bar,
            period=period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
            transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
            enable_fees=enable_fees, vintage_name=vintage_name, signal_col=signal_col)
        opened_rows = (
            (df_position[signal_col].fillna(0).astype(float) != 0)
            & (df_position.date == period)
            & (df_position['vintage_id'] == vintage_name)
        )
        df_position.loc[opened_rows, 'holding_period_counter'] = 1
        return df_position, df_portfolio

    def _update_vintage(df_position, df_portfolio, df_signal_bar, period, prior, vintage_name):
        """Carry an open vintage forward from ``prior`` to ``period``."""
        return update_open_vintage_positions(
            df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_bar,
            current_period=period, prior_period=prior,
            cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage_name)

    for i, current_period in enumerate(period_list):
        ## Progress marker every 100 bars
        if i % 100 == 0:
            print(i)

        ## Bar immediately before the current one; the very first bar has none
        ## and performs no position work, so None is never consumed there.
        prior_period = period_list[i - 1] if i > 0 else None

        df_signal_current_period = df_signal.loc[df_signal.date == current_period].copy()

        ## period_list is sorted and unique, so i > 1 <=> current_period > first_period
        if i > 1:
            df_portfolio = roll_portfolio_positions(df_portfolio, current_period=current_period)

        if 1 <= i <= len(startup_vintages):
            ## Start-up bars: update the vintages opened earlier, then open the next one
            for vintage in startup_vintages[:i - 1]:
                df_position, df_portfolio = _update_vintage(
                    df_position, df_portfolio, df_signal_current_period,
                    current_period, prior_period, vintage)
            df_position, df_portfolio = _open_vintage(
                df_position, df_portfolio, df_signal_current_period,
                current_period, startup_vintages[i - 1])

        else:
            ## Steady state: decide per vintage from the counter it had on the prior bar.
            ## The vintage list is snapshotted before the helpers mutate df_position.
            vintage_list = df_position['vintage_id'].unique().tolist()
            for vintage in vintage_list:
                if pd.isna(vintage):
                    continue

                vintage_cond = (df_position['vintage_id'] == vintage)
                prior_vals = df_position.loc[(df_position.date == prior_period) & vintage_cond, 'holding_period_counter']
                if prior_vals.empty:
                    previous_period_holding_counter = 0
                else:
                    previous_period_holding_counter = prior_vals.max()
                    previous_period_holding_counter = (
                        0 if pd.isna(previous_period_holding_counter) else int(previous_period_holding_counter)
                    )

                if previous_period_holding_counter >= fwd_return_period:
                    ## Held for the full period: exit all open positions now
                    df_position, df_portfolio = exit_open_vintage_positions(
                        df_position=df_position, df_portfolio=df_portfolio, df_signal=df_signal_current_period,
                        current_period=current_period, prior_period=prior_period,
                        transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
                        enable_fees=enable_fees, cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

                elif previous_period_holding_counter >= 1:
                    ## Still inside the holding window: carry the vintage forward
                    df_position, df_portfolio = _update_vintage(
                        df_position, df_portfolio, df_signal_current_period,
                        current_period, prior_period, vintage)

                else:
                    ## Counter of 0 (or no prior rows): the vintage is flat, re-open it
                    df_position, df_portfolio = _open_vintage(
                        df_position, df_portfolio, df_signal_current_period,
                        current_period, vintage)

        ## Compute Turnover Ratios once per bar for the updated portfolio
        df_portfolio = ensure_turnover_cols_in_df(df_portfolio)
        df_portfolio = finalize_turnover_metrics(df_portfolio, current_period)

    return df_position, df_portfolio

In [None]:
from collections import deque
import numpy as np
import pandas as pd

def run_mean_reversion_shadow_backtest(
    df_signal: pd.DataFrame,
    fwd_return_period: int,
    initial_capital: float,
    cash_buffer_percentage: float,
    transaction_cost_est: float,
    passive_trade_rate: float,
    enable_fees: bool,
    signal_col: str = "trade_signal_exec",
    max_periods: int | None = None,   # optional for debugging
):
    """
    Shadow engine:
    - No df_position, no holding counters, no vintages.
    - Uses cohort queue (each cohort is a dict ticker->shares) with fixed holding period H.
    - Trades at OPEN of each bar.
    - Buys are sized to ~ (PV*(1-buffer))/H each bar, constrained by deployable cash.
    - Cash reduced by GROSS buy notional; shares computed from NET (after fees).
    """

    df = df_signal.copy()
    df["date"] = pd.to_datetime(df["date"])
    df = df.sort_values(["date", "ticker"]).reset_index(drop=True)

    periods = df["date"].drop_duplicates().tolist()
    if max_periods is not None:
        periods = periods[:max_periods]

    # Fee model (same form you use)
    est_fees = transaction_cost_est + estimate_fee_per_trade_w_enable_switch(
        passive_trade_rate=passive_trade_rate, enable_fees=enable_fees
    )

    # Price lookup: (date, ticker) -> open
    px = df.set_index(["date", "ticker"])["open"].astype(float)

    # Cohorts queue: each element is {"date": entry_date, "shares": pd.Series indexed by ticker}
    cohorts = deque()

    # Portfolio time series
    portfolio_cols = [
        "total_position_notional",
        "available_cash",
        "total_portfolio_value",
        "total_portfolio_value_upper_limit",
        "buy_notional_gross",
        "sell_notional_gross",
        "gross_traded_notional",
        "turnover_pct",
        "turnover_pct_half",
        "buy_turnover_pct",
        "sell_turnover_pct",
    ]
    df_port = pd.DataFrame(index=pd.Index(periods, name="date"), columns=portfolio_cols, dtype=float).fillna(0.0)

    cash = float(initial_capital)

    def mtm_positions_at(date_t) -> float:
        """Sum_{cohorts} sum_{ticker} shares * open_t"""
        if not cohorts:
            return 0.0
        total = 0.0
        for c in cohorts:
            sh = c["shares"]
            # align prices for tickers in this cohort
            p = px.loc[(date_t, sh.index)].values

            prices_t = px.xs(date_t, level="date")  # open prices at this bar (from df_signal)
            missing = sh.index.difference(prices_t.index)
            
            if len(missing) > 0:
                print(date_t, "missing:", missing.tolist())

            total += float(np.dot(sh.values, p))
        return total

    def compute_gross_to_deploy(pv: float, cash: float) -> float:
        """Target invest per bar = PV*(1-buffer)/H, limited by deployable cash."""
        desired_cash = pv * cash_buffer_percentage
        deployable_cash = max(0.0, cash - desired_cash)
        target_per_bar = (pv * (1.0 - cash_buffer_percentage)) / float(fwd_return_period)
        return float(min(target_per_bar, deployable_cash))

    for t in periods[1:]:
        # --------------------
        # 1) MTM at open_t
        # --------------------
        pos_notional = mtm_positions_at(t)
        pv = cash + pos_notional

        # reset per-bar turnover
        buy_gross = 0.0
        sell_gross = 0.0

        # --------------------
        # 2) SELL expiring cohort (if we already have H cohorts)
        # --------------------
        if len(cohorts) >= fwd_return_period:
            exp = cohorts.popleft()
            sh = exp["shares"]
            p = px.loc[(t, sh.index)].values
            sell_gross = float(np.dot(sh.values, p))
            cash += sell_gross * (1.0 - est_fees)  # proceeds net of fees
            pos_notional -= sell_gross            # remove gross notional from holdings
            pv = cash + pos_notional

        # --------------------
        # 3) BUY new cohort
        # --------------------
        # picks at date t
        picks = df.loc[df["date"] == t].copy()
        picks = picks.loc[picks[signal_col].fillna(0).astype(float) != 0.0, ["ticker"]]

        if not picks.empty:
            gross_to_deploy = compute_gross_to_deploy(pv=pv, cash=cash)
            if gross_to_deploy > 0:
                n = len(picks)
                per_name_gross = gross_to_deploy / float(n)

                tickers = picks["ticker"].tolist()
                opens = px.loc[(t, tickers)].values

                # net notional after fees -> shares
                per_name_net = per_name_gross * (1.0 - est_fees)
                shares = per_name_net / opens

                cohort_shares = pd.Series(shares, index=pd.Index(tickers, name="ticker"), dtype=float)

                cohorts.append({"date": t, "shares": cohort_shares})

                buy_gross = gross_to_deploy
                cash -= buy_gross
                pos_notional += float(np.dot(cohort_shares.values, opens))  # equals per_name_net*n

                pv = cash + pos_notional

        # --------------------
        # 4) Write portfolio row + turnover
        # --------------------
        df_port.loc[t, "total_position_notional"] = pos_notional
        df_port.loc[t, "available_cash"] = cash
        df_port.loc[t, "total_portfolio_value"] = pv
        df_port.loc[t, "total_portfolio_value_upper_limit"] = pv * (1.0 - cash_buffer_percentage)

        df_port.loc[t, "buy_notional_gross"] = buy_gross
        df_port.loc[t, "sell_notional_gross"] = sell_gross
        df_port.loc[t, "gross_traded_notional"] = buy_gross + sell_gross

        if pv > 0:
            df_port.loc[t, "turnover_pct"] = (buy_gross + sell_gross) / pv
            df_port.loc[t, "turnover_pct_half"] = (buy_gross + sell_gross) / (2.0 * pv)
            df_port.loc[t, "buy_turnover_pct"] = buy_gross / pv
            df_port.loc[t, "sell_turnover_pct"] = sell_gross / pv

    return df_port


In [None]:
# Rebuild the shadow backtest's inputs at notebook level for step-by-step debugging.
max_periods = 1000
df = df_signal.copy()
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values(["date", "ticker"]).reset_index(drop=True)

periods = df["date"].drop_duplicates().tolist()
if max_periods is not None:
    periods = periods[:max_periods]

# Build the price lookup from the normalized, sorted copy (as the backtest function
# does) so its dates match `periods` and the (date, ticker) index is sorted.
px = df.set_index(["date", "ticker"])["open"].astype(float)

In [None]:
# Scratch state for stepping through a single bar of the shadow backtest by hand.
# NOTE(review): in the visible code mtm_positions_at exists only as a nested function
# inside run_mean_reversion_shadow_backtest, so this call needs a top-level
# definition from elsewhere in the notebook — confirm.
cohorts = deque()
t = pd.Timestamp('2023-03-12 04:00:00')
cash = float(initial_capital)
pos_notional = mtm_positions_at(t)
pv = cash + pos_notional

In [None]:
# Second period in the list: the first bar the shadow backtest loop processes.
periods[1]

In [None]:
# Print every period after the first, i.e. the bars the shadow loop iterates over.
for t in periods[1:]:
    print(t)

In [None]:
# Display the (possibly truncated) list of periods.
periods

In [None]:
# First entries of the (date, ticker) -> open price lookup.
px.head()

In [None]:
# All signal rows of the 2023-03-12 04:00 bar (the bar used in the single-bar debug cell).
df_signal[df_signal['date'] == pd.Timestamp('2023-03-12 04:00:00')]#.sort_values(['trade_signal_exec'], ascending=False)

In [None]:
# Number of distinct tickers present on each date.
df_signal.groupby(['date']).agg({'ticker':'nunique'})

In [None]:
# All signal rows of the 2022-04-01 12:00 bar.
df_signal[df_signal['date'] == pd.Timestamp('2022-04-01 12:00:00')]#.head()

In [None]:
# Reference call of the main engine, kept commented out; it uses the same
# arguments as the shadow run below.
# # Your current engine
# df_pos_engine, df_port_engine = run_mean_reversion_backtest(
#     df_signal=df_signal,
#     fwd_return_period=3,
#     initial_capital=1000,
#     cash_buffer_percentage=0.10,
#     transaction_cost_est=0.0,
#     passive_trade_rate=1.0,
#     enable_fees=False,
#     signal_col="trade_signal_exec",
# )

# Shadow engine (independent)
# Run with a 3-bar holding period, 10% cash buffer, zero transaction cost estimate
# and enable_fees=False.
df_port_shadow = run_mean_reversion_shadow_backtest(
    df_signal=df_signal,
    fwd_return_period=3,
    initial_capital=1000,
    cash_buffer_percentage=0.10,
    transaction_cost_est=0.0,
    passive_trade_rate=1.0,
    enable_fees=False,
    signal_col="trade_signal_exec",
)


In [None]:
%%time

## Legacy, top-level version of the vintage backtest loop. It works directly on notebook
## globals (df_signal, period_list, df_daily_position_summary, df_daily_portfolio_summary,
## initial_capital, fwd_return_period, ...) and processes the first 1000 periods only.
## NOTE(review): the start-up bars are hard-coded timestamps and the holding period is
## hard-coded to 3 below; the helper calls here do not pass enable_fees / signal_col,
## unlike run_mean_reversion_backtest_check — confirm the helper signatures still accept this.

## Initialize the Cash and Portfolio Value prior to processing positions
available_cash = initial_capital
total_portfolio_value = initial_capital
total_portfolio_value_upper_limit = total_portfolio_value * (1 - cash_buffer_percentage)
first_period = pd.Timestamp('2022-04-01 08:00:00')
second_period = pd.Timestamp('2022-04-01 12:00:00')
third_period = pd.Timestamp('2022-04-01 16:00:00')

## Initialize Daily Portfolio Positions prior to processing positions
df_daily_portfolio_summary.loc[first_period, 'total_position_notional'] = 0.0
df_daily_portfolio_summary.loc[first_period, 'available_cash'] = initial_capital
df_daily_portfolio_summary.loc[first_period, 'total_portfolio_value'] = initial_capital
df_daily_portfolio_summary.loc[first_period, 'total_portfolio_value_upper_limit'] = initial_capital * (1 - cash_buffer_percentage)

## Progress counter: printed whenever it hits a multiple of 100
run_counter = 0
run_counter_check = np.arange(0, len(period_list), 100).tolist()
for current_period in period_list[0:1000]:
    if run_counter in run_counter_check:
        print(run_counter)
    ## Previous element of period_list; assuming period_list is a list, the first
    ## element resolves to index -1, i.e. the LAST period
    prior_period = period_list[period_list.index(current_period)-1]

    ## Filtering Conditions
    signal_current_period_cond = (df_signal.date == current_period)
    positions_current_period_cond = (df_daily_position_summary.date == current_period)
    ## Computed once per bar, before the helpers below replace df_daily_position_summary
    positions_prior_period_cond = (df_daily_position_summary.date == prior_period)
    df_signal_current_period = df_signal.loc[signal_current_period_cond].copy()

    if current_period > first_period:
        df_daily_portfolio_summary = roll_portfolio_positions(df_daily_portfolio_summary, current_period=current_period)

    if current_period == first_period:
        ## Open New Positions for Vintage 1
        df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                                                                                           period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
                                                                                           transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name='Vintage_1')
        ## Start the holding counter at 1 for the rows just opened in this vintage
        non_zero_position_cond = (df_daily_position_summary['bottom_quintile_signal'])
        positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_1')
        positions_current_period_cond = (df_daily_position_summary.date == current_period)
        df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

    elif current_period == second_period:
        ## Update Positions from Vintage 1
        df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_1')

        ## Open New Positions for Vintage 2
        df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                                                                                           period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
                                                                                           transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name='Vintage_2')
        non_zero_position_cond = (df_daily_position_summary['bottom_quintile_signal'])
        positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_2')
        positions_current_period_cond = (df_daily_position_summary.date == current_period)
        df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

    elif current_period == third_period:
        ## Update Positions from Vintage 1
        df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_1')

        ## Update Positions from Vintage 2
        df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
            df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
            current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name='Vintage_2')

        ## Open New Positions for Vintage 3
        df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                                                                                           period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
                                                                                           transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name='Vintage_3')
        non_zero_position_cond = (df_daily_position_summary['bottom_quintile_signal'])
        positions_vintage_cond = (df_daily_position_summary['vintage_id'] == 'Vintage_3')
        positions_current_period_cond = (df_daily_position_summary.date == current_period)
        df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1

    else:
        ## Every other bar: act per vintage based on the counter it had on the prior bar
        vintage_list = df_daily_position_summary['vintage_id'].unique().tolist()
        for vintage in vintage_list:
            vintage_cond = (df_daily_position_summary['vintage_id'] == vintage)
            ## Check if holding period is equal to 3
            ## .values[0] raises IndexError when the vintage has no rows on the prior bar
            previous_period_holding_counter = df_daily_position_summary.loc[positions_prior_period_cond & vintage_cond, 'holding_period_counter'].values[0]
            
            if previous_period_holding_counter == 3:
                ## Exit all open positions in current period
                df_daily_position_summary, df_daily_portfolio_summary = exit_open_vintage_positions(
                    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                    current_period=current_period, prior_period=prior_period, transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
                    cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)
                
            elif previous_period_holding_counter == 0:
                ## Open New Positions for Vintage
                df_daily_position_summary, df_daily_portfolio_summary = open_new_vintage_positions(df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                                                                                                   period=current_period, fwd_return_period=fwd_return_period, cash_buffer_percentage=cash_buffer_percentage,
                                                                                                   transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate, vintage_name=vintage)
                non_zero_position_cond = (df_daily_position_summary['bottom_quintile_signal'])
                positions_vintage_cond = (df_daily_position_summary['vintage_id'] == vintage)
                positions_current_period_cond = (df_daily_position_summary.date == current_period)
                df_daily_position_summary.loc[non_zero_position_cond & positions_current_period_cond & positions_vintage_cond, 'holding_period_counter'] = 1
                
            elif (previous_period_holding_counter == 1) | (previous_period_holding_counter == 2):
                ## Update Positions from Vintage
                df_daily_position_summary, df_daily_portfolio_summary = update_open_vintage_positions(
                    df_position=df_daily_position_summary, df_portfolio=df_daily_portfolio_summary, df_signal=df_signal_current_period,
                    current_period=current_period, prior_period=prior_period, cash_buffer_percentage=cash_buffer_percentage, vintage_name=vintage)

    run_counter += 1

In [None]:
# First 200 rows of the portfolio summary built by the loop above.
df_daily_portfolio_summary.head(200)

In [None]:
# Last 500 rows of the portfolio summary.
df_daily_portfolio_summary.tail(500)

In [None]:
# First 20 rows of the position summary.
df_daily_position_summary.head(20)

In [None]:
# Plot summed position_notional per date, with one column (line) per vintage_id.
pd.pivot_table(df_daily_position_summary, index=['date'], columns=['vintage_id'], values=['position_notional'], aggfunc={'position_notional':'sum'}).plot(figsize=(12,10), grid=True)

In [None]:
# Total position size and notional per (date, vintage_id); first 20 groups.
df_daily_position_summary.groupby(['date','vintage_id']).agg({'position_size':'sum', 'position_notional':'sum'}).head(20)

In [None]:
# Position rows of Vintage_2 on the 2022-04-01 12:00 bar.
df_daily_position_summary[(df_daily_position_summary['date'] == pd.Timestamp('2022-04-01 12:00:00')) & (df_daily_position_summary['vintage_id'] == 'Vintage_2')]

In [None]:
# Weights of every signal row on the 2022-04-01 08:00 bar.
date_cond = (df_signal.date == pd.Timestamp('2022-04-01 08:00:00'))
positions_weights = df_signal.loc[date_cond, 'weight']

# Tickers with a non-zero weight on `date`.
# NOTE(review): `date` is not defined in this cell or in the visible imports, so it
# must come from an earlier cell; presumably the same 08:00 bar was intended — confirm.
date_cond = (df_signal.date == date)
weight_cond = (df_signal.weight != 0.0)
position_coins = df_signal.loc[date_cond & weight_cond]['ticker'].tolist()

In [None]:
# Tickers selected in the previous cell.
position_coins

In [None]:
# Scale the per-row weights by the initial capital.
positions_weights * initial_capital

In [None]:
# All signal rows of the 2022-04-01 08:00 bar.
df_signal[df_signal.date == pd.Timestamp('2022-04-01 08:00:00')]#.sort_values('z_ret_prev_4h')

In [None]:
# First rows of the signal frame.
df_signal.head()