In [1]:
# Import all the necessary modules
import os
import sys
import os, sys
# from .../research/notebooks -> go up two levels to repo root
repo_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mtick
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
import pandas_datareader as pdr
import math
import datetime
from datetime import datetime, timezone
import itertools
import ast
import yfinance as yf
import seaborn as sn
import yaml
from pathlib import Path
from IPython.display import display, HTML
from strategy_signal.trend_following_signal import (
    apply_jupyter_fullscreen_css, get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol
)
from portfolio.strategy_performance import (calculate_sharpe_ratio, calculate_calmar_ratio, calculate_CAGR, calculate_risk_and_performance_metrics,
                                          calculate_compounded_cumulative_returns, estimate_fee_per_trade, rolling_sharpe_ratio)
from utils import coinbase_utils as cn
from portfolio import strategy_performance as perf
from sizing import position_sizing_binary_utils as size_bin
from sizing import position_sizing_continuous_utils as size_cont
from strategy_signal import trend_following_signal as tf
%matplotlib inline

In [2]:
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel.
# Note: importlib.reload only refreshes the module objects (cn, perf, tf,
# size_bin, size_cont). Names that the import cell pulled in with
# `from ... import ...` (e.g. calculate_risk_and_performance_metrics,
# apply_jupyter_fullscreen_css) keep pointing at the pre-reload objects.
import importlib
importlib.reload(cn)
importlib.reload(perf)
importlib.reload(tf)
importlib.reload(size_bin)
importlib.reload(size_cont)

<module 'sizing.position_sizing_continuous_utils' from '/Users/adheerchauhan/Documents/git/trend_following/sizing/position_sizing_continuous_utils.py'>

In [3]:
import warnings
# Silence every warning category for the rest of the session. This also hides
# pandas warnings (e.g. chained-assignment) raised by later cells.
warnings.filterwarnings('ignore')
# Remove pandas' row / column display limits so frames are rendered in full.
pd.set_option('Display.max_rows', None)
pd.set_option('Display.max_columns',None)
# Project helper imported from strategy_signal.trend_following_signal.
# NOTE(review): presumably widens the notebook layout via CSS - confirm in that module.
apply_jupyter_fullscreen_css()

## Helper Functions

In [5]:
## Load Config file for the strategy
def load_prod_strategy_config(strategy_version='v0.1.0'):
    """
    Read the live strategy's YAML configuration and return it as parsed by
    ``yaml.safe_load``.

    The file is located relative to the current working directory: two levels
    above it (the notebook is expected to run from ``<repo>/research/notebooks``),
    then ``live_strategy/trend_following_strategy_<version>-live/config/
    trend_strategy_config_<version>.yaml``.

    Parameters
    ----------
    strategy_version : str
        Version tag embedded in both the strategy folder name and the file name.

    Returns
    -------
    The object produced by ``yaml.safe_load`` for that file.

    Side effects
    ------------
    Prints the resolved path and whether it exists. Opening a missing file
    raises the usual ``FileNotFoundError`` from ``open``.
    """
    repo_dir = Path.cwd().parents[1]  # <repo>/research/notebooks -> <repo>
    strategy_dir = repo_dir.joinpath(
        "live_strategy",
        f"trend_following_strategy_{strategy_version}-live",
    )
    yaml_file = strategy_dir.joinpath(
        "config",
        f"trend_strategy_config_{strategy_version}.yaml",
    )

    # Sanity check: show where we are looking and whether the file is there.
    print(yaml_file)
    print(yaml_file.exists())

    with open(yaml_file, "r") as handle:
        return yaml.safe_load(handle)

In [10]:
from collections import OrderedDict

def print_strategy_params():
    """
    Pretty-print the strategy’s configuration values, with a blank line
    separating each logical section.

    The function takes no arguments: every value is read from the notebook's
    module-level names (start_date, fast_mavg, warmup_days, ...), so the
    configuration cell that binds them from ``cfg`` must have been run first.
    A missing name raises ``NameError``.

    The warm-up length is read from ``warmup_days`` - the name the
    configuration cell actually defines - rather than ``WARMUP_DAYS``, which
    is never bound in this notebook.

    Returns
    -------
    None. Output is written with ``print``.
    """

    # ---- Define sections (title is just for dev readability) --------------
    sections = [
        ("Dates & universe", OrderedDict([
            ("start_date",      start_date),
            ("end_date",        end_date),
            ("warm_up_days",    warmup_days),
            ("ticker_list",     ticker_list),
        ])),

        ("Moving-average / trend", OrderedDict([
            ("fast_mavg",                  fast_mavg),
            ("slow_mavg",                  slow_mavg),
            ("mavg_stepsize",              mavg_stepsize),
            ("mavg_z_score_window",        mavg_z_score_window),
            ("moving_avg_type",            moving_avg_type),
            ("ma_crossover_signal_weight", ma_crossover_signal_weight),
        ])),

        ("Donchian channel", OrderedDict([
            ("entry_rolling_donchian_window", entry_rolling_donchian_window),
            ("exit_rolling_donchian_window", exit_rolling_donchian_window),
            ("use_donchian_exit_gate", use_donchian_exit_gate),
            ("donchian_signal_weight",  donchian_signal_weight),
        ])),

        ("Volatility & risk", OrderedDict([
            ("volatility_window",            volatility_window),
            ("annualized_target_volatility", annualized_target_volatility),
            ("rolling_cov_window",           rolling_cov_window),
            ("rolling_atr_window",           rolling_atr_window),
            ("atr_multiplier",               atr_multiplier),
            ("log_std_window",               log_std_window),
            ("coef_of_variation_window",     coef_of_variation_window),
            ("vol_of_vol_z_score_window",    vol_of_vol_z_score_window),
            ("vol_of_vol_p_min",             vol_of_vol_p_min),
            ("r2_strong_threshold",          r2_strong_threshold)
        ])),

        ("Signal gating / quality", OrderedDict([
            ("lower_r_sqr_limit",             lower_r_sqr_limit),
            ("upper_r_sqr_limit",             upper_r_sqr_limit),
            ("rolling_r2_window",             rolling_r2_window),
            ("r2_smooth_window",              r2_smooth_window),
            ("r2_confirm_days",               r2_confirm_days),
            ("rolling_sharpe_window",         rolling_sharpe_window),
            ("use_activation",                use_activation),
            ("tanh_activation_constant_dict", tanh_activation_constant_dict),
            ("weighted_signal_ewm_window",    weighted_signal_ewm_window)
        ])),

        ("Trading toggles & thresholds", OrderedDict([
            ("long_only",                  long_only),
            ("use_coinbase_data",          use_coinbase_data),
            ("use_saved_files",            use_saved_files),
            ("saved_file_end_date",        saved_file_end_date),
            ("use_specific_start_date",    use_specific_start_date),
            ("signal_start_date",          signal_start_date),
            ("price_or_returns_calc",      price_or_returns_calc),
            ("notional_threshold_pct",     notional_threshold_pct),
            ("cooldown_counter_threshold", cooldown_counter_threshold),
            # Kept alongside "warm_up_days" above so the printed layout is unchanged.
            ("warmup_days",                warmup_days)
        ])),

        ("Capital & execution", OrderedDict([
            ("initial_capital",        initial_capital),
            ("cash_buffer_percentage", cash_buffer_percentage),
            ("transaction_cost_est",   transaction_cost_est),
            ("passive_trade_rate",     passive_trade_rate),
            ("annual_trading_days",    annual_trading_days),
        ])),
    ]

    # ---- Compute width for neat alignment ---------------------------------
    longest_key = max(len(k) for _, sec in sections for k in sec)
    rule = "-" * (longest_key + 30)

    print("\nStrategy Parameters\n" + rule)
    for _, sec in sections:
        for k, v in sec.items():
            print(f"{k:<{longest_key}} : {v}")
        print()  # blank line between sections
    print(rule + "\n")

# ---------------------------------------------------------------------------
# Example usage (uncomment after your own parameter definitions are in scope)
# ---------------------------------------------------------------------------
# if __name__ == "__main__":
#     print_strategy_params()

In [12]:
def _plot_orig_vs_new(ax, df_1, df_2, column, title, ylabel):
    """Draw `column` from both frames on `ax` and apply the shared title, labels, legend and grid."""
    ax.plot(df_1.index, df_1[column], label='Orig Signal', alpha=0.9)
    ax.plot(df_2.index, df_2[column], label='New Signal', alpha=0.9)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Date')
    ax.legend(loc='upper left')
    ax.grid()


def plot_signal_performance(df_1, df_2, ticker, rolling_sharpe_window=50):
    """
    Draw a 2x2 comparison of two strategy runs ("Orig" = df_1, "New" = df_2).

    Panels: final signal for `ticker` (with the ticker's open price on a
    secondary y-axis), equity curve, rolling Sharpe, and total portfolio value.

    Parameters
    ----------
    df_1, df_2 : pandas.DataFrame
        Strategy output frames. Each must contain the columns
        ``f'{ticker}_final_signal'``, ``'equity_curve'``,
        ``f'portfolio_rolling_sharpe_{rolling_sharpe_window}'`` and
        ``'total_portfolio_value'``; df_2 must also contain ``f'{ticker}_open'``.
    ticker : str
        Ticker whose signal and price are plotted in the first panel.
    rolling_sharpe_window : int, default 50
        Window used in the rolling-Sharpe column name. The default keeps the
        previously hard-coded ``portfolio_rolling_sharpe_50`` column.

    Returns
    -------
    None. The figure is drawn on the current matplotlib backend.
    """
    plt.figure(figsize=(20,12))
    layout = (2,2)
    signal_ax = plt.subplot2grid(layout, (0,0))
    price_ax = signal_ax.twinx()
    equity_curve_ax = plt.subplot2grid(layout, (0,1))
    sharpe_ax = plt.subplot2grid(layout, (1,0))
    portfolio_value_ax = plt.subplot2grid(layout, (1,1))

    _plot_orig_vs_new(signal_ax, df_1, df_2, f'{ticker}_final_signal',
                      title='Orignal Signal vs New Signal', ylabel='Signal')
    # x and y must come from the same frame: pairing df_1's index with df_2's
    # prices raises a shape error as soon as the two runs differ in length.
    price_ax.plot(df_2.index, df_2[f'{ticker}_open'], label='Price', alpha=0.7, linestyle='--', color='magenta')

    _plot_orig_vs_new(equity_curve_ax, df_1, df_2, 'equity_curve',
                      title='Equity Curve', ylabel='Equity Curve')

    _plot_orig_vs_new(sharpe_ax, df_1, df_2, f'portfolio_rolling_sharpe_{rolling_sharpe_window}',
                      title='Rolling Sharpe', ylabel='Rolling Sharpe')

    _plot_orig_vs_new(portfolio_value_ax, df_1, df_2, 'total_portfolio_value',
                      title='Total Portfolio Value', ylabel='Portfolio Value')

    plt.tight_layout()

## Signal Performance

In [15]:
cfg = load_prod_strategy_config()

/Users/adheerchauhan/Documents/git/trend_following/live_strategy/trend_following_strategy_v0.1.0-live/config/trend_strategy_config_v0.1.0.yaml
True


In [17]:
cfg

{'portfolio': {'exchange': 'Coinbase Advanced', 'name': 'Trend Following'},
 'run': {'start_date': '2022-04-01',
  'end_date': '2025-07-31',
  'use_specific_start_date': True,
  'signal_start_date': '2022-04-01',
  'warmup_days': 300,
  'long_only': True,
  'annual_trading_days': 365,
  'initial_capital': 15000},
 'universe': {'tickers': ['BTC-USD',
   'ETH-USD',
   'SOL-USD',
   'ADA-USD',
   'AVAX-USD']},
 'data': {'use_coinbase_data': True,
  'use_saved_files': True,
  'saved_file_end_date': '2025-07-31',
  'price_or_returns_calc': 'price',
  'moving_avg_type': 'exponential'},
 'signals': {'moving_average': {'fast_mavg': 20,
   'slow_mavg': 200,
   'mavg_stepsize': 8,
   'mavg_z_score_window': 126},
  'donchian': {'entry_rolling_donchian_window': 56,
   'exit_rolling_donchian_window': 28,
   'use_donchian_exit_gate': False},
  'weighting': {'ma_crossover_signal_weight': 0.85,
   'donchian_signal_weight': 0.15,
   'weighted_signal_ewm_window': 4},
  'activation': {'use_activation': F

In [21]:
# Unpack the YAML config dict `cfg` into flat module-level variables.
# Later cells (the backtest calls and print_strategy_params) read these names
# directly, so this cell has to run before them.
import pandas as pd

# assume cfg is already loaded from YAML as shown in your message

# --- Prod Configuration (from cfg) ---

# portfolio
exchange = cfg['portfolio']['exchange']
portfolio_name = cfg['portfolio']['name']

# run: dates are converted to datetime.date objects
start_date  = pd.Timestamp(cfg['run']['start_date']).date()
end_date    = pd.Timestamp(cfg['run']['end_date']).date()
use_specific_start_date = bool(cfg['run']['use_specific_start_date'])
# NOTE(review): the backtest cells below pass signal_start_date=start_date, so
# this variable is not the value that reaches the strategy call - confirm intent.
signal_start_date       = pd.Timestamp(cfg['run']['signal_start_date']).date()
warmup_days = int(cfg['run']['warmup_days'])
# NOTE(review): taken as-is, without the bool() cast applied to the other flags in this cell.
long_only = cfg['run']['long_only']
annual_trading_days    = int(cfg['run']['annual_trading_days'])
initial_capital        = float(cfg['run']['initial_capital'])

# universe
ticker_list = list(cfg['universe']['tickers'])

# data
use_coinbase_data      = bool(cfg['data']['use_coinbase_data'])
use_saved_files        = bool(cfg['data']['use_saved_files'])
saved_file_end_date    = str(cfg['data']['saved_file_end_date'])
price_or_returns_calc    = str(cfg['data']['price_or_returns_calc'])
moving_avg_type    = str(cfg['data']['moving_avg_type'])

# signals.moving_average
fast_mavg        = int(cfg['signals']['moving_average']['fast_mavg'])
slow_mavg        = int(cfg['signals']['moving_average']['slow_mavg'])
mavg_stepsize    = int(cfg['signals']['moving_average']['mavg_stepsize'])
mavg_z_score_window = int(cfg['signals']['moving_average']['mavg_z_score_window'])

# signals.donchian
entry_rolling_donchian_window = int(cfg['signals']['donchian']['entry_rolling_donchian_window'])
exit_rolling_donchian_window  = int(cfg['signals']['donchian']['exit_rolling_donchian_window'])
use_donchian_exit_gate        = bool(cfg['signals']['donchian']['use_donchian_exit_gate'])

# signals.weighting
ma_crossover_signal_weight = float(cfg['signals']['weighting']['ma_crossover_signal_weight'])
donchian_signal_weight     = float(cfg['signals']['weighting']['donchian_signal_weight'])
weighted_signal_ewm_window = int(cfg['signals']['weighting']['weighted_signal_ewm_window'])  # (new config but same value)

# signals.filters.rolling_r2
rolling_r2_window   = int(cfg['signals']['filters']['rolling_r2']['rolling_r2_window'])
lower_r_sqr_limit   = float(cfg['signals']['filters']['rolling_r2']['lower_r_sqr_limit'])
upper_r_sqr_limit   = float(cfg['signals']['filters']['rolling_r2']['upper_r_sqr_limit'])
r2_smooth_window    = int(cfg['signals']['filters']['rolling_r2']['r2_smooth_window'])
r2_confirm_days     = int(cfg['signals']['filters']['rolling_r2']['r2_confirm_days'])
r2_strong_threshold = float(cfg['signals']['filters']['rolling_r2']['r2_strong_threshold'])

# signals.filters.vol_of_vol
log_std_window            = int(cfg['signals']['filters']['vol_of_vol']['log_std_window'])
coef_of_variation_window  = int(cfg['signals']['filters']['vol_of_vol']['coef_of_variation_window'])
vol_of_vol_z_score_window = int(cfg['signals']['filters']['vol_of_vol']['vol_of_vol_z_score_window'])
vol_of_vol_p_min          = float(cfg['signals']['filters']['vol_of_vol']['vol_of_vol_p_min'])

# signals.activation
use_activation              = bool(cfg['signals']['activation']['use_activation'])
tanh_activation_constant_dict = cfg['signals']['activation']['tanh_activation_constant_dict']  # likely None

# risk_and_sizing
# stop_loss_strategy and highest_high_window are read here but are not passed
# to the strategy call in the backtest cells of this notebook.
annualized_target_volatility = float(cfg['risk_and_sizing']['annualized_target_volatility'])
volatility_window      = int(cfg['risk_and_sizing']['volatility_window'])
rolling_cov_window     = int(cfg['risk_and_sizing']['rolling_cov_window'])
rolling_atr_window     = int(cfg['risk_and_sizing']['rolling_atr_window'])
atr_multiplier         = float(cfg['risk_and_sizing']['atr_multiplier'])
stop_loss_strategy     = str(cfg['risk_and_sizing']['stop_loss_strategy'])
highest_high_window    = int(cfg['risk_and_sizing']['highest_high_window'])
rolling_sharpe_window    = int(cfg['risk_and_sizing']['rolling_sharpe_window'])
cash_buffer_percentage = float(cfg['risk_and_sizing']['cash_buffer_percentage'])

# execution_and_costs
transaction_cost_est   = float(cfg['execution_and_costs']['transaction_cost_est'])
passive_trade_rate     = float(cfg['execution_and_costs']['passive_trade_rate'])
notional_threshold_pct = float(cfg['execution_and_costs']['notional_threshold_pct'])
min_trade_notional_abs = float(cfg['execution_and_costs']['min_trade_notional_abs'])
cooldown_counter_threshold = int(cfg['execution_and_costs']['cooldown_counter_threshold'])

In [None]:
use_saved_files

In [None]:
# Run the strategy with the production configuration.
# The run starts `warmup_days` before start_date; the rows before start_date
# are dropped again right after the call.
# NOTE(review): signal_start_date is given start_date, not the signal_start_date
# variable read from the config - confirm this is intended.
df_final_prod_config = tf.apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=ticker_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=r2_confirm_days,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=use_saved_files, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)
# Keep only rows on or after start_date (drops the warm-up period).
df_final_prod_config = df_final_prod_config[df_final_prod_config.index >= pd.Timestamp(start_date)]

print('Calculating In Sample Asset Returns!!')
df_final_prod_config = perf.calculate_asset_level_returns(df_final_prod_config, end_date, ticker_list)

# Portfolio-level metrics, computed with fees excluded.
# NOTE(review): passive_trade_rate, annual_trading_days and transaction_cost_est
# are literals here rather than the config-derived variables of the same names.
portfolio_perf_metrics_prod_config = calculate_risk_and_performance_metrics(df_final_prod_config, strategy_daily_return_col=f'portfolio_daily_pct_returns',
                                                                           strategy_trade_count_col=f'count_of_positions', include_transaction_costs_and_fees=False, passive_trade_rate=0.05, annual_trading_days=365, transaction_cost_est=0.001)
portfolio_perf_metrics_prod_config

In [None]:
# Per-ticker metrics for the production run, keyed by ticker.
# Iterates cfg['universe']['tickers'] directly rather than ticker_list, so it
# is unaffected by later cells that rebind ticker_list.
ticker_perf_prod_config = {}
for t in cfg['universe']['tickers']:
    # Each ticker uses its own '<ticker>_daily_pct_returns' and
    # '<ticker>_position_count' columns; fees are excluded.
    _ticker_perf = perf.calculate_risk_and_performance_metrics(
        df_final_prod_config,
        strategy_daily_return_col=f'{t}_daily_pct_returns',
        strategy_trade_count_col=f'{t}_position_count',
        annual_trading_days=365,
        include_transaction_costs_and_fees=False
    )
    ticker_perf_prod_config[t] = _ticker_perf

In [None]:
ticker_perf_prod_config

In [None]:
# Configuration for the expanded-universe run.
# Every value is re-read from the same `cfg` dict as in the earlier prod-config
# cell; the only difference is ticker_list, which is hard-coded to eight coins
# instead of cfg['universe']['tickers']. Running this cell rebinds the
# module-level ticker_list used by the cells that follow.
import pandas as pd

# assume cfg is already loaded from YAML as shown in your message

# --- Prod Configuration (from cfg) ---
start_date  = pd.Timestamp(cfg['run']['start_date']).date()
end_date    = pd.Timestamp(cfg['run']['end_date']).date()
warmup_days = int(cfg['run']['warmup_days'])

# ticker_list = list(cfg['universe']['tickers'])
ticker_list = ['BTC-USD', 'ETH-USD', 'SOL-USD', 'ADA-USD', 'AVAX-USD', 'LTC-USD', 'DOGE-USD', 'CRO-USD']

# signals.moving_average
fast_mavg        = int(cfg['signals']['moving_average']['fast_mavg'])
slow_mavg        = int(cfg['signals']['moving_average']['slow_mavg'])
mavg_stepsize    = int(cfg['signals']['moving_average']['mavg_stepsize'])
mavg_z_score_window = int(cfg['signals']['moving_average']['mavg_z_score_window'])

# signals.donchian
entry_rolling_donchian_window = int(cfg['signals']['donchian']['entry_rolling_donchian_window'])
exit_rolling_donchian_window  = int(cfg['signals']['donchian']['exit_rolling_donchian_window'])
use_donchian_exit_gate        = bool(cfg['signals']['donchian']['use_donchian_exit_gate'])

# signals.weighting
ma_crossover_signal_weight = float(cfg['signals']['weighting']['ma_crossover_signal_weight'])
donchian_signal_weight     = float(cfg['signals']['weighting']['donchian_signal_weight'])
weighted_signal_ewm_window = int(cfg['signals']['weighting']['weighted_signal_ewm_window'])  # (new config but same value)

# signals.filters.rolling_r2
rolling_r2_window   = int(cfg['signals']['filters']['rolling_r2']['rolling_r2_window'])
lower_r_sqr_limit   = float(cfg['signals']['filters']['rolling_r2']['lower_r_sqr_limit'])
upper_r_sqr_limit   = float(cfg['signals']['filters']['rolling_r2']['upper_r_sqr_limit'])
r2_smooth_window    = int(cfg['signals']['filters']['rolling_r2']['r2_smooth_window'])
r2_confirm_days     = int(cfg['signals']['filters']['rolling_r2']['r2_confirm_days'])
r2_strong_threshold = float(cfg['signals']['filters']['rolling_r2']['r2_strong_threshold'])

# signals.filters.vol_of_vol
log_std_window            = int(cfg['signals']['filters']['vol_of_vol']['log_std_window'])
coef_of_variation_window  = int(cfg['signals']['filters']['vol_of_vol']['coef_of_variation_window'])
vol_of_vol_z_score_window = int(cfg['signals']['filters']['vol_of_vol']['vol_of_vol_z_score_window'])
vol_of_vol_p_min          = float(cfg['signals']['filters']['vol_of_vol']['vol_of_vol_p_min'])

# signals.activation
use_activation              = bool(cfg['signals']['activation']['use_activation'])
tanh_activation_constant_dict = cfg['signals']['activation']['tanh_activation_constant_dict']  # likely None

# data / run toggles
moving_avg_type        = str(cfg['data']['moving_avg_type'])
long_only              = bool(cfg['run']['long_only'])
price_or_returns_calc  = str(cfg['data']['price_or_returns_calc'])

initial_capital        = float(cfg['run']['initial_capital'])

rolling_cov_window     = int(cfg['risk_and_sizing']['rolling_cov_window'])
volatility_window      = int(cfg['risk_and_sizing']['volatility_window'])

# stop loss strategy (new)
stop_loss_strategy     = str(cfg['risk_and_sizing']['stop_loss_strategy'])
rolling_atr_window     = int(cfg['risk_and_sizing']['rolling_atr_window'])
atr_multiplier         = float(cfg['risk_and_sizing']['atr_multiplier'])
highest_high_window    = int(cfg['risk_and_sizing']['highest_high_window'])

# cooldown (new)
cooldown_counter_threshold = int(cfg['execution_and_costs']['cooldown_counter_threshold'])

# target vol (new value)
annualized_target_volatility = float(cfg['risk_and_sizing']['annualized_target_volatility'])

transaction_cost_est   = float(cfg['execution_and_costs']['transaction_cost_est'])
passive_trade_rate     = float(cfg['execution_and_costs']['passive_trade_rate'])
notional_threshold_pct = float(cfg['execution_and_costs']['notional_threshold_pct'])
min_trade_notional_abs = float(cfg['execution_and_costs']['min_trade_notional_abs'])

rolling_sharpe_window  = int(cfg['risk_and_sizing']['rolling_sharpe_window'])
cash_buffer_percentage = float(cfg['risk_and_sizing']['cash_buffer_percentage'])
annual_trading_days    = int(cfg['run']['annual_trading_days'])

use_coinbase_data      = bool(cfg['data']['use_coinbase_data'])
use_saved_files        = bool(cfg['data']['use_saved_files'])
saved_file_end_date    = str(cfg['data']['saved_file_end_date'])

use_specific_start_date = bool(cfg['run']['use_specific_start_date'])
signal_start_date       = pd.Timestamp(cfg['run']['signal_start_date']).date()


In [None]:
ticker_list

In [None]:
# Run the same strategy call as the production backtest, but over the expanded
# ticker_list bound in the preceding config cell.
# Differences from the production call: use_saved_files is forced to False
# (the config value is ignored). As in the production call, signal_start_date
# is given start_date rather than the signal_start_date variable.
df_final_prod_config_expanded = tf.apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=ticker_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=r2_confirm_days,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=False, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)
# Keep only rows on or after start_date (drops the warm-up period).
df_final_prod_config_expanded = df_final_prod_config_expanded[df_final_prod_config_expanded.index >= pd.Timestamp(start_date)]

print('Calculating In Sample Asset Returns!!')
df_final_prod_config_expanded = perf.calculate_asset_level_returns(df_final_prod_config_expanded, end_date, ticker_list)

# Portfolio-level metrics, computed with fees excluded.
# NOTE(review): passive_trade_rate, annual_trading_days and transaction_cost_est
# are literals here rather than the config-derived variables of the same names.
portfolio_perf_metrics_prod_config_expanded = calculate_risk_and_performance_metrics(df_final_prod_config_expanded, strategy_daily_return_col=f'portfolio_daily_pct_returns',
                                                                                     strategy_trade_count_col=f'count_of_positions', include_transaction_costs_and_fees=False, passive_trade_rate=0.05, annual_trading_days=365, transaction_cost_est=0.001)
portfolio_perf_metrics_prod_config_expanded

In [None]:
# Per-ticker metrics for the expanded-universe run, keyed by ticker.
# Iterates the module-level ticker_list, i.e. whatever the most recently run
# config cell bound it to.
ticker_perf_prod_config_expanded = {}
for t in ticker_list:
    # Each ticker uses its own '<ticker>_daily_pct_returns' and
    # '<ticker>_position_count' columns; fees are excluded.
    _ticker_perf = perf.calculate_risk_and_performance_metrics(
        df_final_prod_config_expanded,
        strategy_daily_return_col=f'{t}_daily_pct_returns',
        strategy_trade_count_col=f'{t}_position_count',
        annual_trading_days=365,
        include_transaction_costs_and_fees=False
    )
    ticker_perf_prod_config_expanded[t] = _ticker_perf

In [None]:
ticker_perf_prod_config

In [None]:
ticker_perf_prod_config_expanded

In [None]:
plot_signal_performance(df_1=df_final_prod_config, df_2=df_final_prod_config_expanded, ticker='BTC-USD')

In [None]:
df_final_prod_config_expanded.head()

In [None]:
df_final_prod_config_expanded['equity_curve'].plot()

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in cfg['universe']['tickers']]
df_final_prod_config[final_signal_cols].plot(figsize=(10, 8))

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in ticker_list]
df_final_prod_config_expanded[final_signal_cols].plot(figsize=(10, 8))

In [None]:
# Position notionals for the production universe, taken from the production
# run. The neighbouring cells alternate "prod tickers + prod frame" with
# "expanded tickers + expanded frame"; this cell previously paired the prod
# ticker list with the expanded frame, so the production run's notionals were
# never shown.
return_cols = [f'{ticker}_actual_position_notional' for ticker in cfg['universe']['tickers']]
df_final_prod_config[return_cols].plot(figsize=(10, 8))

In [None]:
return_cols = [f'{ticker}_actual_position_notional' for ticker in ticker_list]
df_final_prod_config_expanded[return_cols].plot(figsize=(10, 8))

In [None]:
return_cols = [f'{ticker}_daily_pnl' for ticker in cfg['universe']['tickers']]
df_final_prod_config[return_cols].plot(figsize=(10, 8))

In [None]:
return_cols = [f'{ticker}_daily_pnl' for ticker in ticker_list]
df_final_prod_config_expanded[return_cols].plot(figsize=(10, 8))

In [None]:
return_cols = [f'{ticker}_daily_pct_returns' for ticker in ticker_list]
df_final_prod_config_expanded[return_cols].plot(figsize=(10, 8))

## Get a list of all tradeable coins from Coinbase on any given day

In [23]:
def product_to_dict(p):
    """
    Convert one product record into a plain ``dict``.

    Strategies are tried in this order; the first that applies wins:

    1. ``p`` is already a ``dict``      -> shallow copy of it
    2. ``p.model_dump`` (Pydantic v2)   -> ``p.model_dump(exclude_none=True)``
    3. ``p.dict`` (Pydantic v1)         -> ``p.dict()``
    4. ``p.model_dump_json`` / ``p.json`` -> the JSON text parsed with ``json.loads``
    5. ``p.__dict__``                   -> its attributes, skipping names that
       start with an underscore
    6. anything else                    -> ``{"raw": str(p)}``

    Parameters
    ----------
    p : object
        A product as returned by the exchange client.

    Returns
    -------
    dict
    """
    # Imported locally: the JSON fallbacks below need it and the notebook's
    # import cell does not import json, so they used to raise NameError.
    import json

    # Already a mapping: pass it through instead of collapsing it to {"raw": ...}.
    if isinstance(p, dict):
        return dict(p)

    # Pydantic v2
    fn = getattr(p, "model_dump", None)
    if callable(fn):
        return fn(exclude_none=True)  # or fn() if you want Nones

    # Pydantic v1
    fn = getattr(p, "dict", None)
    if callable(fn):
        return fn()

    # JSON fallbacks (v2 and v1 respectively)
    for attr in ("model_dump_json", "json"):
        fn = getattr(p, attr, None)
        if callable(fn):
            return json.loads(fn())

    # Last-resort: plain object
    if hasattr(p, "__dict__"):
        return {k: v for k, v in vars(p).items() if not k.startswith("_")}

    return {"raw": str(p)}

In [25]:
## Get a snapshot of all the available coins to trade
# Quote currency used by coinbase_product_snapshot to filter products.
CANON_QUOTE = "USD"
# Output folders for the universe data. Each directory is created when this
# cell runs (parents included; no error if it already exists).
# NOTE(review): these are absolute paths on one developer's machine; the cell
# will create that tree on any machine where it is run.
PRODUCTS_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/products")
PRODUCTS_DIR.mkdir(parents=True, exist_ok=True)
LIQUIDITY_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/liquidity")
LIQUIDITY_DIR.mkdir(parents=True, exist_ok=True)
ELIGIBLE_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/eligible_products")
ELIGIBLE_DIR.mkdir(parents=True, exist_ok=True)

def coinbase_product_snapshot(client, asof=None, save=True):
    """
    Snapshot the exchange's currently tradable USD spot products.

    Fetches ``client.get_products()['products']``, flattens each product with
    ``product_to_dict`` + ``pd.json_normalize``, keeps a fixed set of columns,
    coerces the numeric ones, and filters to products that are SPOT, quoted in
    ``CANON_QUOTE``, ``status == "online"`` and not flagged as trading-disabled,
    disabled, view-only, cancel-only or in auction mode.

    Parameters
    ----------
    client : object
        Exchange client exposing ``get_products()``.
    asof : str or date-like, optional
        Snapshot date. Defaults to today's UTC date as an ISO string.
    save : bool, default True
        When True, write the result to ``PRODUCTS_DIR / f"{asof}_prod.parquet"``.

    Returns
    -------
    pandas.DataFrame
        The filtered products plus an ``asof_date`` column.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the product payload.
    """
    asof = asof or datetime.now(timezone.utc).date().isoformat()
    prod = client.get_products()['products']
    rows = [product_to_dict(p) for p in prod]
    df = pd.json_normalize(rows)

    # optional: keep only columns you care about
    reqd_cols = [
        "product_id","base_currency_id","quote_currency_id","product_type","status",
        "trading_disabled","is_disabled","cancel_only","limit_only","post_only","auction_mode","view_only",
        "base_increment","quote_increment","price_increment","base_min_size","quote_min_size",
        "alias","alias_to","display_name","product_venue","new_at","price","approximate_quote_24h_volume"
    ]
    # .copy(): columns are assigned below, so work on an independent frame
    # rather than on a selection of the normalized payload.
    df = df[reqd_cols].copy()

    # optional: coerce numerics (unparseable values become NaN)
    num_cols = ["base_increment","quote_increment","price_increment","base_min_size","quote_min_size",
                "price","approximate_quote_24h_volume"]
    for col in num_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Filter to USD spot & tradable
    # NOTE(review): the `~` negations assume the flag columns are boolean-typed.
    filt = (
        (df["product_type"] == "SPOT") &
        (df["quote_currency_id"] == CANON_QUOTE) &
        (df["status"] == "online") &
        (~df["trading_disabled"]) &
        (~df["is_disabled"]) &
        (~df["view_only"]) &
        (~df["cancel_only"]) &
        (~df["auction_mode"])
    )
    # .copy() again: the asof_date column is added to the filtered rows. The
    # global warnings filter in this notebook would otherwise hide the
    # chained-assignment warning this pattern triggers.
    df = df.loc[filt].copy()

    df["asof_date"] = pd.to_datetime(asof).date()

    if save:
        out = PRODUCTS_DIR / f"{asof}_prod.parquet"
        df.to_parquet(out, index=False)

    return df

In [27]:
def products_diff(prev_path, curr_path, id_col=None):
    """
    Compare two product-snapshot parquet files and report listing changes.

    Parameters
    ----------
    prev_path, curr_path : path-like
        Parquet files holding the earlier and the later snapshot.
    id_col : str, optional
        Column that identifies a product. When omitted, each file is inspected
        separately: a ``ticker`` column is used if present (the column this
        function originally required), otherwise ``product_id`` - the
        identifier column written by ``coinbase_product_snapshot``, whose
        files have no ``ticker`` column.

    Returns
    -------
    dict
        ``{"adds": [...], "drops": [...]}`` - identifiers only in the current
        snapshot and only in the previous one, each sorted.

    Raises
    ------
    KeyError
        If a file contains none of the candidate identifier columns.
    """
    candidates = (id_col,) if id_col is not None else ("ticker", "product_id")

    def _identifiers(path):
        # Load one snapshot and return its identifiers as a set.
        frame = pd.read_parquet(path)
        for column in candidates:
            if column in frame.columns:
                return set(frame[column])
        raise KeyError(
            f"{path}: none of the identifier columns {list(candidates)} found; "
            f"available columns: {list(frame.columns)}"
        )

    prev_set = _identifiers(prev_path)
    curr_set = _identifiers(curr_path)

    return {"adds": sorted(curr_set - prev_set), "drops": sorted(prev_set - curr_set)}

In [29]:
## Get OHLC data for each coin
def get_coinbase_candle_data(client, product_id, start_date, end_date):
    """
    Fetch daily candles for one product and return them as a DataFrame.

    ``start_date`` and ``end_date`` are converted to integer epoch seconds and
    passed to ``client.get_candles`` with ``granularity='ONE_DAY'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``low, high, open, close, volume`` as floats, sorted by and
        indexed on ``date`` (the candle's ``start`` epoch converted to a UTC
        calendar date). When the response carries no candles, an empty frame
        with those five float64 columns is returned instead.
    """
    price_fields = ['low', 'high', 'open', 'close', 'volume']

    window_start = int(pd.Timestamp(start_date).timestamp())
    window_end = int(pd.Timestamp(end_date).timestamp())

    response = client.get_candles(
        product_id=product_id,
        start=window_start,
        end=window_end,
        granularity='ONE_DAY',
    )
    candle_list = response.candles or []

    # Nothing came back: hand over an empty frame with the expected schema.
    if not candle_list:
        return pd.DataFrame(columns=price_fields).astype(dict.fromkeys(price_fields, 'float64'))

    records = []
    for candle in candle_list:
        record = {'date': candle['start']}
        for field in price_fields:
            record[field] = float(candle[field])
        records.append(record)

    frame = pd.DataFrame(records)
    # 'start' is epoch seconds (possibly as text); convert to a UTC calendar date.
    epoch_seconds = pd.to_numeric(frame['date'], errors='coerce')
    frame['date'] = pd.to_datetime(epoch_seconds, unit='s', utc=True).dt.date
    frame = frame.sort_values('date')
    return frame.set_index('date')

In [31]:
## Check if enough history is available and if the liquidity metrics meet all required thresholds
def has_warmup_coverage(client, product_id: str, asof_date, warmup_days: int) -> bool:
    """
    Check whether the API returns any daily candle for the warm-up boundary day.

    The query window is exactly one day wide: it starts at
    ``asof_date - warmup_days`` and ends one day later. No earlier history
    is inspected.

    Parameters
    ----------
    client : object exposing ``get_candles(product_id=, start=, end=, granularity=)``.
    product_id : product identifier forwarded to the API.
    asof_date : 'YYYY-MM-DD' string, date, or datetime.
    warmup_days : number of days to step back from ``asof_date``.

    Returns
    -------
    bool : True when the response carries a non-empty ``candles`` attribute.
    """
    # Boundary day and the day after it, as calendar dates.
    boundary_day = pd.Timestamp(asof_date).date() - pd.Timedelta(days=warmup_days)
    day_after = boundary_day + pd.Timedelta(days=1)

    # The API takes integer epoch seconds.
    start_timestamp, end_timestamp = (
        int(pd.Timestamp(day).timestamp()) for day in (boundary_day, day_after)
    )

    response = client.get_candles(
        product_id=product_id,
        start=start_timestamp,
        end=end_timestamp,
        granularity="ONE_DAY",   # required enum value
    )
    # A missing or None 'candles' attribute counts as no coverage.
    found = getattr(response, "candles", []) or []
    return True if found else False


def get_liquidity_metrics(client, product_id, asof_date, lookback_day_count=90):
    """Attach liquidity columns to the daily candles of one product.

    Candles are requested from ``asof_date - lookback_day_count`` days up to
    ``asof_date`` via ``get_coinbase_candle_data``. Four columns are added:

    - ``notional_usd``: ``volume * close``
    - ``adv_90d_median``: rolling median of ``notional_usd``
    - ``high_low_spread_bps``: ``(high - low)`` over the high/low midpoint, times 10000
    - ``high_low_spread_90d_median``: rolling median of ``high_low_spread_bps``

    Note: both rolling windows are fixed at 90 rows; ``lookback_day_count``
    only controls how much history is fetched.

    Returns the candle frame with the extra columns.
    """
    window_end = pd.Timestamp(asof_date).date()
    window_start = window_end - pd.Timedelta(days=lookback_day_count)
    candles = get_coinbase_candle_data(client, product_id=product_id, start_date=window_start, end_date=window_end)

    # Traded notional and its rolling median.
    notional = candles['volume'] * candles['close']
    candles['notional_usd'] = notional
    candles['adv_90d_median'] = notional.rolling(90).median()

    # High-low range relative to the midpoint, in basis points.
    price_range = candles['high'] - candles['low']
    mid_price = (candles['high'] + candles['low']) / 2
    spread_bps = price_range / mid_price * 10000
    candles['high_low_spread_bps'] = spread_bps
    candles['high_low_spread_90d_median'] = spread_bps.rolling(90).median()

    return candles

In [33]:
## Get liquidity metrics for all coins
def get_liquidity_metrics_all_tickers_monthly(client, product_id_list, asof_date, lookback_day_count=90, warmup_days=300, save=True):
    """Collect as-of liquidity metrics and warm-up coverage for many products.

    For every product, ``get_liquidity_metrics`` is called and the row for
    ``asof_date`` is read. Products for which that lookup raises ``KeyError``
    (for example, no candle on the as-of date) are skipped.

    Parameters
    ----------
    client : API client forwarded to the helper functions.
    product_id_list : iterable of product identifiers.
    asof_date : date-like; also used verbatim in the output file name.
    lookback_day_count : forwarded to ``get_liquidity_metrics``.
    warmup_days : forwarded to ``has_warmup_coverage``.
    save : when True, write the result to
        ``LIQUIDITY_DIR / f"{asof_date}_liquidity.parquet"``.

    Returns
    -------
    pandas.DataFrame
        One row per retained product with columns ``asof_date``,
        ``product_id``, ``adv_90d_median``, ``high_low_spread_90d_median``
        and ``warmup_days_available``.
    """
    columns = ['asof_date', 'product_id', 'adv_90d_median', 'high_low_spread_90d_median', 'warmup_days_available']
    asof_key = pd.Timestamp(asof_date).date()

    # Collect plain records and build the frame once at the end, instead of
    # enlarging a DataFrame one row at a time inside the loop.
    records = []
    for product_id in product_id_list:
        try:
            df = get_liquidity_metrics(client, product_id, asof_date=asof_date, lookback_day_count=lookback_day_count)
            asof_row = df.loc[asof_key]  # KeyError when there is no row for the as-of date
            records.append({
                'asof_date': asof_date,
                'product_id': product_id,
                'adv_90d_median': asof_row['adv_90d_median'],
                'high_low_spread_90d_median': asof_row['high_low_spread_90d_median'],
                'warmup_days_available': has_warmup_coverage(client, product_id=product_id, asof_date=asof_date, warmup_days=warmup_days),
            })
        except KeyError:
            # Best-effort: products without usable as-of data are left out.
            continue

    df_liquidity = pd.DataFrame(records, columns=columns)

    if save:
        out = LIQUIDITY_DIR / f"{asof_date}_liquidity.parquet"
        df_liquidity.to_parquet(out, index=False)

    return df_liquidity

## Get a list of all eligible coins from all the coins based on liquidity requirements
def get_eligible_ticker_list_monthly(df, asof_date, median_adv_col='adv_90d_median', median_high_low_spread_col='high_low_spread_90d_median', 
                                     warmup_days_col='warmup_days_available', adv_quantile_threshold=0.60, high_low_quantile_threshold=0.60, save=True):
    """Filter a liquidity table down to the eligible products.

    A row is kept when all of the following hold:

    - its ``warmup_days_col`` flag is truthy,
    - its ADV is at or above the ``adv_quantile_threshold`` quantile of the
      non-null ADV values,
    - its high-low spread is at or below the ``high_low_quantile_threshold``
      quantile of the non-null spread values (the returned "floor" is used
      as an upper bound),
    - its ``product_id`` is not one of the excluded stablecoin pairs.

    When ``save`` is True the result is written to
    ``ELIGIBLE_DIR / f"{asof_date}_eligible.parquet"``.

    Returns ``(df_eligible, adv_usd_floor, high_low_spread_floor)``.
    """
    ## Thresholds are computed over the non-null values of each metric
    adv_values = df[median_adv_col]
    adv_usd_floor = np.quantile(adv_values[adv_values.notnull()], q=adv_quantile_threshold)

    spread_values = df[median_high_low_spread_col]
    high_low_spread_floor = np.quantile(spread_values[spread_values.notnull()], q=high_low_quantile_threshold)

    ## Stablecoin pairs are never eligible
    exclusions = ['USDC-USD', 'DAI-USD', 'USDT-USD']

    ## One named mask per criterion, combined in the same order as before
    has_warmup = df[warmup_days_col]
    liquid_enough = df[median_adv_col] >= adv_usd_floor
    tight_enough = df[median_high_low_spread_col] <= high_low_spread_floor
    not_excluded = ~df['product_id'].isin(exclusions)
    eligible_cond = has_warmup & liquid_enough & tight_enough & not_excluded

    df_eligible = df[eligible_cond].reset_index(drop=True)

    if save:
        out = ELIGIBLE_DIR / f"{asof_date}_eligible.parquet"
        df_eligible.to_parquet(out, index=False)

    return df_eligible, adv_usd_floor, high_low_spread_floor

In [35]:
from concurrent.futures import ThreadPoolExecutor
import time, random
import pandas as pd
import numpy as np

# ---- worker: minimal jitter + tiny retry on 429, nothing else ----
def _call_with_retry(fn, *args, **kwargs):
    for i in range(2):  # at most 2 tries
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            status = getattr(getattr(e, "response", None), "status_code", None)
            if status == 429:
                # quick backoff (200ms, then 400ms) + jitter
                time.sleep(0.2 * (2**i) + random.uniform(0.0, 0.08))
                continue
            raise
    # last attempt without catching
    return fn(*args, **kwargs)

def _one_product_liquidity(client, pid, asof_date, lookback_day_count):
    """Worker: return the latest liquidity metrics of one product as a dict.

    ``get_liquidity_metrics`` is invoked through ``_call_with_retry``. If the
    as-of date is not in the result's index, the frame is cut at the as-of
    date first; the last remaining row is then used.

    Returns ``None`` when there is no data, or when either median metric is
    not finite. Otherwise returns a dict with keys ``asof_date``,
    ``product_id``, ``adv_90d_median`` and ``high_low_spread_90d_median``.
    """
    asof_key = pd.Timestamp(asof_date).date()

    # Tiny random delay so the calls of one batch do not fire simultaneously.
    time.sleep(random.uniform(0.0, 0.02))

    metrics = _call_with_retry(
        get_liquidity_metrics,
        client=client, product_id=pid, asof_date=asof_key, lookback_day_count=lookback_day_count
    )
    if metrics is None or metrics.empty:
        return None

    # No exact as-of row: fall back to whatever lies at or before that date.
    if asof_key not in metrics.index:
        metrics = metrics.loc[:asof_key]
        if metrics.empty:
            return None

    latest = metrics.iloc[-1]
    metric_names = ("adv_90d_median", "high_low_spread_90d_median")
    values = [latest.get(name, np.nan) for name in metric_names]
    if not all(np.isfinite(value) for value in values):
        return None

    result = {"asof_date": asof_key, "product_id": pid}
    for name, value in zip(metric_names, values):
        result[name] = float(value)
    return result

# ---- simple batched submit: keeps it fast but smooth ----
def _batched(seq, n):
    for i in range(0, len(seq), n):
        yield seq[i:i+n]

def get_liquidity_metrics_all_tickers_daily_fast(
    client, product_id_list, asof_date,
    lookback_day_count=90, 
    max_workers=12, batch_size=16, between_batches_sleep=0.10, save=True
):
    """Fetch as-of liquidity metrics for many products using a thread pool.

    Only ids ending in ``-USD`` that are not in the stablecoin set are
    queried. Ids are processed in batches of ``batch_size``; each batch gets
    its own ``ThreadPoolExecutor`` with ``max_workers`` threads, and the
    function sleeps ``between_batches_sleep`` seconds after every batch
    (skipped when that value is falsy).

    ``save`` is accepted but never read by this function; nothing is written
    to disk.

    Returns a DataFrame with columns ``asof_date``, ``product_id``,
    ``adv_90d_median`` and ``high_low_spread_90d_median``; products whose
    worker returned nothing are omitted.
    """
    STABLECOINS = {'USDC-USD','DAI-USD','USDT-USD'}
    # Upfront pruning: USD-quoted, non-stablecoin products only.
    pids = [p for p in product_id_list if p.endswith("-USD") and p not in STABLECOINS]

    def _fetch(pid):
        return _one_product_liquidity(client, pid, asof_date, lookback_day_count)

    rows = []

    # Submitting batch by batch avoids one huge burst of requests.
    for batch in _batched(pids, batch_size):
        with ThreadPoolExecutor(max_workers=max_workers) as ex:
            # map() hands results back in submission order.
            rows.extend(res for res in ex.map(_fetch, batch, chunksize=1) if res)

        # Short pause between bursts; tune 0.05–0.15s if 429s still show up.
        if between_batches_sleep:
            time.sleep(between_batches_sleep)

    return pd.DataFrame(rows, columns=[
        "asof_date","product_id","adv_90d_median","high_low_spread_90d_median"
    ])

## Get a list of all eligible coins from all the coins based on liquidity requirements
def get_eligible_ticker_list_daily(df, asof_date, median_adv_col='adv_90d_median', median_high_low_spread_col='high_low_spread_90d_median', 
                                   adv_quantile_threshold=0.60, high_low_quantile_threshold=0.60, save=True):
    """Select the eligible products from a daily liquidity table.

    A row qualifies when its ADV is at or above the ``adv_quantile_threshold``
    quantile of the non-null ADV values, its high-low spread is at or below
    the ``high_low_quantile_threshold`` quantile of the non-null spreads (so
    the returned spread "floor" acts as an upper bound), and its
    ``product_id`` is not a stablecoin pair.

    With ``save=True`` the eligible rows are written to
    ``ELIGIBLE_DIR / f"{asof_date}_eligible.parquet"``.

    Returns ``(df_eligible, adv_usd_floor, high_low_spread_floor)``.
    """
    STABLECOINS = ['USDC-USD', 'DAI-USD', 'USDT-USD']

    ## Quantile thresholds, ignoring missing values
    adv_usd_floor = np.quantile(df[median_adv_col].dropna(), q=adv_quantile_threshold)
    high_low_spread_floor = np.quantile(df[median_high_low_spread_col].dropna(), q=high_low_quantile_threshold)

    ## Build the eligibility mask one criterion at a time
    keep = df[median_adv_col] >= adv_usd_floor
    keep = keep & (df[median_high_low_spread_col] <= high_low_spread_floor)
    keep = keep & (~df['product_id'].isin(STABLECOINS))

    df_eligible = df.loc[keep].reset_index(drop=True)

    if save:
        out = ELIGIBLE_DIR / f"{asof_date}_eligible.parquet"
        df_eligible.to_parquet(out, index=False)

    return df_eligible, adv_usd_floor, high_low_spread_floor

In [37]:
# Run parameters for the single-date checks below.
asof_date = pd.Timestamp('2025-10-24').date()
lookback_day_count = 90
# Warm-up length is read from the YAML config (cfg must already be loaded).
warmup_days = cfg['run']['warmup_days']
# REST client for the 'Default' portfolio; used by every API call below.
client = cn.get_coinbase_rest_api_client(portfolio_name='Default')

In [None]:
# Product snapshot stamped 2025-10-23 (the day before asof_date); save=True also writes it to disk.
df_products_t_1 = coinbase_product_snapshot(client, asof=pd.Timestamp('2025-10-23').date(), save=True)

In [None]:
# Product snapshot stamped asof_date; save=True also writes it to disk.
df_products = coinbase_product_snapshot(client, asof=asof_date, save=True)

In [None]:
# (rows, columns) of the as-of product snapshot.
df_products.shape

In [None]:
%%time
## Get the product snapshot, liquidity metrics and eligible ticker list on a monthly frequency from 2022-03-01 to 2025-07-31 
## Each step is called with save=True (explicitly or by default), so every iteration writes its parquet output.
lookback_day_count = 90
warmup_days = 300
adv_quantile = 0.60
high_low_quantile = 0.60
# freq='M' yields month-end timestamps inside the start/end range.
date_range = pd.date_range(start=pd.Timestamp('2022-03-01').date(), end=pd.Timestamp('2025-07-31').date(), freq='M')
for date in date_range:
    print(date)
    # 1) product snapshot stamped with this month-end
    df_products = coinbase_product_snapshot(client, asof=date.date(), save=True)
    curr_product_list = df_products.product_id.unique().tolist()
    # 2) as-of liquidity metrics plus warm-up coverage flag for every product
    df_liquidity_ticker = get_liquidity_metrics_all_tickers_monthly(client, product_id_list=curr_product_list, asof_date=date.date(),
                                                                    lookback_day_count=lookback_day_count, warmup_days=warmup_days, save=True)
    # 3) quantile-based eligibility filter (save defaults to True here)
    df_eligible, adv_usd_floor, high_low_spread_floor = get_eligible_ticker_list_monthly(df=df_liquidity_ticker, asof_date=date.date(), median_adv_col='adv_90d_median',
                                                                                         median_high_low_spread_col='high_low_spread_90d_median', 
                                                                                         warmup_days_col='warmup_days_available',
                                                                                         adv_quantile_threshold=adv_quantile, high_low_quantile_threshold=high_low_quantile)

In [None]:
%%time
## Get the product snapshot, liquidity metrics and eligible ticker list for a single date using the threaded daily path
lookback_day_count = 90
warmup_days = 300
adv_quantile = 0.60
high_low_quantile = 0.60
# NOTE: date_range is not used below (the loop over it is commented out); only the single date is processed.
date_range = pd.date_range(start=pd.Timestamp('2022-03-01').date(), end=pd.Timestamp('2025-07-31').date(), freq='M')
# for date in date_range:
date = pd.Timestamp('2025-11-03')
print(date)
df_products = coinbase_product_snapshot(client, asof=date.date(), save=True)
curr_product_list = df_products.product_id.unique().tolist()
# Threaded fetch of the as-of liquidity metrics (no warm-up coverage column on this path).
df_liquidity_ticker_fast = get_liquidity_metrics_all_tickers_daily_fast(client, product_id_list=curr_product_list, asof_date=date.date(),
                                                                        lookback_day_count=lookback_day_count, max_workers=5, batch_size=12, between_batches_sleep=0.10, save=False)
# save is not passed here, so the default save=True writes the eligible parquet for this date.
df_eligible_test, adv_usd_floor, high_low_spread_floor = get_eligible_ticker_list_daily(df=df_liquidity_ticker_fast, asof_date=date.date(), median_adv_col='adv_90d_median',
                                                                                        median_high_low_spread_col='high_low_spread_90d_median', 
                                                                                        adv_quantile_threshold=adv_quantile, high_low_quantile_threshold=high_low_quantile)

In [None]:
# Eligible products from the single-date daily run.
df_eligible_test

In [None]:
# NOTE(review): df_eligible is not assigned by the single-date cell (that one sets df_eligible_test); confirm which run this shape refers to.
df_eligible.shape

In [None]:
# Liquidity metrics returned by the threaded daily fetch.
df_liquidity_ticker_fast

In [None]:
import pandas as pd

# cfg is assumed to be already loaded from the YAML config earlier in this notebook

# --- Prod Configuration (from cfg) ---
# Each config section is bound to a short alias right before its first use,
# then the individual settings are read (and cast) from that alias.
_run_cfg = cfg['run']
start_date  = pd.Timestamp(_run_cfg['start_date']).date()
end_date    = pd.Timestamp(_run_cfg['end_date']).date()
warmup_days = int(_run_cfg['warmup_days'])

# ticker_list = list(cfg['universe']['tickers'])
ticker_list = ['BTC-USD', 'ETH-USD', 'SOL-USD', 'ADA-USD', 'AVAX-USD', 'LTC-USD', 'DOGE-USD', 'CRO-USD']

# signals.moving_average
_signals_cfg = cfg['signals']
_mavg_cfg = _signals_cfg['moving_average']
fast_mavg           = int(_mavg_cfg['fast_mavg'])
slow_mavg           = int(_mavg_cfg['slow_mavg'])
mavg_stepsize       = int(_mavg_cfg['mavg_stepsize'])
mavg_z_score_window = int(_mavg_cfg['mavg_z_score_window'])

# signals.donchian
_donchian_cfg = _signals_cfg['donchian']
entry_rolling_donchian_window = int(_donchian_cfg['entry_rolling_donchian_window'])
exit_rolling_donchian_window  = int(_donchian_cfg['exit_rolling_donchian_window'])
use_donchian_exit_gate        = bool(_donchian_cfg['use_donchian_exit_gate'])

# signals.weighting
_weighting_cfg = _signals_cfg['weighting']
ma_crossover_signal_weight = float(_weighting_cfg['ma_crossover_signal_weight'])
donchian_signal_weight     = float(_weighting_cfg['donchian_signal_weight'])
weighted_signal_ewm_window = int(_weighting_cfg['weighted_signal_ewm_window'])  # (new config but same value)

# signals.filters.rolling_r2
_r2_cfg = _signals_cfg['filters']['rolling_r2']
rolling_r2_window   = int(_r2_cfg['rolling_r2_window'])
lower_r_sqr_limit   = float(_r2_cfg['lower_r_sqr_limit'])
upper_r_sqr_limit   = float(_r2_cfg['upper_r_sqr_limit'])
r2_smooth_window    = int(_r2_cfg['r2_smooth_window'])
# Hard-coded override; the config value is intentionally not read here.
r2_confirm_days     = 2 #int(cfg['signals']['filters']['rolling_r2']['r2_confirm_days'])
r2_strong_threshold = float(_r2_cfg['r2_strong_threshold'])

# signals.filters.vol_of_vol
_vov_cfg = _signals_cfg['filters']['vol_of_vol']
log_std_window            = int(_vov_cfg['log_std_window'])
coef_of_variation_window  = int(_vov_cfg['coef_of_variation_window'])
vol_of_vol_z_score_window = int(_vov_cfg['vol_of_vol_z_score_window'])
vol_of_vol_p_min          = float(_vov_cfg['vol_of_vol_p_min'])

# signals.activation
_activation_cfg = _signals_cfg['activation']
use_activation                = bool(_activation_cfg['use_activation'])
tanh_activation_constant_dict = _activation_cfg['tanh_activation_constant_dict']  # likely None

# data / run toggles
_data_cfg = cfg['data']
moving_avg_type        = str(_data_cfg['moving_avg_type'])
long_only              = bool(_run_cfg['long_only'])
price_or_returns_calc  = str(_data_cfg['price_or_returns_calc'])

initial_capital        = float(_run_cfg['initial_capital'])

_risk_cfg = cfg['risk_and_sizing']
rolling_cov_window     = int(_risk_cfg['rolling_cov_window'])
volatility_window      = int(_risk_cfg['volatility_window'])

# stop loss strategy (new)
stop_loss_strategy     = str(_risk_cfg['stop_loss_strategy'])
rolling_atr_window     = int(_risk_cfg['rolling_atr_window'])
atr_multiplier         = float(_risk_cfg['atr_multiplier'])
highest_high_window    = int(_risk_cfg['highest_high_window'])

# cooldown (new)
_exec_cfg = cfg['execution_and_costs']
cooldown_counter_threshold = int(_exec_cfg['cooldown_counter_threshold'])

# target vol (new value)
annualized_target_volatility = float(_risk_cfg['annualized_target_volatility'])

transaction_cost_est   = float(_exec_cfg['transaction_cost_est'])
passive_trade_rate     = float(_exec_cfg['passive_trade_rate'])
notional_threshold_pct = float(_exec_cfg['notional_threshold_pct'])
min_trade_notional_abs = float(_exec_cfg['min_trade_notional_abs'])

rolling_sharpe_window  = int(_risk_cfg['rolling_sharpe_window'])
cash_buffer_percentage = float(_risk_cfg['cash_buffer_percentage'])
annual_trading_days    = int(_run_cfg['annual_trading_days'])

use_coinbase_data      = bool(_data_cfg['use_coinbase_data'])
use_saved_files        = bool(_data_cfg['use_saved_files'])
saved_file_end_date    = str(_data_cfg['saved_file_end_date'])

use_specific_start_date = bool(_run_cfg['use_specific_start_date'])
signal_start_date       = pd.Timestamp(_run_cfg['signal_start_date']).date()

In [39]:
def get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
        start_date, end_date, ticker_list, fast_mavg, slow_mavg, mavg_stepsize, mavg_z_score_window,
        entry_rolling_donchian_window, exit_rolling_donchian_window, use_donchian_exit_gate,
        ma_crossover_signal_weight, donchian_signal_weight, weighted_signal_ewm_window,
        rolling_r2_window=30, lower_r_sqr_limit=0.2, upper_r_sqr_limit=0.8, r2_smooth_window=3, r2_confirm_days=0,
        log_std_window=14, coef_of_variation_window=30, vol_of_vol_z_score_window=252, vol_of_vol_p_min=0.6,
        r2_strong_threshold=0.8, use_activation=True, tanh_activation_constant_dict=None, moving_avg_type='exponential',
        long_only=False, price_or_returns_calc='price', use_coinbase_data=True, use_saved_files=True,
        saved_file_end_date='2025-07-31'):
    """Build one wide frame of trend-signal columns for a list of tickers.

    For each ticker,
    ``tf.generate_trend_signal_with_donchian_channel_continuous_with_rolling_r_sqr_vol_of_vol``
    is called with the given settings, and these columns are kept:
    ``{ticker}_close``, ``{ticker}_open``, ``{ticker}_mavg_ribbon_slope``,
    ``{ticker}_mavg_ribbon_rank``, ``{ticker}_final_weighted_additive_signal``,
    ``{ticker}_rolling_r_sqr`` and ``{ticker}_final_signal``. The per-ticker
    frames are concatenated side by side (``axis=1``).

    Note: this notebook-level definition shadows the function of the same
    name imported from ``strategy_signal.trend_following_signal``.

    Parameters
    ----------
    start_date, end_date : window forwarded unchanged to the signal generator.
    ticker_list : iterable of ticker strings; must not be empty.
    (all remaining parameters are forwarded unchanged to the signal generator)

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of the selected per-ticker columns.

    Raises
    ------
    ValueError
        If ``ticker_list`` is empty (previously surfaced later as
        ``pd.concat``'s "No objects to concatenate").
    """
    tickers = list(ticker_list)
    if not tickers:
        raise ValueError("ticker_list must contain at least one ticker")

    ## Generate trend signal for all tickers
    trend_list = []
    for ticker in tickers:
        df_trend = tf.generate_trend_signal_with_donchian_channel_continuous_with_rolling_r_sqr_vol_of_vol(
            start_date=start_date, end_date=end_date, ticker=ticker, fast_mavg=fast_mavg, slow_mavg=slow_mavg,
            mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window,
            entry_rolling_donchian_window=entry_rolling_donchian_window,
            exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate,
            ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight,
            weighted_signal_ewm_window=weighted_signal_ewm_window,
            rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit,
            upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=r2_confirm_days,
            log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window,
            vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min,
            r2_strong_threshold=r2_strong_threshold,
            use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
            moving_avg_type=moving_avg_type, price_or_returns_calc=price_or_returns_calc, long_only=long_only,
            use_coinbase_data=use_coinbase_data, use_saved_files=use_saved_files,
            saved_file_end_date=saved_file_end_date)

        # Keep only the price and signal columns needed downstream, in a fixed order.
        trend_cols = [
            f'{ticker}_close',
            f'{ticker}_open',
            f'{ticker}_mavg_ribbon_slope',
            f'{ticker}_mavg_ribbon_rank',
            f'{ticker}_final_weighted_additive_signal',
            f'{ticker}_rolling_r_sqr',
            f'{ticker}_final_signal',
        ]
        trend_list.append(df_trend[trend_cols])

    return pd.concat(trend_list, axis=1)

In [None]:
# Load the saved eligible-universe files for each 2022 month-end and keep
# the product ids per date in a dict keyed by the month-end date.
month_end_dates = pd.date_range(start=pd.Timestamp('2022-03-31').date(), end=pd.Timestamp('2022-12-31').date(), freq='M')
eligible_ticker_liquidity_dict = {}
for month_end in month_end_dates:
    print(month_end)
    # File name matches what the eligibility functions write when save=True.
    out = ELIGIBLE_DIR / f"{pd.Timestamp(month_end).date()}_eligible.parquet"
    df_eligible = pd.read_parquet(out)
    eligible_ticker_list = df_eligible.product_id.unique().tolist()
    eligible_ticker_liquidity_dict[pd.Timestamp(month_end).date()] = eligible_ticker_list

In [None]:
# Per month-end: date, number of liquidity-eligible tickers, and the sorted tickers.
for k, v in eligible_ticker_liquidity_dict.items():
    print(k, len(v), sorted(v))

In [None]:
## Get Eligible Ticker List based on Liquidity Metrics and Trend Following Signal at Month-End
## For each month-end: load the saved liquidity-eligible tickers, compute their trend signals,
## and keep only tickers whose final signal on that date is strictly above the 80th percentile.
month_end_dates = pd.date_range(start=pd.Timestamp('2022-03-31').date(), end=pd.Timestamp('2022-12-31').date(), freq='M')
eligible_ticker_dict = {}
for month_end in month_end_dates:
    print(month_end)
    out = ELIGIBLE_DIR / f"{pd.Timestamp(month_end).date()}_eligible.parquet"
    df_eligible = pd.read_parquet(out)
    eligible_ticker_list = df_eligible.product_id.unique().tolist()
    # Signal window: warmup_days before the month-end through 10 days after it; saved files are not used.
    df_trend_eligible = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
        start_date=pd.Timestamp(month_end).date()-pd.Timedelta(days=warmup_days), end_date=pd.Timestamp(month_end).date() + pd.Timedelta(days=10), ticker_list=eligible_ticker_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg,
        mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window,
        entry_rolling_donchian_window=entry_rolling_donchian_window,
        exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate,
        ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight,
        weighted_signal_ewm_window=weighted_signal_ewm_window, rolling_r2_window=rolling_r2_window,
        lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window,
        r2_confirm_days=r2_confirm_days, log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window,
        vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min,
        r2_strong_threshold=r2_strong_threshold, use_activation=use_activation,
        tanh_activation_constant_dict=tanh_activation_constant_dict, moving_avg_type=moving_avg_type,
        long_only=long_only, price_or_returns_calc=price_or_returns_calc, use_coinbase_data=use_coinbase_data,
        use_saved_files=False, saved_file_end_date=saved_file_end_date)
    
    # Take the month-end row and reshape it to one (ticker, final_signal) row per ticker.
    date_cond = (df_trend_eligible.index == pd.Timestamp(month_end))
    final_signal_cols = [f'{ticker}_final_signal' for ticker in eligible_ticker_list]
    df_raw_eligible_list = df_trend_eligible[date_cond][final_signal_cols].T.reset_index()
    df_raw_eligible_list.columns = ['ticker', 'final_signal']
    df_raw_eligible_list = df_raw_eligible_list.sort_values('final_signal', ascending=False)
    # Strength cut-off: strictly greater than the 0.8 quantile of that day's final signals.
    min_strength = np.quantile(df_raw_eligible_list['final_signal'], q=0.8)
    min_strength_cond = (df_raw_eligible_list['final_signal'] > min_strength)
    df_raw_eligible_list = df_raw_eligible_list[min_strength_cond]
    # Strip the column suffix so only the ticker symbol remains.
    df_raw_eligible_list["ticker"] = df_raw_eligible_list["ticker"].str.replace(r"_final_signal$", "", regex=True)
    final_eligible_list = df_raw_eligible_list.ticker.tolist()
    eligible_ticker_dict[pd.Timestamp(month_end).date()] = final_eligible_list

In [None]:
# Per month-end: date and the sorted tickers that passed the signal-strength cut-off.
for key, value in eligible_ticker_dict.items():
    print(key, sorted(value))

In [63]:
def clamp_to_per_asset_caps(desired_delta: pd.Series,
                            current_notional_prev: pd.Series,
                            total_portfolio_value_upper_limit: float,
                            per_asset_weight_cap: float,
                            per_asset_notional_cap: float | None) -> pd.Series:
    """
    Enforce per-asset caps by limiting *post-trade* notional to:
      min(weight_cap * TPV_upper, absolute_cap if given).
    Only constrains the *upside* (you can always reduce).
    """
    if len(desired_delta) == 0:
        return desired_delta

    # cap each asset's *final* notional (current + delta)
    weight_cap_notional = per_asset_weight_cap * float(total_portfolio_value_upper_limit)
    cap_notional = weight_cap_notional if per_asset_notional_cap is None else min(weight_cap_notional, per_asset_notional_cap)

    # current long notionals; if you allow shorting, drop the clip
    curr = current_notional_prev.fillna(0.0).clip(lower=0.0)
    target_after = (curr + desired_delta).fillna(0.0)

    # where target_after exceeds cap, cut the delta so final == cap
    over = target_after > cap_notional
    if over.any():
        desired_delta.loc[over] = cap_notional - curr.loc[over]

    return desired_delta


def enforce_freeze_on_notional_deltas(desired_delta: pd.Series,
                                      current_notional_prev: pd.Series,
                                      frozen_tickers: list[str] | None,
                                      allow_adds_on_frozen: bool) -> pd.Series:
    """
    Freeze policy:
      - If allow_adds_on_frozen=True: do nothing (you may increase frozen tickers).
      - If False: you may *not* increase frozen tickers; only flat or decrease.
    """
    if not frozen_tickers:
        return desired_delta

    if allow_adds_on_frozen:
        return desired_delta

    # block positive deltas for frozen tickers
    fz = pd.Index(frozen_tickers).intersection(desired_delta.index)
    pos = desired_delta.loc[fz] > 0
    if pos.any():
        desired_delta.loc[fz[pos]] = 0.0
    return desired_delta


In [65]:
def _get_today_signals(df: pd.DataFrame, date, tickers, suffix: str) -> pd.Series:
    """Return a Series index=ticker, value=today's signal (NaN -> -inf for ranking)."""
    cols = [f"{t}{suffix}" for t in tickers]
    row = df.loc[date, cols] if date in df.index else pd.Series([np.nan]*len(cols), index=cols)
    s = row.copy()
    s.index = [c[:-len(suffix)] for c in s.index]
    return s.astype(float).fillna(-np.inf)

def _enforce_daily_position_budget(
    desired_delta: pd.Series,
    current_notional_prev: pd.Series,
    today_signals: pd.Series,
    max_positions: int | None,
    max_new_per_day: int | None,
) -> pd.Series:
    """
    Allows exits/reductions as-is. Among *entries* (delta>0 on names not currently held),
    keep only top-K by signal, where K = min(max_new_per_day, max_positions - held_count).
    """
    ## Make no changes if there is no set max positions or new max positions per day and execute desired trades
    if max_positions is None and max_new_per_day is None:
        return desired_delta

    ## Get the total number of held positions in the portfolio and the number of desired new entrants
    curr = current_notional_prev.fillna(0.0)
    held = curr > 0
    entrants = (desired_delta.fillna(0.0) > 0) & (~held)

    ## If there are no new entrants, do nothing and execute the desired trades
    if not entrants.any():
        return desired_delta

    ## If max number of positions is set, get the number of open slots in the portfolio
    held_cnt = int(held.sum())
    slots = None
    if max_positions is not None:
        slots = max(max_positions - held_cnt, 0)
    
    ## If there is a max number of new positions set, modify the number of slots based on the max_new_per_day param
    if max_new_per_day is not None:
        slots = max_new_per_day if slots is None else min(slots, max_new_per_day)

    ## If the max number of positions is not exceeded after accounting for the entrants, execute desired trades
    if slots is None or slots >= int(entrants.sum()):
        return desired_delta  # nothing to trim

    ## If the number of entrants is greater than the number of available slots, use signal strength to 
    ## rank the new entrants and pick the top few that fill up the number of open slots
    tf_signal = today_signals.reindex(desired_delta.index).fillna(-np.inf)
    tf_signal_entrants = tf_signal[entrants].sort_values(ascending=False)
    allow = set(tf_signal_entrants.index[:slots])

    ## Based on the signal strength, block the entrants that are not chosen
    block = [t for t in tf_signal_entrants.index if t not in allow]
    if block:
        desired_delta.loc[block] = 0.0
        
    return desired_delta

In [67]:
def get_target_volatility_daily_portfolio_positions_expanded_universe(df, ticker_list, initial_capital, rolling_cov_window,
                                                                      stop_loss_strategy, rolling_atr_window, atr_multiplier,
                                                                      highest_high_window, cash_buffer_percentage,
                                                                      annualized_target_volatility, transaction_cost_est=0.001,
                                                                      passive_trade_rate=0.05, notional_threshold_pct=0.02,
                                                                      min_trade_notional_abs=10, cooldown_counter_threshold=3,
                                                                      annual_trading_days=365, use_specific_start_date=False,
                                                                      signal_start_date=None, start_date=None, previous_month_open_positions_df=None,
                                                                      frozen_ticker_list=None,
                                                                      per_asset_weight_cap=0.25,                # e.g., max 25% of PV upper limit per asset
                                                                      per_asset_notional_cap=None,              # e.g., 5000. If None, ignore absolute cap.
                                                                      allow_adds_on_frozen=True,                # True = you can scale UP frozen tickers
                                                                      eligibility_by_date=None,                 # dict[date]->set([...]) of liquidity-eligible tickers
                                                                      force_exit_if_ineligible=True,            # True = immediate full exit if ineligible today
                                                                      max_positions = None,                     # e.g., 12
                                                                      max_new_per_day = None,                   # e.g., 3
                                                                      signal_col_suffix = "_final_signal"       # so we can rank entries cleanly
                                                                     ):
    """
    Run the day-by-day target-volatility sizing loop for one period and return the populated frame.

    For every date after the chosen start row the function rolls cash and portfolio value forward from the
    previous row, asks ``size_cont`` for target notionals and cash-adjusted desired trades, then applies, in
    order: the daily liquidity-eligibility rule, the freeze policy, the per-asset caps and the daily position
    budget, before handing the adjusted trades to ``size_cont.get_daily_positions_and_portfolio_cash``.

    Parameters
    ----------
    df : DataFrame indexed by date. Must contain ``{ticker}_t_1_close_pct_returns`` for every ticker in
        ``ticker_list``. NOTE: the index of the passed frame is made tz-naive, normalized and sorted in place.
    ticker_list : tickers to size; per-ticker bookkeeping columns are created for each of them.
    initial_capital : starting cash / portfolio value, used only when ``previous_month_open_positions_df`` is None.
    rolling_cov_window : window of the rolling covariance of the returns columns; rows before the first
        complete window are dropped.
    cash_buffer_percentage : fraction of portfolio value held back when computing the investable upper limit.
    annualized_target_volatility, annual_trading_days : combined into the daily volatility target
        (``annualized / sqrt(annual_trading_days)``).
    use_specific_start_date, signal_start_date, start_date : select the logical start row. ``signal_start_date``
        is used when ``use_specific_start_date`` is true, otherwise ``start_date``, otherwise the first row.
    previous_month_open_positions_df : optional one-row frame with ``available_cash``, ``total_portfolio_value``
        and ``{ticker}_actual_position_notional`` / ``{ticker}_actual_position_size`` columns used to seed the start row.
    frozen_ticker_list, allow_adds_on_frozen : forwarded to ``enforce_freeze_on_notional_deltas``.
    per_asset_weight_cap, per_asset_notional_cap : forwarded to ``clamp_to_per_asset_caps``.
    eligibility_by_date, force_exit_if_ineligible : when both are set, a held ticker missing from the day's
        eligible set is fully flattened and an unheld one is blocked from entering. Dates missing from the
        mapping fall back to treating every ticker in ``ticker_list`` as eligible.
    max_positions, max_new_per_day, signal_col_suffix : forwarded to ``_get_today_signals`` /
        ``_enforce_daily_position_budget``.
    stop_loss_strategy, rolling_atr_window, atr_multiplier, highest_high_window, transaction_cost_est,
    passive_trade_rate, notional_threshold_pct, min_trade_notional_abs, cooldown_counter_threshold :
        forwarded unchanged to the ``size_cont`` helpers.

    Returns
    -------
    DataFrame restricted to dates on or after the first complete covariance window, with the per-ticker and
    portfolio-level columns filled in by the sizing helpers.
    """

    # Ensure DatetimeIndex (tz-naive), normalized, sorted
    if not isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index, utc=True).tz_localize(None)
    elif df.index.tz is not None:
        df.index = df.index.tz_localize(None)
    df.index = df.index.normalize()
    df.sort_index(inplace=True)

    ## Calculate the covariance matrix for tickers in the portfolio
    returns_cols = [f'{ticker}_t_1_close_pct_returns' for ticker in ticker_list]
    cov_matrix = df[returns_cols].rolling(rolling_cov_window).cov(pairwise=True).dropna()

    ## Delete rows prior to the first available date of the covariance matrix
    cov_matrix_start_date = cov_matrix.index[0][0]
    df = df[df.index >= cov_matrix_start_date]

    ## Derive the Daily Target Portfolio Volatility
    daily_target_volatility = annualized_target_volatility / np.sqrt(annual_trading_days)

    ## Initialise the per-ticker bookkeeping columns, then reorder dataframe columns by ticker
    for ticker in ticker_list:
        df[f'{ticker}_new_position_size'] = 0.0
        df[f'{ticker}_new_position_notional'] = 0.0
        df[f'{ticker}_open_position_size'] = 0.0
        df[f'{ticker}_open_position_notional'] = 0.0
        df[f'{ticker}_actual_position_size'] = 0.0
        df[f'{ticker}_actual_position_notional'] = 0.0
        df[f'{ticker}_short_sale_proceeds'] = 0.0
        df[f'{ticker}_new_position_entry_exit_price'] = 0.0
        df[f'{ticker}_target_vol_normalized_weight'] = 0.0
        df[f'{ticker}_target_notional'] = 0.0
        df[f'{ticker}_target_size'] = 0.0
        df[f'{ticker}_stop_loss'] = 0.0
        df[f'{ticker}_stopout_flag'] = False
        df[f'{ticker}_cooldown_counter'] = 0.0
        df[f'{ticker}_event'] = np.nan
    ord_cols = size_bin.reorder_columns_by_ticker(df.columns, ticker_list)
    df = df[ord_cols]

    ## Portfolio Level Cash and Positions are all set to 0
    df['daily_portfolio_volatility'] = 0.0
    df['available_cash'] = 0.0
    df['count_of_positions'] = 0.0
    df['total_actual_position_notional'] = 0.0
    df['total_target_notional'] = 0.0
    df['total_portfolio_value'] = 0.0
    df['total_portfolio_value_upper_limit'] = 0.0
    df['target_vol_scaling_factor'] = 1.0
    df['cash_scaling_factor'] = 1.0
    df['final_scaling_factor'] = 1.0
    df['cash_shrink_factor'] = 1.0

    # --- pick the desired logical start date for THIS run ---
    if use_specific_start_date and signal_start_date is not None:
        logical_start = pd.Timestamp(signal_start_date).normalize()   # only for first month
    elif start_date is not None:
        logical_start = pd.Timestamp(start_date).normalize()          # month’s first day
    else:
        logical_start = df.index[0]                                   # fallback

    # --- align to available index after the cov trimming done above ---
    # df is already trimmed by cov_matrix_start_date; its first index may be > logical_start,
    # and the position is clamped to the last row so it is always a valid iloc.
    pos = df.index.searchsorted(logical_start, side="left")
    start_index_position = int(min(pos, len(df.index)-1))

    ## Seed the start row: carry over prior month cash / portfolio value / open positions when a snapshot
    ## is supplied, otherwise start from the initial capital
    if previous_month_open_positions_df is not None:
        df.iloc[start_index_position, df.columns.get_loc('available_cash')]        = previous_month_open_positions_df['available_cash'].iloc[0]
        df.iloc[start_index_position, df.columns.get_loc('total_portfolio_value')] = previous_month_open_positions_df['total_portfolio_value'].iloc[0]
        prev_month_open_ticker_list = [col.split('_')[0] for col in previous_month_open_positions_df.columns if '_actual_position_notional' in col]
        for ticker in prev_month_open_ticker_list:
            df.iloc[start_index_position, df.columns.get_loc(f'{ticker}_actual_position_notional')] = previous_month_open_positions_df[f'{ticker}_actual_position_notional'].iloc[0]
            df.iloc[start_index_position, df.columns.get_loc(f'{ticker}_actual_position_size')]     = previous_month_open_positions_df[f'{ticker}_actual_position_size'].iloc[0]
    else:
        print('Previous Month End Not Available')
        df.iloc[start_index_position, df.columns.get_loc('available_cash')]        = initial_capital
        df.iloc[start_index_position, df.columns.get_loc('total_portfolio_value')] = initial_capital

    ## Identify Daily Positions starting from day 2
    for date in df.index[start_index_position + 1:]:
        previous_date = df.index[df.index.get_loc(date) - 1]

        ## Start the day with the available cash from yesterday
        df.loc[date, 'available_cash'] = df.loc[previous_date, 'available_cash']

        ## Roll Portfolio Value from the Previous Day
        total_portfolio_value = df.loc[previous_date, 'total_portfolio_value']
        df.loc[date, 'total_portfolio_value'] = total_portfolio_value

        ## Update Total Portfolio Value Upper Limit based on the Total Portfolio Value
        total_portfolio_value_upper_limit = (df.loc[date, 'total_portfolio_value'] *
                                             (1 - cash_buffer_percentage))
        df.loc[date, 'total_portfolio_value_upper_limit'] = total_portfolio_value_upper_limit

        ## Calculate the target notional by ticker
        df = size_cont.get_target_volatility_position_sizing(df, cov_matrix, date, ticker_list, daily_target_volatility,
                                                             total_portfolio_value_upper_limit)

        ## Adjust Positions for Cash Available
        desired_positions, cash_shrink_factor = size_cont.get_cash_adjusted_desired_positions(
            df, date, previous_date, ticker_list, cash_buffer_percentage, transaction_cost_est, passive_trade_rate,
            total_portfolio_value, notional_threshold_pct, min_trade_notional_abs)

        ## Build current notional *from previous_date* for the union ticker_list
        current_notional_prev = df.loc[previous_date, [f"{t}_actual_position_notional" for t in ticker_list]]
        current_notional_prev.index = [c.replace("_actual_position_notional","") for c in current_notional_prev.index]

        # Convert the desired_positions dict to a Series keyed by ticker -> notional delta
        desired_delta = pd.Series(
            {t: desired_positions[t]['new_trade_notional'] for t in desired_positions},
            dtype=float
        ).reindex(current_notional_prev.index).fillna(0.0)

        ## Daily liquidity eligibility -> hard exit for held names, no entry for unheld names
        if force_exit_if_ineligible and (eligibility_by_date is not None):
            ## Today's eligible set; a date missing from the mapping falls back to the full ticker_list
            today_set = set(eligibility_by_date.get(pd.Timestamp(date).date(), ticker_list))
            drops = [t for t in ticker_list if t not in today_set and float(current_notional_prev.get(t, 0.0)) != 0.0]
            if drops:
                # Full flatten: set delta = -current_notional
                desired_delta.loc[drops] = -current_notional_prev.loc[drops].astype(float)
                # mark events (per-ticker columns already exist)
                for t in drops:
                    df.loc[date, f'{t}_event'] = 'Liquidity Exit'

            # Also block *new* entries for ineligible tickers. This must run whether or not any held name
            # was dropped today; previously it was nested under `if drops:` and skipped on most days.
            blocks = [t for t in ticker_list if t not in today_set and float(current_notional_prev.get(t, 0.0)) == 0.0]
            if blocks:
                desired_delta.loc[blocks] = 0.0

        ## Freeze policy (optionally allows adds on frozen tickers)
        desired_delta = enforce_freeze_on_notional_deltas(
            desired_delta, current_notional_prev,
            frozen_tickers=frozen_ticker_list,
            allow_adds_on_frozen=allow_adds_on_frozen
        )

        ## Apply per-asset caps to post-trade notionals
        desired_delta = clamp_to_per_asset_caps(
            desired_delta=desired_delta,
            current_notional_prev=current_notional_prev,
            total_portfolio_value_upper_limit=total_portfolio_value_upper_limit,
            per_asset_weight_cap=per_asset_weight_cap,
            per_asset_notional_cap=per_asset_notional_cap
        )

        # ---- Daily position budget (entries only) ----
        today_signals = _get_today_signals(df, date, ticker_list, signal_col_suffix)
        desired_delta = _enforce_daily_position_budget(
            desired_delta=desired_delta,
            current_notional_prev=current_notional_prev,
            today_signals=today_signals,
            max_positions=max_positions,
            max_new_per_day=max_new_per_day
        )

        ## Write back to the nested dict structure expected by the downstream helper
        for t in desired_positions:
            desired_positions[t]['new_trade_notional'] = float(desired_delta.get(t, 0.0))

        ## Get the daily positions
        df = size_cont.get_daily_positions_and_portfolio_cash(
            df, date, previous_date, desired_positions, cash_shrink_factor, ticker_list,
            stop_loss_strategy, rolling_atr_window, atr_multiplier, highest_high_window,
            transaction_cost_est, passive_trade_rate, cooldown_counter_threshold)

    return df

In [69]:
def _prune_usd_products(df_products):
    cols = set(df_products.columns)
    filt = (df_products["quote_currency"] == "USD") if "quote_currency" in cols else True
    if "status" in cols:           filt &= (df_products["status"] == "online")
    if "trading_disabled" in cols: filt &= (~df_products["trading_disabled"])
    if "cancel_only" in cols:      filt &= (~df_products["cancel_only"])
    if "limit_only" in cols:       filt &= (~df_products["limit_only"])
    if "post_only" in cols:        filt &= (~df_products["post_only"])
    if "auction_mode" in cols:     filt &= (~df_products["auction_mode"])
    df = df_products.loc[filt].copy()
    stables = {"USDC-USD","USDT-USD","DAI-USD"}
    return [p for p in df["product_id"].unique().tolist() if p.endswith("-USD") and p not in stables]

def build_eligibility_by_date_for_month(
    client,
    month_start_date, month_end_date,
    lookback_day_count=90,          # your current default
    adv_quantile=0.60, high_low_quantile=0.60,
    product_list=None,
    max_workers=8, batch_size=16, between_batches_sleep=0.10
):
    """
    Build the per-day liquidity-eligible universe for a date range.

    Parameters
    ----------
    client : exchange client handed through to the snapshot / liquidity helpers.
    month_start_date, month_end_date : inclusive bounds of the scan; anything ``pd.Timestamp`` accepts.
    lookback_day_count : lookback forwarded to the liquidity-metrics helper.
    adv_quantile, high_low_quantile : quantile thresholds forwarded to the eligibility helper.
    product_list : product ids to scan. When None, a product snapshot is taken as of the start date
        and pruned with ``_prune_usd_products``.
    max_workers, batch_size, between_batches_sleep : forwarded to the liquidity-metrics helper.

    Returns
    -------
    dict mapping each calendar day (``datetime.date``) in the range to the set of eligible product ids.
    """
    first_day = pd.Timestamp(month_start_date).date()
    last_day = pd.Timestamp(month_end_date).date()

    # Resolve the scan universe once; the same list is reused for every day in the range.
    if product_list is not None:
        scan_products = product_list
    else:
        snapshot = coinbase_product_snapshot(client, asof=first_day, save=True)
        scan_products = _prune_usd_products(snapshot)

    eligible_sets = {}
    for day in pd.date_range(first_day, last_day, freq="D").date:
        print(day)

        # Liquidity metrics for every scanned product as of this day
        liquidity = get_liquidity_metrics_all_tickers_daily_fast(
            client,
            product_id_list=scan_products,
            asof_date=day,
            lookback_day_count=lookback_day_count,
            max_workers=max_workers,
            batch_size=batch_size,
            between_batches_sleep=between_batches_sleep,
            save=False,
        )

        # Apply the quantile screen; the two returned floors are not needed here
        eligible_frame, _adv_floor, _spread_floor = get_eligible_ticker_list_daily(
            df=liquidity,
            asof_date=day,
            median_adv_col='adv_90d_median',
            median_high_low_spread_col='high_low_spread_90d_median',
            adv_quantile_threshold=adv_quantile,
            high_low_quantile_threshold=high_low_quantile,
            save=False,
        )
        eligible_sets[day] = set(eligible_frame["product_id"].tolist())

    return eligible_sets

In [None]:
from pathlib import Path
import shutil
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
import numpy as np

# -----------------------------
# CONFIG: storage locations
# -----------------------------
PERF_STORE_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/perf_store")      # partitioned parquet
MONTH_END_OPEN_POSITION_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/prev_month_open_positions")
POSITIONS_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/positions_daily")
ELIGIBLE_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/eligible_products")

# The three output stores are created up front; ELIGIBLE_DIR is not created here.
for _store_dir in (PERF_STORE_DIR, MONTH_END_OPEN_POSITION_DIR, POSITIONS_DIR):
    _store_dir.mkdir(parents=True, exist_ok=True)

# -----------------------------
# UTILS
# -----------------------------
def _tz_naive_sorted_index(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    idx = pd.to_datetime(df.index)
    if getattr(idx, "tz", None) is not None:
        idx = idx.tz_localize(None)
    df.index = idx
    df.sort_index(inplace=True)
    return df


def write_monthly_performance(
    df: pd.DataFrame,
    cols: list[str],
    month_start: pd.Timestamp,
    month_end: pd.Timestamp,
) -> None:
    """
    Write ONLY the current month's rows (no warmup leakage) to a partitioned Parquet dataset.

    Parameters
    ----------
    df : frame indexed by date (tz-aware or naive); it is copied, so the caller's frame is not modified.
    cols : columns to persist. A requested column missing from ``df`` is written as all-NA.
    month_start, month_end : inclusive bounds; rows outside them are discarded.

    Side effects
    ------------
    Replaces ``PERF_STORE_DIR/year=<y>/month=<m>`` with the freshly written rows, so re-running a month
    does not duplicate data. Nothing is written (and nothing is deleted) when no rows fall in the range.

    Raises
    ------
    AssertionError if the clamped rows span more than one (year, month).
    """
    # 1) normalize + sort
    df = df.copy()
    idx = pd.to_datetime(df.index)
    if getattr(idx, "tz", None) is not None:
        idx = idx.tz_localize(None)
    df.index = idx.normalize()
    df.sort_index(inplace=True)

    # 2) clamp to [month_start, month_end]
    ms = pd.Timestamp(month_start).normalize()
    me = pd.Timestamp(month_end).normalize()
    df = df.loc[(df.index >= ms) & (df.index <= me)]

    if df.empty:
        return

    # 3) keep only requested cols; add missing as NA
    out = pd.DataFrame(index=df.index)
    for c in cols:
        out[c] = df[c] if c in df.columns else pd.NA

    # 4) add partitions
    # reset_index() names the new column after the index name, so name the axis explicitly;
    # renaming a column called "index" only works when the incoming index is unnamed.
    out = out.rename_axis("date").reset_index()
    out["year"]  = out["date"].dt.year.astype(int)
    out["month"] = out["date"].dt.month.astype(int)

    # sanity: should be a single month
    assert out[["year","month"]].drop_duplicates().shape[0] == 1, "Leak outside month!"

    # nuke existing partition dir for this month to make the write idempotent
    y, m = int(out["year"].iloc[0]), int(out["month"].iloc[0])
    part_dir = PERF_STORE_DIR / f"year={y}" / f"month={m}"
    if part_dir.exists():
        shutil.rmtree(part_dir)

    # 5) write the month's partition into the dataset
    out.to_parquet(
        PERF_STORE_DIR,
        engine="pyarrow",
        partition_cols=["year", "month"],
        index=False,
        existing_data_behavior="overwrite_or_ignore",  # fine: the month's partition was just removed
    )


def snapshot_open_positions_long(df, month_end_date, ticker_list):
    """
    Persist the positions open on ``month_end_date`` as long-format rows (one row per held ticker).

    A ticker is recorded only when both its ``_actual_position_size`` and ``_actual_position_notional``
    columns exist and its size on that date is strictly positive. When nothing is held, a single
    placeholder row with a null ticker is written so cash and portfolio value are still captured.
    The ``year=<y>/month=<m>`` partition under ``MONTH_END_OPEN_POSITION_DIR`` is replaced on every call.
    """
    snap_date = pd.Timestamp(month_end_date).normalize()
    frame = _tz_naive_sorted_index(df)

    cash_on_hand = float(frame.loc[snap_date, "available_cash"])
    portfolio_value = float(frame.loc[snap_date, "total_portfolio_value"])

    records = []
    for ticker in ticker_list:
        size_col = f"{ticker}_actual_position_size"
        notional_col = f"{ticker}_actual_position_notional"
        if size_col not in frame.columns or notional_col not in frame.columns:
            continue

        held_size = float(frame.loc[snap_date, size_col])
        # `not (x > 0)` rather than `x <= 0` so a NaN size is skipped as well
        if not (held_size > 0):
            continue

        records.append({
            "date": snap_date, "ticker": ticker,
            "actual_position_size": held_size,
            "actual_position_notional": float(frame.loc[snap_date, notional_col]),
            "available_cash": cash_on_hand,
            "total_portfolio_value": portfolio_value,
        })

    # Placeholder row keeps the cash / portfolio value on record when the book is flat
    if len(records) == 0:
        records.append({
            "date": snap_date, "ticker": None,
            "actual_position_size": 0.0, "actual_position_notional": 0.0,
            "available_cash": cash_on_hand, "total_portfolio_value": portfolio_value,
        })

    out = pd.DataFrame(records)
    out["year"]  = out["date"].dt.year.astype("int32")
    out["month"] = out["date"].dt.month.astype("int32")

    # Pin dtypes so every written file shares one schema
    out["ticker"] = out["ticker"].astype("string")
    numeric_fields = ("actual_position_size", "actual_position_notional", "available_cash", "total_portfolio_value")
    for field in numeric_fields:
        out[field] = pd.to_numeric(out[field], errors="coerce").astype("float64")

    # Drop the existing partition first so a re-run replaces it instead of adding files
    year_val = int(out["year"].iloc[0])
    month_val = int(out["month"].iloc[0])
    partition_path = MONTH_END_OPEN_POSITION_DIR / f"year={year_val}" / f"month={month_val}"
    if partition_path.exists():
        shutil.rmtree(partition_path)

    out.to_parquet(
        MONTH_END_OPEN_POSITION_DIR,
        engine="pyarrow",
        partition_cols=["year","month"],
        index=False,
    )
    

def write_daily_positions(df_month, ticker_list, month_start_date, month_end_date):
    """
    Melt the wide monthly frame to one row per (date, product_id) and write it as partitioned Parquet.

    Parameters
    ----------
    df_month : wide frame indexed by date holding ``{ticker}_{suffix}`` columns. It is copied.
    ticker_list : tickers to export. Every ``{ticker}_{suffix}`` column for the suffixes listed in
        ``SUFFIXES`` below must exist in ``df_month``, otherwise the column selection raises ``KeyError``.
        Note that several suffixes embed a fixed window length (``20_avg_true_range_price``,
        ``highest_high_56``, ``annualized_volatility_30``).
    month_start_date, month_end_date : inclusive bounds; rows outside them (e.g. warmup) are dropped.

    Side effects
    ------------
    Replaces ``POSITIONS_DIR/year=<y>/month=<m>`` (taken from the first output row) and writes the
    long-format rows there. Returns without writing when no rows fall in the range.
    """
    # 1) clamp to month
    df = df_month.copy()
    idx = pd.to_datetime(df.index)
    if getattr(idx, "tz", None) is not None:
        idx = idx.tz_localize(None)
    df.index = idx.normalize()
    ms, me = pd.Timestamp(month_start_date).normalize(), pd.Timestamp(month_end_date).normalize()
    df = df.loc[(df.index >= ms) & (df.index <= me)]
    if df.empty:
        return

    # 2) pick coin-level columns and melt
    SUFFIXES = [
        "actual_position_size",
        "actual_position_notional",
        "target_size",
        "target_notional",
        "final_weighted_additive_signal",
        "final_signal",
        "t_1_close",
        "open",
        "20_avg_true_range_price",
        "highest_high_56",
        "stop_loss",
        "stopout_flag",
        "annualized_volatility_30",
        "event",
    ]

    ticker_cols = [f'{ticker}_{s}' for ticker in ticker_list for s in SUFFIXES]
    wide = df[ticker_cols].copy()
    long = (
        wide
        .stack()                         # index: (date, column_name)
        .rename("value")
        # Name both index levels explicitly: reset_index() otherwise emits "level_0"/"level_1" only
        # for unnamed levels, so a named date index would not produce the "date" column used below.
        .rename_axis(["date", "field"])
        .reset_index()
    )

    # 3) parse product_id and metric from "BTC-USD_actual_position_size"
    sp = long["field"].str.split("_", n=1)
    long["product_id"] = sp.str[0]
    long["metric"]     = sp.str[1]
    # keep only rows whose prefix looks like a product id (contains "-")
    coin_mask = long["product_id"].str.contains("-", na=False)
    long = long[coin_mask]

    # 4) pivot metrics back to columns per (date, product_id)
    pos = (
        long.pivot_table(index=["date","product_id"], columns="metric", values="value", aggfunc="last")
            .reset_index()
    )

    # 5) add partitions and enforce dtypes
    pos["date"]   = pd.to_datetime(pos["date"]).dt.normalize()
    pos["year"]   = pos["date"].dt.year.astype("int32")
    pos["month"]  = pos["date"].dt.month.astype("int32")
    pos["product_id"] = pos["product_id"].astype("string")

    float_cols = ["actual_position_size","actual_position_notional","target_size",
                  "target_notional","final_weighted_additive_signal","final_signal",
                  "t_1_close","open","20_avg_true_range_price","highest_high_56",
                  "stop_loss","annualized_volatility_30"]
    for c in float_cols:
        if c in pos.columns:
            pos[c] = pd.to_numeric(pos[c], errors="coerce").astype("float64")
    if "stopout_flag" in pos.columns:
        pos["stopout_flag"] = pos["stopout_flag"].astype("boolean")
    if "event" in pos.columns:
        pos["event"] = pos["event"].astype("string")

    # Replace the partition to keep the write idempotent
    y, m = int(pos["year"].iloc[0]), int(pos["month"].iloc[0])
    part_dir = POSITIONS_DIR / f"year={y}" / f"month={m}"
    if part_dir.exists():
        shutil.rmtree(part_dir)

    # 6) write
    pos.to_parquet(
        POSITIONS_DIR,
        engine="pyarrow",
        partition_cols=["year","month"],
        index=False,
    )


def load_prev_month_snapshot_wide(prev_month_end: pd.Timestamp) -> pd.DataFrame | None:
    """
    Load the month-end open-position snapshot for ``prev_month_end`` and return it as a one-row wide frame.

    The returned frame is indexed by the normalized snapshot date and carries ``available_cash``,
    ``total_portfolio_value`` and, for every stored ticker, ``{ticker}_actual_position_size`` and
    ``{ticker}_actual_position_notional``. Returns None when the month partition holds no rows or has
    no row dated exactly ``prev_month_end``.
    """
    snap_date = pd.Timestamp(prev_month_end).normalize()

    # One explicit schema for every file in the dataset
    snapshot_schema = pa.schema([
        ("date", pa.timestamp("ns")),       # use "ms" if you wrote ms
        ("ticker", pa.string()),            # string with NA allowed
        ("actual_position_size", pa.float64()),
        ("actual_position_notional", pa.float64()),
        ("available_cash", pa.float64()),
        ("total_portfolio_value", pa.float64()),
        ("year", pa.int32()),
        ("month", pa.int32()),
    ])
    dataset = ds.dataset(
        str(MONTH_END_OPEN_POSITION_DIR),
        format="parquet",
        partitioning="hive",
        schema=snapshot_schema,
    )

    # Read just the partition for the snapshot's year and month
    month_filter = (ds.field("year") == snap_date.year) & (ds.field("month") == snap_date.month)
    snap = dataset.to_table(filter=month_filter).to_pandas()
    if snap.empty:
        return None

    # Narrow to the exact snapshot date
    snap["date"] = pd.to_datetime(snap["date"]).dt.normalize()
    snap = snap.loc[snap["date"] == snap_date]
    if snap.empty:
        return None

    # Cash and portfolio value are repeated on every row; take them from the first one
    cash_on_hand = float(snap["available_cash"].iloc[0])
    portfolio_value = float(snap["total_portfolio_value"].iloc[0])

    wide = pd.DataFrame(index=[snap_date], columns=["available_cash", "total_portfolio_value"])
    wide.loc[snap_date, "available_cash"] = cash_on_hand
    wide.loc[snap_date, "total_portfolio_value"] = portfolio_value

    # Rows with a null ticker are the "nothing held" placeholder and add no position columns
    held = snap.dropna(subset=["ticker"])
    for ticker, size, notional in zip(held["ticker"], held["actual_position_size"], held["actual_position_notional"]):
        wide.loc[snap_date, f"{ticker}_actual_position_size"] = float(size)
        wide.loc[snap_date, f"{ticker}_actual_position_notional"] = float(notional)

    return wide


# -----------------------------
# MAIN MONTHLY LOOP (streamlined)
# -----------------------------
# Quantile thresholds passed to build_eligibility_by_date_for_month for the daily liquidity screen.
adv_quantile = 0.60
high_low_quantile = 0.60

## Month-End Date Range for Backtest
## Each entry is treated as the *previous* month end; the month simulated in an iteration is the one after it.
month_end_dates = pd.date_range(
    start=pd.Timestamp("2022-03-31").date(),
    end=pd.Timestamp("2024-12-31").date(),
    freq="M"
)

## Performance columns to persist for portfolio analysis
PERF_COLS = [
    "daily_portfolio_volatility", "available_cash", "count_of_positions",
    "total_actual_position_notional", "total_target_notional",
    "total_portfolio_value", "total_portfolio_value_upper_limit",
]

# NOTE(review): the loop below reads many names that are not assigned in this cell (client, cfg,
# signal_start_date, initial_capital, warmup_days, lookback_day_count and the signal / sizing
# parameters) - presumably set in an earlier configuration cell; confirm before a Restart & Run All.
for prev_month_end in month_end_dates:
    ## Pull Base Universe from Monthly Eligible Ticker List
    ## (read from the parquet file saved for the previous month-end date under ELIGIBLE_DIR)
    print("Get Final Ticker List Based on Signal Strength on Last Month End Date!!")
    eligible_path = ELIGIBLE_DIR / f"{pd.Timestamp(prev_month_end).date()}_eligible.parquet"
    df_eligible = pd.read_parquet(eligible_path)
    base_universe = df_eligible["product_id"].unique().tolist()

    ## Get Month Start and End Dates
    ## (the day after prev_month_end through the following month end)
    month_start_date = pd.Timestamp(prev_month_end).normalize() + pd.Timedelta(days=1)
    month_end_date = (pd.Timestamp(prev_month_end) + pd.offsets.MonthEnd(1)).normalize()
    print(month_start_date.date(), month_end_date.date(), base_universe)

    ## Check if this is the first month in the run
    is_first_month = (month_start_date.normalize() == pd.Timestamp(signal_start_date).normalize())
    print(f"Is the first month flag set: {is_first_month}!!")

    ## Pull Previous Month Available Cash, Total Portfolio Value and Open Positions if available
    ## If not available, it is the first month and we start with the Initial Capital
    print("Check if there are any open positions from prior month!!")
    previous_month_open_positions_df = load_prev_month_snapshot_wide(prev_month_end)
    if previous_month_open_positions_df is not None:
        # NOTE(review): prev_month_available_cash / prev_month_total_portfolio_value are assigned in both
        # branches but not read again in this cell; the sizing call takes the snapshot frame itself.
        prev_month_available_cash = previous_month_open_positions_df.loc[prev_month_end.normalize(), "available_cash"]
        prev_month_total_portfolio_value = previous_month_open_positions_df.loc[prev_month_end.normalize(), "total_portfolio_value"]
        # Ticker = text before the first underscore of each "<ticker>_actual_position_notional" column
        prev_month_open_ticker_list = [
            col.split("_")[0]
            for col in previous_month_open_positions_df.columns
            if col.endswith("_actual_position_notional")
        ]
        print(f"Tickers with Open Positions at Prior Month-end: {prev_month_open_ticker_list}")
    else:
        prev_month_open_ticker_list = []
        # NOTE(review): this fallback reads cfg["run"]["initial_capital"] while the sizing call below
        # passes the separate `initial_capital` variable - confirm the two agree.
        prev_month_available_cash = cfg["run"]["initial_capital"]
        prev_month_total_portfolio_value = cfg["run"]["initial_capital"]
        print("No Open Positions at Prior Month-end!!")

    ## Create a frozen list of tickers with open positions that are not on the eligible ticker list
    print("Create Final Ticker List and Frozen Ticker List!!")
    frozen_ticker_list = [t for t in prev_month_open_ticker_list if t not in base_universe]
    final_month_ticker_list = base_universe + frozen_ticker_list
    print(f"Final Ticker List for Current Month: {final_month_ticker_list}")

    ## Get the liquidity metrics for all tickers by date
    ## This is to exit positions if tickers fall off the daily eligible list based on liquidity criteria
    ## Only base_universe is scanned here, so frozen tickers cannot appear in any day's eligible set
    print("Pull all eligible tickers based on Liquidity Metrics for every date in the current month!!")
    eligibility_by_date = build_eligibility_by_date_for_month(
        client,
        month_start_date=month_start_date.date(),
        month_end_date=month_end_date.date(),
        lookback_day_count=lookback_day_count,
        adv_quantile=adv_quantile,
        high_low_quantile=high_low_quantile,
        product_list=base_universe,
        max_workers=5,
        batch_size=10,
        between_batches_sleep=0.10,
    )

    ## Generate Trend Following Trading Signal
    ## The request starts warmup_days before the month start; the write_* helpers below clamp their
    ## output back to [month_start_date, month_end_date]
    print("Generating Moving Average Ribbon Signal!!")
    df_trend = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
        start_date=month_start_date - pd.Timedelta(days=warmup_days),
        end_date=month_end_date,
        ticker_list=final_month_ticker_list,
        fast_mavg=fast_mavg,
        slow_mavg=slow_mavg,
        mavg_stepsize=mavg_stepsize,
        mavg_z_score_window=mavg_z_score_window,
        entry_rolling_donchian_window=entry_rolling_donchian_window,
        exit_rolling_donchian_window=exit_rolling_donchian_window,
        use_donchian_exit_gate=use_donchian_exit_gate,
        ma_crossover_signal_weight=ma_crossover_signal_weight,
        donchian_signal_weight=donchian_signal_weight,
        weighted_signal_ewm_window=weighted_signal_ewm_window,
        rolling_r2_window=rolling_r2_window,
        lower_r_sqr_limit=lower_r_sqr_limit,
        upper_r_sqr_limit=upper_r_sqr_limit,
        r2_smooth_window=r2_smooth_window,
        r2_confirm_days=r2_confirm_days,
        log_std_window=log_std_window,
        coef_of_variation_window=coef_of_variation_window,
        vol_of_vol_z_score_window=vol_of_vol_z_score_window,
        vol_of_vol_p_min=vol_of_vol_p_min,
        r2_strong_threshold=r2_strong_threshold,
        use_activation=use_activation,
        tanh_activation_constant_dict=tanh_activation_constant_dict,
        moving_avg_type=moving_avg_type,
        long_only=long_only,
        price_or_returns_calc=price_or_returns_calc,
        use_coinbase_data=use_coinbase_data,
        use_saved_files=False,
        saved_file_end_date=saved_file_end_date,
    )

    ## Volatility Adjust the Trend Signal
    print("Generating Volatility Adjusted Trend Signal!!")
    df_signal = size_cont.get_volatility_adjusted_trend_signal_continuous(
        df=df_trend,
        ticker_list=final_month_ticker_list,
        volatility_window=volatility_window,
        annual_trading_days=annual_trading_days,
    )

    ## Get Average True Range for all tickers
    print("Getting Average True Range for Stop Loss Calculation!!")
    df_atr = size_cont.get_average_true_range_portfolio(
        start_date=month_start_date - pd.Timedelta(days=warmup_days),
        end_date=month_end_date,
        ticker_list=final_month_ticker_list,
        rolling_atr_window=rolling_atr_window,
        highest_high_window=highest_high_window,
        price_or_returns_calc="price",
        use_coinbase_data=use_coinbase_data,
        use_saved_files=False,
        saved_file_end_date=saved_file_end_date,
    )
    # Left join on the date index keeps every signal row
    df_signal = pd.merge(df_signal, df_atr, left_index=True, right_index=True, how="left")

    ## Get the Volatility Targeted Position Sizes for the eligible tickers
    ## The caps and position budget (0.25 weight cap, 12 positions, 3 new per day) are hard-coded in this call
    print("Calculating Volatility Targeted Position Size and Cash Management!!")
    df_month = get_target_volatility_daily_portfolio_positions_expanded_universe(
        df_signal,
        ticker_list=final_month_ticker_list,
        initial_capital=initial_capital,
        rolling_cov_window=rolling_cov_window,
        stop_loss_strategy=stop_loss_strategy,
        rolling_atr_window=rolling_atr_window,
        atr_multiplier=atr_multiplier,
        highest_high_window=highest_high_window,
        cash_buffer_percentage=cash_buffer_percentage,
        annualized_target_volatility=annualized_target_volatility,
        transaction_cost_est=transaction_cost_est,
        passive_trade_rate=passive_trade_rate,
        notional_threshold_pct=notional_threshold_pct,
        min_trade_notional_abs=min_trade_notional_abs,
        cooldown_counter_threshold=cooldown_counter_threshold,
        annual_trading_days=annual_trading_days,
        use_specific_start_date=is_first_month,
        signal_start_date=signal_start_date,
        start_date=month_start_date,
        previous_month_open_positions_df=previous_month_open_positions_df,
        frozen_ticker_list=frozen_ticker_list,
        per_asset_weight_cap=0.25,
        per_asset_notional_cap=None,
        allow_adds_on_frozen=True,
        eligibility_by_date=eligibility_by_date,
        force_exit_if_ineligible=True,
        max_positions=12,
        max_new_per_day=3,
        signal_col_suffix="_final_signal",
    )

    ## Write the current month's portfolio performance metrics to a Parquet file
    write_monthly_performance(df_month,
                              cols=PERF_COLS,
                              month_start=month_start_date,
                              month_end=month_end_date
                             )

    ## Save the Open Positions at month end to a Parquet file
    ## (the next iteration reloads this via load_prev_month_snapshot_wide)
    snapshot_open_positions_long(
        df=df_month,
        month_end_date=month_end_date,
        ticker_list=final_month_ticker_list,
    )

    ## Archive the Daily Positions file by ticker for further analysis
    write_daily_positions(df_month, ticker_list=final_month_ticker_list,
                          month_start_date=month_start_date, month_end_date=month_end_date)


Get Final Ticker List Based on Signal Strength on Last Month End Date!!
2022-04-01 2022-04-30 ['BTC-USD', 'ETH-USD', 'ZEC-USD', 'DOGE-USD', 'LINK-USD', 'ADA-USD', 'LTC-USD', 'XLM-USD', 'BCH-USD', 'AAVE-USD', 'UNI-USD', 'ICP-USD', 'FIL-USD', 'ETC-USD', 'ALGO-USD', 'MANA-USD', 'COMP-USD', 'MKR-USD', 'ANKR-USD', 'YFI-USD', 'EOS-USD', 'XTZ-USD']
Is the first month flag set: True!!
Check if there are any open positions from prior month!!
No Open Positions at Prior Month-end!!
Create Final Ticker List and Frozen Ticker List!!
Final Ticker List for Current Month: ['BTC-USD', 'ETH-USD', 'ZEC-USD', 'DOGE-USD', 'LINK-USD', 'ADA-USD', 'LTC-USD', 'XLM-USD', 'BCH-USD', 'AAVE-USD', 'UNI-USD', 'ICP-USD', 'FIL-USD', 'ETC-USD', 'ALGO-USD', 'MANA-USD', 'COMP-USD', 'MKR-USD', 'ANKR-USD', 'YFI-USD', 'EOS-USD', 'XTZ-USD']
Pull all eligible tickers based on Liquidity Metrics for every date in the current month!!
2022-04-01
2022-04-02
2022-04-03
2022-04-04
2022-04-05
2022-04-06
2022-04-07
2022-04-08
2022-04-

In [None]:
# Preview the sizing frame left in `df_month` by the last iteration of the monthly loop.
df_month.head()

In [None]:
# NOTE(review): in the visible cells `long` is only assigned as a local inside write_daily_positions;
# confirm a top-level `long` exists, otherwise this cell fails on a fresh kernel.
long.head(20)

In [None]:
# NOTE(review): in the visible cells `pos` is only assigned as a local inside functions
# (e.g. write_daily_positions); confirm a top-level `pos` exists before keeping this cell.
pos.head(25)

In [None]:
# Inspect the average-true-range columns of df_month for rows dated after 2022-03-31.
atr_cols = [col for col in df_month.columns if 'avg_true_range' in col]
df_month[df_month.index > pd.Timestamp('2022-03-31')][atr_cols].head()

In [None]:
# NOTE(review): `wide_df` is not assigned anywhere in the visible cells; confirm where it comes from
# or drop this cell, since it would fail on a fresh kernel.
wide_df.head()

In [None]:
# NOTE(review): displays the whole `long` object; in the visible cells `long` is only a local inside
# write_daily_positions - confirm a top-level definition exists before keeping this cell.
long

In [None]:
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from pathlib import Path

# OPEN_POS_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/prev_month_open_positions")

# Arrow schema applied to every parquet file in the month-end open-positions store.
# The timestamp unit is "ns"; switch it to "ms" if that is how the files were written.
open_pos_schema = pa.schema([
    pa.field("date", pa.timestamp("ns")),
    pa.field("ticker", pa.string()),  # string column; nulls are allowed
    pa.field("actual_position_size", pa.float64()),
    pa.field("actual_position_notional", pa.float64()),
    pa.field("available_cash", pa.float64()),
    pa.field("total_portfolio_value", pa.float64()),
    # hive partition keys (year=..., month=...)
    pa.field("year", pa.int32()),
    pa.field("month", pa.int32()),
])

# Open the whole folder as one hive-partitioned dataset with the schema above.
dset = ds.dataset(
    str(MONTH_END_OPEN_POSITION_DIR),
    format="parquet",
    partitioning="hive",
    schema=open_pos_schema,
)

# To read only some partitions, pass a filter to to_table, e.g.:
# filt = (ds.field("year") == 2022) & (ds.field("month").isin([4,5,6]))
# tbl = dset.to_table(filter=filt, columns=[...])

# Full read (no filter), then convert to pandas with midnight-normalised dates
# so equality checks on `date` behave, ordered by partition.
tbl = dset.to_table()
df_open = (
    tbl.to_pandas()
    .assign(date=lambda frame: pd.to_datetime(frame["date"]).dt.normalize())
    .sort_values(['year','month'])
)


In [None]:
df_open

In [None]:
from pathlib import Path
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd

# Portfolio-level columns kept in the performance store.
PERF_COLS = [
    "daily_portfolio_volatility",
    "available_cash",
    "count_of_positions",
    "total_actual_position_notional",
    "total_target_notional",
    "total_portfolio_value",
    "total_portfolio_value_upper_limit",
]

PERF_STORE_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/perf_store")

# One schema for every file in the store. Field order: date, the float metrics
# (PERF_COLS order, minus the position count), the integer position count, and
# finally the hive partition keys. Use "ms" for the timestamp unit if the files
# were written in milliseconds.
perf_schema = pa.schema([
    pa.field("date", pa.timestamp("ns")),
    *(pa.field(name, pa.float64()) for name in PERF_COLS if name != "count_of_positions"),
    pa.field("count_of_positions", pa.int32()),
    pa.field("year", pa.int32()),
    pa.field("month", pa.int32()),
])

# Hive-partitioned dataset (year=..., month=...) over the whole store.
perf_ds = ds.dataset(
    str(PERF_STORE_DIR),
    format="parquet",
    partitioning="hive",
    schema=perf_schema,
)

# Read every partition, keeping the partition keys so rows can be ordered.
perf_tbl = perf_ds.to_table(columns=["date", *PERF_COLS, "year", "month"])

# Normalise dates to midnight, order chronologically, index by date and keep
# only the performance columns (the partition keys are dropped here).
df_perf = (
    perf_tbl.to_pandas()
    .assign(date=lambda frame: pd.to_datetime(frame["date"]).dt.normalize())
    .sort_values(["year", "month", "date"])
    .set_index("date")[PERF_COLS]
)

# --- Example: read a filtered slice efficiently (only scans needed folders) ---
# filt = (ds.field("year") == 2022) & (ds.field("month").isin([4, 5, 6]))
# perf_tbl_q2 = perf_ds.to_table(filter=filt, columns=["date"] + PERF_COLS + ["year", "month"])
# df_perf_q2 = (perf_tbl_q2.to_pandas()
#               .assign(date=lambda d: pd.to_datetime(d["date"]).dt.normalize())
#               .sort_values(["year", "month", "date"])
#               .set_index("date"))[PERF_COLS]


In [None]:
df_perf

In [None]:
# Month-by-month simulation driver.
# For each month-end in `month_end_dates` it loads that date's eligible universe,
# carries over any positions snapshotted at that month-end, builds signals and
# position sizes for the FOLLOWING calendar month, and writes a new month-end
# snapshot that the next iteration picks up.
# Relies on names defined in other cells (ELIGIBLE_DIR, client, cfg, signal_start_date,
# warmup_days, the strategy parameters, build_eligibility_by_date_for_month,
# get_target_volatility_daily_portfolio_positions_expanded_universe, ...).

# Quantile thresholds handed to build_eligibility_by_date_for_month below.
adv_quantile=0.60
high_low_quantile=0.60

# Folder for the per-month-end open-position pickles (created if missing).
MONTH_END_OPEN_POSITION_DIR = Path("/Users/adheerchauhan/Documents/git/trend_following/data_folder/universe/prev_month_open_positions")
MONTH_END_OPEN_POSITION_DIR.mkdir(parents=True, exist_ok=True)
# Each value is used as the month-end *preceding* the simulated month.
month_end_dates = pd.date_range(start=pd.Timestamp('2022-03-31').date(), end=pd.Timestamp('2022-12-31').date(), freq='M')
prev_month_open_ticker_list = []
# month_start_date (datetime.date) -> full monthly frame returned by the sizing step
month_df_dict = {}

## Loop through all month-end dates
for prev_month_end in month_end_dates:    
    ## Pull in the final ticker list for the month based on the signal strength
    print('Get Final Ticker List Based on Signal Strength on Last Month End Date!!')
    # One parquet file per month-end; its unique product_id values form the base universe.
    out = ELIGIBLE_DIR / f"{pd.Timestamp(prev_month_end).date()}_eligible.parquet"
    df_eligible = pd.read_parquet(out)
    base_universe = df_eligible.product_id.unique().tolist()

    ## Set the start and end date based on the following months
    # Both are datetime.date objects: the day after prev_month_end and the next month-end.
    month_start_date = prev_month_end.date() + pd.Timedelta(days=1)
    month_end_date = (prev_month_end + pd.offsets.MonthEnd(1)).date()
    print(month_start_date, month_end_date, base_universe)

    ## Set a flag to track the first month in the run
    # True only when this month starts exactly on signal_start_date.
    is_first_month = (pd.Timestamp(month_start_date).normalize()
                      == pd.Timestamp(signal_start_date).normalize())
    print(f'Is the first month flag set: {is_first_month}!!')

    ## Pull in the available cash, total portfolio value and any open positions from the previous month
    print('Check if there are any open positions from prior month!!')
    prev_month_open_positions = MONTH_END_OPEN_POSITION_DIR / f'{prev_month_end.date()}_open_positions.pickle'
    # Stays None when no snapshot file exists for prev_month_end.
    previous_month_open_positions_df = None
    if prev_month_open_positions.exists():
        # NOTE(review): a snapshot left over from an earlier run is picked up here as well;
        # pickle files should only be loaded from trusted locations.
        previous_month_open_positions_df = pd.read_pickle(prev_month_open_positions)
        # NOTE(review): these two values are not referenced again in this cell; the sizing
        # call below receives `initial_capital` and the snapshot frame instead.
        prev_month_available_cash = previous_month_open_positions_df.loc[prev_month_end, 'available_cash']
        prev_month_total_portfolio_value = previous_month_open_positions_df.loc[prev_month_end, 'total_portfolio_value']
        # Ticker = text before the first underscore of each '<ticker>_actual_position_notional'
        # column, so this relies on tickers containing no underscore.
        prev_month_open_ticker_list = [col.split('_')[0] for col in previous_month_open_positions_df.columns if '_actual_position_notional' in col]
        print(f'Tickers with Open Positions at Prior Month-end: {prev_month_open_ticker_list}')
    else:
        prev_month_open_ticker_list = []
        prev_month_available_cash = cfg['run']['initial_capital']
        prev_month_total_portfolio_value = cfg['run']['initial_capital']
        print('No Open Positions at Prior Month-end!!')

    ## Account for any tickers with open positions that are no longer on the final ticker list
    print('Create Final Ticker List and Frozen Ticker List!!')
    # Frozen = held at the prior month-end but absent from this month's base universe.
    frozen_ticker_list = [ticker for ticker in prev_month_open_ticker_list if ticker not in base_universe]
    final_month_ticker_list = base_universe + frozen_ticker_list
    print(f'Final Ticker List for Current Month: {final_month_ticker_list}')

    # ---- NEW: build daily eligibility map (ADV/spread only) ----
    print('Pull all eligible tickers based on Liquidity Metrics for every date in the current month!!')
    # Only base_universe is passed as product_list; frozen tickers are not included.
    eligibility_by_date = build_eligibility_by_date_for_month(
        client,
        month_start_date=month_start_date,
        month_end_date=month_end_date,
        lookback_day_count=lookback_day_count,
        adv_quantile=adv_quantile, high_low_quantile=high_low_quantile,
        product_list=base_universe,
        max_workers=5, batch_size=12, between_batches_sleep=0.10
    )
    # -----------------------------------------------------------

    print('Generating Moving Average Ribbon Signal!!')
    ## Generate Trend Signal for all tickers including tickers on the frozen list
    # The request starts warmup_days before the month so rolling windows have history.
    df_trend = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
        start_date=month_start_date - pd.Timedelta(days=warmup_days), end_date=month_end_date, ticker_list=final_month_ticker_list,
        fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window,
        entry_rolling_donchian_window=entry_rolling_donchian_window,
        exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate,
        ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight,
        weighted_signal_ewm_window=weighted_signal_ewm_window, rolling_r2_window=rolling_r2_window,
        lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window,
        r2_confirm_days=r2_confirm_days, log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window,
        vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min,
        r2_strong_threshold=r2_strong_threshold, use_activation=use_activation,
        tanh_activation_constant_dict=tanh_activation_constant_dict, moving_avg_type=moving_avg_type,
        long_only=long_only, price_or_returns_calc=price_or_returns_calc, use_coinbase_data=use_coinbase_data,
        use_saved_files=False, saved_file_end_date=saved_file_end_date)
    
    print('Generating Volatility Adjusted Trend Signal!!')
    ## Get Volatility Adjusted Trend Signal
    df_signal = size_cont.get_volatility_adjusted_trend_signal_continuous(df=df_trend, ticker_list=final_month_ticker_list,
                                                                          volatility_window=volatility_window,
                                                                          annual_trading_days=annual_trading_days)
    
    print('Getting Average True Range for Stop Loss Calculation!!')
    ## Get Average True Range for Stop Loss Calculation
    # Same warm-up-extended date range as the trend signal above.
    df_atr = size_cont.get_average_true_range_portfolio(start_date=month_start_date - pd.Timedelta(days=warmup_days), end_date=month_end_date,
                                                        ticker_list=final_month_ticker_list, rolling_atr_window=rolling_atr_window,
                                                        highest_high_window=highest_high_window,
                                                        price_or_returns_calc='price',
                                                        use_coinbase_data=use_coinbase_data,
                                                        use_saved_files=False,
                                                        saved_file_end_date=saved_file_end_date)
    # Left join on the index: every df_signal row is kept, ATR columns are attached by date.
    df_signal = pd.merge(df_signal, df_atr, left_index=True, right_index=True, how='left')

    print('Calculating Volatility Targeted Position Size and Cash Management!!')
    ## Get Target Volatility Position Sizing and Run Cash Management
    df = get_target_volatility_daily_portfolio_positions_expanded_universe(
        df_signal, ticker_list=final_month_ticker_list, initial_capital=initial_capital, rolling_cov_window=rolling_cov_window,
        stop_loss_strategy=stop_loss_strategy, rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
        highest_high_window=highest_high_window,
        cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
        transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
        notional_threshold_pct=notional_threshold_pct, min_trade_notional_abs=min_trade_notional_abs,
        cooldown_counter_threshold=cooldown_counter_threshold, annual_trading_days=annual_trading_days,
        use_specific_start_date=is_first_month, signal_start_date=signal_start_date,
        start_date=month_start_date, previous_month_open_positions_df=previous_month_open_positions_df,
        frozen_ticker_list=frozen_ticker_list, 
        per_asset_weight_cap=0.25,                # e.g., max 25% of PV upper limit per asset
        per_asset_notional_cap=None,              # e.g., 5000. If None, ignore absolute cap.
        allow_adds_on_frozen=True,                # True = you can scale UP frozen tickers
        eligibility_by_date=eligibility_by_date,  # dict[date]->set([...]) of liquidity-eligible tickers
        force_exit_if_ineligible=True,            # True = immediate full exit if ineligible today
        max_positions = 12,                       # e.g., 12
        max_new_per_day = 3,                      # e.g., 3
        signal_col_suffix = "_final_signal"       # so we can rank entries cleanly
    )

    ## Save the monthly dataframe to a dictionary for further analysis
    print('Saving final monthly dataframe to dictionary!!')
    # Keyed by a datetime.date (month_start_date), not by a pandas Period.
    month_df_dict[month_start_date] = df

    ## Save month end positions as a pickle file
    # NOTE(review): the next two lists are not used further down in this cell.
    actual_position_notional_cols = [col for col in df.columns if '_actual_position_notional' in col]
    actual_position_size_cols = [col for col in df.columns if '_actual_position_size' in col]
    # A ticker counts as open when its position size on month_end_date is strictly positive.
    open_position_size_list = []
    for ticker in final_month_ticker_list:
        if df.loc[pd.Timestamp(month_end_date), f'{ticker}_actual_position_size'] > 0:
            open_position_size_list.append(ticker)
    
    ## Create month-end dataframe to archive for following month
    # One-row frame indexed by the month-end timestamp: cash, portfolio value and
    # the size/notional columns of each open ticker.
    prev_month_cols = ['date', 'available_cash', 'total_portfolio_value']
    open_position_size_cols = [f'{ticker}_actual_position_size' for ticker in open_position_size_list]
    open_position_notional_cols = [f'{ticker}_actual_position_notional' for ticker in open_position_size_list]
    prev_month_cols = prev_month_cols + open_position_size_cols + open_position_notional_cols
    df_month_end_positions = pd.DataFrame(columns=prev_month_cols)
    df_month_end_positions.loc[0, 'date'] = pd.Timestamp(month_end_date)
    df_month_end_positions = df_month_end_positions.set_index('date')
    df_month_end_positions.loc[pd.Timestamp(month_end_date), 'available_cash'] = df.loc[pd.Timestamp(month_end_date), 'available_cash']
    df_month_end_positions.loc[pd.Timestamp(month_end_date), 'total_portfolio_value'] = df.loc[pd.Timestamp(month_end_date), 'total_portfolio_value']
    for ticker in open_position_size_list:
        df_month_end_positions.loc[pd.Timestamp(month_end_date), f'{ticker}_actual_position_notional'] = df.loc[pd.Timestamp(month_end_date), f'{ticker}_actual_position_notional']
        df_month_end_positions.loc[pd.Timestamp(month_end_date), f'{ticker}_actual_position_size'] = df.loc[pd.Timestamp(month_end_date), f'{ticker}_actual_position_size']

    # Same file-name pattern the next iteration looks up via prev_month_end.date().
    month_end_open_positions = MONTH_END_OPEN_POSITION_DIR / f'{month_end_date}_open_positions.pickle'
    df_month_end_positions.to_pickle(month_end_open_positions)

In [None]:
import pandas as pd
import numpy as np

# monthly_frames: dict where key is any date inside the month (e.g., month_start or month_end)
# and value is the month's DataFrame returned by your sizing pipeline.
# Example: {date(2022,4,1): df_apr, date(2022,5,1): df_may, ...}
def collect_performance_across_months(monthly_frames: dict, cols: list[str]) -> pd.DataFrame:
    """Stitch per-month frames into a single daily performance track.

    Parameters
    ----------
    monthly_frames : dict
        Maps any date-like value that falls inside a month (month start,
        month end, ...) to that month's DataFrame. Frames may contain
        warm-up rows outside the month; those rows are discarded.
    cols : list[str]
        Columns to keep. A column missing from a frame is added as NaN.

    Returns
    -------
    pd.DataFrame
        Rows of every month restricted to its calendar month, with columns
        ``cols + ["date"]``, sorted by index; when a timestamp occurs more
        than once the last occurrence is kept. An empty frame with those
        columns is returned when nothing falls inside any month.
    """
    monthly_slices = []
    for anchor_key, frame in monthly_frames.items():
        # Step 1: tz-naive, midnight-normalised, sorted index.
        if isinstance(frame.index, pd.DatetimeIndex):
            clean_index = frame.index
        else:
            clean_index = pd.to_datetime(frame.index, utc=True).tz_localize(None)
        if clean_index.tz is not None:
            clean_index = clean_index.tz_localize(None)
        clean_index = clean_index.normalize()
        if not frame.index.equals(clean_index):
            # Work on a copy so the caller's frame keeps its original index.
            frame = frame.copy()
            frame.index = clean_index
        frame = frame.sort_index()

        # Step 2: calendar month containing the dict key.
        anchor = pd.Timestamp(anchor_key).normalize()
        if anchor.day == 1:
            month_first = anchor
        else:
            # Rolls back to the first day of the anchor's own month.
            month_first = anchor - pd.offsets.MonthBegin(1)
        month_last = month_first + pd.offsets.MonthEnd(0)

        # Step 3: drop everything outside [month_first, month_last] (e.g. warm-up rows).
        inside_month = (frame.index >= month_first) & (frame.index <= month_last)
        month_rows = frame.loc[inside_month]
        if month_rows.empty:
            continue

        # Step 4: requested columns only, in the requested order; absent ones become NaN.
        present = [name for name in cols if name in month_rows.columns]
        absent = [name for name in cols if name not in month_rows.columns]
        selected = month_rows[present].copy()
        for name in absent:
            selected[name] = np.nan
        selected = selected[cols].copy()

        # Step 5: expose the index as an ordinary column as well.
        selected["date"] = selected.index
        monthly_slices.append(selected)

    if len(monthly_slices) == 0:
        return pd.DataFrame(columns=cols + ["date"])

    # Overlapping months can repeat a date: sort, then keep the last occurrence.
    combined = pd.concat(monthly_slices, axis=0, ignore_index=False).sort_index()
    is_repeat = combined.index.duplicated(keep="last")
    return combined[~is_repeat]




In [None]:
import pandas as pd
import numpy as np

def month_key(dt_like) -> pd.Period:
    """Return the monthly ``pd.Period`` that contains ``dt_like``.

    ``dt_like`` is anything ``pd.Timestamp`` accepts (date, datetime, string, ...).
    """
    stamp = pd.Timestamp(dt_like)
    return stamp.to_period('M')

def collect_performance(monthly_frames: dict[pd.Period, pd.DataFrame], cols: list[str]) -> pd.DataFrame:
    """Concatenate per-month frames into one performance track.

    Parameters
    ----------
    monthly_frames : dict
        Maps a month to that month's DataFrame. Keys are normally monthly
        ``pd.Period`` objects; any other date-like key (``datetime.date``,
        ``pd.Timestamp``, ISO string) is converted to the monthly Period
        containing it, so a dict keyed by month-start dates works too.
    cols : list[str]
        Columns to keep, in this order. Columns absent from a frame are
        added as NaN.

    Returns
    -------
    pd.DataFrame
        Rows of each frame that fall inside its key's period, restricted to
        ``cols``, de-duplicated on the index (last occurrence wins) and
        sorted by index. An empty frame with columns ``cols`` if no rows match.
    """
    pieces = []
    for key, df in monthly_frames.items():
        # Period keys are used as given; anything else is mapped to its month.
        # Without this, date/Timestamp keys fail on `.start_time` below.
        period = key if isinstance(key, pd.Period) else pd.Timestamp(key).to_period('M')

        # Ensure DatetimeIndex is tz-naive; work on a copy so the caller's frame is untouched
        idx = pd.to_datetime(df.index)
        if getattr(idx, "tz", None) is not None:
            idx = idx.tz_localize(None)
        df = df.copy()
        df.index = idx

        # Clamp to the period (drops warm-up rows before the month and anything after it)
        mask = (df.index >= period.start_time) & (df.index <= period.end_time)
        df_month = df.loc[mask]

        if df_month.empty:
            continue

        # Keep only needed cols; add missing as NaN, then restore the requested order
        keep = [c for c in cols if c in df_month.columns]
        out = df_month[keep].copy()
        for m in (set(cols) - set(keep)):
            out[m] = np.nan
        out = out[cols]

        pieces.append(out)

    if not pieces:
        return pd.DataFrame(columns=cols)

    out = pd.concat(pieces, axis=0)
    # If two frames contribute the same timestamp, the later dict entry wins
    out = out[~out.index.duplicated(keep="last")]
    return out.sort_index()

# Example while building the dict:
# month_df_dict[month_key(month_start_date)] = df_month


In [None]:
# Register the most recent monthly frame under its monthly Period key.
month_df_dict[pd.Timestamp(month_start_date).to_period('M')] = df

# The monthly loop stores its frames in month_df_dict under datetime.date keys
# (month_start_date), while collect_performance reads `.start_time` / `.end_time`
# from each key. Convert every key to a monthly Period in a separate dict, so the
# call does not fail on date keys and month_df_dict itself stays usable by the
# cells that index it by date. A month present under both key types collapses
# to a single entry (the later dict entry wins).
monthly_frames_by_period = {
    (key if isinstance(key, pd.Period) else pd.Timestamp(key).to_period('M')): frame
    for key, frame in month_df_dict.items()
}

# then build:
cols = [
    'daily_portfolio_volatility','available_cash','count_of_positions',
    'total_actual_position_notional','total_target_notional',
    'total_portfolio_value','total_portfolio_value_upper_limit'
]
df_performance_track = collect_performance(monthly_frames_by_period, cols)

In [None]:
df_performance_track

In [None]:
df_performance_track = size_bin.calculate_portfolio_returns(df_performance_track, rolling_sharpe_window=rolling_sharpe_window)

In [None]:
df_performance_track

In [None]:
df_performance_track['portfolio_strategy_cumulative_return'].plot(figsize=(12,10))

In [None]:
date_cond = month_df_dict[pd.Timestamp('2023-01-01').date()].index >= pd.Timestamp('2023-01-01')
month_df_dict[pd.Timestamp('2023-01-01').date()][date_cond].tail()

In [None]:
dict_date_key = pd.Timestamp('2023-01-01').date()
event_cols = [x for x in month_df_dict[dict_date_key].columns if 'event' in x]
month_df_dict[pd.Timestamp('2023-01-01').date()][date_cond][event_cols]

In [None]:
month_df_dict[pd.Timestamp('2023-01-01').date()]['BTC-USD_event']

In [None]:
12.94 - 1.75*0.848609

In [None]:
dict_date_key = pd.Timestamp('2022-04-01').date()
date_cond = (month_df_dict[dict_date_key].index >= pd.Timestamp('2022-04-01'))
month_df_dict[dict_date_key][date_cond].head(20)

In [None]:
event_cols = [x for x in month_df_dict[dict_date_key].columns if 'event' in x]
month_df_dict[dict_date_key][date_cond][event_cols]

In [None]:
actual_notional_cols = [x for x in month_df_dict[dict_date_key].columns if 'actual_position_notional' in x and 'total' not in x]
month_df_dict[dict_date_key][date_cond][actual_notional_cols].plot(figsize=(12,10))

In [None]:
dict_date_key = pd.Timestamp('2022-05-01').date()
date_cond = (month_df_dict[dict_date_key].index >= pd.Timestamp('2022-05-01'))
month_df_dict[dict_date_key][date_cond].head()

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in final_month_ticker_list]
month_df_dict[dict_date_key][date_cond][final_signal_cols].plot()

In [None]:
prev_month_open_positions

In [None]:
previous_month_open_positions_df

In [None]:
month_end_date_temp = pd.Timestamp('2022-04-30').date()
month_end_open_positions = MONTH_END_OPEN_POSITION_DIR / f'{month_end_date_temp}_open_positions.pickle'
df_month_end_positions_temp = pd.read_pickle(month_end_open_positions)

In [None]:
df_month_end_positions

In [None]:
month_df_dict[dict_date_key][date_cond]['total_portfolio_value'].plot()

In [None]:
month_start_date

In [None]:
is_first_month = (pd.Timestamp(month_start_date).normalize()
                  == pd.Timestamp(signal_start_date).normalize())

In [None]:
is_first_month

In [None]:
month_start_date

In [None]:
r2_confirm_days

In [None]:
df_month_end_positions

In [None]:
date_cond = (df.index >= pd.Timestamp('2022-05-01'))
df[date_cond].head(20)

In [None]:
df[(df.index >= pd.Timestamp('2022-03-20'))]['available_cash'].plot()

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in final_month_ticker_list]

In [None]:
date_cond = (df_signal.index >= pd.Timestamp('2022-04-01'))
df_signal[date_cond][final_signal_cols].plot(figsize=(12,10))

In [None]:
date_cond = (df.index >= pd.Timestamp('2022-04-01'))
notional_cols = [f'{ticker}_actual_position_notional' for ticker in final_month_ticker_list]
df[date_cond][notional_cols].plot(figsize=(12,10))

In [None]:
date_cond = (df.index >= pd.Timestamp('2022-04-01'))
notional_cols = [f'{ticker}_target_notional' for ticker in final_month_ticker_list]
df[date_cond][notional_cols].plot(figsize=(12,10))

In [None]:
df[date_cond]['total_portfolio_value'].plot()

In [None]:
df[date_cond][notional_cols]

In [None]:
df[date_cond].head()

In [None]:
for k, v in eligibility_by_date.items():
    print(k, len(v), sorted(v))

In [None]:
len(final_month_ticker_list)

In [None]:
sorted(final_month_ticker_list)

In [None]:
month_start_date

In [None]:
signal_start_date

In [None]:
use_specific_start_date

In [None]:
df_signal.head()

In [None]:
eligibility_by_date

In [None]:
df_trend.head()

In [None]:
# Scratch version of the monthly loop that only rebuilds the daily liquidity
# eligibility map. After the loop, `eligibility_by_date`, `month_start_date`,
# `month_end_date` and `final_month_ticker_list` hold the values of the LAST month.
# Needs `eligible_ticker_dict`, `client`, `lookback_day_count`, `adv_quantile` and
# `high_low_quantile` from other cells.
month_end_dates = pd.date_range(start=pd.Timestamp('2022-03-31').date(), end=pd.Timestamp('2022-12-31').date(), freq='M')
prev_month_open_ticker_list = []

## Loop through all month-end dates
for prev_month_end in month_end_dates:
    ## Pull in the final ticker list for the month based on the signal strength
    # eligible_ticker_dict is keyed by the prior month-end as a datetime.date.
    final_month_ticker_list = eligible_ticker_dict[prev_month_end.date()]

    ## Set the start and end date based on the following months
    # Day after prev_month_end, and the following month-end (both datetime.date).
    month_start_date = prev_month_end.date() + pd.Timedelta(days=1)
    month_end_date = (prev_month_end + pd.offsets.MonthEnd(1)).date()
    print(month_start_date, month_end_date, final_month_ticker_list)

    # NOTE(review): unlike the full monthly loop in this notebook, no `product_list`
    # is passed here, so the helper's default universe applies -- confirm intended.
    eligibility_by_date = build_eligibility_by_date_for_month(
            client,
            month_start_date=month_start_date,
            month_end_date=month_end_date,
            lookback_day_count=lookback_day_count,
            adv_quantile=adv_quantile, high_low_quantile=high_low_quantile,
            max_workers=5, batch_size=12, between_batches_sleep=0.10
        )

In [None]:
for k, v in eligibility_by_date.items():
    print(k, len(v), v)

In [None]:
month_ends_between(start_date='2025-01-31', end_date='2025-02-28')

In [None]:
eligible_ticker_dict[pd.Timestamp('2022-03-31').date()]

In [None]:
final_month_ticker_list

In [None]:
prev_month_open_ticker_list

In [None]:
frozen_ticker_list = [ticker for ticker in prev_month_open_ticker_list if ticker not in final_month_ticker_list]

In [None]:
frozen_ticker_list

In [None]:
final_month_ticker_list = final_month_ticker_list + frozen_ticker_list

In [None]:
final_month_ticker_list

In [None]:
df_final_1.head()

In [None]:
final_eligible_list

In [None]:
# Build a one-row snapshot of the portfolio at `end_date` from df_final_1:
# cash, total portfolio value and the size/notional of every open ticker.
# Needs `df_final_1`, `final_eligible_list` and `end_date` from other cells.
# The loop variable `ticker` stays defined after this cell runs.
actual_position_notional_cols = [col for col in df_final_1.columns if '_actual_position_notional' in col]
actual_position_size_cols = [col for col in df_final_1.columns if '_actual_position_size' in col]
# A ticker counts as open when its position size on end_date is strictly positive.
open_position_size_list = []
for ticker in final_eligible_list:
    if df_final_1[f'{ticker}_actual_position_size'].loc[pd.Timestamp(end_date)] > 0:
        open_position_size_list.append(ticker)

## Create previous months dataframe
prev_month_cols = ['date', 'available_cash', 'total_portfolio_value']
open_position_size_cols = [f'{ticker}_actual_position_size' for ticker in open_position_size_list]
open_position_notional_cols = [f'{ticker}_actual_position_notional' for ticker in open_position_size_list]
prev_month_cols = prev_month_cols + open_position_size_cols + open_position_notional_cols
# Start from an empty frame, create row 0 carrying the date, then index by that date.
df_previous_month_positions = pd.DataFrame(columns=prev_month_cols)
df_previous_month_positions.loc[0, 'date'] = pd.Timestamp(end_date)
df_previous_month_positions = df_previous_month_positions.set_index('date')
df_previous_month_positions.loc[pd.Timestamp(end_date), 'available_cash'] = df_final_1.loc[pd.Timestamp(end_date), 'available_cash']
df_previous_month_positions.loc[pd.Timestamp(end_date), 'total_portfolio_value'] = df_final_1.loc[pd.Timestamp(end_date), 'total_portfolio_value']
# Copy the month-end notional and size of each open ticker into the snapshot row.
for ticker in open_position_size_list:
    df_previous_month_positions.loc[pd.Timestamp(end_date), f'{ticker}_actual_position_notional'] = df_final_1.loc[pd.Timestamp(end_date), f'{ticker}_actual_position_notional']
    df_previous_month_positions.loc[pd.Timestamp(end_date), f'{ticker}_actual_position_size'] = df_final_1.loc[pd.Timestamp(end_date), f'{ticker}_actual_position_size']

In [None]:
df_previous_month_positions.loc[0, 'date'] = pd.Timestamp(end_date)
df_previous_month_positions = df_previous_month_positions.set_index('date')

In [None]:
df_previous_month_positions

In [None]:
df_previous_month_positions['available_cash'].iloc[0]

In [None]:
prev_month_open_ticker_list = [col.split('_')[0] for col in df_previous_month_positions.columns if '_actual_position_notional' in col]

In [None]:
prev_month_open_ticker_list

In [None]:
df_previous_month_positions.loc[pd.Timestamp(end_date)]

In [None]:
end_date

In [None]:
# Build the trend signal, the volatility-adjusted signal and the ATR series for
# `ticker_list` over [start_date - warmup_days, end_date], then join ATR onto the signal.
# Needs `start_date`, `end_date`, `ticker_list`, `warmup_days` and the strategy
# parameters from other cells.
print('Generating Moving Average Ribbon Signal!!')
## Generate Trend Signal for all tickers

df_trend = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=ticker_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg,
    mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window,
    entry_rolling_donchian_window=entry_rolling_donchian_window,
    exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate,
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight,
    weighted_signal_ewm_window=weighted_signal_ewm_window, rolling_r2_window=rolling_r2_window,
    lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window,
    r2_confirm_days=r2_confirm_days, log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window,
    vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min,
    r2_strong_threshold=r2_strong_threshold, use_activation=use_activation,
    tanh_activation_constant_dict=tanh_activation_constant_dict, moving_avg_type=moving_avg_type,
    long_only=long_only, price_or_returns_calc=price_or_returns_calc, use_coinbase_data=use_coinbase_data,
    use_saved_files=use_saved_files, saved_file_end_date=saved_file_end_date)

print('Generating Volatility Adjusted Trend Signal!!')
## Get Volatility Adjusted Trend Signal
df_signal = size_cont.get_volatility_adjusted_trend_signal_continuous(df_trend, ticker_list, volatility_window,
                                                                      annual_trading_days)

print('Getting Average True Range for Stop Loss Calculation!!')
## Get Average True Range for Stop Loss Calculation
# Request ATR from the same warm-up-extended start as the trend signal. Starting at
# `start_date` left the ATR columns empty for the warm-up rows after the left merge,
# and was inconsistent with the other ATR calls in this notebook, which all include
# the warm-up period.
df_atr = size_cont.get_average_true_range_portfolio(start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date,
                                                    ticker_list=ticker_list, rolling_atr_window=rolling_atr_window,
                                                    highest_high_window=highest_high_window,
                                                    price_or_returns_calc='price',
                                                    use_coinbase_data=use_coinbase_data,
                                                    use_saved_files=use_saved_files,
                                                    saved_file_end_date=saved_file_end_date)
# Left join on the index: every signal row is kept and ATR columns are attached by date.
df_signal = pd.merge(df_signal, df_atr, left_index=True, right_index=True, how='left')

In [None]:
df_signal.head()

In [None]:
## With R2 Confirm Days = 2
df_final_1 = apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=final_eligible_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=2,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=False, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)

In [None]:
# Preview the last rows of one ticker's position columns.
# `ticker` is whatever value the most recent `for ticker in ...` loop left behind.
# Previously the second assignment reused the name `actual_position_notional_cols`,
# so the notional column was silently dropped; keep the two lists separate and show both.
actual_position_notional_cols = [f'{ticker}_actual_position_notional']
actual_position_size_cols = [f'{ticker}_actual_position_size']
df_final_1[actual_position_notional_cols + actual_position_size_cols].tail()

In [None]:
df_final_1.tail()

In [None]:
# April 2022 run for the universe selected on 2022-03-31: trend signal,
# volatility-adjusted signal and ATR, all starting `warmup_days` before the month.
eligible_ticker_start_date = pd.Timestamp('2022-04-01').date() - pd.Timedelta(days=warmup_days)
eligible_ticker_end_date = pd.Timestamp('2022-04-30').date()
eligible_ticker_list = eligible_ticker_dict[pd.Timestamp('2022-03-31').date()]

df_trend = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
    # date range and universe
    start_date=eligible_ticker_start_date,
    end_date=eligible_ticker_end_date,
    ticker_list=eligible_ticker_list,
    # moving-average ribbon
    fast_mavg=fast_mavg,
    slow_mavg=slow_mavg,
    mavg_stepsize=mavg_stepsize,
    mavg_z_score_window=mavg_z_score_window,
    # Donchian channel
    entry_rolling_donchian_window=entry_rolling_donchian_window,
    exit_rolling_donchian_window=exit_rolling_donchian_window,
    use_donchian_exit_gate=use_donchian_exit_gate,
    # signal blending
    ma_crossover_signal_weight=ma_crossover_signal_weight,
    donchian_signal_weight=donchian_signal_weight,
    weighted_signal_ewm_window=weighted_signal_ewm_window,
    # rolling R-squared settings
    rolling_r2_window=rolling_r2_window,
    lower_r_sqr_limit=lower_r_sqr_limit,
    upper_r_sqr_limit=upper_r_sqr_limit,
    r2_smooth_window=r2_smooth_window,
    r2_confirm_days=r2_confirm_days,
    # vol-of-vol settings
    log_std_window=log_std_window,
    coef_of_variation_window=coef_of_variation_window,
    vol_of_vol_z_score_window=vol_of_vol_z_score_window,
    vol_of_vol_p_min=vol_of_vol_p_min,
    r2_strong_threshold=r2_strong_threshold,
    # activation / averaging options
    use_activation=use_activation,
    tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type,
    long_only=long_only,
    price_or_returns_calc=price_or_returns_calc,
    # data source
    use_coinbase_data=use_coinbase_data,
    use_saved_files=False,
    saved_file_end_date=saved_file_end_date,
)

print('Generating Volatility Adjusted Trend Signal!!')
## Get Volatility Adjusted Trend Signal
df_signal = size_cont.get_volatility_adjusted_trend_signal_continuous(
    df=df_trend,
    ticker_list=eligible_ticker_list,
    volatility_window=volatility_window,
    annual_trading_days=annual_trading_days,
)

print('Getting Average True Range for Stop Loss Calculation!!')
## Get Average True Range for Stop Loss Calculation
df_atr = size_cont.get_average_true_range_portfolio(
    start_date=eligible_ticker_start_date,
    end_date=eligible_ticker_end_date,
    ticker_list=eligible_ticker_list,
    rolling_atr_window=rolling_atr_window,
    highest_high_window=highest_high_window,
    price_or_returns_calc='price',
    use_coinbase_data=use_coinbase_data,
    use_saved_files=False,
    saved_file_end_date=saved_file_end_date,
)
# Attach the ATR columns by date, keeping every row of the signal frame.
df_signal = df_signal.merge(df_atr, left_index=True, right_index=True, how='left')

In [None]:
df_trend.head()

In [None]:
df_signal.head()

In [None]:
final_eligible_list

In [None]:
start_date = pd.Timestamp('2022-04-01').date()
end_date = pd.Timestamp('2022-04-30').date()

In [None]:
def calculate_asset_level_returns(df, end_date, ticker_list):
    """
    Add per-ticker PnL, percentage return and position-flag columns to a backtest frame.

    Parameters
    ----------
    df : pd.DataFrame
        Backtest output. Must contain `total_portfolio_value` and, for every ticker in
        `ticker_list`, the columns `{ticker}_actual_position_size`, `{ticker}_open` and
        `{ticker}_actual_position_notional`.
    end_date : any
        Currently unused; kept so existing call sites keep working.
    ticker_list : list[str]
        Tickers to compute asset-level columns for.

    Returns
    -------
    pd.DataFrame
        A copy of `df` with three extra columns per ticker:
        `{ticker}_daily_pnl`, `{ticker}_daily_pct_returns`, `{ticker}_position_count`.
        The input frame is not modified.
    """
    # Work on a copy: callers pass boolean-filtered slices, and writing new columns
    # into a slice mutates shared state / triggers SettingWithCopy behaviour.
    df = df.copy()

    # Previous row's portfolio value is the denominator for every ticker, so compute it once.
    prev_portfolio_value = df['total_portfolio_value'].shift(1)

    for ticker in ticker_list:
        # Position held on this row earns the change from this row's open to the next row's open.
        next_open_change = df[f'{ticker}_open'].diff().shift(-1)
        df[f'{ticker}_daily_pnl'] = df[f'{ticker}_actual_position_size'] * next_open_change
        # First row (no previous portfolio value) and last row (no next open) become 0.
        df[f'{ticker}_daily_pct_returns'] = (df[f'{ticker}_daily_pnl'] / prev_portfolio_value).fillna(0)
        ## This is not entirely accurate: it flags "has a position", not "opened a trade"
        df[f'{ticker}_position_count'] = np.where(df[f'{ticker}_actual_position_notional'] != 0, 1, 0)
    return df

In [None]:
print(start_date, end_date)

In [None]:
## With R2 Confirm Days = 2
df_final_1 = apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=final_eligible_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=2,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=False, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)
df_final_1 = df_final_1[df_final_1.index >= pd.Timestamp(start_date)]

print('Calculating In Sample Asset Returns!!')
df_final_1 = calculate_asset_level_returns(df_final_1, end_date, final_eligible_list)

portfolio_perf_metrics_1 = calculate_risk_and_performance_metrics(df_final_1, strategy_daily_return_col=f'portfolio_daily_pct_returns',
                                                                  strategy_trade_count_col=f'count_of_positions', include_transaction_costs_and_fees=False, passive_trade_rate=0.05, annual_trading_days=365, transaction_cost_est=0.001)
portfolio_perf_metrics_1

In [None]:
ticker_perf_prod_config_expanded_1 = {}
for t in final_eligible_list:
    _ticker_perf = perf.calculate_risk_and_performance_metrics(
        df_final_1,
        strategy_daily_return_col=f'{t}_daily_pct_returns',
        strategy_trade_count_col=f'{t}_position_count',
        annual_trading_days=365,
        include_transaction_costs_and_fees=False
    )
    ticker_perf_prod_config_expanded_1[t] = _ticker_perf

In [None]:
ticker_perf_prod_config_expanded_1

In [None]:
df_final_1.tail()

In [None]:
out = ELIGIBLE_DIR / f"2022-04-30_eligible.parquet"
df_eligible_2 = pd.read_parquet(out)

In [None]:
eligible_ticker_start_date = pd.Timestamp('2022-04-30').date()
eligible_ticker_end_date = pd.Timestamp('2022-05-31').date()

eligible_ticker_list = df_eligible_2.product_id.unique().tolist()
df_trend_eligible_2 = get_trend_donchian_signal_for_portfolio_with_rolling_r_sqr_vol_of_vol(
    start_date=eligible_ticker_start_date-pd.Timedelta(days=warmup_days), end_date=eligible_ticker_end_date, ticker_list=eligible_ticker_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg,
    mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window,
    entry_rolling_donchian_window=entry_rolling_donchian_window,
    exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate,
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight,
    weighted_signal_ewm_window=weighted_signal_ewm_window, rolling_r2_window=rolling_r2_window,
    lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window,
    r2_confirm_days=r2_confirm_days, log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window,
    vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min,
    r2_strong_threshold=r2_strong_threshold, use_activation=use_activation,
    tanh_activation_constant_dict=tanh_activation_constant_dict, moving_avg_type=moving_avg_type,
    long_only=long_only, price_or_returns_calc=price_or_returns_calc, use_coinbase_data=use_coinbase_data,
    use_saved_files=False, saved_file_end_date=saved_file_end_date)

In [None]:
eligible_ticker_list

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in eligible_ticker_list]#[0:11]]
date_cond = (df_trend_eligible_2.index >= pd.Timestamp(eligible_ticker_start_date)) & (df_trend_eligible_2.index <= pd.Timestamp(eligible_ticker_end_date))
df_trend_eligible_2[date_cond][final_signal_cols].plot(figsize=(10,8))

In [None]:
print(len(eligible_ticker_list))
eligible_ticker_list

In [None]:
# Rank the eligible universe by final signal on the first day of the window
# and keep only the tickers strictly above the 80th percentile.
date_cond = (df_trend_eligible_2.index == pd.Timestamp(eligible_ticker_start_date))
df_raw_eligible_list = df_trend_eligible_2.loc[date_cond, final_signal_cols].T.reset_index()
df_raw_eligible_list.columns = ['ticker', 'final_signal']
df_raw_eligible_list = df_raw_eligible_list.sort_values('final_signal', ascending=False)
# Series.quantile ignores NaN signals; np.quantile would return NaN as soon as one
# ticker has no signal, and the `>` filter below would then select nothing.
min_strength = df_raw_eligible_list['final_signal'].quantile(0.8)
min_strength_cond = (df_raw_eligible_list['final_signal'] > min_strength)
# .copy() so the column rewrite below operates on an independent frame, not a filtered view.
df_raw_eligible_list = df_raw_eligible_list[min_strength_cond].copy()
df_raw_eligible_list["ticker"] = df_raw_eligible_list["ticker"].str.replace(r"_final_signal$", "", regex=True)
final_eligible_list_2 = df_raw_eligible_list.ticker.tolist()

In [None]:
df_raw_eligible_list

In [None]:
final_signal_cols = [f'{ticker}_final_signal' for ticker in final_eligible_list_2]
date_cond = (df_trend_eligible_2.index >= pd.Timestamp(eligible_ticker_start_date)) & (df_trend_eligible_2.index <= pd.Timestamp(eligible_ticker_end_date))
df_trend_eligible_2[date_cond][final_signal_cols].plot(figsize=(10,8))

In [None]:
start_date = pd.Timestamp('2022-05-01').date()
end_date = pd.Timestamp('2022-05-31').date()

In [None]:
## With R2 Confirm Days = 2
df_final_2 = apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=final_eligible_list_2, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=2,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=False, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)
df_final_2 = df_final_2[df_final_2.index >= pd.Timestamp(start_date)]

print('Calculating In Sample Asset Returns!!')
df_final_2 = calculate_asset_level_returns(df_final_2, end_date, final_eligible_list_2)

portfolio_perf_metrics_2 = calculate_risk_and_performance_metrics(df_final_2, strategy_daily_return_col=f'portfolio_daily_pct_returns',
                                                                  strategy_trade_count_col=f'count_of_positions', include_transaction_costs_and_fees=False, passive_trade_rate=0.05, annual_trading_days=365, transaction_cost_est=0.001)
portfolio_perf_metrics_2

In [None]:
ticker_perf_prod_config_expanded_2 = {}
for t in final_eligible_list_2:
    _ticker_perf = perf.calculate_risk_and_performance_metrics(
        df_final_2,
        strategy_daily_return_col=f'{t}_daily_pct_returns',
        strategy_trade_count_col=f'{t}_position_count',
        annual_trading_days=365,
        include_transaction_costs_and_fees=False
    )
    ticker_perf_prod_config_expanded_2[t] = _ticker_perf

In [None]:
ticker_perf_prod_config_expanded_2

In [None]:
df_final_1 = apply_target_volatility_position_sizing_continuous_strategy_with_rolling_r_sqr_vol_of_vol(
    start_date=start_date - pd.Timedelta(days=warmup_days), end_date=end_date, ticker_list=final_eligible_list, fast_mavg=fast_mavg, slow_mavg=slow_mavg, mavg_stepsize=mavg_stepsize, mavg_z_score_window=mavg_z_score_window, 
    entry_rolling_donchian_window=entry_rolling_donchian_window, exit_rolling_donchian_window=exit_rolling_donchian_window, use_donchian_exit_gate=use_donchian_exit_gate, 
    ma_crossover_signal_weight=ma_crossover_signal_weight, donchian_signal_weight=donchian_signal_weight, weighted_signal_ewm_window=weighted_signal_ewm_window,
    rolling_r2_window=rolling_r2_window, lower_r_sqr_limit=lower_r_sqr_limit, upper_r_sqr_limit=upper_r_sqr_limit, r2_smooth_window=r2_smooth_window, r2_confirm_days=r2_confirm_days,
    log_std_window=log_std_window, coef_of_variation_window=coef_of_variation_window, vol_of_vol_z_score_window=vol_of_vol_z_score_window, vol_of_vol_p_min=vol_of_vol_p_min, r2_strong_threshold=r2_strong_threshold,
    use_activation=use_activation, tanh_activation_constant_dict=tanh_activation_constant_dict,
    moving_avg_type=moving_avg_type, long_only=long_only, price_or_returns_calc=price_or_returns_calc,
    initial_capital=initial_capital, rolling_cov_window=rolling_cov_window, volatility_window=volatility_window,
    rolling_atr_window=rolling_atr_window, atr_multiplier=atr_multiplier,
    transaction_cost_est=transaction_cost_est, passive_trade_rate=passive_trade_rate,
    notional_threshold_pct=notional_threshold_pct, cooldown_counter_threshold=cooldown_counter_threshold,
    use_coinbase_data=use_coinbase_data, use_saved_files=False, saved_file_end_date=saved_file_end_date, 
    rolling_sharpe_window=rolling_sharpe_window, cash_buffer_percentage=cash_buffer_percentage, annualized_target_volatility=annualized_target_volatility,
    annual_trading_days=annual_trading_days, use_specific_start_date=use_specific_start_date, signal_start_date=start_date)
df_final_1 = df_final_1[df_final_1.index >= pd.Timestamp(start_date)]

print('Calculating In Sample Asset Returns!!')
df_final_1 = calculate_asset_level_returns(df_final_1, end_date, final_eligible_list)

portfolio_perf_metrics_1 = calculate_risk_and_performance_metrics(df_final_1, strategy_daily_return_col=f'portfolio_daily_pct_returns',
                                                                  strategy_trade_count_col=f'count_of_positions', include_transaction_costs_and_fees=False, passive_trade_rate=0.05, annual_trading_days=365, transaction_cost_est=0.001)
portfolio_perf_metrics_1

In [None]:
ticker_perf_prod_config_expanded_1 = {}
for t in final_eligible_list:
    _ticker_perf = perf.calculate_risk_and_performance_metrics(
        df_final_1,
        strategy_daily_return_col=f'{t}_daily_pct_returns',
        strategy_trade_count_col=f'{t}_position_count',
        annual_trading_days=365,
        include_transaction_costs_and_fees=False
    )
    ticker_perf_prod_config_expanded_1[t] = _ticker_perf

In [None]:
ticker_perf_prod_config_expanded_1

In [None]:
df_final_1['equity_curve'].plot(figsize=(10,8))

In [None]:
df_final_1.head(20)

In [None]:
# Column selectors for the raw (pre-filter) and final trend signals of the eligible universe.
raw_trend_signal_cols = [f'{ticker}_final_weighted_additive_signal' for ticker in eligible_ticker_list]
final_signal_cols = [f'{ticker}_final_signal' for ticker in eligible_ticker_list]
# start_date is a datetime.date; wrap it in pd.Timestamp like the other cells do, since
# ordering comparisons between a datetime64 index and a bare date raise in pandas 2.x.
date_cond = (df_trend_eligible.index >= pd.Timestamp(start_date))
df_trend_eligible[date_cond][raw_trend_signal_cols].head()

In [None]:
df_trend_eligible.head()

In [None]:
# Plot the open price history up to the end of the April 2022 window.
# Compare against a Timestamp (not a bare date) to match the other date filters in this notebook.
date_cond = (df_trend_eligible.index <= pd.Timestamp('2022-04-30'))
# NOTE(review): the original comprehension had no iterable and no suffix (syntax error).
# eligible_ticker_list and the '_open' suffix are assumed from the neighbouring cells
# and the 'EOS-USD_open' column used below - confirm.
open_cols = [f'{ticker}_open' for ticker in eligible_ticker_list]
df_trend_eligible[date_cond]['EOS-USD_open'].plot(figsize=(10,8))

In [None]:
df_trend_eligible[date_cond][raw_trend_signal_cols].loc[start_date].sort_values(ascending=False)

In [None]:
df_trend_eligible[date_cond][final_signal_cols].loc[start_date].sort_values(ascending=False)

In [None]:
adv_floor_usd = 2000000
median_spread_bps_cap = 25
warmup_days_available = True
exclusions = ['USDC-USD', 'DAI-USD', 'USDT-USD']

In [None]:
%%time
# Build one liquidity row per product as of `end_date`.
end_date = '2025-10-24'
asof_key = pd.Timestamp(end_date).date()  # the metrics frame is looked up by a datetime.date key
liquidity_rows = []
for product_id in curr_product_list:
    print(product_id)
    df = get_liquidity_metrics(client, product_id, asof_date=end_date, lookback_day_count=90)
    asof_metrics = df.loc[asof_key]  # single lookup instead of one per metric
    liquidity_rows.append({
        'asof_date': end_date,
        'product_id': product_id,
        'adv_90d_median': asof_metrics['adv_90d_median'],
        'high_low_spread_90d_median': asof_metrics['high_low_spread_90d_median'],
        'warmup_days_available': has_warmup_coverage(client, product_id=product_id, asof_date=end_date, warmup_days=300)
    })

# Construct the frame once from the collected records: avoids quadratic row-by-row
# enlargement and lets pandas infer numeric/bool dtypes instead of object columns.
df_liquidity = pd.DataFrame(
    liquidity_rows,
    columns=['asof_date','product_id','adv_90d_median','high_low_spread_90d_median','warmup_days_available'])

In [None]:
# Liquidity thresholds taken at the 60th percentile of each metric across products.
# Each quantile is computed over the non-null values of its OWN column; previously the
# spread quantile was filtered with the ADV mask, so a NaN spread could poison the result.
adv_null_cond = (df_liquidity['adv_90d_median'].notnull())
high_low_null_cond = (df_liquidity['high_low_spread_90d_median'].notnull())
adv_usd_floor = np.quantile(df_liquidity.loc[adv_null_cond, 'adv_90d_median'], q=0.60)
# Despite the name, this value is applied as an upper bound (<=) in the eligibility filter.
high_low_spread_floor = np.quantile(df_liquidity.loc[high_low_null_cond, 'high_low_spread_90d_median'], q=0.60)
exclusions = ['USDC-USD', 'DAI-USD', 'USDT-USD']

In [None]:
adv_usd_floor

In [None]:
high_low_spread_floor

In [None]:
df_liquidity[df_liquidity.warmup_days_available].groupby(['asof_date']).agg({'adv_90d_median': ['median','mean','min','max'],
                                                                             'high_low_spread_90d_median': ['median','mean','min','max']})

In [None]:
df_liquidity[df_liquidity.product_id == 'USDT-USD']

In [None]:
exclusions = ['USDC-USD', 'DAI-USD', 'USDT-USD']
eligible_cond = (
    (df_liquidity["warmup_days_available"]) &
    (df_liquidity["adv_90d_median"] >= adv_usd_floor) &
    (df_liquidity["high_low_spread_90d_median"] <= high_low_spread_floor) &
    (~df_liquidity['product_id'].isin(exclusions))
)

In [None]:
df_liquidity[eligible_cond].shape

In [None]:
df_liquidity[eligible_cond]

In [None]:
df_liquidity.shape

In [None]:
df_liquidity[df_liquidity.warmup_days_available].shape

In [None]:
df_liquidity

In [None]:
df_liquidity.agg({'adv_90d_median': ['median','mean','min','max']})

In [None]:
# Single-ticker sanity check of the rolling liquidity metrics.
ticker = 'XRP-USD'
liquidity_day_count = 90
end_date = pd.Timestamp('2025-10-24').date()
# Drive the lookback and both rolling windows from liquidity_day_count rather than repeating the literal.
start_date = end_date - pd.Timedelta(days=liquidity_day_count)
df_ohlc = cn.save_historical_crypto_prices_from_coinbase(ticker=ticker, user_start_date=True, start_date=start_date, end_date=end_date, save_to_file=False, portfolio_name='Default')
# Column names keep the "90d" label used elsewhere in the notebook.
df_ohlc['adv_90d_median'] = df_ohlc['volume'].rolling(liquidity_day_count).median()
# High-low range relative to the bar mid-price, in basis points.
df_ohlc['high_low_spread_bps'] = (df_ohlc['high'] - df_ohlc['low']) / ((df_ohlc['high'] + df_ohlc['low']) / 2) * 10000
df_ohlc['high_low_spread_90d_median'] = df_ohlc['high_low_spread_bps'].rolling(liquidity_day_count).median()

In [None]:
df_ohlc.shape

In [None]:
df_ohlc

In [None]:
start_date

In [None]:
# liquidity_metrics.py
def compute_adv_and_spread(df_ohlcv, window=90):
    """
    Compute rolling liquidity metrics per ticker from daily OHLCV bars.

    Parameters
    ----------
    df_ohlcv : pd.DataFrame
        Multi-asset daily OHLCV in long format with columns
        ['date','ticker','open','high','low','close','volume']  # volume in base units
    window : int, default 90
        Rolling window length, in rows (bars) per ticker.

    Returns
    -------
    pd.DataFrame
        One row per input bar, sorted by ticker then date, with columns
        ['ticker','date','adv_usd_median_90','spread_bps_median_90','first_date','history_days'].
        - adv_usd_median_90: rolling median of close * volume.
        - spread_bps_median_90: rolling median of the high-low range over the bar mid, in bps.
        - history_days: days between the bar date and the ticker's first bar with a non-null close.
        The "_90" suffix in the column names is fixed and does not follow `window`.
        The input frame is not modified.
    """
    # Require at least 30 bars (or a third of the window), but never more than the
    # window itself - pandas rejects min_periods > window.
    min_periods = min(window, max(30, window // 3))

    # Sort so each ticker's bars are rolled in chronological order; sort_values returns a new frame.
    df = df_ohlcv.sort_values(["ticker", "date"]).reset_index(drop=True)
    df["notional_usd"] = df["close"] * df["volume"]
    # If you don't have L1 quotes, use a conservative spread proxy:
    df["spread_bps_proxy"] = (df["high"] - df["low"]) / ((df["high"] + df["low"]) / 2) * 1e4

    def _rolling_median_by_ticker(col):
        # transform keeps the result aligned to df's rows. groupby.apply returning a Series per
        # group would instead produce a wide frame whenever all tickers share the same dates.
        return (df.groupby("ticker")[col]
                  .transform(lambda s: s.rolling(window, min_periods=min_periods).median()))

    out = df[["ticker", "date"]].copy()
    out["adv_usd_median_90"] = _rolling_median_by_ticker("notional_usd")
    out["spread_bps_median_90"] = _rolling_median_by_ticker("spread_bps_proxy")

    # history_days (since first valid bar)
    first_date = (df.dropna(subset=["close"])
                  .groupby("ticker")["date"].min()
                  .rename("first_date")
                  .reset_index())
    out = out.merge(first_date, on="ticker", how="left")
    out["history_days"] = (pd.to_datetime(out["date"]) - pd.to_datetime(out["first_date"])).dt.days
    return out


In [None]:
prod = client.get_products()['products']
rows = [product_to_dict(p) for p in prod]
df_products = pd.json_normalize(rows)    

In [None]:
# optional: keep only columns you care about
reqd_cols = [
    "product_id","base_currency_id","quote_currency_id","product_type","status",
    "trading_disabled","is_disabled","cancel_only","limit_only","post_only","auction_mode","view_only",
    "base_increment","quote_increment","price_increment","base_min_size","quote_min_size",
    "alias","alias_to","display_name","product_venue","new_at","price","approximate_quote_24h_volume"
]
df_products = df_products[reqd_cols]

# optional: coerce numerics
for col in ["base_increment","quote_increment","price_increment","base_min_size","quote_min_size",
            "price","approximate_quote_24h_volume"]:
    if col in df_products.columns:
        df_products[col] = pd.to_numeric(df_products[col], errors="coerce")

In [None]:
CANON_QUOTE = "USD"
# Filter to USD spot & tradable
filt = (
    (df_products["product_type"] == "SPOT") &
    (df_products["quote_currency_id"] == CANON_QUOTE) &
    (df_products["status"] == "online") &
    (~df_products["trading_disabled"]) &
    (~df_products["is_disabled"]) &
    (~df_products["view_only"]) &
    (~df_products["cancel_only"]) &
    (~df_products["auction_mode"])
)
df_products = df_products[filt]

In [None]:
df_products.groupby(['base_currency_id']).size()

In [None]:
df_products.alias_to

In [None]:
df_products.shape

In [None]:
df_products[filt].shape

In [None]:
type(df_products.trading_disabled.iloc[0])

In [None]:
df_products.head()

In [None]:
df_products.head()

In [None]:
# universe.py
from pathlib import Path
import pandas as pd
import json
from datetime import datetime, timezone

PRODUCTS_DIR = Path("data_folder/universe/products")
PRODUCTS_DIR.mkdir(parents=True, exist_ok=True)

CANON_QUOTE = "USD"

def _to_bool(x):
    # Coinbase fields come as bools or strings; normalize
    if isinstance(x, bool): return x
    if x is None: return False
    s = str(x).strip().lower()
    return s in ("true", "1", "yes")

def coinbase_products_snapshot(client, asof=None, save=True):
    """
    Snapshot the tradable USD spot products and optionally persist them as parquet.

    Parameters
    ----------
    client : object
        Must expose `get_products()` returning a mapping with a 'products' entry.
        # NOTE(review): pd.DataFrame(raw) assumes each product is dict-like - confirm.
    asof : str or date-like, optional
        Snapshot date; defaults to today's UTC date as an ISO string. Used both for the
        `asof_date` column and for the output file name.
    save : bool, default True
        When True, write the snapshot to PRODUCTS_DIR / f"{asof}.parquet".

    Returns
    -------
    pd.DataFrame
        One row per canonical product (column `ticker`), restricted to online, enabled
        USD-quoted SPOT products, with the lean column set listed in `keep_cols`
        plus `asof_date`.
    """
    asof = asof or datetime.now(timezone.utc).date().isoformat()
    raw = client.get_products()['products']
    df = pd.DataFrame(raw)

    # Ensure missing columns exist
    for col in ["alias","alias_to","min_market_funds"]:
        if col not in df.columns: df[col] = None

    # Filter to USD spot & tradable
    filt = (
        (df["product_type"] == "SPOT") &
        (df["quote_currency_id"] == CANON_QUOTE) &
        (df["status"] == "online") &
        (~df["trading_disabled"].apply(_to_bool)) &
        (~df["is_disabled"].apply(_to_bool)) &
        (~df["view_only"].apply(_to_bool)) &
        (~df["cancel_only"].apply(_to_bool)) &
        (~df["auction_mode"].apply(_to_bool))
    )
    df = df.loc[filt].copy()

    # Canonicalize: prefer the alias as the canonical id when it is a -USD symbol.
    # Missing aliases may arrive as None or NaN; only real strings are considered, since
    # `NaN or ""` evaluates to NaN and would break a `.strip()` call.
    aliases = [a.strip() if isinstance(a, str) else "" for a in df["alias"]]
    # Built as a plain list (not a row-wise apply) so an empty filtered frame also works.
    df["canonical_product_id"] = [
        alias if alias.endswith("-USD") else pid
        for alias, pid in zip(aliases, df["product_id"])
    ]

    # Deduplicate on canonical product id, prefer the row whose product_id endswith('-USD')
    df["prefer_usd_flag"] = df["product_id"].str.endswith("-USD")
    df = (df.sort_values(["canonical_product_id","prefer_usd_flag"], ascending=[True, False])
            .drop_duplicates(subset=["canonical_product_id"], keep="first"))

    # Keep a lean schema for downstream
    keep_cols = [
        "canonical_product_id","product_id","base_currency_id","quote_currency_id","product_type","status",
        "trading_disabled","is_disabled","cancel_only","limit_only","post_only","auction_mode","view_only",
        "base_increment","quote_increment","price_increment","base_min_size","quote_min_size","min_market_funds",
        "alias","alias_to","display_name","product_venue","new_at"
    ]
    df = df[keep_cols].rename(columns={"canonical_product_id":"ticker"})

    # Coerce numerics where helpful
    for col in ["base_increment","quote_increment","price_increment","base_min_size","quote_min_size","min_market_funds"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    df["asof_date"] = pd.to_datetime(asof).date()

    if save:
        out = PRODUCTS_DIR / f"{asof}.parquet"
        df.to_parquet(out, index=False)
    return df
