In [3]:
#!pip install pandas pyarrow yfinance fortitudo_tech matplotlib numpy
#!pip install alpaca-py seaborn fredapi python-dotenv cvxopt

In [5]:
"""
FFR Walk-Forward Backtest — Block 1: Data & Setup
==================================================
Fetches and prepares all data needed for the backtest.
Stocks via Alpaca; commodities and macro series via yfinance.

Unlike previous configurations, BTU is replaced by OKE (full membership since 2010) and APO is replaced by GS.
"""

import os
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import date
from dotenv import load_dotenv

load_dotenv("env")

# ── config ──
# Stock symbols requested from Alpaca by the run block.
assets = "NVDA ISRG GS DECK COST CAT OKE SHW NRG NFLX CBRE".split()
# yfinance symbols: the commodity ticker and the two macro series.
commodities = ["GC=F"]
macro_tickers = ["^VIX", "^GSPC"]
# Sample window as ISO date strings; the end is the day the cell is run.
start_date = "2016-01-01"
today = str(date.today())


# ── stock data (alpaca) ──
def fetch_stocks(tickers, start, end):
    """Download daily bars from Alpaca and return closes as a date × symbol table.

    Args:
        tickers: symbol or list of symbols passed to the bars request.
        start:   first date of the request (anything pd.to_datetime accepts).
        end:     last date of the request (anything pd.to_datetime accepts).

    Returns:
        DataFrame indexed by calendar date with one column per symbol, built
        from the 'close' field of the bars (pivot_table's default aggregation
        applies if a date/symbol pair appears more than once).

    Raises:
        KeyError: if ALPACA_API_KEY or ALPACA_SECRET_KEY is missing from the
            environment.
    """
    # alpaca-py is imported here, so it is only needed when stocks are fetched.
    from alpaca.data.enums import Adjustment, DataFeed
    from alpaca.data.historical import StockHistoricalDataClient
    from alpaca.data.requests import StockBarsRequest
    from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

    api_key = os.environ['ALPACA_API_KEY']
    secret_key = os.environ['ALPACA_SECRET_KEY']
    client = StockHistoricalDataClient(api_key=api_key, secret_key=secret_key)

    daily = TimeFrame(1, TimeFrameUnit.Day)
    request = StockBarsRequest(
        symbol_or_symbols=tickers,
        timeframe=daily,
        start=pd.to_datetime(start),
        end=pd.to_datetime(end),
        adjustment=Adjustment.ALL,
        feed=DataFeed.SIP,
    )

    # Flatten the returned index into columns so 'timestamp' and 'symbol'
    # can be used for the pivot.
    bars = client.get_stock_bars(request).df.reset_index()
    bars['date'] = bars['timestamp'].dt.date
    closes = bars.pivot_table(index='date', columns='symbol', values='close')
    return closes


# ── macro data (yfinance) ──
def fetch_macro(tickers, start, end):
    """Download closes from yfinance and fill gaps by linear interpolation.

    Args:
        tickers: symbol or list of symbols passed to yf.download.
        start:   start date passed to yf.download.
        end:     end date passed to yf.download.

    Returns:
        DataFrame indexed by calendar date (index name 'date'). When the
        download has two column levels only its 'Close' level is kept;
        otherwise the downloaded columns are returned as they are.
    """
    raw = yf.download(tickers, start=start, end=end, auto_adjust=True)
    closes = raw['Close'] if isinstance(raw.columns, pd.MultiIndex) else raw
    # Work on an independent copy so the index assignment below never writes
    # into a slice of the downloaded frame.
    closes = closes.copy()
    closes.index = pd.to_datetime(closes.index).date
    closes.index.name = 'date'
    # NOTE(review): linear interpolation fills a missing day from the next
    # observed value as well as the previous one — confirm that is acceptable
    # for a walk-forward backtest, or switch to a forward-fill.
    return closes.interpolate(method='linear')


# ── merge & clean ──
def build_dataset(stocks, comms, macro):
    """Align prices with the macro state variables and derive log-returns.

    Args:
        stocks: stock prices indexed by date, one column per symbol.
        comms:  commodity prices indexed by date.
        macro:  frame with '^GSPC' and '^VIX' columns indexed by date.

    Returns:
        (prices, pnl, state_vars), all on a DatetimeIndex:
        prices     — merged stock and commodity prices without missing values;
        pnl        — log-returns of `prices`, NaN rows (including the first)
                     removed;
        state_vars — 'SP500_ret' and 'VIX' on the same rows as `pnl`.
    """
    # Keep only dates present in both price sources, and only complete rows.
    merged = pd.merge(stocks, comms, left_index=True, right_index=True, how='inner')
    merged = merged.dropna()

    # S&P 500 log-return and VIX level, forward-filled onto the price dates.
    macro_state = pd.DataFrame({
        'SP500_ret': np.log(macro['^GSPC'] / macro['^GSPC'].shift(1)),
        'VIX': macro['^VIX'],
    })
    macro_state = macro_state.reindex(merged.index).ffill().dropna()

    shared = merged.index.intersection(macro_state.index)
    prices = merged.loc[shared]
    state_vars = macro_state.loc[shared]

    prices.index = pd.to_datetime(prices.index)
    state_vars.index = pd.to_datetime(state_vars.index)

    pnl = np.log(prices / prices.shift(1)).dropna()
    return prices, pnl, state_vars.loc[pnl.index]


# ── run ──
if __name__ == "__main__":
    # Download both sources, align them, then print a summary of the result.
    print("Fetching stocks via Alpaca...")
    stocks = fetch_stocks(assets, start_date, today)

    print("Fetching commodities & macro via yfinance...")
    # One yfinance call covers both groups; the columns are split again below.
    all_yf = fetch_macro(commodities + macro_tickers, start_date, today)

    print("Building dataset...")
    prices, pnl, state_vars = build_dataset(
        stocks,
        all_yf[commodities],
        all_yf[macro_tickers],
    )

    print("\n=== Dataset Ready ===")
    print(f"Assets:     {list(pnl.columns)}")
    print(f"Dimensions: {pnl.shape[0]} days × {pnl.shape[1]} assets")
    print(f"Range:      {pnl.index[0].date()} → {pnl.index[-1].date()}")
    print(f"Years:      {pnl.shape[0]/252:.1f}")
    print(f"State vars: {list(state_vars.columns)}")

    # Header and frame go out on separate lines, one pair per preview.
    print("\n── pnl.head() ──", pnl.head(), sep="\n")
    print("\n── state_vars.head() ──", state_vars.head(), sep="\n")
    print("\n── pnl.tail() ──", pnl.tail(), sep="\n")
    print("\n── state_vars.tail() ──", state_vars.tail(), sep="\n")

Fetching stocks via Alpaca...


[*********************100%***********************]  3 of 3 completed

Fetching commodities & macro via yfinance...
Building dataset...

=== Dataset Ready ===
Assets:     ['CAT', 'CBRE', 'COST', 'DECK', 'GS', 'ISRG', 'NFLX', 'NRG', 'NVDA', 'OKE', 'SHW', 'GC=F']
Dimensions: 2540 days × 12 assets
Range:      2016-01-06 → 2026-02-11
Years:      10.1
State vars: ['SP500_ret', 'VIX']

── pnl.head() ──
                 CAT      CBRE      COST      DECK        GS      ISRG  \
date                                                                     
2016-01-06 -0.015924 -0.043989 -0.009262 -0.028832 -0.024758  0.002280   
2016-01-07 -0.035029 -0.057534 -0.023154  0.014199 -0.031183 -0.029887   
2016-01-08 -0.010064 -0.011329 -0.017665 -0.055606 -0.004182 -0.000671   
2016-01-11 -0.029357 -0.020721  0.016974 -0.003121  0.010867  0.005520   
2016-01-12  0.002289  0.007944  0.008571 -0.009149 -0.000074  0.004826   

                NFLX       NRG      NVDA       OKE       SHW      GC=F  
date                                                                    
2016-0




In [11]:
"""
FFR Walk-Forward Backtest — Block 2: Stacked Optimizer
======================================================
Produces strategic weights via:
1. Mean-CVaR efficient frontier (long-only, max 25%, α=0.90)
2. Bootstrap mean-uncertainty resampling via entropy pooling
3. L-fold exposure stacking (Vorobets 2025)
"""

import numpy as np
import fortitudo.tech as ft


def stacked_optimizer(pnl, B=1000, N=100, P=9, pf_index=4, L=20,
                      alpha=0.90, max_weight=0.25, seed=3):
    """Run the full stacked optimizer on a training window.

    For each of B simulated mean vectors, the scenario probabilities are
    tilted by entropy pooling, a mean-CVaR frontier of P points is computed
    and the portfolio at `pf_index` is kept; the B portfolios are then
    combined with exposure stacking.

    Args:
        pnl:        DataFrame of log-returns (T × I).
        B:          number of bootstrap draws for mean uncertainty.
        N:          sample size per bootstrap draw.
        P:          number of frontier points.
        pf_index:   which frontier point to stack around.
        L:          number of folds for exposure stacking.
        alpha:      CVaR confidence level.
        max_weight: upper bound per asset.
        seed:       random seed (seeds NumPy's global generator).

    Returns:
        weights: array of strategic weights (I,).

    Raises:
        IndexError: if pf_index does not address one of the P frontier points.
    """
    # Fail before the bootstrap loop instead of after B optimisations.
    if not -P <= pf_index < P:
        raise IndexError(
            f"pf_index={pf_index} is out of range for a frontier of P={P} points"
        )

    R = pnl.values
    S, I = R.shape

    # constraints: 0 <= w <= max_weight
    G = np.vstack((np.eye(I), -np.eye(I)))
    h = np.hstack((max_weight * np.ones(I), np.zeros(I)))

    # mean-uncertainty bootstrap: draw N returns per replication from a
    # normal fitted to the sample moments
    stats = ft.simulation_moments(pnl)
    means = stats['Mean'].values
    cov = ft.covariance_matrix(pnl).values

    np.random.seed(seed)
    return_sim = np.random.multivariate_normal(means, cov, (N, B))

    p = np.ones((S, 1)) / S  # uniform prior over the S scenarios
    frontier_mean = np.full((I, P, B), np.nan)

    for b in range(B):
        # Mean vector of this replication's N simulated returns.
        means_b = np.mean(return_sim[:, b, :], axis=0)
        # Scenario probabilities constrained so that R.T @ q equals means_b.
        q = ft.entropy_pooling(p, A=R.T, b=means_b[:, np.newaxis])
        # NOTE(review): scenarios are rescaled by q * S rather than handing q
        # to the optimizer — confirm whether the installed MeanCVaR accepts
        # scenario probabilities directly.
        R_weighted = R * q * S
        cvar_opt_b = ft.MeanCVaR(R_weighted, G, h, alpha=alpha)
        frontier_mean[:, :, b] = cvar_opt_b.efficient_frontier(P)

    # exposure stacking of the chosen frontier point across all replications
    weights = ft.exposure_stacking(L, frontier_mean[:, pf_index, :])

    return weights


# ── test ──
# Train on everything up to 24 months after the first return date.
train_end = pnl.index[0] + pd.DateOffset(months=24)
pnl_train = pnl.loc[:train_end]

print(
    f"Training: {pnl_train.index[0].date()} → {pnl_train.index[-1].date()}\n"
    f"  {len(pnl_train)} days × {pnl_train.shape[1]} assets\n"
    "  Running stacked optimizer (B=1000)..."
)

w_strategic = stacked_optimizer(pnl_train)

# One line per asset, then the total as a sanity check.
print("\nStrategic Weights:")
for name, w in zip(pnl.columns, w_strategic):
    print(f"  {name:6s} {100 * w:6.2f}%")
print(f"  Sum:   {100 * w_strategic.sum():.1f}%")

Training: 2016-01-06 → 2018-01-05
  505 days × 12 assets
  Running stacked optimizer (B=1000)...

Strategic Weights:
  CAT      9.37%
  CBRE     1.54%
  COST    12.25%
  DECK     4.94%
  GS       4.26%
  ISRG    13.92%
  NFLX     4.03%
  NRG      3.12%
  NVDA     9.51%
  OKE      4.92%
  SHW     10.04%
  GC=F    22.10%
  Sum:   100.0%


In [9]:
"""
FFR Walk-Forward Backtest — Block 3: Regime Classification
==========================================================
3×3 partitioning on SP500_ret and VIX using 25th/75th percentiles.
Thresholds computed on training window only.
"""

import numpy as np
import pandas as pd


def classify_3(series, p25, p75):
    """Bucket values into 0 / 1 / 2 using two cut-offs.

    Returns an array holding 0 where the value is <= p25, 1 where it is
    <= p75 (and above p25), and 2 everywhere else.
    """
    # Conditions are tried in order; the first one that holds decides.
    return np.select([series <= p25, series <= p75], [0, 1], default=2)


def compute_regimes(state_vars, thresholds=None):
    """Assign each row to one of the 3×3 growth/VIX states.

    Args:
        state_vars: DataFrame with 'SP500_ret' and 'VIX' columns.
        thresholds: dict with keys 'sp_p25', 'sp_p75', 'vix_p25', 'vix_p75'.
                    When None, the 25th/75th percentiles of `state_vars`
                    itself are used.

    Returns:
        (regimes, thresholds): `regimes` has columns 'growth', 'vix' and
        'state' (= growth + 3 * vix) on the index of `state_vars`;
        `thresholds` is the dict that was applied.
    """
    sp_ret = state_vars['SP500_ret']

    if thresholds is None:
        vix_level = state_vars['VIX']
        thresholds = dict(
            sp_p25=np.percentile(sp_ret, 25),
            sp_p75=np.percentile(sp_ret, 75),
            vix_p25=np.percentile(vix_level, 25),
            vix_p75=np.percentile(vix_level, 75),
        )

    growth = classify_3(sp_ret, thresholds['sp_p25'], thresholds['sp_p75'])
    vix = classify_3(state_vars['VIX'], thresholds['vix_p25'], thresholds['vix_p75'])

    # The state id encodes both buckets: growth in the units, VIX in the threes.
    regimes = pd.DataFrame(
        {'growth': growth, 'vix': vix, 'state': growth + 3 * vix},
        index=state_vars.index,
    )
    return regimes, thresholds


def print_regime_summary(regimes, thresholds):
    """Print the thresholds, the count per observed state and the last row's state.

    Args:
        regimes:    DataFrame with 'growth', 'vix' and 'state' columns.
        thresholds: dict with keys 'sp_p25', 'sp_p75', 'vix_p25', 'vix_p75'.
    """
    labels = ('Low', 'Mid', 'High')
    states = regimes['state']
    total = len(regimes)

    print("=== Regime Thresholds ===")
    print(f"  SP500_ret:  Low ≤ {thresholds['sp_p25']:.5f}  |  Mid ≤ {thresholds['sp_p75']:.5f}  |  High")
    print(f"  VIX:        Low ≤ {thresholds['vix_p25']:.2f}  |  Mid ≤ {thresholds['vix_p75']:.2f}  |  High")

    print(f"\n=== State Distribution ({states.nunique()}/9 observed) ===")
    for s in sorted(states.unique()):
        # Undo state = growth + 3 * vix.
        v, g = divmod(s, 3)
        n = (states == s).sum()
        print(f"  State {s}: Growth={labels[g]}, VIX={labels[v]}  →  {n} obs ({n/total*100:.1f}%)")

    latest = regimes.iloc[-1]
    growth_label = labels[latest['growth']]
    vix_label = labels[latest['vix']]
    print(f"\nCurrent state: {latest['state']}  (Growth={growth_label}, VIX={vix_label})")


# ── test ──
# Fit the regime thresholds on the first 24 months only and print a summary.
# NOTE(review): `pnl` and `state_vars` are not defined in this cell; they must
# already exist in the kernel from the data cell.
train_end = pnl.index[0] + pd.DateOffset(months=24)
sv_train = state_vars.loc[:train_end]
# thresholds=None, so the percentiles come from sv_train itself.
regimes_train, thresholds = compute_regimes(sv_train)
print_regime_summary(regimes_train, thresholds)

=== Regime Thresholds ===
  SP500_ret:  Low ≤ -0.00186  |  Mid ≤ 0.00370  |  High
  VIX:        Low ≤ 10.81  |  Mid ≤ 14.63  |  High

=== State Distribution (9/9 observed) ===
  State 0: Growth=Low, VIX=Low  →  9 obs (1.8%)
  State 1: Growth=Mid, VIX=Low  →  88 obs (17.4%)
  State 2: Growth=High, VIX=Low  →  30 obs (5.9%)
  State 3: Growth=Low, VIX=Mid  →  66 obs (13.1%)
  State 4: Growth=Mid, VIX=Mid  →  125 obs (24.8%)
  State 5: Growth=High, VIX=Mid  →  61 obs (12.1%)
  State 6: Growth=Low, VIX=High  →  52 obs (10.3%)
  State 7: Growth=Mid, VIX=High  →  39 obs (7.7%)
  State 8: Growth=High, VIX=High  →  35 obs (6.9%)

Current state: 2  (Growth=High, VIX=Low)


In [12]:
"""
FFR Walk-Forward Backtest — Block 4: Entropy Pooling
====================================================
For each observed state, solve for a probability vector over training
scenarios that matches the state's mean of the state variables and keeps
their second moment at or below the state's (an inequality constraint),
while staying close to an exponential-decay prior.
"""

import numpy as np
import fortitudo.tech as ft


def fit_entropy_pooling(pnl_train, state_vars_train, regimes_train, n_states=9):
    """Fit state-conditional probability vectors via entropy pooling.

    For every state observed in `regimes_train`, the exponential-decay prior
    is tilted so that the probability-weighted mean of each state variable
    equals the mean over that state's rows, with the weighted second moment
    constrained by the state's own second moment.

    Args:
        pnl_train:        DataFrame of training log-returns (T × I).
        state_vars_train: DataFrame with ['SP500_ret', 'VIX'] for training.
        regimes_train:    DataFrame with 'state' column from compute_regimes.
        n_states:         total number of possible states (9 for 3×3).

    Returns:
        state_probs: dict {state_id: probability vector of length T}. A state
                     whose optimisation raises falls back to the prior.
        p_prior:     the exponential decay prior used.
    """
    T = len(pnl_train)
    p_prior = ft.exp_decay_probs(pnl_train, half_life=T / 2)

    state_var_cols = ['SP500_ret', 'VIX']
    state_vals = state_vars_train[state_var_cols].values

    observed_states = regimes_train['state'].unique()

    # per-state moments (rows of unobserved states stay at zero and are unused)
    state_means = np.zeros((n_states, len(state_var_cols)))
    state_stds = np.zeros((n_states, len(state_var_cols)))
    for s in observed_states:
        mask = regimes_train['state'].values == s
        state_means[s] = np.mean(state_vals[mask], axis=0)
        state_stds[s] = np.std(state_vals[mask], axis=0)

    # entropy pooling per state
    state_probs = {}
    for s in observed_states:
        mu_s = state_means[s]
        sigma_s = state_stds[s]

        # Equality block: probabilities sum to 1 and each state variable's
        # weighted mean equals the state mean.
        A = np.vstack((np.ones((1, T)), state_vals.T))
        b = np.vstack(([[1]], mu_s[:, np.newaxis]))

        # Second-moment block: E[x^2] against sigma^2 + mu^2 (presumably an
        # upper bound, G q <= h — confirm against the fortitudo.tech docs).
        G = state_vals.T ** 2
        h = (sigma_s ** 2 + mu_s ** 2)[:, np.newaxis]

        try:
            q = ft.entropy_pooling(p_prior, A, b, G, h)
            state_probs[s] = q.flatten()
        except Exception as exc:
            # Best effort: keep the run going with the prior, but report why
            # the fit failed. KeyboardInterrupt/SystemExit are not caught.
            state_probs[s] = p_prior.flatten()
            print(f"  State {s}: entropy pooling failed "
                  f"({type(exc).__name__}: {exc}), using prior")

    return state_probs, p_prior.flatten()


def build_states_prob_matrix(state_probs, regimes_train, n_states=9):
    """Stack the per-state probability vectors into a T × n_states matrix.

    Each vector is rescaled to sum to one and written to the column given by
    its state id; columns of states missing from `state_probs` stay at zero.
    `regimes_train` is only used for its length T.
    """
    n_rows = len(regimes_train)
    matrix = np.zeros((n_rows, n_states))
    for state_id in state_probs:
        vec = state_probs[state_id]
        matrix[:, state_id] = vec / vec.sum()
    return matrix


# ── test ──
# Rebuild the 24-month training window and its regimes, then fit one
# probability vector per observed state.
# NOTE(review): `pnl`, `state_vars`, `pd` and `compute_regimes` come from
# earlier cells; this cell imports only numpy and fortitudo.tech.
train_end = pnl.index[0] + pd.DateOffset(months=24)
pnl_train = pnl.loc[:train_end]
sv_train = state_vars.loc[:train_end]
regimes_train, thresholds = compute_regimes(sv_train)

print(f"Fitting entropy pooling on {len(pnl_train)} scenarios, {regimes_train['state'].nunique()} states...")
state_probs, p_prior = fit_entropy_pooling(pnl_train, sv_train, regimes_train)
states_prob = build_states_prob_matrix(state_probs, regimes_train)

print(f"States probability matrix: {states_prob.shape}")
print(f"\nPer-state effective scenarios (higher = more robust):")
for s in sorted(state_probs.keys()):
    # Normalise, then report exp(entropy of q) as the effective scenario count.
    q = state_probs[s] / state_probs[s].sum()
    # The floor of 1e-300 keeps log() finite where q is exactly zero.
    eff = np.exp(-q @ np.log(np.maximum(q, 1e-300)))
    # Undo state = growth + 3 * vix.
    g, v = s % 3, s // 3
    labels = ['Low', 'Mid', 'High']
    print(f"  State {s} (G={labels[g]}, V={labels[v]}): {eff:.0f} / {len(pnl_train)} scenarios")

Fitting entropy pooling on 505 scenarios, 9 states...
States probability matrix: (505, 9)

Per-state effective scenarios (higher = more robust):
  State 0 (G=Low, V=Low): 19 / 505 scenarios
  State 1 (G=Mid, V=Low): 104 / 505 scenarios
  State 2 (G=High, V=Low): 43 / 505 scenarios
  State 3 (G=Low, V=Mid): 98 / 505 scenarios
  State 4 (G=Mid, V=Mid): 192 / 505 scenarios
  State 5 (G=High, V=Mid): 87 / 505 scenarios
  State 6 (G=Low, V=High): 90 / 505 scenarios
  State 7 (G=Mid, V=High): 88 / 505 scenarios
  State 8 (G=High, V=High): 60 / 505 scenarios


In [24]:
"""
FFR Walk-Forward Backtest — Block 6: CVaR Sizing
=================================================
Compute CVaR from FFR scenarios, derive leverage and
max position size for both strategic and tactical arms.
"""

import numpy as np


def compute_cvar(returns, alpha=0.90):
    """Historical CVaR: the negated mean of the worst (1 - alpha) share of returns.

    Args:
        returns: one-dimensional collection of scenario returns.
        alpha:   confidence level; the worst floor((1 - alpha) * n) scenarios
                 are averaged, and never fewer than one.

    Returns:
        The tail mean with its sign flipped, so a loss is a positive number.

    Raises:
        ValueError: if `returns` is empty.
    """
    ordered = np.sort(np.asarray(returns, dtype=float).ravel())
    n_obs = ordered.size
    if n_obs == 0:
        raise ValueError("compute_cvar needs at least one return")

    # (1 - alpha) * n can land a hair below a whole number in floating point
    # ((1 - 0.90) * 100 == 9.999999999999998), so nudge before truncating.
    cutoff = int(np.floor((1 - alpha) * n_obs + 1e-9))
    # Always average at least the single worst scenario; an empty tail
    # would make the mean NaN.
    cutoff = max(cutoff, 1)
    return -np.mean(ordered[:cutoff])


def size_arm(returns_h, weights, equity, risk_budget, alpha=0.90, max_leverage=1.66):
    """Turn portfolio weights into notionals under a CVaR risk budget.

    Args:
        returns_h:    scenario matrix; `returns_h @ weights` gives one
                      portfolio return per scenario.
        weights:      portfolio weights.
        equity:       capital allocated to this arm.
        risk_budget:  CVaR budget; raw leverage is risk_budget / CVaR.
        alpha:        CVaR confidence level.
        max_leverage: cap on gross exposure as a multiple of equity.

    Returns:
        dict with 'cvar', 'leverage_raw', 'leverage', 'gross_weight',
        'max_position' (equity * leverage) and 'notional'
        (max_position * weights).

    Raises:
        ValueError: if the portfolio CVaR is NaN.
    """
    port_returns = returns_h @ weights
    cvar = compute_cvar(port_returns, alpha)
    if np.isnan(cvar):
        raise ValueError("portfolio CVaR is NaN; cannot size the arm")

    # A non-positive CVaR means the tail shows no loss, so the risk budget
    # does not bind. Dividing would give a negative or undefined multiplier
    # that min() below would pass straight through.
    leverage_raw = risk_budget / cvar if cvar > 0 else np.inf

    gross_weight = np.abs(weights).sum()
    if gross_weight == 0:
        # All-zero weights: nothing to scale, and no division by zero.
        leverage = 0.0
    else:
        # The cap applies to gross exposure, hence the division by gross weight.
        leverage = min(leverage_raw, max_leverage / gross_weight)

    max_position = equity * leverage
    notional = max_position * weights

    return {
        'cvar': cvar,
        'leverage_raw': leverage_raw,
        'leverage': leverage,
        'gross_weight': gross_weight,
        'max_position': max_position,
        'notional': notional,
    }


# ── test ──
# Size the strategic arm on the full strategic equity.
# NOTE(review): `returns_h` and `w_strategic` are not defined in this cell;
# `returns_h` is not defined anywhere in the cells shown — confirm that the
# cell producing it runs before this one.
equity_strategic = 300_000
tactical_ratio = 0.5  # not used in this cell
risk_budget = 0.10
alpha = 0.90

strat = size_arm(returns_h, w_strategic, equity_strategic, risk_budget, alpha)

print(f"=== Strategic Arm ===")
print(f"  CVaR {alpha:.0%}:      {strat['cvar']*100:.2f}%")
print(f"  Leverage (raw): {strat['leverage_raw']:.2f}x")
print(f"  Leverage (cap): {strat['leverage']:.2f}x")
print(f"  Max position:   ${strat['max_position']:,.0f}")

=== Strategic Arm ===
  CVaR 90%:      1.02%
  Leverage (raw): 9.84x
  Leverage (cap): 1.66x
  Max position:   $498,000


In [25]:
"""
FFR Walk-Forward Backtest — Block 7: Tactical Optimizer
=======================================================
Mean-CVaR optimization on FFR scenario matrix,
conditioned on current regime. Allows long/short.
"""

import numpy as np
import fortitudo.tech as ft


def tactical_optimize(returns_h, alpha=0.90, max_weight=0.25):
    """Solve a mean-CVaR problem on the scenario matrix with symmetric bounds.

    Args:
        returns_h:  scenario matrix with one column per asset.
        alpha:      CVaR confidence level.
        max_weight: absolute bound per asset (-max_weight <= w <= max_weight).

    Returns:
        Flat array of weights for the efficient portfolio whose return target
        is the average of the per-asset mean returns.
    """
    n_assets = returns_h.shape[1]
    identity = np.eye(n_assets)
    bound = max_weight * np.ones(n_assets)

    # Stacked as [I; -I] w <= [bound; bound], i.e. |w_i| <= max_weight.
    G = np.vstack([identity, -identity])
    h = np.hstack([bound, bound])

    optimizer = ft.MeanCVaR(returns_h, G, h, alpha=alpha)
    target = returns_h.mean(axis=0).mean()
    solution = optimizer.efficient_portfolio(target)
    return np.array(solution).flatten()


# ── test ──
# Optimise the tactical weights, then size the arm on its share of equity.
# NOTE(review): `returns_h`, `pnl_train`, `equity_strategic`, `tactical_ratio`,
# `risk_budget` and `size_arm` must already exist in the kernel; none of them
# is defined in this cell.
alpha = 0.90
max_weight = 0.25

w_tactical = tactical_optimize(returns_h, alpha, max_weight)

print(f"\nTactical Weights:")
for name, w in zip(pnl_train.columns, w_tactical):
    print(f"  {name:6s} {w*100:+7.2f}%")
print(f"  Sum:   {w_tactical.sum()*100:+.1f}%")

# The tactical arm is sized on a fraction of the strategic equity.
equity_tactical = equity_strategic * tactical_ratio
tact = size_arm(returns_h, w_tactical, equity_tactical, risk_budget, alpha)

print(f"\n=== Tactical Arm ===")
print(f"  CVaR {alpha:.0%}:      {tact['cvar']*100:.2f}%")
print(f"  Leverage (raw): {tact['leverage_raw']:.2f}x")
print(f"  Leverage (cap): {tact['leverage']:.2f}x")
print(f"  Gross weight:   {tact['gross_weight']:.2f}")
print(f"  Max position:   ${tact['max_position']:,.0f}")


Tactical Weights:
  CAT     +24.96%
  CBRE     +8.56%
  COST     +1.81%
  DECK     +0.11%
  GS       +9.70%
  ISRG    +13.77%
  NFLX     +1.85%
  NRG      +9.51%
  NVDA    +15.47%
  OKE      -8.17%
  SHW      -2.56%
  GC=F    +25.00%
  Sum:   +100.0%

=== Tactical Arm ===
  CVaR 90%:      0.69%
  Leverage (raw): 14.57x
  Leverage (cap): 1.37x
  Gross weight:   1.21
  Max position:   $204,980


In [26]:
# ── combined leverage ──
# Gross exposure of both arms, measured against the strategic equity alone.
# NOTE(review): `strat`, `tact`, `equity_strategic` and `equity_tactical` come
# from the two sizing cells; this cell defines none of them.
gross_strat = np.abs(strat['notional']).sum()
gross_tact = np.abs(tact['notional']).sum()
total_leverage = (gross_strat + gross_tact) / equity_strategic

print(f"\n=== Combined ===")
print(f"  Gross strategic: ${gross_strat:>12,.0f}  ({strat['leverage']:.2f}x on ${equity_strategic:,.0f})")
print(f"  Gross tactical:  ${gross_tact:>12,.0f}  ({tact['leverage']:.2f}x on ${equity_tactical:,.0f})")
print(f"  Total leverage:  {total_leverage:.2f}x on ${equity_strategic:,.0f}")
# NOTE(review): the cap checked here is 1.6, while size_arm defaults to
# max_leverage=1.66 — confirm which value is intended.
if total_leverage > 1.6:
    print(f"  ⚠ Exceeds 1.6x cap — would scale down in live loop")


=== Combined ===
  Gross strategic: $     498,000  (1.66x on $300,000)
  Gross tactical:  $     249,000  (1.37x on $150,000)
  Total leverage:  2.49x on $300,000
  ⚠ Exceeds 1.6x cap — would scale down in live loop
