In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
from pathlib import Path
import sys
import matplotlib.pyplot as plt
import pandas as pd

# Treat the parent of the current working directory as the repository root and
# put it at the front of sys.path (once) so the project packages can be imported.
PROJECT_ROOT = Path.cwd().parents[0]
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

In [26]:
from backtesting.global_loop import run_global_ranking_daily_portfolio
from object.class_file import BatchConfig, StrategyParams
from joblib import Parallel, delayed, cpu_count
import pandas as pd
import numpy as np
from utils.inline_scanner import InlineScannerConfig, build_scans_inline
import matplotlib.pyplot as plt
import seaborn as sns

### Pipeline

In [27]:
UNIVERSES = ["sweden"]   # adjust to the universes you want to scan
SCAN_FREQ = "W-FRI"          # pandas frequency alias for the scan dates (weekly on Fridays here); "B" = business days (recommended) / "D" for pure daily

# In-sample (IS) and out-of-sample (OOS) date windows, as "YYYY-MM-DD" strings.
IS_START, IS_END   = "2022-12-31", "2024-12-31"
OOS_START, OOS_END = "2025-01-01", "2025-12-31"

In [28]:
# Scanner settings; this object is passed as `cfg` to build_scans_inline via
# parallel_build_scans_inline.
inline_cfg = InlineScannerConfig(
    raw_data_path=PROJECT_ROOT / "data" / "raw" / "d1",
    asset_registry_path=PROJECT_ROOT / "data" / "asset_registry.csv",
    lookback_days=504,          # presumably ~2 years of trading days (2 x 252) — TODO confirm
    min_obs=100,
    liquidity_lookback=20,
    liquidity_min_moves=0.0  # NOTE(review): 0.0 looks like "no liquidity filter" — confirm against the scanner
)

In [29]:
def _date_chunks(start, end, freq, chunk_days):
    dates = pd.date_range(start, end, freq=freq)
    if len(dates) == 0:
        return []
    chunks = []
    for i in range(0, len(dates), chunk_days):
        s = dates[i].strftime("%Y-%m-%d")
        e = dates[min(i + chunk_days - 1, len(dates) - 1)].strftime("%Y-%m-%d")
        chunks.append((s, e))
    return chunks

def parallel_build_scans_inline(universes, start_date, end_date, freq, cfg, n_jobs=None, chunk_days=30):
    """Run ``build_scans_inline`` over date chunks in parallel and stack the results.

    The ``[start_date, end_date]`` range is cut into chunks by ``_date_chunks``
    (``chunk_days`` scan dates per chunk) and each chunk is submitted as one
    joblib task. When ``n_jobs`` is None, all CPUs but one are used (at least 1).

    Returns the row-wise concatenation (fresh integer index) of every
    non-empty chunk result, or an empty ``pd.DataFrame()`` when no chunk
    produced rows.
    """
    workers = n_jobs if n_jobs is not None else max(cpu_count() - 1, 1)

    tasks = [
        delayed(build_scans_inline)(
            universes=universes, start_date=chunk_start, end_date=chunk_end, freq=freq, cfg=cfg
        )
        for chunk_start, chunk_end in _date_chunks(start_date, end_date, freq, chunk_days)
    ]
    chunk_frames = Parallel(n_jobs=workers, verbose=10)(tasks)

    # Drop chunks that returned nothing (None or zero rows) before stacking.
    non_empty = [frame for frame in chunk_frames if frame is not None and len(frame) > 0]
    if not non_empty:
        return pd.DataFrame()
    return pd.concat(non_empty, ignore_index=True)

In [30]:
# Number of parallel workers: every CPU but one, and never fewer than one.
n_jobs = max(cpu_count() - 1, 1)

In [31]:
# Build the in-sample scans in parallel, then report the row count and date span.
scans_is = parallel_build_scans_inline(
    UNIVERSES,
    IS_START,
    IS_END,
    SCAN_FREQ,
    inline_cfg,
    n_jobs=n_jobs,
    chunk_days=30,
)
print(
    "IS scans rows:", len(scans_is),
    "| dates:", scans_is["scan_date"].min(), "->", scans_is["scan_date"].max(),
)

[Parallel(n_jobs=7)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=7)]: Done   1 tasks      | elapsed:  2.3min
[Parallel(n_jobs=7)]: Done   2 out of   4 | elapsed:  4.6min remaining:  4.6min


IS scans rows: 2498 | dates: 2023-01-06 00:00:00 -> 2024-12-27 00:00:00


[Parallel(n_jobs=7)]: Done   4 out of   4 | elapsed:  4.7min finished


In [32]:
# Save the in-sample scans to data/scanner/scans_is.parquet under the project root.
scans_is.to_parquet(PROJECT_ROOT / "data" / "scanner" / "scans_is.parquet")

In [33]:
# Build the out-of-sample scans in parallel, then report the row count and date span.
scans_oos = parallel_build_scans_inline(
    UNIVERSES,
    OOS_START,
    OOS_END,
    SCAN_FREQ,
    inline_cfg,
    n_jobs=n_jobs,
    chunk_days=30,
)
print(
    "OOS scans rows:", len(scans_oos),
    "| dates:", scans_oos["scan_date"].min(), "->", scans_oos["scan_date"].max(),
)

[Parallel(n_jobs=7)]: Using backend LokyBackend with 7 concurrent workers.


OOS scans rows: 1224 | dates: 2025-01-03 00:00:00 -> 2025-12-26 00:00:00


[Parallel(n_jobs=7)]: Done   2 out of   2 | elapsed:  3.7min finished


In [34]:
# Save the out-of-sample scans to data/scanner/scans_oos.parquet under the project root.
scans_oos.to_parquet(PROJECT_ROOT / "data" / "scanner" / "scans_oos.parquet")

In [35]:
# Load both scan sets back from the parquet files written above.
# Presumably this lets the backtest cells start from the saved files without
# rebuilding the scans — confirm that is the intended workflow.
scans_is = pd.read_parquet(PROJECT_ROOT / "data" / "scanner" / "scans_is.parquet")
scans_oos = pd.read_parquet(PROJECT_ROOT / "data" / "scanner" / "scans_oos.parquet")

In [36]:
# Strategy settings shared by every backtest in this notebook.
BASE_FEES = 0.0002  # passed as StrategyParams.fees
TOP_N = 20  # passed as StrategyParams.top_n_candidates
MAX_POS = 5  # passed as StrategyParams.max_positions
MAX_HOLD = 30  # passed as StrategyParams.max_holding_days

# Heatmap grids
z_entries = [1.5, 1.75, 2.0, 2.25, 2.5]
z_windows = [40, 60, 80, 100]

In [37]:
# Backtest configuration for the in-sample window (IS_START to IS_END).
cfg_is = BatchConfig(
    data_path=PROJECT_ROOT / "data" / "raw" / "d1",
    start_date=IS_START,
    end_date=IS_END,
)

# Same data directory, restricted to the out-of-sample window (OOS_START to OOS_END).
cfg_oos = BatchConfig(
    data_path=PROJECT_ROOT / "data" / "raw" / "d1",
    start_date=OOS_START,
    end_date=OOS_END,
)

In [38]:
def run_for_params(z_window: int, z_entry: float, cfg: BatchConfig, scans: pd.DataFrame) -> float:
    """Backtest one ``(z_window, z_entry)`` grid point and return its Sharpe ratio.

    The exit and stop thresholds are derived from the entry threshold
    (``z_exit = 0.25 * z_entry``, ``z_stop = 2.0 * z_entry``). Fees, candidate
    count, position cap, holding cap and universes come from the notebook-level
    constants ``BASE_FEES``, ``TOP_N``, ``MAX_POS``, ``MAX_HOLD`` and ``UNIVERSES``.

    Returns ``float(res["stats"]["Sharpe"])``, or NaN when the backtest
    returns a falsy result.
    """
    backtest = run_global_ranking_daily_portfolio(
        cfg=cfg,
        params=StrategyParams(
            z_entry=z_entry,
            z_exit=0.25 * z_entry,
            z_stop=2.0 * z_entry,
            z_window=z_window,
            beta_mode="static",
            fees=BASE_FEES,
            top_n_candidates=TOP_N,
            max_positions=MAX_POS,
            max_holding_days=MAX_HOLD,
        ),
        universes=UNIVERSES,
        scans=scans,
    )

    return float(backtest["stats"]["Sharpe"]) if backtest else float("nan")


In [41]:
# Backtest every (z_window, z_entry) pair on the in-sample period, one joblib task per pair.
results_is = Parallel(n_jobs=n_jobs, verbose=10)(
    [
        delayed(run_for_params)(zw, ze, cfg_is, scans_is)
        for zw in z_windows
        for ze in z_entries
    ]
)

# Tasks are submitted with z_window as the outer loop, so reshaping the flat
# result list row by row gives z_window on rows and z_entry on columns.
heatmap_is = pd.DataFrame(
    np.array(results_is).reshape(len(z_windows), len(z_entries)),
    index=z_windows,
    columns=z_entries,
).rename_axis(index="z_window", columns="z_entry")

[Parallel(n_jobs=7)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=7)]: Done   4 tasks      | elapsed:   37.2s


KeyboardInterrupt: 

In [None]:
# Backtest every (z_window, z_entry) pair on the out-of-sample period, one joblib task per pair.
results_oos = Parallel(n_jobs=n_jobs, verbose=10)(
    [
        delayed(run_for_params)(zw, ze, cfg_oos, scans_oos)
        for zw in z_windows
        for ze in z_entries
    ]
)

# Tasks are submitted with z_window as the outer loop, so reshaping the flat
# result list row by row gives z_window on rows and z_entry on columns.
heatmap_oos = pd.DataFrame(
    np.array(results_oos).reshape(len(z_windows), len(z_entries)),
    index=z_windows,
    columns=z_entries,
).rename_axis(index="z_window", columns="z_entry")

In [None]:
# Show the in-sample and out-of-sample Sharpe grids side by side on one common color scale.
fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)

# Take the color limits over BOTH grids. Limits derived from the in-sample grid
# alone would clip every out-of-sample cell that falls outside the in-sample
# range, hiding exactly the degradation this comparison is meant to reveal.
# NaN cells (failed backtests) are ignored when computing the limits.
sharpe_values = np.concatenate(
    [heatmap_is.to_numpy(dtype=float).ravel(), heatmap_oos.to_numpy(dtype=float).ravel()]
)
sharpe_vmin = np.nanmin(sharpe_values)
sharpe_vmax = np.nanmax(sharpe_values)

for ax, grid, title in zip(
    axes,
    (heatmap_is, heatmap_oos),
    ("IN-SAMPLE (Sharpe)", "OUT-OF-SAMPLE (Sharpe)"),
):
    sns.heatmap(
        grid,
        ax=ax,
        annot=True,
        fmt=".2f",
        cmap="RdYlGn",
        center=0.0,
        vmin=sharpe_vmin,
        vmax=sharpe_vmax,
    )
    ax.set_title(title)

plt.tight_layout()
plt.show()

### Analyse du Sharpe année après année

In [None]:
# Stack the in-sample and out-of-sample scans into one frame with a fresh integer index.
scan_global = pd.concat([scans_is, scans_oos], ignore_index=True)

In [None]:
# Re-run the full parameter grid separately on each calendar year of scans and
# collect one (year, Sharpe heatmap) pair per year.
heatmap_year_list = []
for year, scans_year in scan_global.groupby(scan_global["scan_date"].dt.year):
    print(f"Processing year: {year} with {len(scans_year)} scans")

    # Restrict the backtest window to the span of this year's scan dates.
    # NOTE(review): these bounds are Timestamps, while the other BatchConfig
    # instances in this notebook receive date strings — confirm both are accepted.
    cfg = BatchConfig(
        data_path=PROJECT_ROOT / "data" / "raw" / "d1",
        start_date=scans_year["scan_date"].min(),
        end_date=scans_year["scan_date"].max(),
    )

    results_year = Parallel(n_jobs=n_jobs, verbose=10)(
        [
            delayed(run_for_params)(zw, ze, cfg, scans_year)
            for zw in z_windows
            for ze in z_entries
        ]
    )

    # z_window is the outer task loop, so a row-by-row reshape puts it on the rows.
    heatmap_year = pd.DataFrame(
        np.array(results_year).reshape(len(z_windows), len(z_entries)),
        index=z_windows,
        columns=z_entries,
    ).rename_axis(index="z_window", columns="z_entry")
    heatmap_year_list.append((year, heatmap_year))

In [None]:
# Draw one Sharpe heatmap per year found in heatmap_year_list.
#
# The grid is sized from the number of years actually present (up to 5 panels
# per row) and every year is plotted. The previous hard-coded slice [1:11]
# assumed a 2015-2025 dataset and silently dropped the first year of whatever
# data was loaded.
n_years = len(heatmap_year_list)
n_cols = max(min(n_years, 5), 1)
n_rows = max(-(-n_years // n_cols), 1)  # ceiling division, at least one row

# squeeze=False keeps `axes` a 2-D array even for a single row or column.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for idx, (year, heatmap) in enumerate(heatmap_year_list):
    sns.heatmap(heatmap, ax=axes[idx], annot=True, fmt=".2f", cmap="RdYlGn", center=0.0, cbar=False, vmin=0, vmax=1)
    axes[idx].set_title(f"Year {year}")

# Hide the panels left over when the last row is not full.
for idx in range(n_years, len(axes)):
    axes[idx].set_visible(False)

plt.tight_layout()
plt.show()

### Analyse globale avec jeu de paramètres fixés

In [66]:
# Backtest configuration spanning the whole study: from the in-sample start to the out-of-sample end.
cfg = BatchConfig(
    data_path=PROJECT_ROOT / "data" / "raw" / "d1",
    start_date=IS_START,
    end_date=OOS_END,
)

In [67]:
# In-sample plus out-of-sample scans in one frame with a fresh integer index.
scan_df = pd.concat([scans_is, scans_oos], ignore_index=True)

In [69]:
# Single fixed parameter set used for the detailed analysis below.
z_entry = 2.25
z_window = 80

# Same construction as in run_for_params: exit and stop thresholds scale with z_entry.
params = StrategyParams(
    z_entry=z_entry,
    z_exit=0.25 * z_entry,
    z_stop=2.0 * z_entry,
    z_window=z_window,
    beta_mode="static",
    fees=BASE_FEES,
    top_n_candidates=TOP_N,
    max_positions=MAX_POS,
    max_holding_days=MAX_HOLD,
)

# Run the backtest over the full period on the combined scans.
res = run_global_ranking_daily_portfolio(
    cfg=cfg, params=params, universes=UNIVERSES, scans=scan_df
)

In [70]:
# Keep only closed trades (those with an exit timestamp), as an independent copy.
tr = res["trades"].loc[lambda trades: trades["exit_datetime"].notna()].copy()

In [71]:
# Per-trade statistics over the closed trades in `tr`.
trade_returns = tr["trade_return"]
is_win = trade_returns > 0
is_loss = trade_returns < 0

hit_ratio = is_win.mean()

avg_return = trade_returns.mean()
std_return = trade_returns.std()

avg_win = trade_returns[is_win].mean()
avg_loss = trade_returns[is_loss].mean()

payoff_ratio = abs(avg_win / avg_loss)

# Trades with exactly zero return fall in the (1 - hit_ratio) share and are
# therefore weighted by avg_loss here.
expectancy = hit_ratio * avg_win + (1 - hit_ratio) * avg_loss

# Mean over standard deviation of per-trade returns; undefined without dispersion.
if std_return > 0:
    trade_sharpe = avg_return / std_return
else:
    trade_sharpe = np.nan

print("Nb closed trades:", len(tr))
print("Hit ratio:", round(hit_ratio, 3))
print("Expectancy:", round(expectancy, 4))
print("Avg win:", round(avg_win, 4))
print("Avg loss:", round(avg_loss, 4))
print("Payoff ratio:", round(payoff_ratio, 3))
print("Trade Sharpe:", round(trade_sharpe, 3))
display(res["stats"])

Nb closed trades: 101
Hit ratio: 0.614
Expectancy: 0.0039
Avg win: 0.0265
Avg loss: -0.0319
Payoff ratio: 0.829
Trade Sharpe: 0.107


{'Final Equity': 1.17,
 'CAGR': 0.052,
 'Sharpe': 0.39,
 'Max Drawdown': -0.199,
 'Nb Trades': 104,
 'PCA window': 252,
 'PCA q': 0.85,
 'PCA min assets': 12,
 'Mkt vol window': 20,
 'Mkt vol q': 0.8,
 '% days blocked (when active)': 0.076,
 'Max scan age (bdays)': 15}

In [72]:
import plotly.graph_objects as go

# Interactive equity curve of the fixed-parameter backtest.
eq = res["equity"]

fig = go.Figure(
    data=[
        go.Scatter(x=eq["datetime"], y=eq["equity"], mode="lines", name="Equity"),
    ]
)

fig.update_layout(
    title="Equity Curve",
    xaxis_title="Date",
    yaxis_title="Equity",
    template="plotly_dark",
    hovermode="x unified",
    width=900,
    height=450,
)

fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [73]:
# Trade count and return statistics per exit reason, computed on every row of
# res["trades"] (not only the closed trades kept in `tr`).
(
    res["trades"]
    .groupby("reason")
    .agg({"pair_id": "count", "trade_return": ["sum", "median", "std"]})
    .round(4)
)

Unnamed: 0_level_0,pair_id,trade_return,trade_return,trade_return
Unnamed: 0_level_1,count,sum,median,std
reason,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
SL,6,-0.1263,-0.0131,0.025
TIME,23,-0.6392,-0.0355,0.0451
TP,72,1.1615,0.017,0.0261


In [74]:
# Count, mean and standard deviation of closed-trade returns for each spread side.
(
    tr
    .groupby("side")["trade_return"]
    .agg(["count", "mean", "std"])
)

Unnamed: 0_level_0,count,mean,std
side,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LONG_SPREAD,46,-0.001426,0.033526
SHORT_SPREAD,55,0.008393,0.038702


In [75]:
from backtesting.global_loop import build_price_panel


# Every asset appearing on either leg of a scanned pair, upper-cased and de-duplicated.
all_assets = (
    pd.concat([scan_df["asset_1"], scan_df["asset_2"]], ignore_index=True)
    .astype(str)
    .str.upper()
    .unique()
    .tolist()
)

# Use Timestamp objects so pandas DateOffset arithmetic works inside build_price_panel.
start_date = pd.to_datetime(scan_df["scan_date"].min())
end_date = pd.to_datetime(scan_df["scan_date"].max())

price_df = build_price_panel(cfg, all_assets, start_date, end_date)