# 03 — Signals & Validation

Validates the pairs discovered by cross-sector clustering using
transient validation and stable pair tracking.

In [1]:
import sys, os

# Put the directory two levels above the working directory at the front of
# sys.path (once) so the project-local packages imported below resolve.
project_root = os.path.abspath(os.path.join(os.pardir, os.pardir))
already_importable = project_root in sys.path
if not already_importable:
    sys.path.insert(0, project_root)

import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from config import DEFAULT_CONFIG
from signals.transient import validate_transient_event
from signals.stable import track_stable_pairs
from screener.analysis import build_pair_registry
from screener.universe import load_cached_universe

%matplotlib inline

## 1. Load Artifacts

In [2]:
# Directory (relative to the working directory) holding the pickled artifacts.
data_dir = os.path.join('..', 'data', 'combined')

# Only the third element of the cached universe tuple is used in this notebook.
_, _, sector_map = load_cached_universe('combined')

# Each required artifact is read from `<data_dir>/<name>.pkl` into `arts[name]`.
# NOTE: pickle.load can execute arbitrary code; only load files you produced
# yourself or otherwise trust.
required = ['ts_df', 'cluster_history', 'pair_co_cluster_freq', 'df_formations']
arts = {}
for name in required:
    artifact_path = os.path.join(data_dir, f'{name}.pkl')
    with open(artifact_path, 'rb') as f:
        arts[name] = pickle.load(f)

print(f"Loaded {len(arts)} artifacts")
print(f"Sector map: {len(sector_map)} tickers")

Loaded 4 artifacts
Sector map: 142 tickers


## 2. Build Pair Registry

In [3]:
# Build the pair registry from the cluster history and the pair co-clustering
# frequencies; the sector map is forwarded as a keyword argument.
cluster_history = arts['cluster_history']
pair_co_cluster_freq = arts['pair_co_cluster_freq']
registry = build_pair_registry(
    cluster_history,
    pair_co_cluster_freq,
    sector_map=sector_map,
)
print(f"{len(registry)} pairs above noise-adjusted freq threshold")

# The pair-type breakdown is only printed when the registry has that column.
has_pair_type = 'pair_type' in registry.columns
if has_pair_type:
    pair_type_counts = registry['pair_type'].value_counts()
    print("\nPair types:")
    print(pair_type_counts.to_string())

# Rich preview of the first ten registry rows.
display(registry.head(10))

3643 pairs above noise-adjusted freq threshold

Pair types:
pair_type
cross-sector    2201
intra-sector    1442


Unnamed: 0,Pair,Ticker_1,Ticker_2,raw_count,raw_freq,noise_adj_freq,sector_1,sector_2,pair_type
0,PBR-PBR-A,PBR,PBR-A,572,0.570858,0.991334,Energy,Energy,intra-sector
1,CIFR-HUT,CIFR,HUT,168,0.167665,0.938547,Financial Services,Financial Services,intra-sector
2,CIFR-IREN,CIFR,IREN,146,0.145709,0.9125,Financial Services,Financial Services,intra-sector
3,MARA-RIOT,MARA,RIOT,156,0.155689,0.912281,Financial Services,Financial Services,intra-sector
4,APLD-IREN,APLD,IREN,89,0.088822,0.908163,Technology,Financial Services,cross-sector
5,HUT-MARA,HUT,MARA,138,0.137725,0.901961,Financial Services,Financial Services,intra-sector
6,ALAB-CRDO,ALAB,CRDO,70,0.06986,0.897436,Technology,Technology,intra-sector
7,AAL-DAL,AAL,DAL,329,0.328343,0.894022,Industrials,Industrials,intra-sector
8,APLD-CRWV,APLD,CRWV,68,0.067864,0.883117,Technology,Technology,intra-sector
9,CIFR-RIOT,CIFR,RIOT,114,0.113772,0.876923,Financial Services,Financial Services,intra-sector


## 3. Transient Validation

In [4]:
# Inputs for transient validation: the formation events, the time-series
# frame, and the transient section of the default config.
ts_df = arts['ts_df']
formations = arts['df_formations']
transient_cfg = DEFAULT_CONFIG.transient

# Keep only formation events whose pair is in the registry, then cap the
# workload at the last 5 rows per pair (in the frame's existing row order).
# NOTE(review): "last 5" is only "most recent 5" if df_formations is already
# sorted by Formation_Time — confirm upstream.
registered_pairs = set(registry['Pair'])
in_registry = formations['Pair'].isin(registered_pairs)
formations_filtered = formations[in_registry]
formations_limited = formations_filtered.groupby('Pair').tail(5)

print(f"Validating {len(formations_limited)} formation events...")

# Validate each event lazily; events for which the validator returns None
# are dropped from the results.
outcomes = (
    validate_transient_event(
        event['Ticker_1'], event['Ticker_2'],
        event['Formation_Time'], ts_df,
        cfg=transient_cfg,
    )
    for _, event in formations_limited.iterrows()
)
results = [outcome for outcome in outcomes if outcome is not None]

# Summarise how many validated events passed.
df_transient = pd.DataFrame(results)
if df_transient.empty:
    print("No events could be validated")
else:
    n_passed = df_transient['passed'].sum()
    print(f"Validated: {len(df_transient)}, Passed: {n_passed} ({n_passed/len(df_transient):.1%})")

Validating 17417 formation events...


Validated: 15707, Passed: 71 (0.5%)


## 4. Stable Pair Tracking

In [5]:
# One (Ticker_1, Ticker_2) tuple per registry row, in registry order.
pair_list = [
    (ticker_1, ticker_2)
    for ticker_1, ticker_2 in zip(registry['Ticker_1'], registry['Ticker_2'])
]
stable_signals = track_stable_pairs(pair_list, ts_df)

# Count the tracked pairs whose cointegration p-value is below 0.05.
cointegrated = [s for s in stable_signals if s['coint_pval'] < 0.05]
n_coint = len(cointegrated)
print(f"{len(stable_signals)} pairs tracked, {n_coint} cointegrated (p<0.05)")

3643 pairs tracked, 257 cointegrated (p<0.05)


## 5. Save Results

In [6]:
# Persist this notebook's three result objects next to the input artifacts,
# one pickle file per object, written in the order listed.
outputs = {
    'pair_registry.pkl': registry,
    'transient_results.pkl': df_transient,
    'stable_results.pkl': stable_signals,
}
for filename, payload in outputs.items():
    with open(os.path.join(data_dir, filename), 'wb') as f:
        pickle.dump(payload, f)

print("Saved.")

Saved.
