# Signal Tier Execution Quality

Evaluate execution outcomes by signal tier:
- Fill counts/rates
- Slippage (bps)
- Signed next-day return after fills

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Set both pandas display options to None (no column-count cap, no fixed width).
for _display_option in ('display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

# Plot defaults shared by every figure that does not pass its own figsize.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

from QuantConnect import *
from QuantConnect.Research import QuantBook

# Research-environment handle; the ObjectStore lookups in this notebook go through `qb`.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the QuantBook ObjectStore.

    Returns the parsed DataFrame, or ``None`` (after printing a message)
    when the key does not exist, its content is empty, or any exception is
    raised while reading or parsing.
    """
    try:
        if qb.ObjectStore.ContainsKey(key):
            raw_text = qb.ObjectStore.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Best-effort loader: report the problem and let the caller decide.
        print(f'Error reading {key}: {e}')
    return None


## Data Loading & Tier Attribution

Loads order events, slippage records, position prices, and the signal log, then performs a multi-stage join to assign a signal tier (strong/moderate/weak/exit, or unknown when nothing matches) to each filled order. The tier is sourced first from explicit order tags embedded at submission time. Rows that are still unmapped are matched by symbol against the same-day signal or, failing that, the most recent prior signal no more than 10 days old: a fill in the signal's direction inherits the signal's tier, while a fill in the opposite direction is labeled exit. Diagnostic tables at the end show how many rows were attributed from each source and what fraction of signed next-day returns are usable for quality measurement.

## Execution Quality — 3-Panel Overview by Tier

These three side-by-side plots summarize execution quality broken out by signal tier. The left panel shows fill counts, confirming how many orders were completed per tier; the center panel compares absolute slippage in basis points, testing whether high-conviction strong-tier orders incur more or less market impact than weak-tier limit orders; and the right panel shows signed next-day return, checking whether fills on strong signals land at prices that subsequently move in the intended direction.

## Tier Metrics Summary Table

This table condenses execution quality into a single row per tier, reporting fill count, average and median slippage in basis points, the number of clean signed-return observations, the average signed next-day return (raw and cleaned), and hit rate. Use it to compare whether the extra immediacy cost of strong-tier limit-at-market orders is offset by better entry timing relative to the more patient moderate and weak tier limits. A higher hit rate and lower slippage for stronger tiers would validate the signal-strength execution design.

In [None]:
import re

# Pull the four exported tables; any of them may come back as None.
df_events, df_slippage, df_positions, df_signals = (
    read_csv_from_store(store_key)
    for store_key in (
        'wolfpack/order_events.csv',
        'wolfpack/slippage.csv',
        'wolfpack/positions.csv',
        'wolfpack/signals.csv',
    )
)

# Only the event and slippage tables are mandatory for this cell.
if any(frame is None for frame in (df_events, df_slippage)):
    raise ValueError('order_events.csv and slippage.csv are required.')

for frame in (df_events, df_slippage):
    frame['date'] = pd.to_datetime(frame['date'])

# Coerce numeric columns; unparseable entries become 0.0.
_numeric_columns = (
    (df_events, ('fill_quantity', 'fill_price')),
    (df_slippage, ('quantity', 'expected_price', 'fill_price', 'slippage_dollars')),
)
for frame, columns in _numeric_columns:
    for col in columns:
        frame[col] = pd.to_numeric(frame[col], errors='coerce').fillna(0.0)

# Limits used by the next-day return quality filters.
MAX_NEXT_GAP_DAYS = 7
MAX_ABS_RETURN = 0.50

def parse_tag(tag, key):
    """Extract the value of a ``key=value`` field from an order tag.

    The value is everything after ``key=`` up to the next ``;`` or the end
    of the tag.

    Args:
        tag: Order tag; converted with ``str``. Missing values (``pd.isna``)
            yield ``np.nan``.
        key: Field name to look up. It is matched literally and must not be
            the tail of a longer identifier.

    Returns:
        The captured value string, or ``np.nan`` when the tag is missing or
        has no such field.
    """
    if pd.isna(tag):
        return np.nan
    # Escape the key so regex metacharacters are taken literally, and reject
    # matches that begin mid-word: looking up 'tier' must not pick up the
    # value of 'signal_tier=...'.
    pattern = rf'(?<![A-Za-z0-9_]){re.escape(str(key))}=([^;]+)'
    m = re.search(pattern, str(tag))
    return m.group(1) if m else np.nan

def tier_from_magnitude(mag):
    """Map a signal magnitude to a tier label by its absolute value.

    ``>= 0.7`` is 'strong', ``>= 0.3`` is 'moderate', anything smaller is
    'weak'. Missing input (``pd.isna``) returns ``np.nan``.
    """
    if pd.isna(mag):
        return np.nan
    strength = abs(float(mag))
    # Thresholds are checked from the highest floor down.
    for floor, label in ((0.7, 'strong'), (0.3, 'moderate')):
        if strength >= floor:
            return label
    return 'weak'

# Build event-derived tier map from filled events
fills = df_events[df_events['status'].astype(str).str.contains('Filled', case=False, na=False)].copy()
fills = fills[fills['fill_quantity'].abs() > 0].copy()

# Try tier from explicit tier=... tag; fallback to signal=... in tag when present
fills['tag_tier'] = fills['tag'].apply(lambda t: parse_tag(t, 'tier'))
fills['tag_signal'] = pd.to_numeric(fills['tag'].apply(lambda t: parse_tag(t, 'signal')), errors='coerce')
fills['event_tier'] = fills['tag_tier']
no_tag_tier = fills['event_tier'].isna()
fills.loc[no_tag_tier, 'event_tier'] = fills.loc[no_tag_tier, 'tag_signal'].apply(tier_from_magnitude)

# Normalize case/whitespace, then blank anything outside the known labels
# (this also clears the literal 'nan' strings produced by astype(str)).
fills['event_tier'] = fills['event_tier'].astype(str).str.lower().str.strip()
fills.loc[~fills['event_tier'].isin(['strong', 'moderate', 'weak', 'exit']), 'event_tier'] = np.nan

# Rounded join keys: price and quantity to 4 decimals, signed and absolute.
fills['fill_price_4'] = fills['fill_price'].round(4)
fills['fill_quantity_4'] = fills['fill_quantity'].round(4)
fills['fill_quantity_abs_4'] = fills['fill_quantity'].abs().round(4)

# Only fills that carry a usable tier go into the lookup maps. Without this,
# drop_duplicates could keep an untagged event for a key and shadow a tagged
# event with the same key, losing the tier for the matching slippage row.
tiered_fills = fills[fills['event_tier'].notna()]

strict_map = (
    tiered_fills[['date', 'symbol', 'fill_price_4', 'fill_quantity_4', 'event_tier']]
    .drop_duplicates(['date', 'symbol', 'fill_price_4', 'fill_quantity_4'])
)
loose_map = (
    tiered_fills[['date', 'symbol', 'fill_price_4', 'fill_quantity_abs_4', 'event_tier']]
    .drop_duplicates(['date', 'symbol', 'fill_price_4', 'fill_quantity_abs_4'])
)

# Start from slippage rows (one row per filled order)
merged = df_slippage.copy()
# Rounded join keys matching the ones built on the order-event side.
merged['fill_price_4'] = merged['fill_price'].round(4)
merged['quantity_4'] = merged['quantity'].round(4)
merged['quantity_abs_4'] = merged['quantity'].abs().round(4)
# Zero quantity is treated as a buy.
merged['order_side'] = np.where(merged['quantity'] >= 0, 'Buy', 'Sell')

# Strict join first (signed quantity)
merged = merged.merge(
    strict_map,
    left_on=['date', 'symbol', 'fill_price_4', 'quantity_4'],
    right_on=['date', 'symbol', 'fill_price_4', 'fill_quantity_4'],
    how='left'
)

# Loose join fallback (absolute quantity), used only for rows still missing tier
missing = merged['event_tier'].isna()
if missing.any():
    fallback = (
        merged.loc[missing, ['date', 'symbol', 'fill_price_4', 'quantity_abs_4']]
        # Remember each row's label in `merged` so the result can be written back.
        .assign(_row=lambda x: x.index)
        .merge(
            loose_map,
            left_on=['date', 'symbol', 'fill_price_4', 'quantity_abs_4'],
            right_on=['date', 'symbol', 'fill_price_4', 'fill_quantity_abs_4'],
            how='left'
        )
        .sort_values('_row')
    )
    # loose_map is de-duplicated on its join keys, so the left merge yields
    # exactly one fallback row per missing row and the arrays line up.
    merged.loc[fallback['_row'].values, 'event_tier'] = fallback['event_tier'].values

# `tier` is the working label; `tier_source` records how it was obtained.
merged['tier'] = merged['event_tier']
merged['tier_source'] = np.where(merged['tier'].notna(), 'event_tag', 'unmapped')
# Filled in only for tiers inferred from the signal log.
merged['signal_lag_days'] = np.nan

# If event tags are missing, infer from signal strength/direction
if df_signals is not None and {'date', 'symbol', 'direction', 'magnitude'}.issubset(df_signals.columns):
    sig = df_signals[['date', 'symbol', 'direction', 'magnitude']].copy()
    sig['date'] = pd.to_datetime(sig['date'])
    sig['magnitude'] = pd.to_numeric(sig['magnitude'], errors='coerce')
    sig = sig.dropna(subset=['date', 'symbol', 'magnitude'])
    sig['direction'] = sig['direction'].astype(str).str.title()
    # NOTE(review): every direction other than 'Up' (after title-casing) is
    # treated as a sell signal, including unexpected or missing values.
    sig['signal_side'] = np.where(sig['direction'].eq('Up'), 'Buy', 'Sell')
    sig['signal_tier'] = sig['magnitude'].apply(tier_from_magnitude)
    sig['abs_magnitude'] = sig['magnitude'].abs()

    # One signal row per date/symbol: keep strongest absolute magnitude
    sig_daily = (
        sig.sort_values(['symbol', 'date', 'abs_magnitude'], ascending=[True, True, False])
           .drop_duplicates(['symbol', 'date'])[['date', 'symbol', 'signal_side', 'signal_tier']]
    )

    # Pass 1: same-day match
    # sig_daily is unique per (date, symbol), so this left merge adds columns
    # without adding rows.
    merged = merged.merge(sig_daily, on=['date', 'symbol'], how='left')

    # Only rows without a tier that do have a same-day signal are touched here.
    unmapped = merged['tier'].isna() & merged['signal_tier'].notna()
    same_dir = unmapped & (merged['order_side'] == merged['signal_side'])
    opposite_dir = unmapped & (merged['order_side'] != merged['signal_side'])

    # Order agrees with the signal: inherit the signal's tier.
    merged.loc[same_dir, 'tier'] = merged.loc[same_dir, 'signal_tier']
    merged.loc[same_dir, 'tier_source'] = 'signal_same_day_match'
    merged.loc[same_dir, 'signal_lag_days'] = 0

    # Order trades against the signal: label it as an exit.
    merged.loc[opposite_dir, 'tier'] = 'exit'
    merged.loc[opposite_dir, 'tier_source'] = 'signal_same_day_opposite_exit'
    merged.loc[opposite_dir, 'signal_lag_days'] = 0

    # Pass 2: most recent prior signal for rows still unmapped
    remaining = merged['tier'].isna()
    if remaining.any() and not sig_daily.empty:
        left_all = merged.loc[remaining, ['date', 'symbol', 'order_side']].copy()
        left_all = left_all.dropna(subset=['date', 'symbol'])
        # Keep the label of each row in `merged` for the write-back below.
        left_all['_row'] = left_all.index

        right_all = sig_daily.rename(columns={'date': 'signal_date'})
        right_all = right_all.dropna(subset=['signal_date', 'symbol'])

        # As-of join one symbol at a time; both sides are sorted on their
        # date key immediately before the merge.
        asof_parts = []
        for symbol, left_sym in left_all.groupby('symbol', sort=False):
            right_sym = right_all[right_all['symbol'] == symbol]
            if right_sym.empty:
                continue

            left_sym = left_sym.sort_values('date')
            right_sym = right_sym[['signal_date', 'signal_side', 'signal_tier']].sort_values('signal_date')

            # direction='backward' picks the latest signal dated on or before the fill.
            part = pd.merge_asof(
                left_sym,
                right_sym,
                left_on='date',
                right_on='signal_date',
                direction='backward'
            )
            asof_parts.append(part)

        if asof_parts:
            asof = pd.concat(asof_parts, ignore_index=True)
            asof['lag_days'] = (asof['date'] - asof['signal_date']).dt.days

            # Guardrail: only use reasonably recent signal context
            recent = asof['signal_tier'].notna() & asof['lag_days'].notna() & (asof['lag_days'] <= 10)
            same_dir = recent & (asof['order_side'] == asof['signal_side'])
            opposite_dir = recent & (asof['order_side'] != asof['signal_side'])

            # Row labels in `merged` for each outcome, in the same order as the
            # value arrays assigned below.
            idx_same = asof.loc[same_dir, '_row']
            idx_opp = asof.loc[opposite_dir, '_row']

            merged.loc[idx_same, 'tier'] = asof.loc[same_dir, 'signal_tier'].values
            merged.loc[idx_same, 'tier_source'] = 'signal_prior_match'
            merged.loc[idx_same, 'signal_lag_days'] = asof.loc[same_dir, 'lag_days'].values

            merged.loc[idx_opp, 'tier'] = 'exit'
            merged.loc[idx_opp, 'tier_source'] = 'signal_prior_opposite_exit'
            merged.loc[idx_opp, 'signal_lag_days'] = asof.loc[opposite_dir, 'lag_days'].values

# Whatever is still unlabeled becomes the explicit 'unknown' tier.
merged['tier'] = merged['tier'].fillna('unknown')

# Absolute traded value; zero is replaced by NaN so the division below
# yields NaN instead of inf.
abs_notional = merged['fill_price'].abs().mul(merged['quantity'].abs())
merged['notional'] = abs_notional.replace(0, np.nan)

# Absolute slippage as a fraction of notional, expressed in basis points.
merged['slippage_bps'] = merged['slippage_dollars'].abs().div(merged['notional']).mul(10000)

if df_positions is not None:
    df_positions['date'] = pd.to_datetime(df_positions['date'])
    df_positions['price'] = pd.to_numeric(df_positions['price'], errors='coerce')

    # One positive price per (date, symbol), ordered so that shift(-1) within a
    # symbol gives the next available observation.
    px = (
        df_positions[['date', 'symbol', 'price']]
        .dropna(subset=['date', 'symbol', 'price'])
        .query('price > 0')
        .drop_duplicates(['date', 'symbol'])
        .sort_values(['symbol', 'date'])
    )

    # "Next" means the next row for the symbol, which need not be the next
    # calendar day; the gap is checked against MAX_NEXT_GAP_DAYS below.
    px['next_date'] = px.groupby('symbol')['date'].shift(-1)
    px['next_price'] = px.groupby('symbol')['price'].shift(-1)

    fills_rt = merged.merge(px[['date', 'symbol', 'price', 'next_date', 'next_price']], on=['date', 'symbol'], how='left')
    fills_rt['next_gap_days'] = (fills_rt['next_date'] - fills_rt['date']).dt.days
    # +1 for buys (and zero quantity), -1 for sells.
    fills_rt['side_sign'] = np.where(fills_rt['quantity'] >= 0, 1.0, -1.0)

    # A return is computed only when both prices are positive and the next
    # observation is 1..MAX_NEXT_GAP_DAYS days after the fill.
    valid_base = (
        (fills_rt['fill_price'] > 0) &
        (fills_rt['next_price'] > 0) &
        fills_rt['next_gap_days'].notna() &
        (fills_rt['next_gap_days'] >= 1) &
        (fills_rt['next_gap_days'] <= MAX_NEXT_GAP_DAYS)
    )

    # Measured from the fill price (not the positions price on the fill date)
    # to the next positions price.
    fills_rt['raw_next_day_return'] = np.nan
    fills_rt.loc[valid_base, 'raw_next_day_return'] = (
        fills_rt.loc[valid_base, 'next_price'] / fills_rt.loc[valid_base, 'fill_price']
    ) - 1.0

    # Positive when the price moved in the direction of the trade.
    fills_rt['signed_next_day_return'] = fills_rt['raw_next_day_return'] * fills_rt['side_sign']
    fills_rt['signed_next_day_return_clean'] = fills_rt['signed_next_day_return']

    # The "clean" series additionally drops moves larger than MAX_ABS_RETURN.
    extreme_move = fills_rt['signed_next_day_return_clean'].abs() > MAX_ABS_RETURN
    fills_rt.loc[extreme_move, 'signed_next_day_return_clean'] = np.nan

    # Label each row with the first failing check; np.select uses the first
    # condition that is true, so the order of this list sets the priority.
    fills_rt['return_quality'] = np.select(
        [
            fills_rt['next_price'].isna(),
            fills_rt['fill_price'] <= 0,
            fills_rt['next_price'] <= 0,
            fills_rt['next_gap_days'].isna(),
            fills_rt['next_gap_days'] < 1,
            fills_rt['next_gap_days'] > MAX_NEXT_GAP_DAYS,
            extreme_move,
        ],
        [
            'missing_next_price',
            'non_positive_fill_price',
            'non_positive_next_price',
            'missing_next_gap',
            'non_forward_next_price',
            'next_gap_too_large',
            f'abs_return_gt_{int(MAX_ABS_RETURN * 100)}pct',
        ],
        default='ok'
    )
else:
    # No price history: keep the same columns so the cells below still run,
    # with every return missing.
    fills_rt = merged.copy()
    fills_rt['raw_next_day_return'] = np.nan
    fills_rt['signed_next_day_return'] = np.nan
    fills_rt['signed_next_day_return_clean'] = np.nan
    fills_rt['next_gap_days'] = np.nan
    fills_rt['return_quality'] = 'no_positions_prices'

def _count_rows_by(column):
    """Return a two-column table of row counts per value of ``column`` (NaN included)."""
    counts = fills_rt[column].value_counts(dropna=False)
    return counts.rename_axis(column).reset_index(name='rows')


source_breakdown = _count_rows_by('tier_source')
print('Tier source breakdown:')
display(source_breakdown)

# Lag statistics exist only for tiers inferred from the signal log.
inferred_lags = fills_rt['signal_lag_days'].dropna()
if not inferred_lags.empty:
    lag_summary = inferred_lags.describe(percentiles=[0.5, 0.9, 0.99]).to_frame('lag_days')
    print('Signal lag (days) for inferred tiers:')
    display(lag_summary)

quality_breakdown = _count_rows_by('return_quality')
print('Return quality breakdown:')
display(quality_breakdown)

# Share of fills that have a usable signed return, before and after cleaning.
raw_cov = fills_rt['signed_next_day_return'].notna().mean()
clean_cov = fills_rt['signed_next_day_return_clean'].notna().mean()
print(f'Usable signed return coverage (raw): {raw_cov:.1%}')
print(f'Usable signed return coverage (clean): {clean_cov:.1%}')

print(f"Unknown rows: {int((fills_rt['tier'] == 'unknown').sum()):,} / {len(fills_rt):,}")
display(fills_rt.head())


## Bootstrap Confidence Intervals on Signed Next-Day Returns

This section quantifies uncertainty around tier-level return estimates using 1,000-iteration bootstrapped confidence intervals on both mean and median signed next-day returns. The resulting 95% intervals are plotted as error bars, one panel per statistic. Tiers whose intervals do not cross zero provide the strongest evidence that the observed edge is not due to sampling noise in a relatively small fill dataset.

## Tagged vs Inferred Tier — Source Split Panels

This section splits fills into those whose tier came directly from order tags versus those inferred from recent signal history, then compares signed next-day returns for each group side by side. A summary table first shows fill counts and average returns by source group and tier; the two boxplots then make it visually clear whether inferred-tier rows behave similarly to directly tagged ones. Large discrepancies would indicate the inference logic is misclassifying orders and distorting the measured edge by tier.

## Multi-Horizon Signed Returns (1D / 3D / 5D)

This section extends the analysis from next-day returns to 3- and 5-day holding horizons for each tier, using forward prices from position data cleaned for large date gaps. A summary table reports observations, mean, median, and hit rate for each tier-horizon pair; the line chart and boxplot then show whether any tier's directional edge persists or decays as the holding period lengthens. Sustained or growing edge at longer horizons would support the strategy's 5-day scaling schedule.

In [None]:
# Fixed left-to-right tier order shared by all three panels.
tier_order = ['strong', 'moderate', 'weak', 'exit', 'unknown']
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
count_ax, slippage_ax, return_ax = axes

# Left: number of fills per tier.
tier_counts = fills_rt['tier'].value_counts().rename_axis('tier').reset_index(name='fills')
sns.barplot(data=tier_counts, x='tier', y='fills', order=tier_order, ax=count_ax)
count_ax.set_title('Filled Orders by Tier')
count_ax.grid(axis='y', alpha=0.3)

# Centre and right: distribution of slippage and of the cleaned signed return.
box_panels = (
    (slippage_ax, 'slippage_bps', 'Absolute Slippage (bps) by Tier'),
    (return_ax, 'signed_next_day_return_clean', 'Signed Next-Day Return by Tier (Cleaned)'),
)
for ax, column, title in box_panels:
    sns.boxplot(data=fills_rt, x='tier', y=column, order=tier_order, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Tier')
    ax.grid(alpha=0.3)
return_ax.set_ylabel('Return')

plt.tight_layout()
plt.show()


In [None]:
# One row per tier: fill count, slippage, and signed next-day return quality.
tier_metrics = (
    fills_rt.groupby('tier', as_index=False)
            .agg(
                fills=('symbol', 'count'),
                avg_slippage_bps=('slippage_bps', 'mean'),
                med_slippage_bps=('slippage_bps', 'median'),
                clean_return_obs=('signed_next_day_return_clean', 'count'),
                avg_signed_next_day_return_raw=('signed_next_day_return', 'mean'),
                avg_signed_next_day_return_clean=('signed_next_day_return_clean', 'mean'),
                # Hit rate over the clean observations only. Comparing NaN with
                # `> 0` gives False, so without dropna() every fill lacking a
                # clean return would count as a miss and pull the rate down.
                # A tier with no clean observations yields NaN.
                hit_rate_clean=('signed_next_day_return_clean', lambda s: (s.dropna() > 0).mean())
            )
            .sort_values('fills', ascending=False)
)
# Express the hit rate as a percentage.
tier_metrics['hit_rate_clean'] = 100 * tier_metrics['hit_rate_clean']
display(tier_metrics)


## Extended Diagnostics

These sections add three deeper checks:

1. **Bootstrap confidence intervals**: Estimates uncertainty for tier-level mean/median signed next-day returns, so we can distinguish noise from reliable differences.
2. **Tier-source split panels**: Separates performance by source of tier attribution (`event_tag` vs inferred) to show whether inferred mapping behaves differently.
3. **Multi-horizon returns (1D/3D/5D)**: Compares signed forward returns across holding horizons to test whether an edge emerges over horizons longer than one day.


In [None]:
# Bootstrap confidence intervals for tier-level 1D cleaned returns
rng = np.random.default_rng(42)


def bootstrap_ci(values, func=np.mean, n_boot=1000, ci=95):
    """Percentile-bootstrap interval for ``func`` applied to ``values``.

    Missing values are dropped first. Resampling draws from the module-level
    ``rng``, so results depend on how often it has been used before.

    Args:
        values: Sample to summarise (anything ``pd.Series`` accepts).
        func: Statistic applied to a 1-D array; defaults to ``np.mean``.
        n_boot: Number of bootstrap resamples.
        ci: Central interval width in percent.

    Returns:
        ``(statistic, lower, upper)``. All three are NaN for an empty sample
        and all equal the statistic when only one observation remains.
    """
    clean = pd.Series(values).dropna().to_numpy()
    size = len(clean)
    if size == 0:
        return np.nan, np.nan, np.nan

    point = float(func(clean))
    if size == 1:
        return point, point, point

    # One resample of the full sample per row, then the statistic per row.
    draws = rng.choice(clean, size=(n_boot, size), replace=True)
    boot_stats = np.apply_along_axis(func, 1, draws)

    tail = (100 - ci) / 2.0
    lower = float(np.percentile(boot_stats, tail))
    upper = float(np.percentile(boot_stats, 100 - tail))
    return point, lower, upper

# One summary row per tier that has at least one clean return.
rows = []
for tier in ('strong', 'moderate', 'weak', 'exit', 'unknown'):
    tier_returns = fills_rt.loc[fills_rt['tier'] == tier, 'signed_next_day_return_clean']
    n_clean = int(tier_returns.notna().sum())
    if n_clean == 0:
        continue

    # Mean first, then median: both draw from the same shared generator.
    mean_val, mean_lo, mean_hi = bootstrap_ci(tier_returns, func=np.mean)
    med_val, med_lo, med_hi = bootstrap_ci(tier_returns, func=np.median)

    rows.append(dict(
        tier=tier,
        n_clean=n_clean,
        mean=mean_val,
        mean_ci_low=mean_lo,
        mean_ci_high=mean_hi,
        median=med_val,
        median_ci_low=med_lo,
        median_ci_high=med_hi,
    ))

ci_df = pd.DataFrame(rows)
display(ci_df)

if not ci_df.empty:
    fig, axes = plt.subplots(1, 2, figsize=(16, 5))

    # (centre column, lower column, upper column, title, extra errorbar kwargs)
    ci_panels = (
        ('mean', 'mean_ci_low', 'mean_ci_high',
         'Mean Signed 1D Return with 95% Bootstrap CI', {}),
        ('median', 'median_ci_low', 'median_ci_high',
         'Median Signed 1D Return with 95% Bootstrap CI', {'color': '#ff7f0e'}),
    )
    for ax, (mid_col, low_col, high_col, title, extra) in zip(axes, ci_panels):
        centre = ci_df[mid_col]
        # errorbar takes distances below and above the point, not the bounds.
        ax.errorbar(
            ci_df['tier'],
            centre,
            yerr=[centre - ci_df[low_col], ci_df[high_col] - centre],
            fmt='o',
            capsize=4,
            **extra
        )
        ax.axhline(0, color='black', linewidth=1)
        ax.set_title(title)
        ax.set_ylabel('Return')
        ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()


In [None]:
# Separate panels for tagged vs inferred tier sources
# Everything that did not come from an event tag lands in 'inferred',
# including rows whose tier_source is 'unmapped'.
from_tag = fills_rt['tier_source'].eq('event_tag')
fills_rt['source_group'] = from_tag.map({True: 'tagged', False: 'inferred'})
order = ['strong', 'moderate', 'weak', 'exit', 'unknown']

by_source_and_tier = fills_rt.groupby(['source_group', 'tier'], as_index=False)
source_summary = by_source_and_tier.agg(
    fills=('symbol', 'count'),
    clean_return_obs=('signed_next_day_return_clean', 'count'),
    avg_signed_return_clean=('signed_next_day_return_clean', 'mean'),
    avg_slippage_bps=('slippage_bps', 'mean')
).sort_values(['source_group', 'fills'], ascending=[True, False])
display(source_summary)

fig, axes = plt.subplots(1, 2, figsize=(18, 5), sharey=True)
for ax, group in zip(axes, ['tagged', 'inferred']):
    dfg = fills_rt[fills_rt['source_group'] == group]
    if dfg.empty:
        # Keep the panel but say why it is blank.
        ax.text(0.5, 0.5, f'No {group} rows', ha='center', va='center', transform=ax.transAxes)
    else:
        sns.boxplot(data=dfg, x='tier', y='signed_next_day_return_clean', order=order, ax=ax)
        ax.axhline(0, color='black', linewidth=1)
    ax.set_title(f'Signed Next-Day Return ({group})')
    ax.set_xlabel('Tier')
    ax.grid(alpha=0.3)

axes[0].set_ylabel('Return')
plt.tight_layout()
plt.show()


In [None]:
# Multi-horizon signed forward returns (1D, 3D, 5D)
if df_positions is None:
    raise ValueError('positions.csv is required for 3D/5D return analysis.')

horizons = [1, 3, 5]
# Per-horizon limits: largest accepted calendar gap to the forward
# observation, and largest accepted absolute signed return.
max_gap_days = {1: 7, 3: 14, 5: 21}
max_abs_return_by_h = {1: 0.50, 3: 0.75, 5: 1.00}

px_h = (
    df_positions[['date', 'symbol', 'price']]
    .copy()
)
px_h['date'] = pd.to_datetime(px_h['date'])
px_h['price'] = pd.to_numeric(px_h['price'], errors='coerce')
# One positive price per (date, symbol), sorted so shifts run forward in time
# within each symbol.
px_h = (
    px_h.dropna(subset=['date', 'symbol', 'price'])
        .query('price > 0')
        .drop_duplicates(['date', 'symbol'])
        .sort_values(['symbol', 'date'])
)

for h in horizons:
    # shift(-h) steps h rows ahead for the symbol, not h calendar days; the
    # actual distance is stored in gap_days_{h}d and validated below.
    px_h[f'fwd_date_{h}d'] = px_h.groupby('symbol')['date'].shift(-h)
    px_h[f'fwd_price_{h}d'] = px_h.groupby('symbol')['price'].shift(-h)
    # NOTE(review): this return starts from the positions price on the fill
    # date, whereas signed_next_day_return starts from the fill price, so the
    # 1D figures of the two measures are not identical by construction.
    px_h[f'raw_ret_{h}d'] = px_h[f'fwd_price_{h}d'] / px_h['price'] - 1.0
    px_h[f'gap_days_{h}d'] = (px_h[f'fwd_date_{h}d'] - px_h['date']).dt.days

base = fills_rt[['date', 'symbol', 'tier', 'tier_source', 'quantity']].copy()
# +1 for buys (and zero quantity), -1 for sells.
base['side_sign'] = np.where(base['quantity'] >= 0, 1.0, -1.0)

merge_cols = ['date', 'symbol']
for h in horizons:
    merge_cols += [f'raw_ret_{h}d', f'gap_days_{h}d']

# Fills without a positions row on the fill date get NaN for every horizon.
hr = base.merge(px_h[merge_cols], on=['date', 'symbol'], how='left')

for h in horizons:
    hr[f'signed_ret_{h}d_raw'] = hr[f'raw_ret_{h}d'] * hr['side_sign']

    # Accept the forward observation only if it lies between h and
    # max_gap_days[h] calendar days after the fill.
    valid = (
        hr[f'gap_days_{h}d'].notna() &
        (hr[f'gap_days_{h}d'] >= h) &
        (hr[f'gap_days_{h}d'] <= max_gap_days[h])
    )

    # Clean series: blank invalid gaps first, then outsized moves.
    hr[f'signed_ret_{h}d_clean'] = hr[f'signed_ret_{h}d_raw']
    hr.loc[~valid, f'signed_ret_{h}d_clean'] = np.nan
    hr.loc[hr[f'signed_ret_{h}d_clean'].abs() > max_abs_return_by_h[h], f'signed_ret_{h}d_clean'] = np.nan

# Build long-format dataframe
# One slice per horizon, with the horizon-specific clean return renamed to a
# common 'signed_return' column and tagged with its horizon.
long_rows = [
    hr[['tier', 'tier_source', f'signed_ret_{h}d_clean']]
    .rename(columns={f'signed_ret_{h}d_clean': 'signed_return'})
    .assign(horizon_days=h)
    for h in horizons
]

hret = pd.concat(long_rows, ignore_index=True)

horizon_summary = (
    hret.groupby(['tier', 'horizon_days'], as_index=False)
        .agg(
            obs=('signed_return', 'count'),
            mean_signed_return=('signed_return', 'mean'),
            median_signed_return=('signed_return', 'median'),
            # Hit rate over observed returns only. Comparing NaN with `> 0`
            # gives False, so without dropna() every fill lacking a clean
            # return would count as a miss. Longer horizons have more missing
            # returns, which would make the rate look like it decays.
            # Groups with no observations yield NaN.
            hit_rate=('signed_return', lambda s: (s.dropna() > 0).mean())
        )
)
# Express the hit rate as a percentage.
horizon_summary['hit_rate'] = 100 * horizon_summary['hit_rate']
display(horizon_summary.sort_values(['horizon_days', 'tier']))

# Plot mean by horizon
order = ['strong', 'moderate', 'weak', 'exit', 'unknown']
fig, axes = plt.subplots(1, 2, figsize=(18, 5))

# Tiers that have summary rows, in the fixed tier order. The same list drives
# the line panel and the boxplot hue so that a tier occupies the same position
# in both legends instead of following data appearance order in one of them.
tiers_present = [t for t in order if (horizon_summary['tier'] == t).any()]

for tier in tiers_present:
    line = horizon_summary[horizon_summary['tier'] == tier].sort_values('horizon_days')
    axes[0].plot(line['horizon_days'], line['mean_signed_return'], marker='o', label=tier)

axes[0].axhline(0, color='black', linewidth=1)
axes[0].set_title('Mean Signed Return by Horizon')
axes[0].set_xlabel('Horizon (days)')
axes[0].set_ylabel('Mean signed return')
axes[0].legend()
axes[0].grid(alpha=0.3)

# Boxplot by horizon with tier hue
plot_df = hret[hret['signed_return'].notna()].copy()
sns.boxplot(
    data=plot_df,
    x='horizon_days',
    y='signed_return',
    hue='tier',
    # Fall back to seaborn's default ordering when there is nothing to order.
    hue_order=tiers_present or None,
    ax=axes[1]
)
axes[1].axhline(0, color='black', linewidth=1)
axes[1].set_title('Signed Return Distribution by Horizon and Tier')
axes[1].set_xlabel('Horizon (days)')
axes[1].set_ylabel('Signed return')
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.show()
