In [2]:
# =============================================================================
# IMPORTS
# =============================================================================

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

print("‚úì Imports loaded")

‚úì Imports loaded


In [3]:
# =============================================================================
# DATA LOADING (copied from Notebook 1)
# =============================================================================

BASKETS = {
    'cybersecurity': ['CRWD', 'PANW', 'ZS', 'FTNT', 'OKTA', 'S', 'VRNS', 'TENB', 'CYBR', 'NET'],
    'uranium': ['UUUU', 'UEC', 'CCJ', 'LEU', 'DNN', 'NXE', 'URG', 'SMR'],
    'quantum': ['IONQ', 'RGTI', 'QBTS'],
    'space': ['RKLB', 'ASTS', 'LUNR'],
}

START_DATE = '2024-01-01'
END_DATE = '2026-01-08'

def load_basket_data(tickers, start=START_DATE, end=END_DATE):
    data = {}
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end, progress=False)
            if len(df) > 0:
                df['Returns'] = df['Adj Close'].pct_change()
                df['Ticker'] = ticker
                data[ticker] = df
                print(f"‚úì {ticker}: {len(df)} days")
        except Exception as e:
            print(f"‚úó {ticker}: {e}")
    return data

def load_sector(sector_name):
    if sector_name in BASKETS:
        print(f"\nüì¶ Loading {sector_name.upper()}")
        return load_basket_data(BASKETS[sector_name])
    return {}

print("‚úì Functions ready")

‚úì Functions ready


In [4]:
# FIXED load_basket_data function
def load_basket_data(tickers, start=START_DATE, end=END_DATE):
    data = {}
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end, progress=False)
            if len(df) > 0:
                # CRITICAL FIX: Handle multi-index columns FIRST
                if isinstance(df.columns, pd.MultiIndex):
                    df.columns = df.columns.droplevel(1)
                
                # THEN calculate returns with fallback
                if 'Adj Close' in df.columns:
                    df['Returns'] = df['Adj Close'].pct_change()
                elif 'Close' in df.columns:
                    df['Returns'] = df['Close'].pct_change()
                    df['Adj Close'] = df['Close']
                
                df['Ticker'] = ticker
                data[ticker] = df
                print(f"‚úì {ticker}: {len(df)} days")
        except Exception as e:
            print(f"‚úó {ticker}: {e}")
    return data

print("‚úì FIXED function loaded")

‚úì FIXED function loaded


In [5]:
# =============================================================================
# CONFIGURATION - EDIT THIS TO TEST DIFFERENT THEORIES
# =============================================================================

# SECTOR TO TEST
SECTOR = 'cybersecurity'

# LEADER TICKERS (typically high-growth momentum names)
LEADERS = ['CRWD', 'ZS']

# LAGGARD TICKERS (typically cheaper, slower movers)
LAGGARDS = ['S', 'VRNS', 'TENB']

# THRESHOLDS TO TEST (iterate through these)
LEADER_MOVE_THRESHOLDS = [0.02, 0.03, 0.04, 0.05]  # 2%, 3%, 4%, 5%

# HOLDING PERIODS TO TEST
HOLDING_PERIODS = [1, 2, 3, 5, 10]  # days after signal

print("‚úì Configuration set")
print(f"  Leaders: {LEADERS}")
print(f"  Laggards: {LAGGARDS}")
print(f"  Thresholds: {[f'{t*100:.0f}%' for t in LEADER_MOVE_THRESHOLDS]}")
print(f"  Holding periods: {HOLDING_PERIODS} days")

‚úì Configuration set
  Leaders: ['CRWD', 'ZS']
  Laggards: ['S', 'VRNS', 'TENB']
  Thresholds: ['2%', '3%', '4%', '5%']
  Holding periods: [1, 2, 3, 5, 10] days


In [6]:
# =============================================================================
# SIGNAL DETECTION
# =============================================================================

def find_leader_signals(leader_data, threshold):
    """
    Find days where ANY leader moved more than threshold.
    Returns DataFrame with signal dates and which leader triggered.
    """
    signals = []
    
    for ticker, df in leader_data.items():
        if ticker in LEADERS:
            big_moves = df[df['Returns'] > threshold].copy()
            for date, row in big_moves.iterrows():
                signals.append({
                    'date': date,
                    'leader': ticker,
                    'leader_return': row['Returns']
                })
    
    signals_df = pd.DataFrame(signals)
    if len(signals_df) > 0:
        signals_df = signals_df.sort_values('date')
        # Count how many leaders moved on same day (stronger signal)
        signals_df['num_leaders'] = signals_df.groupby('date')['leader'].transform('count')
    
    return signals_df

print("‚úì Signal detection function ready")

‚úì Signal detection function ready


In [16]:
# =============================================================================
# LAGGARD FOLLOW-THROUGH ANALYSIS
# =============================================================================

def analyze_laggard_response(signals_df, laggard_data, holding_period):
    """
    After each leader signal, track what laggards did.
    """
    results = []
    
    for _, signal in signals_df.iterrows():
        signal_date = signal['date']
        
        for ticker, df in laggard_data.items():
            if ticker in LAGGARDS:
                try:
                    # Find the signal date index
                    if signal_date in df.index:
                        idx = df.index.get_loc(signal_date)
                        
                        # Get return over holding period
                        if idx + holding_period < len(df):
                            entry_price = df.iloc[idx]['Adj Close']
                            exit_price = df.iloc[idx + holding_period]['Adj Close']
                            laggard_return = (exit_price - entry_price) / entry_price
                            
                            # Also get next-day return
                            if idx + 1 < len(df):
                                next_day_return = df.iloc[idx + 1]['Returns']
                            else:
                                next_day_return = np.nan
                            
                            results.append({
                                'signal_date': signal_date,
                                'leader': signal['leader'],
                                'leader_return': signal['leader_return'],
                                'num_leaders': signal['num_leaders'],
                                'laggard': ticker,
                                'holding_period': holding_period,
                                'laggard_return': laggard_return,
                                'next_day_return': next_day_return,
                                'win': laggard_return > 0
                            })
                except Exception as e:
                    continue
    
    return pd.DataFrame(results)

print("‚úì Analysis function ready")

‚úì Analysis function ready


In [8]:
# =============================================================================
# FULL BACKTEST
# =============================================================================

def run_full_backtest(sector_data, thresholds=LEADER_MOVE_THRESHOLDS, 
                      holding_periods=HOLDING_PERIODS):
    """
    Test all combinations of threshold and holding period.
    """
    all_results = []
    
    # Split data into leaders and laggards
    leader_data = {k: v for k, v in sector_data.items() if k in LEADERS}
    laggard_data = {k: v for k, v in sector_data.items() if k in LAGGARDS}
    
    print(f"\nüîç Testing {len(thresholds)} thresholds √ó {len(holding_periods)} periods = {len(thresholds)*len(holding_periods)} combinations")
    print("="*60)
    
    for threshold in thresholds:
        print(f"\nThreshold: {threshold*100:.0f}%")
        
        # Find signals
        signals = find_leader_signals(leader_data, threshold)
        print(f"  Found {len(signals)} leader signals")
        
        if len(signals) == 0:
            continue
            
        for period in holding_periods:
            # Analyze laggard response
            results = analyze_laggard_response(signals, laggard_data, period)
            
            if len(results) > 0:
                results['threshold'] = threshold
                all_results.append(results)
                print(f"    {period}-day hold: {len(results)} trades")
    
    if all_results:
        return pd.concat(all_results, ignore_index=True)
    return pd.DataFrame()

print("‚úì Backtest engine ready")

‚úì Backtest engine ready


In [9]:
# =============================================================================
# RESULTS SUMMARY
# =============================================================================

def summarize_results(results_df):
    """
    Create summary statistics by threshold and holding period.
    """
    if len(results_df) == 0:
        print("No results to summarize")
        return pd.DataFrame()
    
    summary = results_df.groupby(['threshold', 'holding_period', 'laggard']).agg({
        'laggard_return': ['mean', 'std', 'count'],
        'win': 'mean'
    }).round(4)
    
    summary.columns = ['avg_return', 'std_return', 'num_trades', 'win_rate']
    summary = summary.reset_index()
    
    # Calculate expected value
    summary['expected_value'] = summary['avg_return'] * summary['win_rate']
    
    return summary.sort_values('expected_value', ascending=False)

print("‚úì Summary function ready")

‚úì Summary function ready


In [10]:
# =============================================================================
# EXECUTE - RUN THE BACKTEST
# =============================================================================

# Load the data
print("\nüê∫ STARTING LEADER/LAGGARD BACKTEST")
print("="*60)
sector_data = load_sector(SECTOR)

# Run backtest
results = run_full_backtest(sector_data)

# Show summary
summary = summarize_results(results)

print("\n" + "="*60)
print("üìä RESULTS SUMMARY (sorted by expected value)")
print("="*60)
if len(summary) > 0:
    # Show top 20 results
    print(summary.head(20).to_string(index=False))
    
    # Show best overall
    best = summary.iloc[0]
    print(f"\nüéØ BEST SETUP:")
    print(f"   Laggard: {best['laggard']}")
    print(f"   Leader threshold: {best['threshold']*100:.0f}%")
    print(f"   Holding period: {best['holding_period']} days")
    print(f"   Win rate: {best['win_rate']*100:.1f}%")
    print(f"   Avg return: {best['avg_return']*100:.2f}%")
    print(f"   Expected value: {best['expected_value']*100:.2f}%")
    print(f"   Number of trades: {int(best['num_trades'])}")
else:
    print("No results found")


üê∫ STARTING LEADER/LAGGARD BACKTEST

üì¶ Loading CYBERSECURITY
‚úì CRWD: 506 days
‚úì PANW: 506 days
‚úì ZS: 506 days
‚úì FTNT: 506 days
‚úì OKTA: 506 days
‚úì S: 506 days
‚úì VRNS: 506 days
‚úì TENB: 506 days
‚úì CYBR: 506 days
‚úì NET: 506 days

üîç Testing 4 thresholds √ó 5 periods = 20 combinations

Threshold: 2%
  Found 195 leader signals
    1-day hold: 579 trades
    2-day hold: 579 trades
    3-day hold: 579 trades
    5-day hold: 579 trades
    10-day hold: 579 trades

Threshold: 3%
  Found 107 leader signals
    1-day hold: 315 trades
    2-day hold: 315 trades
    3-day hold: 315 trades
    5-day hold: 315 trades
    10-day hold: 315 trades

Threshold: 4%
  Found 51 leader signals
    1-day hold: 147 trades
    2-day hold: 147 trades
    3-day hold: 147 trades
    5-day hold: 147 trades
    10-day hold: 147 trades

Threshold: 5%
  Found 27 leader signals
    1-day hold: 81 trades
    2-day hold: 81 trades
    3-day hold: 81 trades
    5-day hold: 81 trades
    10-day ho

In [11]:
# =============================================================================
# MULTI-SECTOR VALIDATION - Test ALL sectors to confirm pattern isn't overfit
# =============================================================================

# Update BASKETS with all sectors from Notebook 1
BASKETS = {
    'cybersecurity': ['CRWD', 'PANW', 'ZS', 'FTNT', 'OKTA', 'S', 'VRNS', 'TENB', 'CYBR', 'NET'],
    'biotech_small': ['SANA', 'KYTX', 'ORIC', 'ALT', 'PALI', 'GHRS', 'MNMD', 'NTLA', 'TERN'],
    'biotech_large': ['AMGN', 'GILD', 'REGN', 'VRTX', 'BIIB'],
    'uranium': ['UUUU', 'UEC', 'CCJ', 'LEU', 'DNN', 'NXE', 'URG', 'SMR'],
    'ai_hype': ['BBAI', 'AI', 'SOUN', 'PLTR', 'PATH'],
    'ai_infrastructure': ['NVDA', 'AMD', 'MU', 'AVGO', 'MRVL'],
    'defense': ['LMT', 'RTX', 'NOC', 'GD', 'BA'],
    'quantum': ['IONQ', 'RGTI', 'QBTS'],
    'space': ['RKLB', 'ASTS', 'LUNR'],
    'semi': ['NVTS', 'NXPI', 'SWKS', 'MRVL', 'ARM', 'NVDA', 'AMD', 'INTC', 'MU', 'TSM'],
}

def test_all_sectors(top_n=3):
    """
    Run leader/laggard backtest on EVERY sector.
    For each sector, pick first 2 tickers as leaders, next 3 as laggards.
    """
    all_sector_results = []
    
    print("\nüê∫ TESTING ALL SECTORS FOR LEADER/LAGGARD PATTERN")
    print("="*70)
    
    for sector_name, tickers in BASKETS.items():
        if len(tickers) < 3:
            print(f"\n‚ö†Ô∏è  {sector_name.upper()}: Only {len(tickers)} tickers, skipping")
            continue
            
        print(f"\n{'='*70}")
        print(f"üìä SECTOR: {sector_name.upper()}")
        print(f"{'='*70}")
        
        # Define leaders (first 2) and laggards (next 3)
        test_leaders = tickers[:2]
        test_laggards = tickers[2:5] if len(tickers) >= 5 else tickers[2:]
        
        print(f"  Leaders: {test_leaders}")
        print(f"  Laggards: {test_laggards}")
        
        # Load data
        sector_data = load_basket_data(tickers)
        
        if len(sector_data) < 3:
            print(f"  ‚úó Failed to load enough data")
            continue
        
        # Split into leaders/laggards
        leader_data = {k: v for k, v in sector_data.items() if k in test_leaders}
        laggard_data = {k: v for k, v in sector_data.items() if k in test_laggards}
        
        # Test at 5% threshold (where we found best edge in cybersecurity)
        threshold = 0.05
        signals = find_leader_signals(leader_data, threshold)
        print(f"\n  üîç Found {len(signals)} leader signals at 5% threshold")
        
        if len(signals) < 5:
            print(f"  ‚ö†Ô∏è  Too few signals ({len(signals)}), skipping")
            continue
        
        # Test 5-day holding period (best from cybersecurity)
        results = analyze_laggard_response(signals, laggard_data, holding_period=5)
        
        if len(results) == 0:
            print(f"  ‚úó No results generated")
            continue
        
        # Summarize by laggard
        for laggard in test_laggards:
            laggard_results = results[results['laggard'] == laggard]
            if len(laggard_results) > 0:
                win_rate = (laggard_results['forward_return'] > 0).mean()
                avg_return = laggard_results['forward_return'].mean()
                expected_value = win_rate * avg_return
                
                print(f"    {laggard:8s}: {len(laggard_results):3d} trades | Win: {win_rate*100:5.1f}% | Avg: {avg_return*100:5.2f}% | EV: {expected_value*100:5.2f}%")
                
                all_sector_results.append({
                    'sector': sector_name,
                    'laggard': laggard,
                    'num_trades': len(laggard_results),
                    'win_rate': win_rate,
                    'avg_return': avg_return,
                    'expected_value': expected_value,
                    'leaders': ', '.join(test_leaders)
                })
    
    # Create summary dataframe
    if all_sector_results:
        df = pd.DataFrame(all_sector_results)
        df = df.sort_values('expected_value', ascending=False)
        
        print("\n" + "="*70)
        print("üéØ TOP 20 SETUPS ACROSS ALL SECTORS (5% threshold, 5-day hold)")
        print("="*70)
        print(df.head(20).to_string(index=False))
        
        # Show sector-level stats
        print("\n" + "="*70)
        print("üìà SECTOR SUMMARY")
        print("="*70)
        sector_summary = df.groupby('sector').agg({
            'expected_value': 'max',
            'win_rate': 'max',
            'num_trades': 'sum'
        }).sort_values('expected_value', ascending=False)
        print(sector_summary.to_string())
        
        return df
    else:
        print("\n‚ùå No results across any sector")
        return pd.DataFrame()

# RUN IT
all_results = test_all_sectors()


üê∫ TESTING ALL SECTORS FOR LEADER/LAGGARD PATTERN

üìä SECTOR: CYBERSECURITY
  Leaders: ['CRWD', 'PANW']
  Laggards: ['ZS', 'FTNT', 'OKTA']
‚úì CRWD: 506 days
‚úì PANW: 506 days
‚úì ZS: 506 days
‚úì FTNT: 506 days
‚úì OKTA: 506 days
‚úì S: 506 days
‚úì VRNS: 506 days
‚úì TENB: 506 days
‚úì CYBR: 506 days
‚úì NET: 506 days

  üîç Found 17 leader signals at 5% threshold
  ‚úó No results generated

üìä SECTOR: BIOTECH_SMALL
  Leaders: ['SANA', 'KYTX']
  Laggards: ['ORIC', 'ALT', 'PALI']
‚úì SANA: 506 days
‚úì KYTX: 479 days
‚úì ORIC: 506 days
‚úì ALT: 506 days
‚úì PALI: 506 days
‚úì GHRS: 506 days
‚úì MNMD: 506 days
‚úì NTLA: 506 days
‚úì TERN: 506 days

  üîç Found 0 leader signals at 5% threshold
  ‚ö†Ô∏è  Too few signals (0), skipping

üìä SECTOR: BIOTECH_LARGE
  Leaders: ['AMGN', 'GILD']
  Laggards: ['REGN', 'VRTX', 'BIIB']
‚úì AMGN: 506 days
‚úì GILD: 506 days
‚úì REGN: 506 days
‚úì VRTX: 506 days
‚úì BIIB: 506 days

  üîç Found 0 leader signals at 5% threshold
  ‚ö†Ô∏è  Too

In [12]:
# =============================================================================
# ADAPTIVE THRESHOLD - Try 3% for lower volatility sectors
# =============================================================================

def test_all_sectors_adaptive(threshold=0.03, holding_period=5):
    """
    Run with LOWER threshold (3%) to capture signals in lower-vol sectors.
    """
    all_sector_results = []
    
    print(f"\nüê∫ TESTING ALL SECTORS AT {threshold*100:.0f}% THRESHOLD, {holding_period}-DAY HOLD")
    print("="*70)
    
    for sector_name, tickers in BASKETS.items():
        if len(tickers) < 3:
            print(f"\n‚ö†Ô∏è  {sector_name.upper()}: Only {len(tickers)} tickers, skipping")
            continue
            
        print(f"\n{'='*70}")
        print(f"üìä SECTOR: {sector_name.upper()}")
        print(f"{'='*70}")
        
        # Define leaders (first 2) and laggards (next 3)
        test_leaders = tickers[:2]
        test_laggards = tickers[2:5] if len(tickers) >= 5 else tickers[2:]
        
        print(f"  Leaders: {test_leaders}")
        print(f"  Laggards: {test_laggards}")
        
        # Load data
        sector_data = load_basket_data(tickers)
        
        if len(sector_data) < 3:
            print(f"  ‚úó Failed to load enough data")
            continue
        
        # Split into leaders/laggards
        leader_data = {k: v for k, v in sector_data.items() if k in test_leaders}
        laggard_data = {k: v for k, v in sector_data.items() if k in test_laggards}
        
        # Find signals
        signals = find_leader_signals(leader_data, threshold)
        print(f"\n  üîç Found {len(signals)} leader signals")
        
        if len(signals) < 10:
            print(f"  ‚ö†Ô∏è  Too few signals ({len(signals)}), skipping")
            continue
        
        # Analyze laggard response
        results = analyze_laggard_response(signals, laggard_data, holding_period)
        
        if len(results) == 0:
            print(f"  ‚úó No results generated")
            continue
        
        # Summarize by laggard
        for laggard in test_laggards:
            laggard_results = results[results['laggard'] == laggard]
            if len(laggard_results) > 0:
                win_rate = (laggard_results['forward_return'] > 0).mean()
                avg_return = laggard_results['forward_return'].mean()
                expected_value = win_rate * avg_return
                
                print(f"    {laggard:8s}: {len(laggard_results):3d} trades | Win: {win_rate*100:5.1f}% | Avg: {avg_return*100:5.2f}% | EV: {expected_value*100:5.2f}%")
                
                all_sector_results.append({
                    'sector': sector_name,
                    'laggard': laggard,
                    'num_trades': len(laggard_results),
                    'win_rate': win_rate,
                    'avg_return': avg_return,
                    'expected_value': expected_value,
                    'leaders': ', '.join(test_leaders)
                })
    
    # Create summary dataframe
    if all_sector_results:
        df = pd.DataFrame(all_sector_results)
        df = df.sort_values('expected_value', ascending=False)
        
        print("\n" + "="*70)
        print(f"üéØ TOP 20 SETUPS ACROSS ALL SECTORS ({threshold*100:.0f}% threshold, {holding_period}-day hold)")
        print("="*70)
        print(df[['sector', 'laggard', 'num_trades', 'win_rate', 'avg_return', 'expected_value']].head(20).to_string(index=False))
        
        # Show sector-level stats
        print("\n" + "="*70)
        print("üìà SECTOR SUMMARY (Best laggard per sector)")
        print("="*70)
        sector_summary = df.groupby('sector').agg({
            'expected_value': 'max',
            'win_rate': 'max',
            'num_trades': 'sum'
        }).sort_values('expected_value', ascending=False)
        print(sector_summary.to_string())
        
        # Count how many sectors show positive edge
        positive_ev = df[df['expected_value'] > 0].groupby('sector').size()
        print(f"\n‚úÖ Sectors with positive EV: {len(positive_ev)}/{len(BASKETS)}")
        
        return df
    else:
        print("\n‚ùå No results across any sector")
        return pd.DataFrame()

# RUN AT 3% THRESHOLD
all_results_3pct = test_all_sectors_adaptive(threshold=0.03, holding_period=5)


üê∫ TESTING ALL SECTORS AT 3% THRESHOLD, 5-DAY HOLD

üìä SECTOR: CYBERSECURITY
  Leaders: ['CRWD', 'PANW']
  Laggards: ['ZS', 'FTNT', 'OKTA']
‚úì CRWD: 506 days
‚úì PANW: 506 days
‚úì ZS: 506 days
‚úì FTNT: 506 days
‚úì OKTA: 506 days
‚úì S: 506 days
‚úì VRNS: 506 days
‚úì TENB: 506 days
‚úì CYBR: 506 days
‚úì NET: 506 days

  üîç Found 63 leader signals
  ‚úó No results generated

üìä SECTOR: BIOTECH_SMALL
  Leaders: ['SANA', 'KYTX']
  Laggards: ['ORIC', 'ALT', 'PALI']
‚úì SANA: 506 days
‚úì KYTX: 479 days
‚úì ORIC: 506 days
‚úì ALT: 506 days
‚úì PALI: 506 days
‚úì GHRS: 506 days
‚úì MNMD: 506 days
‚úì NTLA: 506 days
‚úì TERN: 506 days

  üîç Found 0 leader signals
  ‚ö†Ô∏è  Too few signals (0), skipping

üìä SECTOR: BIOTECH_LARGE
  Leaders: ['AMGN', 'GILD']
  Laggards: ['REGN', 'VRTX', 'BIIB']
‚úì AMGN: 506 days
‚úì GILD: 506 days
‚úì REGN: 506 days
‚úì VRTX: 506 days
‚úì BIIB: 506 days

  üîç Found 0 leader signals
  ‚ö†Ô∏è  Too few signals (0), skipping

üìä SECTOR: URANIU

In [17]:
# DEBUG: Why no results?
cyber_data = load_basket_data(['CRWD', 'PANW', 'ZS', 'FTNT', 'OKTA'])
leaders = {k: v for k, v in cyber_data.items() if k in ['CRWD', 'PANW']}
laggards = {k: v for k, v in cyber_data.items() if k in ['ZS', 'FTNT', 'OKTA']}

signals = find_leader_signals(leaders, 0.03)
print(f"Signals: {len(signals)}")
if len(signals) > 0:
    print(signals.head())
    
    results = analyze_laggard_response(signals, laggards, 5)
    print(f"\nResults: {len(results)}")
    if len(results) > 0:
        print(results.head())
    else:
        print("analyze_laggard_response returned empty")
else:
    print("No signals found")

‚úì CRWD: 506 days
‚úì PANW: 506 days
‚úì ZS: 506 days
‚úì FTNT: 506 days
‚úì OKTA: 506 days
Signals: 63
        date leader  leader_return  num_leaders
0 2024-01-08   CRWD       0.055847            1
1 2024-01-09   CRWD       0.047803            1
2 2024-01-10   CRWD       0.030208            1
3 2024-01-29   CRWD       0.035686            1
4 2024-02-07   CRWD       0.051869            1

Results: 0
analyze_laggard_response returned empty


In [18]:
# Check actual analyze function signature and return value
import inspect
print("analyze_laggard_response signature:")
print(inspect.signature(analyze_laggard_response))

# Manually test the logic
signal_date = signals.iloc[0]['date']
ticker = 'ZS'
df = laggards[ticker]
holding_period = 5

print(f"\nTesting: signal_date={signal_date}, ticker={ticker}")
print(f"signal_date in df.index: {signal_date in df.index}")

if signal_date in df.index:
    idx = df.index.get_loc(signal_date)
    print(f"idx: {idx}")
    print(f"len(df): {len(df)}")
    print(f"idx + holding_period < len(df): {idx + holding_period < len(df)}")
    
    if idx + holding_period < len(df):
        entry = df.iloc[idx]['Adj Close']
        exit_val = df.iloc[idx + holding_period]['Adj Close']
        ret = (exit_val - entry) / entry
        print(f"entry: {entry}, exit: {exit_val}, return: {ret*100:.2f}%")

analyze_laggard_response signature:
(signals_df, laggard_data, holding_period)

Testing: signal_date=2024-01-08 00:00:00, ticker=ZS
signal_date in df.index: True
idx: 4
len(df): 506
idx + holding_period < len(df): True
entry: 218.10000610351562, exit: 232.17999267578125, return: 6.46%


In [19]:
# FIXED VERSION - remove global LAGGARDS dependency
def analyze_laggard_response_v2(signals_df, laggard_data, holding_period):
    """
    After each leader signal, track what laggards did.
    Uses all tickers in laggard_data (no global LAGGARDS check).
    """
    results = []
    
    for _, signal in signals_df.iterrows():
        signal_date = signal['date']
        
        for ticker, df in laggard_data.items():
            # REMOVED: if ticker in LAGGARDS check
            try:
                # Find the signal date index
                if signal_date in df.index:
                    idx = df.index.get_loc(signal_date)
                    
                    # Get return over holding period
                    if idx + holding_period < len(df):
                        entry_price = df.iloc[idx]['Adj Close']
                        exit_price = df.iloc[idx + holding_period]['Adj Close']
                        laggard_return = (exit_price - entry_price) / entry_price
                        
                        # Also get next-day return
                        if idx + 1 < len(df):
                            next_day_return = df.iloc[idx + 1]['Returns']
                        else:
                            next_day_return = np.nan
                        
                        results.append({
                            'signal_date': signal_date,
                            'leader': signal['leader'],
                            'leader_return': signal['leader_return'],
                            'num_leaders': signal['num_leaders'],
                            'laggard': ticker,
                            'holding_period': holding_period,
                            'forward_return': laggard_return,  # Changed name for consistency
                            'next_day_return': next_day_return,
                            'win': laggard_return > 0
                        })
            except Exception as e:
                continue
    
    return pd.DataFrame(results)

print("‚úì Fixed analyze function loaded (v2)")

‚úì Fixed analyze function loaded (v2)


In [20]:
# Test fixed version
results_v2 = analyze_laggard_response_v2(signals, laggards, 5)
print(f"Results v2: {len(results_v2)}")
if len(results_v2) > 0:
    print(results_v2.head(10))
    
    # Show summary
    for laggard in ['ZS', 'FTNT', 'OKTA']:
        lag_results = results_v2[results_v2['laggard'] == laggard]
        if len(lag_results) > 0:
            win_rate = (lag_results['forward_return'] > 0).mean()
            avg_ret = lag_results['forward_return'].mean()
            ev = win_rate * avg_ret
            print(f"\n{laggard}: {len(lag_results)} trades, {win_rate*100:.1f}% win, {avg_ret*100:.2f}% avg, {ev*100:.2f}% EV")

Results v2: 186
  signal_date leader  leader_return  num_leaders laggard  holding_period  \
0  2024-01-08   CRWD       0.055847            1      ZS               5   
1  2024-01-08   CRWD       0.055847            1    FTNT               5   
2  2024-01-08   CRWD       0.055847            1    OKTA               5   
3  2024-01-09   CRWD       0.047803            1      ZS               5   
4  2024-01-09   CRWD       0.047803            1    FTNT               5   
5  2024-01-09   CRWD       0.047803            1    OKTA               5   
6  2024-01-10   CRWD       0.030208            1      ZS               5   
7  2024-01-10   CRWD       0.030208            1    FTNT               5   
8  2024-01-10   CRWD       0.030208            1    OKTA               5   
9  2024-01-29   CRWD       0.035686            1      ZS               5   

   forward_return  next_day_return    win  
0        0.064557         0.037414   True  
1        0.012080        -0.030362   True  
2       -0.0143

In [21]:
# =============================================================================
# ALL SECTORS TEST - FINAL VERSION with fixed function
# =============================================================================

def test_all_sectors_final(threshold=0.03, holding_period=5):
    """
    Test ALL sectors with fixed analyze function.
    """
    all_sector_results = []
    
    print(f"\nüê∫ FINAL TEST: ALL SECTORS AT {threshold*100:.0f}% THRESHOLD, {holding_period}-DAY HOLD")
    print("="*70)
    
    for sector_name, tickers in BASKETS.items():
        if len(tickers) < 3:
            continue
            
        print(f"\nüìä {sector_name.upper()}")
        
        # Define leaders (first 2) and laggards (rest)
        test_leaders = tickers[:2]
        test_laggards = tickers[2:5] if len(tickers) >= 5 else tickers[2:]
        
        # Load data
        sector_data = load_basket_data(tickers)
        
        if len(sector_data) < 3:
            print(f"  ‚úó Failed to load data")
            continue
        
        # Split
        leader_data = {k: v for k, v in sector_data.items() if k in test_leaders}
        laggard_data = {k: v for k, v in sector_data.items() if k in test_laggards}
        
        # Find signals
        signals = find_leader_signals(leader_data, threshold)
        
        if len(signals) < 10:
            print(f"  ‚ö†Ô∏è  Only {len(signals)} signals, skipping")
            continue
        
        # FIXED: Use v2 function
        results = analyze_laggard_response_v2(signals, laggard_data, holding_period)
        
        if len(results) == 0:
            print(f"  ‚úó No results")
            continue
        
        # Summarize
        print(f"  Leaders: {', '.join(test_leaders)} ‚Üí {len(signals)} signals")
        for laggard in test_laggards:
            lag_res = results[results['laggard'] == laggard]
            if len(lag_res) > 0:
                wr = (lag_res['forward_return'] > 0).mean()
                ar = lag_res['forward_return'].mean()
                ev = wr * ar
                print(f"    {laggard:8s}: {len(lag_res):3d} trades | {wr*100:5.1f}% win | {ar*100:5.2f}% avg | {ev*100:5.2f}% EV")
                
                all_sector_results.append({
                    'sector': sector_name,
                    'laggard': laggard,
                    'num_trades': len(lag_res),
                    'win_rate': wr,
                    'avg_return': ar,
                    'expected_value': ev
                })
    
    # Summary
    if all_sector_results:
        df = pd.DataFrame(all_sector_results)
        df = df.sort_values('expected_value', ascending=False)
        
        print("\n" + "="*70)
        print(f"üéØ TOP 15 SETUPS ({threshold*100:.0f}% threshold, {holding_period}-day hold)")
        print("="*70)
        print(df[['sector', 'laggard', 'num_trades', 'win_rate', 'avg_return', 'expected_value']].head(15).to_string(index=False))
        
        # Count positive EV
        positive = df[df['expected_value'] > 0]
        print(f"\n‚úÖ {len(positive)} setups with positive EV out of {len(df)} total")
        
        # Sector summary
        print("\n" + "="*70)
        print("üìà BEST SETUP PER SECTOR")
        print("="*70)
        best_per_sector = df.loc[df.groupby('sector')['expected_value'].idxmax()]
        print(best_per_sector[['sector', 'laggard', 'win_rate', 'expected_value']].to_string(index=False))
        
        return df
    else:
        print("\n‚ùå No results")
        return pd.DataFrame()

# RUN IT
final_results = test_all_sectors_final(threshold=0.03, holding_period=5)


üê∫ FINAL TEST: ALL SECTORS AT 3% THRESHOLD, 5-DAY HOLD

üìä CYBERSECURITY
‚úì CRWD: 506 days
‚úì PANW: 506 days
‚úì ZS: 506 days
‚úì FTNT: 506 days
‚úì OKTA: 506 days
‚úì S: 506 days
‚úì VRNS: 506 days
‚úì TENB: 506 days
‚úì CYBR: 506 days
‚úì NET: 506 days
  Leaders: CRWD, PANW ‚Üí 63 signals
    ZS      :  62 trades |  54.8% win |  0.51% avg |  0.28% EV
    FTNT    :  62 trades |  59.7% win |  1.38% avg |  0.82% EV
    OKTA    :  62 trades |  50.0% win |  1.35% avg |  0.67% EV

üìä BIOTECH_SMALL
‚úì SANA: 506 days
‚úì KYTX: 479 days
‚úì ORIC: 506 days
‚úì ALT: 506 days
‚úì PALI: 506 days
‚úì GHRS: 506 days
‚úì MNMD: 506 days
‚úì NTLA: 506 days
‚úì TERN: 506 days
  ‚ö†Ô∏è  Only 0 signals, skipping

üìä BIOTECH_LARGE
‚úì AMGN: 506 days
‚úì GILD: 506 days
‚úì REGN: 506 days
‚úì VRTX: 506 days
‚úì BIIB: 506 days
  ‚ö†Ô∏è  Only 0 signals, skipping

üìä URANIUM
‚úì UUUU: 506 days
‚úì UEC: 506 days
‚úì CCJ: 506 days
‚úì LEU: 506 days
‚úì DNN: 506 days
‚úì NXE: 506 days
‚úì URG: 506 d

In [18]:
# =============================================================================
# VISUALIZATION
# =============================================================================

def plot_results(summary):
    """
    Visualize win rates and returns.
    """
    if len(summary) == 0:
        print("No data to plot")
        return
    
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle('üê∫ LEADER/LAGGARD ANALYSIS', fontsize=16, fontweight='bold')
    
    # Win rate by holding period
    for laggard in summary['laggard'].unique():
        data = summary[summary['laggard'] == laggard]
        axes[0, 0].plot(data['holding_period'], data['win_rate'], 
                        marker='o', label=laggard, linewidth=2)
    axes[0, 0].set_xlabel('Holding Period (days)', fontsize=12)
    axes[0, 0].set_ylabel('Win Rate', fontsize=12)
    axes[0, 0].set_title('Win Rate by Holding Period', fontweight='bold')
    axes[0, 0].legend()
    axes[0, 0].axhline(y=0.5, color='r', linestyle='--', alpha=0.5, label='50% baseline')
    axes[0, 0].grid(alpha=0.3)
    
    # Average return by holding period
    for laggard in summary['laggard'].unique():
        data = summary[summary['laggard'] == laggard]
        axes[0, 1].plot(data['holding_period'], data['avg_return']*100, 
                        marker='o', label=laggard, linewidth=2)
    axes[0, 1].set_xlabel('Holding Period (days)', fontsize=12)
    axes[0, 1].set_ylabel('Average Return (%)', fontsize=12)
    axes[0, 1].set_title('Average Return by Holding Period', fontweight='bold')
    axes[0, 1].legend()
    axes[0, 1].axhline(y=0, color='r', linestyle='--', alpha=0.5)
    axes[0, 1].grid(alpha=0.3)
    
    # Win rate by threshold
    for laggard in summary['laggard'].unique():
        data = summary[summary['laggard'] == laggard]
        avg_by_threshold = data.groupby('threshold')['win_rate'].mean()
        axes[1, 0].plot(avg_by_threshold.index*100, avg_by_threshold.values, 
                        marker='o', label=laggard, linewidth=2)
    axes[1, 0].set_xlabel('Leader Move Threshold (%)', fontsize=12)
    axes[1, 0].set_ylabel('Win Rate', fontsize=12)
    axes[1, 0].set_title('Win Rate by Leader Threshold', fontweight='bold')
    axes[1, 0].legend()
    axes[1, 0].grid(alpha=0.3)
    
    # Expected value heatmap
    pivot = summary.pivot_table(values='expected_value', 
                                index='threshold', 
                                columns='holding_period', 
                                aggfunc='mean')
    im = axes[1, 1].imshow(pivot.values, cmap='RdYlGn', aspect='auto')
    axes[1, 1].set_xticks(range(len(pivot.columns)))
    axes[1, 1].set_xticklabels(pivot.columns)
    axes[1, 1].set_yticks(range(len(pivot.index)))
    axes[1, 1].set_yticklabels([f"{x*100:.0f}%" for x in pivot.index])
    axes[1, 1].set_xlabel('Holding Period (days)', fontsize=12)
    axes[1, 1].set_ylabel('Leader Threshold', fontsize=12)
    axes[1, 1].set_title('Expected Value Heatmap', fontweight='bold')
    plt.colorbar(im, ax=axes[1, 1])
    
    plt.tight_layout()
    plt.show()

# Plot the results
if len(summary) > 0:
    plot_results(summary)
else:
    print("No data to visualize")

No data to visualize


---

## üìä INTERPRETATION

**What to look for:**
- **Win rate > 60%** = Edge confirmed
- **Expected value > 2%** = Tradeable setup
- **High sample size** (50+ trades) = Statistically significant

**Next steps:**
1. If edge found ‚Üí Test on different sectors (uranium, quantum, etc)
2. If no edge ‚Üí Adjust LEADERS/LAGGARDS or try different thresholds
3. Compare to coordination signal from Notebook 4

üê∫ **Hunt the pattern. Test the theory. Trust the data.**