In [1]:
# Setup
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Notebook banner: title, run timestamp and the question under test,
# emitted as one newline-joined print.
print("\n".join([
    "üê∫ PRE-MARKET SIGNAL ANALYSIS",
    "="*60,
    f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
    "Mission: Test if Day 1's announce in pre-market",
    "="*60,
]))

  from pandas.core import (


üê∫ PRE-MARKET SIGNAL ANALYSIS
Date: 2026-01-06 23:00
Mission: Test if Day 1's announce in pre-market


In [2]:
# Tickers under test, grouped by sector; the order matches the other notebooks.
UNIVERSE = (
    ['SIDU', 'ASTS', 'LUNR', 'RKLB', 'RDW']      # Space
    + ['IONQ', 'QBTS', 'RGTI', 'QUBT']           # Quantum
    + ['NVTS', 'WOLF', 'ON', 'AEHR', 'SKYT']     # Semi
    + ['UUUU', 'LEU', 'CCJ', 'SMR', 'OKLO']      # Nuclear
    + ['USAR', 'MP']                             # Rare Earth
)

print(f"üìä Testing {len(UNIVERSE)} stocks for pre-market signals")

üìä Testing 21 stocks for pre-market signals


In [3]:
def get_daily_data(ticker, lookback_months=6):
    """
    Download daily OHLCV history for ``ticker`` and add gap/return columns.

    NOTE: yfinance doesn't have reliable pre-market data for small caps.
    The open vs previous close "gap" is used as a proxy for overnight /
    pre-market movement.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.
    lookback_months : int, default 6
        Length of the history window; converted to calendar days as
        ``lookback_months * 30``.

    Returns
    -------
    pandas.DataFrame or None
        The frame returned by ``yf.Ticker(ticker).history(...)`` with these
        columns added: ``prev_close``, ``gap_pct``, ``intraday_pct``,
        ``daily_pct``, ``vol_ratio`` and ``is_green``.
        ``None`` when fewer than 20 rows come back or when any step raises.
        The reason is printed in both cases, so a missing ticker is visible
        in the cell output instead of disappearing silently.
    """
    try:
        end_date = datetime.now()
        start_date = end_date - timedelta(days=lookback_months * 30)

        hist = yf.Ticker(ticker).history(start=start_date, end=end_date)

        # With fewer than 20 rows the 20-bar rolling volume mean below would
        # be NaN on every row, so the frame is not usable for this analysis.
        if len(hist) < 20:
            print(f"   {ticker}: only {len(hist)} daily bars returned - skipped")
            return None

        prev_close = hist['Close'].shift(1)
        hist['prev_close'] = prev_close

        # Gap: today's open relative to yesterday's close.
        hist['gap_pct'] = ((hist['Open'] - prev_close) / prev_close) * 100

        # Intraday move: close relative to the same day's open.
        hist['intraday_pct'] = ((hist['Close'] - hist['Open']) / hist['Open']) * 100

        # Full-day return: close relative to the previous close.
        hist['daily_pct'] = ((hist['Close'] - prev_close) / prev_close) * 100

        # Volume relative to its 20-bar rolling mean. The window includes the
        # current bar, so a volume spike also raises its own baseline.
        hist['vol_ratio'] = hist['Volume'] / hist['Volume'].rolling(20).mean()

        # Green day = closed above its own open (not above the prior close).
        hist['is_green'] = hist['Close'] > hist['Open']

        return hist
    except Exception as exc:
        # Best-effort contract is kept (callers test for None), but the cause
        # is reported. print() is used because this notebook sets
        # warnings.filterwarnings('ignore'), which would hide a warning.
        print(f"   {ticker}: data unavailable ({type(exc).__name__}: {exc})")
        return None

print("‚úÖ get_daily_data() ready")

‚úÖ get_daily_data() ready


In [4]:
def find_day1s(hist, min_run_days=3, min_gain=10):
    """
    Find every "Day 1": the first day of a streak of consecutive up-closes.

    A streak is a maximal run of rows whose ``daily_pct`` is > 0. It
    qualifies when it lasts at least ``min_run_days`` rows and its total
    gain is at least ``min_gain`` percent. The gain is measured from the
    ``prev_close`` of the first row to the ``Close`` of the last row.

    Parameters
    ----------
    hist : pandas.DataFrame
        Must contain the columns ``daily_pct``, ``prev_close``, ``Close``,
        ``Open``, ``gap_pct``, ``intraday_pct`` and ``vol_ratio``.
    min_run_days : int, default 3
        Minimum streak length in rows.
    min_gain : float, default 10
        Minimum total streak gain in percent.

    Returns
    -------
    list of dict
        One dict per qualifying streak, in chronological order, with keys
        ``date``, ``gap_pct``, ``day1_gain``, ``intraday``, ``vol_ratio``,
        ``run_length``, ``run_gain``, ``open`` and ``prev_close``.
        All values except ``run_length`` and ``run_gain`` describe the
        first day of the streak.

    Notes
    -----
    Streaks that reach the final row of ``hist`` are handled like any
    other streak: the last row's close counts toward ``run_gain``, and a
    streak of exactly ``min_run_days`` rows ending on the final row is
    detected.
    """
    daily = hist['daily_pct'].to_numpy()
    n = len(hist)
    day1s = []

    # Latest index at which a streak of min_run_days rows still fits.
    # Capped at n - 1 so a non-positive min_run_days cannot index past the end.
    last_start = min(n - 1, n - min_run_days)

    i = 0
    while i <= last_start:
        # NaN (first row) compares False here and is skipped like a down day.
        if not daily[i] > 0:
            i += 1
            continue

        # Extend the streak; j ends one past its last up day.
        j = i + 1
        while j < n and daily[j] > 0:
            j += 1
        run_length = j - i

        if run_length >= min_run_days:
            start_price = hist['prev_close'].iloc[i]
            # j - 1 is always the streak's last row, including when the
            # streak reaches the end of the data (j == n).
            end_price = hist['Close'].iloc[j - 1]

            if pd.notna(start_price) and start_price > 0:
                total_gain = ((end_price / start_price) - 1) * 100

                if total_gain >= min_gain:
                    day1s.append({
                        'date': hist.index[i],
                        'gap_pct': hist['gap_pct'].iloc[i],
                        'day1_gain': hist['daily_pct'].iloc[i],
                        'intraday': hist['intraday_pct'].iloc[i],
                        'vol_ratio': hist['vol_ratio'].iloc[i],
                        'run_length': run_length,
                        'run_gain': total_gain,
                        'open': hist['Open'].iloc[i],
                        'prev_close': hist['prev_close'].iloc[i]
                    })

        # Jump past the streak. The rest of a too-short streak is shorter
        # still, so it can never qualify on its own.
        i = j

    return day1s

print("‚úÖ find_day1s() ready")

‚úÖ find_day1s() ready


In [5]:
# COLLECT ALL DAY 1 DATA
# Runs find_day1s() on every ticker in UNIVERSE and pools the results in
# all_day1s, tagging each record with its ticker.
print("\n" + "="*60)
print("üìä COLLECTING DAY 1 GAP DATA")
print("="*60)

all_day1s = []
skipped_tickers = []  # tickers for which get_daily_data() returned None

for ticker in UNIVERSE:
    hist = get_daily_data(ticker)
    if hist is None:
        # Record the skip so the total below is not read as covering
        # the whole universe.
        skipped_tickers.append(ticker)
        continue

    day1s = find_day1s(hist)

    # 'ticker' is added last, so it becomes the last column when the
    # records are turned into a DataFrame.
    for d1 in day1s:
        d1['ticker'] = ticker
    all_day1s.extend(day1s)

    if day1s:
        print(f"   {ticker}: {len(day1s)} Day 1's found")

if skipped_tickers:
    print(f"\n   Skipped (no usable data): {', '.join(skipped_tickers)}")

print(f"\nüìä TOTAL: {len(all_day1s)} Day 1's across all stocks")


üìä COLLECTING DAY 1 GAP DATA
   SIDU: 7 Day 1's found
   ASTS: 10 Day 1's found
   LUNR: 8 Day 1's found
   RKLB: 8 Day 1's found
   RDW: 6 Day 1's found
   IONQ: 8 Day 1's found
   QBTS: 9 Day 1's found
   RGTI: 10 Day 1's found
   QUBT: 6 Day 1's found
   NVTS: 6 Day 1's found
   WOLF: 2 Day 1's found
   ON: 1 Day 1's found
   AEHR: 6 Day 1's found
   SKYT: 6 Day 1's found
   UUUU: 9 Day 1's found
   LEU: 5 Day 1's found
   CCJ: 2 Day 1's found
   SMR: 7 Day 1's found
   OKLO: 9 Day 1's found
   USAR: 5 Day 1's found
   MP: 6 Day 1's found

üìä TOTAL: 136 Day 1's across all stocks


In [6]:
# ANALYZE GAP PATTERNS
# Builds `df` (one row per Day 1 with a valid gap) and prints summary
# statistics and a bucketed distribution of the opening gap.
print("\n" + "="*60)
print("üìä GAP PATTERN ANALYSIS")
print("="*60)

if not all_day1s:
    print("‚ùå No Day 1 data collected")
    df = pd.DataFrame()
else:
    # Rows without a gap value cannot be bucketed, so drop them up front.
    df = pd.DataFrame(all_day1s).dropna(subset=['gap_pct'])

    gaps = df['gap_pct']
    n_day1s = len(df)

    print(f"\nüìà GAP STATISTICS (n={n_day1s} Day 1's):")
    print("-" * 40)

    # Headline statistics
    print(f"   Average gap on Day 1: {gaps.mean():.2f}%")
    print(f"   Median gap: {gaps.median():.2f}%")
    print(f"   Min gap: {gaps.min():.2f}%")
    print(f"   Max gap: {gaps.max():.2f}%")

    # Bucket counts; buckets are half-open [low, high) and cover all values.
    gap_down = gaps.lt(-1).sum()
    gap_flat = (gaps.ge(-1) & gaps.lt(1)).sum()
    gap_small = (gaps.ge(1) & gaps.lt(3)).sum()
    gap_medium = (gaps.ge(3) & gaps.lt(5)).sum()
    gap_large = gaps.ge(5).sum()

    print(f"\nüìä GAP DISTRIBUTION:")
    print("-" * 40)
    print(f"   Gap DOWN (<-1%):     {gap_down:3d} ({gap_down/n_day1s*100:.0f}%)")
    print(f"   Gap FLAT (-1% to 1%): {gap_flat:3d} ({gap_flat/n_day1s*100:.0f}%)")
    print(f"   Gap SMALL (1-3%):    {gap_small:3d} ({gap_small/n_day1s*100:.0f}%)")
    print(f"   Gap MEDIUM (3-5%):   {gap_medium:3d} ({gap_medium/n_day1s*100:.0f}%)")
    print(f"   Gap LARGE (5%+):     {gap_large:3d} ({gap_large/n_day1s*100:.0f}%)")

    # Share of Day 1's clearing each gap-up threshold.
    gap_up_pct = gaps.gt(0).sum() / n_day1s * 100
    gap_up_2pct = gaps.ge(2).sum() / n_day1s * 100
    gap_up_3pct = gaps.ge(3).sum() / n_day1s * 100

    print(f"\nüìä KEY FINDING:")
    print(f"   Day 1's that gap UP: {gap_up_pct:.0f}%")
    print(f"   Day 1's with 2%+ gap: {gap_up_2pct:.0f}%")
    print(f"   Day 1's with 3%+ gap: {gap_up_3pct:.0f}%")


üìä GAP PATTERN ANALYSIS

üìà GAP STATISTICS (n=136 Day 1's):
----------------------------------------
   Average gap on Day 1: 1.89%
   Median gap: 0.97%
   Min gap: -4.19%
   Max gap: 19.46%

üìä GAP DISTRIBUTION:
----------------------------------------
   Gap DOWN (<-1%):       7 (5%)
   Gap FLAT (-1% to 1%):  62 (46%)
   Gap SMALL (1-3%):     32 (24%)
   Gap MEDIUM (3-5%):    18 (13%)
   Gap LARGE (5%+):      17 (12%)

üìä KEY FINDING:
   Day 1's that gap UP: 75%
   Day 1's with 2%+ gap: 35%
   Day 1's with 3%+ gap: 26%


In [7]:
# CORRELATION: GAP SIZE vs RUN SUCCESS
# Buckets each Day 1 by gap size, prints average run gain and length per
# bucket, then the correlation between gap and run gain.
print("\n" + "="*60)
print("üìä GAP SIZE vs RUN SUCCESS")
print("="*60)

if len(df) > 0:
    # Each entry is (exclusive upper bound, label), checked in order.
    # Anything at or above the last bound falls into the final label.
    bucket_bounds = [(0, 'Gap Down'), (2, 'Small (0-2%)'), (4, 'Medium (2-4%)')]
    top_bucket = 'Large (4%+)'

    def gap_bucket(gap):
        """Return the bucket label for a gap percentage."""
        for upper, label in bucket_bounds:
            if gap < upper:
                return label
        return top_bucket

    df['gap_bucket'] = df['gap_pct'].apply(gap_bucket)

    print(f"\n{'Gap Size':<20} {'Count':>8} {'Avg Run':>10} {'Avg Days':>10}")
    print("-" * 50)

    # Fixed display order; empty buckets are skipped.
    for label in [name for _, name in bucket_bounds] + [top_bucket]:
        members = df.loc[df['gap_bucket'].eq(label)]
        if members.empty:
            continue
        mean_gain = members['run_gain'].mean()
        mean_days = members['run_length'].mean()
        print(f"{label:<20} {len(members):>8} {mean_gain:>9.1f}% {mean_days:>10.1f}")

    # Correlation between gap size and total run gain
    corr = df['gap_pct'].corr(df['run_gain'])
    print(f"\nüìà Correlation (gap vs run gain): {corr:.2f}")

    # Thresholds are asymmetric: above 0.3 reads as positive and below -0.1
    # as negative. Everything else, NaN included, is reported as weak.
    if corr > 0.3:
        verdict = "   ‚úÖ POSITIVE CORRELATION: Bigger gaps = bigger runs"
    elif corr < -0.1:
        verdict = "   ‚ö†Ô∏è NEGATIVE CORRELATION: Smaller gaps = bigger runs"
    else:
        verdict = "   ‚ö†Ô∏è WEAK CORRELATION: Gap size doesn't predict run size"
    print(verdict)


üìä GAP SIZE vs RUN SUCCESS

Gap Size                Count    Avg Run   Avg Days
--------------------------------------------------
Gap Down                   29      28.1%        4.5
Small (0-2%)               60      26.9%        4.3
Medium (2-4%)              22      24.6%        3.9
Large (4%+)                25      32.9%        3.4

üìà Correlation (gap vs run gain): 0.18
   ‚ö†Ô∏è WEAK CORRELATION: Gap size doesn't predict run size


In [8]:
# FALSE SIGNAL ANALYSIS
# Collects every 3%+ gap-up that is not a Day 1 date and compares the count
# with the Day 1's in `df` that gapped 3%+.
print("\n" + "="*60)
print("üìä FALSE SIGNAL ANALYSIS")
print("="*60)
print("\nWhat happens when there's a gap UP but NO run?\n")

false_signals = []

# NOTE(review): this downloads each ticker again rather than reusing the
# frames from the collection cell, so `df` and `false_signals` can come from
# slightly different snapshots.
# NOTE(review): 3%+ gaps on later days of a qualifying run are not Day 1
# dates, so they are counted here as false signals - confirm that is intended.
for ticker in UNIVERSE:
    hist = get_daily_data(ticker)
    if hist is None:
        continue

    # Dates on which a qualifying run started for this ticker.
    day1_dates = {d1['date'] for d1 in find_day1s(hist)}

    # Row 0 has no previous close and is excluded. NaN gaps fail the
    # >= comparison, so no separate notna check is needed.
    later_rows = hist.iloc[1:]
    big_gaps = later_rows.loc[later_rows['gap_pct'] >= 3,
                              ['gap_pct', 'daily_pct', 'vol_ratio']]

    for date, gap, day_return, volume_ratio in zip(
            big_gaps.index, big_gaps['gap_pct'],
            big_gaps['daily_pct'], big_gaps['vol_ratio']):
        if date in day1_dates:
            continue
        false_signals.append({
            'ticker': ticker,
            'date': date,
            'gap_pct': gap,
            'daily_pct': day_return,
            'vol_ratio': volume_ratio
        })

print(f"Found {len(false_signals)} false signals (3%+ gap that didn't start a run)")

# True vs false signal rates among all 3%+ gaps
if len(df) > 0 and len(false_signals) > 0:
    true_3pct = df[df['gap_pct'] >= 3]
    n_true = len(true_3pct)
    n_false = len(false_signals)

    total_3pct_gaps = n_true + n_false
    true_positive_rate = n_true / total_3pct_gaps * 100
    false_positive_rate = n_false / total_3pct_gaps * 100

    print(f"\nüìä 3%+ GAP SIGNAL QUALITY:")
    print("-" * 40)
    print(f"   Total 3%+ gaps: {total_3pct_gaps}")
    print(f"   True positives (started run): {n_true} ({true_positive_rate:.0f}%)")
    print(f"   False positives (no run): {n_false} ({false_positive_rate:.0f}%)")

    # Fewer than 60% true positives is reported as an unreliable signal.
    if true_positive_rate < 60:
        print(f"\n   ‚ö†Ô∏è 3%+ GAP HAS HIGH FALSE POSITIVE RATE")
        print(f"   Need additional confirmation signals")
    else:
        print(f"\n   ‚úÖ 3%+ GAP IS A RELIABLE SIGNAL")


üìä FALSE SIGNAL ANALYSIS

What happens when there's a gap UP but NO run?

Found 292 false signals (3%+ gap that didn't start a run)

üìä 3%+ GAP SIGNAL QUALITY:
----------------------------------------
   Total 3%+ gaps: 327
   True positives (started run): 35 (11%)
   False positives (no run): 292 (89%)

   ‚ö†Ô∏è 3%+ GAP HAS HIGH FALSE POSITIVE RATE
   Need additional confirmation signals


In [9]:
# VOLUME AS CONFIRMATION
# Summarises the Day 1 volume ratio and compares its mean with the mean
# volume ratio of the false-signal gap days.
print("\n" + "="*60)
print("üìä VOLUME AS GAP CONFIRMATION")
print("="*60)

if len(df) > 0:
    # Volume ratio on true Day 1's; NaN rows (before the rolling window
    # fills) are dropped.
    vol_ratios = df['vol_ratio'].dropna()
    n_vol = len(vol_ratios)
    mean_ratio = vol_ratios.mean()

    print(f"\nüìà Day 1 Volume Stats:")
    print(f"   Average volume ratio: {mean_ratio:.1f}x normal")
    print(f"   Median volume ratio: {vol_ratios.median():.1f}x normal")

    # Share of Day 1's at or above each volume threshold
    elevated_vol = vol_ratios.ge(1.5).sum() / n_vol * 100
    high_vol = vol_ratios.ge(2.0).sum() / n_vol * 100

    print(f"\n   Day 1's with 1.5x volume: {elevated_vol:.0f}%")
    print(f"   Day 1's with 2x+ volume: {high_vol:.0f}%")

    # NOTE(review): vol_ratios covers every Day 1 in df regardless of gap
    # size, while false_signals holds only 3%+ gap days, so the two means
    # are not over like-for-like gap sizes.
    if false_signals:
        false_vol = pd.Series([s['vol_ratio'] for s in false_signals]).dropna().mean()
        true_vol = mean_ratio

        print(f"\nüìä TRUE vs FALSE SIGNAL VOLUME:")
        print(f"   True Day 1 avg volume: {true_vol:.1f}x")
        print(f"   False gap avg volume: {false_vol:.1f}x")

        # Volume only counts as confirming when true Day 1's average more
        # than 20% above the false gaps.
        volume_confirms = true_vol > false_vol * 1.2
        if not volume_confirms:
            print(f"\n   ‚ö†Ô∏è Volume doesn't differentiate true vs false gaps")
        else:
            print(f"\n   ‚úÖ VOLUME IS A CONFIRMING SIGNAL")
            print(f"   True Day 1's have ~{true_vol/false_vol:.0f}x more volume than false gaps")


üìä VOLUME AS GAP CONFIRMATION

üìà Day 1 Volume Stats:
   Average volume ratio: 1.0x normal
   Median volume ratio: 0.9x normal

   Day 1's with 1.5x volume: 17%
   Day 1's with 2x+ volume: 4%

üìä TRUE vs FALSE SIGNAL VOLUME:
   True Day 1 avg volume: 1.0x
   False gap avg volume: 1.6x

   ‚ö†Ô∏è Volume doesn't differentiate true vs false gaps


In [10]:
# COMBINED SIGNAL: GAP + VOLUME
# Among Day 1's, compares runs that opened with a 2%+ gap against runs that
# also had 1.5x+ volume.
print("\n" + "="*60)
print("üìä COMBINED SIGNAL: GAP + VOLUME")
print("="*60)

if len(df) > 0:
    # Boolean masks; NaN volume ratios compare False and drop out.
    has_gap = df['gap_pct'].ge(2)
    has_volume = df['vol_ratio'].ge(1.5)

    just_gap = df[has_gap]
    combined_signal = df[has_gap & has_volume]

    print(f"\nüìä Signal Quality Comparison:")
    print("-" * 40)
    print(f"   Gap only (2%+): {len(just_gap)} Day 1's")
    print(f"   Gap + Volume: {len(combined_signal)} Day 1's")

    # The run-quality comparison needs more than 5 combined-signal rows.
    if len(combined_signal) > 5:
        gap_only_avg = just_gap['run_gain'].mean()
        combined_avg = combined_signal['run_gain'].mean()

        print(f"\n   Avg run gain (gap only): {gap_only_avg:.1f}%")
        print(f"   Avg run gain (gap+vol): {combined_avg:.1f}%")

        # "Better" means the combined average is more than 10% above the
        # gap-only average.
        if combined_avg > gap_only_avg * 1.1:
            improvement_pct = (combined_avg/gap_only_avg-1)*100
            print(f"\n   ‚úÖ COMBINED SIGNAL PRODUCES BETTER RUNS")
            print(f"   Using Gap + Volume confirmation improves avg gain by {improvement_pct:.0f}%")


üìä COMBINED SIGNAL: GAP + VOLUME

üìä Signal Quality Comparison:
----------------------------------------
   Gap only (2%+): 47 Day 1's
   Gap + Volume: 13 Day 1's

   Avg run gain (gap only): 29.0%
   Avg run gain (gap+vol): 35.9%

   ‚úÖ COMBINED SIGNAL PRODUCES BETTER RUNS
   Using Gap + Volume confirmation improves avg gain by 24%


In [11]:
# THESIS VERDICT
# Summarises the Day 1 gap statistics and prints a verdict based on the
# share of Day 1's that gapped up.
# NOTE(review): the verdict uses only gap_up_pct measured on Day 1's. It
# does not use the false-positive rate from the FALSE SIGNAL ANALYSIS cell.
print("\n" + "="*80)
print("üéØ THESIS VERDICT: PRE-MARKET SIGNALS")
print("="*80)

if len(df) > 0:
    gap_up_pct = (df['gap_pct'] > 0).sum() / len(df) * 100
    gap_3pct = (df['gap_pct'] >= 3).sum() / len(df) * 100
    avg_vol = df['vol_ratio'].mean()

    print(f"\nüìä SUMMARY:")
    print("-" * 40)
    print(f"   Total Day 1's analyzed: {len(df)}")
    print(f"   Day 1's that gap UP: {gap_up_pct:.0f}%")
    print(f"   Day 1's with 3%+ gap: {gap_3pct:.0f}%")
    print(f"   Average volume on Day 1: {avg_vol:.1f}x normal")

    # Verdict tiers: >= 70% validated, >= 50% partial, otherwise killed.
    if gap_up_pct >= 70:
        print(f"\n‚úÖ THESIS VALIDATED: Day 1's DO gap up in pre-market")
        print(f"   {gap_up_pct:.0f}% of Day 1's show overnight/pre-market strength")

        print(f"\nüìã ACTIONABLE ENTRY RULES:")
        if gap_3pct >= 50:
            print(f"   1. Look for 3%+ gap on stocks showing Day 0 signals")
        else:
            print(f"   1. Look for any gap UP on stocks showing Day 0 signals")
        # One decimal, matching the summary line above. Rounding to an
        # integer printed "1x+" (or "0x+" for a mean below 0.5) as the
        # "elevated volume" threshold.
        print(f"   2. Confirm with elevated volume ({avg_vol:.1f}x+ normal)")
        print(f"   3. Enter within first 30 min of market open")
        print(f"   4. Hold for expected run duration (from Notebook 1)")

    elif gap_up_pct >= 50:
        print(f"\n‚ö†Ô∏è THESIS PARTIALLY VALIDATED")
        print(f"   {gap_up_pct:.0f}% gap up - majority but not overwhelming")
        print(f"   Use gap as ONE signal, not the ONLY signal")

    else:
        print(f"\n‚ùå THESIS KILLED: Day 1's don't reliably gap up")
        print(f"   Only {gap_up_pct:.0f}% gap up in pre-market")
        print(f"   Cannot use pre-market as reliable Day 1 signal")

print("\n" + "="*80)
print("NEXT: Run Notebook 5 (Combined Backtest) to test all rules together")
print("="*80)


üéØ THESIS VERDICT: PRE-MARKET SIGNALS

üìä SUMMARY:
----------------------------------------
   Total Day 1's analyzed: 136
   Day 1's that gap UP: 75%
   Day 1's with 3%+ gap: 26%
   Average volume on Day 1: 1.0x normal

‚úÖ THESIS VALIDATED: Day 1's DO gap up in pre-market
   75% of Day 1's show overnight/pre-market strength

üìã ACTIONABLE ENTRY RULES:
   1. Look for any gap UP on stocks showing Day 0 signals
   2. Confirm with elevated volume (1x+ normal)
   3. Enter within first 30 min of market open
   4. Hold for expected run duration (from Notebook 1)

NEXT: Run Notebook 5 (Combined Backtest) to test all rules together
