In [None]:
# ============================================================
# CELL 1: SETUP & INSTALLS
# ============================================================

# Run this cell first to install dependencies
!pip install yfinance pandas numpy matplotlib seaborn requests tqdm -q

import pandas as pd
import numpy as np
import yfinance as yf
import requests
import json
import time
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Tuple
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# Set style for plots: apply the 'seaborn-v0_8-darkgrid' style sheet through
# matplotlib and make "husl" the seaborn colour palette for later figures.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Startup banner followed by the installed pandas / NumPy versions, so the
# library versions are recorded in the cell output.
print("üê∫ Wolf Pack Backtest System Loaded!")
print(f"Pandas: {pd.__version__}")
print(f"NumPy: {np.__version__}")

# Check for GPU (optional - not needed for this workload).
# The probe simply runs `nvidia-smi`; exit status 0 is treated as "GPU present".
import subprocess

try:
    gpu_check = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    gpu_available = gpu_check.returncode == 0
except (OSError, subprocess.SubprocessError):
    # nvidia-smi is not installed or could not be launched -> no usable GPU.
    # Only launch failures are caught, so Ctrl-C / SystemExit still propagate.
    gpu_available = False

if gpu_available:
    print("‚úÖ GPU Available (not required for this workload)")
else:
    print("‚ÑπÔ∏è  CPU Mode (perfectly fine - bottleneck is SEC API, not compute)")

In [None]:
# ============================================================
# CELL 2: CONFIGURATION
# ============================================================

# SEC EDGAR Configuration
# SEC_HEADERS is sent with every SEC request made by the fetcher functions.
SEC_BASE_URL = "https://data.sec.gov"
SEC_HEADERS = {
    "User-Agent": "WolfPackScanner contact@wolfpack.trading",
    "Accept-Encoding": "gzip, deflate"
}

# Keywords that indicate contract wins (from our scanner).
# All phrases are lower-case because filing text is lower-cased before matching.
CONTRACT_KEYWORDS = [
    "contract awarded", "contract award", "government contract",
    "defense contract", "department of defense", "dod contract",
    "idiq", "task order", "prime contract", "subcontract",
    "army", "navy", "air force", "space force", "missile defense",
    "awarded a contract", "received a contract", "contract value",
    "multi-year contract", "indefinite delivery", "ceiling value",
    "nasa", "faa", "homeland security"
]

# Our sectors of interest (a ticker may appear in more than one sector)
SECTOR_TICKERS = {
    'defense': ['LMT', 'NOC', 'RTX', 'GD', 'LHX', 'KTOS', 'PLTR', 'BBAI'],
    'space': ['RKLB', 'LUNR', 'ASTS', 'SPCE', 'MNTS'],
    'ai_infra': ['MU', 'VRT', 'NVDA', 'AMD', 'AVGO', 'MRVL'],
    'nuclear': ['CCJ', 'LEU', 'OKLO', 'SMR', 'VST', 'CEG'],
    'small_cap_defense': ['SIDU', 'BBAI', 'KTOS', 'MRCY']
}

# Flatten for easy access. The set removes tickers listed in several sectors;
# sorting makes the order identical on every kernel start (a bare set of
# strings iterates in a different order from run to run).
ALL_TICKERS = sorted({t for sector_list in SECTOR_TICKERS.values() for t in sector_list})

print(f"üìä Configuration Loaded")
print(f"   Tracking {len(ALL_TICKERS)} tickers across {len(SECTOR_TICKERS)} sectors")
print(f"   Contract keywords: {len(CONTRACT_KEYWORDS)} phrases")
print(f"\nSectors:")
for sector, tickers in SECTOR_TICKERS.items():
    print(f"   {sector:20} | {len(tickers)} tickers")

In [None]:
# ============================================================
# CELL 3: SEC FILING FETCHER
# ============================================================

# Ticker -> zero-padded CIK map. Filled on the first successful download so the
# SEC ticker file is fetched once per session instead of once per ticker.
_TICKER_CIK_CACHE: Dict[str, str] = {}


def get_company_cik(ticker: str) -> Optional[str]:
    """
    Get the CIK number for a ticker from the SEC ticker list.

    Args:
        ticker: Stock symbol; matching is case-insensitive.

    Returns:
        The CIK as a 10-character zero-padded string, or None when the ticker
        is empty, is not in the SEC list, or the list could not be downloaded.
    """
    if not ticker:
        return None

    if not _TICKER_CIK_CACHE:
        # company_tickers.json is published on www.sec.gov; the data.sec.gov host
        # in SEC_BASE_URL does not serve /files/, so it cannot be used here.
        url = "https://www.sec.gov/files/company_tickers.json"
        try:
            response = requests.get(url, headers=SEC_HEADERS, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best-effort lookup: report the cause and let the caller skip the ticker.
            # Nothing is cached, so the next call retries the download.
            print(f"Warning: could not download SEC ticker list: {e}")
            return None

        entries = data.values() if isinstance(data, dict) else []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            symbol = str(entry.get('ticker', '')).upper()
            cik_value = entry.get('cik_str')
            if symbol and cik_value is not None:
                # setdefault keeps the first entry for a symbol, like the original scan.
                _TICKER_CIK_CACHE.setdefault(symbol, str(cik_value).zfill(10))

    return _TICKER_CIK_CACHE.get(ticker.upper())


def get_8k_filings(cik: str, start_date: str, end_date: str) -> List[Dict]:
    """
    Get all 8-K filings for a company in a date range.

    Only the 'recent' section of the SEC submissions document is read, and only
    rows whose form is exactly '8-K' are kept.

    Args:
        cik: CIK string, inserted as-is into the submissions URL.
        start_date: Inclusive lower bound, 'YYYY-MM-DD'.
        end_date: Inclusive upper bound, 'YYYY-MM-DD'.

    Returns:
        List of dicts with keys 'date', 'type', 'accession' and 'cik'.
        On any error a warning is printed and the filings collected so far
        (possibly none) are returned.
    """
    filings = []

    try:
        # Get submissions using the modern API
        url = f"{SEC_BASE_URL}/submissions/CIK{cik}.json"
        response = requests.get(url, headers=SEC_HEADERS, timeout=15)
        # Surface 403/429/5xx as HTTP errors instead of a confusing JSON decode error.
        response.raise_for_status()
        data = response.json()

        # The 'recent' section stores one parallel list per field.
        recent = data.get('filings', {}).get('recent', {})
        rows = zip(
            recent.get('form', []),
            recent.get('filingDate', []),
            recent.get('accessionNumber', []),
        )

        for form, filing_date, accession in rows:
            # ISO dates compare correctly as plain strings.
            if form == '8-K' and start_date <= filing_date <= end_date:
                filings.append({
                    'date': filing_date,
                    'type': '8-K',
                    'accession': accession,
                    'cik': cik
                })

    except Exception as e:
        print(f"‚ö†Ô∏è  Error fetching 8-K for CIK {cik}: {e}")
    finally:
        # SEC rate limiting: pause after failed requests too, so a throttled
        # session does not immediately fire the next request.
        time.sleep(0.15)

    return filings


def get_filing_text(cik: str, accession: str) -> str:
    """
    Get the lower-cased text content of an 8-K filing.

    Args:
        cik: CIK string; leading zeros are dropped for the archive path.
        accession: Accession number in dashed form.

    Returns:
        The filing text in lower case, or "" when the identifiers are
        malformed or the download fails (including HTTP error responses).
    """
    try:
        # The folder name is the accession number without dashes; the file name keeps them.
        acc_formatted = accession.replace('-', '')
        # Filing documents live under www.sec.gov/Archives; the data.sec.gov host
        # in SEC_BASE_URL does not serve /Archives/.
        url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{acc_formatted}/{accession}.txt"
    except (AttributeError, TypeError, ValueError):
        return ""

    try:
        response = requests.get(url, headers=SEC_HEADERS, timeout=15)
        # Without this check a 403/404/429 error page would be scanned as filing text.
        response.raise_for_status()
        return response.text.lower()
    except requests.RequestException:
        return ""
    finally:
        time.sleep(0.15)  # Rate limiting, applied after failures as well


def contains_contract_keywords(text: str) -> Tuple[bool, List[str]]:
    """
    Report which CONTRACT_KEYWORDS phrases occur in a piece of filing text.

    Matching is a case-insensitive substring test. The returned list keeps
    the order of CONTRACT_KEYWORDS, and the flag is True when it is non-empty.
    """
    haystack = text.lower()
    matched = [phrase for phrase in CONTRACT_KEYWORDS if phrase in haystack]
    return bool(matched), matched

print("‚úÖ SEC Filing Functions Loaded")

In [None]:
# ============================================================
# CELL 4: PRICE DATA FETCHER
# ============================================================

def get_price_reaction(ticker: str, event_date: str, days_before: int = 5, days_after: int = 20) -> Optional[Dict]:
    """
    Get price reaction around an event date.

    The event bar is the first trading day on or up to 4 calendar days after
    event_date. Forward returns are close-to-close from the event bar, counted
    in trading bars.

    Args:
        ticker: Symbol passed to yfinance.
        event_date: Event day as 'YYYY-MM-DD'.
        days_before: Days of history requested before the event (plus a buffer).
        days_after: Trading days wanted after the event; sets the download window.

    Returns:
        Dict with 'ticker', 'event_date', 'pre_close', 'event_close',
        'overnight_gap' and 'return_{1,2,3,5,10,20}d' (all percentages rounded to
        2 decimals). A return is None when the price history does not reach that
        horizon. Returns None when there is too little data, no trading day is
        found near the event, or any error occurs (best-effort lookup).
    """
    intervals = [1, 2, 3, 5, 10, 20]

    try:
        # Parse date
        event_dt = datetime.strptime(event_date, '%Y-%m-%d')
        start_dt = event_dt - timedelta(days=days_before + 10)  # Buffer for trading days
        # days_after counts trading days; doubling it before adding the buffer
        # leaves room for weekends and holidays in the calendar-day window.
        end_dt = event_dt + timedelta(days=2 * days_after + 10)

        # Fetch data
        stock = yf.Ticker(ticker)
        hist = stock.history(start=start_dt.strftime('%Y-%m-%d'),
                             end=end_dt.strftime('%Y-%m-%d'))

        if hist.empty or len(hist) < 10:
            return None

        # Drop timezone info so index dates can be compared with the naive event date.
        hist.index = hist.index.tz_localize(None)
        event_idx = None

        for offset in range(5):  # Look up to 4 days forward for a trading day
            check_date = (event_dt + timedelta(days=offset)).date()
            matches = hist.index[hist.index.date == check_date]
            if len(matches) > 0:
                event_idx = hist.index.get_loc(matches[0])
                break

        if event_idx is None:
            return None

        # Get prices at key points
        event_close = hist.iloc[event_idx]['Close']

        # Price before event (1 bar earlier; the event bar itself if it is the first row)
        pre_idx = max(0, event_idx - 1)
        pre_close = hist.iloc[pre_idx]['Close']

        # Calculate returns at different intervals
        returns = {}
        last_idx = len(hist) - 1
        for days in intervals:
            future_idx = event_idx + days
            if future_idx <= last_idx:
                future_close = hist.iloc[future_idx]['Close']
                returns[f'return_{days}d'] = ((future_close - event_close) / event_close) * 100
            else:
                # Not enough bars yet: report "unknown" rather than a shorter
                # horizon's return under this label.
                returns[f'return_{days}d'] = None

        # Overnight gap (event day open vs previous close)
        event_open = hist.iloc[event_idx]['Open']
        overnight_gap = ((event_open - pre_close) / pre_close) * 100

        return {
            'ticker': ticker,
            'event_date': event_date,
            'pre_close': round(pre_close, 2),
            'event_close': round(event_close, 2),
            'overnight_gap': round(overnight_gap, 2),
            # `is not None` keeps a genuine 0.0 return instead of turning it into None.
            **{k: (round(v, 2) if v is not None else None) for k, v in returns.items()}
        }

    except Exception:
        # Deliberate best-effort: any download or parsing problem means "no reaction data".
        return None

print("‚úÖ Price Reaction Functions Loaded")

In [None]:
# ============================================================
# CELL 5: MAIN BACKTEST FUNCTION
# ============================================================

def backtest_contract_announcements(
    tickers: List[str],
    start_date: str = "2023-01-01",
    end_date: str = "2025-12-31",
    min_keywords: int = 2
) -> pd.DataFrame:
    """
    Backtest the price reaction that follows contract-related 8-K filings.

    For each ticker: resolve the CIK, list its 8-K filings in the window,
    download each filing, and record the price reaction when enough contract
    keywords are found.

    Args:
        tickers: Symbols to process.
        start_date: First filing date to include ('YYYY-MM-DD').
        end_date: Last filing date to include ('YYYY-MM-DD').
        min_keywords: Distinct keyword hits a filing needs to count as contract news.

    Returns:
        DataFrame with one row per qualifying filing: ticker, filing_date,
        keywords_found, keywords (first five, comma-joined) and the price
        reaction fields. Empty when nothing qualified.
    """
    results = []
    separator = f"=" * 50

    print(f"\nüê∫ WOLF PACK BACKTEST: CONTRACT ANNOUNCEMENTS")
    print(separator)
    print(f"Analyzing {len(tickers)} tickers from {start_date} to {end_date}")
    print(f"Looking for filings with {min_keywords}+ contract keywords")
    print(separator)

    for ticker in tqdm(tickers, desc="üê∫ Processing tickers"):
        try:
            cik = get_company_cik(ticker)
            if not cik:
                print(f"  ‚ö†Ô∏è  {ticker}: CIK not found")
                continue

            # An empty filing list simply skips the inner loop.
            for filing in get_8k_filings(cik, start_date, end_date):
                text = get_filing_text(cik, filing['accession'])
                if not text:
                    # Nothing downloaded: move on without the extra pause.
                    continue

                has_contract, keywords = contains_contract_keywords(text)
                qualifies = has_contract and len(keywords) >= min_keywords
                reaction = get_price_reaction(ticker, filing['date']) if qualifies else None

                if reaction:
                    row = {
                        'ticker': ticker,
                        'filing_date': filing['date'],
                        'keywords_found': len(keywords),
                        'keywords': ', '.join(keywords[:5]),
                    }
                    # Append the reaction fields, minus the two that duplicate the row keys.
                    row.update(
                        (k, v) for k, v in reaction.items()
                        if k not in ('ticker', 'event_date')
                    )
                    results.append(row)

                time.sleep(0.1)  # Rate limiting

        except Exception as e:
            print(f"  ‚ùå Error processing {ticker}: {e}")

    df = pd.DataFrame(results)

    if len(df) > 0:
        summary = f"\n‚úÖ Found {len(df)} contract announcement events!"
    else:
        summary = f"\n‚ö†Ô∏è  No contract announcements found. Try different tickers or date range."
    print(summary)

    return df

print("‚úÖ Backtest Function Loaded")

In [None]:
# ============================================================
# CELL 6: ANALYSIS FUNCTIONS
# ============================================================

def analyze_results(df: pd.DataFrame):
    """
    Analyze backtest results and print summary statistics.

    Prints event counts, per-horizon return statistics, overnight gap
    statistics, the ten best 5-day reactions and the average 5-day return
    per ticker.

    Args:
        df: Backtest output with 'ticker', 'filing_date', 'keywords',
            'overnight_gap' and 'return_*' columns.

    Returns:
        The input DataFrame, unmodified, or None when it is empty.
    """
    if df.empty:
        print("No data to analyze.")
        return

    return_cols = [c for c in df.columns if c.startswith('return_')]

    # Work on a numeric copy. A column holding only None has object dtype, which
    # makes nlargest() raise; coercion turns it into float NaN. The caller's
    # frame is left untouched.
    stats = df.copy()
    for col in return_cols + ['overnight_gap']:
        if col in stats.columns:
            stats[col] = pd.to_numeric(stats[col], errors='coerce')

    print(f"\nüê∫ BACKTEST RESULTS ANALYSIS")
    print(f"=" * 50)
    print(f"Total Events: {len(df)}")
    print(f"Unique Tickers: {df['ticker'].nunique()}")
    print(f"Date Range: {df['filing_date'].min()} to {df['filing_date'].max()}")

    # Return statistics at different intervals
    print(f"\nüìä RETURN STATISTICS:")
    print("-" * 50)

    for col in return_cols:
        valid_data = stats[col].dropna()
        if len(valid_data) > 0:
            days = col.split('_')[1]  # 'return_5d' -> '5d'
            avg = valid_data.mean()
            median = valid_data.median()
            win_rate = (valid_data > 0).sum() / len(valid_data) * 100

            print(f"  {days:>5} | Avg: {avg:+6.2f}% | Median: {median:+6.2f}% | Win Rate: {win_rate:.1f}%")

    # Overnight gap stats
    print(f"\nüìà OVERNIGHT GAP STATS:")
    print("-" * 50)
    if 'overnight_gap' in stats.columns:
        gaps = stats['overnight_gap'].dropna()
        if len(gaps) > 0:
            print(f"  Average Gap: {gaps.mean():+.2f}%")
            print(f"  Median Gap:  {gaps.median():+.2f}%")
            print(f"  Gap Up Rate: {(gaps > 0).sum() / len(gaps) * 100:.1f}%")

    # Rankings only make sense for events that have a 5-day return.
    has_5d = 'return_5d' in stats.columns
    scored = stats.dropna(subset=['return_5d']) if has_5d else stats

    # Best performers
    print(f"\nüèÜ TOP 10 BEST REACTIONS (5-day return):")
    print("-" * 50)
    if has_5d:
        top10 = scored.nlargest(10, 'return_5d')[['ticker', 'filing_date', 'return_5d', 'keywords']]
        for idx, row in top10.iterrows():
            print(f"  {row['ticker']:6} | {row['filing_date']} | {row['return_5d']:+6.2f}% | {str(row['keywords'])[:30]}...")

    # By ticker
    print(f"\nüìã BY TICKER (Avg 5-day return):")
    print("-" * 50)
    if has_5d:
        by_ticker = scored.groupby('ticker')['return_5d'].agg(['mean', 'count']).sort_values('mean', ascending=False)
        for ticker, row in by_ticker.head(10).iterrows():
            print(f"  {ticker:6} | Avg: {row['mean']:+6.2f}% | Events: {int(row['count'])}")

    return df


def plot_results(df: pd.DataFrame):
    """
    Draw a 2x2 summary figure of the backtest and save it to backtest_results.png.

    Panels: overnight gap histogram, 5-day return histogram, mean 5-day return
    per keyword count, and the running sum of 5-day returns in filing-date
    order. Does nothing when df is empty.
    """
    if df.empty:
        return

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    (ax1, ax2), (ax3, ax4) = axes
    has_5d = 'return_5d' in df.columns

    # 1. Distribution of overnight gaps
    gaps = df['overnight_gap']
    gap_mean = gaps.mean()
    gaps.hist(bins=30, ax=ax1, color='steelblue', edgecolor='black')
    ax1.axvline(x=0, color='red', linestyle='--', label='Zero')
    ax1.axvline(x=gap_mean, color='green', linestyle='--', label=f'Mean: {gap_mean:.2f}%')
    ax1.set_title('üê∫ Distribution of Overnight Gaps', fontsize=12, fontweight='bold')
    ax1.set_xlabel('Gap %')
    ax1.legend()

    # 2. Distribution of 5-day returns
    if has_5d:
        returns_5d = df['return_5d']
        mean_5d = returns_5d.mean()
        returns_5d.dropna().hist(bins=30, ax=ax2, color='green', edgecolor='black')
        ax2.axvline(x=0, color='red', linestyle='--')
        ax2.axvline(x=mean_5d, color='blue', linestyle='--', label=f'Mean: {mean_5d:.2f}%')
        ax2.set_title('üê∫ Distribution of 5-Day Returns', fontsize=12, fontweight='bold')
        ax2.set_xlabel('Return %')
        ax2.legend()

    # 3. Return by number of keywords
    if has_5d and 'keywords_found' in df.columns:
        mean_by_hits = df.groupby('keywords_found')['return_5d'].mean()
        mean_by_hits.plot(kind='bar', ax=ax3, color='purple', edgecolor='black')
        ax3.set_title('üê∫ Avg 5-Day Return by # Keywords Found', fontsize=12, fontweight='bold')
        ax3.set_xlabel('Number of Keywords')
        ax3.set_ylabel('Avg Return %')
        ax3.set_xticklabels(ax3.get_xticklabels(), rotation=0)

    # 4. Cumulative returns over time (missing returns count as 0)
    if has_5d:
        cumulative = df.sort_values('filing_date')['return_5d'].fillna(0).cumsum()
        event_numbers = range(len(cumulative))
        ax4.plot(event_numbers, cumulative, color='green', linewidth=2)
        ax4.fill_between(event_numbers, cumulative, alpha=0.3)
        ax4.set_title('üê∫ Cumulative Returns (if traded each signal)', fontsize=12, fontweight='bold')
        ax4.set_xlabel('Event Number')
        ax4.set_ylabel('Cumulative Return %')
        ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('backtest_results.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("\nüìä Charts saved to backtest_results.png")


def print_edge_summary(df: pd.DataFrame):
    """
    Print the headline summary of the backtest.

    Always prints the banner for a non-empty frame; the statistics block
    (average 5-day return, win rate, sample size) follows only when a
    'return_5d' column exists. Prints nothing for an empty frame.
    """
    if df.empty:
        return

    banner = f"üê∫" * 25
    print(f"\n")
    print(banner)
    print(f"\n        THE WOLF PACK EDGE - SUMMARY\n")
    print(banner)

    if 'return_5d' not in df.columns:
        return

    # Calculate key metrics
    returns_5d = df['return_5d']
    avg_5d = returns_5d.mean()
    winners = (returns_5d > 0).sum()
    scored_events = len(df.dropna(subset=['return_5d']))  # events with a 5-day return
    win_rate = winners / scored_events * 100

    print(f"""
WHAT WE FOUND:
--------------
After a company announces a government/defense contract (8-K filing):

  üìà Average 5-day return: {avg_5d:+.2f}%
  üéØ Win rate: {win_rate:.1f}%
  üìä Sample size: {len(df)} events

THE EDGE:
---------
If we had bought every stock the day a contract 8-K was filed
and sold 5 days later:

  ‚úÖ We would have been RIGHT {win_rate:.0f}% of the time
  ‚úÖ Average gain per trade: {avg_5d:.2f}%
  ‚úÖ Expected value is {'POSITIVE ‚úÖ' if avg_5d > 0 else 'NEGATIVE ‚ö†Ô∏è'}

HOW TO USE THIS:
----------------
1. Run the scanner daily to catch new 8-K filings
2. When contract keywords detected ‚Üí ALERT
3. Enter position same day or next morning
4. Hold for 3-5 days
5. Take profits at target

REMEMBER:
---------
- This is a STATISTICAL edge, not a guarantee
- Use proper position sizing (5-10% of account)
- Set stop losses (Wolf Pack 2% risk rule)
- The edge works OVER TIME, not every trade

AWOOOO üê∫
        """)

print("‚úÖ Analysis Functions Loaded")

In [None]:
# ============================================================
# CELL 7: RUN BACKTEST - DEFENSE SECTOR
# ============================================================

# Run backtest on defense sector (most likely to have contract news)
print("üê∫ Running defense sector backtest...")
print("‚è±Ô∏è  This will take 5-15 minutes depending on SEC API speed...\n")

defense_tickers = SECTOR_TICKERS['defense'] + SECTOR_TICKERS['small_cap_defense']

defense_results = backtest_contract_announcements(
    # Remove duplicates (BBAI and KTOS are in both lists). Sorting keeps the
    # processing order, and so the row order of the results, the same on every run.
    tickers=sorted(set(defense_tickers)),
    start_date="2024-01-01",  # Two-year window: 2024-01-01 through 2025-12-31
    end_date="2025-12-31",
    min_keywords=2  # Must have at least 2 contract keywords
)

# Analyze and visualize
if not defense_results.empty:
    analyze_results(defense_results)
    plot_results(defense_results)
    print_edge_summary(defense_results)
    
    # Save for later use
    defense_results.to_csv('defense_backtest_results.csv', index=False)
    print("\nüíæ Results saved to defense_backtest_results.csv")
    print("   You can load this later with: df = pd.read_csv('defense_backtest_results.csv')")
else:
    print("\n‚ö†Ô∏è  No results found. This could mean:")
    print("   - SEC API is down or rate limiting")
    print("   - No contract announcements in this period")
    print("   - Try running with min_keywords=1 instead of 2")

In [None]:
# ============================================================
# CELL 8: QUICK SINGLE TICKER ANALYSIS
# ============================================================

def quick_ticker_analysis(ticker: str, days: int = 365):
    """
    Run the contract-announcement backtest for one ticker over the last `days` days.

    Uses a one-keyword threshold, prints the analysis, and plots only when at
    least three events were found.

    Returns:
        The backtest DataFrame, or a new empty DataFrame when no events were found.
    """
    date_format = '%Y-%m-%d'
    end_date = datetime.now().strftime(date_format)
    start_date = (datetime.now() - timedelta(days=days)).strftime(date_format)

    print(f"\nüê∫ QUICK ANALYSIS: {ticker}")
    print(f"Period: Last {days} days ({start_date} to {end_date})\n")

    results = backtest_contract_announcements(
        tickers=[ticker],
        start_date=start_date,
        end_date=end_date,
        min_keywords=1  # Lower threshold for single ticker
    )

    if results.empty:
        print(f"‚ùå No contract announcements found for {ticker}.")
        return pd.DataFrame()

    analyze_results(results)
    enough_to_plot = len(results) >= 3  # Only plot if we have enough data
    if enough_to_plot:
        plot_results(results)
    return results

# Example: Analyze SIDU (the one that ran 218%)
# Uncomment to run:
# sidu_results = quick_ticker_analysis('SIDU', days=180)

# Or try PLTR:
# pltr_results = quick_ticker_analysis('PLTR', days=365)

print("\nüí° To analyze a specific ticker, run:")
print("   results = quick_ticker_analysis('TICKER', days=365)")
print("\n   Examples:")
print("   - quick_ticker_analysis('SIDU', days=180)")
print("   - quick_ticker_analysis('PLTR', days=365)")
print("   - quick_ticker_analysis('BBAI', days=90)")

---

## 🐺 WOLF PACK BACKTEST COMPLETE

### What You Have Now:

1. **Backtest results** showing actual historical edge
2. **Statistical analysis** (win rate, avg returns, etc.)
3. **Visualizations** (charts saved to backtest_results.png)
4. **CSV export** (defense_backtest_results.csv)

### Next Steps:

1. **Run more sectors**:
   ```python
   space_results = backtest_contract_announcements(
       tickers=SECTOR_TICKERS['space'],
       start_date="2024-01-01",
       end_date="2025-12-31"
   )
   ```

2. **Analyze specific tickers**:
   ```python
   sidu = quick_ticker_analysis('SIDU', days=180)
   ```

3. **Combine with live scanner**:
   - Use this backtest data to validate the edge
   - When scanner alerts → check if it fits the pattern
   - Trade with confidence knowing the statistics

**AWOOOO 🐺 - The Pack Hunts With Data**