In [19]:
# =============================================================================
# IMPORTS
# =============================================================================

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
# NOTE: this installs an 'ignore' filter with no category or module restriction,
# so every warning raised after this cell runs is hidden for the rest of the
# session. Comment the next line out when debugging unexpected results.
warnings.filterwarnings('ignore')

# Banner so the cell output confirms that the imports above completed.
print("‚úì Imports loaded")

‚úì Imports loaded


In [20]:
# =============================================================================
# CONFIGURATION - EDIT THIS CELL TO CUSTOMIZE
# =============================================================================

# SECTOR BASKETS - Add or remove tickers as needed
# Maps a sector name to the list of ticker symbols that make up that basket.
# A ticker may appear in more than one basket (e.g. NVDA, AMD, MU, MRVL).
BASKETS = {
    'cybersecurity': [
        'CRWD', 'PANW', 'ZS', 'FTNT', 'OKTA',
        'S', 'VRNS', 'TENB', 'CYBR', 'NET',
    ],
    'biotech_small': [
        'SANA', 'KYTX', 'ORIC', 'ALT', 'PALI',
        'GHRS', 'MNMD', 'NTLA', 'TERN',
    ],
    'biotech_large': ['AMGN', 'GILD', 'REGN', 'VRTX', 'BIIB'],
    'uranium': [
        'UUUU', 'UEC', 'CCJ', 'LEU',
        'DNN', 'NXE', 'URG', 'SMR',
    ],
    'ai_hype': ['BBAI', 'AI', 'SOUN', 'PLTR', 'PATH'],
    'ai_infrastructure': ['NVDA', 'AMD', 'MU', 'AVGO', 'MRVL'],
    'defense': ['LMT', 'RTX', 'NOC', 'GD', 'BA'],
    'quantum': ['IONQ', 'RGTI', 'QBTS'],
    'space': ['RKLB', 'ASTS', 'LUNR'],
    'semi': [
        'NVTS', 'NXPI', 'SWKS', 'MRVL', 'ARM',
        'NVDA', 'AMD', 'INTC', 'MU', 'TSM',
    ],
}

# BENCHMARK ETFS
# Maps a short benchmark name to the ETF ticker symbol it stands for.
ETFS = {
    'cyber_etf': 'HACK',
    'biotech_small_etf': 'XBI',
    'biotech_large_etf': 'IBB',
    'healthcare_etf': 'XLV',
    'tech_etf': 'XLK',
    'spy': 'SPY',
}

# TIME PERIOD
# Date strings used as the default download window by the loading functions.
START_DATE = '2024-01-01'
END_DATE = '2026-01-08'

# Echo the active configuration so the cell output records what was used.
print("‚úì Configuration loaded")
print(f"  Sectors available: {list(BASKETS)}")
print(f"  Date range: {START_DATE} to {END_DATE}")

‚úì Configuration loaded
  Sectors available: ['cybersecurity', 'biotech_small', 'biotech_large', 'uranium', 'ai_hype', 'ai_infrastructure', 'defense', 'quantum', 'space', 'semi']
  Date range: 2024-01-01 to 2026-01-08


In [21]:
# =============================================================================
# DATA LOADING FUNCTIONS
# =============================================================================

# Sentinel meaning "use the date configured in the notebook". A dedicated object
# (rather than None) keeps an explicit None passing through to yfinance unchanged.
_USE_CONFIG = object()


def load_basket_data(tickers, start=_USE_CONFIG, end=_USE_CONFIG):
    """
    Load OHLCV data for one or more tickers via ``yf.download``.

    Parameters
    ----------
    tickers : str or iterable of str
        Ticker symbols to download, one request per ticker. A bare string such
        as ``'SPY'`` is treated as a single ticker instead of being iterated
        character by character.
    start, end : optional
        Passed straight through to ``yf.download``. When omitted they fall back
        to the notebook-level START_DATE / END_DATE, looked up at call time so
        that editing the configuration cell takes effect without re-running the
        cell that defines this function.

    Returns
    -------
    dict
        ``{ticker: DataFrame}`` for every ticker that returned at least one row
        and a usable price column. Each frame gains:

        * ``'Returns'``   - ``pct_change()`` of 'Adj Close' (or 'Close' when
          'Adj Close' is absent),
        * ``'Adj Close'`` - copied from 'Close' when it was absent,
        * ``'Ticker'``    - the ticker symbol.

        Tickers that fail (exception, empty frame, no price column) are
        reported with ``print`` and left out of the result; nothing is raised.
    """
    # A str is itself iterable, so without this 'SPY' would request 'S', 'P', 'Y'.
    if isinstance(tickers, str):
        tickers = [tickers]

    # Resolve the date window now rather than at definition time.
    if start is _USE_CONFIG:
        start = START_DATE
    if end is _USE_CONFIG:
        end = END_DATE

    data = {}
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end, progress=False)
            if len(df) == 0:
                print(f"‚úó No data for {ticker}")
                continue

            # Handle multi-index columns from yfinance
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(1)  # Drop ticker level, keep price level

            # Prefer 'Adj Close'; fall back to 'Close' when it is not present.
            # NOTE(review): whether 'Close' is already split/dividend adjusted
            # depends on the installed yfinance's auto_adjust default - confirm.
            if 'Adj Close' in df.columns:
                df['Returns'] = df['Adj Close'].pct_change()
            elif 'Close' in df.columns:
                df['Returns'] = df['Close'].pct_change()
                df['Adj Close'] = df['Close']  # Create Adj Close from Close
            else:
                print(f"‚úó No price data for {ticker}")
                continue

            df['Ticker'] = ticker
            data[ticker] = df
            print(f"‚úì Loaded {ticker}: {len(df)} days")
        except Exception as e:
            # Deliberate best-effort: one bad ticker must not abort the basket.
            print(f"‚úó Error loading {ticker}: {e}")
    return data

def load_sector(sector_name):
    """Fetch price data for every ticker in one named BASKETS sector.

    Returns whatever ``load_basket_data`` returns for that sector's ticker
    list. If ``sector_name`` is not a key of BASKETS, the valid names are
    printed and an empty dict is returned.
    """
    # Guard clause: reject names that are not configured.
    if sector_name not in BASKETS:
        print(f"‚úó Unknown sector: {sector_name}")
        print(f"  Available: {list(BASKETS.keys())}")
        return {}

    print(f"\nüì¶ Loading sector: {sector_name.upper()}")
    print("=" * 60)
    sector_tickers = BASKETS[sector_name]
    return load_basket_data(sector_tickers)

def load_etf(etf_name):
    """Fetch price data for one benchmark ETF, looked up by its ETFS key.

    Returns whatever ``load_basket_data`` returns for the one-element list
    holding the mapped ticker. If ``etf_name`` is not a key of ETFS, the valid
    names are printed and an empty dict is returned.
    """
    # Guard clause: reject names that are not configured.
    if etf_name not in ETFS:
        print(f"‚úó Unknown ETF: {etf_name}")
        print(f"  Available: {list(ETFS.keys())}")
        return {}

    ticker = ETFS[etf_name]
    print(f"\nüìä Loading ETF: {etf_name} ({ticker})")
    print("=" * 60)
    return load_basket_data([ticker])

print("‚úì Functions defined")

‚úì Functions defined


In [22]:
# =============================================================================
# EXAMPLE: LOAD CYBERSECURITY SECTOR
# =============================================================================

# Load cybersecurity basket
# load_sector() returns a {ticker: DataFrame} dict, or {} when the sector name
# is not a key of BASKETS.
cyber_data = load_sector('cybersecurity')

# Count of tickers actually present in the result; tickers that failed to
# download are reported by the loader and are not included in this number.
print(f"\n‚úì Loaded {len(cyber_data)} tickers")


üì¶ Loading sector: CYBERSECURITY
‚úì Loaded CRWD: 506 days
‚úì Loaded PANW: 506 days
‚úì Loaded ZS: 506 days
‚úì Loaded FTNT: 506 days
‚úì Loaded OKTA: 506 days
‚úì Loaded S: 506 days
‚úì Loaded VRNS: 506 days
‚úì Loaded TENB: 506 days
‚úì Loaded CYBR: 506 days
‚úì Loaded NET: 506 days

‚úì Loaded 10 tickers


In [23]:
# =============================================================================
# EXAMPLE: LOAD BENCHMARK (SPY)
# =============================================================================

# Load SPY for benchmarking
spy_data = load_basket_data(['SPY'])

# load_basket_data reports download problems with print and returns a dict that
# simply omits the failed ticker, so only claim success when SPY is present.
if 'SPY' in spy_data:
    print(f"\n‚úì Loaded SPY benchmark")
else:
    print("\nSPY benchmark NOT loaded - see the loader messages above")

‚úì Loaded SPY: 506 days

‚úì Loaded SPY benchmark


In [24]:
# =============================================================================
# QUICK DATA CHECK
# =============================================================================

# Preview the first loaded ticker: its last 10 rows, then summary return stats.
# Skipped entirely when nothing was loaded.
if cyber_data:
    first_ticker = next(iter(cyber_data))
    print(f"\nüìä Sample data for {first_ticker}:")
    print("=" * 60)
    print(cyber_data[first_ticker][['Adj Close', 'Volume', 'Returns']].tail(10))

    # Daily returns with missing values removed (pct_change leaves the first row NaN)
    returns = cyber_data[first_ticker]['Returns'].dropna()
    print(f"\nüìà Stats for {first_ticker}:")
    print(f"  Average daily return: {returns.mean()*100:.2f}%")
    print(f"  Volatility (std): {returns.std()*100:.2f}%")
    print(f"  Best day: {returns.max()*100:.2f}%")
    print(f"  Worst day: {returns.min()*100:.2f}%")


üìä Sample data for CRWD:
Price        Adj Close   Volume   Returns
Date                                     
2025-12-23  478.839996  2053000 -0.008900
2025-12-24  477.109985   745100 -0.003613
2025-12-26  481.190002  1150700  0.008552
2025-12-29  475.910004  1314600 -0.010973
2025-12-30  475.630005  1168100 -0.000588
2025-12-31  468.760010  1661000 -0.014444
2026-01-02  453.579987  3325000 -0.032383
2026-01-05  456.549988  2615700  0.006548
2026-01-06  458.320007  2340100  0.003877
2026-01-07  478.910004  3958285  0.044925

üìà Stats for CRWD:
  Average daily return: 0.17%
  Volatility (std): 2.91%
  Best day: 16.30%
  Worst day: -13.46%


---

## ✅ DATA LOADED

**Next steps:**
1. Use `cyber_data` (or whichever sector you loaded) in other notebooks
2. Or load a different sector by changing the sector name in the load cell
3. Data is stored as dict: `{'TICKER': DataFrame}`

**To use in another notebook:**
```python
# Option 1: Copy the functions and run
cyber_data = load_sector('cybersecurity')

# Option 2: Load specific tickers
custom_data = load_basket_data(['CRWD', 'ZS', 'PANW'])
```

🐺 **Data ready. Time to hunt patterns.**