# Space Infrastructure Index - Data Exploration

This notebook explores market data from Polygon.io to understand:
1. Available stock universe
2. Company metadata and financials
3. Data quality and coverage
4. Potential space infrastructure candidates

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from polygon_client import PolygonClient
from datetime import datetime, timedelta

# Shared API client; every data-fetching cell below goes through this object
client = PolygonClient()

# Chart appearance: apply the matplotlib dark-grid style sheet first, then
# layer the seaborn "husl" colour palette on top of it
plt.style.use("seaborn-v0_8-darkgrid")
sns.set_palette(palette="husl")

## 1. Test Connection with Known Space Companies

In [None]:
# Known space infrastructure companies (publicly traded)
space_tickers = [
    "ASTS",   # AST SpaceMobile - satellite connectivity
    "RKLB",   # Rocket Lab - launch provider
    "SPCE",   # Virgin Galactic - space tourism/launch
    "GSAT",   # Globalstar - satellite communications
    "IRDM",   # Iridium - satellite communications
    "LUNR",   # Intuitive Machines - lunar infrastructure
    "PL",     # Planet Labs - Earth observation
    "GNSS",   # Genasys - satellite-enabled emergency management
]

# Maximum number of description characters shown in the overview table
DESCRIPTION_PREVIEW_CHARS = 150


def preview_description(text, limit=DESCRIPTION_PREVIEW_CHARS):
    """Return a table-friendly preview of a company description.

    Args:
        text: Full description, or None when the API supplied no description.
        limit: Maximum number of characters kept from ``text``.

    Returns:
        ``text`` unchanged when it fits within ``limit`` characters, otherwise
        its first ``limit`` characters followed by '...'. A missing (None) or
        empty description yields ''.
    """
    text = text or ''
    if len(text) <= limit:
        return text
    # The ellipsis is added only when something was actually cut off
    return text[:limit] + '...'


# Fetch details for each ticker; a failure for one ticker is reported and the
# loop carries on with the next one
space_companies = []

for ticker in space_tickers:
    try:
        details = client.get_ticker_details(ticker)
        # `or {}` also covers a response whose 'results' key is present but None
        result = details.get('results') or {}

        space_companies.append({
            'ticker': ticker,
            'name': result.get('name'),
            'market_cap': result.get('market_cap'),
            'description': preview_description(result.get('description')),
            'primary_exchange': result.get('primary_exchange'),
            'locale': result.get('locale'),
        })
        print(f"✓ {ticker}: {result.get('name')}")
    except Exception as e:
        print(f"✗ {ticker}: Error - {e}")

# One row per ticker that was fetched without raising
df_space = pd.DataFrame(space_companies)
df_space

## 2. Get Financial Fundamentals

In [None]:
# Worked example: request the three most recent annual filings for Rocket Lab
ticker = "RKLB"
financials = client.get_financials(ticker, timeframe="annual", limit=3)

# Build one record per reporting period. Revenue sits four levels deep
# (financials -> income_statement -> revenues -> value); any missing level
# falls back to an empty dict, so an absent figure ends up as None.
revenue_data = [
    {
        'fiscal_year': filing.get('fiscal_year'),
        'revenue': (
            filing.get('financials', {})
            .get('income_statement', {})
            .get('revenues', {})
            .get('value')
        ),
        'period': filing.get('fiscal_period'),
    }
    for filing in financials
]

df_revenue = pd.DataFrame(revenue_data)
print(f"\nRevenue data for {ticker}:")
df_revenue

## 3. Get Historical Price Data

In [None]:
# Get 1 year of price bars
# NOTE(review): assumes get_aggregates defaults to daily bars - confirm in polygon_client
ticker = "ASTS"

# Sample the clock once so both ends of the window derive from the same instant
now = datetime.now()
from_date = (now - timedelta(days=365)).strftime("%Y-%m-%d")
to_date = now.strftime("%Y-%m-%d")

bars = client.get_aggregates(ticker, from_date=from_date, to_date=to_date)

# Stop here with a readable message; otherwise an empty response only surfaces
# below as an opaque KeyError: 't'
if not bars:
    raise ValueError(
        f"No price bars returned for {ticker} between {from_date} and {to_date}"
    )

# Convert to a DataFrame indexed by bar timestamp ('t' is parsed as epoch
# milliseconds) with the single-letter OHLCV columns given readable names
df_price = (
    pd.DataFrame(bars)
    .assign(date=lambda frame: pd.to_datetime(frame['t'], unit='ms'))
    .rename(columns={'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'volume'})
    .set_index('date')
)

print(f"Retrieved {len(df_price)} days of data for {ticker}")
df_price.tail()

In [None]:
# Two stacked panels sharing the date axis: price on top, volume underneath
fig, (price_ax, volume_ax) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
dates = df_price.index

# Top panel: closing price line with the low-high range shaded behind it
price_ax.plot(dates, df_price['close'], linewidth=2, label='Close Price')
price_ax.fill_between(dates, df_price['low'], df_price['high'], alpha=0.3)
price_ax.set_title(f'{ticker} - Price History (1 Year)', fontsize=14, fontweight='bold')
price_ax.set_ylabel('Price ($)', fontsize=12)
price_ax.legend()

# Bottom panel: traded volume per bar
volume_ax.bar(dates, df_price['volume'], alpha=0.6, color='steelblue')
volume_ax.set_xlabel('Date', fontsize=12)
volume_ax.set_ylabel('Volume', fontsize=12)

# Same faint grid on both panels
for panel in (price_ax, volume_ax):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Calculate Basic Metrics

In [None]:
# Calculate metrics for space companies
TRADING_DAYS_PER_YEAR = 252  # factor used to annualize the daily return volatility
METRICS_SAMPLE_SIZE = 3      # test with the first few tickers to avoid rate limits

metrics = []

# The loop uses its own names (symbol, symbol_bars, symbol_details) so it does
# not overwrite the notebook-level `ticker`, `bars` and `details`; the price
# chart cell reads `ticker` for its title and would be mislabelled if it were
# re-run after this cell had rebound that name.
for symbol in space_tickers[:METRICS_SAMPLE_SIZE]:
    try:
        # Get recent price data (same date window as the price history cell)
        symbol_bars = client.get_aggregates(symbol, from_date=from_date, to_date=to_date)
        if not symbol_bars:
            continue

        # Order bars by timestamp so "latest" and the day-over-day returns do
        # not depend on the order the client happens to return them in
        # (a stable sort is a no-op when the bars are already ascending)
        prices = pd.DataFrame(symbol_bars).sort_values('t', kind='stable')

        # Calculate metrics
        latest_price = prices.iloc[-1]['c']
        avg_volume = prices['v'].mean()
        volatility = prices['c'].pct_change().std() * np.sqrt(TRADING_DAYS_PER_YEAR)  # Annualized

        # Get market cap; `or {}` also covers a 'results' key that is present but None
        symbol_details = client.get_ticker_details(symbol)
        market_cap = (symbol_details.get('results') or {}).get('market_cap')

        metrics.append({
            'ticker': symbol,
            'price': latest_price,
            # Reported in millions; a missing market cap is recorded as 0
            'market_cap_m': market_cap / 1_000_000 if market_cap else 0,
            'avg_volume': avg_volume,
            'volatility': volatility,
        })

    except Exception as e:
        print(f"Error processing {symbol}: {e}")

df_metrics = pd.DataFrame(metrics)
df_metrics

## 5. Next Steps: AI Classification

To build the AI classification system, we need to:

1. **Gather text data** for classification:
   - Company descriptions from Polygon
   - SEC filings (10-K business section)
   - News articles

2. **Design classification prompt**:
   ```
   Given this company information:
   - Name: [name]
   - Description: [description]
   - Recent news: [news]
   
   Determine:
   1. Is this a space infrastructure company? (yes/no/partial)
   2. What % of revenue comes from space activities? (0-100%)
   3. Which space segments: [launch/satellites/ground/components]
   4. Confidence level (high/medium/low)
   ```

3. **Test on known companies** to validate accuracy

4. **Run on broader universe** (all NASDAQ/NYSE stocks)

5. **Apply fundamental screening** and weighting algorithm

## Summary

- ✓ Successfully connected to Polygon.io
- ✓ Can fetch company details and financials
- ✓ Can get historical price data
- ✓ Calculated basic metrics

**Ready for next phase:** AI classification system