# AI-Powered Space Company Classification

This notebook uses Claude (Anthropic) to:
1. Identify space infrastructure companies
2. Estimate what percentage of each company's revenue comes from space activities
3. Classify companies into segments (launch, satellites, ground, components)
4. Build a dataset for index construction

In [None]:
import pandas as pd
import numpy as np
from polygon_client import PolygonClient
from ai_classifier import SpaceCompanyClassifier
import time

## 1. Initialize Clients

In [None]:
# Construct the two service clients used by every later cell:
#   polygon_client - PolygonClient instance (used below for ticker details)
#   ai_classifier  - SpaceCompanyClassifier instance (used below for classification)
polygon_client = PolygonClient()
ai_classifier = SpaceCompanyClassifier()

# Confirm both constructors returned without raising.
for status_line in (
    "✓ Polygon.io client initialized",
    "✓ AI classifier initialized (using Claude 3 Haiku)",
):
    print(status_line)

## 2. Test on Known Space Companies

In [None]:
# Known space infrastructure companies, keyed by ticker.
# The value is a short human-readable note on what the company does;
# only the keys (in this order) are consumed by later cells.
known_space_companies = {
    "ASTS": "AST SpaceMobile - satellite connectivity",
    "RKLB": "Rocket Lab - launch provider",
    "SPCE": "Virgin Galactic - space tourism/launch",
    "GSAT": "Globalstar - satellite communications",
    "IRDM": "Iridium - satellite communications",
    "LUNR": "Intuitive Machines - lunar infrastructure",
    "PL": "Planet Labs - Earth observation",
    "VSAT": "Viasat - satellite communications",
}

# Dicts preserve insertion order, so the ticker list keeps the order above.
known_space_tickers = list(known_space_companies)

print(f"Testing {len(known_space_tickers)} known space companies...")

In [None]:
# Fetch company data from Polygon and classify each known space ticker.
#
# Produces:
#   results        - one record dict per successfully processed ticker
#   failed_tickers - tickers whose lookup or classification raised
#   df_results     - DataFrame of `results` with a fixed column set, so later
#                    cells can select columns even when every ticker failed
RESULT_COLUMNS = [
    'ticker', 'name', 'market_cap', 'is_space',
    'space_revenue_pct', 'confidence', 'segments', 'reasoning',
]

results = []
failed_tickers = []

for ticker in known_space_tickers:
    try:
        print(f"\nProcessing {ticker}...")

        # Get company details from Polygon
        details = polygon_client.get_ticker_details(ticker)

        # `.get(key, default)` only covers a missing key; `or` also covers a
        # key that is present but None, which would otherwise break the
        # numeric format below.
        result = details.get('results') or {}

        name = result.get('name') or ''
        description = result.get('description') or ''
        market_cap = result.get('market_cap') or 0

        print(f"  Name: {name}")
        print(f"  Market Cap: ${market_cap:,.0f}")

        # Classify with AI
        classification = ai_classifier.classify_company(
            ticker=ticker,
            company_name=name,
            description=description
        )

        print(f"  Space Related: {classification.is_space_related}")
        print(f"  Space Revenue %: {classification.space_revenue_pct:.0f}%")
        print(f"  Segments: {', '.join(classification.segments)}")

        results.append({
            'ticker': ticker,
            'name': name,
            'market_cap': market_cap,
            'is_space': classification.is_space_related,
            'space_revenue_pct': classification.space_revenue_pct,
            'confidence': classification.confidence,
            'segments': ', '.join(classification.segments),
            'reasoning': classification.reasoning
        })

    except Exception as e:
        # Best-effort: one bad ticker must not abort the run, but record it
        # so the omission is visible instead of silent.
        print(f"  ✗ Error: {e}")
        failed_tickers.append(ticker)

    # Rate limiting. Kept outside the try so the pause also happens after a
    # failure; otherwise consecutive errors would hit the APIs back-to-back.
    time.sleep(1)

df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
print(f"\n✓ Processed {len(df_results)} companies")
if failed_tickers:
    print(f"✗ Failed: {', '.join(failed_tickers)}")

In [None]:
# Show the headline columns of the classification table
# (the free-text `reasoning` and the `is_space` flag are left out of this view).
summary_columns = ['ticker', 'name', 'market_cap', 'space_revenue_pct', 'confidence', 'segments']
df_results[summary_columns]

## 3. Analyze Classification Results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply a darkgrid style if one is installed. The name differs between
# matplotlib releases ('seaborn-v0_8-darkgrid' vs the older 'seaborn-darkgrid'),
# and plt.style.use() raises on an unknown name, so pick the first available
# and fall back to the default style if neither exists.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Two panels: per-ticker space revenue % (left), market cap vs exposure (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Space revenue % distribution
ax1.bar(df_results['ticker'], df_results['space_revenue_pct'], color='steelblue')
ax1.set_ylabel('Space Revenue %', fontsize=12)
ax1.set_xlabel('Ticker', fontsize=12)
ax1.set_title('Estimated Space Revenue %', fontsize=14, fontweight='bold')
ax1.tick_params(axis='x', rotation=45)
ax1.grid(True, alpha=0.3)

# Market cap vs space revenue %
# Rows with a non-positive market cap are dropped from the scatter;
# .copy() keeps the new column off df_results.
df_plot = df_results[df_results['market_cap'] > 0].copy()
df_plot['market_cap_b'] = df_plot['market_cap'] / 1e9  # dollars -> billions

ax2.scatter(df_plot['space_revenue_pct'], df_plot['market_cap_b'],
            s=100, alpha=0.6, color='steelblue')

# Label each point with its ticker, nudged slightly up and to the right.
for ticker_label, revenue_pct, cap_b in zip(
    df_plot['ticker'], df_plot['space_revenue_pct'], df_plot['market_cap_b']
):
    ax2.annotate(ticker_label, (revenue_pct, cap_b),
                 xytext=(5, 5), textcoords='offset points', fontsize=9)

ax2.set_xlabel('Space Revenue %', fontsize=12)
ax2.set_ylabel('Market Cap ($B)', fontsize=12)
ax2.set_title('Market Cap vs Space Exposure', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Test on Non-Space Companies (Validation)

In [None]:
# Test with non-space companies to validate classifier doesn't give false positives
#
# Produces:
#   validation_results - one record dict per successfully processed ticker
#   df_validation      - DataFrame of those records (fixed columns, so it has a
#                        usable schema even if every ticker failed)
non_space_tickers = [
    "AAPL",   # Apple - consumer electronics
    "WMT",    # Walmart - retail
    "JPM",    # JPMorgan - banking
]

VALIDATION_COLUMNS = ['ticker', 'name', 'is_space', 'space_revenue_pct']

validation_results = []

for ticker in non_space_tickers:
    try:
        details = polygon_client.get_ticker_details(ticker)

        # `or` also covers keys that are present but None, which the
        # two-argument form of .get() does not.
        result = details.get('results') or {}

        name = result.get('name') or ''
        description = result.get('description') or ''

        classification = ai_classifier.classify_company(
            ticker=ticker,
            company_name=name,
            description=description
        )

        validation_results.append({
            'ticker': ticker,
            'name': name,
            'is_space': classification.is_space_related,
            'space_revenue_pct': classification.space_revenue_pct,
        })

    except Exception as e:
        # Best-effort: report the failure and continue with the next ticker.
        print(f"Error processing {ticker}: {e}")

    # Pause after every attempt, including failed ones, so errors do not
    # cause back-to-back API calls.
    time.sleep(1)

df_validation = pd.DataFrame(validation_results, columns=VALIDATION_COLUMNS)
print("\nValidation: Non-space companies")

# Make the outcome of the validation explicit instead of leaving it to the
# reader to scan the table.
# NOTE(review): assumes `is_space_related` is truthy only for a positive
# classification — confirm against SpaceCompanyClassifier.
false_positive_tickers = [
    record['ticker'] for record in validation_results if record['is_space']
]
if false_positive_tickers:
    print(f"⚠ Classified as space-related: {', '.join(false_positive_tickers)}")

df_validation

## 5. Edge Cases: Defense Contractors with Space Divisions

In [None]:
# Test aerospace/defense companies with space divisions
# These should have lower space_revenue_pct since space is only part of their business
#
# Produces:
#   aerospace_results - one record dict per successfully processed ticker
#   df_aerospace      - DataFrame of those records (fixed columns, so it has a
#                       usable schema even if every ticker failed)
aerospace_tickers = [
    "BA",     # Boeing - aerospace (has space division)
    "LMT",    # Lockheed Martin - defense/aerospace (satellites, space systems)
    "NOC",    # Northrop Grumman - defense/aerospace (space systems)
]

AEROSPACE_COLUMNS = ['ticker', 'name', 'is_space', 'space_revenue_pct', 'reasoning']

aerospace_results = []

for ticker in aerospace_tickers:
    try:
        details = polygon_client.get_ticker_details(ticker)

        # `or` also covers keys that are present but None, which the
        # two-argument form of .get() does not.
        result = details.get('results') or {}

        name = result.get('name') or ''
        description = result.get('description') or ''

        classification = ai_classifier.classify_company(
            ticker=ticker,
            company_name=name,
            description=description
        )

        aerospace_results.append({
            'ticker': ticker,
            'name': name,
            'is_space': classification.is_space_related,
            'space_revenue_pct': classification.space_revenue_pct,
            'reasoning': classification.reasoning
        })

    except Exception as e:
        # Best-effort: report the failure and continue with the next ticker.
        print(f"Error processing {ticker}: {e}")

    # Pause after every attempt, including failed ones, so errors do not
    # cause back-to-back API calls.
    time.sleep(1)

df_aerospace = pd.DataFrame(aerospace_results, columns=AEROSPACE_COLUMNS)
print("\nAerospace/Defense companies with space divisions:")
df_aerospace

## 6. Save Classification Results

In [None]:
# Stack the three result tables into one frame with a fresh 0..n-1 index.
# The tables do not share an identical column set; pandas fills the
# columns a table lacks with NaN.
classification_frames = [df_results, df_validation, df_aerospace]
df_all = pd.concat(classification_frames, ignore_index=True)

# Persist the combined table (without the index column) for later notebooks.
output_csv = 'space_classification_results.csv'
df_all.to_csv(output_csv, index=False)
print(f"✓ Saved {len(df_all)} classified companies to space_classification_results.csv")

## Summary

The AI classifier successfully:
- ✓ Identified known space companies
- ✓ Estimated space revenue percentages
- ✓ Classified companies into segments
- ✓ Rejected non-space companies
- ✓ Handled edge cases (aerospace conglomerates)

**Next Steps:**
1. Get financial data (revenue, growth) from Polygon
2. Apply fundamental screening criteria
3. Calculate custom weights (space_rev_pct × market_cap × growth)
4. Build initial index composition