# Fortune 100 Data Exploration

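This notebook explores the Fortune 100 company registry and SEC filing data, then previews the executive-compensation and corporate-tax extractors and runs the pipeline on a small sample of companies.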

**Contents:**
1. Load and explore Fortune 100 companies
2. Fetch sample SEC filings
3. Extract executive compensation (DEF 14A)
4. Extract corporate tax data (10-K)
5. Run the pipeline on sample companies

In [None]:
# Setup: make the project's src/ directory importable from this notebook
import sys
from pathlib import Path

# src/ is resolved as <current working directory>/../src, so the notebook
# has to be launched from a folder that sits next to src/.
src_path = Path.cwd().parent / "src"

# Prepend only when missing, so re-running the cell does not stack duplicates.
if not any(entry == str(src_path) for entry in sys.path):
    sys.path.insert(0, str(src_path))

print(f"Source path: {src_path}")

## 1. Fortune 100 Company Registry

In [None]:
from collections import Counter

from edgar.data.fortune100 import Fortune100Registry

# Load the registry
registry = Fortune100Registry.load_default()

print(f"Total companies: {len(registry.companies)}")

# Ask the registry for its sectors once, and tally companies per sector in a
# single pass instead of re-scanning the whole company list for every sector.
sectors = sorted(registry.get_sectors())
companies_per_sector = Counter(c.sector for c in registry.companies)

print(f"\nSectors: {len(sectors)}")
for sector in sectors:
    # A Counter yields 0 for a sector that no company carries, which matches
    # what counting an empty filtered list would give.
    print(f"  - {sector}: {companies_per_sector[sector]}")

In [None]:
# List the companies the registry returns for the rank range 1..10
top_10 = registry.get_by_rank_range(1, 10)

print("Top 10 Fortune 100 Companies:\n")
for entry in top_10:
    # One fixed-width column per field, joined with the " | " separator
    columns = (
        f"#{entry.rank:3d}",
        f"{entry.ticker:5s}",
        f"{entry.name:30s}",
        f"CIK: {entry.cik}",
    )
    print(" | ".join(columns))

In [None]:
# Look up individual companies by ticker symbol and summarise each one
def _describe(label, company):
    """Return the one-line rank / CIK / sector summary for *company*."""
    return f"{label}: Rank #{company.rank}, CIK: {company.cik}, Sector: {company.sector}"


apple = registry.get_by_ticker("AAPL")
print(_describe("Apple", apple))

amazon = registry.get_by_ticker("AMZN")
print(_describe("Amazon", amazon))

## 2. Fetch Sample SEC Filings

In [None]:
from edgar.services.sec_edgar_client import SecEdgarClient

# Client used for every SEC request in this notebook
sec_client = SecEdgarClient()

# Request Apple's DEF 14A; the CIK is kept as a 10-character, zero-padded string
apple_cik = "0000320193"
def14a = sec_client.get_filing(apple_cik, "DEF 14A")

# The filing is read through .get() with fallbacks, so a missing
# 'filing_date' shows as N/A and a missing 'html' counts as 0 characters.
print(
    "Apple DEF 14A:",
    f"  Filing Date: {def14a.get('filing_date', 'N/A')}",
    f"  HTML Length: {len(def14a.get('html', '')):,} characters",
    sep="\n",
)

In [None]:
# Request Apple's 10-K through the same client and CIK
form10k = sec_client.get_filing(apple_cik, "10-K")

# Missing keys fall back to 'N/A' (date) and an empty string (html body)
print(
    "Apple 10-K:",
    f"  Filing Date: {form10k.get('filing_date', 'N/A')}",
    f"  HTML Length: {len(form10k.get('html', '')):,} characters",
    sep="\n",
)

## 3. Extract Executive Compensation

In [None]:
from edgar.extractors.sct import SCTExtractor

# Create the extractor and run it on the DEF 14A filing
sct_extractor = SCTExtractor(company="Apple Inc.", cik=apple_cik)
sct_data = sct_extractor.extract(def14a)

print(f"Executives Found: {len(sct_data.executives)}\n")

# The loop variable is intentionally not called `exec`: a module-level loop
# variable stays bound after the cell finishes, so that name would keep
# shadowing the builtin exec() for the rest of the notebook session.
for executive in sct_data.executives:
    print(f"{executive.name} - {executive.title}")
    # One line per compensation entry: its year and total
    for comp in executive.compensation:
        print(f"  {comp.year}: ${comp.total:,.0f}")
    print()

## 4. Extract Corporate Tax Data

In [None]:
from edgar.extractors.tax import TaxExtractor


def _format_millions(amount):
    """Render *amount* in the ``$1,234M`` style, or ``N/A`` when it is None."""
    return "N/A" if amount is None else f"${amount:,.0f}M"


# Create the extractor and run it on the 10-K filing
tax_extractor = TaxExtractor(company="Apple Inc.", cik=apple_cik)
tax_data = tax_extractor.extract(form10k)

print(f"Tax Years Found: {len(tax_data.tax_years)}\n")

for tax_year in tax_data.tax_years:
    print(f"FY {tax_year.year}:")
    print(f"  Total Tax Expense: ${tax_year.total_tax_expense:,.0f}M")
    print(f"  Effective Rate: {tax_year.effective_tax_rate:.1%}")

    components = (
        ("Federal", tax_year.current_federal),
        ("State", tax_year.current_state),
        ("Foreign", tax_year.current_foreign),
    )
    # The breakdown is shown as soon as ANY component is truthy, so the other
    # two may still be empty. Formatting None with ",.0f" raises TypeError,
    # hence the N/A fallback in _format_millions.
    # NOTE(review): assumes absent components are None (or 0) - confirm
    # against the TaxExtractor result model.
    if any(amount for _, amount in components):
        print("  Components:")
        for label, amount in components:
            print(f"    {label}: {_format_millions(amount)}")
    print()

## 5. Run Pipeline on Sample Companies

In [None]:
from edgar.pipelines import Fortune100Pipeline, PipelineConfig

# Pipeline settings: company range (1, 5), verbose output, and results
# written under ../output/notebook_test (relative to the working directory)
config = PipelineConfig(
    companies_range=(1, 5),
    output_dir=Path("../output/notebook_test"),
    verbose=True,
)

# Build the pipeline and ask it which companies it selected, without running it
pipeline = Fortune100Pipeline(config)
companies = pipeline.get_companies()

print("Companies to process:")
for selected in companies:
    summary = f"  #{selected.rank}: {selected.name} ({selected.ticker})"
    print(summary)

In [None]:
# Run the pipeline (this will take ~30 seconds for 5 companies)
result = await pipeline.run()

print(f"\nPipeline Complete!")
print(f"Duration: {result.total_duration:.1f}s")
print(f"DEF 14A Success: {result.def14a_success_rate:.1%}")
print(f"10-K Success: {result.form10k_success_rate:.1%}")
print(f"\nOutput files:")
for f in result.output_files:
    print(f"  - {f}")

## Next Steps

- **02_compensation.ipynb**: Deep dive into executive compensation analysis
- **03_tax_analysis.ipynb**: Corporate tax analysis and trends
- **04_comparison.ipynb**: Combined compensation vs. tax visualization