# Corporate Tax Analysis

Analysis of Fortune 100 corporate tax data extracted from 10-K filings.

**Contents:**
1. Load tax data
2. Tax expense rankings
3. Effective tax rate analysis
4. Tax component breakdown (federal/state/foreign)
5. Sector comparison

In [None]:
# Setup: make the sibling src/ directory importable and configure pandas display
import sys
from pathlib import Path

# src/ is resolved relative to the parent of the current working directory.
# Register it only once so re-running this cell does not stack duplicates.
src_path = Path.cwd().parent / "src"
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.insert(0, src_entry)

import pandas as pd

# Show every column, and render floats as comma-grouped whole numbers.
# NOTE: this format rounds to zero decimals, so values below 0.5 in magnitude
# (e.g. rates stored as fractions) display as "0" unless a table supplies its
# own formatter for that column.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:,.0f}'.format)

## 1. Load Tax Data

In [None]:
# Load the corporate tax CSV
# The file is read from ../output/fortune100 relative to the working directory.
output_dir = Path.cwd().parent / "output" / "fortune100"
tax_csv_path = output_dir / "corporate_tax.csv"
tax_df = pd.read_csv(tax_csv_path)

# Sanity summary: row count, distinct companies, and the fiscal years present.
record_count = len(tax_df)
company_count = tax_df['company'].nunique()
fiscal_years = sorted(tax_df['fiscal_year'].unique())

print(f"Records: {record_count:,}")
print(f"Companies: {company_count}")
print(f"\nFiscal Years: {fiscal_years}")

In [None]:
# Preview data structure
# Show the leading rows so column names and value formats can be checked by eye.
preview_rows = 10
tax_df.head(preview_rows)

## 2. Tax Expense Rankings

In [None]:
# Latest year tax expense rankings
# "Latest" is the maximum fiscal_year present in the data.
latest_year = tax_df['fiscal_year'].max()

# Work on a copy so later cells can add columns without touching tax_df.
latest_tax = tax_df[tax_df['fiscal_year'] == latest_year].copy()
latest_tax = latest_tax.sort_values('total_tax_expense', ascending=False)

print(f"\nTop 15 Largest Tax Payers (FY {latest_year}):\n")
top_15 = latest_tax[['rank', 'company', 'total_tax_expense', 'effective_tax_rate']].head(15)

# The notebook-wide float format rounds to whole numbers, which would print
# every effective tax rate as "0". Format that column as a percentage instead
# (the rate is treated as a fraction, matching the `.1%` formatting used in
# the effective-tax-rate summary); missing rates stay visible as NaN.
print(top_15.to_string(
    index=False,
    formatters={
        'effective_tax_rate': lambda rate: f'{rate:.1%}' if pd.notna(rate) else 'NaN',
    },
))

In [None]:
# Tax expense statistics
# Summarise total_tax_expense across every company in the latest fiscal year.
expense = latest_tax['total_tax_expense']
expense_stats = (
    ("Total (all companies)", expense.sum()),
    ("Mean", expense.mean()),
    ("Median", expense.median()),
    ("Max", expense.max()),
)

print(f"\nTax Expense Statistics (FY {latest_year}):")
for stat_label, stat_value in expense_stats:
    print(f"  {stat_label}: ${stat_value:,.0f}M")

## 3. Effective Tax Rate Analysis

In [None]:
# Filter to companies with meaningful effective tax rate data
# Only strictly positive rates are kept; zero, negative, and missing rates
# fail the `> 0` comparison and are excluded.
positive_rate_mask = latest_tax['effective_tax_rate'] > 0
etr_df = latest_tax.loc[positive_rate_mask].copy()

etr_values = etr_df['effective_tax_rate']
mean_etr = etr_values.mean()
median_etr = etr_values.median()
min_etr = etr_values.min()
max_etr = etr_values.max()

print(f"\nEffective Tax Rate Distribution (FY {latest_year}):\n")
print(f"  Companies with ETR data: {len(etr_df)}")
print(f"  Mean ETR: {mean_etr:.1%}")
print(f"  Median ETR: {median_etr:.1%}")
print(f"  Min ETR: {min_etr:.1%}")
print(f"  Max ETR: {max_etr:.1%}")

In [None]:
# Lowest effective tax rates
# etr_df holds only rows with a strictly positive rate, so this lists the
# ten smallest positive rates.
low_etr_columns = ['rank', 'company', 'effective_tax_rate', 'total_tax_expense']
low_etr = etr_df.nsmallest(10, 'effective_tax_rate')[low_etr_columns]

print(f"\nLowest Effective Tax Rates (FY {latest_year}):\n")
# The notebook-wide float format rounds to whole numbers, which would print
# every rate here as "0"; render the rate column as a percentage instead.
print(low_etr.to_string(
    index=False,
    formatters={'effective_tax_rate': '{:.1%}'.format},
))

In [None]:
# Highest effective tax rates
# Ten largest rates among the rows with a strictly positive rate (etr_df).
high_etr_columns = ['rank', 'company', 'effective_tax_rate', 'total_tax_expense']
high_etr = etr_df.nlargest(10, 'effective_tax_rate')[high_etr_columns]

print(f"\nHighest Effective Tax Rates (FY {latest_year}):\n")
# The notebook-wide float format rounds to whole numbers, which would collapse
# fractional rates to "0" or "1"; render the rate column as a percentage.
print(high_etr.to_string(
    index=False,
    formatters={'effective_tax_rate': '{:.1%}'.format},
))

## 4. Tax Component Breakdown

In [None]:
# Companies with component breakdown
# A company counts as having a breakdown when at least one current-tax
# component (federal, state, or foreign) is strictly positive.
federal_reported = latest_tax['current_federal'] > 0
state_reported = latest_tax['current_state'] > 0
foreign_reported = latest_tax['current_foreign'] > 0
has_components = latest_tax[federal_reported | state_reported | foreign_reported].copy()

print(f"Companies with tax component breakdown: {len(has_components)}\n")

if not has_components.empty:
    # Aggregate components
    component_columns = (
        ("Federal (current)", 'current_federal'),
        ("State (current)", 'current_state'),
        ("Foreign (current)", 'current_foreign'),
        ("Deferred (total)", 'total_deferred'),
    )
    print("Tax Component Totals:")
    for component_label, component_column in component_columns:
        print(f"  {component_label}: ${has_components[component_column].sum():,.0f}M")

In [None]:
# Top foreign tax payers
# Skipped entirely when the data has no current_foreign column.
if 'current_foreign' in latest_tax.columns:
    # Rank all latest-year companies by current foreign tax, largest first.
    ranked_by_foreign = latest_tax.nlargest(10, 'current_foreign')
    foreign_leaders = ranked_by_foreign[['company', 'current_foreign', 'total_tax_expense']]
    print(f"\nTop 10 Foreign Tax Payers (FY {latest_year}):\n")
    print(foreign_leaders.to_string(index=False))

## 5. Sector Comparison

In [None]:
# Load Fortune 100 registry for sector data
from edgar.data.fortune100 import Fortune100Registry

registry = Fortune100Registry.load_default()
sector_map = {c.ticker: c.sector for c in registry.companies}

# Add sector to dataframe; tickers absent from the registry map to NaN.
latest_tax['sector'] = latest_tax['ticker'].map(sector_map)

# groupby drops NaN keys by default, which would silently remove unmapped
# companies from the sector totals. Group them under an explicit 'Unknown'
# label so the table reconciles with the full company list. The 'sector'
# column itself is left as mapped.
sector_key = latest_tax['sector'].fillna('Unknown')

# Sector tax analysis. Named aggregation ties each output column to its
# source column and statistic, instead of renaming columns by position.
# Note: 'Avg ETR' averages every non-missing rate in the sector, including
# zero or negative ones.
sector_tax = (
    latest_tax.groupby(sector_key)
    .agg(**{
        'Total Tax': ('total_tax_expense', 'sum'),
        'Avg Tax': ('total_tax_expense', 'mean'),
        'Avg ETR': ('effective_tax_rate', 'mean'),
        'Companies': ('company', 'count'),
    })
    .sort_values('Total Tax', ascending=False)
)

print(f"\nTax by Sector (FY {latest_year}):\n")
# The notebook-wide float format rounds to whole numbers, which would print
# the average rate as "0"; render it as a percentage (rate treated as a
# fraction, as in the effective-tax-rate summary). Sectors with no rate data
# stay visible as NaN.
print(sector_tax.to_string(
    formatters={
        'Avg ETR': lambda rate: f'{rate:.1%}' if pd.notna(rate) else 'NaN',
    },
))

## Summary

Key areas covered by this corporate tax analysis (see the cell outputs above for the actual figures):

1. **Tax Leaders**: Largest corporate tax payers in the Fortune 100
2. **Effective Rates**: Distribution of effective tax rates
3. **Components**: Federal vs state vs foreign tax breakdown
4. **Sectors**: Which industries pay the most in taxes

Next: **04_comparison.ipynb** for executive compensation vs. tax analysis