# Corporate Tax Analysis

Analysis of Fortune 100 corporate tax data extracted from 10-K filings.

**Contents:**
1. Load tax data
2. Tax expense rankings
3. Effective tax rate analysis
4. Tax component breakdown (federal/state/foreign)
5. Sector comparison

In [1]:
# Setup
import sys
from pathlib import Path

src_path = Path.cwd().parent / "src"
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: f'{x:,.0f}')

## 1. Load Tax Data

In [2]:
# Load the corporate tax CSV
output_dir = Path.cwd().parent / "output" / "fortune100"
tax_df = pd.read_csv(output_dir / "corporate_tax.csv")

print(f"Records: {len(tax_df):,}")
print(f"Companies: {tax_df['company'].nunique()}")
print(f"\nFiscal Years: {sorted(tax_df['fiscal_year'].unique())}")

Records: 206
Companies: 71

Fiscal Years: [np.int64(2022), np.int64(2023), np.int64(2024), np.int64(2025)]


In [3]:
# Preview data structure
tax_df.head(10)

Unnamed: 0,rank,company,ticker,cik,fiscal_year,current_federal,current_state,current_foreign,total_current,deferred_federal,deferred_state,deferred_foreign,total_deferred,total_tax_expense,pretax_income,effective_tax_rate,cash_taxes_paid
0,1,Walmart Inc.,WMT,104169,2025,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,6152.0,0,10.00%,0
1,1,Walmart Inc.,WMT,104169,2024,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,5578.0,0,0.00%,0
2,1,Walmart Inc.,WMT,104169,2023,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,5724.0,0,0.00%,0
3,2,Amazon.com Inc.,AMZN,1018724,2024,9039.0,2109.0,2765.0,13913.0,-4101.0,-453.0,-94,-4648.0,9265.0,0,0.00%,0
4,2,Amazon.com Inc.,AMZN,1018724,2023,8652.0,2158.0,2186.0,12996.0,-5505.0,-498.0,127,-5876.0,7120.0,0,0.00%,0
5,2,Amazon.com Inc.,AMZN,1018724,2022,2175.0,1074.0,1682.0,4931.0,-6686.0,-1302.0,-160,-8148.0,-3217.0,0,0.00%,0
6,3,Apple Inc.,AAPL,320193,2025,11487.0,1680.0,8891.0,22058.0,-1804.0,-139.0,604,-1339.0,20719.0,0,43.00%,0
7,3,Apple Inc.,AAPL,320193,2024,5571.0,1726.0,25483.0,32780.0,-3080.0,-298.0,347,-3031.0,29749.0,0,0.00%,0
8,3,Apple Inc.,AAPL,320193,2023,9445.0,1570.0,8750.0,19765.0,-3644.0,-49.0,669,-3024.0,16741.0,0,0.00%,0
9,4,UnitedHealth Group Inc.,UNH,731766,2024,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,4829.0,0,21.00%,0


## 2. Tax Expense Rankings

In [None]:
# Latest year tax expense rankings
latest_year = tax_df['fiscal_year'].max()

latest_tax = tax_df[tax_df['fiscal_year'] == latest_year].copy()
latest_tax = latest_tax.sort_values('total_tax_expense', ascending=False)

print(f"\nTop 15 Largest Tax Payers (FY {latest_year}):\n")
top_15 = latest_tax[['rank', 'company', 'total_tax_expense', 'effective_tax_rate']].head(15)
print(top_15.to_string(index=False))

In [None]:
# Tax expense statistics
print(f"\nTax Expense Statistics (FY {latest_year}):")
print(f"  Total (all companies): ${latest_tax['total_tax_expense'].sum():,.0f}M")
print(f"  Mean: ${latest_tax['total_tax_expense'].mean():,.0f}M")
print(f"  Median: ${latest_tax['total_tax_expense'].median():,.0f}M")
print(f"  Max: ${latest_tax['total_tax_expense'].max():,.0f}M")

## 3. Effective Tax Rate Analysis

In [None]:
# Filter to companies with meaningful effective tax rate data
etr_df = latest_tax[latest_tax['effective_tax_rate'] > 0].copy()

print(f"\nEffective Tax Rate Distribution (FY {latest_year}):\n")
print(f"  Companies with ETR data: {len(etr_df)}")
print(f"  Mean ETR: {etr_df['effective_tax_rate'].mean():.1%}")
print(f"  Median ETR: {etr_df['effective_tax_rate'].median():.1%}")
print(f"  Min ETR: {etr_df['effective_tax_rate'].min():.1%}")
print(f"  Max ETR: {etr_df['effective_tax_rate'].max():.1%}")

In [None]:
# Lowest effective tax rates
low_etr = etr_df.nsmallest(10, 'effective_tax_rate')[['rank', 'company', 'effective_tax_rate', 'total_tax_expense']]

print(f"\nLowest Effective Tax Rates (FY {latest_year}):\n")
print(low_etr.to_string(index=False))

In [None]:
# Highest effective tax rates
high_etr = etr_df.nlargest(10, 'effective_tax_rate')[['rank', 'company', 'effective_tax_rate', 'total_tax_expense']]

print(f"\nHighest Effective Tax Rates (FY {latest_year}):\n")
print(high_etr.to_string(index=False))

## 4. Tax Component Breakdown

In [None]:
# Companies with component breakdown
has_components = latest_tax[
    (latest_tax['current_federal'] > 0) |
    (latest_tax['current_state'] > 0) |
    (latest_tax['current_foreign'] > 0)
].copy()

print(f"Companies with tax component breakdown: {len(has_components)}\n")

if len(has_components) > 0:
    # Aggregate components
    print("Tax Component Totals:")
    print(f"  Federal (current): ${has_components['current_federal'].sum():,.0f}M")
    print(f"  State (current): ${has_components['current_state'].sum():,.0f}M")
    print(f"  Foreign (current): ${has_components['current_foreign'].sum():,.0f}M")
    print(f"  Deferred (total): ${has_components['total_deferred'].sum():,.0f}M")

In [4]:
# Top foreign tax payers
if 'current_foreign' in latest_tax.columns:
    foreign_leaders = latest_tax.nlargest(10, 'current_foreign')[['company', 'current_foreign', 'total_tax_expense']]
    print(f"\nTop 10 Foreign Tax Payers (FY {latest_year}):\n")
    print(foreign_leaders.to_string(index=False))

NameError: name 'latest_tax' is not defined

## 5. Sector Comparison

In [None]:
# Load Fortune 100 registry for sector data
from edgar.data.fortune100 import Fortune100Registry

registry = Fortune100Registry.load_default()
sector_map = {c.ticker: c.sector for c in registry.companies}

# Add sector to dataframe
latest_tax['sector'] = latest_tax['ticker'].map(sector_map)

# Sector tax analysis
sector_tax = latest_tax.groupby('sector').agg({
    'total_tax_expense': ['sum', 'mean'],
    'effective_tax_rate': 'mean',
    'company': 'count'
})
sector_tax.columns = ['Total Tax', 'Avg Tax', 'Avg ETR', 'Companies']
sector_tax = sector_tax.sort_values('Total Tax', ascending=False)

print(f"\nTax by Sector (FY {latest_year}):\n")
print(sector_tax.to_string())

## Summary

Key findings from corporate tax analysis:

1. **Tax Leaders**: Largest corporate tax payers in the Fortune 100
2. **Effective Rates**: Distribution of effective tax rates
3. **Components**: Federal vs state vs foreign tax breakdown
4. **Sectors**: Which industries pay the most in taxes

Next: **04_comparison.ipynb** for executive compensation vs. tax analysis