# 02 - Core Analysis
## Section 1.1: Defining Mass Extinction

---

**Notebook Purpose**: Calculate extinction rates, compare to Big Five events, establish mass extinction thresholds.

**Author**: Dennis 'dnoice' Smaltz  
**AI Acknowledgement**: Claude Opus 4  
**Version**: 0.1 (Template)  
**Date**: 2025-12-12  
**Signature**: ︻デ═—··· 🎯 = Aim Twice, Shoot Once!

---

### Analysis Objectives

1. Calculate current extinction rate (E/MSY)
2. Compare to background extinction rate
3. Compare to Big Five mass extinction rates
4. Perform sensitivity analysis on key assumptions
5. Generate derived datasets for visualization

---

## 1. Environment Setup

In [None]:
# Standard imports
import json
import logging
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, Tuple, List

# Scientific computing
import numpy as np
import pandas as pd
from scipy import stats

# Logging: INFO-level root configuration plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory layout, resolved one level above the current working directory
SECTION_PATH = Path('../').resolve()
DATA_ROOT = SECTION_PATH / 'data'
RAW_DATA_PATH = DATA_ROOT / 'raw'
PROCESSED_DATA_PATH = DATA_ROOT / 'processed'
DERIVED_DATA_PATH = DATA_ROOT / 'derived'

# Create the output directories up front (no-op when they already exist)
for output_dir in (PROCESSED_DATA_PATH, DERIVED_DATA_PATH):
    output_dir.mkdir(parents=True, exist_ok=True)

# Seed NumPy's global RNG so repeated runs draw the same random samples
np.random.seed(42)

print(f"Analysis Date: {pd.Timestamp.now()}")

## 2. Load Data

In [None]:
# Load raw data from 01_data_acquisition.ipynb


def _load_json(path: Path):
    """Parse the JSON file at ``path``, decoding it explicitly as UTF-8."""
    # An explicit encoding keeps parsing independent of the platform's
    # locale default, which open() would otherwise fall back to.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# IUCN Summary
iucn_data = _load_json(RAW_DATA_PATH / 'iucn_summary_2025-2.json')

# Big Five extinctions
big_five = pd.read_csv(RAW_DATA_PATH / 'big_five_mass_extinctions.csv')

# Modern extinctions
modern_ext = _load_json(RAW_DATA_PATH / 'modern_extinctions_since_1500.json')

# Background rates
background_rates = pd.read_csv(RAW_DATA_PATH / 'background_extinction_rates.csv')

print("Data loaded successfully.")
print(f"\nBig Five Events: {len(big_five)}")
print(f"Background Rate Estimates: {len(background_rates)}")
print(f"IUCN Total Assessed: {iucn_data['assessed_species']:,}")

## 3. Core Functions: Extinction Rate Calculation

In [None]:
@dataclass
class ExtinctionRateResult:
    """Outcome of one extinction-rate calculation together with its inputs.

    The interval bounds default to ``None`` and the method label to an
    empty string.
    """
    # Rate and the inputs it was computed from
    rate_emsy: float
    extinctions: int
    species_count: float
    time_years: float
    # Optional interval bounds and a free-text description of the method
    ci_low: float = None
    ci_high: float = None
    method: str = ""

    def to_dict(self) -> dict:
        """Return every field as a plain dict, in declaration order."""
        field_names = (
            'rate_emsy',
            'extinctions',
            'species_count',
            'time_years',
            'ci_low',
            'ci_high',
            'method',
        )
        return {name: getattr(self, name) for name in field_names}


def calculate_extinction_rate(
    extinctions: int,
    species_count: float,
    time_years: float,
    method: str = ""
) -> ExtinctionRateResult:
    """
    Calculate extinction rate in E/MSY (extinctions per million species-years).

    E/MSY = (Extinctions / Species) × (1,000,000 / Years)

    Parameters
    ----------
    extinctions : int
        Number of documented extinctions
    species_count : float
        Total species in taxon
    time_years : float
        Time period in years
    method : str
        Description of method used

    Returns
    -------
    ExtinctionRateResult
        Object containing rate and metadata (no confidence interval)

    Raises
    ------
    ValueError
        If ``species_count`` or ``time_years`` is not positive, or if
        ``extinctions`` is negative.
    """
    # Reject inputs for which the rate is undefined or meaningless instead of
    # failing with a bare ZeroDivisionError or returning inf / a negative rate.
    # "not x > 0" is also true for NaN.
    if not species_count > 0:
        raise ValueError(f"species_count must be positive, got {species_count!r}")
    if not time_years > 0:
        raise ValueError(f"time_years must be positive, got {time_years!r}")
    if extinctions < 0:
        raise ValueError(f"extinctions must be non-negative, got {extinctions!r}")

    # Fraction of species lost, scaled to a period of one million years
    rate = (extinctions / species_count) * (1e6 / time_years)

    return ExtinctionRateResult(
        rate_emsy=rate,
        extinctions=extinctions,
        species_count=species_count,
        time_years=time_years,
        method=method
    )


def calculate_rate_with_uncertainty(
    extinctions: int,
    species_count: float,
    time_years: float,
    species_uncertainty: float = 0.2,  # 20% uncertainty
    n_simulations: int = 10000,
    method: str = ""
) -> ExtinctionRateResult:
    """
    Calculate extinction rate with Monte Carlo uncertainty estimation.

    The species count is sampled from a normal distribution centred on
    ``species_count`` with standard deviation
    ``species_count * species_uncertainty``; a rate is computed for every
    sample. Samples are drawn with ``np.random.normal``, i.e. from NumPy's
    global random state.

    Parameters
    ----------
    extinctions : int
        Number of documented extinctions
    species_count : float
        Total species (point estimate)
    time_years : float
        Time period in years
    species_uncertainty : float
        Relative uncertainty in species count (default 20%)
    n_simulations : int
        Number of Monte Carlo iterations
    method : str
        Description of method

    Returns
    -------
    ExtinctionRateResult
        ``rate_emsy`` is the median of the simulated rates; ``ci_low`` and
        ``ci_high`` are their 2.5th and 97.5th percentiles (95% CI)

    Raises
    ------
    ValueError
        If ``species_count`` or ``time_years`` is not positive, if
        ``extinctions`` or ``species_uncertainty`` is negative, or if
        ``n_simulations`` is smaller than 1.
    """
    # Validate up front: a zero species count would otherwise be silently
    # clamped to 1 below and produce a meaningless rate, and an empty
    # simulation has no median or percentiles. "not x > 0" also catches NaN.
    if not species_count > 0:
        raise ValueError(f"species_count must be positive, got {species_count!r}")
    if not time_years > 0:
        raise ValueError(f"time_years must be positive, got {time_years!r}")
    if extinctions < 0:
        raise ValueError(f"extinctions must be non-negative, got {extinctions!r}")
    if not species_uncertainty >= 0:
        raise ValueError(
            f"species_uncertainty must be non-negative, got {species_uncertainty!r}"
        )
    if n_simulations < 1:
        raise ValueError(f"n_simulations must be at least 1, got {n_simulations!r}")

    # Monte Carlo simulation: sample the species count
    species_samples = np.random.normal(
        species_count,
        species_count * species_uncertainty,
        n_simulations
    )
    # Draws below 1 are raised to 1 so the division stays defined.
    # NOTE(review): with a large relative uncertainty such clamped draws
    # yield very large rates in the upper tail -- confirm this is acceptable.
    species_samples = np.maximum(species_samples, 1)

    # One rate per sampled species count
    rates = (extinctions / species_samples) * (1e6 / time_years)

    # Summary statistics of the simulated rates
    rate_median = np.median(rates)
    ci_low, ci_high = np.percentile(rates, [2.5, 97.5])

    return ExtinctionRateResult(
        rate_emsy=rate_median,
        extinctions=extinctions,
        species_count=species_count,
        time_years=time_years,
        ci_low=ci_low,
        ci_high=ci_high,
        method=method
    )

## 4. Calculate Current Extinction Rates

In [None]:
# Current extinction rates for several taxa and time windows

# Length of the post-1500 window as recorded in the modern-extinctions data
since_1500_years = modern_ext['time_period']['duration_years']

# Vertebrate totals: sum extinctions and species counts over the five classes
vertebrate_taxa = ['Mammals', 'Birds', 'Reptiles', 'Amphibians', 'Fishes']
vert_extinctions = sum(
    modern_ext['by_taxon'][taxon]['extinct'] for taxon in vertebrate_taxa
)
vert_species = sum(
    modern_ext['by_taxon'][taxon]['species_count'] for taxon in vertebrate_taxa
)

# Entries are evaluated top to bottom, so random draws happen in this order
results = {
    # --- All species since 1500 ---
    'all_since_1500': calculate_rate_with_uncertainty(
        extinctions=modern_ext['total']['extinct'],
        species_count=2e6,  # Rough estimate of total species
        time_years=since_1500_years,
        species_uncertainty=0.5,  # 50% uncertainty on total species
        method='All documented extinctions since 1500'
    ),
    # --- Vertebrates since 1500 ---
    'vertebrates_since_1500': calculate_rate_with_uncertainty(
        extinctions=vert_extinctions,
        species_count=vert_species,
        time_years=since_1500_years,
        species_uncertainty=0.1,  # Better known
        method='Vertebrate extinctions since 1500'
    ),
    # --- Mammals since 1900 (per Ceballos 2015) ---
    'mammals_since_1900': calculate_rate_with_uncertainty(
        extinctions=69,  # Ceballos 2015 figure
        species_count=5513,  # Mammals known in 1900
        time_years=125,  # 1900-2025
        species_uncertainty=0.05,
        method='Mammal extinctions since 1900 (Ceballos 2015)'
    ),
}

# Report each estimate with its interval and the inputs behind it
print("Current Extinction Rate Estimates (E/MSY)")
print("=" * 60)
for label, estimate in results.items():
    print(f"\n{label}:")
    print(f"  Rate: {estimate.rate_emsy:.1f} E/MSY")
    print(f"  95% CI: [{estimate.ci_low:.1f}, {estimate.ci_high:.1f}]")
    print(f"  Based on: {estimate.extinctions} extinctions / {estimate.species_count:,.0f} species / {estimate.time_years:.0f} years")

## 5. Compare to Background Rate

In [None]:
def compare_to_background(
    current_rate: ExtinctionRateResult,
    background_rate: float = 1.0,
    background_range: Tuple[float, float] = (0.1, 2.0)
) -> Dict:
    """
    Compare current extinction rate to background rate.

    Parameters
    ----------
    current_rate : ExtinctionRateResult
        Current rate calculation; ``rate_emsy``, ``ci_low`` and ``ci_high``
        are read
    background_rate : float
        Central estimate of background rate (E/MSY)
    background_range : tuple
        (low, high) range of background estimates

    Returns
    -------
    dict
        Comparison metrics:

        - ``ratio_central``: current rate / central background rate
        - ``ratio_conservative``: lower CI bound / high background estimate,
          or ``None`` when the result has no lower bound
        - ``ratio_liberal``: upper CI bound / low background estimate,
          or ``None`` when the result has no upper bound
        - ``times_background`` and ``range``: formatted versions of the
          above (``range`` is ``"N/A"`` unless both ratios are available)
    """
    ratio_central = current_rate.rate_emsy / background_rate

    # Compare against None explicitly: a bound of 0.0 is a legitimate value
    # and must not be treated as "no interval available".
    ratio_conservative = (
        current_rate.ci_low / background_range[1]
        if current_rate.ci_low is not None else None
    )
    ratio_liberal = (
        current_rate.ci_high / background_range[0]
        if current_rate.ci_high is not None else None
    )

    # Same reasoning for the label: a ratio of 0 is still a reportable ratio
    if ratio_conservative is not None and ratio_liberal is not None:
        range_label = f"{ratio_conservative:.0f}-{ratio_liberal:.0f}x"
    else:
        range_label = "N/A"

    return {
        'current_rate': current_rate.rate_emsy,
        'background_rate': background_rate,
        'ratio_central': ratio_central,
        'ratio_conservative': ratio_conservative,
        'ratio_liberal': ratio_liberal,
        'times_background': f"{ratio_central:.0f}x",
        'range': range_label
    }


# Compare every current-rate estimate against the default background rate
print("Comparison to Background Rate")
print("=" * 60)
print("\nUsing background rate: 1.0 E/MSY (range: 0.1-2.0)\n")

# Build all comparisons first, keyed like `results`, then report them
comparisons = {
    label: compare_to_background(estimate)
    for label, estimate in results.items()
}

for label, summary in comparisons.items():
    print(f"{label}:")
    print(f"  {summary['times_background']} background rate")
    print(f"  Range: {summary['range']}")
    print()

## 6. Compare to Big Five Mass Extinctions

In [None]:
# Calculate comparable rates for Big Five
# Note: These are rough estimates due to data limitations

def estimate_big_five_rate(row: pd.Series) -> float:
    """
    Estimate E/MSY for a mass extinction event.

    This is a rough approximation: the fraction of species lost divided by
    the event duration in millions of years.

        E/MSY = extinctions / (species × time_in_MY)
              = (species_loss_pct / 100) / duration_my

    The pre-event species count appears in both the numerator (species lost)
    and the denominator, so it cancels and no diversity estimate is needed.

    Parameters
    ----------
    row : pd.Series
        Record providing ``species_loss_pct`` (a percentage) and
        ``duration_my`` (event duration, in millions of years per the
        column name)

    Returns
    -------
    float
        Estimated rate in E/MSY, or NaN when the duration is missing (NaN)
        or not positive.
    """
    duration_my = row['duration_my']

    # A zero, negative or NaN duration has no meaningful rate; return NaN
    # rather than raising or producing inf depending on the input type.
    if not duration_my > 0:
        return float('nan')

    fraction_lost = row['species_loss_pct'] / 100
    return fraction_lost / duration_my

# Attach an estimated rate to every Big Five event (row-wise apply)
big_five['rate_emsy'] = big_five.apply(estimate_big_five_rate, axis=1)

# Columns shown in the printed summary, in display order
report_columns = ['event', 'age_ma', 'duration_my', 'species_loss_pct', 'rate_emsy']

print("Big Five Mass Extinction Rates (Estimated E/MSY)")
print("=" * 60)
print(big_five.loc[:, report_columns].to_string())
print("\nNote: These are rough estimates with high uncertainty.")

In [None]:
# Assemble one table holding Big Five, current, and background rates

# Big Five events: one record per row of the events table
big_five_records = [
    {
        'event': event_row['event'],
        'type': 'Big Five',
        'rate_emsy': event_row['rate_emsy'],
        'species_loss_pct': event_row['species_loss_pct'],
        'age_ma': event_row['age_ma'],
    }
    for _, event_row in big_five.iterrows()
]

# Current estimates: one record per entry in `results`, with interval bounds
current_records = [
    {
        'event': label,
        'type': 'Current',
        'rate_emsy': estimate.rate_emsy,
        'rate_ci_low': estimate.ci_low,
        'rate_ci_high': estimate.ci_high,
        'age_ma': 0,
    }
    for label, estimate in results.items()
]

# Background rate: a single reference record
background_record = {
    'event': 'Background Rate',
    'type': 'Background',
    'rate_emsy': 1.0,
    'rate_ci_low': 0.1,
    'rate_ci_high': 2.0,
    'age_ma': None,
}

# Keep the Big Five -> current -> background ordering of the records
comparison_data = big_five_records + current_records + [background_record]

comparison_df = pd.DataFrame(comparison_data)
print("\nFull Comparison Dataset:")
print(comparison_df.to_string())

## 7. Sensitivity Analysis

In [None]:
# Test sensitivity to key assumptions

def sensitivity_analysis(
    base_extinctions: int,
    base_species: float,
    base_time: float,
    param_ranges: Dict[str, List[float]]
) -> pd.DataFrame:
    """
    Perform one-at-a-time sensitivity analysis.

    Each listed parameter is set to its low and then its high value while the
    other two stay at their baseline, and the resulting rate is compared to
    the baseline rate.

    Parameters
    ----------
    base_extinctions, base_species, base_time : baseline values
    param_ranges : dict mapping param name to [low, high] values.
        Recognised names are ``'extinctions'``, ``'species'`` and ``'time'``;
        any other name is skipped with a logged warning.

    Returns
    -------
    pd.DataFrame
        One ``'Base Case'`` row followed by two rows per recognised
        parameter, with columns ``parameter``, ``value``, ``rate_emsy`` and
        ``pct_change`` (percent change relative to the baseline rate; NaN
        when the baseline rate is zero).
    """
    def rate_for(extinctions, species, time):
        # E/MSY: fraction of species lost, scaled to one million years
        return (extinctions / species) * (1e6 / time)

    baseline = {
        'extinctions': base_extinctions,
        'species': base_species,
        'time': base_time,
    }
    base_rate = rate_for(**baseline)

    rows = [{
        'parameter': 'Base Case',
        'value': 'N/A',
        'rate_emsy': base_rate,
        'pct_change': 0
    }]

    # Vary each parameter in turn, holding the others at baseline
    for param, (low, high) in param_ranges.items():
        if param not in baseline:
            # Make the skip visible instead of dropping the entry silently
            logging.getLogger(__name__).warning(
                "Skipping unknown sensitivity parameter %r", param
            )
            continue

        for value in (low, high):
            rate = rate_for(**{**baseline, param: value})

            # A relative change is undefined against a zero baseline (e.g. no
            # recorded extinctions); report NaN rather than dividing by zero.
            if base_rate == 0:
                pct_change = float('nan')
            else:
                pct_change = ((rate - base_rate) / base_rate) * 100

            rows.append({
                'parameter': param,
                'value': value,
                'rate_emsy': rate,
                'pct_change': pct_change
            })

    return pd.DataFrame(rows)


# Run sensitivity analysis for vertebrates since 1500

# Take the baseline period from the loaded dataset -- the same value used for
# results['vertebrates_since_1500'] -- instead of repeating it as a literal,
# so the sensitivity baseline cannot drift from the headline estimate.
base_time_years = modern_ext['time_period']['duration_years']

sensitivity_results = sensitivity_analysis(
    base_extinctions=vert_extinctions,
    base_species=vert_species,
    base_time=base_time_years,
    param_ranges={
        'extinctions': [vert_extinctions * 0.5, vert_extinctions * 2],  # Detection uncertainty
        'species': [vert_species * 0.8, vert_species * 1.2],  # Species count uncertainty
        'time': [400, base_time_years]  # Different start dates
    }
)

print("Sensitivity Analysis Results:")
print(sensitivity_results.to_string())

## 8. Key Findings Summary

In [None]:
# Generate key findings

# The headline numbers all come from the vertebrates-since-1500 estimate
vertebrate_result = results['vertebrates_since_1500']
vertebrate_comparison = comparisons['vertebrates_since_1500']

key_findings = {
    'current_vertebrate_rate': vertebrate_result.rate_emsy,
    'current_vertebrate_ci': (
        vertebrate_result.ci_low,
        vertebrate_result.ci_high
    ),
    'times_background_conservative': vertebrate_comparison['ratio_conservative'],
    'times_background_central': vertebrate_comparison['ratio_central'],
    'times_background_liberal': vertebrate_comparison['ratio_liberal'],
    # Ratio of the current vertebrate rate to each Big Five event's rate;
    # None where the event rate is not positive (also covers NaN)
    'comparison_to_big_five': {
        event: {
            'current_vs_event': vertebrate_result.rate_emsy / rate
            if rate > 0 else None
        }
        for event, rate in big_five[['event', 'rate_emsy']].values
    },
    # NOTE(review): this text is fixed and not derived from the ratios
    # computed above -- confirm it still matches the numbers.
    'conclusion': 'Current vertebrate extinction rate is approximately 100× background rate, '
                  'comparable to rates seen in previous mass extinction events when accounting '
                  'for the shorter time scale.'
}

print("KEY FINDINGS")
print("=" * 60)
print(f"\nCurrent vertebrate extinction rate: {key_findings['current_vertebrate_rate']:.1f} E/MSY")
print(f"95% CI: [{key_findings['current_vertebrate_ci'][0]:.1f}, {key_findings['current_vertebrate_ci'][1]:.1f}]")
print("\nTimes background rate:")
print(f"  Conservative: {key_findings['times_background_conservative']:.0f}×")
print(f"  Central:      {key_findings['times_background_central']:.0f}×")
print(f"  Liberal:      {key_findings['times_background_liberal']:.0f}×")
print(f"\nConclusion: {key_findings['conclusion']}")

## 9. Save Derived Data

In [None]:
# Save all derived datasets


def _json_default(value):
    """Fallback encoder for objects the json module cannot serialize itself."""
    # NumPy scalars and arrays become their built-in equivalents so numbers
    # stay numbers in the output; anything else is stringified as a last
    # resort.
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# 1. Rate calculation results (one row per entry in `results`)
rate_results_df = pd.DataFrame(
    [r.to_dict() for r in results.values()],
    index=list(results)
)
rate_results_df.to_csv(DERIVED_DATA_PATH / 'extinction_rate_calculations.csv')

# 2. Comparison data
comparison_df.to_csv(DERIVED_DATA_PATH / 'rate_comparison_big_five.csv', index=False)

# 3. Sensitivity analysis
sensitivity_results.to_csv(DERIVED_DATA_PATH / 'sensitivity_analysis.csv', index=False)

# 4. Key findings JSON (nested structure of `key_findings` is written as-is)
with open(DERIVED_DATA_PATH / 'key_findings.json', 'w', encoding='utf-8') as f:
    json.dump(key_findings, f, indent=2, default=_json_default)

# List the output directory in a stable (sorted) order
print("Derived data saved:")
for saved_path in sorted(DERIVED_DATA_PATH.glob('*')):
    print(f"  - {saved_path.name}")

---

## Next Steps

1. **03_visualization.ipynb**: Generate publication-quality figures from these results

---

*︻デ═—··· 🎯 = Aim Twice, Shoot Once!*