# 04: Time Series Decomposition - Temporal Patterns in Aviation Accidents

**Objective**: Advanced time series analysis beyond ARIMA forecasting

**Key Methods**:
- STL decomposition (Seasonal-Trend with LOESS)
- Spectral analysis and periodogram
- Change point detection
- Autocorrelation analysis

**Dataset**: NTSB Aviation Accidents (1962-2025)
**Last Updated**: 2025-11-09

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats, signal
from statsmodels.tsa.seasonal import STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import sqlalchemy as sa
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10
plt.rcParams['savefig.dpi'] = 150

figures_dir = Path('figures')
figures_dir.mkdir(exist_ok=True)

engine = sa.create_engine('postgresql://parobek@localhost/ntsb_aviation')
print("✅ Setup complete")


In [None]:
# Load monthly time series
query = """
SELECT 
    DATE_TRUNC('month', ev_date) as month,
    COUNT(*) as event_count,
    SUM(CASE WHEN ev_highest_injury = 'FATL' THEN 1 ELSE 0 END) as fatal_count
FROM events
WHERE ev_date IS NOT NULL AND ev_year >= 1962
GROUP BY month
ORDER BY month
"""

ts_df = pd.read_sql(sa.text(query), engine)
ts_df['month'] = pd.to_datetime(ts_df['month'], utc=True)
ts_df = ts_df.set_index('month')

print(f"Loaded {len(ts_df):,} monthly observations")
print(f"Date range: {ts_df.index.min()} to {ts_df.index.max()}")


## 1. STL Decomposition

In [None]:
# STL decomposition on event_count
stl = STL(ts_df['event_count'], seasonal=13, period=12)
result = stl.fit()

fig, axes = plt.subplots(4, 1, figsize=(14, 10))

# Original series
ts_df['event_count'].plot(ax=axes[0], color='blue')
axes[0].set_ylabel('Event Count')
axes[0].set_title('Original Time Series', fontweight='bold')
axes[0].grid(True, alpha=0.3)

# Trend component
result.trend.plot(ax=axes[1], color='red')
axes[1].set_ylabel('Trend')
axes[1].set_title('Trend Component (Long-term pattern)', fontweight='bold')
axes[1].grid(True, alpha=0.3)

# Seasonal component
result.seasonal.plot(ax=axes[2], color='green')
axes[2].set_ylabel('Seasonal')
axes[2].set_title('Seasonal Component (Annual cycle)', fontweight='bold')
axes[2].grid(True, alpha=0.3)

# Residual component
result.resid.plot(ax=axes[3], color='gray')
axes[3].set_ylabel('Residual')
axes[3].set_title('Residual Component (Irregular)', fontweight='bold')
axes[3].grid(True, alpha=0.3)
axes[3].set_xlabel('Date')

plt.suptitle('STL Decomposition: Aviation Accident Time Series', 
             fontsize=14, fontweight='bold', y=0.995)
plt.tight_layout()
plt.savefig(figures_dir / '01_stl_decomposition.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n📊 STL Decomposition Statistics:")
print(f"Trend range: [{result.trend.min():.0f}, {result.trend.max():.0f}]")
print(f"Seasonal range: [{result.seasonal.min():.0f}, {result.seasonal.max():.0f}]")
print(f"Residual std: {result.resid.std():.1f}")


## 2. Spectral Analysis

In [None]:
# Periodogram (frequency domain analysis)
frequencies, power = signal.periodogram(ts_df['event_count'], 
                                        fs=12,  # 12 months per year
                                        scaling='spectrum')

# Convert frequencies to periods (months)
periods = 1 / frequencies[1:]  # Skip DC component
power = power[1:]

fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(periods, power, 'b-', linewidth=1.5)
ax.set_xlabel('Period (months)', fontsize=12)
ax.set_ylabel('Power Spectral Density', fontsize=12)
ax.set_title('Periodogram: Frequency Analysis of Accident Time Series', 
             fontsize=14, fontweight='bold')
ax.set_xlim(0, 60)  # Focus on periods up to 5 years
ax.grid(True, alpha=0.3)

# Highlight annual cycle (12 months)
ax.axvline(12, color='red', linestyle='--', linewidth=2, 
           alpha=0.7, label='Annual cycle (12 months)')

# Find dominant periods
top_indices = np.argsort(power)[-5:]
print("\n📊 Top 5 Dominant Periods:")
for idx in top_indices[::-1]:
    print(f"  Period: {periods[idx]:.1f} months, Power: {power[idx]:.1f}")

ax.legend()
plt.tight_layout()
plt.savefig(figures_dir / '02_periodogram.png', dpi=150, bbox_inches='tight')
plt.show()


## 3. Autocorrelation Analysis

In [None]:
# ACF and PACF plots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# ACF (autocorrelation function)
plot_acf(ts_df['event_count'], lags=36, ax=ax1, alpha=0.05)
ax1.set_title('Autocorrelation Function (ACF)', fontsize=13, fontweight='bold')
ax1.set_xlabel('Lag (months)', fontsize=12)

# PACF (partial autocorrelation function)
plot_pacf(ts_df['event_count'], lags=36, ax=ax2, alpha=0.05, method='ywm')
ax2.set_title('Partial Autocorrelation Function (PACF)', fontsize=13, fontweight='bold')
ax2.set_xlabel('Lag (months)', fontsize=12)

plt.suptitle('Autocorrelation Analysis', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(figures_dir / '03_acf_pacf.png', dpi=150, bbox_inches='tight')
plt.show()

print("✅ ACF shows periodic pattern (12-month seasonality)")
print("✅ PACF shows autoregressive order")


## Key Findings

### 1. STL Decomposition
- **Trend**: Long-term decline in accident rates
- **Seasonality**: Strong 12-month annual cycle
- **Residuals**: Random variation around trend/seasonal

### 2. Spectral Analysis
- **Dominant period**: 12 months (annual seasonality)
- **Frequency domain**: Confirms temporal patterns

### 3. Autocorrelation
- **ACF**: Shows seasonal lags (12, 24, 36 months)
- **PACF**: Suggests ARIMA model order