# 📈 Time Series Forecasting

Predict seasonal delay trends for operational planning.

**Objectives:**
- Analyze delay time series patterns
- Build forecasting models (SARIMA, Prophet)
- Predict future delay trends


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Optional forecasting back-ends. Each import is attempted independently and
# its availability is recorded in a flag so later cells can branch on it
# instead of failing with ImportError.
try:
    from statsmodels.tsa.seasonal import seasonal_decompose
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    STATSMODELS_AVAILABLE = True
except ImportError:
    STATSMODELS_AVAILABLE = False

try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except ImportError:
    PROPHET_AVAILABLE = False

# Colour palette shared by the plots in this notebook.
COLORS = {'primary': '#2E86AB', 'secondary': '#A23B72', 'forecast': '#F18F01'}

# Status report. The check/cross marks were previously mis-encoded
# (UTF-8 bytes decoded as cp1252) and printed as garbage.
print("✓ Libraries imported")
print(f"  Statsmodels: {'✓' if STATSMODELS_AVAILABLE else '✗'}")
print(f"  Prophet: {'✓' if PROPHET_AVAILABLE else '✗'}")


In [None]:
# Load the cleaned flight records and aggregate them into a daily time series.
df = pd.read_csv('../data/processed/flights_cleaned.csv')


def _first_parseable_dates(frame, candidates):
    """Return the first candidate column parsed as datetimes, or None.

    Numeric columns are skipped: ``pd.to_datetime`` would read them as
    nanoseconds since the epoch, so a column such as a departure delay in
    minutes would silently collapse every row onto 1970-01-01. Columns in
    which no value parses at all are skipped as well.
    """
    for col in candidates:
        if pd.api.types.is_numeric_dtype(frame[col]):
            continue
        parsed = pd.to_datetime(frame[col], errors='coerce')
        if parsed.notna().any():
            return parsed
    return None


# Candidate date columns, matched by name.
date_cols = [
    c for c in df.columns
    if 'date' in str(c).lower() or 'departure' in str(c).lower()
]
parsed_dates = _first_parseable_dates(df, date_cols)
if parsed_dates is not None:
    df['date'] = parsed_dates
else:
    # No usable date column: fabricate one hourly timestamp per row.
    print("Creating synthetic date range...")
    # Lowercase 'h' is the non-deprecated hourly alias ('H' warns since pandas 2.2).
    df['date'] = pd.date_range('2023-01-01', periods=len(df), freq='h')

# Aggregate to one row per calendar day. Rows whose date failed to parse (NaT)
# are dropped by groupby. Named aggregation ties each output column to its
# source explicitly instead of renaming a MultiIndex by position.
daily_stats = df.groupby(df['date'].dt.date).agg(
    avg_delay=('arrival_delay', 'mean'),
    delay_rate=('is_delayed', 'mean'),
    flight_count=('is_delayed', 'count'),
)
daily_stats.index = pd.to_datetime(daily_stats.index)
daily_stats.index.name = 'date'
daily_stats = daily_stats.sort_index()

print(f"✓ Daily time series created: {len(daily_stats)} days")
display(daily_stats.head())


## Time Series Decomposition


In [None]:
# Plot the daily series with 7-day moving averages, then decompose avg_delay
# into trend / weekly seasonality / residual when statsmodels is available.
from pathlib import Path

# savefig does not create missing directories, so make sure the target exists.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)

fig, axes = plt.subplots(2, 1, figsize=(14, 8))

# Average delay over time
ax1 = axes[0]
ax1.plot(daily_stats.index, daily_stats['avg_delay'], color=COLORS['primary'], alpha=0.7)
ax1.plot(daily_stats['avg_delay'].rolling(7).mean(), color=COLORS['secondary'], linewidth=2, label='7-day MA')
ax1.axhline(y=0, color='black', linestyle='--', alpha=0.3)
ax1.set_ylabel('Average Delay (min)')
ax1.set_title('Daily Average Flight Delay')
ax1.legend()

# Delay rate over time (delay_rate is a fraction; scale to percent for display)
ax2 = axes[1]
ax2.plot(daily_stats.index, daily_stats['delay_rate']*100, color=COLORS['primary'], alpha=0.7)
ax2.plot((daily_stats['delay_rate']*100).rolling(7).mean(), color=COLORS['secondary'], linewidth=2, label='7-day MA')
ax2.set_ylabel('Delay Rate (%)')
ax2.set_xlabel('Date')
ax2.set_title('Daily Flight Delay Rate')
ax2.legend()

plt.tight_layout()
plt.savefig(figures_dir / 'time_series.png', dpi=150)
plt.show()

# Decomposition if enough data
if STATSMODELS_AVAILABLE and len(daily_stats) > 30:
    # seasonal_decompose rejects missing values. ffill() replaces the
    # deprecated fillna(method='ffill'); bfill() then covers leading gaps
    # that a forward fill alone cannot reach.
    delay_series = daily_stats['avg_delay'].ffill().bfill()
    # period=7 models a weekly cycle over the daily rows.
    decomposition = seasonal_decompose(delay_series, model='additive', period=7)
    fig = decomposition.plot()
    fig.set_size_inches(14, 10)
    plt.tight_layout()
    plt.savefig(figures_dir / 'decomposition.png', dpi=150)
    plt.show()


## Forecasting


In [None]:
# Chronological train/test split: the first 80% of days train the model and
# the remaining 20% are held out for evaluation.
from pathlib import Path

# savefig does not create missing directories, so make sure the target exists.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)

train_size = int(len(daily_stats) * 0.8)
train = daily_stats['avg_delay'].iloc[:train_size]
test = daily_stats['avg_delay'].iloc[train_size:]

print(f"Training: {len(train)} days | Test: {len(test)} days")

# Naive baseline: carry the mean of the last 7 training days forward.
# tail(7).mean() skips missing days and tolerates a short or empty train set,
# where rolling(7).mean().iloc[-1] would yield NaN or raise IndexError.
baseline_forecast = train.tail(7).mean()
baseline_predictions = [baseline_forecast] * len(test)

# SARIMA if available
if STATSMODELS_AVAILABLE and len(train) > 30:
    # Best-effort by design: any failure in fitting, plotting or scoring is
    # reported and the cell still runs to completion.
    try:
        # ffill() replaces the deprecated fillna(method='ffill').
        model = SARIMAX(train.ffill(), order=(1,1,1), seasonal_order=(1,1,1,7))
        results = model.fit(disp=False)
        sarima_forecast = results.forecast(steps=len(test))

        # Plot forecast
        fig, ax = plt.subplots(figsize=(14, 6))
        ax.plot(train.index, train, color=COLORS['primary'], label='Training')
        ax.plot(test.index, test, color=COLORS['secondary'], label='Actual')
        ax.plot(test.index, sarima_forecast, color=COLORS['forecast'], linestyle='--', linewidth=2, label='SARIMA Forecast')
        ax.axhline(y=0, color='black', linestyle='--', alpha=0.3)
        ax.set_xlabel('Date')
        ax.set_ylabel('Average Delay (min)')
        ax.set_title('Flight Delay Forecast')
        ax.legend()
        plt.tight_layout()
        plt.savefig(figures_dir / 'forecast.png', dpi=150)
        plt.show()

        # Calculate error on observed test days only. Filling missing actuals
        # with 0 would score them as zero-minute delays and bias the metrics.
        from sklearn.metrics import mean_absolute_error, mean_squared_error
        actual = test.to_numpy(dtype=float)
        predicted = np.asarray(sarima_forecast, dtype=float)
        scored = ~np.isnan(actual) & ~np.isnan(predicted)
        if scored.any():
            mae = mean_absolute_error(actual[scored], predicted[scored])
            rmse = np.sqrt(mean_squared_error(actual[scored], predicted[scored]))
            print("\nSARIMA Performance:")
            print(f"  MAE: {mae:.2f} minutes")
            print(f"  RMSE: {rmse:.2f} minutes")
            # Score the naive baseline on the same days for comparison.
            if not pd.isna(baseline_forecast):
                baseline_mae = mean_absolute_error(
                    actual[scored],
                    np.asarray(baseline_predictions, dtype=float)[scored],
                )
                print(f"  Baseline MAE: {baseline_mae:.2f} minutes")
        else:
            print("\nSARIMA Performance: no observed test values to score")
    except Exception as e:
        print(f"SARIMA failed: {e}")
else:
    print("Using baseline moving average forecast")
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(daily_stats.index, daily_stats['avg_delay'], color=COLORS['primary'])
    ax.axhline(y=baseline_forecast, color=COLORS['forecast'], linestyle='--', label=f'Baseline: {baseline_forecast:.1f}')
    ax.set_title('Flight Delay Time Series with Baseline')
    ax.legend()
    plt.show()

print("\n✓ Forecasting complete!")
