# 🔮 Flu Forecast Hub - Forecasting Models

This notebook demonstrates time-series forecasting of influenza-like illness (ILI) data.

**Models:**
- ARIMA (AutoRegressive Integrated Moving Average)
- Moving Average (baseline)

**Author:** Anita  
**Date:** 2025

## 1. Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

# Resolve the project root (works on Windows and Linux).
#   1. Prefer the author's local checkout when it exists.
#   2. Otherwise walk upward from the current directory until a folder that
#      contains backend/app is found. Unlike a fixed "two levels up", this
#      still lands on the same root when the cell is re-run after the
#      os.chdir() call below has already moved the working directory.
#   3. If no such folder is found, keep the original two-levels-up guess.
_author_root = 'C:/Users/Ani/OneDrive/Desktop/flu-forecast-hub'
_start_dir = os.path.abspath('.')

if os.path.exists(_author_root):
    PROJECT_ROOT = _author_root
else:
    PROJECT_ROOT = os.path.dirname(os.path.dirname(_start_dir))
    _candidate = _start_dir
    while True:
        if os.path.isdir(os.path.join(_candidate, 'backend', 'app')):
            PROJECT_ROOT = _candidate
            break
        _parent = os.path.dirname(_candidate)
        if _parent == _candidate:  # reached the filesystem root
            break
        _candidate = _parent

os.chdir(PROJECT_ROOT)

# Register the backend package directory as an absolute path so the imports
# keep resolving if the working directory changes later, and avoid stacking
# duplicate entries when the cell is executed more than once.
_app_dir = os.path.join(PROJECT_ROOT, 'backend', 'app')
if _app_dir not in sys.path:
    sys.path.insert(0, _app_dir)

from forecaster import FluForecaster, run_forecast_pipeline
from data_fetcher import fetch_sample_data

plt.style.use('seaborn-v0_8-whitegrid')
print(f" Setup complete! Running from: {PROJECT_ROOT}")

## 2. Load Data

In [None]:
# Fetch the sample dataset, then report its dimensions and the span of the
# 'year' column before previewing the first rows.
df = fetch_sample_data()
print(f"Data shape: {df.shape}")
year_column = df['year']
print(f"Years: {year_column.min()} - {year_column.max()}")
df.head()

## 3. Train/Test Split

In [None]:
# Number of weeks reserved for evaluation
# (presumably the most recent ones - confirm in FluForecaster.prepare_data)
TEST_WEEKS = 12

# This forecaster instance is reused by the ARIMA cells further down
forecaster = FluForecaster()
train, test = forecaster.prepare_data(df, test_size=TEST_WEEKS)

n_train_weeks = len(train)
n_test_weeks = len(test)
print(f"Training data: {n_train_weeks} weeks")
print(f"Test data: {n_test_weeks} weeks")

In [None]:
# Visualize train/test split: both segments share one week-index axis, with
# the test segment starting where the training segment ends.
plt.figure(figsize=(14, 5))

plt.plot(range(len(train)), train, label='Training Data', color='steelblue')
plt.plot(range(len(train), len(train) + len(test)), test, label='Test Data', color='orange')
# Dashed marker at the first test week
plt.axvline(x=len(train), color='red', linestyle='--', alpha=0.7, label='Train/Test Split')

plt.xlabel('Week Index')
plt.ylabel('ILI Percentage (%)')
# Title emoji restored from its mis-encoded form
plt.title('📊 Train/Test Split Visualization')
plt.legend()
plt.tight_layout()
plt.show()

## 4. ARIMA Model

In [None]:
# Train ARIMA on the training split with an explicit (2, 1, 2) order,
# then echo the model type the forecaster reports.
print("Fitting ARIMA model...")
arima_order = (2, 1, 2)
forecaster.fit_arima(train, order=arima_order)
fitted_model_type = forecaster.model_type
print(f" Model fitted: {fitted_model_type}")

In [None]:
# Forecast one value for each week in the test split
horizon = len(test)
predictions = forecaster.predict(steps=horizon)

print("Predictions vs Actual:")
actual_values = test.values
# Error is actual minus predicted, so a positive value means the model
# under-predicted that week
comparison = pd.DataFrame({
    'Actual': actual_values,
    'Predicted': predictions,
    'Error': actual_values - predictions,
})
comparison

In [None]:
# Score the test-period predictions and list every metric that comes back
metrics = forecaster.evaluate(test, predictions)

print(" Model Performance:")

for metric_name in metrics:
    print(f"   {metric_name}: {metrics[metric_name]}")

In [None]:
# Visualize predictions vs actual across the test weeks
plt.figure(figsize=(14, 6))

weeks = range(len(test))
plt.plot(weeks, test.values, 'o-', label='Actual', color='steelblue', markersize=8)
plt.plot(weeks, predictions, 's--', label='ARIMA Predicted', color='orange', markersize=8)

# Shade the gap between the two curves so the per-week error is visible
plt.fill_between(weeks, test.values, predictions, alpha=0.2, color='gray')

plt.xlabel('Week')
plt.ylabel('ILI Percentage (%)')
# Title emoji restored from its mis-encoded form
plt.title(f'🎯 ARIMA Predictions vs Actual (MAE: {metrics["MAE"]:.3f})')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 5. Compare Models

In [None]:
# Compare ARIMA vs Moving Average on the same train/test split

# ARIMA: refit on a fresh forecaster. No explicit order is passed here, so
# fit_arima uses its own default.
# NOTE(review): confirm that default matches the (2, 1, 2) order fitted in
# section 4 and quoted in the summary.
arima_forecaster = FluForecaster()
arima_forecaster.fit_arima(train)
arima_pred = arima_forecaster.predict(steps=len(test))
arima_metrics = arima_forecaster.evaluate(test, arima_pred)

# Moving Average baseline with a 4-observation window
ma_forecaster = FluForecaster()
ma_forecaster.fit_moving_average(train, window=4)
ma_pred = ma_forecaster.predict(steps=len(test))
ma_metrics = ma_forecaster.evaluate(test, ma_pred)

# Comparison table: one row per metric, one column per model. The metric
# names are listed once so the three columns cannot drift out of sync.
metric_names = ['MAE', 'RMSE', 'MAPE']
comparison_df = pd.DataFrame({
    'Metric': metric_names,
    'ARIMA': [arima_metrics[name] for name in metric_names],
    'Moving Avg': [ma_metrics[name] for name in metric_names],
})

# Header emoji restored from its mis-encoded form
print("📊 Model Comparison:")
print(comparison_df.to_string(index=False))

In [None]:
# Visual comparison: actual test values against both models' predictions,
# with each model's MAE shown in its legend entry
plt.figure(figsize=(14, 6))

weeks = range(len(test))
plt.plot(weeks, test.values, 'o-', label='Actual', color='steelblue', markersize=8, linewidth=2)
plt.plot(weeks, arima_pred, 's--', label=f'ARIMA (MAE: {arima_metrics["MAE"]:.2f})', color='orange', markersize=6)
plt.plot(weeks, ma_pred, '^--', label=f'Moving Avg (MAE: {ma_metrics["MAE"]:.2f})', color='green', markersize=6)

plt.xlabel('Week')
plt.ylabel('ILI Percentage (%)')
# Title emoji restored from its mis-encoded form
plt.title('🏆 Model Comparison: ARIMA vs Moving Average')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 6. Future Forecast

In [None]:
# Train on the complete ILI series (no hold-out) and project forward
FORECAST_WEEKS = 8

ili_values = df['ili_percentage'].values
full_data = pd.Series(ili_values)

final_forecaster = FluForecaster()
final_forecaster.fit_arima(full_data)
future_forecast = final_forecaster.predict(steps=FORECAST_WEEKS)

print(f" Next {FORECAST_WEEKS} Weeks Forecast:")
print("=" * 30)
# Weeks are numbered from 1 relative to the end of the data
for week_ahead, ili_pct in enumerate(future_forecast, start=1):
    print(f"   Week +{week_ahead}: {ili_pct:.2f}%")

In [None]:
# Visualize forecast: recent history followed by the projected weeks
plt.figure(figsize=(14, 6))

# Historical data (last 52 weeks)
history = df['ili_percentage'].values[-52:]
hist_weeks = range(len(history))
future_weeks = range(len(history), len(history) + FORECAST_WEEKS)

# Coerce to a float array so the band arithmetic below also works when the
# forecaster returns a plain list
forecast_values = np.asarray(future_forecast, dtype=float)

plt.plot(hist_weeks, history, '-', label='Historical Data', color='steelblue', linewidth=2)
plt.plot(future_weeks, forecast_values, 'o--', label='Forecast', color='red', markersize=8, linewidth=2)

# Add confidence band (simple approximation): +/- one standard deviation of
# the plotted history, not a model-derived interval
std = np.std(history)
plt.fill_between(future_weeks,
                 forecast_values - std,
                 forecast_values + std,
                 alpha=0.2, color='red', label='Confidence Band')

# Mark the last historical week, where the forecast takes over
plt.axvline(x=len(history)-1, color='gray', linestyle='--', alpha=0.7)
plt.text(len(history)-1, plt.ylim()[1]*0.9, ' Forecast Start', fontsize=10)

plt.xlabel('Week')
plt.ylabel('ILI Percentage (%)')
# Derive the horizon from FORECAST_WEEKS so the title stays correct when the
# forecast length is changed
plt.title(f' Flu ILI Forecast: Next {FORECAST_WEEKS} Weeks')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 7. Summary

In [None]:

# Recap of the data split, the better-scoring model and the future forecast
print(" FORECASTING SUMMARY")


print("\n Data:")
print(f"   • Total records: {len(df)}")
print(f"   • Training weeks: {len(train)}")
print(f"   • Test weeks: {len(test)}")

# Choose the winner from the measured test MAE instead of hard-coding it,
# so the summary cannot contradict the comparison table.
# NOTE(review): the ARIMA(2,1,2) label is kept from the original summary;
# confirm arima_forecaster's default order really is (2, 1, 2).
if arima_metrics['MAE'] <= ma_metrics['MAE']:
    best_name, best_metrics = 'ARIMA(2,1,2)', arima_metrics
else:
    best_name, best_metrics = 'Moving Average', ma_metrics

print(f"\n Best Model: {best_name}")
print(f"   • MAE: {best_metrics['MAE']}")
print(f"   • RMSE: {best_metrics['RMSE']}")

print(f"\n Forecast (next {FORECAST_WEEKS} weeks):")
print(f"   • Average: {np.mean(future_forecast):.2f}%")
print(f"   • Range: {np.min(future_forecast):.2f}% - {np.max(future_forecast):.2f}%")

