In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('..')

from load_data import load_dataset
from utils.preprocessing import prepare_time_series
from config.model_config import FORECASTING_CONFIG
from forecasting.forecasting_utils import train_prophet_model, evaluate_forecast_model, save_forecast_results, cross_validate_timeseries

In [None]:
# Load the two input datasets and report their size, date coverage and columns.
def _print_load_summary(label, frame):
    """Print the record count and the min/max of the 'date' column of `frame`."""
    print(f"{label} data: {len(frame)} records")
    print(f"Date range: {frame['date'].min()} to {frame['date'].max()}")


# CDC dataset (anxiety/depression prevalence over time)
cdc_df = load_dataset('cdc')
_print_load_summary("CDC", cdc_df)

# Google Trends dataset
trends_df = load_dataset('trends')
_print_load_summary("Trends", trends_df)

# Column overview of both frames
print(f"\nCDC columns: {list(cdc_df.columns)}")
print(f"Trends columns: {list(trends_df.columns)}")

In [None]:
# Build the Prophet input frame from the CDC anxiety series.
cdc_anxiety = prepare_time_series(cdc_df, 'date', 'anxiety')

# Prophet requires 'ds' and 'y' columns: move the index into a column,
# relabel the two resulting columns, then make sure 'ds' holds datetimes.
prophet_data = (
    cdc_anxiety
    .reset_index()
    .set_axis(['ds', 'y'], axis=1)
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

print(f"Prophet data shape: {prophet_data.shape}")
print(f"Date range: {prophet_data['ds'].min()} to {prophet_data['ds'].max()}")

# Line chart of the prepared series, drawn on an explicit Axes.
ts_fig, ts_ax = plt.subplots(figsize=(12, 6))
ts_ax.plot(prophet_data['ds'], prophet_data['y'])
ts_ax.set_title('CDC Anxiety Prevalence Over Time')
ts_ax.set_xlabel('Date')
ts_ax.set_ylabel('Anxiety Prevalence (%)')
ts_ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Fit the forecasting model on the full prepared series.
# `config` is the 'prophet' entry of the project-wide forecasting settings;
# later cells reuse it for the evaluation model and for cross-validation.
config = FORECASTING_CONFIG["prophet"]

# The project helper does the fitting and hands back the trained model.
model = train_prophet_model(prophet_data, config)

In [None]:
# Extend the timeline past the training data and predict over all of it.
# `periods` is the number of future steps; the notebook treats them as days
# (presumably Prophet's default daily frequency - confirm `model` is a Prophet instance).
periods = 90

future = model.make_future_dataframe(
    periods=periods,
)
forecast = model.predict(future)

# One print call, two lines of output: frame shape, then the last forecast date.
print(
    f"Forecast shape: {forecast.shape}",
    f"Forecast extends to: {forecast['ds'].max()}",
    sep="\n",
)

In [None]:
# Forecast plot: label the axes of the figure returned by the model.
fig1 = model.plot(forecast)
forecast_ax = fig1.gca()
forecast_ax.set_title('CDC Anxiety Prevalence Forecast')
forecast_ax.set_ylabel('Anxiety Prevalence (%)')
plt.show()

# Component plot of the same forecast.
fig2 = model.plot_components(forecast)
plt.show()

In [None]:
# Hold-out evaluation: fit on the first 80% of rows, score on the remaining 20%.
if len(prophet_data) <= 30:
    # 30 rows or fewer: skip the split entirely.
    print("Insufficient data for evaluation")
else:
    # Positional split point; rows before it train, rows from it onward test.
    train_size = int(len(prophet_data) * 0.8)
    train_data = prophet_data.iloc[:train_size]
    test_data = prophet_data.iloc[train_size:]

    # A separate model fitted on the training rows only, with the same config.
    eval_model = train_prophet_model(train_data, config)

    # The project helper returns a mapping of metric name -> numeric score.
    metrics = evaluate_forecast_model(
        eval_model,
        train_data,
        test_data,
        model_type='prophet',
    )

    print("Model Evaluation:")
    for name, score in metrics.items():
        print(f"{name.upper()}: {score:.3f}")

In [None]:
# Persist the fitted model and its forecast under a date-stamped name.
# The stamp is today's date as YYYYMMDD.
timestamp = f"{pd.Timestamp.now():%Y%m%d}"
save_forecast_results(model, forecast, 'cdc_anxiety_' + timestamp)
print("Forecast model and results saved")

In [None]:
# Cross-validation
# Gate on how much calendar time the series covers, not on how many rows it
# has. The old check (`len(prophet_data) > 365`) only equals "more than a year
# of history" when the data is daily with no gaps; for a series sampled less
# often than daily it skipped cross-validation no matter how long the history.
# With gap-free daily data, `len > 365` and `span >= 365 days` select exactly
# the same cases, so behaviour there is unchanged.
MIN_CV_HISTORY = pd.Timedelta(days=365)

# An empty frame yields NaT here; NaT compares False below, so the
# "need more data" branch is taken just as before.
history_span = prophet_data['ds'].max() - prophet_data['ds'].min()

if history_span >= MIN_CV_HISTORY:
    try:
        # The project helper returns (per-fold results, metrics table).
        cv_results, cv_metrics = cross_validate_timeseries(prophet_data, config)
        print("Cross-validation metrics:")
        print(cv_metrics.describe())
    except Exception as e:
        # Deliberately best-effort: report the failure and keep the notebook
        # running. The second line is only a guess at the cause - read the
        # exception text printed above it for the actual reason.
        print(f"Cross-validation failed: {e}")
        print("Likely insufficient data for time series cross-validation")
else:
    print("Need more data for cross-validation")