In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append('..')

from load_data import load_dataset
from utils.preprocessing import prepare_time_series, prepare_who_time_series
from config.model_config import FORECASTING_CONFIG
from forecasting.forecasting_utils import train_prophet_model, evaluate_forecast_model, save_forecast_results, cross_validate_timeseries

In [None]:
# Load the three raw inputs through the project loader, in this order:
# CDC data, Google Trends data, WHO suicide data.
cdc_df, trends_df, who_suicide_df = (
    load_dataset(source_key) for source_key in ('cdc', 'trends', 'who_suicide')
)

In [None]:

# Turn each raw table into a single time series using the project's
# preprocessing helpers (time column first, value column second).
# NOTE(review): presumably each helper returns a Series indexed by the time
# column, so reset_index() yields exactly two columns -- confirm.
cdc_ts = prepare_time_series(cdc_df, 'date', 'anxiety')
trends_ts = prepare_time_series(trends_df, 'date', 'interest')
who_suicide_ts = prepare_who_time_series(who_suicide_df, 'year', 'suicides_no')


def _as_prophet_frame(series):
    """Move the index into a regular column and name the two columns ds / y."""
    frame = series.reset_index()
    frame.columns = ['ds', 'y']
    return frame


# Prophet-format frames (ds = timestamp column, y = value column), one per source.
cdc_prophet = _as_prophet_frame(cdc_ts)
trends_prophet = _as_prophet_frame(trends_ts)
who_prophet = _as_prophet_frame(who_suicide_ts)

# Registry of everything that can be forecast, keyed by a short dataset name.
datasets_to_forecast = {
    'cdc_anxiety': cdc_prophet,
    'google_trends': trends_prophet,
    'who_suicides': who_prophet,
}

# Plot all time series for comparison, one stacked panel per dataset.
# The grid is sized from the dictionary instead of a hard-coded 3, so adding or
# removing a dataset neither raises IndexError nor leaves an empty panel.
# Four inches of height per panel reproduces the previous 15x12 figure for the
# current three datasets.
n_panels = max(len(datasets_to_forecast), 1)
fig, axes = plt.subplots(n_panels, 1, figsize=(15, 4 * n_panels), squeeze=False)
# squeeze=False always returns a 2-D array; flatten it so `axes` stays 1-D
# regardless of how many panels there are.
axes = axes.ravel()

for ax, (name, data) in zip(axes, datasets_to_forecast.items()):
    ax.plot(data['ds'], data['y'])
    ax.set_title(f'{name.replace("_", " ").title()} Over Time')
    ax.set_xlabel('Date')
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Train Prophet model
# `prophet_data` is read by this cell and by the evaluation and cross-validation
# cells further down, but no earlier cell defines it, so a fresh-kernel run
# fails with NameError. Bind it to the CDC anxiety frame: that is the series the
# forecast plot title and the saved-results name refer to.
prophet_data = datasets_to_forecast['cdc_anxiety']

# Prophet settings come from the project's forecasting configuration.
config = FORECASTING_CONFIG['prophet']

# Train model using utility function
model = train_prophet_model(prophet_data, config)

In [None]:
# Generate forecasts: extend the model's timeline by `periods` steps and predict
# over the resulting frame.
periods = 90  # Forecast 90 days ahead
future = model.make_future_dataframe(periods=periods)
forecast = model.predict(future)

# Quick sanity summary: size of the forecast table and its last timestamp.
print(
    f"Forecast shape: {forecast.shape}",
    f"Forecast extends to: {forecast['ds'].max()}",
    sep="\n",
)

In [None]:
# Visualize forecasts: the main forecast figure first, then the component plots.
fig1 = model.plot(forecast)
# Label whichever axes pyplot currently targets -- the same axes that
# plt.title / plt.ylabel would act on.
forecast_ax = plt.gca()
forecast_ax.set_title('CDC Anxiety Prevalence Forecast')
forecast_ax.set_ylabel('Anxiety Prevalence (%)')
plt.show()

fig2 = model.plot_components(forecast)
plt.show()

In [None]:
# Hold-out evaluation: fit a second model on the first 80% of rows and score it
# on the remaining 20%. Skipped when there are 30 rows or fewer.
if len(prophet_data) <= 30:
    print("Insufficient data for evaluation")
else:
    # Chronological split by row position (no shuffling).
    train_size = int(len(prophet_data) * 0.8)
    train_data = prophet_data.iloc[:train_size]
    test_data = prophet_data.iloc[train_size:]

    # Separate model so the full-data `model` is left untouched.
    eval_model = train_prophet_model(train_data, config)

    metrics = evaluate_forecast_model(eval_model, train_data, test_data, model_type='prophet')

    print("Model Evaluation:")
    # NOTE(review): assumes `metrics` maps metric names to numeric values,
    # since each value is formatted with :.3f -- confirm against the helper.
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name.upper()}: {metric_value:.3f}")

In [None]:
# Save the fitted model and its forecast through the project helper, tagging the
# output name with today's date (YYYYMMDD).
run_date = pd.Timestamp.now()
timestamp = run_date.strftime("%Y%m%d")
results_name = f'cdc_anxiety_{timestamp}'
save_forecast_results(model, forecast, results_name)
print("Forecast model and results saved")

In [None]:
# Time-series cross-validation, attempted only when there are more than 365 rows.
if len(prophet_data) <= 365:
    print("Need more data for cross-validation")
else:
    try:
        cv_results, cv_metrics = cross_validate_timeseries(prophet_data, config)
        print("Cross-validation metrics:")
        print(cv_metrics.describe())
    except Exception as cv_error:
        # Best-effort step: report the failure and let the notebook continue.
        print(f"Cross-validation failed: {cv_error}")
        print("Likely insufficient data for time series cross-validation")