# SocialProphet - Phase 2: Time Series Forecasting

This notebook implements and evaluates the forecasting pipeline:
1. Load preprocessed data
2. Stationarity analysis
3. Train individual models (Prophet, SARIMA, LSTM)
4. Ensemble predictions
5. Evaluate metrics
6. Visualizations

**Target Metrics:**
- MAPE < 15%
- RMSE < 15% of mean
- R² > 0.70

In [None]:
# Standard-library and third-party imports
import sys
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Blanket-suppress library warnings to keep the notebook output readable.
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
# NOTE(review): only NumPy's global RNG is seeded here. If LSTMForecaster uses
# a deep-learning backend with its own RNG, seed that as well - confirm.

# Locate the project root: the directory that contains the `src` package
# imported below. The original location (parent of the working directory) is
# tried first; the working directory and its ancestors are fallbacks so the
# notebook also runs when the kernel is started elsewhere in the repository.
_cwd = Path.cwd()
project_root = next(
    (
        candidate
        for candidate in (_cwd.parent, _cwd, *_cwd.parents)
        if (candidate / "src").is_dir()
    ),
    _cwd.parent,  # nothing found: keep the original behaviour
)
sys.path.insert(0, str(project_root))

# Project imports (must come after the sys.path update above)
from src.utils.config import Config
from src.forecasting.stationarity import StationarityAnalyzer
from src.forecasting.prophet_model import ProphetForecaster
from src.forecasting.sarima_model import SARIMAForecaster
from src.forecasting.lstm_model import LSTMForecaster
from src.forecasting.ensemble import EnsembleForecaster
from src.evaluation.metrics import ForecastMetrics
from src.evaluation.visualizer import Visualizer

print("Imports successful!")
print(f"Project root: {project_root}")

## 1. Load Data

In [None]:
def _read_processed(filename):
    """Read one CSV from Config.PROCESSED_DATA_DIR, parsing `ds` as dates."""
    return pd.read_csv(Config.PROCESSED_DATA_DIR / filename, parse_dates=['ds'])


# Train/test frames
train_df = _read_processed("train_data.csv")
test_df = _read_processed("test_data.csv")

# Prophet-format counterparts
train_prophet = _read_processed("train_prophet.csv")
test_prophet = _read_processed("test_prophet.csv")

# Size of each split
n_train_rows, n_train_cols = train_df.shape
print(f"Training data: {n_train_rows} rows, {n_train_cols} columns")
print(f"Test data: {test_df.shape[0]} rows")

# Overall span: first training timestamp to last test timestamp
first_date = train_df['ds'].min()
last_date = test_df['ds'].max()
print(f"\nDate range: {first_date} to {last_date}")

# Target range on both scales (`y` is log scale, `y_raw` is original scale)
print("\nTarget variable (y):")
y_log = train_df['y']
print(f"  Log scale range: [{y_log.min():.2f}, {y_log.max():.2f}]")
y_original = train_df['y_raw']
print(f"  Original scale range: [{y_original.min():,.0f}, {y_original.max():,.0f}]")

In [None]:
# Preview the first rows of the training frame to sanity-check the loaded
# columns; the bare last expression is rendered as the cell's output.
train_df.head()

In [None]:
# Candidate feature columns: everything except the timestamp and the two
# target columns (log scale `y`, original scale `y_raw`).
non_feature_cols = ('ds', 'y', 'y_raw')
feature_cols = [name for name in train_df.columns if name not in non_feature_cols]

n_features = len(feature_cols)
print(f"Available features ({n_features}): {feature_cols}")

## 2. Stationarity Analysis

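Before fitting SARIMA, we verify time series stationarity using two complementary tests:
- **ADF Test**: Null hypothesis = non-stationary (reject if p < 0.05)
- **KPSS Test**: Null hypothesis = stationary (reject if p < 0.05)

Because the two null hypotheses are opposite, the series is treated as stationary when ADF rejects its null (p < 0.05) and KPSS does not (p ≥ 0.05).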

In [None]:
# Run the stationarity tests on the log-scale training target
analyzer = StationarityAnalyzer()
stationarity_result = analyzer.analyze(train_df['y'], name="engagement_log")

# Report header
banner = "=" * 60
print("\n" + banner)
print("STATIONARITY ANALYSIS RESULTS")
print(banner)
print(f"\nSeries: {stationarity_result['series_name']}")
print(f"Observations: {stationarity_result['n_observations']}")

# ADF and KPSS results share the same layout, so print them with one loop
for test_label, result_key in (("ADF", "adf_test"), ("KPSS", "kpss_test")):
    print(f"\n{test_label} Test:")
    test_outcome = stationarity_result[result_key]
    print(f"  Statistic: {test_outcome['test_statistic']:.4f}")
    print(f"  P-value: {test_outcome['p_value']:.4f}")
    print(f"  Is Stationary: {test_outcome['is_stationary']}")

print(f"\nRecommendation: {stationarity_result['recommendation']}")

In [None]:
# Evaluate differencing orders up to d=2 on the log-scale training target
diff_result = analyzer.differencing_analysis(train_df['y'], max_d=2)
recommended_d = diff_result['recommended_d']
print(f"\nRecommended differencing order: d={recommended_d}")

# Persist the analyzer's report.
# NOTE(review): the output location is chosen inside save_report() - confirm.
analyzer.save_report()

## 3. Train Individual Models

### 3.1 Prophet Model

In [None]:
# Seasonality switches handed to the Prophet wrapper.
# NOTE(review): the lag-7 / day_of_week features and m=7 used elsewhere in this
# notebook suggest daily observations spanning less than a year; if so, daily
# and yearly seasonality may not be identifiable - confirm these flags.
prophet_seasonality = dict(
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True,
)

# Fit on the Prophet-format training frame
print("Training Prophet model...")
prophet_forecaster = ProphetForecaster(**prophet_seasonality)
prophet_forecaster.fit(train_prophet)
print("Prophet training complete!")

# Forecast the held-out period
prophet_preds = prophet_forecaster.predict_test(test_prophet)
n_prophet_preds = len(prophet_preds)
print(f"\nProphet predictions: {n_prophet_preds} rows")

In [None]:
# Score Prophet on the test frame and list its original-scale metrics
prophet_eval = prophet_forecaster.evaluate(test_prophet)
prophet_metrics_original = prophet_eval['metrics_original_scale']

print("\nProphet Metrics (Original Scale):")
for metric_name in prophet_metrics_original:
    metric_value = prophet_metrics_original[metric_name]
    print(f"  {metric_name}: {metric_value:.4f}")

### 3.2 SARIMA Model

In [None]:
# SARIMA on the log-scale training target, with automatic order selection
print("Training SARIMA model (auto-selecting order)...")
sarima_forecaster = SARIMAForecaster()
sarima_target = train_df['y']

# Order search with a seasonal period of 7 (weekly seasonality)
selected_orders = sarima_forecaster.auto_select_order(
    sarima_target,
    seasonal=True,
    m=7,
)
order, seasonal_order = selected_orders
print(f"Selected order: {order}")
print(f"Selected seasonal order: {seasonal_order}")

# Fit the model.
# NOTE(review): the selected orders are not passed to fit(); presumably the
# forecaster keeps them from auto_select_order() - confirm.
sarima_forecaster.fit(sarima_target)
print("SARIMA training complete!")

In [None]:
# Summarise the fitted SARIMA model: orders and information criteria
diagnostics = sarima_forecaster.get_diagnostics()
print("\nSARIMA Diagnostics:")

# (label, key, format spec); an empty spec renders the value as-is
diagnostic_fields = (
    ("Order", "order", ""),
    ("Seasonal Order", "seasonal_order", ""),
    ("AIC", "aic", ".2f"),
    ("BIC", "bic", ".2f"),
)
for field_label, field_key, field_spec in diagnostic_fields:
    print(f"  {field_label}: {diagnostics[field_key]:{field_spec}}")

In [None]:
# Forecast as many steps ahead as there are test rows
forecast_horizon = len(test_df)
sarima_preds = sarima_forecaster.predict(steps=forecast_horizon)
n_sarima_preds = len(sarima_preds)
print(f"SARIMA predictions: {n_sarima_preds} rows")

### 3.3 LSTM Model

**Note:** With only 292 training samples, LSTM may underperform compared to traditional models. This is expected behavior for deep learning with limited data.

In [None]:
# Train LSTM
n_train_samples = len(train_df)
print("Training LSTM model...")
print(f"Training samples: {n_train_samples}")
# Report the actual sample count rather than a hard-coded figure so the note
# stays consistent with the line above when the processed data changes.
print(f"Note: {n_train_samples} samples is borderline for LSTM. Performance may vary.")

lstm_forecaster = LSTMForecaster(
    n_units=50,
    n_layers=2,
    window_size=30,
    epochs=100,
    batch_size=16,
    dropout=0.2,
    early_stopping_patience=10
)

# Features requested for the LSTM
requested_lstm_features = [
    'y_lag_1', 'y_lag_7', 'y_lag_14',
    'y_rolling_mean_7', 'y_rolling_std_7',
    'day_of_week', 'is_weekend'
]

# Keep only the features present in the training frame, and say which ones
# were dropped so a schema change upstream does not go unnoticed.
lstm_features = [f for f in requested_lstm_features if f in train_df.columns]
missing_lstm_features = [f for f in requested_lstm_features if f not in train_df.columns]
if missing_lstm_features:
    print(f"Skipping unavailable features: {missing_lstm_features}")
print(f"Using features: {lstm_features}")

lstm_forecaster.fit(train_df, feature_cols=lstm_features, verbose=1)
print("\nLSTM training complete!")

In [None]:
# Forecast the test period; both frames are passed to the forecaster
lstm_preds = lstm_forecaster.predict_test(train_df, test_df)
n_lstm_preds = len(lstm_preds)
print(f"LSTM predictions: {n_lstm_preds} rows")

In [None]:
# Show the LSTM training curves, but only when a history was recorded
lstm_history = lstm_forecaster.history
if lstm_history:
    visualizer = Visualizer()
    fig = visualizer.plot_training_history(lstm_history)
    plt.tight_layout()
    plt.show()

## 4. Ensemble Forecasting

Combining predictions with weights:
- Prophet: 40%
- SARIMA: 35%
- LSTM: 25%

In [None]:
# Blend weights per model (they sum to 1.0)
ensemble_weights = {
    'prophet': 0.40,
    'sarima': 0.35,
    'lstm': 0.25,
}
ensemble = EnsembleForecaster(weights=ensemble_weights)

# Register the already-fitted forecasters under the same keys as the weights
fitted_forecasters = (
    ('prophet', prophet_forecaster),
    ('sarima', sarima_forecaster),
    ('lstm', lstm_forecaster),
)
for model_key, fitted_model in fitted_forecasters:
    ensemble.add_model(model_key, fitted_model)

registered_models = list(ensemble.models.keys())
print("Ensemble created with models:", registered_models)
print("Weights:", ensemble.weights)

In [None]:
# Blend the member forecasts for the test period
ensemble_preds = ensemble.predict(test_df, train_df=train_df)
n_ensemble_preds = len(ensemble_preds)
print(f"\nEnsemble predictions: {n_ensemble_preds} rows")

# Show the first five rows as the cell output
ensemble_preds.head(n=5)

## 5. Evaluate Metrics

**Target Thresholds:**
- MAPE < 15%
- RMSE < 15% of mean
- R² > 0.70

In [None]:
# Log-scale actuals and ensemble forecasts for the test period
y_true_log = test_df['y'].values
y_pred_log = ensemble_preds['ensemble'].values

# Score the ensemble, requesting log-scale metrics as well
metrics = ForecastMetrics()
results = metrics.evaluate(
    y_true_log,
    y_pred_log,
    include_log_metrics=True,
)

# Print the evaluator's report
metrics.print_report()

In [None]:
# (display name, prediction frame, column holding the forecast) per model
prediction_sources = (
    ('Prophet', prophet_preds, 'yhat'),
    ('SARIMA', sarima_preds, 'forecast'),
    ('LSTM', lstm_preds, 'yhat'),
    ('Ensemble', ensemble_preds, 'ensemble'),
)
predictions_dict = {
    display_name: frame[column].values
    for display_name, frame, column in prediction_sources
}

# Score every model against the same log-scale actuals
comparison = metrics.compare_models(y_true_log, predictions_dict)
comparison_table = comparison.to_string(index=False)
print("\nModel Comparison:")
print(comparison_table)

In [None]:
# Persist the evaluation results held by the `metrics` object.
# NOTE(review): the output location is decided inside
# ForecastMetrics.save_results() - confirm where it writes.
metrics.save_results()

## 6. Visualizations

In [None]:
# Create the visualizer used by the plotting cells in this section, with a
# 14x6 figure size. This rebinds `visualizer`, replacing the default-sized
# instance the LSTM history cell may have created.
visualizer = Visualizer(figsize=(14, 6))

In [None]:
# Log-scale forecasts per model, in legend order
log_scale_forecasts = {
    'Prophet': prophet_preds['yhat'].values,
    'SARIMA': sarima_preds['forecast'].values,
    'LSTM': lstm_preds['yhat'].values,
    'Ensemble': ensemble_preds['ensemble'].values,
}

# Undo the log1p transform so the forecasts are comparable with `y_raw`
original_scale_forecasts = {
    model_label: np.expm1(log_values)
    for model_label, log_values in log_scale_forecasts.items()
}

# Forecasts against actuals on the original scale
fig = visualizer.plot_predictions(
    dates=test_df['ds'],
    y_true=test_df['y_raw'],
    predictions=original_scale_forecasts,
    title="Forecast vs Actual (Original Scale)",
)
plt.tight_layout()
plt.show()

In [None]:
# Residual diagnostics for the ensemble on the original scale
y_true_original = test_df['y_raw'].values
ensemble_log_forecast = ensemble_preds['ensemble'].values
y_pred_original = np.expm1(ensemble_log_forecast)  # inverse of log1p

fig = visualizer.plot_residuals(
    y_true_original,
    y_pred_original,
    model_name="Ensemble",
)
plt.tight_layout()
plt.show()

In [None]:
# Plot the per-model metrics from the `comparison` table built by
# metrics.compare_models() in the evaluation section.
fig = visualizer.plot_model_comparison(comparison)
# Applied to pyplot's current figure, then rendered.
plt.tight_layout()
plt.show()

In [None]:
# Inputs for the combined dashboard: ensemble evaluation results, both data
# splits, and the ensemble prediction frame
dashboard_inputs = dict(
    results=results,
    train_df=train_df,
    test_df=test_df,
    predictions_df=ensemble_preds,
)
fig = visualizer.create_dashboard(**dashboard_inputs)
plt.tight_layout()
plt.show()

# Write the dashboard image next to the processed data
dashboard_path = Config.PROCESSED_DATA_DIR / "forecast_dashboard.png"
visualizer.save_figure(fig, dashboard_path)

## 7. Results Summary

In [None]:
# Final summary of the forecasting run
print("=" * 70)
print("PHASE 2 FORECASTING - FINAL RESULTS")
print("=" * 70)

print("\nData Summary:")
print(f"  Training samples: {len(train_df)}")
print(f"  Test samples: {len(test_df)}")
# `feature_cols` lists every candidate column in the training frame; the only
# feature list passed to a model in this notebook is `lstm_features`, so the
# two counts are reported separately instead of labelling the former "used".
print(f"  Features available: {len(feature_cols)}")
print(f"  Features used (LSTM): {len(lstm_features)}")

print("\nModel Performance (on test set):")
print(comparison[['model', 'mape', 'rmse_pct', 'r2', 'all_pass']].to_string(index=False))

# Pass/fail flags come from the ensemble evaluation stored in `results`
print("\nTarget Thresholds:")
print(f"  MAPE < 15%: {results['pass_fail']['mape']}")
print(f"  RMSE < 15% of mean: {results['pass_fail']['rmse_pct']}")
print(f"  R² > 0.70: {results['pass_fail']['r2']}")

print("\n" + "=" * 70)
if results['all_passed']:
    print("ALL TARGETS ACHIEVED!")
else:
    print("SOME TARGETS NOT MET - Review individual model performance")
print("=" * 70)

In [None]:
# Save ensemble results
import json


def _to_json_safe(value):
    """Fallback encoder for json.dump.

    NumPy scalars and arrays are converted to native Python values so that
    numbers and booleans stay numbers and booleans in the JSON file instead of
    being written as strings. Anything else falls back to ``str``.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


ensemble_results = {
    'models': list(ensemble.models.keys()),
    'weights': ensemble.weights,
    'ensemble_metrics': results['metrics_original_scale'],
    'pass_fail': results['pass_fail'],
    'all_passed': results['all_passed'],
    'model_comparison': comparison.to_dict(orient='records')
}

results_path = Config.PROCESSED_DATA_DIR / "ensemble_results.json"
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(ensemble_results, f, indent=2, default=_to_json_safe)

# Report the path actually written rather than a hard-coded location.
print(f"Results saved to {results_path}")

---

## Notes on Performance

### LSTM with Limited Data
With only 292 training samples, LSTM may underperform compared to traditional statistical models (Prophet, SARIMA). This is expected behavior - deep learning models typically require thousands of samples to outperform simpler methods.

If LSTM significantly underperforms, document the reason alongside the results, for example:
> "Traditional statistical models outperformed deep learning due to limited dataset size (292 samples). For production deployment with more data, LSTM weights could be increased."

### Ensemble Weights
Current weights (Prophet: 40%, SARIMA: 35%, LSTM: 25%) can be optimized using `ensemble.optimize_weights()` with validation data.

### Log Scale
All models are trained and make predictions on the log scale (`y = log1p(engagement)`); predictions are inverse-transformed with `expm1` for original-scale evaluation and plots. The log transform compresses the roughly 1030x scale difference between datasets.