# Time Series Forecasting Template

This notebook provides a template for time series forecasting experiments. As written it uses synthetic demo data generated in section 1; swap in your own series there.

## Steps:
1. Load and explore data
2. Preprocess and create features
3. Split into train/test sets
4. Build and train models
5. Evaluate and compare results

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Import custom utilities
import sys
sys.path.append('../src')
from utils import (
    load_csv_timeseries,
    train_test_split_timeseries,
    create_lag_features,
    create_rolling_features,
    plot_timeseries,
    plot_forecast,
    plot_decomposition,
    evaluate_forecast,
    print_metrics
)

# Set random seed for reproducibility: this seeds NumPy's global (legacy) RNG,
# so any np.random.* draws in later cells repeat after Restart Kernel -> Run All.
# Re-running a single cell on its own still advances the global state.
np.random.seed(42)

## 1. Load and Explore Data

In [None]:
# Load your data
# df = load_csv_timeseries(
#     filepath='../data/raw/your_data.csv',
#     date_column='date',
#     target_column='value',
#     freq='D'  # Daily frequency
# )

# For demo purposes, create sample data: linear trend + yearly sine + Gaussian noise.
# A generator seeded inside this cell (instead of the global np.random state)
# makes the cell idempotent: re-running it always rebuilds the same series.
rng = np.random.default_rng(42)

dates = pd.date_range(start='2020-01-01', end='2023-12-31', freq='D')
trend = np.linspace(100, 200, len(dates))  # rises from 100 to 200 over the range
seasonal = 20 * np.sin(2 * np.pi * np.arange(len(dates)) / 365.25)  # amplitude 20, 365.25-day period
noise = rng.normal(0, 5, len(dates))  # mean 0, standard deviation 5
values = trend + seasonal + noise

df = pd.DataFrame({'value': values}, index=dates)
df.head()

In [None]:
# Descriptive statistics first, then the frame's dimensions and the period covered
summary_stats = df.describe()
print(summary_stats)
print(f"\nData shape: {df.shape}")
print(f"Date range: {df.index.min()} to {df.index.max()}")

In [None]:
# Plot the full series with the plot_timeseries helper from utils
plot_timeseries(
    df,
    title='Time Series Data',
)

In [None]:
# Additive seasonal decomposition of the target column.
# period=365 corresponds to one year of observations for the daily demo data.
plot_decomposition(
    df['value'],
    model='additive',
    period=365,
)

## 2. Data Preprocessing

In [None]:
# Check for missing values: total count of null cells across all columns
print(f"Missing values: {df.isnull().sum().sum()}")

# If there are missing values, handle them.
# NOTE(review): fill_missing_values is not among the names imported from utils at
# the top of the notebook; import it first (assuming utils provides it) before
# uncommenting the call below.
# df = fill_missing_values(df, method='linear')

## 3. Train/Test Split

In [None]:
# Partition the data; test_size=0.2 requests a 20% test share, leaving 80% for training
# (presumably the most recent observations form the test set -- confirm in utils).
train, test = train_test_split_timeseries(
    df,
    test_size=0.2,
)

print(f"Train size: {len(train)}")
print(f"Test size: {len(test)}")

## 4. Model Building

### 4.1 Statistical Model (ARIMA)

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# ARIMA with order (p, d, q) = (1, 1, 1), fitted on the training target only
arima_order = (1, 1, 1)
model_arima = ARIMA(train['value'], order=arima_order)
fitted_arima = model_arima.fit()

# Forecast one value per test row, then label the forecasts with the test index
n_forecast_steps = len(test)
forecast_arima = fitted_arima.forecast(steps=n_forecast_steps)
forecast_arima.index = test.index

### 4.2 Prophet Model

In [None]:
from prophet import Prophet

# Prepare data for Prophet (requires 'ds' and 'y' columns).
# Build the frame from the index and the 'value' column explicitly, rather than
# renaming columns by position, so extra columns in `train` cannot break the
# rename and the target matches the one used by the other models.
train_prophet = pd.DataFrame({
    'ds': train.index,
    'y': train['value'].values,
})

# Fit Prophet model with its default settings
model_prophet = Prophet()
model_prophet.fit(train_prophet)

# Make predictions: the future frame is requested with len(test) extra periods
# beyond the training dates (Prophet's default frequency is daily, which matches
# the demo data -- pass freq=... here if your series is not daily).
future = model_prophet.make_future_dataframe(periods=len(test))
forecast_prophet_df = model_prophet.predict(future)

# Keep only the trailing len(test) rows. Slicing from an explicit start position
# (instead of iloc[-len(test):]) stays correct when test is empty, where a stop
# of -0 would select every row.
forecast_start = len(forecast_prophet_df) - len(test)
forecast_prophet = forecast_prophet_df['yhat'].iloc[forecast_start:]
forecast_prophet.index = test.index

### 4.3 Machine Learning Model (XGBoost)

In [None]:
from xgboost import XGBRegressor

# Feature configuration shared by the train and test pipelines so the two
# cannot drift apart.
LAGS = [1, 7, 14, 30]
ROLLING_WINDOWS = [7, 30]


def build_ml_features(frame):
    """Apply the lag and rolling-window feature helpers to ``frame['value']``.

    Calls create_lag_features and then create_rolling_features (both from
    utils) with LAGS and ROLLING_WINDOWS, and returns the result. No dropna
    is applied here; callers decide how to handle incomplete rows.
    """
    with_lags = create_lag_features(frame, 'value', lags=LAGS)
    return create_rolling_features(with_lags, 'value', windows=ROLLING_WINDOWS)


# Create training features; drop rows containing NaN (presumably the leading
# rows that lack a full lag/rolling history -- confirm against utils).
train_ml = build_ml_features(train).dropna()

# Prepare train data: every column except the target is a feature
X_train = train_ml.drop('value', axis=1)
y_train = train_ml['value']

# Fit XGBoost model
model_xgb = XGBRegressor(n_estimators=100, random_state=42)
model_xgb.fit(X_train, y_train)

# Prepare test data with the same features. Features are computed on
# train + test so the first test rows can draw on the end of the training data.
# NOTE(review): presumably the lag helper shifts the target, so lag features on
# test rows are built from observed test values. That would make this a
# one-step-ahead forecast, whereas the ARIMA and Prophet cells forecast the whole
# test horizon from the end of train -- confirm before comparing metrics.
full_data = pd.concat([train, test])
test_ml = build_ml_features(full_data).loc[test.index]

# Select exactly the training feature columns, in training order, so the
# prediction input always matches what the model was fitted on.
X_test = test_ml[X_train.columns]
forecast_xgb = pd.Series(model_xgb.predict(X_test), index=test.index)

## 5. Evaluation and Comparison

In [None]:
# Score the ARIMA forecast against the held-out actuals; the training values are
# passed as the third argument (presumably for scaled metrics -- confirm in utils).
print("ARIMA Model:")
metrics_arima = evaluate_forecast(
    test['value'].values,
    forecast_arima.values,
    train['value'].values,
)
print_metrics(metrics_arima)

In [None]:
# Score the Prophet forecast against the held-out actuals; the training values are
# passed as the third argument (presumably for scaled metrics -- confirm in utils).
print("Prophet Model:")
metrics_prophet = evaluate_forecast(
    test['value'].values,
    forecast_prophet.values,
    train['value'].values,
)
print_metrics(metrics_prophet)

In [None]:
# Score the XGBoost forecast against the held-out actuals; the training values are
# passed as the third argument (presumably for scaled metrics -- confirm in utils).
print("XGBoost Model:")
metrics_xgb = evaluate_forecast(
    test['value'].values,
    forecast_xgb.values,
    train['value'].values,
)
print_metrics(metrics_xgb)

In [None]:
# Overlay every model's forecast on the actual test values
from utils import plot_multiple_forecasts

# Forecast series keyed by model name
forecasts = {
    'ARIMA': forecast_arima,
    'Prophet': forecast_prophet,
    'XGBoost': forecast_xgb,
}

# Only the last 100 training observations are passed along for context
train_tail = train['value'].iloc[-100:]

plot_multiple_forecasts(
    actual=test['value'],
    forecasts=forecasts,
    train=train_tail,
    title='Model Comparison',
)

In [None]:
# Gather each model's metrics under its name and tabulate them side by side
# (one column per model; presumably one row per metric).
metrics_by_model = {
    'ARIMA': metrics_arima,
    'Prophet': metrics_prophet,
    'XGBoost': metrics_xgb,
}
metrics_comparison = pd.DataFrame(metrics_by_model)

print("\nMetrics Comparison:")
print(metrics_comparison)

## 6. Conclusion

- Summarize which model performed best
- Note any patterns or insights
- Suggest next steps for improvement