# Time Series Forecasting
## CRISP-DM Phase 4: Modeling - Demand Prediction

This notebook trains and evaluates time series models to forecast booking demand.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import sys
# Put ../src at the front of the module search path before importing the
# project modules below.
# NOTE(review): presumably preprocessing.py and time_series_model.py live in
# ../src relative to the notebook's working directory — confirm.
sys.path.insert(0, '../src')

from preprocessing import load_data, clean_data, engineer_features, prepare_time_series_data
from time_series_model import DemandForecaster

plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline

## 1. Load and Prepare Time Series Data

In [None]:
# Read the raw bookings CSV, then clean it and add engineered features
# in a single pipeline expression.
df = engineer_features(
    clean_data(
        load_data('../data/raw/hotel_bookings.csv')
    )
)

# Build the time series frame; its 'ds' (date) and 'y' (value) columns are
# what the rest of the notebook works with.
ts_data = prepare_time_series_data(df)

# Sanity summary: row count, covered date range, and a preview of the frame.
print(f"Time series data: {len(ts_data)} days")
print(f"Date range: {ts_data['ds'].min()} to {ts_data['ds'].max()}")
ts_data.head()

## 2. Time Series Visualization

In [None]:
# Line chart of the daily series over the full date range
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(ts_data['ds'], ts_data['y'], linewidth=0.8)
ax.set(
    title='Daily Hotel Bookings',
    xlabel='Date',
    ylabel='Number of Bookings',
)
fig.tight_layout()
plt.show()

In [None]:
# Sum the daily values into weekly buckets so the trend is easier to read
ts_weekly = (
    ts_data.set_index('ds')['y']
    .resample('W')
    .sum()
    .reset_index()
)

# One marker per week on top of the connecting line
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(ts_weekly['ds'], ts_weekly['y'], marker='o', markersize=3)
ax.set(
    title='Weekly Hotel Bookings',
    xlabel='Date',
    ylabel='Number of Bookings',
)
fig.tight_layout()
plt.show()

In [None]:
# Additive decomposition of the date-indexed series with a 7-observation period
ts_indexed = ts_data.set_index('ds')['y']
decomposition = seasonal_decompose(ts_indexed, model='additive', period=7)

# One stacked panel per component, top to bottom
fig, axes = plt.subplots(4, 1, figsize=(14, 10))
panels = (
    (decomposition.observed, 'Observed'),
    (decomposition.trend, 'Trend'),
    (decomposition.seasonal, 'Seasonal'),
    (decomposition.resid, 'Residual'),
)
for panel_ax, (component, heading) in zip(axes, panels):
    component.plot(ax=panel_ax, title=heading)
plt.tight_layout()
plt.show()

## 3. ACF & PACF Analysis

In [None]:
# Autocorrelation (left) and partial autocorrelation (right) up to lag 30
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
acf_ax, pacf_ax = axes
observed = ts_indexed.dropna()  # missing values are removed before plotting
plot_acf(observed, ax=acf_ax, lags=30)
plot_pacf(observed, ax=pacf_ax, lags=30)
plt.tight_layout()
plt.show()

## 4. Train/Test Split

In [None]:
# Positional 80/20 split: the first 80% of rows train, the remainder tests.
# This keeps time order only if ts_data is already sorted by 'ds'.
train_size = int(len(ts_data) * 0.8)
train_df, test_df = ts_data.iloc[:train_size], ts_data.iloc[train_size:]

print(f"Training: {len(train_df)} days ({train_df['ds'].min()} to {train_df['ds'].max()})")
print(f"Testing: {len(test_df)} days ({test_df['ds'].min()} to {test_df['ds'].max()})")

# Draw both partitions on one axis to show where the cut falls
fig, ax = plt.subplots(figsize=(14, 5))
for part_label, part in (('Train', train_df), ('Test', test_df)):
    ax.plot(part['ds'], part['y'], label=part_label, linewidth=0.8)
ax.set_title('Train/Test Split')
ax.legend()
fig.tight_layout()
plt.show()

## 5. Train ARIMA and SARIMA Models

In [None]:
# Fit a forecaster of type 'arima' on the training partition only, so the
# test partition stays unseen until evaluation.
print("Training ARIMA model...")
arima = DemandForecaster(model_type='arima')
arima.train(train_df)

# Evaluate on the held-out partition; the result is kept because the model
# comparison cell later builds a table from it.
print("\nEvaluating on test set:")
arima_metrics = arima.evaluate(test_df)

In [None]:
# Fit a forecaster of type 'sarima' on the same training partition.
# NOTE(review): no seasonal period is passed here; the weekly seasonality is
# presumably configured inside DemandForecaster — confirm.
print("Training SARIMA model...")
sarima = DemandForecaster(model_type='sarima')
sarima.train(train_df)

# Evaluate on the same held-out partition as the ARIMA model; the result is
# kept because the model comparison cell later builds a table from it.
print("\nEvaluating on test set:")
sarima_metrics = sarima.evaluate(test_df)

## 6. Model Comparison

In [None]:
# Collect the evaluation results under their model names; after the transpose
# each model becomes one row of the table.
metrics_by_model = {
    'ARIMA': arima_metrics,
    'SARIMA': sarima_metrics,
}
comparison = pd.DataFrame(metrics_by_model).transpose()

# Colour gradient computed column by column (axis=0) with the reversed
# 'Reds' colormap; the styled table is the cell's displayed output.
comparison.style.background_gradient(cmap='Reds_r', axis=0)

In [None]:
# Forecast as many steps as there are rows in the test partition
horizon = len(test_df)
forecast_arima = arima.predict(horizon)
forecast_sarima = sarima.predict(horizon)

# Actual values as a solid line, each model's forecast as a dashed overlay
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(test_df['ds'].values, test_df['y'].values, label='Actual', linewidth=2)
for model_label, forecast in (('ARIMA', forecast_arima), ('SARIMA', forecast_sarima)):
    ax.plot(
        forecast['ds'].values,
        forecast['yhat'].values,
        label=model_label,
        linestyle='--',
        alpha=0.8,
    )

ax.set(
    title='Forecast vs Actual (Test Period)',
    xlabel='Date',
    ylabel='Bookings',
)
ax.legend()
fig.tight_layout()
plt.show()

## 7. Forecast Future Demand

In [None]:
# Refit on the complete series (train and test partitions together) so the
# forward-looking forecast uses every available observation.
# NOTE(review): the model type is hard-coded to 'arima' regardless of the
# ARIMA/SARIMA comparison above — confirm this is the intended choice.
print("Retraining ARIMA on full dataset...")
final_model = DemandForecaster(model_type='arima')
final_model.train(ts_data)

# Forecast 30 periods ahead; the plotting cell below reads the 'ds', 'yhat',
# 'yhat_lower' and 'yhat_upper' columns of the result.
future_forecast = final_model.predict(periods=30)

# The bare expression on the last line makes the notebook display the frame.
print("\nNext 30 Days Forecast:")
future_forecast

In [None]:
# Recent history followed by the forward forecast and its interval band
fig, ax = plt.subplots(figsize=(14, 5))

# Context: the last 90 rows of the observed series
historical = ts_data.tail(90)
ax.plot(historical['ds'], historical['y'], label='Historical', linewidth=1)

# Point forecast in red
ax.plot(
    future_forecast['ds'],
    future_forecast['yhat'],
    color='red',
    label='Forecast',
    linewidth=2,
)
# Shaded band between the lower and upper forecast bounds
ax.fill_between(
    future_forecast['ds'],
    future_forecast['yhat_lower'],
    future_forecast['yhat_upper'],
    alpha=0.2,
    color='red',
)

ax.set(
    title='30-Day Demand Forecast',
    xlabel='Date',
    ylabel='Predicted Bookings',
)
ax.legend()
fig.tight_layout()
plt.show()

## 8. Save Model

In [None]:
# Persist the final model to disk.
from pathlib import Path

model_path = '../models/demand_model.pkl'

# Make sure the target directory exists first: on a fresh checkout ../models
# may be missing, in which case writing the file would presumably fail.
# exist_ok=True makes this a no-op when the directory is already there.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# The path string handed to save() is unchanged from the original cell.
final_model.save(model_path)

print("Time series model saved successfully!")