# üìà Prediction & Forecasting

S·ª≠ d·ª•ng trained models ƒë·ªÉ t·∫°o forecasts cho t∆∞∆°ng lai.

## M·ª•c ti√™u
- Load trained quantile models
- Prepare future data
- Generate predictions v·ªõi uncertainty intervals
- Visualize forecasts


In [2]:
# Setup
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Add project root to path
project_root = Path().resolve().parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.config import setup_project_path, get_dataset_config, OUTPUT_FILES, setup_logging
from src.pipelines._05_prediction import load_models, prepare_prediction_data, make_predictions

setup_project_path()
setup_logging()

# Import display for Jupyter notebooks
try:
    from IPython.display import display
except ImportError:
    # Fallback if not in Jupyter
    display = print

# Get config
config = get_dataset_config()
print(f"Dataset: {config['name']}")


ImportError: cannot import name 'load_models' from 'src.pipelines._05_prediction' (D:\datastorm\E-Grocery_Forecaster\src\pipelines\_05_prediction.py)

## 1. Load Trained Models


In [None]:
# Load models
models_dir = OUTPUT_FILES['models_dir']
quantiles = [0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95]

models = {}
for q in quantiles:
    model_file = models_dir / f"q{int(q*100):02d}_forecaster.joblib"
    if model_file.exists():
        models[q] = joblib.load(model_file)
        print(f"‚úÖ Loaded Q{int(q*100):02d} model")
    else:
        print(f"‚ùå Model not found: {model_file}")

print(f"\nLoaded {len(models)}/{len(quantiles)} models")


## 2. Prepare Future Data


In [None]:
# Load feature table
df = pd.read_parquet(OUTPUT_FILES['master_feature_table'])

# Get future periods (beyond training data)
# Assuming we want to predict next 4 weeks
if config['temporal_unit'] == 'week':
    max_week = df[config['time_column']].max()
    future_weeks = list(range(max_week + 1, max_week + 5))
    
    # Create future dataframe (simplified - in practice, need to generate features)
    print(f"Current max week: {max_week}")
    print(f"Future weeks to predict: {future_weeks}")
    print("\nNote: In practice, you need to generate features for future periods")
    print("      using the feature engineering pipeline")
else:
    print(f"Temporal unit: {config['temporal_unit']}")
    print("Future prediction logic depends on temporal unit")


## 3. Generate Predictions


In [None]:
# Use test set for demonstration (in practice, use future data)
from src.pipelines._03_model_training import prepare_data

_, X_test, _, y_test, features, _ = prepare_data(df, config)

# Make predictions for all quantiles
predictions = {}
for q, model in models.items():
    pred = model.predict(X_test[features])
    predictions[q] = pred
    print(f"Q{int(q*100):02d} predictions: shape {pred.shape}, mean {pred.mean():.2f}")

# Create prediction dataframe
pred_df = pd.DataFrame({
    'actual': y_test.values,
    **{f'q{int(q*100):02d}': predictions[q] for q in quantiles if q in predictions}
})

display(pred_df.head(10))


## 4. Visualize Predictions


In [None]:
# Plot predictions v·ªõi uncertainty intervals
fig = go.Figure()

# Add uncertainty bands
if 0.05 in predictions and 0.95 in predictions:
    fig.add_trace(go.Scatter(
        x=list(range(len(pred_df))),
        y=pred_df['q95'],
        mode='lines',
        name='Q95 (Upper)',
        line=dict(color='lightblue', width=1),
        showlegend=True
    ))
    fig.add_trace(go.Scatter(
        x=list(range(len(pred_df))),
        y=pred_df['q05'],
        mode='lines',
        name='Q05 (Lower)',
        line=dict(color='lightblue', width=1),
        fill='tonexty',
        fillcolor='rgba(173, 216, 230, 0.3)',
        showlegend=True
    ))

# Add median prediction
if 0.50 in predictions:
    fig.add_trace(go.Scatter(
        x=list(range(len(pred_df))),
        y=pred_df['q50'],
        mode='lines',
        name='Q50 (Median)',
        line=dict(color='blue', width=2),
        showlegend=True
    ))

# Add actual values
fig.add_trace(go.Scatter(
    x=list(range(len(pred_df))),
    y=pred_df['actual'],
    mode='markers',
    name='Actual',
    marker=dict(color='red', size=4),
    showlegend=True
))

fig.update_layout(
    title='Quantile Predictions v·ªõi Uncertainty Intervals',
    xaxis_title='Sample Index',
    yaxis_title='Sales Value',
    height=500,
    hovermode='x unified'
)

fig.show()
