# Prophet Parameter Tuning Notebook

This notebook allows you to:
1. Query a single time series from Victoria Metrics
2. Plot the time series
3. Define multiple Prophet parameter sets
4. Compare forecasts from different parameter configurations

**Use this notebook for:**
- Exploring different Prophet configurations
- Comparing forecast results side-by-side
- Parameter tuning and optimization

**Note:** The Victoria Metrics selector must return exactly one time series; the query cell raises a `ValueError` if it matches zero series or more than one.


## 1. Configuration and Imports


In [None]:
# Configuration
import os
import sys
from pathlib import Path

# Make the notebook's working directory importable (added once, at the front
# of the search path, so re-running this cell does not duplicate the entry).
current_dir = str(Path.cwd())
already_importable = current_dir in sys.path
if not already_importable:
    sys.path.insert(0, current_dir)

# Victoria Metrics connection settings are read from the environment.
# VM_TOKEN falls back to an empty string when the variable is not set.
VM_QUERY_URL = os.environ.get('VM_QUERY_URL', 'http://victoria-metrics:8428')
VM_TOKEN = os.environ.get('VM_TOKEN', '')

# Only the URL is echoed; the token is intentionally not printed.
print(f"VM Query URL: {VM_QUERY_URL}")


In [None]:
# Data handling and time utilities
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
import warnings
# NOTE(review): this silences every warning category for the whole kernel
# session. It is installed before the plotting/darts imports below, so any
# warnings those imports emit are hidden as well. Keep the ordering if that
# is intentional; narrow the filter if real warnings need to surface.
warnings.filterwarnings('ignore')

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Darts imports
# Prophet is aliased to DartsProphet to make clear it is the darts wrapper
# class that gets instantiated later in the notebook.
from darts import TimeSeries
from darts.models import Prophet as DartsProphet

# Prometheus-compatible HTTP client, used to query Victoria Metrics
from prometheus_api_client import PrometheusConnect

# Set plot style: seaborn grid theme plus a default figure size for any
# figure that does not pass its own figsize.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (15, 8)

print("Imports successful")


## 2. Connect to Victoria Metrics, Query and Parse Data


In [None]:
# --- User-editable inputs -------------------------------------------------
SELECTOR = '{job="extractor"}'  # PromQL selector; must match exactly one time series
HISTORY_DAYS = 365  # How many days of history to request

# --- Connect --------------------------------------------------------------
# The Authorization header is only sent when a token was configured.
# NOTE(review): disable_ssl=True is hard-coded on the client below; confirm
# that is acceptable for the target endpoint, especially when a bearer token
# is being sent.
headers = {}
if VM_TOKEN:
    headers = {"Authorization": f"Bearer {VM_TOKEN}"}
prom = PrometheusConnect(url=VM_QUERY_URL, headers=headers, disable_ssl=True)
print(f"Connected to Victoria Metrics at {VM_QUERY_URL}")

# --- Query ----------------------------------------------------------------
print(f"\nQuerying: {SELECTOR}")
end_date = datetime.now(timezone.utc)
start_date = end_date - timedelta(days=HISTORY_DAYS)
promql = SELECTOR.replace("'", '"')  # PromQL label matchers use double quotes
query_result = prom.custom_query_range(
    query=promql,
    start_time=start_date,
    end_time=end_date,
    step="24h",
)

series_count = len(query_result)
print(f"Query range: {start_date.date()} to {end_date.date()}")
print(f"Query returned {series_count} series")

# --- Validate: exactly one series is expected -----------------------------
if series_count == 0:
    raise ValueError("No data found for selector")
if series_count > 1:
    raise ValueError(f"Selector returned {series_count} series, expected exactly 1")

# --- Parse the single series ----------------------------------------------
item = query_result[0]
metric = item.get('metric', {})
metric_name = metric.get('__name__')
if not metric_name:
    raise ValueError("No metric name found in query result")

# Everything except the metric name is kept as the label set.
labels = {key: val for key, val in metric.items() if key != '__name__'}
values = item.get('values', [])

# Each entry is a (timestamp, value) pair; both are converted via float().
samples = []
for ts, value in values:
    point_time = datetime.fromtimestamp(float(ts), tz=timezone.utc)
    samples.append((point_time, float(value)))

if not samples:
    raise ValueError("No data points found in query result")

# --- Build the training frame ('ds' / 'y' columns) ------------------------
# Timestamps are parsed as UTC and then stripped of tz info (naive datetimes).
df_training = pd.DataFrame(samples, columns=['ds', 'y']).assign(
    ds=lambda frame: pd.to_datetime(frame['ds'], utc=True).dt.tz_localize(None)
)

series_info = {'metric_name': metric_name, 'labels': labels}

# --- Summary --------------------------------------------------------------
first_ds, last_ds = df_training['ds'].min(), df_training['ds'].max()
y_low, y_high = df_training['y'].min(), df_training['y'].max()
print(f"\nTime series: {metric_name}")
print(f"Labels: {labels}")
print(f"Data points: {len(df_training)}")
print(f"Date range: {first_ds.date()} to {last_ds.date()}")
print(f"Value range: {y_low:.2f} to {y_high:.2f}")


## 3. Plot Time Series


In [None]:
# Draw the raw history on its own figure before any modelling happens
fig, ax = plt.subplots(figsize=(18, 8))
ax.plot(
    df_training['ds'],
    df_training['y'],
    'ko-',
    label='Historical Data',
    linewidth=2,
    markersize=3,
    alpha=0.7,
)

# Title: metric name, followed by the label dict when it is non-empty
label_suffix = f" {series_info['labels']}" if series_info['labels'] else ""
title = f"Time Series: {series_info['metric_name']}" + label_suffix

ax.set_title(title, fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Value', fontsize=12)
ax.legend(loc='best', fontsize=10)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()


## 4. Define Forecast Parameters and Multiple Prophet Configurations

**Edit the configurations below to test different Prophet parameter sets.**


In [None]:
# Forecast parameters
FORECAST_HORIZON_DAYS = 20  # Days to forecast ahead
MIN_HISTORY_POINTS = 30  # Minimum data points required
FORECAST_FLOOR = 0  # Minimum value for forecasts (set to None to disable)

# Define multiple Prophet parameter sets.
# Each dict has a display 'name' and a 'params' dict with the Prophet
# settings for that run.
#
# changepoint_prior_scale controls trend flexibility: LARGER values let the
# trend bend more at changepoints (more flexible), SMALLER values regularise
# the trend harder (less flexible). The names below follow that direction
# relative to the 'Baseline' value.
PROPHET_CONFIGS = [
    {
        'name': 'Baseline',
        'params': {
            'yearly_seasonality': False,
            'weekly_seasonality': False,
            'daily_seasonality': False,
            'seasonality_mode': 'additive',
            'changepoint_prior_scale': 0.5,
        }
    },
    {
        'name': 'High Flexibility',
        'params': {
            'yearly_seasonality': False,
            'weekly_seasonality': False,
            'daily_seasonality': False,
            'seasonality_mode': 'additive',
            'changepoint_prior_scale': 1.0,  # More flexible than baseline (trend follows changes more closely)
        }
    },
    {
        'name': 'Low Flexibility',
        'params': {
            'yearly_seasonality': False,
            'weekly_seasonality': False,
            'daily_seasonality': False,
            'seasonality_mode': 'additive',
            'changepoint_prior_scale': 0.1,  # Less flexible than baseline (smoother, stiffer trend)
        }
    },
    # Add more configurations as needed
]

print(f"Forecast horizon: {FORECAST_HORIZON_DAYS} days")
print(f"Number of Prophet configurations: {len(PROPHET_CONFIGS)}")
print("\nConfigurations:")
# Numbered from 1 so the listing matches the "Configuration i/N" banner
# printed by the training loop.
for idx, config in enumerate(PROPHET_CONFIGS, start=1):
    print(f"  {idx}. {config['name']}: {config['params']}")


## 5. Train Models and Generate Forecasts for Each Configuration


In [None]:
# Check minimum history requirement
if len(df_training) < MIN_HISTORY_POINTS:
    raise ValueError(f"Insufficient data: {len(df_training)} < {MIN_HISTORY_POINTS} points required")

# One entry per successfully fitted configuration:
#   config_name, config_params, forecast_df (ds / yhat / yhat_lower / yhat_upper),
#   df_training (the frame used for this run, incl. floor/cap columns if set)
forecasts_by_config = []

for config_idx, config in enumerate(PROPHET_CONFIGS):
    config_name = config['name']
    # Copy so that adding 'growth' below never mutates the shared config dict
    prophet_params = config['params'].copy()

    print(f"\n{'='*60}")
    print(f"Configuration {config_idx + 1}/{len(PROPHET_CONFIGS)}: {config_name}")
    print(f"{'='*60}")

    try:
        df_config = df_training.copy()
        model_kwargs = dict(prophet_params)

        # A floor is enforced through logistic growth, which needs a cap too.
        if FORECAST_FLOOR is not None:
            y_max = df_config['y'].max()
            cap_value = float(y_max * 1.5) if y_max > FORECAST_FLOOR else float(FORECAST_FLOOR + 1)

            prophet_params['growth'] = 'logistic'
            # Kept on the frame for reference/inspection only.
            df_config['floor'] = FORECAST_FLOOR
            df_config['cap'] = cap_value
            # The darts Prophet wrapper takes cap/floor as constructor
            # arguments for logistic growth; they must NOT be passed as extra
            # value columns (that would make the series multivariate, which
            # this univariate model rejects).
            model_kwargs = {**prophet_params, 'cap': cap_value, 'floor': float(FORECAST_FLOOR)}

        # Convert to a univariate darts TimeSeries (target column only)
        series = TimeSeries.from_dataframe(df_config[['ds', 'y']].set_index('ds'))

        # Train model
        model = DartsProphet(**model_kwargs)
        model.fit(series)
        print(f"  ✓ Model trained successfully")

        # Generate a sampled (probabilistic) forecast
        forecast = model.predict(n=FORECAST_HORIZON_DAYS, num_samples=100)

        # all_values() is (time, component, sample); take the single target
        # component so each row holds all samples for one forecast step.
        sample_matrix = forecast.all_values(copy=False)[:, 0, :]

        # Point forecast = median across samples. (values() would return only
        # sample 0, i.e. one noisy draw rather than a central estimate.)
        forecast_df = pd.DataFrame({
            'ds': forecast.time_index,
            'yhat': np.median(sample_matrix, axis=1)
        })

        # 95% prediction interval from the empirical sample quantiles
        if sample_matrix.shape[1] > 1:
            forecast_df['yhat_lower'] = np.quantile(sample_matrix, 0.025, axis=1)
            forecast_df['yhat_upper'] = np.quantile(sample_matrix, 0.975, axis=1)

        print(f"  ✓ Forecast generated: {len(forecast_df)} predictions")

        # Store forecast with configuration info
        forecasts_by_config.append({
            'config_name': config_name,
            'config_params': prophet_params,
            'forecast_df': forecast_df,
            'df_training': df_config
        })

    except Exception as exc:
        # Best-effort: one bad configuration must not stop the comparison run
        print(f"  ✗ Failed: {exc}")
        continue

print(f"\n{'='*60}")
print(f"Successfully generated {len(forecasts_by_config)}/{len(PROPHET_CONFIGS)} forecasts")


## 6. Plot Forecasts: Individual Plots for Each Configuration


In [None]:
# One standalone figure per fitted configuration
for forecast_item in forecasts_by_config:
    config_name = forecast_item['config_name']
    forecast_df = forecast_item['forecast_df']
    df_training_plot = forecast_item['df_training']

    fig, ax = plt.subplots(figsize=(18, 8))

    # History first, so the forecast is drawn on top of it
    ax.plot(
        df_training_plot['ds'], df_training_plot['y'],
        'ko-', label='Historical Data', linewidth=2, markersize=3, alpha=0.6,
    )

    # Forecast line
    ax.plot(
        forecast_df['ds'], forecast_df['yhat'],
        'b--', label='Forecast (trend)', linewidth=2.5,
    )

    # Shaded band only when both interval columns were produced
    has_interval = {'yhat_lower', 'yhat_upper'}.issubset(forecast_df.columns)
    if has_interval:
        ax.fill_between(
            forecast_df['ds'],
            forecast_df['yhat_lower'],
            forecast_df['yhat_upper'],
            alpha=0.2, color='blue', label='95% Uncertainty Interval',
        )

    # Marker at the last historical timestamp (where the forecast begins)
    last_history_date = df_training_plot['ds'].max()
    ax.axvline(
        x=last_history_date, color='red', linestyle=':',
        linewidth=2, label='Forecast Start', alpha=0.7,
    )

    # Two-line title: configuration name, then metric name (+ labels if any)
    title_parts = [f"Prophet Forecast: {config_name}", f"\n{series_info['metric_name']}"]
    if series_info['labels']:
        title_parts.append(f" {series_info['labels']}")
    title = "".join(title_parts)

    ax.set_title(title, fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Value', fontsize=12)
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

    print(f"  ✓ Plotted forecast for configuration: {config_name}")


## 7. Plot Forecasts: Side-by-Side Comparison


In [None]:
# Plot all forecasts together for comparison (one subplot per configuration,
# at most two columns wide). Nothing is drawn when no forecast succeeded.
if len(forecasts_by_config) > 0:
    # Determine subplot layout
    n_configs = len(forecasts_by_config)
    n_cols = min(2, n_configs)
    n_rows = (n_configs + n_cols - 1) // n_cols

    # squeeze=False makes subplots() ALWAYS return a 2-D array of Axes, so a
    # single flatten() covers every grid shape (1x1, 1x2, Nx2). Without it a
    # 1x2 grid comes back as a 1-D array, which the old list-wrapping logic
    # turned into [array] and then failed on axes[0].plot(...).
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 6 * n_rows), squeeze=False)
    axes = axes.flatten()

    for idx, forecast_item in enumerate(forecasts_by_config):
        ax = axes[idx]
        config_name = forecast_item['config_name']
        forecast_df = forecast_item['forecast_df']
        df_training_plot = forecast_item['df_training']

        # Plot historical data
        ax.plot(df_training_plot['ds'], df_training_plot['y'],
                'ko-', label='Historical', linewidth=2, markersize=2, alpha=0.6)

        # Plot forecast trend
        ax.plot(forecast_df['ds'], forecast_df['yhat'],
                'b--', label='Forecast', linewidth=2)

        # Plot uncertainty intervals (if available)
        if 'yhat_lower' in forecast_df.columns and 'yhat_upper' in forecast_df.columns:
            ax.fill_between(forecast_df['ds'],
                            forecast_df['yhat_lower'],
                            forecast_df['yhat_upper'],
                            alpha=0.2, color='blue', label='95% Interval')

        # Vertical line showing where forecast starts
        last_history_date = df_training_plot['ds'].max()
        ax.axvline(x=last_history_date, color='red', linestyle=':',
                   linewidth=2, label='Forecast Start', alpha=0.7)

        ax.set_title(f"{config_name}", fontsize=14, fontweight='bold')
        ax.set_xlabel('Date', fontsize=10)
        ax.set_ylabel('Value', fontsize=10)
        ax.legend(loc='best', fontsize=8)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots (odd number of configurations leaves one empty cell)
    for idx in range(n_configs, len(axes)):
        axes[idx].set_visible(False)

    # Overall title
    fig.suptitle(f"Forecast Comparison: {series_info['metric_name']}",
                 fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

    print(f"✓ Comparison plot generated for {n_configs} configurations")


## 8. Overlay Comparison Plot (All Forecasts on One Chart)


In [None]:
# Single chart with every forecast drawn over the shared history.
# Skipped entirely when no configuration produced a forecast.
if forecasts_by_config:
    fig, ax = plt.subplots(figsize=(18, 8))

    # History is taken from the first stored run and drawn once
    df_training_plot = forecasts_by_config[0]['df_training']
    ax.plot(
        df_training_plot['ds'], df_training_plot['y'],
        'ko-', label='Historical Data', linewidth=2, markersize=3, alpha=0.6,
    )

    # One tab10 colour per forecast, sampled evenly across the colormap
    colors = plt.cm.tab10(np.linspace(0, 1, len(forecasts_by_config)))

    for color, forecast_item in zip(colors, forecasts_by_config):
        config_name = forecast_item['config_name']
        forecast_df = forecast_item['forecast_df']

        # Forecast line for this configuration
        ax.plot(
            forecast_df['ds'], forecast_df['yhat'],
            '--', label=f"Forecast: {config_name}", linewidth=2.5, color=color,
        )

        # Matching translucent band when interval columns exist
        has_interval = {'yhat_lower', 'yhat_upper'}.issubset(forecast_df.columns)
        if has_interval:
            ax.fill_between(
                forecast_df['ds'],
                forecast_df['yhat_lower'],
                forecast_df['yhat_upper'],
                alpha=0.15, color=color, label=f"95% Interval: {config_name}",
            )

    # Marker at the last historical timestamp
    last_history_date = df_training_plot['ds'].max()
    ax.axvline(
        x=last_history_date, color='red', linestyle=':',
        linewidth=2, label='Forecast Start', alpha=0.7,
    )

    # Title: metric name plus labels when present
    label_suffix = f" {series_info['labels']}" if series_info['labels'] else ""
    title = f"Forecast Comparison (Overlay): {series_info['metric_name']}" + label_suffix

    ax.set_title(title, fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Value', fontsize=12)
    ax.legend(loc='best', fontsize=10, ncol=2)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

    print(f"✓ Overlay comparison plot generated")
