# Advanced Usage with TimeCopilot

This notebook demonstrates advanced features of TimeCopilot, including:

- Working with multiple time series (panel data)
- Custom model configurations
- Advanced querying and explanations
- Ensemble forecasting
- Working with different frequencies and seasonal patterns

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from timecopilot import TimeCopilot
from timecopilot.forecaster import TimeCopilotForecaster
from timecopilot.models.benchmarks import (
    AutoETS, AutoARIMA, SeasonalNaive, 
    HistoricAverage, Theta, DynamicOptimizedTheta
)
from timecopilot.models.foundational.timesfm import TimesFM
from timecopilot.models.benchmarks.prophet import Prophet

# Reproducibility first: fix NumPy's global seed used by the synthetic data below
np.random.seed(seed=42)

# Plot appearance: matplotlib's seaborn-flavoured style sheet, then the "husl" palette
# (style sheet is applied before the palette, as both touch the colour cycle)
plt.style.use('seaborn-v0_8')
sns.set_palette(palette="husl")

## Working with Multiple Time Series

TimeCopilot excels at handling panel data with multiple time series. Let's create a synthetic dataset with multiple series.

In [None]:
# Create synthetic multi-series data
def create_synthetic_panel_data(n_series=5, n_periods=100, freq='D'):
    """Create synthetic long-format panel data, one pattern per series.

    Series are generated in a fixed order:

    0. ``retail_sales``     - linear trend (100 -> 200) + period-7 sine + noise
    1. ``website_traffic``  - constant base + period-365.25 and period-7 sines + noise
    2. ``user_signups``     - slow exponential growth + noise
    3. ``stock_price``      - cumulative sum of unit-variance noise, offset by 100
    4. ``inventory_levels`` - period-30 sine + mild linear trend + noise

    Series with index 5 and above reuse the ``inventory_levels`` recipe but are
    named ``inventory_levels_<index>`` so every series keeps a distinct
    ``unique_id`` (duplicate ids would yield duplicate ``(unique_id, ds)`` rows).

    Periods are expressed in time steps, so "period-7" is weekly only when
    ``freq`` is daily. Noise comes from NumPy's global random state; call
    ``np.random.seed`` beforehand for reproducible output.

    Parameters
    ----------
    n_series : int, default 5
        Number of series to generate. Values <= 0 return an empty frame.
    n_periods : int, default 100
        Number of observations per series.
    freq : str, default 'D'
        pandas frequency alias used for the shared date range.

    Returns
    -------
    pandas.DataFrame
        Columns ``unique_id``, ``ds`` and ``y`` with ``n_series * n_periods`` rows.
    """
    columns = ['unique_id', 'ds', 'y']
    if n_series <= 0:
        # pd.concat raises on an empty list; hand back an empty, labelled frame instead
        return pd.DataFrame(columns=columns)

    # Date range and step index shared by every series
    dates = pd.date_range(start='2020-01-01', periods=n_periods, freq=freq)
    steps = np.arange(n_periods)

    series_data = []

    for i in range(n_series):
        if i == 0:  # Trend + seasonality
            trend = np.linspace(100, 200, n_periods)
            seasonal = 20 * np.sin(2 * np.pi * steps / 7)
            noise = np.random.normal(0, 5, n_periods)
            y = trend + seasonal + noise
            series_id = "retail_sales"

        elif i == 1:  # Strong seasonality (long + short period)
            base = 50
            seasonal = 30 * np.sin(2 * np.pi * steps / 365.25)
            weekly = 10 * np.sin(2 * np.pi * steps / 7)
            noise = np.random.normal(0, 3, n_periods)
            y = base + seasonal + weekly + noise
            series_id = "website_traffic"

        elif i == 2:  # Exponential growth
            growth_rate = 0.001
            y = 10 * np.exp(growth_rate * steps) + np.random.normal(0, 2, n_periods)
            series_id = "user_signups"

        elif i == 3:  # Random walk
            y = np.cumsum(np.random.normal(0, 1, n_periods)) + 100
            series_id = "stock_price"

        else:  # Cyclical pattern (also used for every index beyond 4)
            cycle = 25 * np.sin(2 * np.pi * steps / 30)
            trend = 0.1 * steps
            noise = np.random.normal(0, 4, n_periods)
            y = 80 + cycle + trend + noise
            # Keep the historical name for index 4; suffix later ones to stay unique
            series_id = "inventory_levels" if i == 4 else f"inventory_levels_{i}"

        series_data.append(pd.DataFrame({
            'unique_id': series_id,
            'ds': dates,
            'y': y
        }))

    return pd.concat(series_data, ignore_index=True)

# Build the demo panel: three series of 150 daily observations each
panel_df = create_synthetic_panel_data(freq='D', n_periods=150, n_series=3)

# Quick summary of what was generated, followed by a preview of the first rows
print(f"Panel data shape: {panel_df.shape}")
series_names = panel_df['unique_id'].unique()
print(f"Unique series: {series_names}")
first_date, last_date = panel_df['ds'].min(), panel_df['ds'].max()
print(f"Date range: {first_date} to {last_date}")
panel_df.head()

In [None]:
# Visualize the panel data: one stacked subplot per series.
series_ids = panel_df['unique_id'].unique()

# Size the grid from the data instead of assuming exactly three series.
# squeeze=False keeps `axes` two-dimensional even when there is a single series,
# so the column slice below always works. Height matches the original 12x10
# figure when there are three series.
fig, axes = plt.subplots(
    len(series_ids), 1,
    figsize=(12, 10 * len(series_ids) / 3),
    squeeze=False,
)

for ax, series_id in zip(axes[:, 0], series_ids):
    series_data = panel_df[panel_df['unique_id'] == series_id]
    ax.plot(series_data['ds'], series_data['y'], linewidth=2, label=series_id)
    ax.set_title(f'Time Series: {series_id}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.grid(True, alpha=0.3)
    ax.legend()

plt.tight_layout()
plt.show()

## Advanced Agent Usage

Let's use the TimeCopilot agent with more sophisticated queries and configurations.

In [None]:
# Natural-language brief passed to the agent alongside the data
complex_query = """
Analyze these three time series and provide insights on:
1. The dominant patterns in each series (trend, seasonality, cyclical)
2. Which series are most predictable and why
3. Any relationships or correlations between the series
4. Recommendations for forecasting each series
5. Potential risks or uncertainties in the forecasts
"""

# Agent backed by the LLM named in `model`; further configuration can be added here
agent = TimeCopilot(model="openai:gpt-4o-mini")

# Forecast 30 daily steps ahead for the whole panel, requesting 80% and 95%
# prediction intervals and an analysis driven by the query above
result = agent.forecast(
    df=panel_df,
    query=complex_query,
    freq="D",
    h=30,
    level=[80, 95],
)

print("Advanced forecast completed!")
forecast_df = result.forecast
print(f"Forecast shape: {forecast_df.shape}")
print(f"Series forecasted: {forecast_df['unique_id'].unique()}")

In [None]:
# Print the agent's explanation framed by a 60-character banner
banner = "=" * 60
print(banner)
print("DETAILED ANALYSIS FROM TIMECOPILOT AGENT")
print(banner)
print(result.explanation)
print(banner)

## Custom Model Ensemble

Create a custom set of models and forecast every series with each of them, so the candidates can be compared side by side for more robust forecasting.

In [None]:
# Candidate models, listed family by family
statistical_models = [
    AutoETS(),
    AutoARIMA(),
    SeasonalNaive(),
    Theta(),
    DynamicOptimizedTheta(),
]
ensemble_models = [
    *statistical_models,
    Prophet(),           # machine learning model
    TimesFM(),           # foundation model
    HistoricAverage(),   # simple baseline
]

# One forecaster wrapping every candidate
ensemble_forecaster = TimeCopilotForecaster(models=ensemble_models)

# Forecast 21 daily steps (3 weeks) with 80% and 95% intervals
ensemble_forecasts = ensemble_forecaster.forecast(
    df=panel_df,
    freq="D",
    h=21,
    level=[80, 95],
)

print(f"Ensemble forecasts shape: {ensemble_forecasts.shape}")
model_names = ensemble_forecasts['model'].unique()
print(f"Models in ensemble: {len(model_names)}")
print(f"Models: {model_names}")

## Model Performance Analysis

Evaluate the performance of different models using cross-validation.

In [None]:
# Cross-validate the ensemble on the panel:
# 5 windows, each forecasting 7 daily steps, with a step of 7 between windows
cv_results = ensemble_forecaster.cross_validation(
    df=panel_df,
    freq="D",
    h=7,
    n_windows=5,
    step_size=7,
)

print(f"Cross-validation results shape: {cv_results.shape}")
n_cutoffs = cv_results['cutoff'].nunique()
print(f"CV windows: {n_cutoffs}")
cv_results.head()

In [None]:
# Calculate comprehensive performance metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

def calculate_metrics(y_true, y_pred):
    """Return MAE, MSE, RMSE and MAPE (as a percentage) for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, aligned with ``y_true``.

    Returns
    -------
    dict
        Keys ``'mae'``, ``'mse'``, ``'rmse'`` and ``'mape'``; ``'mape'`` is the
        scikit-learn fraction multiplied by 100.
    """
    # MSE is computed once and reused for RMSE
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mape_pct = mean_absolute_percentage_error(y_true, y_pred) * 100
    return {
        'mae': mae,
        'mse': mse,
        'rmse': np.sqrt(mse),
        'mape': mape_pct,
    }

# Score every (series, model) pair found in the cross-validation output
performance_results = []

for series_id in cv_results['unique_id'].unique():
    in_series = cv_results.loc[cv_results['unique_id'] == series_id]

    for model in in_series['model'].unique():
        scored = in_series.loc[in_series['model'] == model]

        # Nothing to score for this pair
        if scored.empty:
            continue

        # Metric columns first, then the identifying columns
        performance_results.append({
            **calculate_metrics(scored['y'], scored['y_pred']),
            'series_id': series_id,
            'model': model,
            'n_predictions': len(scored),
        })

performance_df = pd.DataFrame(performance_results)
print("Model Performance by Series:")
performance_df.round(3)

## Advanced Visualization

Create comprehensive visualizations to understand model performance and forecasts.

In [None]:
# 2x2 performance dashboard: heatmaps on the top row, box plots on the bottom row
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Heatmap of MAPE by model and series
mape_pivot = performance_df.pivot(index='model', columns='series_id', values='mape')
sns.heatmap(mape_pivot, annot=True, fmt='.1f', cmap='YlOrRd', ax=axes[0, 0])
axes[0, 0].set_title('MAPE by Model and Series (%)')

# Heatmap of RMSE by model and series
rmse_pivot = performance_df.pivot(index='model', columns='series_id', values='rmse')
sns.heatmap(rmse_pivot, annot=True, fmt='.1f', cmap='YlOrRd', ax=axes[0, 1])
axes[0, 1].set_title('RMSE by Model and Series')

# Box plot of MAPE by model
performance_df.boxplot(column='mape', by='model', ax=axes[1, 0])
axes[1, 0].set_title('MAPE Distribution by Model')
axes[1, 0].set_xlabel('Model')
axes[1, 0].set_ylabel('MAPE (%)')

# Box plot of RMSE by series
performance_df.boxplot(column='rmse', by='series_id', ax=axes[1, 1])
axes[1, 1].set_title('RMSE Distribution by Series')
axes[1, 1].set_xlabel('Series')
axes[1, 1].set_ylabel('RMSE')

# DataFrame.boxplot(by=...) writes a figure-level "Boxplot grouped by ..." suptitle,
# which would sit on top of the whole 2x2 grid; clear it so only the per-axes
# titles set above remain.
fig.suptitle('')

plt.tight_layout()
plt.show()

In [None]:
# Plot forecasts for each series: history, agent forecast (+ 80% interval when
# available) and the three ensemble models with the lowest cross-validated MAPE.
series_ids = panel_df['unique_id'].unique()

# Size the grid from the data rather than assuming three series; squeeze=False
# keeps `axes` two-dimensional even for a single series. 4 inches per row
# reproduces the original 15x12 figure for three series.
fig, axes = plt.subplots(
    len(series_ids), 1,
    figsize=(15, 4 * len(series_ids)),
    squeeze=False,
)

colors = ['green', 'orange', 'purple']

for ax, series_id in zip(axes[:, 0], series_ids):
    # Historical data
    historical = panel_df[panel_df['unique_id'] == series_id]
    ax.plot(historical['ds'], historical['y'], 'b-', linewidth=2, label='Historical')

    # Agent forecast. `.assign` returns a new frame, so the datetime conversion
    # does not write into a filtered slice (avoids SettingWithCopyWarning).
    agent_forecast = result.forecast[result.forecast['unique_id'] == series_id]
    agent_forecast = agent_forecast.assign(ds=pd.to_datetime(agent_forecast['ds']))
    ax.plot(agent_forecast['ds'], agent_forecast['y'], 'r--', linewidth=2, label='Agent Forecast')

    # Add prediction intervals only when both bounds are present
    if {'lo-80', 'hi-80'}.issubset(agent_forecast.columns):
        ax.fill_between(agent_forecast['ds'], agent_forecast['lo-80'], agent_forecast['hi-80'],
                        alpha=0.3, color='red', label='80% PI')

    # Add top 3 ensemble models (lowest MAPE first)
    series_performance = performance_df[performance_df['series_id'] == series_id].sort_values('mape')
    top_models = series_performance.head(3)['model'].tolist()

    for rank, (color, model) in enumerate(zip(colors, top_models), start=1):
        model_forecast = ensemble_forecasts[
            (ensemble_forecasts['unique_id'] == series_id) &
            (ensemble_forecasts['model'] == model)
        ]
        if len(model_forecast) > 0:
            model_forecast = model_forecast.assign(ds=pd.to_datetime(model_forecast['ds']))
            ax.plot(model_forecast['ds'], model_forecast['y'],
                    color=color, linestyle=':', linewidth=2, label=f'{model} (Top {rank})')

    ax.set_title(f'Forecasts for {series_id}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Specific Use Cases

Let's demonstrate some specific advanced use cases.

In [None]:
# Use case 1: Forecast with different horizons for different series,
# using the model with the lowest cross-validated MAPE for each one.
custom_forecasts = {}

# Class name -> model object. setdefault keeps the first instance of each class,
# matching a first-match linear search over `ensemble_models`.
models_by_name = {}
for candidate in ensemble_models:
    models_by_name.setdefault(candidate.__class__.__name__, candidate)

for series_id in panel_df['unique_id'].unique():
    series_data = panel_df[panel_df['unique_id'] == series_id]

    # Different horizons based on series characteristics
    if 'retail' in series_id:
        h = 14  # 2 weeks for retail
    elif 'traffic' in series_id:
        h = 7   # 1 week for web traffic
    else:
        h = 30  # 1 month for others

    # Get best model for this series (lowest MAPE)
    best_model_name = (
        performance_df[performance_df['series_id'] == series_id]
        .sort_values('mape')
        .iloc[0]['model']
    )

    # Compare against None explicitly: a model object's truthiness is not a
    # reliable "was it found" signal.
    best_model = models_by_name.get(best_model_name)
    if best_model is None:
        # Report the gap instead of silently dropping the series
        print(f"Skipping {series_id}: no model in ensemble_models has class name {best_model_name!r}")
        continue

    custom_forecaster = TimeCopilotForecaster(models=[best_model])
    forecast = custom_forecaster.forecast(df=series_data, h=h, freq="D")
    custom_forecasts[series_id] = {
        'forecast': forecast,
        'horizon': h,
        'model': best_model_name
    }

print("Custom forecasts generated:")
for series_id, info in custom_forecasts.items():
    print(f"{series_id}: {info['horizon']} days with {info['model']}")

In [None]:
# Use case 2: Query-driven analysis
business_queries = [
    "Which series shows the most growth potential?",
    "What are the key risk factors for each forecast?",
    "How do seasonal patterns differ between series?",
    "What external factors might influence these forecasts?"
]

for query in business_queries:
    print(f"\n{'='*60}")
    print(f"QUERY: {query}")
    print(f"{'='*60}")

    # Use agent to answer specific question
    query_result = agent.forecast(
        df=panel_df,
        h=7,  # Short horizon for quick analysis
        freq="D",
        query=query
    )

    # Show at most the first 500 characters; add the ellipsis only when text
    # was actually cut, so short explanations are not shown as truncated.
    explanation = query_result.explanation
    preview = explanation[:500]
    if len(explanation) > 500:
        preview += "..."
    print(preview)

## Summary and Best Practices

### Key Advanced Features:

1. **Multi-Series Forecasting**: TimeCopilot handles panel data seamlessly
2. **Model Ensembles**: Combine multiple models for robust forecasting
3. **Custom Configurations**: Tailor forecasts to specific business needs
4. **Performance Evaluation**: Use cross-validation for model selection
5. **Intelligent Querying**: Get specific insights through natural language

### Best Practices:

1. **Data Quality**: Ensure clean, consistent time series data
2. **Model Selection**: Use cross-validation to choose the best model for each series
3. **Ensemble Approach**: Combine multiple models for better robustness
4. **Business Context**: Include domain knowledge in queries
5. **Validation**: Always validate forecasts against business logic

### Next Steps:

- Experiment with different model combinations
- Integrate with your specific business data
- Set up automated forecasting pipelines
- Monitor forecast accuracy over time
- Use insights to drive business decisions