# Curve Formation Pipeline Orchestrator

This notebook orchestrates the end-to-end curve formation pipeline, including:
1. Market data ingestion and validation
2. Curve calculation for different asset classes
3. Data quality checks and monitoring
4. Output persistence and notifications

## Parameters
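- `env`: Target environment (dev/staging/prod); selects which configuration is loaded. Default: `dev`.
- `trade_date`: Trade date for curve construction, in `YYYY-MM-DD` format. Default: the current date.
- `asset_classes`: Comma-separated list of asset classes to process. Default: `IR,FX,CREDIT`.
- `notification_email`: Email address for the completion notification. Default: empty, in which case no notification is sent.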

In [None]:
# Curve Formation Pipeline Orchestrator
import os
import sys
from datetime import datetime, timedelta

# Add project root to Python path.
# `__file__` is only defined when this code runs as a regular module/script;
# inside a notebook cell it raises NameError, so fall back to the current
# working directory.
# NOTE(review): the fallback assumes the working directory is the notebook's
# own directory - confirm for the target runtime.
try:
    _notebook_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    _notebook_dir = os.getcwd()

# The project root is taken to be two levels above the notebook directory.
project_root = os.path.abspath(os.path.join(_notebook_dir, '..', '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.curve_formation.core.processor import CurveProcessor
from src.curve_formation.core.writer import CurveWriter
from src.utils.config_manager import ConfigManager
from src.utils.data_quality.validator import DataValidator
from src.utils.exception_handler import handle_exception
from src.utils.logging_utils import get_logger
from src.utils.spark_utils import get_spark_session

In [None]:
# Initialize logging and configuration
logger = get_logger(__name__)
config_manager = ConfigManager()

# Declare the pipeline parameters as text widgets: (name, default, label)
dbutils.widgets.text("env", "dev", "Environment")
dbutils.widgets.text("trade_date", datetime.now().strftime("%Y-%m-%d"), "Trade Date")
dbutils.widgets.text("asset_classes", "IR,FX,CREDIT", "Asset Classes")
dbutils.widgets.text("notification_email", "", "Notification Email")

# Read the parameter values back from the widgets
env = dbutils.widgets.get("env")

# Tolerate stray whitespace around the date; strptime still raises ValueError
# when the value is not in YYYY-MM-DD format.
trade_date = datetime.strptime(dbutils.widgets.get("trade_date").strip(), "%Y-%m-%d")

# Normalise the comma-separated list: "IR, FX," must yield ["IR", "FX"], not
# ["IR", " FX", ""], because each entry is later used verbatim as a config key
# and as a filter value.
asset_classes = [
    token.strip()
    for token in dbutils.widgets.get("asset_classes").split(",")
    if token.strip()
]
if not asset_classes:
    # Fail fast with a clear message instead of a lookup error further down.
    raise ValueError("At least one asset class must be specified in 'asset_classes'")

notification_email = dbutils.widgets.get("notification_email")

# Load configuration
config = config_manager.load_config(env)
logger.info(f"Loaded configuration for environment: {env}")

# Initialize Spark session
spark = get_spark_session(config)
logger.info("Initialized Spark session")

In [None]:
# Load and validate market data
@handle_exception
def load_market_data(trade_date: datetime, asset_classes: list) -> dict:
    """Load market data for the given asset classes and validate each slice.

    Args:
        trade_date: Trade date whose rows are selected from the market data
            table (compared as a ``YYYY-MM-DD`` string).
        asset_classes: Asset class identifiers; each one is used both as the
            ``asset_class`` filter value and as the key into
            ``config['data_quality']``.

    Returns:
        Dict mapping each asset class to its filtered DataFrame.

    Raises:
        ValueError: If ``DataValidator.validate_market_data`` returns a falsy
            result for an asset class.
        KeyError: If an asset class has no entry in ``config['data_quality']``.

    NOTE(review): ``handle_exception`` wraps this function; how it treats the
    exceptions above is not visible here.
    """
    logger.info(f"Loading market data for {trade_date} - Asset Classes: {asset_classes}")

    data_validator = DataValidator()

    # Loop-invariant: the source table reference and the formatted date.
    trade_date_str = trade_date.strftime('%Y-%m-%d')
    source_table = spark.table(
        f"{config['catalog']}.{config['schemas']['market_data']}.market_data"
    )

    market_data = {}
    for asset_class in asset_classes:
        # Filter with Column comparisons rather than an interpolated SQL
        # string, so a parameter value containing a quote cannot alter the
        # predicate.
        df = source_table.filter(
            (source_table["trade_date"] == trade_date_str)
            & (source_table["asset_class"] == asset_class)
        )

        # Validate data quality
        validation_config = config['data_quality'][asset_class]
        if not data_validator.validate_market_data(df, validation_config):
            raise ValueError(f"Data quality validation failed for {asset_class}")

        market_data[asset_class] = df
        logger.info(f"Successfully loaded and validated market data for {asset_class}")

    return market_data

# Load market data for processing
market_data = load_market_data(trade_date, asset_classes)
logger.info("Market data loading complete")

In [None]:
# Process curves for each asset class
@handle_exception
def process_curves(market_data: dict, trade_date: datetime) -> dict:
    """Construct, validate and persist curves for each asset class.

    For every asset class the curves are built with ``CurveProcessor.process``,
    each curve is checked with ``DataValidator.validate_market_data`` against
    ``config['data_quality']['<asset_class>_curves']``, and the full set is
    then written with ``CurveWriter.write_curves``.

    Args:
        market_data: Dict mapping asset class to its market data.
        trade_date: Trade date passed to the processor and the writer.

    Returns:
        Dict mapping each asset class to the curves produced for it.

    Raises:
        ValueError: If validation returns a falsy result for any curve. Curves
            of asset classes processed earlier in the loop have already been
            written at that point.
        KeyError: If ``config['data_quality']`` has no
            ``'<asset_class>_curves'`` entry.

    NOTE(review): ``handle_exception`` wraps this function; how it treats the
    exceptions above is not visible here.
    """
    logger.info("Starting curve construction process")

    curve_processor = CurveProcessor(config)
    curve_writer = CurveWriter(config)
    # One validator for the whole run, as in load_market_data, rather than a
    # new instance per asset class.
    data_validator = DataValidator()
    processed_curves = {}

    for asset_class, data in market_data.items():
        logger.info(f"Processing curves for {asset_class}")

        # Construct curves
        curves = curve_processor.process(
            data,
            asset_class=asset_class,
            trade_date=trade_date,
        )

        # Validate every curve before anything for this asset class is written
        validation_config = config['data_quality'][f'{asset_class}_curves']
        for curve_name, curve_data in curves.items():
            if not data_validator.validate_market_data(curve_data, validation_config):
                raise ValueError(f"Curve validation failed for {curve_name}")

        # Write curves to Delta table
        curve_writer.write_curves(curves, asset_class, trade_date)
        processed_curves[asset_class] = curves

        logger.info(f"Successfully processed and stored curves for {asset_class}")

    return processed_curves

# Process curves
processed_curves = process_curves(market_data, trade_date)
logger.info("Curve processing complete")

In [None]:
# Generate monitoring metrics and send notifications
@handle_exception
def generate_monitoring_metrics(processed_curves: dict, trade_date: datetime) -> None:
    """Compute per-curve metrics, append them to the monitoring table, notify.

    For each asset class the metrics hold the curve count, the generation
    timestamp, the trade date and, per curve, the point count plus the min/max
    of the ``tenor`` and ``value`` columns. The result is appended to the
    ``curve_metrics`` table in the monitoring schema, and a notification is
    sent when ``notification_email`` is non-empty.

    Args:
        processed_curves: Dict mapping asset class to a dict of
            curve name -> curve DataFrame.
        trade_date: Trade date recorded in the metrics and in the
            notification subject.
    """
    logger.info("Generating monitoring metrics")

    metrics = {}
    for asset_class, curves in processed_curves.items():
        metrics[asset_class] = {
            'curve_count': len(curves),
            'timestamp': datetime.now().isoformat(),
            'trade_date': trade_date.strftime('%Y-%m-%d'),
        }

        # Calculate curve-specific metrics.
        # All five statistics come from one aggregation, so each curve costs
        # a single Spark job instead of five (count + four agg/collect calls).
        for curve_name, curve_data in curves.items():
            stats_row = curve_data.selectExpr(
                "count(*) AS points_count",
                "min(tenor) AS min_tenor",
                "max(tenor) AS max_tenor",
                "min(value) AS min_value",
                "max(value) AS max_value",
            ).collect()[0]
            metrics[asset_class][curve_name] = stats_row.asDict()

    # Write metrics to monitoring table
    # NOTE(review): `metrics` is a nested dict with mixed value types (int,
    # str, dict) keyed by asset class; confirm that schema inference yields
    # the layout the curve_metrics table expects.
    metrics_df = spark.createDataFrame([metrics])
    metrics_df.write.format('delta').mode('append').saveAsTable(
        f"{config['catalog']}.{config['schemas']['monitoring']}.curve_metrics"
    )

    # Send notification if configured
    # NOTE(review): `send_notification` is not imported or defined in the
    # visible part of this notebook - confirm it is in scope at runtime.
    if notification_email:
        send_notification(
            notification_email,
            f"Curve Formation Pipeline Complete - {trade_date.strftime('%Y-%m-%d')}",
            f"Processed curves for asset classes: {', '.join(processed_curves.keys())}\n"
            f"Metrics: {metrics}"
        )

    logger.info("Monitoring metrics generated and notifications sent")

# Generate metrics and send notifications
generate_monitoring_metrics(processed_curves, trade_date)
logger.info("Pipeline execution complete")