In [None]:
import os
import sys
import time
import uuid
from datetime import datetime
from pyspark.sql import SparkSession

In [None]:
# Resolve the project's 'src' directory relative to the notebook's working directory.
current_dir = os.getcwd()
# Two levels up from the working directory, then into 'src'.
# NOTE(review): an earlier version of this comment said "3 levels", but the path
# uses two '..' segments - confirm the intended depth.
project_root = os.path.abspath(os.path.join(current_dir, '..', '..', 'src'))
# Register 'src' only once so re-running this cell does not pile up duplicate
# sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local helpers (presumably resolved through the 'src' entry added above).
from logging_utils.logger import log_pipeline_event, log_task_event
from unikargo_utils import add_pipeline_metadata

# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

In [None]:

# ---------------------------------------------
# Identifiers and bookkeeping for this pipeline run
# ---------------------------------------------
# Fixed labels for the pipeline and its target environment.
# NOTE(review): spelled "unicargo" here but "unikargo" in the environment name -
# confirm which spelling downstream consumers expect.
pipeline_name = "unicargo_pipeline"
environment = "unikargo_dev"

# Per-run values: a random UUID for the pipeline and an epoch-seconds run label.
pipeline_id = str(uuid.uuid4())
run_id = "run_" + str(int(time.time()))

# Wall-clock start of the run (naive datetime, no timezone attached).
pipeline_start = datetime.now()

# Filled in per task as the pipeline progresses (optional bookkeeping).
task_results = dict()

In [None]:
# -----------------------------
# Helper to run notebook safely
# -----------------------------
def run_notebook_safe(path, **kwargs):
    """
    Run a child notebook and report its outcome without raising.

    Args:
        path: Notebook path handed to ``dbutils.notebook.run``.
        **kwargs: Forwarded to the child notebook as its arguments dict.
            ``pipeline_name`` and ``pipeline_id`` (when present) are also
            passed to ``log_task_event`` if the run fails.

    Returns:
        tuple: ``(status, rows_processed)``. ``status`` is ``"SUCCESS"`` when
        the notebook run completed and ``"FAILED"`` when it raised.
        ``rows_processed`` is the notebook's exit value parsed as an int, or 0
        when the run failed or the exit value is not an integer.
    """
    try:
        # A timeout of 0 asks Databricks to run the notebook without a time limit.
        result = dbutils.notebook.run(path, 0, kwargs)
    except Exception as e:
        # Best-effort task-level failure log; a logging problem must not mask
        # the fact that the notebook itself failed.
        try:
            log_task_event(
                status="FAILED",
                rows=None,
                message=str(e),
                pipeline_name=kwargs.get("pipeline_name"),
                pipeline_id=kwargs.get("pipeline_id"),
                file_format="delta"
            )
        except Exception as log_e:
            print(f"Failed to log task event: {log_e}")
            # Surface the original error too, otherwise it would be lost here.
            print(f"Notebook {path} failed: {e}")
        return "FAILED", 0

    # Parse the exit value outside the try above: the notebook already ran, so
    # an empty or non-numeric exit value must not be reported as a failed run.
    try:
        rows_processed = int(result)
    except (TypeError, ValueError):
        print(
            f"Notebook {path} returned a non-integer exit value {result!r}; "
            "recording 0 rows"
        )
        rows_processed = 0
    return "SUCCESS", rows_processed

In [None]:
# -----------------------------
# Run Bronze Notebooks
# -----------------------------
bronze_notebooks = [
    "bronze_airlines",
    "bronze_customers",
    "bronze_flights"
]

# Run each bronze notebook in order and keep its (status, rows_processed) result
# keyed by notebook name. The helper defined in this notebook is
# run_notebook_safe; calling the undefined name run_notebook raised NameError.
for nb in bronze_notebooks:
    task_results[nb] = run_notebook_safe(
        f"notebooks/bronze/{nb}",
        pipeline_id=pipeline_id,
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment=environment
    )
