In [None]:
import sys
from pathlib import Path

# Work out the repository root so that project packages can be imported.
if '__file__' not in globals():
    # No __file__ (e.g. an interactive kernel): fall back to the working directory.
    cwd = Path.cwd()
    # A 'src' directory directly under cwd means we are already at the repo root;
    # otherwise assume we are in the notebooks directory, three levels below it.
    repo_root = cwd if (cwd / 'src').exists() else cwd.parent.parent.parent
else:
    # Running as a file: the repo root is four levels above this file.
    repo_root = Path(__file__).resolve().parent.parent.parent.parent

# Put the repo root at the front of sys.path, but only once.
repo_root_path = str(repo_root.resolve())
already_importable = repo_root_path in sys.path
if not already_importable:
    sys.path.insert(0, repo_root_path)
    print(f'{repo_root_path} added to sys path')

In [None]:
try:
    # Preferred: package-qualified imports, resolvable when the repo root
    # (the directory containing 'src') is on sys.path.
    from src.etl.common.config import *
    from src.etl.common.table_updater import *
except ImportError:
    # Fallback for environments where the modules are importable by their bare
    # names. Only ImportError (which includes ModuleNotFoundError) triggers the
    # fallback, so genuine errors raised inside the modules are not masked.
    from config import *
    from table_updater import *

In [None]:
"""
ETL DAG Orchestrator

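Creates a DAG that orchestrates ETL loads in dependency order:
1. dim_employee (runs first)
2. fact_employee_pay (runs after dim_employee succeeds)

NOTE: only the dim_employee task is currently defined in this notebook;
the fact_employee_pay task and its dependency are not wired up yet.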

Usage:
    Copy this code into a Snowflake Python worksheet or notebook cell,
    adjust the configuration variables, and run.
"""

from snowflake.snowpark import Session
from snowflake.core.task.dagv1 import DAG, DAGTask, DAGOperation
from snowflake.core import CreateMode, Root

# =============================================================================
# Configuration - adjust these values for your environment
# =============================================================================
# Database and schema the DAG is deployed into, and where execute_dag() looks
# up the task to run.
TARGET_DATABASE = 'LEARNING_DB'
TARGET_SCHEMA = 'ETL'
# Warehouse name passed to the DAG as its 'warehouse' setting.
WAREHOUSE_NAME = 'COMPUTE_WH'

# Set to number of minutes for automatic scheduling, or None for manual execution only.
# The value is checked for truthiness, so 0 also leaves the DAG unscheduled.
SCHEDULE_MINUTES = None  # e.g., 60 for hourly runs


def create_dag_orchestrator(session: Session) -> str:
    """
    Build the ETL DAG and deploy it to TARGET_DATABASE.TARGET_SCHEMA.

    The DAG currently holds a single task, ``load_dim_employee``, which runs a
    Snowflake notebook. Deployment uses ``CreateMode.or_replace``, so an
    existing DAG with the same name is replaced.

    Args:
        session: Snowflake session object (use get_active_session() in notebooks)

    Returns:
        A message naming the deployed DAG, the schema it was deployed to, and
        whether it is scheduled or manual-only.
    """
    # Local import keeps this cell runnable on its own; timedelta is the
    # documented type for an interval schedule on a DAG.
    from datetime import timedelta

    dag_name = 'etl_dag_orchestrator_notebook_test'
    root = Root(session)

    # Build DAG configuration
    dag_config = {
        'name': dag_name,
        'warehouse': WAREHOUSE_NAME,
    }

    # Add schedule if configured (None or 0 means manual execution only).
    # The DAG API expects a Cron or timedelta here, not a '<n> MINUTE' string.
    if SCHEDULE_MINUTES:
        dag_config['schedule'] = timedelta(minutes=SCHEDULE_MINUTES)

    # Define the DAG; tasks created inside the context are attached to it.
    with DAG(**dag_config) as dag:
        # Task 1: Load dim_employee (runs first)
        DAGTask(
            name="load_dim_employee",
            # The documented EXECUTE NOTEBOOK syntax takes an argument list,
            # so the (empty) parentheses are included.
            definition="EXECUTE NOTEBOOK learning_db.etl.MAIN_SRC_ETL_NOTEBOOKS_TEST_NOTEBOOK_WITH_COMMON();",
            comment="Load dimension table: dim_employee",
        )

    # Deploy the DAG
    dag_op = DAGOperation(root.databases[TARGET_DATABASE].schemas[TARGET_SCHEMA])
    dag_op.deploy(dag, mode=CreateMode.or_replace)

    if SCHEDULE_MINUTES:
        schedule_msg = f" (scheduled every {SCHEDULE_MINUTES} minutes)"
    else:
        schedule_msg = " (manual execution only)"
    # Report the name that was actually deployed, not a hard-coded one.
    return f"DAG '{dag_name}' deployed to {TARGET_DATABASE}.{TARGET_SCHEMA}{schedule_msg}"


def execute_dag(session: Session, dag_name: str = 'etl_dag_orchestrator_notebook_test') -> str:
    """
    Manually trigger the DAG execution.

    Looks up the task named ``dag_name`` in TARGET_DATABASE.TARGET_SCHEMA and
    calls ``execute()`` on it.

    Args:
        session: Snowflake session object.
        dag_name: Name of the task to execute. Defaults to the name under
            which create_dag_orchestrator() deploys the DAG.

    Returns:
        A fixed confirmation message.
    """
    root = Root(session)
    tasks = root.databases[TARGET_DATABASE].schemas[TARGET_SCHEMA].tasks
    # The lookup name must match the deployed DAG name; the previous
    # 'etl_dag_orchestrator' did not match what is deployed.
    tasks[dag_name].execute()
    return "DAG execution triggered!"


# get_session() is not defined in this notebook; presumably it is provided by
# the star imports from config / table_updater — TODO confirm.
session = get_session()
    
# Deploy the DAG
# The returned status message is kept in `result` and echoed to the cell output.
result = create_dag_orchestrator(session)
print(result)



In [None]:

# Fetch a handle to the deployed DAG's task so it can be triggered manually.
# Use the configured database/schema rather than repeating the literals, so
# this lookup follows the deploy target when the configuration is changed.
root = Root(session)
tasks = root.databases[TARGET_DATABASE].schemas[TARGET_SCHEMA].tasks
dag_res = tasks['etl_dag_orchestrator_notebook_test']


In [None]:
# Manually trigger the task handle obtained in the previous cell.
dag_res.execute()