# In [0]:
import os
import time
import requests
import logging
from typing import Dict, Any, List, Optional
from pathlib import Path
from typing import Tuple

# Root logging: INFO and above, formatted as "<timestamp> <level> <message>".
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# Workspace host name, read from the tags of the current notebook context.
# `dbutils` is not defined in this file; it is expected to be provided by the
# notebook runtime.
instance_name = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .tags()
    .apply("browserHostName")
)
DATABRICKS_INSTANCE = "https://{}".format(instance_name)

# API token taken from the same notebook context; used as the Bearer token
# for REST calls.
TOKEN = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .apiToken()
    .get()
)

# The notebooks directory is resolved two levels above the working directory.
CURRENT_PATH = os.getcwd()
NOTEBOOK_PATH = os.path.join(Path(CURRENT_PATH).parent.parent, "notebooks")

# Node type requested for every job cluster.
NODE_TYPE_ID = "Standard_DS3_v2"

# In [0]:
import os
import time
import logging
from typing import List, Dict, Any, Tuple
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

# Runtime versions to exercise: the three plain runtimes first, followed by
# their "cpu-ml" counterparts (same order as listing them by hand).
DBR_VERSIONS = [
    f"{release}.x-{flavor}scala2.12"
    for flavor in ("", "cpu-ml-")
    for release in ("14.3", "15.4", "16.4")
]

# Values passed to the notebook as its "mode" parameter; one job is created
# per (version, mode) pair.
MODES = ["comment", "pi"]

# Logging to a stream handler at INFO level with timestamp, level and message.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    handlers=[logging.StreamHandler()],
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


def databricks_api(
    method: str, endpoint: str, payload: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Make a Databricks REST API call with error handling and logging.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        endpoint: Path appended to "<DATABRICKS_INSTANCE>/api/2.1"; must start
            with "/" and may carry a query string.
        payload: Optional JSON body. ``None`` sends no body.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx status. The failure is logged and then re-raised.
    """
    url = f"{DATABRICKS_INSTANCE}/api/2.1{endpoint}"
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json"
    }
    try:
        response = requests.request(
            method, url, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        # The exception text only carries the status line; when a response
        # was received, append (a bounded slice of) its body so whatever the
        # server sent back ends up in the log as well.
        failed_response = getattr(e, "response", None)
        detail = ""
        if failed_response is not None and failed_response.text:
            detail = f" | Response: {failed_response.text[:500]}"
        logger.error(f"API call failed: {method} {endpoint} | Error: {e}{detail}")
        raise

def create_job(dbr_version: str, mode: str) -> int:
    """
    Register a one-task notebook job for the given runtime version and mode.

    The task runs the "generate_metadata" notebook under NOTEBOOK_PATH on a
    new cluster of NODE_TYPE_ID with the requested Spark version. The
    cluster's "is_single_node" flag is set whenever the version string
    contains "ml".

    Returns the "job_id" field of the API response (None if it is absent).
    """
    job_name = f"auto_job_{dbr_version.replace('.', '_')}_{mode}"

    notebook_spec = {
        "notebook_path": os.path.join(NOTEBOOK_PATH, "generate_metadata"),
        "base_parameters": {"mode": mode, "cleanup_control_table": "true"},
    }
    cluster_spec = {
        "spark_version": dbr_version,
        "is_single_node": "ml" in dbr_version,
        "node_type_id": NODE_TYPE_ID,
        "num_workers": 2,
    }
    task_spec = {
        "task_key": "notebook_task",
        "notebook_task": notebook_spec,
        "new_cluster": cluster_spec,
    }

    response = databricks_api(
        "POST", "/jobs/create", {"name": job_name, "tasks": [task_spec]}
    )
    job_id = response.get("job_id")
    logger.info(f"Created job {job_name} (ID: {job_id})")
    return job_id

def run_job(job_id: int) -> int:
    """
    Start a run of an existing job via the "run-now" endpoint.

    Returns the "run_id" field of the API response (None if it is absent).
    """
    trigger_request = {"job_id": job_id}
    run_id = databricks_api("POST", "/jobs/run-now", trigger_request).get("run_id")
    logger.info(f"Triggered run {run_id} for job {job_id}")
    return run_id

def poll_run(run_id: int, poll_interval: int = 60) -> Tuple[str, str]:
    """
    Block until the run reaches a terminal life-cycle state.

    The run is fetched, its status logged, and - unless it has finished -
    the function sleeps ``poll_interval`` seconds before asking again.
    There is no upper bound on the number of polls.

    Returns (result_state, state_message) as reported by the API;
    result_state is None when the response carries no such field, and
    state_message defaults to "".
    """
    finished_states = ("TERMINATED", "SKIPPED", "INTERNAL_ERROR")
    status_endpoint = f"/jobs/runs/get?run_id={run_id}"

    while True:
        state = databricks_api("GET", status_endpoint).get("state", {})
        life_cycle = state.get("life_cycle_state")
        result_state = state.get("result_state")
        logger.info(f"Run {run_id} status: {life_cycle} ({result_state})")

        if life_cycle not in finished_states:
            time.sleep(poll_interval)
            continue

        return result_state, state.get("state_message", "")

def delete_job(job_id: int) -> None:
    """
    Remove a job definition through the "/jobs/delete" endpoint and log it.
    """
    deletion_request = {"job_id": job_id}
    databricks_api("POST", "/jobs/delete", deletion_request)
    logger.info(f"Deleted job {job_id}")


def _summary_row(r: Dict[str, Any]) -> str:
    """Format one result record as a row of the summary table."""
    # poll_run hands back whatever the API reported, so result_state (and a
    # null state_message) may be None. None accepts neither a width spec nor
    # slicing, so substitute printable placeholders instead of crashing.
    result_state = r["result_state"] or "N/A"
    state_message = r["state_message"] or ""
    # `!s:>8` keeps integer IDs right-aligned exactly as a bare `:8` would,
    # while also tolerating a missing (None) ID. The message is cut to the
    # 17-character column width so long messages do not break the table.
    return (
        f"| {r['dbr_version']:11} | {r['mode']:7} | {r['job_id']!s:>8} | "
        f"{r['run_id']!s:>8} | {result_state:10} | {state_message[:17]:17} |"
    )


def main() -> None:
    """
    Create, run, monitor, and clean up one Databricks job per
    (DBR version, mode) combination, working in parallel.

    Phases, each fanned out over a thread pool:
      1. create a job for every pair from DBR_VERSIONS x MODES;
      2. trigger a run for every job that was created;
      3. poll every triggered run until it finishes;
      4. delete every job that was created, whatever its run outcome.

    A failure in one phase is logged and drops that item from the following
    phases (except deletion); it never aborts the other items. A summary
    table of the runs that were polled to completion is printed at the end.
    """
    # Phase 1: create the jobs.
    jobs: List[Tuple[str, str, int]] = []
    with ThreadPoolExecutor(max_workers=6) as executor:
        future_to_combo = {
            executor.submit(create_job, dbr, mode): (dbr, mode)
            for dbr in DBR_VERSIONS for mode in MODES
        }
        for future in as_completed(future_to_combo):
            dbr, mode = future_to_combo[future]
            try:
                job_id = future.result()
                jobs.append((dbr, mode, job_id))
            except Exception as e:
                logger.error(f"Failed to create job for {dbr} / {mode}: {e}")

    # Phase 2: trigger one run per created job.
    runs: List[Tuple[str, str, int, int]] = []
    with ThreadPoolExecutor(max_workers=6) as executor:
        future_to_job = {
            executor.submit(run_job, job_id): (dbr, mode, job_id)
            for dbr, mode, job_id in jobs
        }
        for future in as_completed(future_to_job):
            dbr, mode, job_id = future_to_job[future]
            try:
                run_id = future.result()
                runs.append((dbr, mode, job_id, run_id))
            except Exception as e:
                logger.error(f"Failed to run job {job_id} ({dbr}/{mode}): {e}")

    # Phase 3: wait for every run to finish and collect its outcome.
    results: List[Dict[str, Any]] = []
    with ThreadPoolExecutor(max_workers=6) as executor:
        future_to_run = {
            executor.submit(poll_run, run_id): (dbr, mode, job_id, run_id)
            for dbr, mode, job_id, run_id in runs
        }
        for future in as_completed(future_to_run):
            dbr, mode, job_id, run_id = future_to_run[future]
            try:
                result_state, state_message = future.result()
                results.append({
                    "dbr_version": dbr,
                    "mode": mode,
                    "job_id": job_id,
                    "run_id": run_id,
                    "result_state": result_state,
                    "state_message": state_message
                })
                logger.info(f"Run {run_id} ({dbr}/{mode}) finished: {result_state} - {state_message}")
            except Exception as e:
                logger.error(f"Polling failed for run {run_id} ({dbr}/{mode}): {e}")

    # Phase 4: delete every created job. Futures are mapped back to their
    # job ID so a failed deletion can be attributed, as in the other phases.
    with ThreadPoolExecutor(max_workers=12) as executor:
        future_to_deleted = {
            executor.submit(delete_job, job_id): job_id for _, _, job_id in jobs
        }
        for future in as_completed(future_to_deleted):
            job_id = future_to_deleted[future]
            try:
                future.result()
            except Exception as e:
                logger.error(f"Failed to delete job {job_id}: {e}")

    print("\nJob Results Summary:")
    print("| DBR Version | Mode    | Job ID   | Run ID   | Result     | Message           |")
    print("|-------------|---------|----------|----------|------------|-------------------|")
    for r in results:
        print(_summary_row(r))

# Entry point: run the whole create/run/poll/delete workflow when this code is
# executed as the main module (rather than imported).
# NOTE(review): inside a notebook cell `__name__` is presumably "__main__",
# so executing the cell starts the workflow immediately - confirm this is intended.
if __name__ == "__main__":
    main()
