In [0]:
# Cell 1: Set up Databricks Workflow for full pipeline orchestration
import json
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.jobs import Job, JobCluster, NotebookTask, JobEmailNotifications

# Banner for this cell's output (emoji restored from mis-decoded UTF-8).
print("🔧 SETTING UP DATABRICKS WORKFLOW")
print("=" * 80)

# Initialize workspace client
# No host or token is passed explicitly; credentials are presumably resolved by
# the SDK's default authentication (notebook context or environment) -- confirm.
# NOTE(review): `ws` is not used in the visible part of this cell, which only
# builds and prints the job definition.
ws = WorkspaceClient()

# Define the workflow job
#
# Every task runs one notebook from the same project folder on its own,
# identically sized job cluster, so the shared pieces are defined once below
# instead of being repeated per task.

# Placeholder workspace folder: replace `your_username` here (single place)
# before deploying.
_PROJECT_ROOT = "/Workspace/your_username/databricks-insurance-claims-project"

# Per-task timeout, in seconds (one hour).
_TASK_TIMEOUT_SECONDS = 3600


def _new_cluster_spec() -> dict:
    """Return a fresh single-worker SPOT cluster spec for one task.

    A new dict is built on every call so that tasks never share the same
    cluster object and cannot be mutated together by accident.
    """
    return {
        "spark_version": "14.3.x-scala2.12",
        "node_type_id": "i3.xlarge",
        "num_workers": 1,
        "aws_attributes": {
            "availability": "SPOT",
        },
    }


def _notebook_task(task_key: str, description: str, notebook: str, upstream=()) -> dict:
    """Build one task entry of the job definition.

    Args:
        task_key: Key identifying the task inside the job.
        description: Human-readable description of the task.
        notebook: Notebook name, resolved relative to ``_PROJECT_ROOT``.
        upstream: Keys of the tasks this task depends on. When empty, the
            ``depends_on`` entry is left out entirely.

    Returns:
        The task dict, with keys in the order ``task_key``, ``description``,
        ``notebook_task``, optional ``depends_on``, ``new_cluster``,
        ``timeout_seconds``.
    """
    task = {
        "task_key": task_key,
        "description": description,
        "notebook_task": {
            "notebook_path": f"{_PROJECT_ROOT}/{notebook}",
        },
    }
    if upstream:
        task["depends_on"] = [{"task_key": key} for key in upstream]
    task["new_cluster"] = _new_cluster_spec()
    task["timeout_seconds"] = _TASK_TIMEOUT_SECONDS
    return task


# Dependency graph: bronze_claims fans out to the two silver tasks, and
# gold_layer waits for both of them.
job_config = {
    "name": "smart_claims_full_pipeline",
    "description": "Full end-to-end pipeline: Bronze → Silver → Gold with ML features",
    "tasks": [
        # NOTE(review): this task is labelled "Bronze" but runs the
        # 05_silver_claims notebook -- confirm this is the intended notebook.
        _notebook_task(
            "bronze_claims",
            "Load Bronze Claims",
            "05_silver_claims",
        ),
        _notebook_task(
            "silver_customers",
            "Transform Silver Customers",
            "06_silver_customers",
            upstream=("bronze_claims",),
        ),
        _notebook_task(
            "silver_policies",
            "Transform Silver Policies",
            "07_silver_policies",
            upstream=("bronze_claims",),
        ),
        _notebook_task(
            "gold_layer",
            "Generate Gold Analytics Tables",
            "09_gold_claims",
            upstream=("silver_customers", "silver_policies"),
        ),
    ],
    # Only one run of the pipeline may be active at a time.
    "max_concurrent_runs": 1,
}

# Show the job definition as pretty-printed JSON so it can be reviewed or
# copied into the Jobs UI.
print("📋 Workflow Configuration:")
print(json.dumps(job_config, indent=2))

print("\n✅ Workflow structure created (ready to deploy)")
print("=" * 80)

# Manual follow-up steps. The placeholder is spelled exactly as it appears in
# the notebook paths, and step 3 no longer claims that this cell creates the
# job: nothing in this cell calls the Jobs API.
print("\n📌 NEXT STEPS:")
print("1. Replace your_username in the notebook paths with your Databricks username")
print("2. Go to Databricks Jobs UI → Create Job → Copy this config")
print("3. Or create the job via the Jobs API (this cell only prints the config)")
print("=" * 80)