In [0]:
%pip install databricks-sdk --quiet
# Restart the Python process after the install; later cells import `databricks.sdk`.
dbutils.library.restartPython()

In [0]:
# Distinct workspace names, sorted alphabetically, read from the
# system.access.workspaces_latest system table.
workspace_names_pdf = (
    spark.table("system.access.workspaces_latest")
    .select("workspace_name")
    .distinct()
    .orderBy("workspace_name")
    .toPandas()
)
workspaces_list = workspace_names_pdf["workspace_name"].tolist()

# "All" leads the list so it can be the dropdown default.
workspace_options = ["All", *workspaces_list]

# Notebook parameters, as (name, default, choices, label).
for widget_args in (
    ("dry_run", "true", ["true", "false"], "Dry Run Mode"),
    ("workspaces", "All", workspace_options, "Target Workspaces"),
):
    dbutils.widgets.dropdown(*widget_args)

# Confirmation banner describing the two widgets created above.
widgets_banner_html = """
<div style="padding: 15px; background-color: #e8f5e9; border-left: 5px solid #4caf50; margin: 10px 0;">
    <h3 style="margin-top: 0; color: #2e7d32;">‚úì Configuration Widgets Created</h3>
    <ul style="color: #1b5e20;">
        <li><strong>Dry Run Mode:</strong> Set to 'true' for preview only, 'false' for actual updates</li>
        <li><strong>Target Workspaces:</strong> Select specific workspace or 'All' to process all workspaces</li>
    </ul>
    <p style="margin-top: 10px; color: #1565c0; font-style: italic;">‚úì Using system.access.workspaces_latest (available in all workspaces - no cloning needed!)</p>
</div>
"""
displayHTML(widgets_banner_html)

In [0]:
from pyspark.sql import functions as F

# Read the current widget selections.
dry_run_str = dbutils.widgets.get("dry_run")
workspaces_str = dbutils.widgets.get("workspaces")

# DRY_RUN is True only when the widget holds "true" (case-insensitive).
# An empty WORKSPACES_TO_UPDATE means "no workspace filter".
DRY_RUN = dry_run_str.lower() == "true"
if workspaces_str == "All":
    WORKSPACES_TO_UPDATE = ""
else:
    WORKSPACES_TO_UPDATE = workspaces_str.strip()

# Opportunities table that drives the update plan.
opportunities_df = spark.table("ex_dash_temp.billing_forecast.cluster_opportunities")

# Workspace lookup built from system.access.workspaces_latest; deployment_name
# is the first host label of workspace_url when it has the form
# https://<name>.cloud.databricks.com.
deployment_name_col = F.regexp_extract(
    F.col("workspace_url"), r"https://([^.]+)\.cloud\.databricks\.com", 1
)
workspaces_df = (
    spark.table("system.access.workspaces_latest")
    .withColumn("deployment_name", deployment_name_col)
    .select(
        F.col("workspace_id").cast("long").alias("workspace_id"),
        "workspace_name",
        "deployment_name",
    )
)

# Attach workspace_id / deployment_name to every opportunity row.
cluster_data = opportunities_df.join(workspaces_df, on="workspace_name", how="left")

# Restrict to the selected workspace(s) when a filter is set.
if WORKSPACES_TO_UPDATE:
    workspace_list = [ws.strip() for ws in WORKSPACES_TO_UPDATE.split(",")]
    filtered_cluster_data = cluster_data.filter(F.col("workspace_name").isin(workspace_list))
else:
    filtered_cluster_data = cluster_data

# Per-workspace cluster count and summed savings, largest count first.
per_workspace_df = filtered_cluster_data.groupBy("workspace_name").agg(
    F.count("*").alias("cluster_count"),
    F.sum("validated_savings").alias("total_savings"),
)
workspace_counts = per_workspace_df.orderBy(F.col("cluster_count").desc()).collect()

# Grand totals; a missing or zero savings value contributes 0.
total_clusters = 0
total_savings = 0
for row in workspace_counts:
    total_clusters += row.cluster_count
    total_savings += float(row.total_savings) if row.total_savings else 0

# Assemble the HTML table: header, one row per workspace, then the total row.
html_parts = ["""
<div style="border: 2px solid #1976d2; padding: 20px; margin: 20px 0; background-color: #e3f2fd; border-radius: 8px;">
    <h3 style="color: #0d47a1; margin-top: 0;">üìã Cluster Update Plan by Workspace</h3>
    <table style="width: 100%; border-collapse: collapse; margin-top: 15px;">
        <tr style="background-color: #90caf9;">
            <th style="padding: 12px; text-align: left; border: 1px solid #64b5f6; color: #0d47a1;">Workspace</th>
            <th style="padding: 12px; text-align: center; border: 1px solid #64b5f6; color: #0d47a1;">Clusters to Update</th>
            <th style="padding: 12px; text-align: right; border: 1px solid #64b5f6; color: #0d47a1;">Potential Savings (USD)</th>
        </tr>
"""]

for row in workspace_counts:
    if row.total_savings:
        savings_display = f"${row.total_savings:,.2f}"
    else:
        savings_display = "$0.00"
    html_parts.append(f"""
        <tr style="background-color: #ffffff;">
            <td style="padding: 10px; border: 1px solid #64b5f6; font-weight: bold;">{row.workspace_name}</td>
            <td style="padding: 10px; border: 1px solid #64b5f6; text-align: center; font-size: 18px; color: #1976d2;">{row.cluster_count}</td>
            <td style="padding: 10px; border: 1px solid #64b5f6; text-align: right; font-weight: bold; color: #2e7d32;">{savings_display}</td>
        </tr>
    """)

html_parts.append(f"""
        <tr style="background-color: #bbdefb; font-weight: bold;">
            <td style="padding: 12px; border: 1px solid #64b5f6; color: #0d47a1;">TOTAL</td>
            <td style="padding: 12px; border: 1px solid #64b5f6; text-align: center; font-size: 20px; color: #0d47a1;">{total_clusters}</td>
            <td style="padding: 12px; border: 1px solid #64b5f6; text-align: right; font-size: 18px; color: #1b5e20;">${total_savings:,.2f}</td>
        </tr>
    </table>
</div>
""")

workspace_table_html = "".join(html_parts)
displayHTML(workspace_table_html)

In [0]:
# Re-read the widgets so this cell reflects the current selections.
dry_run_str = dbutils.widgets.get("dry_run")
workspaces_str = dbutils.widgets.get("workspaces")

# Same conversions as the planning cell: boolean mode flag, "" meaning no filter.
DRY_RUN = dry_run_str.lower() == "true"
WORKSPACES_TO_UPDATE = "" if workspaces_str == "All" else workspaces_str.strip()

# Banner styling per execution mode: (color, background, icon, text).
mode_color, mode_bg, mode_icon, mode_text = (
    ("#4caf50", "#e8f5e9", "üü¢", "DRY RUN (PREVIEW ONLY - NO CHANGES)")
    if DRY_RUN
    else ("#f44336", "#ffebee", "üî¥", "LIVE UPDATE (WILL MODIFY CLUSTERS)")
)

# An empty filter is displayed as "ALL WORKSPACES".
workspace_display = WORKSPACES_TO_UPDATE or "ALL WORKSPACES"

# Top of the banner: title, execution mode and target workspaces.
html_header = f"""
<div style="border: 3px solid {mode_color}; padding: 20px; margin: 20px 0; background-color: {mode_bg}; border-radius: 8px;">
    <h2 style="text-align: center; color: {mode_color}; margin-top: 0;">
        ‚ö†Ô∏è CLUSTER UPDATE CONFIGURATION ‚ö†Ô∏è
    </h2>
    <hr style="border: 1px solid {mode_color}; margin: 20px 0;">
    
    <div style="font-size: 16px; line-height: 2;">
        <div style="margin: 15px 0;">
            <strong style="font-size: 18px;">üîß EXECUTION MODE:</strong>
            <span style="font-size: 20px; font-weight: bold; color: {mode_color}; margin-left: 20px;">
                {mode_icon} {mode_text}
            </span>
        </div>
        
        <div style="margin: 15px 0;">
            <strong style="font-size: 18px;">üåê TARGET WORKSPACES:</strong>
            <span style="font-size: 18px; font-weight: bold; color: #1976d2; margin-left: 20px;">
                {workspace_display}
            </span>
        </div>
    </div>
    
    <hr style="border: 1px solid {mode_color}; margin: 20px 0;">
    
    <div style="padding: 15px; background-color: white; border-radius: 5px; margin-top: 15px;">
"""

# Middle of the banner: reassurance for a dry run, warning for a live update.
if DRY_RUN:
    mode_notice_html = """
        <div style="color: #388e3c; font-weight: bold; font-size: 16px;">
            ‚úì Safe mode: Dry run will only preview changes without modifying clusters.
        </div>
    """
else:
    mode_notice_html = """
        <div style="color: #d32f2f; font-weight: bold; font-size: 16px;">
            ‚ö†Ô∏è WARNING: Live update mode is enabled!<br>
            ‚ö†Ô∏è Clusters will be ACTUALLY MODIFIED after validation.
        </div>
    """

# Bottom of the banner: closes the notice box and announces the countdown.
html_footer = """
    </div>
    
    <div style="text-align: center; margin-top: 20px; font-size: 18px; font-weight: bold; color: #ff6f00;">
        ‚è±Ô∏è Starting in 10 seconds...
    </div>
</div>
"""

html_content = html_header + mode_notice_html + html_footer
displayHTML(html_content)

In [0]:
import time

# Count down from 10 to 1, rendering one banner per second so the operator
# has time to stop the run.
for seconds_left in reversed(range(1, 11)):
    countdown_html = f"""
    <div style="text-align: center; padding: 20px; background-color: #fff3e0; border: 2px solid #ff9800; border-radius: 8px;">
        <h2 style="color: #e65100; margin: 0;">
            ‚è±Ô∏è Starting in <span style="font-size: 36px; color: #ff6f00;">{seconds_left}</span> seconds...
        </h2>
        <p style="color: #bf360c; margin-top: 10px; font-size: 14px;">(Press Stop to cancel)</p>
    </div>
    """
    displayHTML(countdown_html)
    time.sleep(1)

# Countdown finished: announce that processing begins.
start_banner_html = """
<div style="text-align: center; padding: 25px; background-color: #e3f2fd; border: 3px solid #2196f3; border-radius: 8px;">
    <h1 style="color: #0d47a1; margin: 0;">
        üöÄ STARTING CLUSTER UPDATE PROCESS
    </h1>
</div>
"""
displayHTML(start_banner_html)

In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.core import Config
from pyspark.sql import functions as F
from pyspark.sql.types import *
from datetime import datetime
import json
import time

# Service-principal OAuth credentials are read from the "sp-oauth" secret scope:
# key "client" holds the client id, key "secret" the client secret.
_SECRET_SCOPE = "sp-oauth"
client_id, client_secret = (
    dbutils.secrets.get(scope=_SECRET_SCOPE, key=secret_key)
    for secret_key in ("client", "secret")
)

print("‚úì Authentication credentials loaded successfully")

In [0]:
from databricks.sdk import WorkspaceClient

def get_workspace_client(deployment_name, client_id, client_secret):
    """Build a WorkspaceClient for the workspace with the given deployment name.

    The host is ``https://<deployment_name>.cloud.databricks.com``; the client
    id and secret are handed to the SDK unchanged.
    """
    connection_settings = {
        "host": "https://{}.cloud.databricks.com".format(deployment_name),
        "client_id": client_id,
        "client_secret": client_secret,
    }
    return WorkspaceClient(**connection_settings)

def get_cluster_current_config(ws_client, cluster_id):
    """Look up a cluster and summarise its node types, sizing and state.

    Returns:
        A dict with keys ``driver_instance_type``, ``worker_instance_type``,
        ``min_workers``, ``max_workers``, ``num_workers`` and ``state``.
        Autoscaling clusters report min/max and ``num_workers=None``; other
        clusters report ``num_workers`` and ``None`` for min/max.
        If anything raises, ``{"error": <message>}`` is returned instead.
    """
    try:
        details = ws_client.clusters.get(cluster_id=cluster_id)
        driver_type = details.driver_node_type_id
        worker_type = details.node_type_id

        scaling = details.autoscale
        if scaling:
            min_workers, max_workers, fixed_workers = scaling.min_workers, scaling.max_workers, None
        else:
            min_workers, max_workers, fixed_workers = None, None, details.num_workers

        state = details.state
        return {
            "driver_instance_type": driver_type,
            "worker_instance_type": worker_type,
            "min_workers": min_workers,
            "max_workers": max_workers,
            "num_workers": fixed_workers,
            "state": state.value if state else None,
        }
    except Exception as fetch_error:
        return {"error": str(fetch_error)}

def validate_cluster_config(current_config, expected_driver, expected_worker, expected_min, expected_max):
    """Check that a cluster still has the node types we expect to change FROM.

    Guards against updating clusters that were already modified: the driver
    and worker instance types in ``current_config`` must equal the expected
    ones.

    Args:
        current_config: Dict as returned by get_cluster_current_config.
        expected_driver: Driver instance type expected before the update.
        expected_worker: Worker instance type expected before the update.
        expected_min: Accepted but not compared.
        expected_max: Accepted but not compared.

    Returns:
        ``(is_valid, message)``; the message lists every mismatch, separated
        by ``"; "``, or reports a fetch failure.
    """
    if "error" in current_config:
        return False, f"Failed to fetch cluster: {current_config['error']}"

    # (label used in the message, key in current_config, expected value)
    expectations = (
        ("Driver", "driver_instance_type", expected_driver),
        ("Worker", "worker_instance_type", expected_worker),
    )
    mismatches = [
        f"{label} mismatch: current={current_config.get(key)}, expected={expected}"
        for label, key, expected in expectations
        if not (current_config.get(key) == expected)
    ]

    if mismatches:
        return False, "; ".join(mismatches)
    return True, "Configuration matches expected state"

# Status line confirming the helper functions in this cell were defined.
print("‚úì Cluster update functions defined using Databricks SDK (simplified approach)")

In [0]:
def _is_connectivity_error(error_msg):
    """Return True when the error text mentions a 403 or a certificate problem."""
    return (
        "403" in error_msg
        or "Cert validation failed" in error_msg
        or "certificate" in error_msg.lower()
    )


def _mark_access_failure(log_entry, workspace_name, error_msg, connectivity_action, failed_action):
    """Record a SKIPPED outcome caused by failing to reach a workspace or cluster.

    ``connectivity_action`` and ``failed_action`` are the phrase fragments that
    distinguish the client-creation failure from the cluster-fetch failure.
    """
    log_entry["validation_status"] = "FAILED"
    if _is_connectivity_error(error_msg):
        log_entry["validation_message"] = f"Connectivity failed to workspace '{workspace_name}' - Cross-workspace access denied or certificate validation failed"
        log_entry["update_message"] = f"Skipped: Cannot {connectivity_action} workspace '{workspace_name}' - Cross-workspace access issue"
    else:
        log_entry["validation_message"] = f"Failed to {failed_action}: {error_msg[:200]}"
        log_entry["update_message"] = f"Skipped: Failed to {failed_action} - {error_msg[:100]}"
    log_entry["update_status"] = "SKIPPED"
    log_entry["error_details"] = error_msg
    return log_entry


# Attributes copied from the cluster's current definition into the edit request.
# The clusters edit call replaces the cluster definition with what is sent, so
# everything that should survive the resize has to be forwarded explicitly.
_CLUSTER_EDIT_PASSTHROUGH_FIELDS = (
    "cluster_name",
    "autoscale",
    "num_workers",
    "spark_conf",
    "spark_env_vars",
    "custom_tags",
    "aws_attributes",
    "azure_attributes",
    "gcp_attributes",
    "autotermination_minutes",
    "enable_elastic_disk",
    "enable_local_disk_encryption",
    "init_scripts",
    "cluster_log_conf",
    "docker_image",
    "ssh_public_keys",
    "policy_id",
    "data_security_mode",
    "single_user_name",
    "runtime_engine",
    "workload_type",
    "instance_pool_id",
    "driver_instance_pool_id",
)


def _build_cluster_edit_kwargs(cluster_details, driver_node_type, worker_node_type):
    """Build ``clusters.edit`` keyword arguments that change only the node types.

    Args:
        cluster_details: Object returned by ``ws_client.clusters.get``.
        driver_node_type: New driver node type id.
        worker_node_type: New worker node type id.

    Returns:
        Dict containing ``spark_version`` (required by ``clusters.edit``), the
        cluster's current non-None settings, and the two new node type ids.

    Raises:
        ValueError: If the current definition exposes no ``spark_version``.
    """
    spark_version = getattr(cluster_details, "spark_version", None)
    if spark_version is None:
        raise ValueError("Cluster definition has no spark_version; refusing to edit")

    edit_kwargs = {"spark_version": spark_version}
    uses_autoscale = bool(getattr(cluster_details, "autoscale", None))
    for field_name in _CLUSTER_EDIT_PASSTHROUGH_FIELDS:
        # Autoscaling and a fixed worker count are alternatives; send only one.
        if field_name == "num_workers" and uses_autoscale:
            continue
        value = getattr(cluster_details, field_name, None)
        if value is not None:
            edit_kwargs[field_name] = value

    edit_kwargs["node_type_id"] = worker_node_type
    edit_kwargs["driver_node_type_id"] = driver_node_type
    return edit_kwargs


def update_cluster_with_recommendation(row, dry_run=True, batch_metadata=None):
    """Validate one cluster against its recommendation and optionally resize it.

    Steps: build the log entry, connect to the cluster's workspace, fetch the
    current configuration, skip clusters that are RUNNING/PENDING or whose node
    types no longer match the opportunities row, then either report what would
    change (dry run) or edit the cluster's driver/worker node types.

    Args:
        row: Cluster data row; must expose the attributes read below
            (cluster_id, deployment_name, driver_instance_type, ...).
        dry_run: If True, only preview changes without updating.
        batch_metadata: Dict containing batch tracking info (batch_id,
            start_time, etc.). Placeholder "unknown" values are used when
            omitted.

    Returns:
        The log entry dict. ``update_status`` is one of SKIPPED, DRY_RUN,
        SUCCESS or FAILED; exceptions are captured in the entry rather than
        raised.
    """
    # Initialize batch metadata if not provided
    if batch_metadata is None:
        batch_metadata = {
            "batch_id": "unknown",
            "execution_label": "unknown",
            "batch_start_time": datetime.now(),
            "batch_end_time": None,
            "execution_mode": "DRY_RUN" if dry_run else "LIVE_UPDATE",
            "workspace_filter_applied": "",
            "total_clusters_in_batch": 0,
            "executed_by_user": "unknown"
        }

    log_entry = {
        # Batch metadata
        "batch_id": batch_metadata["batch_id"],
        "execution_label": batch_metadata.get("execution_label", "unknown"),
        "batch_start_time": batch_metadata["batch_start_time"],
        "batch_end_time": batch_metadata.get("batch_end_time"),
        "execution_mode": batch_metadata["execution_mode"],
        "workspace_filter_applied": batch_metadata["workspace_filter_applied"],
        "total_clusters_in_batch": batch_metadata["total_clusters_in_batch"],
        "executed_by_user": batch_metadata["executed_by_user"],

        # Individual cluster details
        "log_id": f"{row.cluster_id}_{int(time.time() * 1000)}",
        "cluster_id": row.cluster_id,
        "cluster_name": row.cluster_name,
        "workspace_name": row.workspace_name,
        "workspace_id": row.workspace_id,
        "deployment_url": f"https://{row.deployment_name}.cloud.databricks.com" if row.deployment_name else None,
        "action_type": row.action_item,
        "recommendation": row.recommendation,
        "current_driver_instance": row.driver_instance_type,
        "current_worker_instance": row.worker_instance_type,
        "suggested_driver_instance": row.suggested_driver_instance,
        "suggested_worker_instance": row.suggested_worker_instance,
        "current_min_workers": row.min_workers,
        "current_max_workers": row.max_workers,
        "validation_status": "PENDING",
        "validation_message": "",
        "update_status": "PENDING",
        "update_message": "",
        "dry_run": dry_run,
        "validated_savings": row.validated_savings,
        "execution_timestamp": datetime.now(),
        "error_details": None
    }

    try:
        # Skip if no deployment name
        if not row.deployment_name:
            log_entry["validation_status"] = "FAILED"
            log_entry["validation_message"] = "No deployment name found for workspace"
            log_entry["update_status"] = "SKIPPED"
            log_entry["update_message"] = "Skipped: No deployment name found for workspace"
            return log_entry

        # Create workspace client - SDK handles OAuth automatically
        try:
            ws_client = get_workspace_client(row.deployment_name, client_id, client_secret)
        except Exception as client_error:
            return _mark_access_failure(
                log_entry, row.workspace_name, str(client_error),
                "connect to", "create workspace client",
            )

        # Get current cluster configuration; fetch failures come back as {"error": ...}
        current_config = get_cluster_current_config(ws_client, row.cluster_id)
        if "error" in current_config:
            return _mark_access_failure(
                log_entry, row.workspace_name, current_config["error"],
                "fetch cluster from", "fetch cluster",
            )

        # Never touch clusters that are running or starting
        cluster_state = current_config.get("state")
        if cluster_state in ["RUNNING", "PENDING"]:
            log_entry["validation_status"] = "FAILED"
            log_entry["validation_message"] = f"Cluster is in {cluster_state} state - cannot update running or starting clusters"
            log_entry["update_status"] = "SKIPPED"
            log_entry["update_message"] = f"Skipped: Cluster in {cluster_state} state - stop cluster before updating"
            return log_entry

        # Validate cluster configuration matches expected state
        is_valid, validation_msg = validate_cluster_config(
            current_config,
            row.driver_instance_type,
            row.worker_instance_type,
            row.min_workers,
            row.max_workers
        )

        log_entry["validation_status"] = "PASSED" if is_valid else "FAILED"
        log_entry["validation_message"] = validation_msg

        if not is_valid:
            log_entry["update_status"] = "SKIPPED"
            log_entry["update_message"] = f"Skipped: Cluster config mismatch - {validation_msg[:150]}"
            return log_entry

        change_summary = f"Driver {row.driver_instance_type}‚Üí{row.suggested_driver_instance}, Worker {row.worker_instance_type}‚Üí{row.suggested_worker_instance}"

        # Proceed with update if validation passed
        if dry_run:
            log_entry["update_status"] = "DRY_RUN"
            log_entry["update_message"] = "Would update: " + change_summary
        else:
            # Perform actual update
            try:
                # clusters.edit requires spark_version and replaces the whole
                # definition, so re-read the cluster and forward its settings.
                cluster_details = ws_client.clusters.get(cluster_id=row.cluster_id)
                edit_kwargs = _build_cluster_edit_kwargs(
                    cluster_details,
                    row.suggested_driver_instance,
                    row.suggested_worker_instance,
                )
                ws_client.clusters.edit(cluster_id=row.cluster_id, **edit_kwargs)
                log_entry["update_status"] = "SUCCESS"
                log_entry["update_message"] = "Updated: " + change_summary
            except Exception as update_error:
                error_msg = str(update_error)
                log_entry["update_status"] = "FAILED"
                if _is_connectivity_error(error_msg):
                    log_entry["update_message"] = f"Failed: Connectivity issue to workspace '{row.workspace_name}' during update"
                else:
                    log_entry["update_message"] = f"Failed: {error_msg[:200]}"
                log_entry["error_details"] = error_msg

    except Exception as e:
        # Anything unexpected is recorded on the entry so the batch keeps going.
        error_msg = str(e)
        log_entry["validation_status"] = "ERROR"
        log_entry["update_status"] = "FAILED"
        log_entry["error_details"] = error_msg
        if _is_connectivity_error(error_msg):
            log_entry["validation_message"] = f"Connectivity failed to workspace '{row.workspace_name}' - Cross-workspace access denied or certificate validation failed"
            log_entry["update_message"] = f"Failed: Cannot connect to workspace '{row.workspace_name}'"
        else:
            log_entry["validation_message"] = f"Error: {error_msg[:200]}"
            log_entry["update_message"] = f"Failed: {error_msg[:200]}"

    return log_entry

# Status line confirming update_cluster_with_recommendation was defined.
print("‚úì Main orchestration function defined with complete validation and update messages")

In [0]:
# ============================================================
# TEST CASE: Verify DRY_RUN mode does not update clusters
# ============================================================

from unittest.mock import Mock, patch, MagicMock
import sys

import html  # cell-local: escapes test messages before they are embedded in HTML

displayHTML("""
<div style="border: 3px solid #9c27b0; padding: 20px; margin: 20px 0; background-color: #f3e5f5; border-radius: 8px;">
    <h2 style="text-align: center; color: #6a1b9a; margin-top: 0;">
        üß™ Running Dry-Run Test Suite
    </h2>
</div>
""")


class MockRow:
    """Stand-in for a cluster row, carrying the attributes the update function reads."""

    def __init__(self):
        self.cluster_id = "test-cluster-123"
        self.cluster_name = "Test Cluster"
        self.workspace_name = "test-workspace"
        self.workspace_id = 12345
        self.deployment_name = "test-deployment"
        self.action_item = "Test action"
        self.recommendation = "Test recommendation"
        self.driver_instance_type = "m5.xlarge"
        self.worker_instance_type = "m5.xlarge"
        self.suggested_driver_instance = "m5.large"
        self.suggested_worker_instance = "m5.large"
        self.min_workers = 1
        self.max_workers = 3
        self.validated_savings = 100.00


test_row = MockRow()

# Fake cluster returned by clusters.get(): terminated, autoscaling 1-3 workers,
# with node types equal to the row's current (pre-update) instance types.
mock_cluster_info = Mock()
mock_cluster_info.driver_node_type_id = "m5.xlarge"
mock_cluster_info.node_type_id = "m5.xlarge"
mock_cluster_info.autoscale = Mock(min_workers=1, max_workers=3)
mock_cluster_info.num_workers = None
mock_cluster_info.state = Mock(value="TERMINATED")

mock_clusters_api = Mock()
mock_clusters_api.get = Mock(return_value=mock_cluster_info)
mock_clusters_api.edit = Mock()  # This should NEVER be called in dry_run mode

mock_ws_client = Mock()
mock_ws_client.clusters = mock_clusters_api

test_results = []


def _record_check(test_name, passed, pass_message, fail_message):
    """Append (name, passed, message) to test_results, picking the message by outcome."""
    test_results.append((test_name, passed, pass_message if passed else fail_message))


# Test 1: Verify dry_run=True does NOT call clusters.edit()
displayHTML("""
<div style="padding: 10px; background-color: #e3f2fd; border-left: 4px solid #2196f3; margin: 10px 0;">
    <strong>Test 1:</strong> Dry-run mode should NOT call clusters.edit()
</div>
""")

with patch('__main__.get_workspace_client', return_value=mock_ws_client):
    result = update_cluster_with_recommendation(test_row, dry_run=True)

_record_check(
    "Test 1a",
    not mock_clusters_api.edit.called,
    "clusters.edit() was NOT called",
    f"clusters.edit() was called {mock_clusters_api.edit.call_count} time(s) in dry_run mode",
)
_record_check(
    "Test 1b",
    result["update_status"] == "DRY_RUN",
    "update_status is 'DRY_RUN'",
    f"Expected update_status='DRY_RUN', got '{result['update_status']}'",
)
_record_check(
    "Test 1c",
    result["dry_run"] is True,
    "dry_run flag is True in log entry",
    f"Expected dry_run=True, got {result['dry_run']}",
)

# Test 2: Verify dry_run=False DOES call clusters.edit()
displayHTML("""
<div style="padding: 10px; background-color: #fff3e0; border-left: 4px solid #ff9800; margin: 10px 0;">
    <strong>Test 2:</strong> Live mode should call clusters.edit()
</div>
""")

mock_clusters_api.edit.reset_mock()  # Forget any calls recorded during Test 1

with patch('__main__.get_workspace_client', return_value=mock_ws_client):
    result = update_cluster_with_recommendation(test_row, dry_run=False)

_record_check(
    "Test 2a",
    mock_clusters_api.edit.called,
    f"clusters.edit() was called with: {mock_clusters_api.edit.call_args}",
    "clusters.edit() was NOT called in live mode",
)
_record_check(
    "Test 2b",
    result["update_status"] == "SUCCESS",
    "update_status is 'SUCCESS'",
    f"Expected update_status='SUCCESS', got '{result['update_status']}'",
)
_record_check(
    "Test 2c",
    result["dry_run"] is False,
    "dry_run flag is False in log entry",
    f"Expected dry_run=False, got {result['dry_run']}",
)

# Display results
results_html = """
<div style="border: 2px solid #4caf50; padding: 20px; margin: 20px 0; background-color: #e8f5e9; border-radius: 8px;">
    <h3 style="color: #2e7d32; margin-top: 0;">Test Results:</h3>
    <table style="width: 100%; border-collapse: collapse;">
        <tr style="background-color: #c8e6c9;">
            <th style="padding: 10px; text-align: left; border: 1px solid #81c784;">Test</th>
            <th style="padding: 10px; text-align: left; border: 1px solid #81c784;">Status</th>
            <th style="padding: 10px; text-align: left; border: 1px solid #81c784;">Details</th>
        </tr>
"""

all_passed = all(passed for _, passed, _ in test_results)
for test_name, passed, message in test_results:
    status_icon = "‚úì" if passed else "‚úó"
    status_color = "#4caf50" if passed else "#f44336"
    bg_color = "#ffffff" if passed else "#ffebee"
    # Messages can contain reprs with angle brackets (e.g. mock call args).
    safe_message = html.escape(str(message))

    results_html += f"""
        <tr style="background-color: {bg_color};">
            <td style="padding: 10px; border: 1px solid #81c784;">{test_name}</td>
            <td style="padding: 10px; border: 1px solid #81c784; color: {status_color}; font-weight: bold;">{status_icon} {'PASSED' if passed else 'FAILED'}</td>
            <td style="padding: 10px; border: 1px solid #81c784;">{safe_message}</td>
        </tr>
    """

results_html += """
    </table>
</div>
"""

if all_passed:
    results_html += """
    <div style="border: 3px solid #4caf50; padding: 20px; margin: 20px 0; background-color: #e8f5e9; border-radius: 8px; text-align: center;">
        <h2 style="color: #2e7d32; margin: 0;">‚úì ALL TESTS PASSED</h2>
        <div style="margin-top: 15px; font-size: 16px; color: #1b5e20;">
            <p>‚úì Dry-run mode does NOT update clusters</p>
            <p>‚úì Live mode DOES update clusters</p>
            <p>‚úì Log entries correctly reflect the mode</p>
            <p style="font-weight: bold; margin-top: 20px;">It is SAFE to run with DRY_RUN=True</p>
        </div>
    </div>
    """
else:
    results_html += """
    <div style="border: 3px solid #f44336; padding: 20px; margin: 20px 0; background-color: #ffebee; border-radius: 8px; text-align: center;">
        <h2 style="color: #c62828; margin: 0;">‚úó SOME TESTS FAILED</h2>
        <p style="margin-top: 15px; font-size: 16px; color: #b71c1c;">Please review the test results above.</p>
    </div>
    """

displayHTML(results_html)

# These checks gate the real update: stop the notebook run instead of letting
# later cells execute when the dry-run guarantee could not be confirmed.
if not all_passed:
    failed_names = ", ".join(name for name, passed, _ in test_results if not passed)
    raise RuntimeError(
        f"Dry-run safety tests failed ({failed_names}); halting before any cluster update."
    )

In [0]:
%sql
-- Create the Delta table that stores cluster update log rows, if it does not exist yet.
-- The column names match the keys of the log entry dict returned by
-- update_cluster_with_recommendation: batch-level fields first, then per-cluster fields.
CREATE TABLE IF NOT EXISTS ex_dash_temp.billing_forecast.cluster_update_log (
  -- Batch Identification
  batch_id STRING COMMENT 'Unique identifier for this execution batch',
  execution_label STRING COMMENT 'Human-readable label: YYYY-MM-DD_HH-MM_MODE_WORKSPACES for easy filtering',
  batch_start_time TIMESTAMP COMMENT 'When this batch execution started',
  batch_end_time TIMESTAMP COMMENT 'When this batch execution completed',
  execution_mode STRING COMMENT 'DRY_RUN or LIVE_UPDATE',
  workspace_filter_applied STRING COMMENT 'Workspace filter used (empty = all workspaces)',
  total_clusters_in_batch LONG COMMENT 'Total number of clusters processed in this batch',
  executed_by_user STRING COMMENT 'User who executed this batch',
  
  -- Individual Cluster Details
  log_id STRING COMMENT 'Unique identifier for this specific cluster update',
  cluster_id STRING,
  cluster_name STRING,
  workspace_name STRING,
  workspace_id LONG,
  deployment_url STRING,
  action_type STRING,
  recommendation STRING,
  current_driver_instance STRING,
  current_worker_instance STRING,
  suggested_driver_instance STRING,
  suggested_worker_instance STRING,
  current_min_workers LONG,
  current_max_workers LONG,
  validation_status STRING,
  validation_message STRING,
  update_status STRING,
  update_message STRING,
  dry_run BOOLEAN,
  validated_savings DECIMAL(35,2),
  execution_timestamp TIMESTAMP COMMENT 'When this specific cluster was processed',
  error_details STRING
)
USING DELTA
COMMENT 'Log table for cluster update activities with batch tracking for filtering and analysis'

In [0]:
# Intentionally a no-op: the opportunities data (`cluster_data`) is now loaded by the
# workspace breakdown cell earlier in this notebook. Kept only as a placeholder.
pass

In [0]:
import uuid

# Run one update batch: build the batch metadata, pick the clusters to process,
# call update_cluster_with_recommendation() for each of them and collect the
# returned log entries in `log_entries` for the cell that persists them.
#
# Configuration is loaded from widgets (see cells above).
# DRY_RUN, WORKSPACES_TO_UPDATE and cluster_data are already set; datetime, time
# and F are not imported in this cell and must already be in scope.

mode_text = "DRY RUN (Preview Only)" if DRY_RUN else "LIVE UPDATE"
mode_color = "#4caf50" if DRY_RUN else "#f44336"

# Generate batch metadata
batch_start_time = datetime.now()
batch_id = str(uuid.uuid4())
current_user = spark.sql("SELECT current_user() as user").collect()[0]["user"]

# Create execution label for easy filtering
# Format: YYYY-MM-DD_HH-MM_MODE_WORKSPACES
date_str = batch_start_time.strftime('%Y-%m-%d')
time_str = batch_start_time.strftime('%H-%M')
mode_str = "DRY-RUN" if DRY_RUN else "LIVE"
workspace_str = WORKSPACES_TO_UPDATE.replace(",", "+") if WORKSPACES_TO_UPDATE else "ALL"
execution_label = f"{date_str}_{time_str}_{mode_str}_{workspace_str}"

displayHTML(f"""
<div style="padding: 15px; background-color: #e3f2fd; border-left: 5px solid #2196f3; margin: 10px 0;">
    <h3 style="margin-top: 0; color: #1565c0;">Execution Mode: <span style="color: {mode_color};">{mode_text}</span></h3>
    <p style="margin: 5px 0; color: #0d47a1;"><strong>Execution Label:</strong> <code style="background-color: #bbdefb; padding: 2px 6px; border-radius: 3px;">{execution_label}</code></p>
    <p style="margin: 5px 0; color: #0d47a1;"><strong>Batch ID:</strong> {batch_id}</p>
    <p style="margin: 5px 0; color: #0d47a1;"><strong>Executed By:</strong> {current_user}</p>
    <p style="margin: 5px 0; color: #0d47a1;"><strong>Start Time:</strong> {batch_start_time.strftime('%Y-%m-%d %H:%M:%S')}</p>
</div>
""")

# Filter by workspace if specified (cluster_data already loaded in previous cell).
# An empty WORKSPACES_TO_UPDATE means "no filter"; otherwise it is read as a
# comma-separated list of workspace names.
filtered_cluster_data = cluster_data
if WORKSPACES_TO_UPDATE:
    workspace_list = [ws.strip() for ws in WORKSPACES_TO_UPDATE.split(",")]
    filtered_cluster_data = cluster_data.filter(F.col("workspace_name").isin(workspace_list))

# Collect cluster data to the driver; the clusters are processed one by one below
cluster_rows = filtered_cluster_data.collect()
total_clusters = len(cluster_rows)

# Create batch metadata dictionary (handed to every per-cluster update call)
batch_metadata = {
    "batch_id": batch_id,
    "execution_label": execution_label,
    "batch_start_time": batch_start_time,
    "batch_end_time": None,  # Will be set after processing
    "execution_mode": "DRY_RUN" if DRY_RUN else "LIVE_UPDATE",
    "workspace_filter_applied": WORKSPACES_TO_UPDATE if WORKSPACES_TO_UPDATE else "ALL",
    "total_clusters_in_batch": total_clusters,
    "executed_by_user": current_user
}

# Always rebind log_entries for this batch. When no cluster matches the filter,
# later cells must see an empty list rather than fail with NameError or append
# the entries left over from a previous run a second time.
log_entries = []

if not cluster_rows:
    displayHTML("""
    <div style="padding: 15px; background-color: #ffebee; border-left: 5px solid #f44336; margin: 10px 0;">
        <h3 style="margin: 0; color: #c62828;">‚ö† No clusters found matching the filter criteria</h3>
    </div>
    """)
else:
    displayHTML(f"""
    <div style="padding: 15px; background-color: #fff3e0; border-left: 5px solid #ff9800; margin: 10px 0;">
        <h3 style="margin: 0; color: #e65100;">üîÑ Processing {total_clusters} clusters...</h3>
    </div>
    """)

    # Process each cluster; every call returns one log entry
    for idx, row in enumerate(cluster_rows, 1):
        display(f"Processing {idx}/{total_clusters}: {row.cluster_name} ({row.cluster_id}) in {row.workspace_name}")
        log_entry = update_cluster_with_recommendation(row, dry_run=DRY_RUN, batch_metadata=batch_metadata)
        log_entries.append(log_entry)

        # Brief pause after every 10th cluster to avoid rate limiting
        if idx % 10 == 0:
            time.sleep(1)

    # Update batch end time
    batch_end_time = datetime.now()
    batch_metadata["batch_end_time"] = batch_end_time

    # The end time is only known now, so stamp it onto every entry explicitly
    for entry in log_entries:
        entry["batch_end_time"] = batch_end_time

    duration_seconds = (batch_end_time - batch_start_time).total_seconds()

    displayHTML(f"""
    <div style="padding: 15px; background-color: #e8f5e9; border-left: 5px solid #4caf50; margin: 10px 0;">
        <h3 style="margin: 0; color: #2e7d32;">‚úì Processed {len(log_entries)} clusters</h3>
        <p style="margin: 5px 0; color: #1b5e20;"><strong>Batch Duration:</strong> {duration_seconds:.2f} seconds</p>
        <p style="margin: 5px 0; color: #1b5e20;"><strong>Execution Label:</strong> <code style="background-color: #c8e6c9; padding: 2px 6px; border-radius: 3px;">{execution_label}</code></p>
        <p style="margin: 5px 0; color: #1b5e20;"><strong>Batch ID:</strong> {batch_id}</p>
    </div>
    """)

In [0]:
# Persist this batch's log entries: build the explicit schema, turn the
# collected dictionaries into a DataFrame and append it to the log table.

# (column name, Spark type) pairs in table order: batch metadata first,
# then the per-cluster details. Every column is nullable.
log_field_specs = [
    # Batch metadata
    ("batch_id", StringType()),
    ("execution_label", StringType()),
    ("batch_start_time", TimestampType()),
    ("batch_end_time", TimestampType()),
    ("execution_mode", StringType()),
    ("workspace_filter_applied", StringType()),
    ("total_clusters_in_batch", LongType()),
    ("executed_by_user", StringType()),
    # Individual cluster details
    ("log_id", StringType()),
    ("cluster_id", StringType()),
    ("cluster_name", StringType()),
    ("workspace_name", StringType()),
    ("workspace_id", LongType()),
    ("deployment_url", StringType()),
    ("action_type", StringType()),
    ("recommendation", StringType()),
    ("current_driver_instance", StringType()),
    ("current_worker_instance", StringType()),
    ("suggested_driver_instance", StringType()),
    ("suggested_worker_instance", StringType()),
    ("current_min_workers", LongType()),
    ("current_max_workers", LongType()),
    ("validation_status", StringType()),
    ("validation_message", StringType()),
    ("update_status", StringType()),
    ("update_message", StringType()),
    ("dry_run", BooleanType()),
    ("validated_savings", DecimalType(35, 2)),
    ("execution_timestamp", TimestampType()),
    ("error_details", StringType()),
]

log_schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in log_field_specs]
)

log_df = spark.createDataFrame(log_entries, schema=log_schema)

# Append (never overwrite) so earlier batches stay in the log table
append_writer = log_df.write.mode("append")
append_writer.saveAsTable("ex_dash_temp.billing_forecast.cluster_update_log")

log_count = log_df.count()
saved_banner_html = f"""
<div style="padding: 15px; background-color: #e8f5e9; border-left: 5px solid #4caf50; margin: 10px 0;">
    <h3 style="margin: 0; color: #2e7d32;">‚úì Saved {log_count} log entries to cluster_update_log table</h3>
    <p style="margin: 5px 0; color: #1b5e20;"><strong>Execution Label:</strong> <code style="background-color: #c8e6c9; padding: 2px 6px; border-radius: 3px;">{execution_label}</code></p>
    <p style="margin: 5px 0; color: #1b5e20;"><strong>Batch ID:</strong> {batch_id}</p>
    <p style="margin: 5px 0; color: #1b5e20;">Use the Execution Label for easy filtering in dashboards</p>
</div>
"""
displayHTML(saved_banner_html)

In [0]:
# Execution summary: compare the opportunities table row count with the number
# of log rows produced by this run, then break the run down by status.
# NOTE(review): the comparison uses the whole opportunities table, while the
# batch cell can restrict a run through WORKSPACES_TO_UPDATE - a filtered run
# would presumably be reported as a mismatch; confirm that this is intended.
opportunities_count = spark.table("ex_dash_temp.billing_forecast.cluster_opportunities").count()
log_count = log_df.count()

row_match = opportunities_count == log_count
if row_match:
    match_color, match_icon, match_text = "#4caf50", "‚úì", "YES"
else:
    match_color, match_icon, match_text = "#f44336", "‚úó", "NO"

# Green for a preview run, red for a run that really changes clusters
if DRY_RUN:
    mode_display, mode_color = "DRY RUN", "#4caf50"
else:
    mode_display, mode_color = "LIVE UPDATE", "#f44336"

displayHTML(f"""
<div style="border: 2px solid #2196f3; padding: 20px; margin: 20px 0; background-color: #e3f2fd; border-radius: 8px;">
    <h2 style="text-align: center; color: #1565c0; margin-top: 0;">
        üìä EXECUTION SUMMARY
    </h2>
    <hr style="border: 1px solid #2196f3; margin: 15px 0;">
    
    <div style="font-size: 16px; line-height: 2; padding: 10px;">
        <div style="margin: 10px 0;">
            <strong>Mode:</strong> 
            <span style="color: {mode_color}; font-weight: bold; font-size: 18px;">{mode_display}</span>
        </div>
        <div style="margin: 10px 0;">
            <strong>Opportunities table rows:</strong> 
            <span style="font-size: 18px; color: #1976d2;">{opportunities_count}</span>
        </div>
        <div style="margin: 10px 0;">
            <strong>Log table rows (this run):</strong> 
            <span style="font-size: 18px; color: #1976d2;">{log_count}</span>
        </div>
        <div style="margin: 10px 0;">
            <strong>Row count match:</strong> 
            <span style="color: {match_color}; font-weight: bold; font-size: 18px;">{match_icon} {match_text}</span>
        </div>
    </div>
</div>
""")

# Cluster counts per (validation_status, update_status) pair, largest first
displayHTML("""
<div style="padding: 10px; background-color: #fff3e0; border-left: 4px solid #ff9800; margin: 15px 0;">
    <h3 style="margin-top: 0; color: #e65100;">Status Breakdown:</h3>
</div>
""")
status_breakdown = log_df.groupBy("validation_status", "update_status").count()
display(status_breakdown.orderBy("count", ascending=False))

# Cluster count and summed validated_savings per update_status, largest total first
displayHTML("""
<div style="padding: 10px; background-color: #e8f5e9; border-left: 4px solid #4caf50; margin: 15px 0;">
    <h3 style="margin-top: 0; color: #2e7d32;">Potential Savings Summary:</h3>
</div>
""")
savings_by_status = log_df.groupBy("update_status").agg(
    F.count("*").alias("cluster_count"),
    F.sum("validated_savings").alias("total_savings_usd")
)
display(savings_by_status.orderBy("total_savings_usd", ascending=False))

In [0]:
# Per-cluster view of this run's log entries, highest validated savings first.
displayHTML("""
<div style="padding: 10px; background-color: #e3f2fd; border-left: 4px solid #2196f3; margin: 15px 0;">
    <h3 style="margin-top: 0; color: #1565c0;">Detailed Log Entries:</h3>
</div>
""")

# Columns shown in the detail table, in display order
detail_columns = [
    "cluster_name",
    "workspace_name",
    "validation_status",
    "update_status",
    "current_driver_instance",
    "suggested_driver_instance",
    "current_worker_instance",
    "suggested_worker_instance",
    "validated_savings",
    "update_message",
]
savings_descending = F.col("validated_savings").desc()

display(log_df.select(*detail_columns).orderBy(savings_descending))

In [0]:
# Batch execution history: the 20 most recently started batches from the log
# table, one row per batch, followed by usage hints for execution_label.
history_banner_html = """
<div style="padding: 15px; background-color: #e3f2fd; border-left: 5px solid #2196f3; margin: 15px 0;">
    <h3 style="margin-top: 0; color: #1565c0;">üìã Batch Execution History</h3>
    <p style="margin: 5px 0; color: #0d47a1;">Use execution_label for easy filtering in dashboards</p>
</div>
"""
displayHTML(history_banner_html)

# One row per batch: its metadata plus per-status cluster counts and the savings total
batch_summary = spark.sql("""
    SELECT 
        execution_label,
        batch_id,
        execution_mode,
        workspace_filter_applied,
        executed_by_user,
        batch_start_time,
        batch_end_time,
        ROUND((UNIX_TIMESTAMP(batch_end_time) - UNIX_TIMESTAMP(batch_start_time)), 2) as duration_seconds,
        total_clusters_in_batch,
        COUNT(*) as clusters_processed,
        SUM(CASE WHEN update_status = 'SUCCESS' THEN 1 ELSE 0 END) as successful_updates,
        SUM(CASE WHEN update_status = 'DRY_RUN' THEN 1 ELSE 0 END) as dry_run_previews,
        SUM(CASE WHEN update_status = 'SKIPPED' THEN 1 ELSE 0 END) as skipped_clusters,
        SUM(CASE WHEN update_status = 'FAILED' THEN 1 ELSE 0 END) as failed_updates,
        SUM(validated_savings) as total_potential_savings
    FROM ex_dash_temp.billing_forecast.cluster_update_log
    GROUP BY 
        execution_label,
        batch_id,
        execution_mode,
        workspace_filter_applied,
        executed_by_user,
        batch_start_time,
        batch_end_time,
        total_clusters_in_batch
    ORDER BY batch_start_time DESC
    LIMIT 20
""")

display(batch_summary)

# Plain-text usage notes, printed one entry per print() call
divider = "=" * 80
usage_notes = [
    "\n" + divider,
    "HOW TO USE EXECUTION LABEL FILTERING:",
    divider,
    "\n1. Copy an execution_label from the table above (e.g., '2024-11-20_15-30_DRY-RUN_prod')",
    "\n2. Query specific execution details:",
    "   SELECT * FROM ex_dash_temp.billing_forecast.cluster_update_log",
    "   WHERE execution_label = '<your-execution-label>'",
    "\n3. Filter by pattern (useful in dashboards):",
    "   WHERE execution_label LIKE '2024-11-20%'  -- All runs on Nov 20",
    "   WHERE execution_label LIKE '%DRY-RUN%'    -- All dry runs",
    "   WHERE execution_label LIKE '%prod%'       -- All prod workspace runs",
    "\n4. Use in dashboard filters:",
    "   - Add execution_label as a dropdown filter",
    "   - Users can easily select specific runs",
    divider,
]
for note in usage_notes:
    print(note)

In [0]:
%sql
-- Example: per-cluster details for a single batch.
-- The commented-out query below is a template: replace '<batch-id>' with an
-- actual batch_id from the history above, then uncomment and run it.

-- Uncomment and run after replacing batch_id:
/*
SELECT 
    batch_id,
    execution_mode,
    cluster_name,
    workspace_name,
    validation_status,
    update_status,
    current_driver_instance,
    suggested_driver_instance,
    current_worker_instance,
    suggested_worker_instance,
    validated_savings,
    update_message,
    execution_timestamp
FROM ex_dash_temp.billing_forecast.cluster_update_log
WHERE batch_id = '<batch-id>'
ORDER BY execution_timestamp
*/

-- Show most recent batch as example: the subquery picks the batch_id of the row
-- with the latest batch_start_time; the outer query lists the first 10 clusters
-- of that batch in processing order.
SELECT 
    batch_id,
    execution_mode,
    cluster_name,
    workspace_name,
    validation_status,
    update_status,
    validated_savings,
    update_message
FROM ex_dash_temp.billing_forecast.cluster_update_log
WHERE batch_id = (
    SELECT batch_id 
    FROM ex_dash_temp.billing_forecast.cluster_update_log 
    ORDER BY batch_start_time DESC 
    LIMIT 1
)
ORDER BY execution_timestamp
LIMIT 10

In [0]:
# Offer every execution_label found in the log table through a dropdown widget
# (last label in descending sort order preselected), or show a notice when the
# log table holds no labelled rows yet.
execution_labels_df = spark.sql("""
    SELECT DISTINCT execution_label
    FROM ex_dash_temp.billing_forecast.cluster_update_log
    WHERE execution_label IS NOT NULL
    ORDER BY execution_label DESC
""")

labels_pdf = execution_labels_df.toPandas()
execution_labels = labels_pdf['execution_label'].tolist()

if not execution_labels:
    displayHTML("""
    <div style="padding: 15px; background-color: #ffebee; border-left: 5px solid #f44336; margin: 10px 0;">
        <h3 style="margin: 0; color: #c62828;">‚ö† No execution logs found</h3>
        <p style="margin: 5px 0; color: #b71c1c;">Run the cluster update process first to generate logs</p>
    </div>
    """)
else:
    # The first list element is the widget's default value
    default_label = execution_labels[0]
    dbutils.widgets.dropdown(
        "selected_execution_label",
        default_label,
        execution_labels,
        "Select Execution Run"
    )

    displayHTML("""
    <div style="padding: 15px; background-color: #fff3e0; border-left: 5px solid #ff9800; margin: 10px 0;">
        <h3 style="margin-top: 0; color: #e65100;">üîç Interactive Filter</h3>
        <p style="margin: 5px 0; color: #bf360c;">Select an execution run from the dropdown above to view its details</p>
    </div>
    """)

In [0]:
# Show the summary statistics and the per-cluster log rows of the run chosen in
# the "selected_execution_label" widget.
import html


def _count_with_status(status):
    """Return a column that counts rows whose update_status equals `status`.

    The sum is coalesced so that a run without any rows yields 0 instead of NULL.
    """
    is_match = F.when(F.col("update_status") == status, 1).otherwise(0)
    return F.coalesce(F.sum(is_match), F.lit(0))


try:
    selected_label = dbutils.widgets.get("selected_execution_label")

    # The label embeds workspace names, so escape it before placing it in HTML
    displayHTML(f"""
    <div style="padding: 15px; background-color: #e3f2fd; border-left: 5px solid #2196f3; margin: 15px 0;">
        <h3 style="margin-top: 0; color: #1565c0;">üìä Viewing Execution: <code style="background-color: #bbdefb; padding: 2px 6px; border-radius: 3px;">{html.escape(selected_label)}</code></h3>
    </div>
    """)

    # Compare the label as a value through the DataFrame API instead of pasting
    # it into SQL text, so a quote inside the label cannot break or alter the query
    run_logs = spark.table("ex_dash_temp.billing_forecast.cluster_update_log").filter(
        F.col("execution_label") == selected_label
    )

    # Query logs for selected execution, in processing order
    selected_logs = run_logs.select(
        "execution_label",
        "execution_mode",
        "cluster_name",
        "workspace_name",
        "validation_status",
        "update_status",
        "current_driver_instance",
        "suggested_driver_instance",
        "current_worker_instance",
        "suggested_worker_instance",
        "validated_savings",
        "update_message",
        "execution_timestamp",
    ).orderBy("execution_timestamp")

    # Single-row summary of the selected run
    summary = run_logs.agg(
        F.count("*").alias("total_clusters"),
        _count_with_status("SUCCESS").alias("successful"),
        _count_with_status("DRY_RUN").alias("dry_run"),
        _count_with_status("SKIPPED").alias("skipped"),
        _count_with_status("FAILED").alias("failed"),
        F.sum("validated_savings").alias("total_savings"),
    ).collect()[0]

    # The sum is NULL when the run has no rows or no savings values; formatting
    # None with ',.2f' would raise, so fall back to 0
    total_savings = summary.total_savings if summary.total_savings is not None else 0

    displayHTML(f"""
    <div style="border: 2px solid #2196f3; padding: 15px; margin: 15px 0; background-color: #e3f2fd; border-radius: 8px;">
        <h4 style="margin-top: 0; color: #1565c0;">Summary Statistics</h4>
        <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;">
            <div style="padding: 10px; background-color: white; border-radius: 5px;">
                <strong>Total Clusters:</strong> {summary.total_clusters}
            </div>
            <div style="padding: 10px; background-color: #e8f5e9; border-radius: 5px;">
                <strong>Successful:</strong> {summary.successful}
            </div>
            <div style="padding: 10px; background-color: #e3f2fd; border-radius: 5px;">
                <strong>Dry Run:</strong> {summary.dry_run}
            </div>
            <div style="padding: 10px; background-color: #fff3e0; border-radius: 5px;">
                <strong>Skipped:</strong> {summary.skipped}
            </div>
            <div style="padding: 10px; background-color: #ffebee; border-radius: 5px;">
                <strong>Failed:</strong> {summary.failed}
            </div>
            <div style="padding: 10px; background-color: #e8f5e9; border-radius: 5px;">
                <strong>Total Savings:</strong> ${total_savings:,.2f}
            </div>
        </div>
    </div>
    """)

    # Display detailed logs
    displayHTML("""
    <div style="padding: 10px; background-color: #e8f5e9; border-left: 4px solid #4caf50; margin: 15px 0;">
        <h4 style="margin-top: 0; color: #2e7d32;">Detailed Cluster Updates:</h4>
    </div>
    """)
    display(selected_logs)

except Exception as e:
    # Best-effort cell: report the problem instead of failing the notebook run.
    # The message is escaped because exception text can contain markup characters.
    displayHTML(f"""
    <div style="padding: 15px; background-color: #ffebee; border-left: 5px solid #f44336; margin: 15px 0;">
        <h3 style="margin: 0; color: #c62828;">‚ö† Error</h3>
        <p style="margin: 5px 0; color: #b71c1c;">{html.escape(str(e))}</p>
        <p style="margin: 5px 0; color: #b71c1c;">Make sure to select an execution label from the dropdown above</p>
    </div>
    """)