In [None]:
# ============================================================================
# 🎛️ WIDGET CREATION - Interactive Parameter Configuration
# ============================================================================

# Import required modules
import os
from datetime import datetime

# Register the notebook widgets; when `dbutils` cannot be used (any exception),
# install a read-only stand-in that serves the same default values.
try:
    # Free-text widgets, declared as (name, default, label) and registered in order.
    for _w_name, _w_default, _w_label in (
        ("streams", "20", "📊 Number of Streams"),
        ("rows_per_second", "500", "⚡ Rows per Second per Stream"),
        ("duration", "90", "🕐 Monitoring Duration (seconds)"),
        ("trigger_interval", "15", "⏱️ Trigger Interval (seconds)"),
        ("benchmark_name", "widget_benchmark", "📋 Benchmark Name"),
        ("partitions", "4", "🔧 Number of Partitions"),
    ):
        dbutils.widgets.text(_w_name, _w_default, _w_label)

    # Dropdown widgets, declared as (name, default, choices, label).
    for _w_name, _w_default, _w_choices, _w_label in (
        (
            "output_path",
            "/Volumes/soni/default/streaming_writes/",
            ["/Volumes/soni/default/streaming_writes/", "/tmp/streaming_test/"],
            "📁 Output Path",
        ),
        (
            "checkpoint_path",
            "/Volumes/soni/default/checkpoints/",
            ["/Volumes/soni/default/checkpoints/", "/tmp/checkpoints/"],
            "💾 Checkpoint Path",
        ),
        (
            "preset_config",
            "custom",
            ["custom", "quick_test", "load_test", "scale_test"],
            "🎯 Preset Configuration",
        ),
    ):
        dbutils.widgets.dropdown(_w_name, _w_default, _w_choices, _w_label)

    print("✅ Databricks widgets created successfully!")
    print("💡 Modify the widget values at the top of this notebook")
    print("🔄 Re-run this cell after changing preset_config to update other widgets")

except Exception as exc:
    print(f"⚠️ dbutils not available (running outside Databricks): {exc}")
    print("💡 Using default values instead")

    class MockWidgets:
        """Minimal replacement for `dbutils.widgets` that only supports `get`."""

        # Same defaults the real widgets are created with above.
        _defaults = {
            "streams": "20",
            "rows_per_second": "500",
            "duration": "90",
            "trigger_interval": "15",
            "benchmark_name": "widget_benchmark",
            "partitions": "4",
            "output_path": "/Volumes/soni/default/streaming_writes/",
            "checkpoint_path": "/Volumes/soni/default/checkpoints/",
            "preset_config": "custom",
        }

        def get(self, key):
            """Return the default for `key`, or an empty string if unknown."""
            return self._defaults.get(key, "")

    class MockDbutils:
        """Stand-in for `dbutils` exposing only a `widgets` attribute."""

        def __init__(self):
            self.widgets = MockWidgets()

    # Later cells read widget values through this name.
    dbutils = MockDbutils()


In [None]:
# ============================================================================
# 📊 PARAMETER READING & PRESET CONFIGURATION
# ============================================================================

def apply_preset_config(preset):
    """Return the widget overrides for a named preset.

    Args:
        preset: Preset identifier ("quick_test", "load_test" or "scale_test").

    Returns:
        A new dict mapping parameter names to string values, with
        ``benchmark_name`` set to the preset name. An empty dict is returned
        for any other identifier (including "custom").
    """
    # Columns: streams, rows_per_second, duration, trigger_interval, partitions
    table = {
        "quick_test": ("10", "100", "60", "15", "4"),
        "load_test": ("50", "1000", "300", "10", "8"),
        "scale_test": ("100", "500", "300", "15", "8"),
    }
    if preset not in table:
        return {}

    streams, rate, duration, trigger, partitions = table[preset]
    return {
        "streams": streams,
        "rows_per_second": rate,
        "duration": duration,
        "trigger_interval": trigger,
        "benchmark_name": preset,
        "partitions": partitions,
    }

# Which preset (if any) did the user pick?
preset_config = dbutils.widgets.get("preset_config")
print(f"🎯 Selected preset: {preset_config}")

# "custom" means: take every value from the individual widgets
if preset_config == "custom":
    preset_values = {}
    print("🔧 Using custom widget values")
else:
    preset_values = apply_preset_config(preset_config)
    print(f"📋 Applying {preset_config} preset configuration:")
    for key, value in preset_values.items():
        print(f"   {key}: {value}")

# Effective configuration: a preset value wins over the widget value for the
# six presettable fields; the two paths always come from the widgets.
# Everything except benchmark_name is converted to int.
config = {
    **{
        field: raw if field == "benchmark_name" else int(raw)
        for field, raw in (
            (name, preset_values.get(name, dbutils.widgets.get(name)))
            for name in (
                "streams",
                "rows_per_second",
                "duration",
                "trigger_interval",
                "benchmark_name",
                "partitions",
            )
        )
    },
    "output_path": dbutils.widgets.get("output_path"),
    "checkpoint_path": dbutils.widgets.get("checkpoint_path"),
}

# Validate parameters
def validate_config(config):
    """Check the benchmark configuration and collect human-readable warnings.

    Args:
        config: Mapping with integer entries ``streams``, ``rows_per_second``,
            ``duration`` and ``trigger_interval``. ``partitions`` is checked
            when present.

    Returns:
        A list of warning strings; empty when every check passes. Nothing is
        raised for out-of-range values - the caller decides what to do.
    """
    issues = []

    if not 1 <= config["streams"] <= 1000:
        issues.append("⚠️ streams should be between 1 and 1000")

    if not 1 <= config["rows_per_second"] <= 10000:
        issues.append("⚠️ rows_per_second should be between 1 and 10000")

    if config["duration"] < 0:
        issues.append("⚠️ duration should be >= 0 (0 = no monitoring)")

    if not 1 <= config["trigger_interval"] <= 300:
        issues.append("⚠️ trigger_interval should be between 1 and 300 seconds")

    # partitions is user-supplied and handed to the data generator, so a
    # non-positive value must be flagged too. Looked up with .get() so that
    # configs without the key are still accepted.
    partitions = config.get("partitions")
    if partitions is not None and partitions <= 0:
        issues.append("⚠️ partitions should be >= 1")

    total_throughput = config["streams"] * config["rows_per_second"]
    if total_throughput > 100000:
        issues.append(f"⚠️ Total throughput ({total_throughput:,} rows/sec) is very high - ensure cluster can handle it")

    return issues

# Run the validator, then echo the effective configuration as one banner
validation_issues = validate_config(config)

print("\n".join([
    "\n" + "=" * 80,
    "🚀 FINAL CONFIGURATION FROM WIDGETS",
    "=" * 80,
    f"📊 Benchmark Name: {config['benchmark_name']}",
    f"📈 Number of Streams: {config['streams']}",
    f"⚡ Rows per Second per Stream: {config['rows_per_second']}",
    f"📊 Total Throughput: {config['streams'] * config['rows_per_second']:,} rows/second",
    f"⏱️  Trigger Interval: {config['trigger_interval']} seconds",
    f"🕐 Monitoring Duration: {config['duration']} seconds",
    f"🔧 Partitions: {config['partitions']}",
    f"📁 Output Path: {config['output_path']}",
    f"💾 Checkpoint Path: {config['checkpoint_path']}",
    "=" * 80,
]))

# Warnings are informational only - nothing is raised here
if not validation_issues:
    print("\n✅ Configuration validation passed!")
else:
    print("\n🚨 CONFIGURATION WARNINGS:")
    for issue in validation_issues:
        print(f"   {issue}")
    print("\n💡 Please review your widget values and re-run this cell")

print(f"\n🕐 Configuration read at: {datetime.now():%Y-%m-%d %H:%M:%S}")
print("💡 Modify widgets above and re-run this cell to update configuration")


In [None]:
# ============================================================================
# 📦 DEPENDENCIES & SPARK CONNECTION
# ============================================================================

import subprocess
import sys
import time
import configparser

# Make sure dbldatagen is importable; install it with pip into the running
# interpreter on first use.
try:
    import dbldatagen as dg
    print("✅ dbldatagen is available")
except ImportError:
    print("📦 Installing dbldatagen...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "dbldatagen"])
    import dbldatagen as dg
    print("✅ dbldatagen installed successfully")

# Obtain a Spark session: Databricks Connect first, plain SparkSession otherwise
try:
    from databricks.connect import DatabricksSession
    spark = DatabricksSession.builder.getOrCreate()
    connection_type = "Databricks Connect"
    print("✅ Connected via Databricks Connect")
except Exception:
    # `except Exception` rather than a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed by the fallback.
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.appName("WidgetStreamingBenchmark").getOrCreate()
    connection_type = "Regular Spark Session"
    print("✅ Connected via regular Spark session")

from pyspark.sql.types import StructType, StructField, StringType, TimestampType, DoubleType, IntegerType
from pyspark.sql.functions import expr

print(f"🔗 Spark Version: {spark.version}")
print(f"🔌 Connection Type: {connection_type}")

# Get cluster information
def get_cluster_info():
    """Look up the configured cluster in ``~/.databrickscfg``.

    The ``DEFAULT`` and ``DEFAULT2`` profiles are checked in that order and
    the first one with a non-empty ``cluster_id`` wins.

    Returns:
        ``{'cluster_id', 'host', 'profile', 'found': True}`` for the first
        matching profile, otherwise ``{'found': False}``. Any error while
        reading the file is printed and also reported as ``{'found': False}``.
    """
    try:
        config_path = os.path.expanduser('~/.databrickscfg')
        if os.path.exists(config_path):
            # Interpolation must be off: with the default BasicInterpolation a
            # value containing a literal '%' raises on access, so the
            # strip('%') clean-up below would never get a chance to run.
            cfg = configparser.ConfigParser(interpolation=None)
            cfg.read(config_path)

            for profile in ['DEFAULT', 'DEFAULT2']:
                if profile in cfg:
                    section = cfg[profile]
                    cluster_id = section.get('cluster_id', '').strip('%')
                    host = section.get('host', 'Unknown')
                    if cluster_id:
                        return {
                            'cluster_id': cluster_id,
                            'host': host,
                            'profile': profile,
                            'found': True
                        }
        return {'found': False}
    except Exception as e:
        # Best effort: cluster details are informational only
        print(f"⚠️ Could not read cluster info: {str(e)}")
        return {'found': False}

cluster_info = get_cluster_info()

# Report what was found in the local Databricks config
print("\n🖥️ Cluster Information:")
print("-" * 40)
if not cluster_info['found']:
    print("⚠️ Cluster information not found in config")
    print("💡 Running in local/generic mode")
else:
    print("\n".join([
        f"📍 Host: {cluster_info['host']}",
        f"🔗 Cluster ID: {cluster_info['cluster_id']}",
        f"⚙️ Profile: {cluster_info['profile']}",
    ]))

# "Remote" only when the configured host mentions databricks
print(f"✅ Connection Status: {'Remote Databricks' if 'databricks' in cluster_info.get('host', '') else 'Local/Generic'}")
print("-" * 40)

print(f"\n🕐 Dependencies loaded at: {datetime.now():%Y-%m-%d %H:%M:%S}")
print("✅ Ready to proceed with benchmark execution!")


In [None]:
# ============================================================================
# 🔧 DATA GENERATOR SETUP WITH WIDGET CONFIGURATION
# ============================================================================

# Define IoT sensor data schema.
# The third StructField argument is the nullable flag: only error_code is
# declared nullable, every other column is declared required.
iot_schema = StructType([
    StructField("device_id", StringType(), False),
    StructField("event_timestamp", TimestampType(), False),
    StructField("temperature", DoubleType(), False),
    StructField("humidity", DoubleType(), False),
    StructField("pressure", DoubleType(), False),
    StructField("battery_level", IntegerType(), False),
    StructField("device_type", StringType(), False),
    StructField("error_code", IntegerType(), True),
    StructField("signal_strength", IntegerType(), False)
])

# Print the schema as a numbered table: name | Spark type | nullable/required
print("📋 IoT Sensor Data Schema:")
print("=" * 50)
for i, field in enumerate(iot_schema.fields, 1):
    nullable_str = "nullable" if field.nullable else "required"
    print(f"{i:2d}. {field.name:<18} | {field.dataType.simpleString():<12} | {nullable_str}")

print(f"\n🔧 Creating data generator with widget configuration...")
print(f"   Using {config['partitions']} partitions")
print(f"   Generating {config['rows_per_second']} rows per second per stream")

try:
    # Create the data generator with widget-configured parameters.
    # One column spec per schema field; all columns are generated with random=True.
    # NOTE(review): device_id is declared non-nullable in iot_schema above but is
    # given percentNulls=0.1 here - confirm which of the two is intended.
    # The event_timestamp range is fixed to calendar year 2023.
    dataspec = (
        dg.DataGenerator(spark, name="iot_widget_data", partitions=config['partitions'])
        .withSchema(iot_schema)
        .withColumnSpec("device_id", percentNulls=0.1, minValue=1000, maxValue=9999, prefix="DEV_", random=True)
        .withColumnSpec("event_timestamp", begin="2023-01-01 00:00:00", end="2023-12-31 23:59:59", random=True)
        .withColumnSpec("temperature", minValue=-10.0, maxValue=40.0, random=True)
        .withColumnSpec("humidity", minValue=0.0, maxValue=100.0, random=True)
        .withColumnSpec("pressure", minValue=900.0, maxValue=1100.0, random=True)
        .withColumnSpec("battery_level", minValue=0, maxValue=100, random=True)
        .withColumnSpec("device_type", values=["Sensor", "Actuator", "Gateway", "Controller"], random=True)
        .withColumnSpec("error_code", minValue=0, maxValue=999, random=True, percentNulls=0.2)
        .withColumnSpec("signal_strength", minValue=-100, maxValue=0, random=True)
    )
    print("✅ Data generator specification created successfully")

    # Build the streaming DataFrame with widget-configured row rate.
    # This single DataFrame is reused as the source of every stream started later.
    streaming_df = dataspec.build(
        withStreaming=True,
        options={'rowsPerSecond': config['rows_per_second']}
    )
    print(f"✅ Streaming DataFrame created ({config['rows_per_second']} rows/second per stream)")

    # Display schema preview
    print(f"\n📄 Generated DataFrame Schema:")
    streaming_df.printSchema()

    # Configuration summary (targets derived from the widget values, not measured)
    print(f"\n📊 Data Generation Summary:")
    print(f"   🎯 Target per stream: {config['rows_per_second']} rows/second")
    print(f"   📈 Number of streams: {config['streams']}")
    print(f"   📊 Total target throughput: {config['streams'] * config['rows_per_second']:,} rows/second")
    print(f"   🔧 Data partitions: {config['partitions']}")
    print(f"   ⏱️ Processing interval: {config['trigger_interval']} seconds")

except Exception as e:
    # Print a hint, then re-raise so the cell fails visibly
    print(f"❌ Failed to create data generator: {str(e)}")
    print(f"💡 Check your widget configuration and Spark connection")
    raise e

# Estimate resource requirements.
# Heuristics only: one core per 5 streams (minimum 4) and 1 GB per 2 streams
# (minimum 8 GB); they depend on the stream count, not on the row rate.
total_throughput = config['streams'] * config['rows_per_second']
estimated_cpu_cores = max(4, config['streams'] // 5)  # Rough estimate
estimated_memory_gb = max(8, config['streams'] // 2)  # Rough estimate

print(f"\n💻 Estimated Resource Requirements:")
print(f"   📊 Total Throughput: {total_throughput:,} rows/second")
print(f"   🖥️ Recommended CPU Cores: {estimated_cpu_cores}+")
print(f"   💾 Recommended Memory: {estimated_memory_gb}+ GB")
print(f"   ⚠️ Ensure your cluster has sufficient resources!")

print(f"\n✅ Data generator setup complete!")
print(f"🔄 Ready to start streaming with widget configuration")


In [None]:
# ============================================================================
# 🚀 STREAM CREATION & STARTUP WITH WIDGET CONFIGURATION
# ============================================================================

print(f"🚀 Starting {config['streams']} concurrent streaming queries...")
print(f"📊 Using configuration from widgets:")
print(f"   Benchmark: {config['benchmark_name']}")
print(f"   Target throughput: {config['streams'] * config['rows_per_second']:,} rows/second")
print(f"   Processing interval: {config['trigger_interval']} seconds")
print(f"   Output: {config['output_path']}")
print("=" * 80)

# Initialize tracking variables.
# active_count is reset on every run: previously it was only assigned when at
# least one stream started, so re-running this cell after a failed start
# picked up the stale value left behind by an earlier run.
streaming_queries = []
successful_starts = 0
active_count = 0
start_time = time.time()

# Create and start each stream using widget configuration.
# Every stream writes the same source DataFrame to its own Delta output
# directory with its own checkpoint directory.
for i in range(config['streams']):
    stream_name = f"{config['benchmark_name']}_widget_stream_{i+1:03d}"
    output_path = f"{config['output_path']}{config['benchmark_name']}_widget_stream_{i+1:03d}/"
    checkpoint_path = f"{config['checkpoint_path']}checkpoint_{config['benchmark_name']}_widget_{stream_name}/"

    try:
        # Create the streaming query with widget-configured parameters
        query = (
            streaming_df
            .writeStream
            .format("delta")
            .outputMode("append")
            .option("checkpointLocation", checkpoint_path)
            .trigger(processingTime=f'{config["trigger_interval"]} seconds')
            .queryName(stream_name)
            .start(output_path)
        )

        streaming_queries.append(query)
        successful_starts += 1

        # Show progress indicators: the first five, every tenth and the last
        # stream get a full line, every other fifth a short progress line
        if successful_starts % 10 == 0 or successful_starts <= 5 or i == config['streams'] - 1:
            print(f"✅ Started {successful_starts:3d}/{config['streams']}: {stream_name}")
        elif successful_starts % 5 == 0:
            print(f"📈 Progress: {successful_starts}/{config['streams']} streams started...")

        # Small delay to prevent overwhelming the cluster
        time.sleep(0.2)

    except Exception as e:
        print(f"❌ Failed to start stream {stream_name}: {str(e)}")

        # Show helpful error messages
        if "No space left on device" in str(e):
            print("💡 Cluster is running out of disk space - consider restarting cluster")
        elif "connection" in str(e).lower():
            print("💡 Connection issue - check cluster status in Databricks UI")
        else:
            print("💡 Check cluster resources and configuration")

        # The first failure aborts the loop; streams already started keep running
        print("⏹️ Stopping further stream creation due to error")
        break

# Calculate startup time and show results
startup_time = time.time() - start_time

print("=" * 80)
print(f"🎉 Stream Startup Complete!")
print(f"✅ Successfully started: {successful_starts}/{config['streams']} streams")
print(f"⏱️ Startup time: {startup_time:.1f} seconds")
print(f"📈 Target throughput: {successful_starts * config['rows_per_second']:,} rows/second")

if successful_starts == 0:
    print("\n❌ No streams were started successfully!")
    print("💡 Check your cluster status and widget configuration")
    print("🔄 Try reducing the number of streams or restarting your cluster")
else:
    print(f"\n💡 {successful_starts} streams are now running on your Databricks cluster!")
    print(f"🔗 Monitor them in Databricks UI: Compute → Cluster → Spark UI → Streaming")

    # Validate stream status immediately. Every query is counted, but only
    # the first five are listed; a failing status check is tolerated for all
    # of them (previously only the first five were guarded).
    print(f"\n📊 Immediate Stream Status Check:")
    for position, query in enumerate(streaming_queries):
        listed = position < 5
        try:
            is_active = query.isActive
        except Exception:
            if listed:
                print(f"   {query.name}: ❌ Error checking status")
            continue

        if listed:
            status = "🟢 Active" if is_active else "🔴 Inactive"
            print(f"   {query.name}: {status}")
        if is_active:
            active_count += 1

    if len(streaming_queries) > 5:
        print(f"   ... and {len(streaming_queries) - 5} more streams")

    print(f"\n📈 Summary: {active_count}/{successful_starts} streams are active")
    success_rate = (active_count / successful_starts) * 100 if successful_starts > 0 else 0
    print(f"🎯 Immediate success rate: {success_rate:.1f}%")

# Store results for next cells
benchmark_start_time = time.time()
initial_active_count = active_count

print(f"\n🕐 Startup completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("🔄 Ready for monitoring phase!")


In [None]:
# ============================================================================
# ⏳ REAL-TIME MONITORING WITH WIDGET CONFIGURATION
# ============================================================================

# Poll the started queries for config['duration'] seconds and print a progress
# report every check_interval seconds. Nothing is stopped here; the streams
# keep running after this cell finishes.
# At notebook top level locals() is the module namespace, so this detects
# whether the stream-startup cell has been run.
if 'streaming_queries' not in locals() or not streaming_queries:
    print("⚠️ No streaming queries found - please run the previous cell first")
elif config['duration'] <= 0:
    print("ℹ️ Monitoring duration set to 0 - skipping monitoring phase")
    print("💡 Streams will continue running in the background")
else:
    print(f"⏳ Monitoring {len(streaming_queries)} streams for {config['duration']} seconds...")
    print(f"🔄 Progress updates every 15 seconds")
    print(f"📊 Using widget configuration: {config['benchmark_name']}")
    print("=" * 100)

    monitor_start = time.time()
    check_interval = 15  # seconds between progress checks
    # First report is due one full interval after the start, so a duration
    # shorter than check_interval produces no progress report at all.
    next_check = monitor_start + check_interval

    # Monitoring loop: wakes up once per second and reports when a check is due
    while time.time() - monitor_start < config['duration']:
        current_time = time.time()

        if current_time >= next_check:
            elapsed = current_time - monitor_start
            remaining = config['duration'] - elapsed

            print(f"\n📊 Progress Check - {elapsed:.0f}s elapsed, {remaining:.0f}s remaining")
            print("-" * 90)

            # Check stream health and gather metrics (totals are summed over
            # active queries that already have a progress record)
            current_active = 0
            total_input_rate = 0
            total_processed_rate = 0
            sample_streams_shown = 0  # at most 5 per-stream lines are printed per report
            batch_info = []

            for i, query in enumerate(streaming_queries):
                try:
                    if query.isActive:
                        current_active += 1

                        # Get progress information; lastProgress is read with
                        # dict-style .get() and is falsy until a batch has run
                        progress = query.lastProgress
                        if progress:
                            batch_id = progress.get('batchId', 'N/A')
                            input_rate = progress.get('inputRowsPerSecond', 0)
                            processed_rate = progress.get('processedRowsPerSecond', 0)

                            total_input_rate += input_rate
                            total_processed_rate += processed_rate
                            batch_info.append((query.name, batch_id, input_rate, processed_rate))

                            # Show detailed progress for first 5 streams
                            if sample_streams_shown < 5:
                                print(f"  {query.name}: Batch {batch_id}, Input: {input_rate:.0f}/sec, Processed: {processed_rate:.0f}/sec")
                                sample_streams_shown += 1
                        else:
                            if sample_streams_shown < 5:
                                print(f"  {query.name}: Starting up...")
                                sample_streams_shown += 1

                except Exception as e:
                    # A failing query is reported (message cut to 30 chars) and skipped
                    if sample_streams_shown < 5:
                        print(f"  {query.name}: Error - {str(e)[:30]}")
                        sample_streams_shown += 1

            # Printed whenever more than five queries exist, regardless of how
            # many sample lines were actually shown above
            if len(streaming_queries) > 5:
                print(f"  ... and {len(streaming_queries) - 5} more streams")

            # Summary statistics
            print(f"\n  📈 Summary at {elapsed:.0f}s:")
            print(f"    🟢 Active Streams: {current_active}/{len(streaming_queries)}")
            print(f"    📊 Expected Rate (from widgets): {current_active * config['rows_per_second']:,} rows/second")
            print(f"    📈 Actual Input Rate: {total_input_rate:.0f} rows/second")
            print(f"    ⚡ Processing Rate: {total_processed_rate:.0f} rows/second")

            # Performance metrics (both ratios fall back to 0 instead of
            # dividing by zero)
            if current_active > 0:
                avg_input_per_stream = total_input_rate / current_active
                avg_processed_per_stream = total_processed_rate / current_active
                expected_per_stream = config['rows_per_second']

                input_efficiency = (avg_input_per_stream / expected_per_stream * 100) if expected_per_stream > 0 else 0
                processing_efficiency = (avg_processed_per_stream / avg_input_per_stream * 100) if avg_input_per_stream > 0 else 0

                print(f"    📏 Avg Input/Stream: {avg_input_per_stream:.0f} rows/sec ({input_efficiency:.1f}% of target)")
                print(f"    ⚙️ Processing Efficiency: {processing_efficiency:.1f}%")

            # Health indicator: share of started queries that are still active
            # (green >= 90%, yellow >= 70%, red otherwise)
            health_score = (current_active / len(streaming_queries)) * 100
            health_emoji = "🟢" if health_score >= 90 else "🟡" if health_score >= 70 else "🔴"
            print(f"    {health_emoji} Health Score: {health_score:.1f}%")

            # Widget configuration reminder
            if elapsed > 30:  # After 30 seconds, show widget info
                print(f"    🎛️ Widget Config: {config['benchmark_name']} ({config['streams']} streams @ {config['rows_per_second']}/sec)")

            # Schedule relative to the previous due time, not to "now"
            next_check += check_interval

        # Short sleep to prevent busy waiting
        time.sleep(1)

    print("\n" + "=" * 100)
    print("✅ Monitoring period completed!")

    # Final status check
    final_active = sum(1 for q in streaming_queries if q.isActive)
    print(f"\n📊 Final Status:")
    print(f"   🟢 Active Streams: {final_active}/{len(streaming_queries)}")
    # Nominal figure: active streams x configured rate, not a measured rate
    print(f"   📈 Final Throughput: {final_active * config['rows_per_second']:,} rows/second")
    # benchmark_start_time is set at the end of the stream-startup cell
    print(f"   🕐 Total Runtime: {(time.time() - benchmark_start_time):.1f} seconds")

    if final_active > 0:
        print(f"\n💡 Next Steps:")
        print(f"   - {final_active} streams are still running on your cluster")
        print(f"   - Data location: {config['output_path']}{config['benchmark_name']}_widget_stream_*/")
        print(f"   - Use the utility functions in the next cell to manage streams")

# These two lines run on every path, including the "no queries" and
# "duration is 0" branches above
print(f"\n🕐 Monitoring completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("🔄 Ready for final results and stream management!")


In [None]:
# ============================================================================
# 📊 FINAL RESULTS & STREAM MANAGEMENT UTILITIES
# ============================================================================

# Final benchmark results based on widget configuration.
# At notebook top level locals() is the module namespace, so this detects
# whether the stream-startup cell has been run.
if 'streaming_queries' in locals() and streaming_queries:
    final_active = sum(1 for q in streaming_queries if q.isActive)
    total_started = len(streaming_queries)
    success_rate = (final_active / total_started) * 100 if total_started > 0 else 0
    # Both throughput figures are nominal (stream count x configured rate),
    # not measured rates.
    actual_throughput = final_active * config['rows_per_second']
    target_throughput = config['streams'] * config['rows_per_second']
    # Guarded like every other ratio in this notebook: config can be re-read
    # with streams or rows_per_second set to 0 while older queries still exist.
    throughput_achievement = (actual_throughput / target_throughput * 100) if target_throughput > 0 else 0

    print("=" * 100)
    print("🏆 DATABRICKS STREAMING BENCHMARK - FINAL RESULTS")
    print("=" * 100)

    print(f"\n📋 Widget Configuration Used:")
    print(f"   🎯 Benchmark Name: {config['benchmark_name']}")
    print(f"   📊 Target Streams: {config['streams']}")
    print(f"   ⚡ Rows per Second per Stream: {config['rows_per_second']}")
    print(f"   ⏱️ Trigger Interval: {config['trigger_interval']} seconds")
    print(f"   🕐 Monitoring Duration: {config['duration']} seconds")
    print(f"   🔧 Partitions: {config['partitions']}")

    print(f"\n📈 Performance Results:")
    print(f"   ✅ Successfully Started: {total_started}/{config['streams']} streams")
    print(f"   🟢 Currently Active: {final_active}/{total_started} streams")
    print(f"   🎯 Success Rate: {success_rate:.1f}%")
    print(f"   📊 Target Throughput: {target_throughput:,} rows/second")
    print(f"   📈 Actual Throughput: {actual_throughput:,} rows/second")
    print(f"   📏 Throughput Achievement: {throughput_achievement:.1f}%")

    # Performance rating based on widget targets: both the share of streams
    # still active and the nominal throughput have to clear the bar for the
    # top two ratings.
    if success_rate >= 95 and actual_throughput >= target_throughput * 0.9:
        rating = "🌟 EXCELLENT"
        rating_color = "🟢"
    elif success_rate >= 80 and actual_throughput >= target_throughput * 0.7:
        rating = "👍 GOOD"
        rating_color = "🟡"
    elif success_rate >= 60:
        rating = "⚠️ FAIR"
        rating_color = "🟡"
    else:
        rating = "❌ NEEDS IMPROVEMENT"
        rating_color = "🔴"

    print(f"\n{rating_color} Overall Rating: {rating}")

    print(f"\n📁 Data Location:")
    print(f"   {config['output_path']}{config['benchmark_name']}_widget_stream_*/")
    print(f"\n💾 Checkpoint Location:")
    print(f"   {config['checkpoint_path']}checkpoint_{config['benchmark_name']}_widget_*/")

    print(f"\n🕐 Benchmark Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

else:
    print("⚠️ No streaming queries found - please run the previous cells first")

print("\n" + "=" * 100)

# Utility functions for stream management
print("🛠️ STREAM MANAGEMENT UTILITIES")
print("=" * 100)

def stop_all_widget_streams():
    """Stop every still-active query started by this benchmark.

    Reads the notebook-level ``streaming_queries`` list. A failure to stop
    one query is printed and does not prevent the remaining ones from being
    stopped.

    Returns:
        The number of queries that were stopped by this call, or ``None``
        when there is no (or an empty) ``streaming_queries`` list.
    """
    # Inside a function locals() only contains the function's own variables,
    # so the old `'streaming_queries' not in locals()` test was always true
    # and nothing was ever stopped. The list lives in the module namespace.
    queries = globals().get('streaming_queries')
    if not queries:
        print("⚠️ No streaming queries found")
        return

    print(f"⏹️ Stopping {len(queries)} widget streams...")
    stopped_count = 0

    for query in queries:
        try:
            if query.isActive:
                query.stop()
                stopped_count += 1
                print(f"✅ Stopped: {query.name}")
            else:
                print(f"ℹ️ Already stopped: {query.name}")
        except Exception as e:
            print(f"❌ Error stopping {query.name}: {str(e)}")

    print(f"\n🎉 Successfully stopped {stopped_count} streams")
    return stopped_count

def check_widget_stream_status():
    """Print one status line per benchmark query and count the active ones.

    Reads the notebook-level ``streaming_queries`` list and ``config`` dict.
    For an active query the batch id and input rate from its last progress
    record are shown when one is available.

    Returns:
        The number of active queries, or ``None`` when there is no (or an
        empty) ``streaming_queries`` list.
    """
    # Inside a function locals() only contains the function's own variables,
    # so the old `'streaming_queries' not in locals()` test was always true
    # and the function returned before checking anything.
    queries = globals().get('streaming_queries')
    if not queries:
        print("⚠️ No streaming queries found")
        return

    print(f"📊 Checking status of {len(queries)} widget streams...")
    print("-" * 80)

    active_count = 0
    for i, query in enumerate(queries):
        try:
            status = "🟢 Active" if query.isActive else "🔴 Stopped"
            if query.isActive:
                active_count += 1
                # Try to get progress info (falsy until a first batch has run)
                progress = query.lastProgress
                if progress:
                    batch_id = progress.get('batchId', 'N/A')
                    input_rate = progress.get('inputRowsPerSecond', 0)
                    print(f"{i+1:3d}. {query.name}: {status} (Batch: {batch_id}, Rate: {input_rate:.0f}/sec)")
                else:
                    print(f"{i+1:3d}. {query.name}: {status} (Starting up...)")
            else:
                print(f"{i+1:3d}. {query.name}: {status}")

        except Exception:
            print(f"{i+1:3d}. {query.name}: ❌ Error checking status")

    print("-" * 80)
    print(f"📈 Summary: {active_count}/{len(queries)} streams are active")
    print(f"🎯 Widget Config: {config['benchmark_name']} ({config['streams']} streams @ {config['rows_per_second']}/sec)")
    return active_count

def show_widget_stream_progress():
    """Print per-query progress and aggregate rates for active queries.

    Reads the notebook-level ``streaming_queries`` list and ``config`` dict.
    Only active queries that already have a progress record are listed and
    added to the rate totals; active queries without one are still counted.

    Returns:
        ``{"active": int, "input_rate": number, "processed_rate": number}``,
        or ``None`` when there is no (or an empty) ``streaming_queries`` list.
    """
    # Inside a function locals() only contains the function's own variables,
    # so the old `'streaming_queries' not in locals()` test was always true
    # and the function returned before reporting anything.
    queries = globals().get('streaming_queries')
    if not queries:
        print("⚠️ No streaming queries found")
        return

    print(f"📈 Progress report for widget benchmark: {config['benchmark_name']}")
    print("=" * 90)

    total_input_rate = 0
    total_processed_rate = 0
    active_streams = 0

    for query in queries:
        try:
            if query.isActive:
                active_streams += 1
                progress = query.lastProgress
                if progress:
                    batch_id = progress.get('batchId', 'N/A')
                    input_rate = progress.get('inputRowsPerSecond', 0)
                    processed_rate = progress.get('processedRowsPerSecond', 0)
                    batch_duration = progress.get('batchDuration', 0)

                    total_input_rate += input_rate
                    total_processed_rate += processed_rate

                    print(f"{query.name}:")
                    print(f"  Batch: {batch_id} | Input: {input_rate:.0f}/sec | Processed: {processed_rate:.0f}/sec | Duration: {batch_duration}ms")
        except Exception:
            print(f"{query.name}: Error getting progress")

    print("=" * 90)
    print(f"📊 Overall Totals:")
    print(f"   Active Streams: {active_streams}/{len(queries)}")
    print(f"   Expected Rate (from widgets): {active_streams * config['rows_per_second']:,} rows/second")
    print(f"   Actual Input Rate: {total_input_rate:.0f} rows/second")
    print(f"   Processing Rate: {total_processed_rate:.0f} rows/second")

    return {"active": active_streams, "input_rate": total_input_rate, "processed_rate": total_processed_rate}

# Tell the user which helper functions this cell defined
print("\n".join([
    "\n💡 Available Functions (based on your widget configuration):",
    "   📊 check_widget_stream_status() - Check all stream statuses",
    "   📈 show_widget_stream_progress() - Show detailed progress",
    "   ⏹️ stop_all_widget_streams() - Stop all streams",
]))

# Recap of the configuration the benchmark ran with
print("\n".join([
    "\n🎛️ Widget Configuration Summary:",
    f"   Benchmark: {config.get('benchmark_name', 'N/A')}",
    f"   Streams: {config.get('streams', 'N/A')}",
    f"   Rate: {config.get('rows_per_second', 'N/A')} rows/sec per stream",
    f"   Duration: {config.get('duration', 'N/A')} seconds",
]))

# Reminder that queries are still running (final_active comes from the results
# section above and is only read when streaming_queries is non-empty)
if 'streaming_queries' in locals() and streaming_queries and final_active > 0:
    print("\n".join([
        f"\n🔗 Your {final_active} active streams are running on the Databricks cluster!",
        "💡 Use the utility functions above to monitor or stop them",
        f"🎯 Total throughput: {final_active * config['rows_per_second']:,} rows/second",
    ]))

print("\n🎉 Widget-based benchmark complete! Modify widgets and re-run cells for different configurations.")
