# ConcordBroker Data Flow Monitoring Dashboard

This notebook provides comprehensive monitoring and analysis of data flow across all ConcordBroker systems.

## Features:
- Real-time data validation monitoring
- Performance analytics
- Data quality assessment
- Anomaly detection
- Self-healing status tracking

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import psycopg2
from sqlalchemy import create_engine
import requests
import json
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Configure plotting: select the 'seaborn-v0_8' matplotlib style sheet and the
# seaborn "husl" colour palette for the figures drawn by the cells below.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# IPython magic, not plain Python: this line only works when the file is run
# as a notebook cell.
%matplotlib inline

# Confirmation message so the user can see that the setup cell has run.
print("📊 Data Flow Monitoring Dashboard Initialized")

In [None]:
# Configuration and Connection Setup
import os
from dotenv import load_dotenv

# Load environment variables from the '.env.mcp' file referenced relative to
# the current working directory ('../').
load_dotenv('../.env.mcp')

# Configuration
# os.getenv returns None when the variable is not set, so both values below
# may be None.
SUPABASE_URL = os.getenv('SUPABASE_URL')
# NOTE(review): SERVICE_ROLE_KEY is not referenced anywhere else in the visible
# part of this notebook — confirm whether it is still needed.
SERVICE_ROLE_KEY = os.getenv('SUPABASE_SERVICE_ROLE_KEY')
# Base URL of the orchestrator HTTP API; every requests call in the cells
# below is built from it.
ORCHESTRATOR_API = 'http://localhost:8001'

# Database connection
def get_db_connection():
    """Create and verify a SQLAlchemy engine for the Supabase Postgres database.

    The project reference is taken from the first label of the ``SUPABASE_URL``
    hostname and the engine targets ``db.<project-ref>.supabase.co:5432`` as
    user ``postgres`` with ``sslmode=require`` and ``search_path=public``.
    The database password is read from the ``SUPABASE_DB_PASSWORD``
    environment variable.

    The previous implementation rewrote the HTTPS URL with string
    replace/split, which produced a URL without credentials or an ``@``
    separator and with a duplicated ``supabase`` domain label, so it could
    never be parsed into a working connection.

    Returns:
        A connected-and-verified ``Engine``, or ``None`` when configuration is
        missing or the database cannot be reached (the reason is printed).
    """
    from urllib.parse import urlparse

    from sqlalchemy import text
    from sqlalchemy.engine import URL

    try:
        if not SUPABASE_URL:
            raise ValueError("SUPABASE_URL is not set")

        # 'https://<ref>.supabase.co' -> '<ref>'
        api_host = urlparse(SUPABASE_URL).hostname or ''
        project_ref = api_host.split('.')[0]
        if not project_ref:
            raise ValueError(f"cannot extract project reference from {SUPABASE_URL!r}")

        db_password = os.getenv('SUPABASE_DB_PASSWORD')
        if not db_password:
            raise ValueError("SUPABASE_DB_PASSWORD is not set")

        # URL.create escapes special characters in the password, which plain
        # string formatting would not.
        db_url = URL.create(
            drivername='postgresql',
            username='postgres',
            password=db_password,
            host=f"db.{project_ref}.supabase.co",
            port=5432,
            database='postgres',
            query={'sslmode': 'require', 'options': '-c search_path=public'},
        )
        engine = create_engine(db_url)

        # create_engine is lazy; run a trivial query so that a returned engine
        # really means the database is reachable.
        with engine.connect() as connection:
            connection.execute(text('SELECT 1'))

        return engine
    except Exception as e:
        print(f"❌ Database connection failed: {e}")
        return None

# Connectivity smoke tests.
# `engine` is the module-level handle the database-backed cells rely on; it is
# None when get_db_connection() could not build one.
engine = get_db_connection()
print("✅ Database connection established" if engine else "❌ Database connection failed")

# Probe the orchestrator's /health endpoint. Failures are reported, never raised,
# so the rest of the notebook can still run.
try:
    response = requests.get(f"{ORCHESTRATOR_API}/health", timeout=5)
    if response.status_code != 200:
        print(f"⚠️ AI Orchestrator API returned status {response.status_code}")
    else:
        print("✅ AI Orchestrator API connected")
except Exception as e:
    print(f"⚠️ AI Orchestrator API not available: {e}")

## 1. Real-time System Health Dashboard

In [None]:
def get_system_metrics():
    """Return the decoded JSON body of the orchestrator's ``/metrics`` endpoint.

    Returns:
        The parsed payload on HTTP 200. ``None`` on any other status code or
        when the request/decoding raises; in both cases a one-line error is
        printed instead of propagating the failure.
    """
    try:
        reply = requests.get(f"{ORCHESTRATOR_API}/metrics", timeout=10)
        if reply.status_code != 200:
            print(f"Error fetching metrics: HTTP {reply.status_code}")
            return None
        return reply.json()
    except Exception as exc:
        print(f"Error fetching metrics: {exc}")
        return None

def display_health_dashboard():
    """Render the table-health dashboard from the orchestrator's metrics.

    Fetches the payload through ``get_system_metrics()``, draws a 2x2 Plotly
    figure (record counts, query times, data age, validation-status pie) for
    every table entry that has no ``'error'`` key, and then prints one status
    line per table, including the errored ones.

    Missing *or null* fields fall back to defaults (``0`` for numeric fields,
    ``'unknown'`` for the status and the timestamp), so a partially populated
    payload is still rendered instead of raising.

    Returns:
        None. Prints a message and returns early when no metrics could be
        fetched or when no error-free table entry exists.
    """
    def _number(entry, key):
        # dict.get's default only covers a *missing* key; an explicit JSON
        # null arrives as None and would break the comparisons and the
        # numeric format specs used below.
        value = entry.get(key)
        return 0 if value is None else value

    metrics = get_system_metrics()

    if not metrics:
        print("❌ Could not fetch system metrics")
        return

    print(f"🕐 Last Update: {metrics.get('timestamp') or 'unknown'}")
    print("="*80)

    # Create dashboard layout
    table_metrics = metrics.get('table_metrics') or {}

    # Prepare data for visualization (parallel lists, one entry per healthy table)
    tables = []
    record_counts = []
    query_times = []
    validation_statuses = []
    freshness_hours = []

    for table_name, data in table_metrics.items():
        if 'error' not in data:
            tables.append(table_name)
            record_counts.append(_number(data, 'record_count'))
            query_times.append(_number(data, 'query_time_ms'))
            validation_statuses.append(data.get('validation_status') or 'unknown')
            freshness_hours.append(_number(data, 'data_freshness_hours'))

    if not tables:
        print("❌ No valid table metrics available")
        return

    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Record Counts by Table', 'Query Performance (ms)',
                       'Data Freshness (hours)', 'Validation Status'),
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "pie"}]]
    )

    # Record counts
    fig.add_trace(
        go.Bar(x=tables, y=record_counts, name="Record Count",
               marker_color='lightblue'),
        row=1, col=1
    )

    # Query performance: green < 1000 ms, orange < 3000 ms, red otherwise
    color_scale = ['green' if qt < 1000 else 'orange' if qt < 3000 else 'red' for qt in query_times]
    fig.add_trace(
        go.Bar(x=tables, y=query_times, name="Query Time (ms)",
               marker_color=color_scale),
        row=1, col=2
    )

    # Data freshness: green < 24 h, orange < 72 h, red otherwise
    freshness_colors = ['green' if fh < 24 else 'orange' if fh < 72 else 'red' for fh in freshness_hours]
    fig.add_trace(
        go.Bar(x=tables, y=freshness_hours, name="Data Age (hours)",
               marker_color=freshness_colors),
        row=2, col=1
    )

    # Validation status pie chart
    status_counts = pd.Series(validation_statuses).value_counts()
    status_colors = {'healthy': 'green', 'warning': 'orange', 'error': 'red', 'unknown': 'gray'}
    pie_colors = [status_colors.get(status, 'gray') for status in status_counts.index]

    fig.add_trace(
        go.Pie(labels=status_counts.index, values=status_counts.values,
               marker_colors=pie_colors, name="Validation Status"),
        row=2, col=2
    )

    # Update layout
    fig.update_layout(
        title_text="ConcordBroker Data Flow Health Dashboard",
        title_x=0.5,
        height=800,
        showlegend=False
    )

    # Rotate x-axis labels for better readability
    fig.update_xaxes(tickangle=45)

    fig.show()

    # Print detailed status
    print("\n📋 Detailed Table Status:")
    print("-" * 80)

    status_icons = {
        'healthy': '✅',
        'warning': '⚠️',
        'error': '❌',
        'unknown': '❓'
    }

    for table_name, data in table_metrics.items():
        if 'error' in data:
            print(f"❌ {table_name:20} ERROR: {data['error']}")
        else:
            status_icon = status_icons.get(data.get('validation_status'), '❓')

            print(f"{status_icon} {table_name:20} "
                  f"Records: {_number(data, 'record_count'):>8,} | "
                  f"Query: {_number(data, 'query_time_ms'):>6.1f}ms | "
                  f"Age: {_number(data, 'data_freshness_hours'):>5.1f}h")

# Display the dashboard
display_health_dashboard()

## 2. Data Quality Analysis

In [None]:
def analyze_data_quality():
    """Score the critical tables by how complete their ``parcel_id`` column is.

    For each table in a fixed list the function counts total rows, distinct
    ``parcel_id`` values and NULL ``parcel_id`` values, and derives a quality
    score of ``100 - null_percentage``. Tables that cannot be analysed (query
    error, or no rows at all) are recorded with a score of 0 and an ``error``
    message. The results are charted with matplotlib, printed, and returned.

    Fixes over the previous version: an empty table no longer triggers a SQL
    division-by-zero, numeric results are coerced to plain ``int``/``float``
    so the score column has a single numeric type, and the overall score is
    reported as unavailable instead of ``nan`` when no table could be scored.

    Returns:
        A ``pandas.DataFrame`` with one row per analysed table, or ``None``
        when the module-level ``engine`` is not available.
    """
    def _failed_row(table_name, description, message):
        # Placeholder record for a table that could not be scored.
        return {
            'table': table_name,
            'description': description,
            'total_records': 0,
            'unique_parcels': 0,
            'null_parcel_ids': 0,
            'null_percentage': 100,
            'quality_score': 0,
            'error': message
        }

    if not engine:
        print("❌ No database connection available")
        return

    print("🔍 Analyzing Data Quality...")

    # Critical tables to analyze
    critical_tables = {
        'florida_parcels': 'Main property data',
        'property_sales_history': 'Sales transaction data',
        'tax_certificates': 'Tax lien certificates',
        'florida_entities': 'Business entities',
        'sunbiz_corporate': 'Corporate registrations'
    }

    quality_results = []

    for table_name, description in critical_tables.items():
        try:
            print(f"   Analyzing {table_name}...")

            # table_name comes from the literal dict above (never from user
            # input), so interpolating it into the statement is safe.
            # NULLIF turns a zero row count into NULL so the percentage does
            # not raise "division by zero" on an empty table.
            # NOTE(review): the query assumes every listed table has a
            # parcel_id column — confirm against the schema.
            basic_query = f"""
            SELECT
                COUNT(*) as total_records,
                COUNT(DISTINCT parcel_id) as unique_parcels,
                COUNT(CASE WHEN parcel_id IS NULL THEN 1 END) as null_parcel_ids,
                ROUND(COUNT(CASE WHEN parcel_id IS NULL THEN 1 END) * 100.0 / NULLIF(COUNT(*), 0), 2) as null_percentage
            FROM {table_name}
            """

            df = pd.read_sql(basic_query, engine)

            if df.empty:
                continue

            result = df.iloc[0]
            total_records = int(result['total_records'])

            if total_records == 0:
                message = "table contains no records"
                print(f"   ⚠️ Error analyzing {table_name}: {message}")
                quality_results.append(_failed_row(table_name, description, message))
                continue

            # The rounded percentage may arrive as a Decimal; use float so the
            # column is not a mix of Decimal and int.
            null_percentage = float(result['null_percentage'])
            quality_results.append({
                'table': table_name,
                'description': description,
                'total_records': total_records,
                'unique_parcels': int(result['unique_parcels']),
                'null_parcel_ids': int(result['null_parcel_ids']),
                'null_percentage': null_percentage,
                'quality_score': max(0.0, 100.0 - null_percentage)
            })

        except Exception as e:
            print(f"   ⚠️ Error analyzing {table_name}: {e}")
            quality_results.append(_failed_row(table_name, description, str(e)))

    # Create quality summary DataFrame
    quality_df = pd.DataFrame(quality_results)

    if quality_df.empty:
        print("❌ No data quality results were produced")
        return quality_df

    # Visualize quality scores
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Quality scores bar chart: green >= 90, orange >= 70, red otherwise
    colors = ['green' if score >= 90 else 'orange' if score >= 70 else 'red'
              for score in quality_df['quality_score']]

    bars1 = ax1.bar(quality_df['table'], quality_df['quality_score'], color=colors)
    ax1.set_title('Data Quality Scores by Table', fontsize=14, fontweight='bold')
    ax1.set_ylabel('Quality Score (%)')
    ax1.set_ylim(0, 100)
    ax1.tick_params(axis='x', rotation=45)

    # Add value labels on bars
    for bar in bars1:
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height + 1,
                f'{height:.1f}%', ha='center', va='bottom')

    # Record counts
    bars2 = ax2.bar(quality_df['table'], quality_df['total_records'], color='lightblue')
    ax2.set_title('Total Records by Table', fontsize=14, fontweight='bold')
    ax2.set_ylabel('Number of Records')
    ax2.tick_params(axis='x', rotation=45)

    # Format y-axis for better readability
    ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x/1000:.0f}K' if x >= 1000 else f'{x:.0f}'))

    # Add value labels on bars
    for bar in bars2:
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:,.0f}', ha='center', va='bottom', rotation=90)

    plt.tight_layout()
    plt.show()

    # Print detailed results
    print("\n📊 Data Quality Analysis Results:")
    print("=" * 100)

    for _, row in quality_df.iterrows():
        if 'error' in row and pd.notna(row['error']):
            print(f"❌ {row['table']:25} ERROR: {row['error']}")
        else:
            quality_icon = '✅' if row['quality_score'] >= 90 else '⚠️' if row['quality_score'] >= 70 else '❌'
            print(f"{quality_icon} {row['table']:25} "
                  f"Score: {row['quality_score']:>5.1f}% | "
                  f"Records: {row['total_records']:>8,} | "
                  f"Unique Parcels: {row['unique_parcels']:>8,} | "
                  f"Null %: {row['null_percentage']:>5.1f}%")

    # Overall system score: mean over the tables that received a positive score
    scored = quality_df.loc[quality_df['quality_score'] > 0, 'quality_score']

    if scored.empty:
        # The mean of an empty selection is NaN, which would print "nan%".
        print("\n❌ Overall System Quality Score: unavailable (no table could be scored)")
        return quality_df

    overall_score = scored.mean()
    print(f"\n🎯 Overall System Quality Score: {overall_score:.1f}%")

    if overall_score >= 90:
        print("✅ Excellent data quality!")
    elif overall_score >= 70:
        print("⚠️ Good data quality with room for improvement")
    else:
        print("❌ Data quality needs attention")

    return quality_df

# Run the analysis
quality_results = analyze_data_quality()

## 3. Performance Monitoring

In [None]:
def monitor_system_performance():
    """Fetch the orchestrator's ``/performance`` payload, chart it and print it.

    When the payload has a ``'system'`` section, a 2x2 matplotlib figure is
    drawn (CPU, memory and disk usage pies plus a database bar chart or a
    "not available" placeholder), followed by a textual summary of resource
    usage, database figures and alerts.

    Returns:
        The decoded payload on success. ``None`` on a non-200 response or when
        anything raises; the reason is printed rather than propagated.
    """
    def _severity_colour(reading, warn_at, critical_at):
        # green below warn_at, orange below critical_at, red otherwise
        if reading < warn_at:
            return 'green'
        return 'orange' if reading < critical_at else 'red'

    try:
        reply = requests.get(f"{ORCHESTRATOR_API}/performance", timeout=10)
        if reply.status_code != 200:
            print(f"❌ Error fetching performance data: HTTP {reply.status_code}")
            return None

        perf_data = reply.json()

        print(f"⚡ Performance Monitoring Report - {perf_data['timestamp']}")
        print("=" * 80)

        # Nothing more to show without the system section.
        if 'system' not in perf_data:
            return perf_data

        sys_data = perf_data['system']

        # Create performance visualization
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

        # One pie "gauge" per resource: (axis, title, payload key, warn, critical)
        gauge_specs = (
            (ax1, 'CPU Usage', 'cpu_usage', 70, 85),
            (ax2, 'Memory Usage', 'memory_usage', 70, 85),
            (ax3, 'Disk Usage', 'disk_usage', 80, 90),
        )
        readings = {}
        for axis, title, key, warn_at, critical_at in gauge_specs:
            used = sys_data.get(key, 0)
            wedge_colour = _severity_colour(used, warn_at, critical_at)
            axis.pie([used, 100-used], labels=[f'Used: {used:.1f}%', 'Free'],
                     colors=[wedge_colour, 'lightgray'], startangle=90, counterclock=False)
            axis.set_title(title, fontsize=14, fontweight='bold')
            readings[key] = used

        # Database connections
        has_db_section = 'database' in perf_data
        if has_db_section and 'active_connections' in perf_data['database']:
            db_section = perf_data['database']
            active_conn = db_section.get('active_connections', 0)
            avg_query_time = db_section.get('avg_query_time_ms', 0)

            # Simple bar chart for database metrics
            bar_labels = ['Active\nConnections', 'Avg Query\nTime (ms)']
            bar_values = [active_conn, avg_query_time]
            bar_colours = ['blue', _severity_colour(avg_query_time, 1000, 3000)]

            bars = ax4.bar(bar_labels, bar_values, color=bar_colours)
            ax4.set_title('Database Performance', fontsize=14, fontweight='bold')

            # Add value labels just above each bar
            for bar, value in zip(bars, bar_values):
                ax4.text(bar.get_x() + bar.get_width()/2., bar.get_height() + bar.get_height()*0.01,
                        f'{value:.1f}', ha='center', va='bottom')
        else:
            ax4.text(0.5, 0.5, 'Database metrics\nnot available',
                    ha='center', va='center', transform=ax4.transAxes, fontsize=12)
            ax4.set_title('Database Performance', fontsize=14, fontweight='bold')

        plt.tight_layout()
        plt.show()

        # Print detailed metrics
        print("\n💻 System Resource Usage:")
        print("-" * 40)
        print(f"CPU Usage:           {readings['cpu_usage']:>6.1f}%")
        print(f"Memory Usage:        {readings['memory_usage']:>6.1f}%")
        print(f"Memory Available:    {sys_data.get('memory_available_gb', 0):>6.1f} GB")
        print(f"Disk Usage:          {readings['disk_usage']:>6.1f}%")
        print(f"Disk Free:           {sys_data.get('disk_free_gb', 0):>6.1f} GB")

        if has_db_section:
            db_section = perf_data['database']
            print(f"\n🗄️ Database Performance:")
            print("-" * 40)
            print(f"Active Connections:  {db_section.get('active_connections', 0):>6}")
            print(f"Avg Query Time:      {db_section.get('avg_query_time_ms', 0):>6.1f} ms")

        # Check for alerts
        if 'alerts' in perf_data and perf_data['alerts']:
            print(f"\n🚨 Performance Alerts:")
            print("-" * 40)
            for alert in perf_data['alerts']:
                print(f"⚠️ {alert}")
        else:
            print(f"\n✅ No performance alerts")

        return perf_data

    except Exception as exc:
        print(f"❌ Error monitoring performance: {exc}")
        return None

# Run performance monitoring
performance_data = monitor_system_performance()

## 4. Validation Status and Self-Healing Monitoring

In [None]:
def run_full_validation():
    """POST to ``/validate/all`` and present the validation results.

    Draws two pass/fail bar charts (``table_integrity`` results on the left,
    ``referential_integrity`` results on the right), prints one line per
    result with extra detail for failures, and finishes with a summary and a
    verdict based on the success rate.

    Returns:
        The decoded response payload, or ``None`` when the request fails, no
        results are returned, or anything raises (the reason is printed).
    """
    def _draw_pass_fail(axis, labels, outcomes, title):
        # One bar per validation: height 1 for a pass, 0 for a fail.
        bar_colours = ['green' if ok else 'red' for ok in outcomes]
        bars = axis.bar(labels, [1 if ok else 0 for ok in outcomes], color=bar_colours)
        axis.set_title(title, fontsize=14, fontweight='bold')
        axis.set_ylabel('Status (1=Pass, 0=Fail)')
        axis.set_ylim(0, 1.2)
        axis.tick_params(axis='x', rotation=45)

        # Add status labels
        for bar, ok in zip(bars, outcomes):
            caption = '✅ PASS' if ok else '❌ FAIL'
            axis.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.05,
                      caption, ha='center', va='bottom', fontweight='bold')

    try:
        print("🔍 Running comprehensive data validation...")

        reply = requests.post(f"{ORCHESTRATOR_API}/validate/all", timeout=30)
        if reply.status_code != 200:
            print(f"❌ Validation request failed: HTTP {reply.status_code}")
            return None

        validation_data = reply.json()
        results = validation_data.get('validation_results', [])

        if not results:
            print("⚠️ No validation results received")
            return None

        print(f"✅ Validation completed at {validation_data['timestamp']}")

        # Split the results by validation type
        table_checks = [item for item in results if item['validation_type'] == 'table_integrity']
        relation_checks = [item for item in results if item['validation_type'] == 'referential_integrity']

        # Create summary visualization; a panel stays blank when its group is empty
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        if table_checks:
            table_labels = [item['table_name'] for item in table_checks]
            table_outcomes = [item['passed'] for item in table_checks]
            _draw_pass_fail(ax1, table_labels, table_outcomes, 'Table Validation Results')

        if relation_checks:
            relation_labels = [f"{item['table_name']}\n{item['validation_type']}" for item in relation_checks]
            relation_outcomes = [item['passed'] for item in relation_checks]
            _draw_pass_fail(ax2, relation_labels, relation_outcomes, 'Relationship Validation Results')

        plt.tight_layout()
        plt.show()

        # Print detailed results
        print("\n📋 Detailed Validation Results:")
        print("=" * 100)

        for item in results:
            marker = '✅' if item['passed'] else '❌'
            print(f"{marker} {item['table_name']:25} "
                  f"{item['validation_type']:20} - {item['message']}")

            # Extra detail is only shown for failed validations that carry it
            if item['passed'] or not item.get('details'):
                continue

            details = item['details']
            if 'total_records' in details:
                print(f"    📊 Records: {details['total_records']:,}")
            if 'validation_checks' in details:
                for check in details['validation_checks']:
                    check_marker = '✅' if check['passed'] else '❌'
                    print(f"    {check_marker} {check['column']}: {check['null_percentage']:.1f}% null")

        # Summary statistics
        total_validations = len(results)
        passed_validations = len([item for item in results if item['passed']])
        if total_validations > 0:
            success_rate = passed_validations / total_validations * 100
        else:
            success_rate = 0

        print(f"\n📈 Validation Summary:")
        print("-" * 40)
        print(f"Total Validations:   {total_validations:>3}")
        print(f"Passed:              {passed_validations:>3}")
        print(f"Failed:              {total_validations - passed_validations:>3}")
        print(f"Success Rate:        {success_rate:>6.1f}%")

        if success_rate >= 95:
            print("\n✅ Excellent! System validation passed with flying colors!")
        elif success_rate >= 80:
            print("\n⚠️ Good validation results with minor issues to address")
        else:
            print("\n❌ Validation identified significant issues requiring attention")

        return validation_data

    except Exception as exc:
        print(f"❌ Error running validation: {exc}")
        return None

def trigger_self_healing():
    """POST to the orchestrator's ``/heal`` endpoint and list what it did.

    Returns:
        The decoded response payload, or ``None`` on a non-200 response or
        when anything raises (the reason is printed).
    """
    try:
        print("🔧 Triggering self-healing process...")

        reply = requests.post(f"{ORCHESTRATOR_API}/heal", timeout=60)
        if reply.status_code != 200:
            print(f"❌ Self-healing request failed: HTTP {reply.status_code}")
            return None

        healing_data = reply.json()
        actions = healing_data.get('healing_actions', [])

        print(f"✅ Self-healing completed at {healing_data['timestamp']}")

        if not actions:
            print("\n✅ No healing actions were needed - system is healthy!")
        else:
            print(f"\n🔧 Healing Actions Performed ({len(actions)}):")
            print("-" * 60)
            # Numbered list, starting at 1
            for position, action in enumerate(actions, 1):
                print(f"   {position}. {action}")

        return healing_data

    except Exception as exc:
        print(f"❌ Error triggering self-healing: {exc}")
        return None

# Run the validation pass, then self-heal only when something failed.
validation_results = run_full_validation()

if validation_results:
    # Collect every result whose 'passed' flag is falsy.
    failed_validations = [
        outcome
        for outcome in validation_results.get('validation_results', [])
        if not outcome['passed']
    ]

    if not failed_validations:
        print("\n✅ All validations passed - no healing needed!")
    else:
        print(f"\n🔧 Found {len(failed_validations)} failed validations. Triggering self-healing...")
        healing_results = trigger_self_healing()

## 5. Real-time Monitoring Dashboard

In [None]:
def start_continuous_monitoring():
    """Ask the orchestrator to start monitoring via ``POST /start-monitoring``.

    Returns:
        ``True`` when the orchestrator answers HTTP 200 (its message and
        interval are printed), ``False`` on any other status or when anything
        raises.
    """
    try:
        reply = requests.post(f"{ORCHESTRATOR_API}/start-monitoring", timeout=10)
        if reply.status_code != 200:
            print(f"❌ Failed to start monitoring: HTTP {reply.status_code}")
            return False

        payload = reply.json()
        print(f"✅ {payload['message']}")
        print(f"📊 Monitoring interval: {payload.get('interval_seconds', 'unknown')} seconds")
        return True
    except Exception as exc:
        print(f"❌ Error starting monitoring: {exc}")
        return False

def _print_health_snapshot():
    """Print the orchestrator's ``/health`` status for the live dashboard."""
    try:
        health_response = requests.get(f"{ORCHESTRATOR_API}/health", timeout=5)
        if health_response.status_code == 200:
            health_data = health_response.json()
            print(f"🟢 Orchestrator Status: {health_data['status']}")
            print(f"📊 Monitoring Active: {health_data['monitoring_active']}")
            if health_data['last_check']:
                print(f"🕐 Last Check: {health_data['last_check']}")
        else:
            print(f"🔴 Orchestrator Status: ERROR (HTTP {health_response.status_code})")
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt must reach the caller so
        # that Ctrl+C actually stops the monitoring loop.
        print("🔴 Orchestrator Status: UNAVAILABLE")


def _print_table_summary():
    """Print one status line per table from the ``/metrics`` endpoint."""
    status_icons = {
        'healthy': '🟢',
        'warning': '🟡',
        'error': '🔴',
        'unknown': '⚪'
    }

    try:
        metrics_response = requests.get(f"{ORCHESTRATOR_API}/metrics", timeout=10)
        if metrics_response.status_code != 200:
            print("🔴 Could not fetch table metrics")
            return

        table_metrics = metrics_response.json().get('table_metrics', {})

        print("📊 Table Status Summary:")
        for table_name, data in table_metrics.items():
            if 'error' in data:
                # str() so a non-string error value can still be truncated.
                print(f"   🔴 {table_name[:20]:20} ERROR: {str(data['error'])[:30]}...")
                continue

            status_icon = status_icons.get(data.get('validation_status'), '⚪')
            # ``or 0`` also covers explicit nulls, which dict.get's default does not.
            record_count = data.get('record_count') or 0
            query_time = data.get('query_time_ms') or 0

            print(f"   {status_icon} {table_name[:20]:20} "
                  f"Records: {record_count:>8,} | "
                  f"Query: {query_time:>6.1f}ms")
    except Exception:
        # See _print_health_snapshot: KeyboardInterrupt must not be swallowed.
        print("🔴 Metrics service unavailable")


def _print_performance_snapshot():
    """Print CPU/memory/disk usage and up to two alerts from ``/performance``."""
    try:
        perf_response = requests.get(f"{ORCHESTRATOR_API}/performance", timeout=5)
        if perf_response.status_code != 200:
            print("🔴 Performance data unavailable")
            return

        perf_data = perf_response.json()
        if 'system' not in perf_data:
            return

        sys_data = perf_data['system']
        print("⚡ System Performance:")
        print(f"   CPU: {sys_data.get('cpu_usage', 0):>5.1f}% | "
              f"Memory: {sys_data.get('memory_usage', 0):>5.1f}% | "
              f"Disk: {sys_data.get('disk_usage', 0):>5.1f}%")

        alerts = perf_data.get('alerts')
        if alerts:
            print(f"   🚨 Alerts: {len(alerts)}")
            for alert in alerts[:2]:  # Show max 2 alerts
                print(f"      • {alert}")
        else:
            print("   ✅ No performance alerts")
    except Exception:
        # See _print_health_snapshot: KeyboardInterrupt must not be swallowed.
        print("🔴 Performance monitoring unavailable")


def create_live_dashboard(iterations=5, interval_seconds=30):
    """Run a bounded "live" dashboard that refreshes the cell output in place.

    Starts the orchestrator's monitoring service, then repeatedly clears the
    cell output and prints a health, table and performance snapshot.

    Args:
        iterations: Number of refreshes before the demo stops (default 5).
        interval_seconds: Pause between refreshes in seconds (default 30).

    Returns:
        None. Ctrl+C stops the loop with a message; because the snapshot
        helpers no longer use bare ``except`` clauses, the interrupt is not
        swallowed while a request is in flight. Any other error is printed.
    """
    from IPython.display import clear_output
    import time

    print("🔴 Starting Live Data Flow Monitoring Dashboard")
    print("Press Ctrl+C to stop monitoring")
    print("=" * 80)

    try:
        # Start monitoring service
        start_continuous_monitoring()

        # Live monitoring loop (bounded so the notebook cell terminates)
        for iteration in range(1, iterations + 1):
            clear_output(wait=True)

            print(f"🔴 LIVE MONITORING - Iteration {iteration}/{iterations}")
            print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            print("=" * 80)

            _print_health_snapshot()
            print("\n" + "-" * 40)

            _print_table_summary()
            print("\n" + "-" * 40)

            _print_performance_snapshot()

            # Only announce (and wait for) another update when one will follow.
            if iteration < iterations:
                print(f"\n⏰ Next update in {interval_seconds} seconds... "
                      f"(Iteration {iteration}/{iterations})")
                time.sleep(interval_seconds)

        print(f"\n✅ Live monitoring demo completed ({iterations} iterations)")
        print("To continue monitoring, run the cell again or check the orchestrator "
              f"directly at {ORCHESTRATOR_API}")

    except KeyboardInterrupt:
        print("\n🛑 Monitoring stopped by user")
    except Exception as e:
        print(f"\n❌ Error in live monitoring: {e}")

# Create live dashboard
create_live_dashboard()

## 6. Data Flow Report Generation

In [None]:
def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero.

    Guards the report's rate calculations so that an empty metrics dict or an
    empty validation list does not raise ZeroDivisionError.
    """
    return (part / whole * 100) if whole else 0.0


def _md_cell(value):
    """Return ``value`` as text that is safe inside a Markdown table cell.

    A literal pipe would be parsed as a column separator and a line break
    would end the row, so pipes are escaped and line breaks become spaces.
    """
    text = str(value)
    return text.replace("|", "\\|").replace("\r", " ").replace("\n", " ")


def _table_health_lines(table_metrics, healthy_tables):
    """Build the 'Table Health Status' and 'Table Details' report lines.

    Args:
        table_metrics: Mapping of table name to that table's metrics dict.
        healthy_tables: Number of entries already counted as healthy.

    Returns:
        List of Markdown lines (without trailing newlines).
    """
    total_tables = len(table_metrics)
    # Entries carrying an 'error' key have no usable record count.
    total_records = sum(
        data.get('record_count', 0)
        for data in table_metrics.values()
        if 'error' not in data
    )

    lines = [
        "",
        "### Table Health Status",
        f"- **Total Tables Monitored:** {total_tables}",
        f"- **Healthy Tables:** {healthy_tables}",
        f"- **Health Rate:** {_percentage(healthy_tables, total_tables):.1f}%",
        f"- **Total Records:** {total_records:,}",
        "",
        "### Table Details",
        "",
        "| Table Name | Status | Records | Query Time (ms) | Data Age (hours) |",
        "|------------|--------|---------|-----------------|------------------|",
    ]

    for table_name, data in table_metrics.items():
        name = _md_cell(table_name)
        if 'error' in data:
            lines.append(f"| {name} | ERROR | - | - | - |")
            continue

        status = _md_cell(data.get('validation_status', 'unknown').upper())
        records = f"{data.get('record_count', 0):,}"
        query_time = f"{data.get('query_time_ms', 0):.1f}"
        data_age = f"{data.get('data_freshness_hours', 0):.1f}"
        lines.append(f"| {name} | {status} | {records} | {query_time} | {data_age} |")

    return lines


def _performance_lines(performance):
    """Build the 'Performance Metrics' report lines.

    Args:
        performance: Performance dict that contains a 'system' entry and,
            optionally, 'database' and 'alerts' entries.

    Returns:
        List of Markdown lines (without trailing newlines).
    """
    sys_data = performance['system']
    lines = [
        "",
        "",
        "## Performance Metrics",
        "",
        "### System Resources",
        f"- **CPU Usage:** {sys_data.get('cpu_usage', 0):.1f}%",
        f"- **Memory Usage:** {sys_data.get('memory_usage', 0):.1f}%",
        f"- **Memory Available:** {sys_data.get('memory_available_gb', 0):.1f} GB",
        f"- **Disk Usage:** {sys_data.get('disk_usage', 0):.1f}%",
        f"- **Disk Free:** {sys_data.get('disk_free_gb', 0):.1f} GB",
    ]

    if 'database' in performance:
        db_data = performance['database']
        lines += [
            "",
            "",
            "### Database Performance",
            f"- **Active Connections:** {db_data.get('active_connections', 0)}",
            f"- **Average Query Time:** {db_data.get('avg_query_time_ms', 0):.1f} ms",
        ]

    alerts = performance.get('alerts')
    if alerts:
        lines += ["", "", "### Performance Alerts"]
        lines += [f"- ⚠️ {alert}" for alert in alerts]

    return lines


def _validation_lines(results, passed):
    """Build the 'Data Validation Results' report lines.

    Args:
        results: List of validation result dicts; each is read for the keys
            'passed', 'table_name', 'validation_type' and 'message'.
        passed: Number of results whose 'passed' value is truthy.

    Returns:
        List of Markdown lines (without trailing newlines).
    """
    total = len(results)
    lines = [
        "",
        "",
        "## Data Validation Results",
        "",
        "### Validation Summary",
        f"- **Total Validations:** {total}",
        f"- **Passed:** {passed}",
        f"- **Failed:** {total - passed}",
        f"- **Success Rate:** {_percentage(passed, total):.1f}%",
        "",
        "### Detailed Validation Results",
        "",
        "| Table | Validation Type | Status | Message |",
        "|-------|----------------|--------|----------|",
    ]

    for result in results:
        status = "✅ PASS" if result['passed'] else "❌ FAIL"
        raw_message = result['message']
        # Keep the table readable: long messages are cut to 50 characters.
        message = raw_message[:50] + "..." if len(raw_message) > 50 else raw_message
        lines.append(
            f"| {_md_cell(result['table_name'])} | {_md_cell(result['validation_type'])} "
            f"| {status} | {_md_cell(message)} |"
        )

    return lines


def _recommendations(performance, results):
    """Derive the 'Immediate Actions' bullet texts.

    Args:
        performance: Performance dict (or a falsy value when unavailable).
        results: List of validation result dicts, or None when unavailable.

    Returns:
        Non-empty list of recommendation strings.
    """
    recs = []

    if results is not None:
        failed_count = sum(1 for r in results if not r['passed'])
        if failed_count:
            recs.append(f"🔧 Address {failed_count} failed validation(s)")
        else:
            recs.append("✅ All validations passing - excellent data quality")

    if performance and performance.get('alerts'):
        recs.append(f"⚡ Review {len(performance['alerts'])} performance alert(s)")

    if performance and 'system' in performance:
        sys_data = performance['system']
        if sys_data.get('cpu_usage', 0) > 80:
            recs.append("💻 High CPU usage detected - consider scaling")
        if sys_data.get('memory_usage', 0) > 85:
            recs.append("🧠 High memory usage detected - monitor for memory leaks")
        if sys_data.get('disk_usage', 0) > 85:
            recs.append("💾 High disk usage detected - cleanup may be needed")

    if not recs:
        recs.append("✅ System is operating optimally - no immediate actions required")

    return recs


def generate_comprehensive_report():
    """Generate a comprehensive data flow report as a Markdown file.

    Collects data from ``get_system_metrics()``, ``monitor_system_performance()``
    and ``run_full_validation()``, renders it as Markdown, writes it to
    ``../logs/data_flow_report_<timestamp>.md`` (creating ``../logs`` if needed)
    and prints a short summary.

    Empty table metrics or an empty validation list are reported with a 0.0%
    rate instead of aborting the report with a division by zero.

    Returns:
        The report file path (str) on success, or None if any step failed;
        the error is printed rather than raised.
    """
    from datetime import datetime
    import os

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    report_file = f"../logs/data_flow_report_{timestamp}.md"

    print("📝 Generating comprehensive data flow report...")
    print(f"📄 Report will be saved to: {report_file}")

    try:
        # Collect all data
        metrics = get_system_metrics()
        performance = monitor_system_performance()
        validation = run_full_validation()

        # Normalise the optional inputs once; None means "section unavailable".
        table_metrics = None
        if metrics and 'table_metrics' in metrics:
            table_metrics = metrics['table_metrics']
        results = None
        if validation and 'validation_results' in validation:
            results = validation['validation_results']

        # Counts are shared by the report body and the printed summary.
        healthy_tables = 0
        if table_metrics is not None:
            healthy_tables = sum(
                1 for data in table_metrics.values()
                if 'error' not in data and data.get('validation_status') == 'healthy'
            )
        passed = 0
        if results is not None:
            passed = sum(1 for r in results if r['passed'])

        lines = [
            "",
            "# ConcordBroker Data Flow Monitoring Report",
            "",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Executive Summary",
            "",
            "This report provides a comprehensive overview of the ConcordBroker data flow monitoring system status, including:",
            "- System health and performance metrics",
            "- Data quality validation results",
            "- Self-healing actions and recommendations",
            "",
            "## System Health Overview",
            "",
        ]

        if table_metrics is not None:
            lines += _table_health_lines(table_metrics, healthy_tables)

        if performance and 'system' in performance:
            lines += _performance_lines(performance)

        if results is not None:
            lines += _validation_lines(results, passed)

        lines += ["", "", "## Recommendations", "", "### Immediate Actions"]
        lines += [f"- {rec}" for rec in _recommendations(performance, results)]

        lines += [
            "",
            "",
            "### Ongoing Monitoring",
            "- Continue automated monitoring at 5-minute intervals",
            "- Review daily reports for trends and patterns",
            "- Ensure self-healing mechanisms are functioning",
            "- Monitor data freshness and update frequencies",
            "",
            "---",
            "",
            # The two trailing spaces are a Markdown hard line break.
            "**Report Generated by ConcordBroker AI Data Flow Orchestrator**  ",
            f"**Timestamp:** {datetime.now().isoformat()}",
        ]
        report_content = "\n".join(lines) + "\n"

        # Ensure logs directory exists
        os.makedirs('../logs', exist_ok=True)

        # Write report to file
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write(report_content)

        print(f"✅ Report successfully generated: {report_file}")

        # Display summary
        print("\n📊 Report Summary:")
        print("-" * 40)
        if table_metrics is not None:
            total_tables = len(table_metrics)
            print(f"Tables Monitored: {total_tables}")
            print(f"Healthy Tables:   {healthy_tables}")
            print(f"Health Rate:      {_percentage(healthy_tables, total_tables):.1f}%")

        if results is not None:
            print(f"Validations:      {passed}/{len(results)} passed "
                  f"({_percentage(passed, len(results)):.1f}%)")

        print(f"\n📄 Full report saved to: {os.path.abspath(report_file)}")

        return report_file

    except Exception as e:
        # Notebook-level boundary: report the failure instead of raising.
        print(f"❌ Error generating report: {e}")
        return None

# Build the report; the call yields the saved file's path, or None on failure.
report_path = generate_comprehensive_report()

# Announce completion only when a report file was actually produced.
if report_path:
    print(
        "\n🎉 Data Flow Monitoring Report Complete!",
        f"📖 You can view the full report at: {report_path}",
        sep="\n",
    )

## Summary

This notebook provides comprehensive monitoring of the ConcordBroker data flow system, including:

1. **Real-time Health Dashboard** - Live monitoring of all critical tables
2. **Data Quality Analysis** - Automated validation and quality scoring
3. **Performance Monitoring** - System resource and database performance tracking
4. **Validation & Self-Healing** - Automated issue detection and resolution
5. **Live Monitoring** - Continuous real-time status updates
6. **Report Generation** - Comprehensive status reports

The AI-powered orchestrator ensures:
- ✅ Property tabs always get correct data
- ✅ MiniPropertyCards show real-time accurate information
- ✅ Filters work with proper database queries
- ✅ Sales history from the `property_sales_history` table is accessible
- ✅ Entity linking from `florida_entities` and `sunbiz_corporate` works
- ✅ Tax certificates from the `tax_certificates` table are properly linked

**Next Steps:**
1. Keep the orchestrator running at http://localhost:8001
2. Monitor the logs in the `../logs/` directory
3. Review daily reports for trends and issues
4. Ensure continuous monitoring is active

**🤖 AI Agent Status: ACTIVE & MONITORING**