# OpenRAN Data Analysis and Visualization

This notebook provides comprehensive analysis and visualization of OpenRAN system data including:

- **E2 Interface Metrics**: Message rates, latency, and connection status
- **RIC Performance**: Near-RT RIC operations and resource utilization  
- **xApp Analytics**: ML/AI model performance and control actions
- **RAN Metrics**: Signal quality, throughput, and UE performance
- **Real-time Monitoring**: Live data visualization and alerting

## Project Overview

The OpenRAN Docker Workflow implements a complete OpenRAN environment with:
- Near-RT RIC (E2 Termination, E2 Manager, Routing Manager)
- xApp Framework with ML/AI capabilities  
- RAN Simulator for testing
- Monitoring stack (Prometheus, Grafana, ELK)

Let's explore the data generated by this system!

## 1. Load the Project Data

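First, let's import the necessary libraries and load the OpenRAN data from the available sources: live Prometheus metrics and the generated JSON snapshot files. If no JSON snapshots are found, synthetic sample data is generated so the rest of the notebook can still run.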

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
import requests
import glob
import time
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Configure plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Configure plotly for Jupyter
import plotly
import plotly.io as pio
pio.renderers.default = "notebook"

# Report library versions so results can be reproduced.
# plotly.express does not expose __version__; the package version is
# published on the top-level plotly module instead.
print("Libraries imported successfully!")
print(f"Pandas version: {pd.__version__}")
print(f"Matplotlib version: {plt.matplotlib.__version__}")
print(f"Seaborn version: {sns.__version__}")
print(f"Plotly version: {plotly.__version__}")

In [None]:
# Load OpenRAN Data from Multiple Sources

def load_prometheus_metrics(prometheus_url="http://localhost:9090", queries=None, timeout=5):
    """Fetch instant-query results from a Prometheus server.

    Args:
        prometheus_url: Base URL of the Prometheus server.
        queries: Iterable of PromQL expressions to evaluate. Defaults to the
            five metrics used by this notebook.
        timeout: Per-request timeout in seconds.

    Returns:
        dict mapping each query string to the decoded JSON body of its
        HTTP 200 response. Queries answered with any other status are
        reported and omitted. If a request fails or a body cannot be decoded,
        the error is printed and an empty dict is returned.
    """
    if queries is None:
        queries = [
            'up',
            'container_memory_usage_bytes',
            'container_cpu_usage_seconds_total',
            'e2_messages_total',
            'ric_subscription_requests_total'
        ]

    endpoint = f"{prometheus_url.rstrip('/')}/api/v1/query"
    metrics_data = {}
    try:
        for query in queries:
            response = requests.get(endpoint, params={'query': query}, timeout=timeout)
            if response.status_code == 200:
                metrics_data[query] = response.json()
            else:
                # Surface failed queries instead of dropping them silently.
                print(f"Prometheus query '{query}' returned HTTP {response.status_code}")
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout errors; ValueError covers
        # a body that is not valid JSON. Abort on the first failure so an
        # unreachable server costs one timeout, not one per query.
        print(f"Could not connect to Prometheus: {e}")
        return {}

    return metrics_data

def load_json_data_files(pattern="/tmp/openran_data_*.json", max_files=10):
    """Load the most recent OpenRAN JSON snapshot files.

    Files matching ``pattern`` are sorted by path (lexicographically) and the
    last ``max_files`` of them are parsed. Files that cannot be read or do
    not contain valid JSON are reported and skipped.

    Args:
        pattern: Glob pattern selecting the snapshot files.
        max_files: Maximum number of files to load, taken from the end of the
            sorted list. ``None`` loads every match; zero or a negative
            number loads nothing.

    Returns:
        list of the decoded JSON documents, in sorted-path order. Empty when
        no file matches or none can be parsed.
    """
    json_files = sorted(glob.glob(pattern))
    if max_files is not None:
        # A plain [-0:] slice would return everything, so handle <= 0 explicitly.
        json_files = json_files[-max_files:] if max_files > 0 else []

    data_list = []
    for file_path in json_files:
        try:
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(file_path, 'r', encoding='utf-8') as f:
                data_list.append(json.load(f))
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {file_path}: {e}")

    return data_list

def create_sample_data():
    """Build synthetic OpenRAN snapshots for use when no recorded data exists.

    One snapshot is produced per minute from 2025-01-18 10:00:00 through
    2025-01-18 12:00:00 inclusive. Each snapshot is a dict holding an
    ISO-formatted 'timestamp' string, a 'metrics' dict and a 'ues' list with
    20 per-UE measurement dicts. All values are drawn from NumPy's global
    random state, so output varies between calls unless that state is seeded.
    """
    draw_int = np.random.randint
    minute_marks = pd.date_range(start='2025-01-18 10:00:00',
                                 end='2025-01-18 12:00:00', freq='1min')

    snapshots = []
    for step, moment in enumerate(minute_marks):
        # "*_total" values grow with the step index plus jitter; the other
        # values fluctuate around a fixed level.
        metrics = {}
        metrics['e2_messages_total'] = step * 10 + draw_int(0, 20)
        metrics['e2_connected_nodes'] = 5 + draw_int(-1, 2)
        metrics['ric_subscription_requests_total'] = step * 2 + draw_int(0, 5)
        metrics['ric_control_requests_total'] = step * 3 + draw_int(0, 8)
        metrics['xapp_control_actions_total'] = step + draw_int(0, 3)
        metrics['xapp_active_subscriptions'] = 10 + draw_int(-2, 3)
        metrics['xapp_ml_predictions_total'] = step * 5 + draw_int(0, 10)
        metrics['xapp_model_accuracy'] = 0.92 + np.random.uniform(-0.05, 0.05)

        ue_reports = []
        for ue_number in range(1, 21):  # 20 UEs
            report = {'ue_id': f'ue_{ue_number}'}
            report['rsrp'] = -90 + draw_int(-20, 20)
            report['rsrq'] = -10 + draw_int(-5, 5)
            report['throughput_dl'] = 50 + draw_int(-20, 30)
            report['throughput_ul'] = 25 + draw_int(-10, 15)
            ue_reports.append(report)

        snapshots.append({
            'timestamp': moment.isoformat(),
            'metrics': metrics,
            'ues': ue_reports,
        })

    return snapshots

# Load data from various sources
print("Loading OpenRAN data...")

# Try to load real data first
prometheus_data = load_prometheus_metrics()
json_data = load_json_data_files()

# Fall back to synthetic data only when no JSON snapshots were found.
# The flag is kept so the status messages below describe what was loaded.
using_sample_data = not json_data
if using_sample_data:
    print("No real data found, creating sample data for demonstration...")
    json_data = create_sample_data()

print(f"Loaded {len(json_data)} data points")
if using_sample_data:
    print("Using sample data for analysis")

# Prometheus availability is independent of where the snapshots came from.
if prometheus_data:
    print(f"Connected to Prometheus, loaded {len(prometheus_data)} metric types")
else:
    print("Prometheus metrics unavailable; continuing without them")

## 2. Explore Data Structure

Let's examine the structure of our OpenRAN data to understand what metrics and information we have available for analysis.

In [None]:
# Explore Data Structure and Contents

# Convert JSON data to DataFrames for easier analysis
def process_openran_data(json_data):
    """Flatten OpenRAN snapshot dicts into a metrics table and a per-UE table.

    Args:
        json_data: Iterable of snapshot dicts. Each needs a 'timestamp' value
            accepted by ``pd.to_datetime`` and a 'metrics' mapping; an
            optional 'ues' entry holds a list of per-UE mappings.

    Returns:
        Tuple ``(metrics_df, ue_df)``. ``metrics_df`` has one row per
        snapshot: the metric keys plus a parsed 'timestamp' column.
        ``ue_df`` has one row per UE per snapshot: the UE keys plus the
        snapshot's 'timestamp'. It is an empty DataFrame when no snapshot
        carries UE data.

    Raises:
        KeyError: If a snapshot lacks 'timestamp' or 'metrics'.
    """
    metrics_list = []
    ue_data_list = []

    for entry in json_data:
        # Parse the timestamp once per snapshot and reuse it for every UE row.
        timestamp = pd.to_datetime(entry['timestamp'])
        metrics_list.append({**entry['metrics'], 'timestamp': timestamp})

        # 'ues' may be absent or explicitly null; both mean "no UE data".
        for ue in entry.get('ues') or []:
            ue_data_list.append({**ue, 'timestamp': timestamp})

    metrics_df = pd.DataFrame(metrics_list)
    ue_df = pd.DataFrame(ue_data_list)

    return metrics_df, ue_df

# Process the data
metrics_df, ue_df = process_openran_data(json_data)

section_rule = "\n" + "="*50 + "\n"
ue_available = not ue_df.empty

print("=== OpenRAN Data Structure Overview ===\n")

# Metrics table: dimensions, covered period and the dtype of each metric column
print("📊 METRICS DATA:")
print(f"Shape: {metrics_df.shape}")
earliest, latest = metrics_df['timestamp'].min(), metrics_df['timestamp'].max()
print(f"Time range: {earliest} to {latest}")
print("\nColumns:")
for col in (name for name in metrics_df.columns if name != 'timestamp'):
    print(f"  • {col}: {metrics_df[col].dtype}")
print("\nSample metrics data:")
display(metrics_df.head())

print(section_rule)

# Per-UE table: same overview, skipping the identifier and timestamp columns
if ue_available:
    print("📱 UE (User Equipment) DATA:")
    print(f"Shape: {ue_df.shape}")
    unique_ue_count = ue_df['ue_id'].nunique() if 'ue_id' in ue_df.columns else 'N/A'
    print(f"Unique UEs: {unique_ue_count}")
    print("\nColumns:")
    for col in (name for name in ue_df.columns if name not in ['timestamp', 'ue_id']):
        print(f"  • {col}: {ue_df[col].dtype}")
    print("\nSample UE data:")
    display(ue_df.head())
else:
    print("📱 UE DATA: No UE data available")

print(section_rule)

# Descriptive statistics for both tables
print("📈 BASIC STATISTICS:")
print("\nMetrics Summary:")
display(metrics_df.describe())

if ue_available:
    print("\nUE Metrics Summary:")
    numeric_ue_cols = ue_df.select_dtypes(include=[np.number]).columns
    if len(numeric_ue_cols) > 0:
        display(ue_df[numeric_ue_cols].describe())

## 3. Run Data Processing Code

Now let's clean and transform the data for analysis, including calculating derived metrics and preparing time series data.

In [None]:
# Data Processing and Transformation

# Set timestamp as index for time series analysis.
# The column check keeps this cell re-runnable: after the first run
# 'timestamp' is already the index and no longer a column.
if 'timestamp' in metrics_df.columns:
    metrics_df = metrics_df.set_index('timestamp')
metrics_df = metrics_df.sort_index()

if not ue_df.empty:
    if 'timestamp' in ue_df.columns:
        ue_df = ue_df.set_index('timestamp')
    ue_df = ue_df.sort_index()

print("🔄 Processing OpenRAN Data...\n")

# Calculate derived metrics
print("Calculating derived metrics...")

# Message rates (messages per minute): difference of each cumulative
# counter divided by the minutes elapsed since the previous sample.
counter_to_rate = {
    'e2_messages_total': 'e2_message_rate',
    'ric_subscription_requests_total': 'ric_subscription_rate',
    'ric_control_requests_total': 'ric_control_rate',
    'xapp_ml_predictions_total': 'xapp_prediction_rate',
}
if len(metrics_df) > 1:
    # Computed once and shared by every rate column.
    elapsed_minutes = metrics_df.index.to_series().diff().dt.total_seconds() / 60
    # A zero gap (duplicate timestamps) would yield inf/NaN rates; mask it
    # so those rows are treated as missing instead.
    elapsed_minutes = elapsed_minutes.where(elapsed_minutes > 0)

    for total_col, rate_col in counter_to_rate.items():
        if total_col in metrics_df.columns:
            metrics_df[rate_col] = metrics_df[total_col].diff() / elapsed_minutes

# Clean up negative values (can occur due to resets)
rate_columns = ['e2_message_rate', 'ric_subscription_rate', 'ric_control_rate', 'xapp_prediction_rate']
for col in rate_columns:
    if col in metrics_df.columns:
        metrics_df[col] = metrics_df[col].clip(lower=0)

print("✅ Rate calculations completed")

# Process UE data if available
if not ue_df.empty:
    print("Processing UE data...")

    # Statistics across all UEs for each timestamp. Only columns present in
    # the data are aggregated. Every entry is a list so the result always has
    # two-level (column, statistic) names for the flattening step below.
    ue_agg_spec = {
        'rsrp': ['mean', 'std', 'min', 'max'],
        'rsrq': ['mean', 'std', 'min', 'max'],
        'throughput_dl': ['mean', 'sum', 'std'],
        'throughput_ul': ['mean', 'sum', 'std'],
        'ue_id': ['count'],
    }
    ue_agg_spec = {col: funcs for col, funcs in ue_agg_spec.items() if col in ue_df.columns}

    if ue_agg_spec:
        ue_aggregated = ue_df.groupby('timestamp').agg(ue_agg_spec).round(2)

        # Flatten column names, e.g. ('rsrp', 'mean') -> 'rsrp_mean'
        ue_aggregated.columns = [f'{col[0]}_{col[1]}' for col in ue_aggregated.columns]
        ue_aggregated = ue_aggregated.rename(columns={'ue_id_count': 'active_ues'})

        print("✅ UE aggregation completed")

        # Merge UE aggregated data with metrics. Columns left over from an
        # earlier run of this cell are dropped first so the join cannot
        # fail on overlapping names.
        metrics_df = metrics_df.drop(columns=ue_aggregated.columns, errors='ignore')
        metrics_df = metrics_df.join(ue_aggregated, how='left')

        print("✅ Data merge completed")

# Calculate system health indicators
print("Calculating system health indicators...")

# Connection stability: 100 when the connected-node count is at or above
# its most frequent value, 0 otherwise.
if 'e2_connected_nodes' in metrics_df.columns:
    node_modes = metrics_df['e2_connected_nodes'].mode()
    # mode() is empty when the column has no valid values; fall back to 5.
    target_nodes = node_modes.iloc[0] if not node_modes.empty else 5
    metrics_df['connection_stability'] = (metrics_df['e2_connected_nodes'] >= target_nodes).astype(int) * 100

# Model performance indicator
if 'xapp_model_accuracy' in metrics_df.columns:
    metrics_df['model_performance'] = metrics_df['xapp_model_accuracy'] * 100

# System load indicator (normalized)
# NOTE(review): this sums cumulative "*_total" counters together with
# per-minute rates, so it is a rough activity index, not a physical load.
numeric_cols = metrics_df.select_dtypes(include=[np.number]).columns
system_load_cols = [col for col in numeric_cols if 'total' in col or 'rate' in col]
if system_load_cols:
    metrics_df['system_load'] = metrics_df[system_load_cols].fillna(0).sum(axis=1)
    load_floor = metrics_df['system_load'].min()
    load_span = metrics_df['system_load'].max() - load_floor
    if load_span > 0:
        metrics_df['system_load_normalized'] = (metrics_df['system_load'] - load_floor) / load_span * 100
    else:
        # Constant load (e.g. a single sample): avoid dividing by zero.
        metrics_df['system_load_normalized'] = 0.0

print("✅ Health indicators calculated")

# Display processed data summary
print("\n📋 PROCESSED DATA SUMMARY:")
print(f"Total data points: {len(metrics_df)}")
print(f"Time span: {metrics_df.index.max() - metrics_df.index.min()}")
print(f"Columns after processing: {len(metrics_df.columns)}")

print("\nNew derived columns:")
new_cols = [col for col in metrics_df.columns if any(keyword in col for keyword in
           ['rate', 'stability', 'performance', 'load', 'mean', 'sum', 'std'])]
for col in new_cols[:10]:  # Show first 10 new columns
    print(f"  • {col}")

if len(new_cols) > 10:
    print(f"  ... and {len(new_cols) - 10} more")

print("\n✅ Data processing completed successfully!")

## 4. Visualize Data with Plots

Create comprehensive visualizations to understand OpenRAN system performance, including time series plots, correlation analysis, and interactive dashboards.

In [None]:
# E2 Interface and RIC Performance Visualization
# Four-panel matplotlib overview; each series is drawn only if its column exists.

def _merge_legends(primary_ax, secondary_ax):
    """Show one legend on primary_ax covering both it and its twin axis."""
    handles, captions = primary_ax.get_legend_handles_labels()
    twin_handles, twin_captions = secondary_ax.get_legend_handles_labels()
    primary_ax.legend(handles + twin_handles, captions + twin_captions, loc='upper left')

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('OpenRAN E2 Interface and RIC Performance Metrics', fontsize=16, fontweight='bold')
(ax1, ax2), (ax3, ax4) = axes

time_axis = metrics_df.index
available = metrics_df.columns

# 1. E2 Message Rates Over Time
rate_series = [
    ('e2_message_rate', 'blue', 'E2 Messages/min'),
    ('ric_subscription_rate', 'green', 'Subscriptions/min'),
    ('ric_control_rate', 'red', 'Control Requests/min'),
]
for column, colour, caption in rate_series:
    if column in available:
        ax1.plot(time_axis, metrics_df[column], color=colour, linewidth=2, label=caption)

ax1.set_title('E2 Interface Message Rates', fontweight='bold')
ax1.set_ylabel('Messages per Minute')
ax1.legend()

# 2. Connected Nodes and Subscriptions (twin y-axes)
if 'e2_connected_nodes' in available:
    ax2_twin = ax2.twinx()

    ax2.plot(time_axis, metrics_df['e2_connected_nodes'],
             color='navy', linewidth=3, marker='o', markersize=4, label='Connected Nodes')
    ax2.set_ylabel('Connected Nodes', color='navy')
    ax2.tick_params(axis='y', labelcolor='navy')

    if 'xapp_active_subscriptions' in available:
        ax2_twin.plot(time_axis, metrics_df['xapp_active_subscriptions'],
                      color='orange', linewidth=2, marker='s', markersize=3,
                      label='Active Subscriptions')
        ax2_twin.set_ylabel('Active Subscriptions', color='orange')
        ax2_twin.tick_params(axis='y', labelcolor='orange')

    _merge_legends(ax2, ax2_twin)

ax2.set_title('Network Connectivity Status', fontweight='bold')

# 3. xApp ML Performance: prediction rate on the left, accuracy on the right
if 'xapp_prediction_rate' in available:
    ax3.plot(time_axis, metrics_df['xapp_prediction_rate'],
             color='purple', linewidth=2, label='ML Predictions/min')

if 'model_performance' in available:
    ax3_twin = ax3.twinx()
    ax3_twin.plot(time_axis, metrics_df['model_performance'],
                  color='green', linewidth=2, linestyle='--', label='Model Accuracy %')
    ax3_twin.set_ylabel('Model Accuracy (%)', color='green')
    ax3_twin.tick_params(axis='y', labelcolor='green')
    ax3_twin.set_ylim([80, 100])

    _merge_legends(ax3, ax3_twin)

ax3.set_title('xApp ML/AI Performance', fontweight='bold')
ax3.set_ylabel('Predictions per Minute')

# 4. System Health Indicators
if 'connection_stability' in available:
    ax4.fill_between(time_axis, metrics_df['connection_stability'],
                     alpha=0.6, color='lightblue', label='Connection Stability %')

if 'system_load_normalized' in available:
    ax4.plot(time_axis, metrics_df['system_load_normalized'],
             color='red', linewidth=2, label='System Load (Normalized)')

ax4.set_title('System Health Indicators', fontweight='bold')
ax4.set_ylabel('Percentage / Normalized Value')
ax4.legend()
ax4.set_ylim([0, 105])

# Grid and rotated time labels are common to all four panels
for panel in (ax1, ax2, ax3, ax4):
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

print("📊 E2 Interface and RIC performance visualization complete!")

In [None]:
# UE Performance and RAN Metrics Visualization
# Four-panel matplotlib figure built from the per-timestamp UE aggregates in
# metrics_df and the raw per-UE rows in ue_df. Skipped when no UE data exists.

if not ue_df.empty and any(col in metrics_df.columns for col in ['rsrp_mean', 'throughput_dl_sum']):
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('OpenRAN - RAN Metrics and UE Performance', fontsize=16, fontweight='bold')

    # 1. Signal Quality (RSRP and RSRQ)
    ax1 = axes[0, 0]
    if 'rsrp_mean' in metrics_df.columns:
        ax1.plot(metrics_df.index, metrics_df['rsrp_mean'],
                 color='blue', linewidth=2, label='Avg RSRP (dBm)')
        if 'rsrp_min' in metrics_df.columns and 'rsrp_max' in metrics_df.columns:
            ax1.fill_between(metrics_df.index, metrics_df['rsrp_min'], metrics_df['rsrp_max'],
                            alpha=0.2, color='blue', label='RSRP Range')

    ax1_twin = ax1.twinx()
    if 'rsrq_mean' in metrics_df.columns:
        ax1_twin.plot(metrics_df.index, metrics_df['rsrq_mean'],
                     color='green', linewidth=2, linestyle='--', label='Avg RSRQ (dB)')
        ax1_twin.set_ylabel('RSRQ (dB)', color='green')
        ax1_twin.tick_params(axis='y', labelcolor='green')

    ax1.set_title('Signal Quality Metrics', fontweight='bold')
    ax1.set_ylabel('RSRP (dBm)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.grid(True, alpha=0.3)
    ax1.tick_params(axis='x', rotation=45)

    # Combine legends
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax1_twin.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

    # 2. Throughput Analysis
    ax2 = axes[0, 1]

    # Numeric bar widths on a matplotlib date axis are measured in days, so a
    # fixed 0.8 would make each bar 0.8 days wide. Use 80% of the typical
    # sample spacing instead.
    typical_gap_s = metrics_df.index.to_series().diff().dt.total_seconds().median()
    if not typical_gap_s > 0:  # single sample, duplicate timestamps, or NaN
        typical_gap_s = 60.0
    bar_width = 0.8 * typical_gap_s / 86400

    if 'throughput_dl_sum' in metrics_df.columns:
        ax2.bar(metrics_df.index, metrics_df['throughput_dl_sum'],
                alpha=0.7, color='skyblue', label='Total DL Throughput (Mbps)', width=bar_width)
    if 'throughput_ul_sum' in metrics_df.columns:
        ax2.bar(metrics_df.index, metrics_df['throughput_ul_sum'],
                alpha=0.7, color='lightcoral', label='Total UL Throughput (Mbps)', width=bar_width)

    ax2.set_title('Network Throughput', fontweight='bold')
    ax2.set_ylabel('Throughput (Mbps)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.tick_params(axis='x', rotation=45)

    # 3. UE Distribution and Performance
    ax3 = axes[1, 0]
    if 'active_ues' in metrics_df.columns:
        ax3.plot(metrics_df.index, metrics_df['active_ues'],
                 color='purple', linewidth=3, marker='o', markersize=5, label='Active UEs')

    # Create the twin axis unconditionally (as for panel 1) so the legend
    # merge below never refers to an undefined or stale axes object.
    ax3_twin = ax3.twinx()
    if 'throughput_dl_mean' in metrics_df.columns:
        ax3_twin.plot(metrics_df.index, metrics_df['throughput_dl_mean'],
                     color='orange', linewidth=2, linestyle=':', label='Avg DL per UE (Mbps)')
        ax3_twin.set_ylabel('Avg Throughput per UE (Mbps)', color='orange')
        ax3_twin.tick_params(axis='y', labelcolor='orange')

    ax3.set_title('UE Count and Average Performance', fontweight='bold')
    ax3.set_ylabel('Number of UEs', color='purple')
    ax3.tick_params(axis='y', labelcolor='purple')
    ax3.grid(True, alpha=0.3)
    ax3.tick_params(axis='x', rotation=45)

    # Combine legends
    lines1, labels1 = ax3.get_legend_handles_labels()
    lines2, labels2 = ax3_twin.get_legend_handles_labels()
    ax3.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

    # 4. Signal Quality Distribution
    ax4 = axes[1, 1]

    # Histogram of every individual RSRP sample across all UEs and timestamps
    if 'rsrp' in ue_df.columns:
        # Drop missing samples: a NaN makes the histogram range undefined.
        rsrp_values = ue_df['rsrp'].dropna().values
        ax4.hist(rsrp_values, bins=20, alpha=0.7, color='lightblue', edgecolor='black')
        ax4.axvline(rsrp_values.mean(), color='red', linestyle='--', linewidth=2,
                   label=f'Mean: {rsrp_values.mean():.1f} dBm')
        ax4.set_title('RSRP Distribution (All UEs)', fontweight='bold')
        ax4.set_xlabel('RSRP (dBm)')
        ax4.set_ylabel('Frequency')
        ax4.legend()
        ax4.grid(True, alpha=0.3)
    else:
        ax4.text(0.5, 0.5, 'No UE distribution data\navailable',
                transform=ax4.transAxes, ha='center', va='center', fontsize=12)
        ax4.set_title('RSRP Distribution', fontweight='bold')

    plt.tight_layout()
    plt.show()

    print("📶 RAN metrics and UE performance visualization complete!")
else:
    print("⚠️  No UE data available for RAN metrics visualization")

In [None]:
# Interactive Plotly Dashboard

# Create an interactive dashboard with Plotly: a 3x2 grid mixing time-series
# panels (some with a secondary y-axis), a gauge, a scatter and a table.
fig_interactive = make_subplots(
    rows=3, cols=2,
    subplot_titles=('E2 Message Rates', 'System Performance KPIs',
                   'Network Throughput', 'ML Model Performance',
                   'Connection Status', 'Real-time Metrics Summary'),
    specs=[[{"secondary_y": True}, {"type": "indicator"}],
           [{"secondary_y": True}, {"secondary_y": True}],
           [{"type": "scatter"}, {"type": "table"}]]
)

# 1. E2 Message Rates (Top Left)
if 'e2_message_rate' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['e2_message_rate'],
                  mode='lines+markers', name='E2 Messages/min',
                  line=dict(color='blue', width=2)),
        row=1, col=1
    )

if 'ric_subscription_rate' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['ric_subscription_rate'],
                  mode='lines+markers', name='Subscriptions/min',
                  line=dict(color='green', width=2)),
        row=1, col=1, secondary_y=True
    )

# 2. System Performance KPIs (Top Right)
# Latest row of the metrics table; an empty dict keeps .get() usable when
# there is no data at all.
current_metrics = metrics_df.iloc[-1] if len(metrics_df) > 0 else {}

kpi_value = current_metrics.get('model_performance', 92.5)
fig_interactive.add_trace(
    go.Indicator(
        mode="gauge+number+delta",
        value=kpi_value,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "ML Model Accuracy (%)"},
        delta={'reference': 90},
        gauge={'axis': {'range': [None, 100]},
               'bar': {'color': "darkblue"},
               'steps': [{'range': [0, 50], 'color': "lightgray"},
                        {'range': [50, 80], 'color': "yellow"},
                        {'range': [80, 100], 'color': "lightgreen"}],
               'threshold': {'line': {'color': "red", 'width': 4},
                           'thickness': 0.75, 'value': 95}}
    ),
    row=1, col=2
)

# 3. Network Throughput (Middle Left)
if 'throughput_dl_sum' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['throughput_dl_sum'],
                  mode='lines', name='DL Throughput (Mbps)',
                  fill='tonexty', fillcolor='rgba(135, 206, 235, 0.3)',
                  line=dict(color='skyblue', width=2)),
        row=2, col=1
    )

if 'throughput_ul_sum' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['throughput_ul_sum'],
                  mode='lines', name='UL Throughput (Mbps)',
                  line=dict(color='lightcoral', width=2)),
        row=2, col=1, secondary_y=True
    )

# 4. ML Model Performance (Middle Right)
if 'xapp_prediction_rate' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['xapp_prediction_rate'],
                  mode='lines+markers', name='Predictions/min',
                  line=dict(color='purple', width=2)),
        row=2, col=2
    )

if 'model_performance' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df.index, y=metrics_df['model_performance'],
                  mode='lines', name='Model Accuracy %',
                  line=dict(color='green', width=2, dash='dash')),
        row=2, col=2, secondary_y=True
    )

# 5. Connection Status (Bottom Left)
if 'e2_connected_nodes' in metrics_df.columns and 'xapp_active_subscriptions' in metrics_df.columns:
    fig_interactive.add_trace(
        go.Scatter(x=metrics_df['e2_connected_nodes'],
                  y=metrics_df['xapp_active_subscriptions'],
                  mode='markers',
                  # Colour points by sample time. 'int64' is requested
                  # explicitly because plain `int` can map to a 32-bit type
                  # on some platforms.
                  # NOTE(review): the // 10**9 assumes a nanosecond-resolution index.
                  marker=dict(size=10, color=metrics_df.index.astype('int64') // 10**9,
                             colorscale='viridis', showscale=True,
                             colorbar=dict(title="Time")),
                  name='Nodes vs Subscriptions',
                  text=[f"Time: {t}" for t in metrics_df.index]),
        row=3, col=1
    )

# 6. Metrics Summary Table (Bottom Right)
if len(current_metrics) > 0:
    summary_metrics = [
        ('Connected Nodes', current_metrics.get('e2_connected_nodes', 'N/A')),
        ('Active Subscriptions', current_metrics.get('xapp_active_subscriptions', 'N/A')),
        ('Model Accuracy', f"{current_metrics.get('model_performance', 0):.1f}%"),
        ('System Load', f"{current_metrics.get('system_load_normalized', 0):.1f}"),
        ('E2 Messages Total', current_metrics.get('e2_messages_total', 'N/A')),
        ('ML Predictions Total', current_metrics.get('xapp_ml_predictions_total', 'N/A'))
    ]

    fig_interactive.add_trace(
        go.Table(
            header=dict(values=['Metric', 'Current Value'],
                       fill_color='paleturquoise',
                       align='left'),
            cells=dict(values=[[metric[0] for metric in summary_metrics],
                              [metric[1] for metric in summary_metrics]],
                      fill_color='lavender',
                      align='left')
        ),
        row=3, col=2
    )

# Update layout
fig_interactive.update_layout(
    title_text="OpenRAN Real-time Interactive Dashboard",
    title_x=0.5,
    height=1000,
    showlegend=True,
    template="plotly_white"
)

# Update axes labels
fig_interactive.update_xaxes(title_text="Time", row=1, col=1)
fig_interactive.update_xaxes(title_text="Time", row=2, col=1)
fig_interactive.update_xaxes(title_text="Time", row=2, col=2)
fig_interactive.update_xaxes(title_text="Connected Nodes", row=3, col=1)

# Without secondary_y, update_yaxes applies to BOTH y-axes of a subplot, so
# the primary and secondary axes are titled separately here.
fig_interactive.update_yaxes(title_text="Messages/min", row=1, col=1, secondary_y=False)
fig_interactive.update_yaxes(title_text="Subscriptions/min", row=1, col=1, secondary_y=True)
fig_interactive.update_yaxes(title_text="DL Throughput (Mbps)", row=2, col=1, secondary_y=False)
fig_interactive.update_yaxes(title_text="UL Throughput (Mbps)", row=2, col=1, secondary_y=True)
fig_interactive.update_yaxes(title_text="Predictions/min", row=2, col=2, secondary_y=False)
fig_interactive.update_yaxes(title_text="Model Accuracy (%)", row=2, col=2, secondary_y=True)
fig_interactive.update_yaxes(title_text="Active Subscriptions", row=3, col=1)

fig_interactive.show()

print("🚀 Interactive dashboard created! Hover over data points for details.")

## 5. Display Output in Visual Studio Code Output Pane

Learn how to view results and plots in VS Code's integrated environment and access real-time monitoring dashboards.

In [None]:
# VS Code Integration and Access Information
# Prints where the monitoring UIs are served, checks the Docker Compose
# services and summarises what this notebook analysed.

import os
import subprocess
import webbrowser
from IPython.display import HTML, display

print("🖥️  VISUAL STUDIO CODE INTEGRATION")
print("="*50)

# Display access information
access_info = """
<div style="background-color: #f0f8ff; padding: 15px; border-radius: 10px; border-left: 5px solid #4CAF50;">
<h3>🌐 OpenRAN Dashboard Access Points:</h3>
<ul>
<li><strong>Grafana Dashboard:</strong> <a href="http://localhost:3000" target="_blank">http://localhost:3000</a> (admin/admin)</li>
<li><strong>Prometheus Metrics:</strong> <a href="http://localhost:9090" target="_blank">http://localhost:9090</a></li>
<li><strong>Kibana Logs:</strong> <a href="http://localhost:5601" target="_blank">http://localhost:5601</a></li>
<li><strong>Sample xApp API:</strong> <a href="http://localhost:8080" target="_blank">http://localhost:8080</a></li>
</ul>
</div>
"""

display(HTML(access_info))

print("\n📊 VS Code Output Pane Features:")
print("• All plots and visualizations appear directly in the notebook")
print("• Interactive Plotly charts support zooming and hovering")
print("• Output is automatically saved in the notebook file")
print("• Use 'View > Output' to see terminal commands and logs")

print("\n📁 Data Files Generated:")
print("• Notebook saved to: /Users/piyushjaipuriyar/Projects/dissertation-openran/analysis/")
print("• JSON data files: /tmp/openran_data_*.json")
print("• Grafana dashboards: monitoring/grafana/dashboards/")

print("\n🔄 Real-time Data Generation:")
print("• Run: python3 scripts/generate_data.py")
print("• This will continuously generate OpenRAN metrics data")
print("• Data is saved every minute to JSON files")

# Check service status
print("\n🏃 Current Service Status:")
project_root = '/Users/piyushjaipuriyar/Projects/dissertation-openran'
# Passing a non-existent cwd makes subprocess.run raise before docker even
# starts. On machines without that checkout, run in the notebook's working
# directory instead (cwd=None).
compose_dir = project_root if os.path.isdir(project_root) else None
try:
    # The timeout prevents a stuck Docker daemon from blocking the cell.
    result = subprocess.run(['docker', 'compose', 'ps'],
                          capture_output=True, text=True, cwd=compose_dir, timeout=15)
    if result.returncode == 0:
        lines = result.stdout.strip().splitlines()
        if len(lines) > 1:  # Header + data
            print(f"✅ {len(lines)-1} services are running")
            print("Use VS Code tasks to start/stop services:")
            print("• Ctrl+Shift+P → 'Tasks: Run Task' → 'OpenRAN: Start All Services'")
        else:
            print("⚠️  No services currently running")
    else:
        print("❌ Could not check service status")
except (OSError, subprocess.SubprocessError) as e:
    # OSError: docker executable missing or not runnable;
    # SubprocessError: includes the timeout above.
    print(f"❌ Error checking services: {e}")

print("\n💡 Tips for VS Code Usage:")
print("• Use Ctrl+Shift+` to open integrated terminal")
print("• Run cells with Shift+Enter")
print("• Use 'Jupyter: Restart Kernel' to reset notebook state")
print("• Export plots: Right-click on plot → 'Save Image As'")

# Summary statistics
print("\n📈 Analysis Summary:")
if len(metrics_df) > 0:
    print(f"• Analyzed {len(metrics_df)} data points")
    print(f"• Time range: {metrics_df.index.min()} to {metrics_df.index.max()}")
    print(f"• Generated {len([col for col in metrics_df.columns if 'rate' in col])} derived metrics")
    if not ue_df.empty:
        print(f"• Processed data for {ue_df['ue_id'].nunique() if 'ue_id' in ue_df.columns else 'N/A'} UEs")
else:
    print("• No data points analyzed (using sample data)")

print("\n✅ Analysis complete! All visualizations are displayed above.")
print("🎯 Next steps: Access the web dashboards for real-time monitoring!")