# 📊 Kubernetes HPA Load Testing - Detailed Metrics Visualization

This notebook provides comprehensive analysis and visualization of metrics collected during Kubernetes HPA load testing.

## 📋 Analysis Overview:
- **Resource Utilization**: CPU and Memory usage vs availability
- **HPA Scaling Events**: Pod scaling patterns and triggers
- **Performance Metrics**: Request rates, response times, and failures
- **Idle Capacity Analysis**: Cluster efficiency and resource waste
- **Time Series Analysis**: Trends and patterns over test duration
- **Comparative Analysis**: Multiple test runs comparison

## 🎯 Key Metrics:
- CPU/Memory usage percentages and idle capacity
- Pod scaling events and timing
- Load test performance (RPS, latency, errors)
- Resource efficiency and utilization patterns

In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
from datetime import datetime, timedelta
import glob
import os
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)

# Configure plotly for better display
import plotly.io as pio
pio.renderers.default = "notebook"

print("📚 Libraries imported successfully!")
print("🎨 Plotting styles configured")
print("📊 Ready for data analysis!")

📚 Libraries imported successfully!
🎨 Plotting styles configured
📊 Ready for data analysis!


In [3]:
# Utility functions for data loading and processing

def load_test_data(test_name):
    """Load all metrics data for a specific test"""
    base_path = f"metrics_data/{test_name}"
    data = {}
    
    try:
        # Load basic metrics (resource usage)
        basic_file = f"{base_path}/basic_metrics.csv"
        if os.path.exists(basic_file):
            data['basic'] = pd.read_csv(basic_file)
            data['basic']['timestamp'] = pd.to_datetime(data['basic']['timestamp'])
            print(f"✅ Loaded basic metrics: {len(data['basic'])} records")
        
        # Load HPA metrics
        hpa_file = f"{base_path}/hpa_metrics.csv"
        if os.path.exists(hpa_file):
            data['hpa'] = pd.read_csv(hpa_file)
            data['hpa']['timestamp'] = pd.to_datetime(data['hpa']['timestamp'])
            print(f"✅ Loaded HPA metrics: {len(data['hpa'])} records")
        
        # Load pod metrics
        pod_file = f"{base_path}/pod_metrics.csv"
        if os.path.exists(pod_file):
            data['pod'] = pd.read_csv(pod_file)
            data['pod']['timestamp'] = pd.to_datetime(data['pod']['timestamp'])
            print(f"✅ Loaded pod metrics: {len(data['pod'])} records")
        
        # Load Locust metrics
        locust_stats = f"{base_path}/locust_stats.csv"
        if os.path.exists(locust_stats):
            data['locust_stats'] = pd.read_csv(locust_stats)
            print(f"✅ Loaded Locust stats: {len(data['locust_stats'])} records")
            
        locust_history = f"{base_path}/locust_stats_history.csv"
        if os.path.exists(locust_history):
            data['locust_history'] = pd.read_csv(locust_history)
            data['locust_history']['Timestamp'] = pd.to_datetime(data['locust_history']['Timestamp'])
            print(f"✅ Loaded Locust history: {len(data['locust_history'])} records")
            
    except Exception as e:
        print(f"❌ Error loading data: {e}")
    
    return data

def get_available_tests():
    """Get list of available test directories"""
    if not os.path.exists("metrics_data"):
        print("❌ No metrics_data directory found")
        return []
    
    tests = [d for d in os.listdir("metrics_data") if os.path.isdir(f"metrics_data/{d}")]
    print(f"📁 Found {len(tests)} test directories:")
    for test in tests:
        print(f"   • {test}")
    return tests

def calculate_efficiency_metrics(basic_df):
    """Calculate efficiency and waste metrics"""
    if basic_df.empty:
        return {}
    
    metrics = {
        'avg_cpu_usage': basic_df['cpu_usage_percent'].mean(),
        'avg_memory_usage': basic_df['memory_usage_percent'].mean(),
        'avg_cpu_idle': basic_df['cpu_idle_percent'].mean(),
        'avg_memory_idle': basic_df['memory_idle_percent'].mean(),
        'max_cpu_usage': basic_df['cpu_usage_percent'].max(),
        'max_memory_usage': basic_df['memory_usage_percent'].max(),
        'min_cpu_idle': basic_df['cpu_idle_percent'].min(),
        'min_memory_idle': basic_df['memory_idle_percent'].min(),
    }
    
    return metrics

print("🔧 Utility functions defined successfully!")

🔧 Utility functions defined successfully!


In [18]:
# Load available test data
available_tests = get_available_tests()

# # Select the most recent test or specify manually
# if available_tests:
#     # Sort by creation time (most recent first)
#     test_times = []
#     for test in available_tests:
#         try:
#             # Try to extract timestamp from test name
#             if 'test_' in test:
#                 timestamp_part = test.split('_')[-1]
#                 if len(timestamp_part) >= 6:  # HHMMSS format
#                     test_times.append((test, timestamp_part))
#         except:
#             test_times.append((test, '000000'))
    
#     if test_times:
#         # Sort by timestamp (most recent first)
#         test_times.sort(key=lambda x: x[1], reverse=True)
#         selected_test = test_times[0][0]
#     else:
#         selected_test = available_tests[0]
    
#     print(f"🎯 Auto-selected test: {selected_test}")
#     print("💡 To analyze a different test, change the variable below:")
#     print(f"   selected_test = '{selected_test}'")
# else:
selected_test = "aggressive_test_051634"
    # print("❌ No test data found. Please run a load test first.")

# You can manually change this to analyze a specific test
# selected_test = "your_test_name_here"

📁 Found 8 test directories:
   • quick_test
   • aggressive_test_051634
   • load_test_20250628_050326
   • load_test_20250628_045834
   • quick
   • load_test_20250628_045939
   • quicktest
   • test_collection


In [19]:
# Load the selected test data
if selected_test:
    print(f"📊 Loading data for test: {selected_test}")
    test_data = load_test_data(selected_test)
    
    # Display data summary
    print("\n📈 Data Summary:")
    print("="*50)
    
    for key, df in test_data.items():
        if isinstance(df, pd.DataFrame):
            print(f"{key.upper():15} | {len(df):6} records | {df.shape[1]:2} columns")
            if 'timestamp' in df.columns:
                duration = df['timestamp'].max() - df['timestamp'].min()
                print(f"                | Duration: {duration}")
        print("-"*50)
    
    # Quick data quality check
    print("\n🔍 Data Quality Check:")
    if 'basic' in test_data:
        basic_df = test_data['basic']
        print(f"✓ Basic metrics timespan: {basic_df['timestamp'].min()} to {basic_df['timestamp'].max()}")
        print(f"✓ CPU usage range: {basic_df['cpu_usage_percent'].min():.2f}% - {basic_df['cpu_usage_percent'].max():.2f}%")
        print(f"✓ Memory usage range: {basic_df['memory_usage_percent'].min():.2f}% - {basic_df['memory_usage_percent'].max():.2f}%")
    
    if 'hpa' in test_data:
        hpa_df = test_data['hpa']
        print(f"✓ HPA metrics: Frontend pods range {hpa_df['frontend_pods'].min()}-{hpa_df['frontend_pods'].max()}")
        print(f"✓ HPA metrics: Recommendation pods range {hpa_df['recommendation_pods'].min()}-{hpa_df['recommendation_pods'].max()}")
        
else:
    print("❌ No test selected. Cannot proceed with analysis.")

📊 Loading data for test: aggressive_test_051634
✅ Loaded basic metrics: 17 records
✅ Loaded HPA metrics: 17 records
✅ Loaded pod metrics: 0 records
✅ Loaded Locust stats: 15 records
✅ Loaded Locust history: 176 records

📈 Data Summary:
BASIC           |     17 records | 12 columns
                | Duration: 0 days 00:02:56
--------------------------------------------------
HPA             |     17 records |  6 columns
                | Duration: 0 days 00:02:49
--------------------------------------------------
POD             |      0 records |  4 columns
                | Duration: NaT
--------------------------------------------------
LOCUST_STATS    |     15 records | 22 columns
--------------------------------------------------
LOCUST_HISTORY  |    176 records | 24 columns
--------------------------------------------------

🔍 Data Quality Check:
✓ Basic metrics timespan: 2025-06-28 05:16:44 to 2025-06-28 05:19:40
✓ CPU usage range: 3.20% - 19.85%
✓ Memory usage range: 5.81% - 6.9

In [20]:
# 📊 Resource Utilization Dashboard

if 'basic' in test_data and not test_data['basic'].empty:
    basic_df = test_data['basic']
    
    # Create comprehensive resource utilization dashboard
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            '🖥️ CPU Usage vs Idle Capacity',
            '💾 Memory Usage vs Idle Capacity', 
            '📈 CPU Utilization Over Time',
            '📈 Memory Utilization Over Time',
            '⚡ Resource Efficiency Metrics',
            '🎯 Peak Usage Analysis'
        ],
        specs=[[{"secondary_y": True}, {"secondary_y": True}],
               [{"type": "scatter"}, {"type": "scatter"}],
               [{"type": "bar"}, {"type": "scatter"}]],
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )
    
    # Row 1: CPU and Memory gauges with time series
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['cpu_usage_percent'],
            name='CPU Used %',
            line=dict(color='red', width=3),
            fill='tonexty'
        ),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['cpu_idle_percent'],
            name='CPU Idle %',
            line=dict(color='green', width=2),
            fill='tozeroy'
        ),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['memory_usage_percent'],
            name='Memory Used %',
            line=dict(color='orange', width=3),
            fill='tonexty'
        ),
        row=1, col=2
    )
    
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['memory_idle_percent'],
            name='Memory Idle %',
            line=dict(color='blue', width=2),
            fill='tozeroy'
        ),
        row=1, col=2
    )
    
    # Row 2: Detailed time series
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['cpu_used_millicores'],
            name='CPU Usage (millicores)',
            line=dict(color='crimson', width=2)
        ),
        row=2, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=basic_df['memory_used_mb'],
            name='Memory Usage (MB)',
            line=dict(color='darkorange', width=2)
        ),
        row=2, col=2
    )
    
    # Row 3: Efficiency metrics and peak analysis
    efficiency_metrics = calculate_efficiency_metrics(basic_df)
    
    categories = ['Avg CPU Usage', 'Avg Memory Usage', 'Max CPU Usage', 'Max Memory Usage']
    values = [
        efficiency_metrics['avg_cpu_usage'],
        efficiency_metrics['avg_memory_usage'],
        efficiency_metrics['max_cpu_usage'],
        efficiency_metrics['max_memory_usage']
    ]
    colors = ['lightcoral', 'lightsalmon', 'red', 'orange']
    
    fig.add_trace(
        go.Bar(
            x=categories,
            y=values,
            name='Resource Metrics %',
            marker_color=colors,
            text=[f"{v:.1f}%" for v in values],
            textposition='auto'
        ),
        row=3, col=1
    )
    
    # Peak usage scatter plot
    fig.add_trace(
        go.Scatter(
            x=basic_df['cpu_usage_percent'],
            y=basic_df['memory_usage_percent'],
            mode='markers',
            name='CPU vs Memory Usage',
            marker=dict(
                size=8,
                color=basic_df.index,
                colorscale='Viridis',
                showscale=True,
                colorbar=dict(title="Time Progression")
            ),
            text=[f"Time: {t}" for t in basic_df['timestamp']],
            hovertemplate='CPU: %{x:.1f}%<br>Memory: %{y:.1f}%<br>%{text}<extra></extra>'
        ),
        row=3, col=2
    )
    
    # Update layout
    fig.update_layout(
        title=f"📊 Resource Utilization Dashboard - {selected_test}",
        height=1000,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes labels
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_xaxes(title_text="Time", row=1, col=2)
    fig.update_xaxes(title_text="Time", row=2, col=1)
    fig.update_xaxes(title_text="Time", row=2, col=2)
    fig.update_xaxes(title_text="Metric Type", row=3, col=1)
    fig.update_xaxes(title_text="CPU Usage %", row=3, col=2)
    
    fig.update_yaxes(title_text="Percentage %", row=1, col=1)
    fig.update_yaxes(title_text="Percentage %", row=1, col=2)
    fig.update_yaxes(title_text="Millicores", row=2, col=1)
    fig.update_yaxes(title_text="Megabytes", row=2, col=2)
    fig.update_yaxes(title_text="Percentage %", row=3, col=1)
    fig.update_yaxes(title_text="Memory Usage %", row=3, col=2)
    
    fig.show()
    
    # Display summary statistics
    print(f"\n📈 Resource Utilization Summary for {selected_test}:")
    print("="*60)
    print(f"🖥️  CPU - Average: {efficiency_metrics['avg_cpu_usage']:.2f}%, Peak: {efficiency_metrics['max_cpu_usage']:.2f}%")
    print(f"💾 Memory - Average: {efficiency_metrics['avg_memory_usage']:.2f}%, Peak: {efficiency_metrics['max_memory_usage']:.2f}%")
    print(f"⚡ CPU Idle - Average: {efficiency_metrics['avg_cpu_idle']:.2f}%, Minimum: {efficiency_metrics['min_cpu_idle']:.2f}%")
    print(f"💤 Memory Idle - Average: {efficiency_metrics['avg_memory_idle']:.2f}%, Minimum: {efficiency_metrics['min_memory_idle']:.2f}%")
    
else:
    print("❌ No basic metrics data available for visualization")


📈 Resource Utilization Summary for aggressive_test_051634:
🖥️  CPU - Average: 13.20%, Peak: 19.85%
💾 Memory - Average: 6.24%, Peak: 6.93%
⚡ CPU Idle - Average: 86.80%, Minimum: 80.15%
💤 Memory Idle - Average: 93.76%, Minimum: 93.07%


In [21]:
# 🚀 HPA Scaling Behavior Analysis

if 'hpa' in test_data and not test_data['hpa'].empty:
    hpa_df = test_data['hpa']
    
    # Create HPA scaling dashboard
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '🔄 Pod Scaling Over Time',
            '📊 CPU Utilization vs Target',
            '⚡ Scaling Events Timeline',
            '📈 Scaling Efficiency Analysis'
        ],
        specs=[[{"secondary_y": True}, {"secondary_y": True}],
               [{"type": "scatter"}, {"type": "bar"}]],
        vertical_spacing=0.15
    )
    
    # Row 1, Col 1: Pod scaling over time
    fig.add_trace(
        go.Scatter(
            x=hpa_df['timestamp'],
            y=hpa_df['frontend_pods'],
            name='Frontend Pods',
            line=dict(color='blue', width=3),
            mode='lines+markers'
        ),
        row=1, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=hpa_df['timestamp'],
            y=hpa_df['recommendation_pods'],
            name='Recommendation Pods',
            line=dict(color='red', width=3),
            mode='lines+markers'
        ),
        row=1, col=1
    )
    
    # Row 1, Col 2: CPU utilization
    if 'frontend_cpu_target' in hpa_df.columns:
        fig.add_trace(
            go.Scatter(
                x=hpa_df['timestamp'],
                y=hpa_df['frontend_cpu_target'],
                name='Frontend CPU %',
                line=dict(color='orange', width=2)
            ),
            row=1, col=2
        )
    
    if 'recommendation_cpu_target' in hpa_df.columns:
        fig.add_trace(
            go.Scatter(
                x=hpa_df['timestamp'],
                y=hpa_df['recommendation_cpu_target'],
                name='Recommendation CPU %',
                line=dict(color='purple', width=2)
            ),
            row=1, col=2
        )
    
    # Add target lines (HPA thresholds)
    fig.add_hline(y=30, line_dash="dash", line_color="red", 
                  annotation_text="Frontend CPU Target (30%)", row=1, col=2)
    fig.add_hline(y=25, line_dash="dash", line_color="purple", 
                  annotation_text="Recommendation CPU Target (25%)", row=1, col=2)
    
    # Row 2, Col 1: Scaling events detection
    frontend_changes = hpa_df['frontend_pods'].diff().fillna(0)
    recommendation_changes = hpa_df['recommendation_pods'].diff().fillna(0)
    
    # Find scaling events
    scale_up_events = hpa_df[frontend_changes > 0]
    scale_down_events = hpa_df[frontend_changes < 0]
    
    fig.add_trace(
        go.Scatter(
            x=scale_up_events['timestamp'],
            y=scale_up_events['frontend_pods'],
            mode='markers',
            name='Scale Up Events',
            marker=dict(color='green', size=12, symbol='triangle-up'),
            text=[f"Scaled up by {int(change)}" for change in frontend_changes[frontend_changes > 0]],
            hovertemplate='Scale Up<br>Time: %{x}<br>Pods: %{y}<br>%{text}<extra></extra>'
        ),
        row=2, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=scale_down_events['timestamp'],
            y=scale_down_events['frontend_pods'],
            mode='markers',
            name='Scale Down Events',
            marker=dict(color='red', size=12, symbol='triangle-down'),
            text=[f"Scaled down by {int(abs(change))}" for change in frontend_changes[frontend_changes < 0]],
            hovertemplate='Scale Down<br>Time: %{x}<br>Pods: %{y}<br>%{text}<extra></extra>'
        ),
        row=2, col=1
    )
    
    # Add baseline pods line
    fig.add_trace(
        go.Scatter(
            x=hpa_df['timestamp'],
            y=hpa_df['frontend_pods'],
            name='Pod Count Timeline',
            line=dict(color='lightblue', width=1),
            opacity=0.5
        ),
        row=2, col=1
    )
    
    # Row 2, Col 2: Scaling efficiency analysis
    total_scale_ups = len(scale_up_events)
    total_scale_downs = len(scale_down_events)
    max_pods = hpa_df['frontend_pods'].max()
    min_pods = hpa_df['frontend_pods'].min()
    avg_pods = hpa_df['frontend_pods'].mean()
    
    efficiency_data = {
        'Scale Up Events': total_scale_ups,
        'Scale Down Events': total_scale_downs,
        'Max Pods': max_pods,
        'Min Pods': min_pods,
        'Avg Pods': avg_pods
    }
    
    fig.add_trace(
        go.Bar(
            x=list(efficiency_data.keys()),
            y=list(efficiency_data.values()),
            name='Scaling Statistics',
            marker_color=['green', 'red', 'blue', 'orange', 'purple'],
            text=[f"{v:.1f}" for v in efficiency_data.values()],
            textposition='auto'
        ),
        row=2, col=2
    )
    
    # Update layout
    fig.update_layout(
        title=f"🚀 HPA Scaling Behavior Analysis - {selected_test}",
        height=800,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_xaxes(title_text="Time", row=1, col=2)
    fig.update_xaxes(title_text="Time", row=2, col=1)
    fig.update_xaxes(title_text="Metric", row=2, col=2)
    
    fig.update_yaxes(title_text="Pod Count", row=1, col=1)
    fig.update_yaxes(title_text="CPU Utilization %", row=1, col=2)
    fig.update_yaxes(title_text="Pod Count", row=2, col=1)
    fig.update_yaxes(title_text="Count/Value", row=2, col=2)
    
    fig.show()
    
    # Display scaling summary
    print(f"\n🚀 HPA Scaling Summary for {selected_test}:")
    print("="*60)
    print(f"📈 Total Scale Up Events: {total_scale_ups}")
    print(f"📉 Total Scale Down Events: {total_scale_downs}")
    print(f"⚡ Peak Pods: {max_pods}")
    print(f"💤 Minimum Pods: {min_pods}")
    print(f"📊 Average Pods: {avg_pods:.2f}")
    
    if total_scale_ups > 0 or total_scale_downs > 0:
        scaling_frequency = (total_scale_ups + total_scale_downs) / len(hpa_df) * 100
        print(f"🔄 Scaling Frequency: {scaling_frequency:.2f}% of measurements")
    
else:
    print("❌ No HPA metrics data available for visualization")


🚀 HPA Scaling Summary for aggressive_test_051634:
📈 Total Scale Up Events: 2
📉 Total Scale Down Events: 0
⚡ Peak Pods: 6
💤 Minimum Pods: 1
📊 Average Pods: 4.18
🔄 Scaling Frequency: 11.76% of measurements


In [22]:
# 🎯 Load Test Performance Analysis

if 'locust_history' in test_data and not test_data['locust_history'].empty:
    locust_df = test_data['locust_history']
    
    # Create performance analysis dashboard
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '📊 Request Rate Over Time',
            '⏱️ Response Time Distribution',
            '❌ Error Rate Analysis',
            '👥 User Load vs Performance'
        ],
        specs=[[{"secondary_y": True}, {"type": "histogram"}],
               [{"secondary_y": True}, {"type": "scatter"}]],
        vertical_spacing=0.15
    )
    
    # Row 1, Col 1: Request rate over time
    if 'Requests/s' in locust_df.columns:
        fig.add_trace(
            go.Scatter(
                x=locust_df['Timestamp'],
                y=locust_df['Requests/s'],
                name='Requests/s',
                line=dict(color='blue', width=2),
                fill='tozeroy'
            ),
            row=1, col=1
        )
    
    # Add user count on secondary axis
    if 'User Count' in locust_df.columns:
        fig.add_trace(
            go.Scatter(
                x=locust_df['Timestamp'],
                y=locust_df['User Count'],
                name='Active Users',
                line=dict(color='red', width=2, dash='dash'),
                yaxis='y2'
            ),
            row=1, col=1, secondary_y=True
        )
    
    # Row 1, Col 2: Response time distribution
    if 'Average Response Time' in locust_df.columns:
        fig.add_trace(
            go.Histogram(
                x=locust_df['Average Response Time'],
                name='Response Time Distribution',
                nbinsx=20,
                marker_color='lightblue',
                opacity=0.7
            ),
            row=1, col=2
        )
    
    # Row 2, Col 1: Error rate analysis
    if 'Failures/s' in locust_df.columns and 'Requests/s' in locust_df.columns:
        # Calculate error rate percentage
        error_rate = (locust_df['Failures/s'] / (locust_df['Requests/s'] + 0.001)) * 100
        
        fig.add_trace(
            go.Scatter(
                x=locust_df['Timestamp'],
                y=error_rate,
                name='Error Rate %',
                line=dict(color='red', width=2),
                fill='tozeroy'
            ),
            row=2, col=1
        )
        
        # Add failure count on secondary axis
        fig.add_trace(
            go.Scatter(
                x=locust_df['Timestamp'],
                y=locust_df['Failures/s'],
                name='Failures/s',
                line=dict(color='darkred', width=1),
                yaxis='y2'
            ),
            row=2, col=1, secondary_y=True
        )
    
    # Row 2, Col 2: User load vs performance correlation
    if all(col in locust_df.columns for col in ['User Count', 'Average Response Time', 'Requests/s']):
        fig.add_trace(
            go.Scatter(
                x=locust_df['User Count'],
                y=locust_df['Average Response Time'],
                mode='markers',
                name='Users vs Response Time',
                marker=dict(
                    size=locust_df['Requests/s']/10,  # Size based on RPS
                    color=locust_df['Requests/s'],
                    colorscale='Viridis',
                    showscale=True,
                    colorbar=dict(title="RPS")
                ),
                text=[f"RPS: {rps:.1f}" for rps in locust_df['Requests/s']],
                hovertemplate='Users: %{x}<br>Response Time: %{y:.0f}ms<br>%{text}<extra></extra>'
            ),
            row=2, col=2
        )
    
    # Update layout
    fig.update_layout(
        title=f"🎯 Load Test Performance Analysis - {selected_test}",
        height=800,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_xaxes(title_text="Response Time (ms)", row=1, col=2)
    fig.update_xaxes(title_text="Time", row=2, col=1)
    fig.update_xaxes(title_text="User Count", row=2, col=2)
    
    fig.update_yaxes(title_text="Requests/s", row=1, col=1)
    fig.update_yaxes(title_text="User Count", row=1, col=1, secondary_y=True)
    fig.update_yaxes(title_text="Frequency", row=1, col=2)
    fig.update_yaxes(title_text="Error Rate %", row=2, col=1)
    fig.update_yaxes(title_text="Failures/s", row=2, col=1, secondary_y=True)
    fig.update_yaxes(title_text="Response Time (ms)", row=2, col=2)
    
    fig.show()
    
    # Performance summary statistics
    print(f"\n🎯 Performance Summary for {selected_test}:")
    print("="*60)
    
    if 'Requests/s' in locust_df.columns:
        max_rps = locust_df['Requests/s'].max()
        avg_rps = locust_df['Requests/s'].mean()
        print(f"📊 Peak RPS: {max_rps:.2f}")
        print(f"📊 Average RPS: {avg_rps:.2f}")
    
    if 'Average Response Time' in locust_df.columns:
        avg_response = locust_df['Average Response Time'].mean()
        p95_response = locust_df['Average Response Time'].quantile(0.95)
        print(f"⏱️  Average Response Time: {avg_response:.2f}ms")
        print(f"⏱️  95th Percentile Response Time: {p95_response:.2f}ms")
    
    if 'Failures/s' in locust_df.columns:
        total_failures = locust_df['Failures/s'].sum()
        avg_error_rate = (locust_df['Failures/s'] / (locust_df['Requests/s'] + 0.001) * 100).mean()
        print(f"❌ Total Failures: {total_failures:.0f}")
        print(f"❌ Average Error Rate: {avg_error_rate:.2f}%")
    
elif 'locust_stats' in test_data:
    locust_stats = test_data['locust_stats']
    print(f"\n🎯 Load Test Summary Statistics:")
    print("="*60)
    print(locust_stats.head())
    
else:
    print("❌ No Locust performance data available for visualization")


🎯 Performance Summary for aggressive_test_051634:
📊 Peak RPS: 79.10
📊 Average RPS: 65.67
❌ Total Failures: 0
❌ Average Error Rate: 0.00%


In [23]:
# 🔗 Load vs Scaling Correlation Analysis

if ('basic' in test_data and 'hpa' in test_data and 
    not test_data['basic'].empty and not test_data['hpa'].empty):
    
    basic_df = test_data['basic']
    hpa_df = test_data['hpa']
    
    # Merge datasets on timestamp (approximate matching)
    merged_df = pd.merge_asof(
        basic_df.sort_values('timestamp'),
        hpa_df.sort_values('timestamp'),
        on='timestamp',
        direction='nearest',
        tolerance=pd.Timedelta('30s')
    )
    
    # Create correlation analysis dashboard
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '🔗 CPU Usage vs Pod Scaling',
            '📊 Memory vs Scaling Events',
            '⚡ Resource vs Scale Timeline',
            '🎯 Scaling Effectiveness'
        ],
        specs=[[{"type": "scatter"}, {"type": "scatter"}],
               [{"secondary_y": True}, {"type": "bar"}]],
        vertical_spacing=0.15
    )
    
    # Row 1, Col 1: CPU vs Pod scaling correlation
    fig.add_trace(
        go.Scatter(
            x=merged_df['cpu_usage_percent'],
            y=merged_df['frontend_pods'],
            mode='markers',
            name='CPU vs Frontend Pods',
            marker=dict(
                color='red',
                size=8,
                opacity=0.7
            ),
            text=[f"Time: {t}" for t in merged_df['timestamp']],
            hovertemplate='CPU: %{x:.1f}%<br>Pods: %{y}<br>%{text}<extra></extra>'
        ),
        row=1, col=1
    )
    
    # Row 1, Col 2: Memory vs scaling
    fig.add_trace(
        go.Scatter(
            x=merged_df['memory_usage_percent'],
            y=merged_df['frontend_pods'],
            mode='markers',
            name='Memory vs Frontend Pods',
            marker=dict(
                color='blue',
                size=8,
                opacity=0.7
            ),
            text=[f"Time: {t}" for t in merged_df['timestamp']],
            hovertemplate='Memory: %{x:.1f}%<br>Pods: %{y}<br>%{text}<extra></extra>'
        ),
        row=1, col=2
    )
    
    # Row 2, Col 1: Combined timeline
    fig.add_trace(
        go.Scatter(
            x=merged_df['timestamp'],
            y=merged_df['cpu_usage_percent'],
            name='CPU Usage %',
            line=dict(color='red', width=2)
        ),
        row=2, col=1
    )
    
    fig.add_trace(
        go.Scatter(
            x=merged_df['timestamp'],
            y=merged_df['frontend_pods'] * 10,  # Scale for visibility
            name='Frontend Pods (x10)',
            line=dict(color='blue', width=2),
            yaxis='y2'
        ),
        row=2, col=1, secondary_y=True
    )
    
    # Row 2, Col 2: Scaling effectiveness analysis
    # Calculate scaling efficiency metrics
    cpu_ranges = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100)]
    scaling_effectiveness = []
    
    for low, high in cpu_ranges:
        mask = (merged_df['cpu_usage_percent'] >= low) & (merged_df['cpu_usage_percent'] < high)
        if mask.sum() > 0:
            avg_pods = merged_df[mask]['frontend_pods'].mean()
            scaling_effectiveness.append(avg_pods)
        else:
            scaling_effectiveness.append(0)
    
    range_labels = [f"{low}-{high}%" for low, high in cpu_ranges]
    
    fig.add_trace(
        go.Bar(
            x=range_labels,
            y=scaling_effectiveness,
            name='Avg Pods per CPU Range',
            marker_color='lightgreen',
            text=[f"{v:.1f}" for v in scaling_effectiveness],
            textposition='auto'
        ),
        row=2, col=2
    )
    
    # Update layout
    fig.update_layout(
        title=f"🔗 Load vs Scaling Correlation Analysis - {selected_test}",
        height=800,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes
    fig.update_xaxes(title_text="CPU Usage %", row=1, col=1)
    fig.update_xaxes(title_text="Memory Usage %", row=1, col=2)
    fig.update_xaxes(title_text="Time", row=2, col=1)
    fig.update_xaxes(title_text="CPU Usage Range", row=2, col=2)
    
    fig.update_yaxes(title_text="Pod Count", row=1, col=1)
    fig.update_yaxes(title_text="Pod Count", row=1, col=2)
    fig.update_yaxes(title_text="CPU Usage %", row=2, col=1)
    fig.update_yaxes(title_text="Pod Count", row=2, col=1, secondary_y=True)
    fig.update_yaxes(title_text="Average Pod Count", row=2, col=2)
    
    fig.show()
    
    # Calculate correlation coefficients
    cpu_pod_corr = merged_df['cpu_usage_percent'].corr(merged_df['frontend_pods'])
    memory_pod_corr = merged_df['memory_usage_percent'].corr(merged_df['frontend_pods'])
    
    print(f"\n🔗 Correlation Analysis for {selected_test}:")
    print("="*60)
    print(f"📊 CPU Usage vs Pod Count Correlation: {cpu_pod_corr:.3f}")
    print(f"💾 Memory Usage vs Pod Count Correlation: {memory_pod_corr:.3f}")
    
    if cpu_pod_corr > 0.7:
        print("✅ Strong positive correlation between CPU and scaling")
    elif cpu_pod_corr > 0.3:
        print("⚠️  Moderate correlation between CPU and scaling")
    else:
        print("❌ Weak correlation between CPU and scaling")
    
    # Analyze scaling responsiveness
    cpu_high_load = merged_df[merged_df['cpu_usage_percent'] > 50]
    if not cpu_high_load.empty:
        avg_pods_high_load = cpu_high_load['frontend_pods'].mean()
        print(f"🚀 Average pods during high CPU load (>50%): {avg_pods_high_load:.2f}")
    
else:
    print("❌ Cannot perform correlation analysis - missing data")


🔗 Correlation Analysis for aggressive_test_051634:
📊 CPU Usage vs Pod Count Correlation: 0.989
💾 Memory Usage vs Pod Count Correlation: 0.687
✅ Strong positive correlation between CPU and scaling


In [25]:
# 💡 Cluster Efficiency & Resource Waste Analysis

if 'basic' in test_data and not test_data['basic'].empty:
    basic_df = test_data['basic']
    
    # Calculate efficiency metrics
    total_duration = (basic_df['timestamp'].max() - basic_df['timestamp'].min()).total_seconds()
    
    # Create efficiency analysis dashboard
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '🎯 Resource Efficiency Over Time',
            '💰 Cost of Idle Resources',
            '📊 Utilization Heatmap',
            '⚡ Efficiency Recommendations'
        ],
        specs=[[{"secondary_y": True}, {"type": "bar"}],
               [{"type": "heatmap"}, {"type": "indicator"}]],
        vertical_spacing=0.2
    )
    
    # Row 1, Col 1: Efficiency over time
    efficiency_score = 100 - basic_df['cpu_idle_percent']  # Higher efficiency = less idle
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=efficiency_score,
            name='CPU Efficiency %',
            line=dict(color='green', width=3),
            fill='tozeroy'
        ),
        row=1, col=1
    )
    
    memory_efficiency = 100 - basic_df['memory_idle_percent']
    fig.add_trace(
        go.Scatter(
            x=basic_df['timestamp'],
            y=memory_efficiency,
            name='Memory Efficiency %',
            line=dict(color='blue', width=2),
            yaxis='y2'
        ),
        row=1, col=1, secondary_y=True
    )
    
    # Row 1, Col 2: Cost of idle resources (simulated)
    # Assume cost per CPU-hour and memory-GB-hour
    cpu_cost_per_hour = 0.05  # $0.05 per CPU core per hour
    memory_cost_per_hour = 0.01  # $0.01 per GB per hour
    
    avg_cpu_idle = basic_df['cpu_idle_percent'].mean()
    avg_memory_idle = basic_df['memory_idle_percent'].mean()
    avg_cpu_allocatable = basic_df['cpu_allocatable_millicores'].mean() / 1000  # Convert to cores
    avg_memory_allocatable = basic_df['memory_allocatable_mb'].mean() / 1024  # Convert to GB
    
    duration_hours = total_duration / 3600
    
    cpu_waste_cost = (avg_cpu_idle / 100) * avg_cpu_allocatable * cpu_cost_per_hour * duration_hours
    memory_waste_cost = (avg_memory_idle / 100) * avg_memory_allocatable * memory_cost_per_hour * duration_hours
    total_cost = avg_cpu_allocatable * cpu_cost_per_hour * duration_hours + avg_memory_allocatable * memory_cost_per_hour * duration_hours
    used_cost = total_cost - cpu_waste_cost - memory_waste_cost
    
    cost_categories = ['Used Resources', 'CPU Waste', 'Memory Waste']
    cost_values = [used_cost, cpu_waste_cost, memory_waste_cost]
    cost_colors = ['green', 'red', 'orange']
    
    fig.add_trace(
        go.Bar(
            x=cost_categories,
            y=cost_values,
            name='Resource Costs',
            marker_color=cost_colors,
            text=[f"${v:.3f}" for v in cost_values],
            textposition='auto'
        ),
        row=1, col=2
    )
    
    # Row 2, Col 1: Utilization heatmap
    # Create time-based heatmap
    basic_df['hour'] = basic_df['timestamp'].dt.strftime('%H:%M')
    heatmap_data = basic_df.pivot_table(
        values=['cpu_usage_percent', 'memory_usage_percent'],
        index=range(len(basic_df)),
        aggfunc='mean'
    )
    
    fig.add_trace(
        go.Heatmap(
            z=np.column_stack([basic_df['cpu_usage_percent'], basic_df['memory_usage_percent']]),
            x=['CPU Usage %', 'Memory Usage %'],
            y=[f"T{i}" for i in range(len(basic_df))],
            colorscale='RdYlGn',
            showscale=True,
            colorbar=dict(title="Usage %")
        ),
        row=2, col=1
    )
    
    # Row 2, Col 2: Efficiency score indicator
    overall_efficiency = (
        (100 - avg_cpu_idle) * 0.6 +  # Weight CPU more heavily
        (100 - avg_memory_idle) * 0.4
    )
    
    fig.add_trace(
        go.Indicator(
            mode="gauge+number+delta",
            value=overall_efficiency,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Overall Efficiency"},
            delta={'reference': 80},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkgreen"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "yellow"},
                    {'range': [80, 100], 'color': "lightgreen"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ),
        row=2, col=2
    )
    
    # Update layout
    fig.update_layout(
        title=f"💡 Cluster Efficiency & Resource Waste Analysis - {selected_test}",
        height=900,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes
    fig.update_xaxes(title_text="Time", row=1, col=1)
    fig.update_xaxes(title_text="Resource Type", row=1, col=2)
    fig.update_xaxes(title_text="Resource Type", row=2, col=1)
    
    fig.update_yaxes(title_text="CPU Efficiency %", row=1, col=1)
    fig.update_yaxes(title_text="Memory Efficiency %", row=1, col=1, secondary_y=True)
    fig.update_yaxes(title_text="Cost ($)", row=1, col=2)
    fig.update_yaxes(title_text="Time Point", row=2, col=1)
    
    fig.show()
    
    # Display efficiency summary
    print(f"\n💡 Efficiency Analysis for {selected_test}:")
    print("="*60)
    print(f"⚡ Overall Efficiency Score: {overall_efficiency:.2f}%")
    print(f"🖥️  CPU Efficiency: {100 - avg_cpu_idle:.2f}% (Idle: {avg_cpu_idle:.2f}%)")
    print(f"💾 Memory Efficiency: {100 - avg_memory_idle:.2f}% (Idle: {avg_memory_idle:.2f}%)")
    print(f"💰 Estimated Waste Cost: ${cpu_waste_cost + memory_waste_cost:.4f}")
    print(f"💰 Total Resource Cost: ${total_cost:.4f}")
    print(f"📊 Waste Percentage: {((cpu_waste_cost + memory_waste_cost) / total_cost * 100):.2f}%")
    
    # Recommendations
    print(f"\n🎯 Efficiency Recommendations:")
    if avg_cpu_idle > 70:
        print("🔧 Consider reducing cluster size - high CPU idle capacity")
    elif avg_cpu_idle < 20:
        print("⚠️  Consider increasing cluster size - low CPU idle capacity")
    else:
        print("✅ CPU capacity appears well-sized")
        
    if avg_memory_idle > 70:
        print("🔧 Consider memory-optimized instances or smaller cluster")
    elif avg_memory_idle < 20:
        print("⚠️  Consider increasing memory capacity")
    else:
        print("✅ Memory capacity appears well-sized")

else:
    print("❌ No basic metrics data available for efficiency analysis")


💡 Efficiency Analysis for aggressive_test_051634:
⚡ Overall Efficiency Score: 10.42%
🖥️  CPU Efficiency: 13.20% (Idle: 86.80%)
💾 Memory Efficiency: 6.24% (Idle: 93.76%)
💰 Estimated Waste Cost: $0.0155
💰 Total Resource Cost: $0.0173
📊 Waste Percentage: 89.83%

🎯 Efficiency Recommendations:
🔧 Consider reducing cluster size - high CPU idle capacity
🔧 Consider memory-optimized instances or smaller cluster


In [27]:
# 📈 Comparative Analysis - Multiple Test Runs

def compare_multiple_tests(test_names=None):
    """Compare metrics across multiple test runs"""
    
    if test_names is None:
        test_names = available_tests[:5]  # Compare up to 5 most recent tests
    
    comparison_data = {}
    
    for test_name in test_names:
        try:
            data = load_test_data(test_name)
            if 'basic' in data and not data['basic'].empty:
                basic_df = data['basic']
                metrics = calculate_efficiency_metrics(basic_df)
                
                # Add HPA metrics if available
                if 'hpa' in data and not data['hpa'].empty:
                    hpa_df = data['hpa']
                    metrics['max_frontend_pods'] = hpa_df['frontend_pods'].max()
                    metrics['avg_frontend_pods'] = hpa_df['frontend_pods'].mean()
                    
                comparison_data[test_name] = metrics
                
        except Exception as e:
            print(f"❌ Error loading {test_name}: {e}")
    
    if not comparison_data:
        print("❌ No valid test data found for comparison")
        return
    
    # Create comparison dashboard
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '📊 CPU Efficiency Comparison',
            '💾 Memory Efficiency Comparison',
            '🚀 HPA Scaling Comparison',
            '⚡ Overall Performance Ranking'
        ],
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "polar"}]]
    )
    
    # Extract data for comparison
    test_labels = list(comparison_data.keys())
    cpu_efficiency = [100 - comparison_data[test]['avg_cpu_idle'] for test in test_labels]
    memory_efficiency = [100 - comparison_data[test]['avg_memory_idle'] for test in test_labels]
    max_cpu_usage = [comparison_data[test]['max_cpu_usage'] for test in test_labels]
    max_memory_usage = [comparison_data[test]['max_memory_usage'] for test in test_labels]
    
    # Row 1, Col 1: CPU efficiency comparison
    fig.add_trace(
        go.Bar(
            x=test_labels,
            y=cpu_efficiency,
            name='CPU Efficiency %',
            marker_color='lightcoral',
            text=[f"{v:.1f}%" for v in cpu_efficiency],
            textposition='auto'
        ),
        row=1, col=1
    )
    
    # Row 1, Col 2: Memory efficiency comparison
    fig.add_trace(
        go.Bar(
            x=test_labels,
            y=memory_efficiency,
            name='Memory Efficiency %',
            marker_color='lightblue',
            text=[f"{v:.1f}%" for v in memory_efficiency],
            textposition='auto'
        ),
        row=1, col=2
    )
    
    # Row 2, Col 1: HPA scaling comparison
    if any('max_frontend_pods' in comparison_data[test] for test in test_labels):
        max_pods = [comparison_data[test].get('max_frontend_pods', 0) for test in test_labels]
        avg_pods = [comparison_data[test].get('avg_frontend_pods', 0) for test in test_labels]
        
        fig.add_trace(
            go.Bar(
                x=test_labels,
                y=max_pods,
                name='Max Pods',
                marker_color='lightgreen',
                text=[f"{v:.0f}" for v in max_pods],
                textposition='auto'
            ),
            row=2, col=1
        )
        
        fig.add_trace(
            go.Bar(
                x=test_labels,
                y=avg_pods,
                name='Avg Pods',
                marker_color='darkgreen',
                text=[f"{v:.1f}" for v in avg_pods],
                textposition='auto'
            ),
            row=2, col=1
        )
    
    # Row 2, Col 2: Radar chart for overall comparison
    if len(test_labels) <= 3:  # Only show radar for up to 3 tests for clarity
        categories = ['CPU Efficiency', 'Memory Efficiency', 'Max CPU Usage', 'Max Memory Usage']
        
        for i, test in enumerate(test_labels[:3]):
            values = [
                cpu_efficiency[i],
                memory_efficiency[i], 
                max_cpu_usage[i],
                max_memory_usage[i]
            ]
            
            fig.add_trace(
                go.Scatterpolar(
                    r=values,
                    theta=categories,
                    fill='toself',
                    name=f'{test}',
                    opacity=0.6
                ),
                row=2, col=2
            )
    
    # Update layout
    fig.update_layout(
        title="📈 Comparative Analysis - Multiple Test Runs",
        height=800,
        showlegend=True,
        template="plotly_white"
    )
    
    # Update axes
    fig.update_xaxes(title_text="Test Name", row=1, col=1)
    fig.update_xaxes(title_text="Test Name", row=1, col=2)
    fig.update_xaxes(title_text="Test Name", row=2, col=1)
    
    fig.update_yaxes(title_text="Efficiency %", row=1, col=1)
    fig.update_yaxes(title_text="Efficiency %", row=1, col=2)
    fig.update_yaxes(title_text="Pod Count", row=2, col=1)
    
    fig.show()
    
    # Display comparison summary
    print(f"\n📈 Comparison Summary ({len(test_labels)} tests):")
    print("="*80)
    
    for i, test in enumerate(test_labels):
        print(f"\n🔍 {test}:")
        print(f"   CPU Efficiency: {cpu_efficiency[i]:.2f}%")
        print(f"   Memory Efficiency: {memory_efficiency[i]:.2f}%")
        print(f"   Peak CPU Usage: {max_cpu_usage[i]:.2f}%")
        print(f"   Peak Memory Usage: {max_memory_usage[i]:.2f}%")
        if 'max_frontend_pods' in comparison_data[test]:
            print(f"   Max Pods: {comparison_data[test]['max_frontend_pods']:.0f}")
    
    # Find best performing test
    overall_scores = []
    for i, test in enumerate(test_labels):
        # Score based on efficiency and appropriate resource usage
        score = (cpu_efficiency[i] + memory_efficiency[i]) / 2
        overall_scores.append((test, score))
    
    overall_scores.sort(key=lambda x: x[1], reverse=True)
    
    print(f"\n🏆 Performance Ranking:")
    for i, (test, score) in enumerate(overall_scores):
        medal = ["🥇", "🥈", "🥉"][i] if i < 3 else "🔹"
        print(f"   {medal} {test}: {score:.2f}% overall efficiency")

# Run comparison if multiple tests are available
if len(available_tests) > 1:
    print(f"🔍 Comparing {len(available_tests)} available tests...")
    compare_multiple_tests(available_tests)
else:
    print("ℹ️  Only one test available. Run more tests to enable comparison analysis.")

🔍 Comparing 8 available tests...
✅ Loaded basic metrics: 3 records
❌ Error loading quick_test: 'cpu_usage_percent'
✅ Loaded basic metrics: 17 records
✅ Loaded HPA metrics: 17 records
✅ Loaded pod metrics: 0 records
✅ Loaded Locust stats: 15 records
✅ Loaded Locust history: 176 records
✅ Loaded basic metrics: 3 records
✅ Loaded HPA metrics: 3 records
✅ Loaded pod metrics: 0 records
✅ Loaded Locust stats: 9 records
✅ Loaded Locust history: 26 records
✅ Loaded basic metrics: 1 records
✅ Loaded HPA metrics: 1 records
✅ Loaded pod metrics: 0 records
✅ Loaded Locust stats: 5 records
✅ Loaded Locust history: 6 records
✅ Loaded basic metrics: 6 records
✅ Loaded HPA metrics: 6 records
✅ Loaded pod metrics: 0 records
✅ Loaded Locust stats: 9 records
✅ Loaded Locust history: 56 records
✅ Loaded basic metrics: 3 records
✅ Loaded HPA metrics: 12 records
✅ Loaded pod metrics: 0 records



📈 Comparison Summary (5 tests):

🔍 aggressive_test_051634:
   CPU Efficiency: 13.20%
   Memory Efficiency: 6.24%
   Peak CPU Usage: 19.85%
   Peak Memory Usage: 6.93%
   Max Pods: 6

🔍 load_test_20250628_050326:
   CPU Efficiency: 3.50%
   Memory Efficiency: 5.92%
   Peak CPU Usage: 3.50%
   Peak Memory Usage: 5.92%
   Max Pods: 2

🔍 load_test_20250628_045834:
   CPU Efficiency: 3.15%
   Memory Efficiency: 6.36%
   Peak CPU Usage: 3.15%
   Peak Memory Usage: 6.36%
   Max Pods: 1

🔍 load_test_20250628_045939:
   CPU Efficiency: 4.16%
   Memory Efficiency: 5.93%
   Peak CPU Usage: 4.47%
   Peak Memory Usage: 6.43%
   Max Pods: 1

🔍 quicktest:
   CPU Efficiency: 3.15%
   Memory Efficiency: 6.23%
   Peak CPU Usage: 3.15%
   Peak Memory Usage: 6.23%

🏆 Performance Ranking:
   🥇 aggressive_test_051634: 9.72% overall efficiency
   🥈 load_test_20250628_045939: 5.05% overall efficiency
   🥉 load_test_20250628_045834: 4.76% overall efficiency
   🔹 load_test_20250628_050326: 4.71% overall effici

In [28]:
# 📋 Executive Summary & Report Export

def generate_executive_summary(test_name, test_data):
    """Generate a comprehensive executive summary"""
    
    summary = {
        'test_name': test_name,
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'metrics': {}
    }
    
    # Basic metrics summary
    if 'basic' in test_data and not test_data['basic'].empty:
        basic_df = test_data['basic']
        duration = (basic_df['timestamp'].max() - basic_df['timestamp'].min()).total_seconds()
        
        summary['metrics']['duration_seconds'] = duration
        summary['metrics']['duration_minutes'] = duration / 60
        
        efficiency_metrics = calculate_efficiency_metrics(basic_df)
        summary['metrics'].update(efficiency_metrics)
        
        # Resource utilization trends
        summary['metrics']['cpu_trend'] = 'increasing' if basic_df['cpu_usage_percent'].iloc[-1] > basic_df['cpu_usage_percent'].iloc[0] else 'decreasing'
        summary['metrics']['memory_trend'] = 'increasing' if basic_df['memory_usage_percent'].iloc[-1] > basic_df['memory_usage_percent'].iloc[0] else 'decreasing'
    
    # HPA metrics summary
    if 'hpa' in test_data and not test_data['hpa'].empty:
        hpa_df = test_data['hpa']
        
        summary['metrics']['hpa_max_frontend_pods'] = hpa_df['frontend_pods'].max()
        summary['metrics']['hpa_min_frontend_pods'] = hpa_df['frontend_pods'].min()
        summary['metrics']['hpa_avg_frontend_pods'] = hpa_df['frontend_pods'].mean()
        
        # Count scaling events
        frontend_changes = hpa_df['frontend_pods'].diff().fillna(0)
        summary['metrics']['scale_up_events'] = (frontend_changes > 0).sum()
        summary['metrics']['scale_down_events'] = (frontend_changes < 0).sum()
    
    # Performance metrics summary
    if 'locust_history' in test_data and not test_data['locust_history'].empty:
        locust_df = test_data['locust_history']
        
        if 'Requests/s' in locust_df.columns:
            summary['metrics']['peak_rps'] = locust_df['Requests/s'].max()
            summary['metrics']['avg_rps'] = locust_df['Requests/s'].mean()
        
        if 'Average Response Time' in locust_df.columns:
            summary['metrics']['avg_response_time'] = locust_df['Average Response Time'].mean()
            summary['metrics']['p95_response_time'] = locust_df['Average Response Time'].quantile(0.95)
    
    return summary

def create_summary_visualization(summary):
    """Create a summary visualization"""
    
    # Create executive summary dashboard
    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=[
            '📊 Resource Efficiency',
            '🚀 Scaling Activity', 
            '⚡ Performance Metrics',
            '📈 Utilization Trends',
            '🎯 Key Insights',
            '📋 Test Overview'
        ],
        specs=[[{"type": "indicator"}, {"type": "bar"}, {"type": "indicator"}],
               [{"type": "bar"}, {"type": "table"}, {"type": "indicator"}]]
    )
    
    metrics = summary['metrics']
    
    # Row 1, Col 1: Resource efficiency gauge
    if 'avg_cpu_idle' in metrics and 'avg_memory_idle' in metrics:
        overall_efficiency = (
            (100 - metrics['avg_cpu_idle']) * 0.6 +
            (100 - metrics['avg_memory_idle']) * 0.4
        )
        
        fig.add_trace(
            go.Indicator(
                mode="gauge+number",
                value=overall_efficiency,
                title={'text': "Overall Efficiency %"},
                gauge={
                    'axis': {'range': [None, 100]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 50], 'color': "lightgray"},
                        {'range': [50, 80], 'color': "yellow"},
                        {'range': [80, 100], 'color': "lightgreen"}
                    ]
                }
            ),
            row=1, col=1
        )
    
    # Row 1, Col 2: Scaling events
    if 'scale_up_events' in metrics and 'scale_down_events' in metrics:
        fig.add_trace(
            go.Bar(
                x=['Scale Up', 'Scale Down'],
                y=[metrics['scale_up_events'], metrics['scale_down_events']],
                name='Scaling Events',
                marker_color=['green', 'red']
            ),
            row=1, col=2
        )
    
    # Row 1, Col 3: Performance indicator
    if 'avg_rps' in metrics:
        fig.add_trace(
            go.Indicator(
                mode="number+delta",
                value=metrics['avg_rps'],
                title={'text': "Average RPS"},
                delta={'reference': 100, 'relative': True}
            ),
            row=1, col=3
        )
    
    # Row 2, Col 1: Utilization comparison
    if all(key in metrics for key in ['avg_cpu_usage', 'avg_memory_usage']):
        fig.add_trace(
            go.Bar(
                x=['CPU', 'Memory'],
                y=[metrics['avg_cpu_usage'], metrics['avg_memory_usage']],
                name='Average Utilization %',
                marker_color=['red', 'blue']
            ),
            row=2, col=1
        )
    
    # Row 2, Col 2: Key metrics table
    table_data = []
    for key, value in metrics.items():
        if isinstance(value, (int, float)):
            if 'percent' in key or 'usage' in key or 'idle' in key:
                formatted_value = f"{value:.2f}%"
            elif 'time' in key:
                formatted_value = f"{value:.2f}ms"
            elif 'rps' in key:
                formatted_value = f"{value:.2f}"
            else:
                formatted_value = f"{value:.2f}"
        else:
            formatted_value = str(value)
        
        table_data.append([key.replace('_', ' ').title(), formatted_value])
    
    fig.add_trace(
        go.Table(
            header=dict(values=['Metric', 'Value']),
            cells=dict(values=list(zip(*table_data)) if table_data else [[], []])
        ),
        row=2, col=2
    )
    
    # Row 2, Col 3: Test duration
    if 'duration_minutes' in metrics:
        fig.add_trace(
            go.Indicator(
                mode="number",
                value=metrics['duration_minutes'],
                title={'text': "Test Duration (min)"}
            ),
            row=2, col=3
        )
    
    fig.update_layout(
        title=f"📋 Executive Summary - {summary['test_name']}",
        height=600,
        template="plotly_white",
        showlegend=False
    )
    
    return fig

# Generate executive summary for current test
if selected_test and test_data:
    print(f"📋 Generating Executive Summary for {selected_test}...")
    
    exec_summary = generate_executive_summary(selected_test, test_data)
    summary_fig = create_summary_visualization(exec_summary)
    summary_fig.show()
    
    # Display text summary
    print(f"\n📋 Executive Summary - {selected_test}")
    print("="*80)
    print(f"📅 Generated: {exec_summary['timestamp']}")
    
    if 'duration_minutes' in exec_summary['metrics']:
        duration = exec_summary['metrics']['duration_minutes']
        print(f"⏱️  Test Duration: {duration:.2f} minutes")
    
    if 'avg_cpu_usage' in exec_summary['metrics']:
        cpu_usage = exec_summary['metrics']['avg_cpu_usage']
        cpu_idle = exec_summary['metrics']['avg_cpu_idle']
        print(f"🖥️  CPU: {cpu_usage:.2f}% average usage, {cpu_idle:.2f}% idle")
    
    if 'avg_memory_usage' in exec_summary['metrics']:
        mem_usage = exec_summary['metrics']['avg_memory_usage']
        mem_idle = exec_summary['metrics']['avg_memory_idle']
        print(f"💾 Memory: {mem_usage:.2f}% average usage, {mem_idle:.2f}% idle")
    
    if 'scale_up_events' in exec_summary['metrics']:
        scale_up = exec_summary['metrics']['scale_up_events']
        scale_down = exec_summary['metrics']['scale_down_events']
        print(f"🚀 HPA Events: {scale_up} scale-ups, {scale_down} scale-downs")
    
    if 'avg_rps' in exec_summary['metrics']:
        rps = exec_summary['metrics']['avg_rps']
        print(f"⚡ Performance: {rps:.2f} average RPS")
    
    # Export summary to file
    import json
    summary_file = f"metrics_data/{selected_test}/executive_summary.json"
    with open(summary_file, 'w') as f:
        json.dump(exec_summary, f, indent=2, default=str)
    
    print(f"\n💾 Summary exported to: {summary_file}")
    
else:
    print("❌ No test data available for executive summary")

📋 Generating Executive Summary for aggressive_test_051634...



📋 Executive Summary - aggressive_test_051634
📅 Generated: 2025-06-28 05:39:19
⏱️  Test Duration: 2.93 minutes
🖥️  CPU: 13.20% average usage, 86.80% idle
💾 Memory: 6.24% average usage, 93.76% idle
🚀 HPA Events: 2 scale-ups, 0 scale-downs
⚡ Performance: 65.67 average RPS

💾 Summary exported to: metrics_data/aggressive_test_051634/executive_summary.json
