In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting configuration: seaborn theme and default matplotlib figure size
PLOT_STYLE = "whitegrid"
DEFAULT_FIGSIZE = [10, 6]

sns.set_theme(style=PLOT_STYLE)
plt.rcParams['figure.figsize'] = DEFAULT_FIGSIZE

## Load Benchmark Data

In [None]:
# Load benchmark results from JSON file.
# The encoding is passed explicitly: JSON files are UTF-8, whereas open()
# otherwise falls back to the platform's locale encoding, which can fail or
# mis-decode non-ASCII content on some systems.
with open('benchmark_results.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract benchmark entries (stored under the top-level 'benchmarks' key)
benchmarks = data['benchmarks']
print(f"Total number of benchmark entries: {len(benchmarks)}")

## Parse and Structure Data

In [None]:
# Create a DataFrame from the benchmark entries extracted from the JSON file.
# Presumably each entry is a dict, giving one row per entry and one column per
# field — verify against benchmark_results.json.
df = pd.DataFrame(benchmarks)

# Extract method name from the benchmark name
def extract_method(name):
    """Map a benchmark name to a short method label based on its prefix.

    Args:
        name: Benchmark name string.

    Returns:
        'Raster' for names starting with 'DistanceTransformGeos',
        'Blocks' for 'DistanceTransformBlocks', 'Task' for
        'DistanceTransformTask', and 'Unknown' for anything else.
    """
    # Checked in order; the first matching prefix wins.
    prefix_labels = (
        ('DistanceTransformGeos', 'Raster'),
        ('DistanceTransformBlocks', 'Blocks'),
        ('DistanceTransformTask', 'Task'),
    )
    for prefix, label in prefix_labels:
        if name.startswith(prefix):
            return label
    return 'Unknown'

# Label every benchmark row with the method derived from its name
method_labels = df['name'].apply(extract_method)
df['method'] = method_labels

# Restrict to the columns used by the analysis; copy so df_clean is independent of df
kept_columns = ['method', 'lambda', 'real_time', 'size', 'label', 'bytes_per_second']
df_clean = df[kept_columns].copy()

# Quick overview of what was parsed
methods_found = df_clean['method'].unique()
lambdas_found = sorted(df_clean['lambda'].unique())
print("Methods found:", methods_found)
print("Lambda values found:", lambdas_found)
print(f"\nDataset shape: {df_clean.shape}")
df_clean.head(10)

## Boxplots: Execution Time Distribution per Lambda Value

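Each figure below corresponds to one lambda value and contains one boxplot per method (Raster, Blocks, Task), showing the distribution of execution times (in milliseconds) with the individual measurements overlaid as points.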

In [None]:
# Get unique lambda values sorted; one figure is drawn per value
lambda_values = sorted(df_clean['lambda'].unique())

# Color palette for methods
palette = {'Raster': '#3498db', 'Blocks': '#e74c3c', 'Task': '#2ecc71'}

# Single source of truth for the x-axis order. The same list drives the boxes,
# the overlaid points and the mean annotations, so the labels cannot drift
# apart (the annotations previously looked up 'Geos', which never matches the
# 'Raster' label, so the Raster mean was silently skipped).
method_order = ['Raster', 'Blocks', 'Task']

# Create individual boxplots for each lambda value
for lambda_val in lambda_values:
    fig, ax = plt.subplots(figsize=(8, 6))

    # Filter data for this lambda value
    df_lambda = df_clean[df_clean['lambda'] == lambda_val]

    # Boxplot of the execution-time distribution for each method
    sns.boxplot(
        data=df_lambda,
        x='method',
        y='real_time',
        order=method_order,
        palette=palette,
        ax=ax
    )

    # Overlay a strip plot to show the individual data points
    sns.stripplot(
        data=df_lambda,
        x='method',
        y='real_time',
        order=method_order,
        color='black',
        alpha=0.5,
        size=4,
        ax=ax
    )

    ax.set_title(f'Execution Time Distribution (Lambda = {lambda_val})', fontsize=14, fontweight='bold')
    ax.set_xlabel('Method', fontsize=12)
    # NOTE(review): the label assumes real_time is expressed in milliseconds —
    # confirm against the time unit recorded in the benchmark file.
    ax.set_ylabel('Execution Time (ms)', fontsize=12)

    # Add mean values as text near the top of the axes, one per method.
    # The y position is computed once: adding text does not change the limits.
    means = df_lambda.groupby('method')['real_time'].mean()
    text_y = ax.get_ylim()[1] * 0.95
    for i, method in enumerate(method_order):
        # Skip methods with no rows for this lambda value
        if method in means.index:
            ax.text(i, text_y, f'mean={means[method]:.1f}',
                    ha='center', fontsize=10, color='darkblue')

    plt.tight_layout()
    plt.show()
    # Release the figure so figures do not accumulate across loop iterations
    plt.close(fig)

## Summary Statistics per Lambda Value

In [None]:
# Aggregations to compute on real_time, mapped to the column headers shown in the table
stat_labels = {
    'count': 'Count',
    'mean': 'Mean (ms)',
    'std': 'Std (ms)',
    'min': 'Min (ms)',
    'median': 'Median (ms)',
    'max': 'Max (ms)',
}

# One row per (lambda, method) combination, rounded to two decimals
times_by_group = df_clean.groupby(['lambda', 'method'])['real_time']
summary_stats = times_by_group.agg(list(stat_labels)).round(2)
summary_stats.columns = list(stat_labels.values())

print("Summary Statistics by Lambda and Method:")
summary_stats