In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from vivarium_profiling.tools.extraction import ExtractionConfig
from vivarium_profiling.tools import plotting

# Configure matplotlib for notebook
%matplotlib inline

## Load Data

In [None]:
# Input locations. NOTE(review): the {{...}} tokens look like template
# placeholders that get substituted before the notebook runs — confirm.
benchmark_results_path = Path(r"{{BENCHMARK_RESULTS_PATH}}")
summary_path = Path(r"{{SUMMARY_PATH}}")

# Read both CSVs in order: raw benchmark results first, then the summary table.
raw, summary = (
    pd.read_csv(csv_path) for csv_path in (benchmark_results_path, summary_path)
)

# Default extraction configuration; later cells read `config.patterns` from it.
config = ExtractionConfig()

# Quick sanity report: row counts followed by the (rows, columns) shapes.
print(
    f"Loaded {len(raw)} raw benchmark results",
    f"Loaded {len(summary)} model summaries",
    f"\nRaw data shape: {raw.shape}",
    f"Summary data shape: {summary.shape}",
    sep="\n",
)

## Performance Analysis

Overall runtime and memory usage comparison across models.

In [None]:
# Columns of `summary` used for the overall comparison: total runtime,
# memory, and the "_pdiff" companion of the runtime column.
overall_metric_cols = {
    "time_col": "rt_s",
    "mem_col": "mem_mb",
    "time_pdiff_col": "rt_s_pdiff",
}

# save=False with no output_dir: the figures are requested for display only.
plotting.create_figures(
    summary,
    output_dir=None,
    chart_title="performance_analysis",
    save=False,
    **overall_metric_cols,
)

## Phase Runtime Analysis

Detailed analysis of individual simulation phases (setup, initialize_simulants, run, finalize, report).

In [None]:
# Phase metrics are the config patterns whose cumulative-time template is
# exactly "rt_{name}_s".
phase_patterns = list(
    filter(lambda p: p.cumtime_template == "rt_{name}_s", config.patterns)
)

# One set of runtime figures per phase; memory is not plotted here (mem_col=None).
for phase in phase_patterns:
    phase_time_col = phase.cumtime_col

    print(f"\n=== {phase.name.upper()} ===")
    plotting.create_figures(
        summary,
        output_dir=None,
        chart_title=f"runtime_analysis_{phase.name}",
        time_col=phase_time_col,
        mem_col=None,
        # The companion column is the time column name with a "_pdiff" suffix.
        time_pdiff_col=f"{phase_time_col}_pdiff",
        save=False,
    )

## Non-Run Time Analysis

Analysis of time spent outside the main run phase (setup, initialization, reporting, etc.).

In [None]:
# Runtime-only view of the "rt_non_run_s" column and its "_pdiff" companion;
# memory is not plotted (mem_col=None).
non_run_metric_cols = {
    "time_col": "rt_non_run_s",
    "mem_col": None,
    "time_pdiff_col": "rt_non_run_s_pdiff",
}

# save=False with no output_dir: the figures are requested for display only.
plotting.create_figures(
    summary,
    output_dir=None,
    chart_title="runtime_analysis_non_run",
    save=False,
    **non_run_metric_cols,
)

## Bottleneck Cumulative Time Analysis

Analysis of cumulative time spent in known bottleneck functions (gather_results, pipeline_call, population_get).

In [None]:
# Bottleneck patterns: cumulative-time extraction is enabled and the column
# is named "<name>_cumtime".
bottleneck_patterns = list(
    filter(
        lambda p: p.extract_cumtime and p.cumtime_col == f"{p.name}_cumtime",
        config.patterns,
    )
)

# One set of runtime figures per bottleneck; memory is not plotted (mem_col=None).
for bottleneck in bottleneck_patterns:
    cumtime_col = bottleneck.cumtime_col

    print(f"\n=== {bottleneck.name.upper()} ===")
    plotting.create_figures(
        summary,
        output_dir=None,
        chart_title=f"bottleneck_runtime_analysis_{bottleneck.name}",
        time_col=cumtime_col,
        mem_col=None,
        # The companion column is the time column name with a "_pdiff" suffix.
        time_pdiff_col=f"{cumtime_col}_pdiff",
        save=False,
    )

## Bottleneck Fractions vs Scale Factor

Fraction of run() time spent in each bottleneck function, plotted against model scale factor.

In [None]:
# Options for the bottleneck-fraction plot: use the "median" metric, pass the
# extraction config through, and request display only (save=False, no output_dir).
fraction_plot_options = {
    "output_dir": None,
    "config": config,
    "metric": "median",
    "save": False,
}

plotting.plot_bottleneck_fractions(summary, **fraction_plot_options)