In [1]:
import pickle

results_dir = 'results/'

# Load the Alibaba streaming benchmark results for both backends.
# NOTE(security): pickle can execute arbitrary code while loading; only open
# result files that were produced by a trusted benchmark run.
# The context managers close each file handle as soon as its payload is read,
# instead of leaving it open until garbage collection.
with open(f"{results_dir}/result_alibaba_streaming_GPU.pkl", "rb") as gpu_file:
    alibaba_data_gpu = pickle.load(gpu_file)
with open(f"{results_dir}/result_alibaba_streaming_CPU.pkl", "rb") as cpu_file:
    alibaba_data_cpu = pickle.load(cpu_file)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os

# Ensure figures directory exists
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format.
# rcParams is global matplotlib state, so these settings stay in effect for
# every figure created later in the same kernel session.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Primary data: series read from the Alibaba result dicts loaded in the first
# cell (this cell raises NameError if that cell has not been run).
edge_addition_time_cpu = alibaba_data_cpu['edge_addition_time']
walk_sampling_time_cpu = alibaba_data_cpu['walk_sampling_time']
total_edges_cpu = alibaba_data_cpu['total_edges']
edge_addition_time_gpu = alibaba_data_gpu['edge_addition_time']
walk_sampling_time_gpu = alibaba_data_gpu['walk_sampling_time']
total_edges_gpu = alibaba_data_gpu['total_edges']

# Compute total runtime in hours
# (the division by 3600 assumes the recorded times are in seconds - TODO confirm)
total_time_cpu = np.sum(edge_addition_time_cpu) + np.sum(walk_sampling_time_cpu)
total_time_hours_cpu = total_time_cpu / 3600

total_time_gpu = np.sum(edge_addition_time_gpu) + np.sum(walk_sampling_time_gpu)
total_time_hours_gpu = total_time_gpu / 3600

# Calculate mean times (drawn as dashed horizontal lines in the figures below)
mean_edge_time_cpu = np.mean(edge_addition_time_cpu)
mean_edge_time_gpu = np.mean(edge_addition_time_gpu)
mean_walk_time_cpu = np.mean(walk_sampling_time_cpu)
mean_walk_time_gpu = np.mean(walk_sampling_time_gpu)

# X-axis labels: step i (1-based) is placed at i * minutes_per_step minutes,
# then converted to days (1440 minutes per day). CPU and GPU get separate
# axes because their series are sized independently from their own lengths.
minutes_per_step = 3
steps_cpu = list(range(1, len(edge_addition_time_cpu) + 1))
step_labels_minutes_cpu = [s * minutes_per_step for s in steps_cpu]
step_labels_days_cpu = [m / 1440 for m in step_labels_minutes_cpu]

steps_gpu = list(range(1, len(edge_addition_time_gpu) + 1))
step_labels_minutes_gpu = [s * minutes_per_step for s in steps_gpu]
step_labels_days_gpu = [m / 1440 for m in step_labels_minutes_gpu]

# Academic color scheme; this cell uses index 0 for GPU and index 1 for CPU.
colors = ['#1f77b4', '#ff7f0e', '#d62728', '#2ca02c']  # Blue, Orange, Red, Green

def create_clean_axis(ax):
    """Give ``ax`` the shared minimal look used by the figures in this cell.

    Draws a faint grid, hides the top and right spines, thins the left and
    bottom spines to 0.8, and uses short outward-pointing ticks.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

def create_secondary_axis(ax, step_labels_days_gpu, total_edges_gpu, tick_edges_billions=None):
    """Add a top x-axis that relabels the day axis of ``ax`` in billions of edges.

    Tick positions assume the edge count grows proportionally with elapsed
    days: the tick for ``e`` billion edges is placed at
    ``e / max_edges_in_billions * max_days``.

    Args:
        ax: Axes whose x-axis is expressed in days; its x-limits are mirrored
            onto the new axis.
        step_labels_days_gpu: Day value of every step (non-empty sequence).
        total_edges_gpu: Edge count at every step, as a raw count (it is
            divided by 1e9 here).
        tick_edges_billions: Optional tick values in billions of edges.
            Defaults to 0, 10, ..., 80.

    Returns:
        The twin axes created with ``ax.twiny()``.

    Raises:
        ValueError: If the largest edge count is not positive, since ticks
            cannot be positioned proportionally in that case.
    """
    if tick_edges_billions is None:
        tick_edges_billions = [0, 10, 20, 30, 40, 50, 60, 70, 80]

    # Use GPU data for secondary axis (longer dataset)
    max_days_gpu = max(step_labels_days_gpu)
    max_edges_gpu = max(total_edges_gpu) / 1e9  # Convert to billions
    if max_edges_gpu <= 0:
        raise ValueError("total_edges_gpu must contain a positive edge count")

    tick_days = [(edge_bil / max_edges_gpu) * max_days_gpu for edge_bil in tick_edges_billions]

    ax_secondary = ax.twiny()
    ax_secondary.set_xticks(tick_days)
    ax_secondary.set_xticklabels([f'{int(e)}B' for e in tick_edges_billions])
    ax_secondary.set_xlabel("Total Edges Added (Billions)")
    ax_secondary.tick_params(axis='x', labelsize=10, pad=5)

    # set_xticks may widen the view limits so that every tick is visible, which
    # would shift the twin axis relative to ``ax``. Mirroring the primary
    # limits last keeps both x-axes aligned; ticks outside that range are
    # simply not drawn.
    ax_secondary.set_xlim(ax.get_xlim())

    return ax_secondary

# Figure 1: Edge Addition Performance
# Per-step edge-addition time for GPU and CPU against days of data processed,
# on a logarithmic y-axis, with a dashed line at each backend's mean.
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))

# Plot lines
ax1.plot(step_labels_days_gpu, edge_addition_time_gpu,
         marker='o', markersize=2, linestyle='-', color=colors[0], linewidth=1.5)
ax1.plot(step_labels_days_cpu, edge_addition_time_cpu,
         marker='s', markersize=2, linestyle='-', color=colors[1], linewidth=1.5)

# Add mean lines
ax1.axhline(y=mean_edge_time_gpu, color=colors[0], linestyle='--',
            alpha=0.8, linewidth=2)
ax1.axhline(y=mean_edge_time_cpu, color=colors[1], linestyle='--',
            alpha=0.8, linewidth=2)

# Add text annotations (positions are axes fractions, not data coordinates)
ax1.text(0.98, 0.65, f'GPU Mean: {mean_edge_time_gpu:.3f}s',
         transform=ax1.transAxes, fontsize=9, ha='right',
         bbox=dict(boxstyle="round,pad=0.2", facecolor=colors[0], alpha=0.2))
ax1.text(0.98, 0.55, f'CPU Mean: {mean_edge_time_cpu:.3f}s',
         transform=ax1.transAxes, fontsize=9, ha='right',
         bbox=dict(boxstyle="round,pad=0.2", facecolor=colors[1], alpha=0.2))

ax1.set_xlabel("Days of Data Processed")
ax1.set_ylabel("Time (seconds)")
ax1.set_yscale('log')
create_clean_axis(ax1)

# Add secondary axis (must come after plotting: it copies ax1's current x-limits)
ax1_secondary = create_secondary_axis(ax1, step_labels_days_gpu, total_edges_gpu)

# plt.savefig / plt.close act on the current figure, which is fig1 here.
plt.tight_layout()
plt.savefig('figures/plot_5_alibaba_edge_addition_performance.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_5_alibaba_edge_addition_performance.png', format='png', bbox_inches='tight')
plt.close()

# Figure 2: Walk Sampling Performance
# Same layout as Figure 1 but for walk-sampling time, on a linear y-axis.
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))

# Plot lines
ax2.plot(step_labels_days_gpu, walk_sampling_time_gpu,
         marker='o', markersize=2, linestyle='-', color=colors[0], linewidth=1.5)
ax2.plot(step_labels_days_cpu, walk_sampling_time_cpu,
         marker='s', markersize=2, linestyle='-', color=colors[1], linewidth=1.5)

# Add mean lines
ax2.axhline(y=mean_walk_time_gpu, color=colors[0], linestyle='--',
            alpha=0.8, linewidth=2)
ax2.axhline(y=mean_walk_time_cpu, color=colors[1], linestyle='--',
            alpha=0.8, linewidth=2)

# Add text annotations (positions are axes fractions, not data coordinates)
ax2.text(0.98, 0.95, f'GPU Mean: {mean_walk_time_gpu:.3f}s',
         transform=ax2.transAxes, fontsize=9, ha='right',
         bbox=dict(boxstyle="round,pad=0.2", facecolor=colors[0], alpha=0.2))
ax2.text(0.98, 0.85, f'CPU Mean: {mean_walk_time_cpu:.3f}s',
         transform=ax2.transAxes, fontsize=9, ha='right',
         bbox=dict(boxstyle="round,pad=0.2", facecolor=colors[1], alpha=0.2))

ax2.set_xlabel("Days of Data Processed")
ax2.set_ylabel("Time (seconds)")
create_clean_axis(ax2)

# Add secondary axis (must come after plotting: it copies ax2's current x-limits)
ax2_secondary = create_secondary_axis(ax2, step_labels_days_gpu, total_edges_gpu)

# plt.savefig / plt.close act on the current figure, which is fig2 here.
plt.tight_layout()
plt.savefig('figures/plot_5_alibaba_walk_sampling_performance.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_5_alibaba_walk_sampling_performance.png', format='png', bbox_inches='tight')
plt.close()

# Standalone legend for the performance figures, exported as its own file so it
# can be positioned independently of the plots.
fig_legend = plt.figure(figsize=(6, 0.5))

# (marker, colour, label) for each proxy handle, in legend order.
_legend_specs = [
    ('o', colors[0], 'GPU Edge Addition'),
    ('s', colors[1], 'CPU Edge Addition'),
    ('o', colors[0], 'GPU Walk Sampling'),
    ('s', colors[1], 'CPU Walk Sampling'),
]
legend_elements = [
    plt.Line2D([0], [0], marker=spec_marker, color=spec_color, linestyle='-',
               markersize=6, linewidth=2, label=spec_label, markeredgewidth=0.5)
    for spec_marker, spec_color, spec_label in _legend_specs
]

# Tight spacing so the exported legend carries as little padding as possible.
_legend_layout = dict(
    loc='center',
    ncol=2,
    frameon=True,
    fancybox=False,
    edgecolor='black',
    columnspacing=1.2,
    handletextpad=0.4,
    handlelength=1.8,
    borderpad=0.2,
    labelspacing=0.3,
)
legend = fig_legend.legend(handles=legend_elements, **_legend_layout)
legend.get_frame().set_linewidth(0.8)

# Measure the legend in pixels, convert to inches, and crop the saved image to
# exactly that box.
_legend_renderer = fig_legend.canvas.get_renderer()
bbox = legend.get_window_extent(_legend_renderer)
bbox_inches = bbox.transformed(fig_legend.dpi_scale_trans.inverted())

fig_legend.savefig('figures/plot_5_alibaba_performance_legend.pdf', format='pdf',
                   bbox_inches=bbox_inches, pad_inches=0.02)
fig_legend.savefig('figures/plot_5_alibaba_performance_legend.png', format='png',
                   bbox_inches=bbox_inches, pad_inches=0.02)
plt.close(fig_legend)

# Create a performance summary figure (optional)
# A small text-only figure reporting the total runtime of each backend in hours.
fig_summary = plt.figure(figsize=(4, 1))
summary_text = f"GPU: {total_time_hours_gpu:.1f}h total\nCPU: {total_time_hours_cpu:.1f}h total"

# Centre the text in figure coordinates so it does not depend on any axes limits.
plt.text(0.5, 0.5, summary_text, transform=fig_summary.transFigure,
         fontsize=11, verticalalignment='center', horizontalalignment='center',
         bbox=dict(boxstyle="round,pad=0.3", facecolor='lightgray', alpha=0.8))

plt.axis('off')
plt.savefig('figures/plot_5_alibaba_performance_summary.pdf', format='pdf', bbox_inches='tight', pad_inches=0.02)
plt.savefig('figures/plot_5_alibaba_performance_summary.png', format='png', bbox_inches='tight', pad_inches=0.02)
plt.close()

# Summary of what was written. NOTE(review): the names printed here omit the
# 'plot_5_alibaba_' prefix used in the actual file names, and PNG copies are
# written alongside each PDF.
print("Created separate performance figures (no titles, no legends):")
print("- edge_addition_performance.pdf")
print("- walk_sampling_performance.pdf")
print("- performance_legend.pdf (standalone legend)")
print("- performance_summary.pdf (summary stats)")
print("\nAll figures saved in figures/ directory")
print("Note: Titles and legends removed from individual figures")

Created separate performance figures (no titles, no legends):
- edge_addition_performance.pdf
- walk_sampling_performance.pdf
- performance_legend.pdf (standalone legend)
- performance_summary.pdf (summary stats)

All figures saved in figures/ directory
Note: Titles and legends removed from individual figures


In [3]:
import pickle
import matplotlib.pyplot as plt
import os

# Ensure figures directory exists
os.makedirs('figures', exist_ok=True)

# Load results
# NOTE(security): pickle can execute arbitrary code while loading; only open
# files produced by a trusted benchmark run.
with open('results/memory_benchmarking.pickle', 'rb') as f:
    results = pickle.load(f)

# Debug: Print available data keys
# (the plotting code below looks entries up as '<experiment>_<config>')
print("Available data keys in pickle file:")
for key in sorted(results.keys()):
    print(f"  - {key}")
print()

# Configure fonts and style for academic paper - EuroSys format.
# rcParams is global matplotlib state, so these settings stay in effect for
# every figure created later in the same kernel session.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Professional color scheme and markers.
# The five lists below are parallel: entry i of each describes the same graph
# configuration. `configs` holds the key suffixes looked up in `results`;
# `config_labels` holds the matching legend text.
configs = ['directed_with_weights', 'directed_without_weights',
          'undirected_with_weights', 'undirected_without_weights']
config_labels = ['Directed (w/ weights)', 'Directed (w/o weights)',
                'Undirected (w/ weights)', 'Undirected (w/o weights)']

# NOTE: this rebinds `colors`, which an earlier cell also defines as a list
# with a different ordering.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
markers = ['o', 's', '^', 'D']
linestyles = ['-', '--', '-.', ':']

def create_clean_axis(ax):
    """Give ``ax`` the shared minimal look used by the figures in this cell.

    Draws a faint grid, hides the top and right spines, thins the left and
    bottom spines to 0.8, and uses short outward-pointing ticks.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

def _warn_if_no_series(has_data, key_prefix):
    """Print a warning when no ``<key_prefix>_<config>`` entry was found in ``results``."""
    if not has_data:
        print(f"Warning: no '{key_prefix}_*' entries found in results; "
              "the figure saved next will be empty.")


# Each sweep below draws one line per configuration that is present in
# `results`. Values are divided by 1024 before being plotted on an axis
# labelled GB (so the stored values are presumably MB - TODO confirm).
# The has_data_* flags record whether anything was drawn, so a missing key
# produces a visible warning instead of a silently empty figure.

# Figure 1: Varying Node Count
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))
has_data_fig1 = False
for config, color, marker, linestyle in zip(configs, colors, markers, linestyles):
    if f"increasing_nodes_{config}" in results:
        data = results[f"increasing_nodes_{config}"]
        # Mapping: node count -> memory measurement.
        nodes = list(data.keys())
        memory = [x / 1024 for x in data.values()]
        ax1.semilogx(nodes, memory, marker=marker, color=color, linestyle=linestyle,
                    markersize=5, linewidth=2, markeredgewidth=0.5)
        has_data_fig1 = True
_warn_if_no_series(has_data_fig1, "increasing_nodes")

ax1.set_xlabel('Node Count')
ax1.set_ylabel('Memory Usage (GB)')
create_clean_axis(ax1)
plt.tight_layout()
plt.savefig('figures/plot_7_memory_memory_vs_nodes.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_7_memory_memory_vs_nodes.png', format='png', bbox_inches='tight')
plt.close()

# Figure 2: Varying Edge Count
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))
has_data_fig2 = False
for config, color, marker, linestyle in zip(configs, colors, markers, linestyles):
    if f"increasing_edges_{config}" in results:
        data = results[f"increasing_edges_{config}"]
        # Mapping: edge count -> memory measurement.
        edges = list(data.keys())
        memory = [x / 1024 for x in data.values()]
        ax2.semilogx(edges, memory, marker=marker, color=color, linestyle=linestyle,
                    markersize=5, linewidth=2, markeredgewidth=0.5)
        has_data_fig2 = True
_warn_if_no_series(has_data_fig2, "increasing_edges")

ax2.set_xlabel('Edge Count')
ax2.set_ylabel('Memory Usage (GB)')
create_clean_axis(ax2)
plt.tight_layout()
plt.savefig('figures/plot_7_memory_memory_vs_edges.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_7_memory_memory_vs_edges.png', format='png', bbox_inches='tight')
plt.close()

# Figure 3: Varying Timestamp Count
fig3, ax3 = plt.subplots(1, 1, figsize=(6, 4))
has_data_fig3 = False
for config, color, marker, linestyle in zip(configs, colors, markers, linestyles):
    if f"increasing_timestamps_{config}" in results:
        data = results[f"increasing_timestamps_{config}"]
        # Mapping: timestamp count -> memory measurement.
        timestamps = list(data.keys())
        memory = [x / 1024 for x in data.values()]
        ax3.semilogx(timestamps, memory, marker=marker, color=color, linestyle=linestyle,
                    markersize=5, linewidth=2, markeredgewidth=0.5)
        has_data_fig3 = True
_warn_if_no_series(has_data_fig3, "increasing_timestamps")

ax3.set_xlabel('Timestamp Count')
ax3.set_ylabel('Memory Usage (GB)')
create_clean_axis(ax3)
plt.tight_layout()
plt.savefig('figures/plot_7_memory_memory_vs_timestamps.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_7_memory_memory_vs_timestamps.png', format='png', bbox_inches='tight')
plt.close()

# Figure 4: Streaming Window Test
fig4, ax4 = plt.subplots(1, 1, figsize=(6, 4))
has_data_fig4 = False
for config, color, marker, linestyle in zip(configs, colors, markers, linestyles):
    if f"streaming_window_{config}" in results:
        data = results[f"streaming_window_{config}"]
        # Here the entry is iterated directly as a sequence of measurements,
        # one per batch, numbered from 1 on a linear x-axis.
        batches = range(1, len(data) + 1)
        memory_gb = [x / 1024 for x in data]
        ax4.plot(batches, memory_gb, marker=marker, color=color, linestyle=linestyle,
                markersize=5, linewidth=2, markeredgewidth=0.5)
        has_data_fig4 = True
_warn_if_no_series(has_data_fig4, "streaming_window")

ax4.set_xlabel('Batch Number')
ax4.set_ylabel('Memory Usage (GB)')
create_clean_axis(ax4)
plt.tight_layout()
plt.savefig('figures/plot_7_memory_memory_streaming_window.pdf', format='pdf', bbox_inches='tight')
plt.savefig('figures/plot_7_memory_memory_streaming_window.png', format='png', bbox_inches='tight')
plt.close()

# Create a standalone legend figure for LaTeX flexibility with minimal whitespace
fig_legend = plt.figure(figsize=(6, 0.5))  # Even smaller height

# Create legend elements: one proxy line per configuration, styled with the
# same colour, marker and line style as the curves in the figures above.
legend_elements = []
for idx, (label, color, marker, linestyle) in enumerate(zip(config_labels, colors, markers, linestyles)):
    legend_elements.append(plt.Line2D([0], [0], marker=marker, color=color, linestyle=linestyle,
                                    markersize=6, linewidth=2, label=label, markeredgewidth=0.5))

# Create legend directly on figure with very tight parameters
legend = fig_legend.legend(handles=legend_elements,
                          loc='center',
                          ncol=2,
                          frameon=True,
                          fancybox=False,
                          edgecolor='black',
                          columnspacing=1.2,    # Very tight spacing
                          handletextpad=0.4,    # Minimal padding
                          handlelength=1.8,     # Shorter handles
                          borderpad=0.2,        # Minimal border padding
                          labelspacing=0.3)     # Tight vertical spacing

# Set legend frame properties
legend.get_frame().set_linewidth(0.8)

# Get the legend bounding box in pixels and convert it to inches, so the saved
# image can be cropped to the legend itself.
bbox = legend.get_window_extent(fig_legend.canvas.get_renderer())
bbox_inches = bbox.transformed(fig_legend.dpi_scale_trans.inverted())

# Save with exact bounding box
plt.savefig('figures/plot_7_memory_memory_legend.pdf', format='pdf',
           bbox_inches=bbox_inches, pad_inches=0.02)  # Minimal padding
plt.savefig('figures/plot_7_memory_memory_legend.png', format='png',
           bbox_inches=bbox_inches, pad_inches=0.02)
plt.close()

# Summary of what was written. NOTE(review): the names printed here omit the
# 'plot_7_memory_' prefix used in the actual file names, and PNG copies are
# written alongside each PDF.
print("Created separate figures (no titles, no legends):")
print("- memory_vs_nodes.pdf")
print("- memory_vs_edges.pdf")
print("- memory_vs_timestamps.pdf")
print("- memory_streaming_window.pdf")
print("- memory_legend.pdf (standalone legend)")
print("\nAll figures saved in figures/ directory")
print("Note: Titles and legends removed from individual figures")

Available data keys in pickle file:
  - increasing_edges_directed_with_weights
  - increasing_edges_directed_without_weights
  - increasing_edges_undirected_with_weights
  - increasing_edges_undirected_without_weights
  - increasing_nodes_directed_with_weights
  - increasing_nodes_directed_without_weights
  - increasing_nodes_undirected_with_weights
  - increasing_nodes_undirected_without_weights
  - increasing_timestamps_directed_with_weights
  - increasing_timestamps_directed_without_weights
  - increasing_timestamps_undirected_with_weights
  - increasing_timestamps_undirected_without_weights
  - streaming_window_directed_with_weights
  - streaming_window_directed_without_weights
  - streaming_window_undirected_with_weights
  - streaming_window_undirected_without_weights

Created separate figures (no titles, no legends):
- memory_vs_nodes.pdf
- memory_vs_edges.pdf
- memory_vs_timestamps.pdf
- memory_streaming_window.pdf
- memory_legend.pdf (standalone legend)

All figures saved in fi

In [4]:
import numpy as np


def human_format(x, pos):
    """Format a tick value compactly, e.g. 2500000 -> '2.5M', 50000 -> '50K'.

    The ``(x, pos)`` signature matches the callback expected by
    ``matplotlib.ticker.FuncFormatter``; ``pos`` is accepted but not used.

    Args:
        x: Numeric tick value.
        pos: Tick position index (ignored).

    Returns:
        ``'<n>M'`` with at most one decimal for values of at least one
        million, ``'<n>K'`` rounded to a whole number for values of at least
        one thousand, otherwise the integer part of ``x`` as a string.
    """
    if x >= 1_000_000:
        # Trim a trailing ".0" BEFORE adding the suffix. Stripping after the
        # suffix is a no-op because the string then ends in 'M', which left
        # labels such as '10.0M' instead of '10M'.
        millions = f'{x / 1_000_000:.1f}'.rstrip('0').rstrip('.')
        return f'{millions}M'
    if x >= 1_000:
        return f'{x / 1_000:.0f}K'
    return str(int(x))

def compute_mean_std(data):
    """Return the per-row mean and standard deviation of a 2-D table.

    ``data`` is converted to a NumPy array and reduced along axis 1, so each
    row yields one mean and one standard deviation (NumPy's default,
    i.e. ``ddof=0``).

    Returns:
        Tuple ``(means, stds)`` of 1-D arrays with one entry per row.
    """
    samples = np.array(data)
    row_means = np.mean(samples, axis=1)
    row_stds = np.std(samples, axis=1)
    return row_means, row_stds

In [5]:
import pickle

# Directory holding the benchmark pickles. The loaders join it with
# f"{results_dir}/<name>", so the trailing slash yields a doubled separator in
# the resulting paths.
results_dir = 'results/'

# Load the benchmark results
def load_results():
    """Load every benchmark pickle found under ``results_dir``.

    Reads ``result_<name>.pkl`` files for the CPU, GPU and Raphtory runs. Each
    file is opened in a ``with`` block so its handle is closed as soon as the
    payload has been read.

    Returns:
        Tuple ``(cpu_results, gpu_results, raphtory_results)``. The CPU and
        GPU dicts have keys ``"edges"``, ``"walks"``, ``"max_walk_lens"`` and
        ``"incremental"``; the Raphtory dict has ``"edges"`` and
        ``"incremental"``.

    Raises:
        OSError: If any of the expected files cannot be opened.
    """
    def _load(stem):
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only use this on files produced by a trusted benchmark run.
        with open(f"{results_dir}/result_{stem}.pkl", "rb") as handle:
            return pickle.load(handle)

    cpu_results = {
        "edges": _load("edges_CPU"),
        "walks": _load("walks_CPU"),
        "max_walk_lens": _load("max_walk_lens_CPU"),
        "incremental": _load("incremental_sliding_CPU")
    }

    gpu_results = {
        "edges": _load("edges_GPU"),
        "walks": _load("walks_GPU"),
        "max_walk_lens": _load("max_walk_lens_GPU"),
        "incremental": _load("incremental_sliding_GPU")
    }

    raphtory_results = {
        "edges": _load("edges_raphtory"),
        "incremental": _load("incremental_raphtory")
    }

    return cpu_results, gpu_results, raphtory_results

# Define edge and walk counts from the benchmark.
# These lists are used as x-coordinates for the per-row means computed in the
# plotting cells, so each list must have as many entries as the corresponding
# result table has rows (presumably mirroring the benchmark script - verify).
edge_counts = [
    10_000, 50_000, 100_000, 500_000, 1_000_000, 2_000_000,
    5_000_000, 10_000_000, 20_000_000, 30_000_000, 40_000_000,
    50_000_000, 60_000_000
]

walk_counts = [
    10_000, 50_000, 100_000, 200_000, 500_000,
    1_000_000, 2_000_000, 5_000_000, 10_000_000
]

# 10, 20, ..., 300 (30 values)
walk_lengths = list(range(10, 310, 10))

# Load results (reads every pickle from results_dir; see load_results)
cpu_results, gpu_results, raphtory_results = load_results()

In [6]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import ticker
import os

# Ensure figures directory exists
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format.
# rcParams is global matplotlib state, so these settings stay in effect for
# every figure created later in the same kernel session.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Academic color scheme - matching original plots.
# NOTE: this rebinds `colors` to a dict; earlier cells bind the same name to a list.
colors = {
    'cpu_no_weights': 'orange',       # Orange
    'cpu_weights': 'darkorange',      # Dark orange
    'gpu_no_weights': 'blue',         # Blue
    'gpu_weights': 'darkblue',        # Dark blue
    'raphtory': 'green',              # Green
    'ratio_cpu_gpu': 'purple',        # Purple
    'ratio_raphtory_gpu': 'green'     # Green
}

# Compute means and standard deviations per row of each result table.
# Requires compute_mean_std and the cpu/gpu/raphtory result dicts from the
# earlier cells.
cpu_mean_wo, cpu_std_wo = compute_mean_std(cpu_results["edges"]["edge_addition_time_without_weights"])
cpu_mean_w, cpu_std_w = compute_mean_std(cpu_results["edges"]["edge_addition_time_with_weights"])
gpu_mean_wo, gpu_std_wo = compute_mean_std(gpu_results["edges"]["edge_addition_time_without_weights"])
gpu_mean_w, gpu_std_w = compute_mean_std(gpu_results["edges"]["edge_addition_time_with_weights"])
raphtory_mean, raphtory_std = compute_mean_std(raphtory_results["edges"])

# Compute ratios and propagated error bars.
# Both ratios use the GPU "with weights" series as the denominator. The error
# is the ratio times the relative errors of numerator and denominator added
# in quadrature.
ratios_cpu_gpu = cpu_mean_w / gpu_mean_w
ratios_cpu_gpu_std = ratios_cpu_gpu * np.sqrt((cpu_std_w / cpu_mean_w) ** 2 + (gpu_std_w / gpu_mean_w) ** 2)

ratios_raphtory_gpu = raphtory_mean / gpu_mean_w
ratios_raphtory_gpu_std = ratios_raphtory_gpu * np.sqrt((raphtory_std / raphtory_mean) ** 2 + (gpu_std_w / gpu_mean_w) ** 2)

def create_clean_axis(ax):
    """Give ``ax`` the shared minimal look used by the figures in this cell.

    Draws a faint grid, hides the top and right spines, thins the left and
    bottom spines to 0.8, and uses short outward-pointing ticks.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

# Styling shared by every errorbar series and both in-plot legends below.
_series_kwargs = dict(markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)
_legend_kwargs = dict(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=9)

# ---- Figure 1: absolute edge-addition time per backend ----
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))

# (mean, std, line format, colour key, legend label), in draw order.
_absolute_series = [
    (cpu_mean_wo, cpu_std_wo, 'o-', 'cpu_no_weights', "CPU (w/o Weights)"),
    (cpu_mean_w, cpu_std_w, 's--', 'cpu_weights', "CPU (w/ Weights)"),
    (gpu_mean_wo, gpu_std_wo, 'o-', 'gpu_no_weights', "GPU (w/o Weights)"),
    (gpu_mean_w, gpu_std_w, 's--', 'gpu_weights', "GPU (w/ Weights)"),
    (raphtory_mean, raphtory_std, 'd-', 'raphtory', "Raphtory"),
]
for _mean, _std, _line_fmt, _color_key, _label in _absolute_series:
    ax1.errorbar(edge_counts, _mean, yerr=_std, fmt=_line_fmt,
                 color=colors[_color_key], label=_label, **_series_kwargs)

ax1.xaxis.set_major_formatter(ticker.FuncFormatter(human_format))
ax1.set_xlabel("Number of Edges")
ax1.set_ylabel("Time (seconds)")
create_clean_axis(ax1)

# Legend stays inside the axes for this figure.
legend1 = ax1.legend(**_legend_kwargs)
legend1.get_frame().set_linewidth(0.8)

fig1.tight_layout()
fig1.savefig('figures/plot_2_incremental_edge_addition_absolute.pdf', format='pdf', bbox_inches='tight')
fig1.savefig('figures/plot_2_incremental_edge_addition_absolute.png', format='png', bbox_inches='tight')
plt.close(fig1)

# ---- Figure 2: time ratios relative to the GPU (with weights) ----
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))

# (ratio, propagated std, line format, colour key, legend label), in draw order.
_ratio_series = [
    (ratios_cpu_gpu, ratios_cpu_gpu_std, 'o-', 'ratio_cpu_gpu', 'CPU / GPU'),
    (ratios_raphtory_gpu, ratios_raphtory_gpu_std, 's--', 'ratio_raphtory_gpu', 'Raphtory / GPU'),
]
for _mean, _std, _line_fmt, _color_key, _label in _ratio_series:
    ax2.errorbar(edge_counts, _mean, yerr=_std, fmt=_line_fmt,
                 color=colors[_color_key], label=_label, **_series_kwargs)

# Dotted reference at a ratio of 1 (equal time to the GPU run).
ax2.axhline(y=1, color='black', linestyle=':', alpha=0.7, linewidth=1.5)

ax2.xaxis.set_major_formatter(ticker.FuncFormatter(human_format))
ax2.set_xlabel("Number of Edges")
ax2.set_ylabel("Time Ratio")
create_clean_axis(ax2)

# Legend stays inside the axes for this figure.
legend2 = ax2.legend(**_legend_kwargs)
legend2.get_frame().set_linewidth(0.8)

fig2.tight_layout()
fig2.savefig('figures/plot_2_incremental_edge_addition_ratios.pdf', format='pdf', bbox_inches='tight')
fig2.savefig('figures/plot_2_incremental_edge_addition_ratios.png', format='png', bbox_inches='tight')
plt.close(fig2)

# Report what was produced.
print("Created separate benchmark figures (no titles, legends inside):")
print("- edge_addition_absolute.pdf")
print("- edge_addition_ratios.pdf")
print("\nAll figures saved in figures/ directory")
print("Note: Titles removed, legends kept inside plots")

Created separate benchmark figures (no titles, legends inside):
- edge_addition_absolute.pdf
- edge_addition_ratios.pdf

All figures saved in figures/ directory
Note: Titles removed, legends kept inside plots


In [7]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import ticker
import os

# Ensure figures directory exists
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format.
# rcParams is global matplotlib state, so these settings stay in effect for
# every figure created later in the same kernel session.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Academic color scheme - matching your original colors.
# NOTE: this rebinds `colors`; the previous cell's keys are no longer available.
colors = {
    'cpu_index': 'orange',
    'cpu_weight': 'darkorange',
    'gpu_index': 'blue',
    'gpu_weight': 'darkblue',
    'ratio': 'purple'
}

# Compute mean and std for all series, one value per row of each table.
# Requires compute_mean_std and the cpu/gpu result dicts from the earlier cells.
cpu_mean_index, cpu_std_index = compute_mean_std(cpu_results["walks"]["walk_sampling_time_index_based"])
cpu_mean_weight, cpu_std_weight = compute_mean_std(cpu_results["walks"]["walk_sampling_time_weight_based"])
gpu_mean_index, gpu_std_index = compute_mean_std(gpu_results["walks"]["walk_sampling_time_index_based"])
gpu_mean_weight, gpu_std_weight = compute_mean_std(gpu_results["walks"]["walk_sampling_time_weight_based"])

# Compute ratio and error propagation (using index-based for comparison).
# The error is the ratio times the relative errors of numerator and
# denominator added in quadrature.
ratios = cpu_mean_index / gpu_mean_index
ratios_std = ratios * np.sqrt((cpu_std_index / cpu_mean_index) ** 2 + (gpu_std_index / gpu_mean_index) ** 2)

def create_clean_axis(ax):
    """Give ``ax`` the shared minimal look used by the figures in this cell.

    Draws a faint grid, hides the top and right spines, thins the left and
    bottom spines to 0.8, and uses short outward-pointing ticks.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

# Styling shared by every errorbar series and both in-plot legends below.
_series_kwargs = dict(markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)
_legend_kwargs = dict(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=9)

# ---- Figure 1: absolute walk-sampling time per backend and sampler ----
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))

# (mean, std, line format, colour key, legend label), in draw order.
_absolute_series = [
    (cpu_mean_index, cpu_std_index, 'o-', 'cpu_index', "CPU (Index Based)"),
    (cpu_mean_weight, cpu_std_weight, 's--', 'cpu_weight', "CPU (Weight Based)"),
    (gpu_mean_index, gpu_std_index, 'o-', 'gpu_index', "GPU (Index Based)"),
    (gpu_mean_weight, gpu_std_weight, 's--', 'gpu_weight', "GPU (Weight Based)"),
]
for _mean, _std, _line_fmt, _color_key, _label in _absolute_series:
    ax1.errorbar(walk_counts, _mean, yerr=_std, fmt=_line_fmt,
                 color=colors[_color_key], label=_label, **_series_kwargs)

ax1.xaxis.set_major_formatter(ticker.FuncFormatter(human_format))
ax1.set_xlabel("Number of Walks")
ax1.set_ylabel("Time (seconds)")
create_clean_axis(ax1)

# Legend stays inside the axes for this figure.
legend1 = ax1.legend(**_legend_kwargs)
legend1.get_frame().set_linewidth(0.8)

fig1.tight_layout()
fig1.savefig('figures/plot_3_incremental_walk_sampling_absolute.pdf', format='pdf', bbox_inches='tight')
fig1.savefig('figures/plot_3_incremental_walk_sampling_absolute.png', format='png', bbox_inches='tight')
plt.close(fig1)

# ---- Figure 2: CPU / GPU time ratio (index-based sampler) ----
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))

ax2.errorbar(walk_counts, ratios, yerr=ratios_std, fmt='o-',
             color=colors['ratio'], label='CPU / GPU (Index Based)', **_series_kwargs)

# Dotted reference at a ratio of 1 (equal CPU and GPU time).
ax2.axhline(y=1, color='black', linestyle=':', alpha=0.7, linewidth=1.5)

ax2.xaxis.set_major_formatter(ticker.FuncFormatter(human_format))
ax2.set_xlabel("Number of Walks")
ax2.set_ylabel("Time Ratio")
create_clean_axis(ax2)

# Legend stays inside the axes for this figure.
legend2 = ax2.legend(**_legend_kwargs)
legend2.get_frame().set_linewidth(0.8)

fig2.tight_layout()
fig2.savefig('figures/plot_3_incremental_walk_sampling_ratios.pdf', format='pdf', bbox_inches='tight')
fig2.savefig('figures/plot_3_incremental_walk_sampling_ratios.png', format='png', bbox_inches='tight')
plt.close(fig2)

# Report what was produced.
print("Created separate walk sampling figures (no titles, legends inside):")
print("- walk_sampling_absolute.pdf")
print("- walk_sampling_ratios.pdf")
print("\nAll figures saved in figures/ directory")
print("Note: Titles removed, legends kept inside plots")

Created separate walk sampling figures (no titles, legends inside):
- walk_sampling_absolute.pdf
- walk_sampling_ratios.pdf

All figures saved in figures/ directory
Note: Titles removed, legends kept inside plots


In [8]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Ensure figures directory exists
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format.
# rcParams is global matplotlib state, so these settings stay in effect for
# every figure created later in the same kernel session.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Academic color scheme - matching your original colors.
# NOTE: this rebinds `colors`; the previous cell's keys are no longer available.
colors = {
    'cpu': 'orange',
    'gpu': 'blue',
    'ratio': 'teal'
}

# Compute mean and std for each walk length, one value per row of each table.
# Requires compute_mean_std and the cpu/gpu result dicts from the earlier cells.
cpu_mean, cpu_std = compute_mean_std(cpu_results["max_walk_lens"]["walk_sampling_time"])
gpu_mean, gpu_std = compute_mean_std(gpu_results["max_walk_lens"]["walk_sampling_time"])

# Compute ratios and propagated error.
# NOTE: `ratios` / `ratios_std` overwrite the values computed in the previous
# cell. The error is the ratio times the relative errors of numerator and
# denominator added in quadrature.
ratios = cpu_mean / gpu_mean
ratios_std = ratios * np.sqrt((cpu_std / cpu_mean) ** 2 + (gpu_std / gpu_mean) ** 2)

def create_clean_axis(ax):
    """Give ``ax`` the shared look: faint grid, no top/right spine, thin frame and ticks."""
    frame_width = 0.8

    ax.grid(True, alpha=0.3, linewidth=0.5)

    # Open up the plot by hiding the upper and right borders
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)

    # Keep the remaining borders, drawn with a thin line
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(frame_width)

    ax.tick_params(direction='out', length=3, width=frame_width)

# Figure 1: Walk Length Absolute Performance
# Plots mean CPU and GPU sampling time (with std error bars) against the maximum
# walk length. walk_lengths, cpu_mean/cpu_std, gpu_mean/gpu_std, colors and
# create_clean_axis must already be defined in the kernel.
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))

ax1.errorbar(walk_lengths, cpu_mean, yerr=cpu_std,
             fmt='o-', color=colors['cpu'], label="CPU",
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

ax1.errorbar(walk_lengths, gpu_mean, yerr=gpu_std,
             fmt='o-', color=colors['gpu'], label="GPU",
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

ax1.set_xlabel("Maximum Walk Length")
ax1.set_ylabel("Time (seconds)")
create_clean_axis(ax1)

# Add legend inside the plot
legend1 = ax1.legend(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=10)
legend1.get_frame().set_linewidth(0.8)

# Save through the figure handle so the right figure is written and closed
# regardless of which figure pyplot currently considers "current".
walk_length_absolute_stem = 'figures/plot_4_incremental_walk_length_absolute'
fig1.tight_layout()
fig1.savefig(f'{walk_length_absolute_stem}.pdf', format='pdf', bbox_inches='tight')
fig1.savefig(f'{walk_length_absolute_stem}.png', format='png', bbox_inches='tight')
plt.close(fig1)

# Figure 2: Walk Length Performance Ratios
# Plots the CPU/GPU time ratio with its propagated error (ratios, ratios_std).
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))

ax2.errorbar(walk_lengths, ratios, yerr=ratios_std,
             fmt='o-', color=colors['ratio'], label='CPU / GPU',
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

# Reference line at ratio 1 (CPU and GPU take the same time)
ax2.axhline(y=1, color='black', linestyle=':', alpha=0.7, linewidth=1.5)

# Add mean ratio line
mean_ratio = np.mean(ratios)
ax2.axhline(y=mean_ratio, color='red', linestyle='--', alpha=0.8, linewidth=2,
            label=f'Mean Ratio: {mean_ratio:.1f}×')

ax2.set_xlabel("Maximum Walk Length")
ax2.set_ylabel("Time Ratio")
create_clean_axis(ax2)

# Add legend inside the plot
legend2 = ax2.legend(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=10)
legend2.get_frame().set_linewidth(0.8)

walk_length_ratios_stem = 'figures/plot_4_incremental_walk_length_ratios'
fig2.tight_layout()
fig2.savefig(f'{walk_length_ratios_stem}.pdf', format='pdf', bbox_inches='tight')
fig2.savefig(f'{walk_length_ratios_stem}.png', format='png', bbox_inches='tight')
plt.close(fig2)

# Report the names of the files that were actually written (derived from the
# save paths above so the summary cannot drift from the real file names).
print("Created separate walk length figures (no titles, legends inside):")
print(f"- {os.path.basename(walk_length_absolute_stem)}.pdf")
print(f"- {os.path.basename(walk_length_ratios_stem)}.pdf")
print("\nAll figures saved in figures/ directory")
print("Note: Titles removed, legends kept inside plots")

Created separate walk length figures (no titles, legends inside):
- plot_4_walk_length_absolute.pdf
- plot_4_walk_length_ratios.pdf

All figures saved in figures/ directory
Note: Titles removed, legends kept inside plots


In [9]:
def generate_bell_curve_temporal_graph(n_nodes=1000, n_edges=1_000_000, seed=42, time_range=1_000_000):
    """Build a random temporal edge list whose endpoints favour the middle node ids.

    Endpoints are drawn independently from a Gaussian-shaped distribution over
    ``0 .. n_nodes - 1`` (centre ``n_nodes / 2``, width ``n_nodes / 6``). Pairs
    with identical endpoints are discarded, so fewer than ``n_edges`` edges may
    be returned. Each surviving edge gets a uniform integer timestamp in
    ``[0, time_range)``.

    Note: seeds NumPy's global random state via ``np.random.seed(seed)``.

    Returns:
        Tuple ``(sources, targets, timestamps)`` of equally long 1-D arrays.
    """
    np.random.seed(seed)

    # Gaussian-shaped weights over the node ids, normalised to probabilities
    centre = n_nodes / 2
    spread = n_nodes / 6
    offsets = (np.arange(n_nodes) - centre) / spread
    weights = np.exp(-0.5 * offsets ** 2)
    node_probs = weights / weights.sum()

    # Draw both endpoints of every candidate edge (sources first, then targets)
    raw_sources = np.random.choice(n_nodes, size=n_edges, p=node_probs)
    raw_targets = np.random.choice(n_nodes, size=n_edges, p=node_probs)

    # Keep only edges that connect two different nodes
    not_self_loop = raw_sources != raw_targets
    sources, targets = raw_sources[not_self_loop], raw_targets[not_self_loop]

    # One timestamp per remaining edge, uniform over [0, time_range)
    timestamps = np.random.randint(0, high=time_range, size=len(sources))

    return sources, targets, timestamps

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from temporal_random_walk import TemporalRandomWalk
import os

# Make sure the output folder for the saved figures is present
os.makedirs('figures', exist_ok=True)

# Matplotlib defaults for an academic paper (EuroSys format), grouped by concern
_typography_rc = {
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
}
_resolution_rc = {
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update({**_typography_rc, **_resolution_rc})

def _normalized_node_counts(node_ids, n_nodes):
    """Count occurrences of each node id and scale so the most frequent node is 1.0."""
    tallies = np.bincount(np.asarray(node_ids).ravel(), minlength=n_nodes)
    if tallies.size > n_nodes:
        # bincount grows the result for ids >= n_nodes; treat that as an error,
        # as indexing a fixed-size array with such an id would.
        raise IndexError(
            f"node id {tallies.size - 1} is out of bounds for n_nodes={n_nodes}"
        )
    tallies = tallies.astype(np.float64)
    tallies /= tallies.max()
    return tallies


def run_walk_sampling(edge_picker, sources, targets, timestamps, n_nodes=1000):
    """Sample temporal random walks on the given edges and summarise node usage.

    An undirected, CPU-only ``TemporalRandomWalk`` is built from the edge list,
    then 1,000,000 forward-in-time walks of at most 100 steps are requested with
    ``edge_picker`` as the walk bias and a uniform initial-edge bias. The average
    walk length is printed.

    Args:
        edge_picker: Walk-bias name forwarded as ``walk_bias``.
        sources, targets: Integer node-id arrays, one entry per edge.
        timestamps: Edge timestamps, aligned with ``sources``/``targets``.
        n_nodes: Length of the returned count vectors; every node id must be
            in ``[0, n_nodes)``.

    Returns:
        ``(edge_node_counts, walk_node_counts, avg_walk_length)`` where the two
        count vectors are float arrays of length ``n_nodes`` normalised by their
        own maximum: how often each node appears as an edge endpoint, and how
        often it appears in a sampled walk (entries equal to ``-1`` are ignored).

    Raises:
        IndexError: if a node id is ``>= n_nodes``.
        ValueError: if a node id is negative (raised by ``np.bincount``); such
            ids are rejected rather than counted against a wrapped-around index.
    """
    trw = TemporalRandomWalk(
        is_directed=False,
        use_gpu=False,
        max_time_capacity=-1,
        enable_weight_computation=True
    )
    trw.add_multiple_edges(sources, targets, timestamps)

    walks, _, walk_lengths = trw.get_random_walks_and_times(
        max_walk_len=100,
        walk_bias=edge_picker,
        num_walks_total=1_000_000,
        initial_edge_bias="Uniform",
        walk_direction="Forward_In_Time"
    )

    avg_walk_length = walk_lengths.mean()
    print(f"{edge_picker} → Avg walk length: {avg_walk_length:.2f}")

    # Vectorised counting: the inputs hold millions of ids, so a per-element
    # Python loop would dominate the runtime.
    edge_node_counts = _normalized_node_counts(
        np.concatenate([sources, targets]), n_nodes)

    # -1 marks unused slots in the walk array; drop them before counting
    walk_node_counts = _normalized_node_counts(walks[walks != -1], n_nodes)

    return edge_node_counts, walk_node_counts, avg_walk_length

def create_clean_axis(ax):
    """Give ``ax`` the shared look: faint grid, no top/right spine, thin frame and ticks."""
    frame_width = 0.8

    ax.grid(True, alpha=0.3, linewidth=0.5)

    # Open up the plot by hiding the upper and right borders
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)

    # Keep the remaining borders, drawn with a thin line
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(frame_width)

    ax.tick_params(direction='out', length=3, width=frame_width)

def plot_individual_distributions(results, n_nodes=1000):
    """Save one figure per edge-picker strategy comparing edge and walk node usage.

    Args:
        results: Sequence of ``(edge_counts, walk_counts, avg_walk_length)``
            tuples. The position of each entry selects the file-name suffix from
            ExponentialIndex, LinearIndex, Uniform (in that order), so more than
            three entries raise ``IndexError``.
        n_nodes: Number of x positions; both count arrays are plotted against
            ``np.arange(n_nodes)``.

    For every entry a figure is written to
    ``figures/plot_6_walk_distribution_<picker>.pdf`` and ``.png`` (picker name
    lower-cased), the figure is closed, and the base file name is printed.
    The ``figures`` directory must already exist.
    """
    pickers = ["ExponentialIndex", "LinearIndex", "Uniform"]

    # Academic color scheme: blue for edge participation, orange for walk visits
    edge_color, walk_color = '#1f77b4', '#ff7f0e'
    node_ids = np.arange(n_nodes)  # shared x-axis, computed once

    for i, (edge_counts, walk_counts, avg_walk_length) in enumerate(results):
        fig, ax = plt.subplots(1, 1, figsize=(6, 4))

        # Plot distributions
        ax.plot(node_ids, edge_counts,
                linewidth=2, alpha=0.8, color=edge_color,
                markeredgewidth=0.5)
        ax.plot(node_ids, walk_counts,
                linewidth=2, alpha=0.8, color=walk_color,
                markeredgewidth=0.5)

        ax.set_xlabel("Node ID")
        ax.set_ylabel("Normalized Frequency")
        create_clean_axis(ax)

        # Average walk length shown in a box at the top-left of the axes
        ax.text(0.05, 0.95, f'Avg Walk Length: {avg_walk_length:.2f}',
                transform=ax.transAxes, fontsize=10,
                verticalalignment='top', horizontalalignment='left',
                bbox=dict(boxstyle="round,pad=0.3", facecolor='lightgray', alpha=0.8))

        # Work through the figure handle so layout, saving and closing always
        # act on this figure rather than on pyplot's "current" one.
        fig.tight_layout()

        filename = f"plot_6_walk_distribution_{pickers[i].lower()}"
        fig.savefig(f'figures/{filename}.pdf', format='pdf', bbox_inches='tight')
        fig.savefig(f'figures/{filename}.png', format='png', bbox_inches='tight')
        plt.close(fig)

        print(f"Saved {filename}")

def create_walk_distribution_legend():
    """Write the shared two-entry legend as its own tightly cropped PDF and PNG."""
    legend_fig = plt.figure(figsize=(5, 0.5))

    # (colour, label) for each series: blue = edges, orange = walks
    series = (
        ('#1f77b4', 'Edge Participation'),
        ('#ff7f0e', 'Walk Visit Count'),
    )
    handles = [
        plt.Line2D([0], [0], color=shade, linewidth=2,
                   label=caption, markeredgewidth=0.5)
        for shade, caption in series
    ]

    # Compact single-row layout with a plain black frame
    layout = dict(
        loc='center',
        ncol=2,
        frameon=True,
        fancybox=False,
        edgecolor='black',
        columnspacing=1.2,
        handletextpad=0.4,
        handlelength=1.8,
        borderpad=0.2,
        labelspacing=0.3,
    )
    legend = legend_fig.legend(handles=handles, **layout)
    legend.get_frame().set_linewidth(0.8)

    # Crop the saved image to the legend's own extent, converted to inches
    renderer = legend_fig.canvas.get_renderer()
    extent_px = legend.get_window_extent(renderer)
    crop = extent_px.transformed(legend_fig.dpi_scale_trans.inverted())

    for ext in ('pdf', 'png'):
        plt.savefig(f'figures/plot_6_walk_distribution_legend.{ext}', format=ext,
                    bbox_inches=crop, pad_inches=0.02)
    plt.close()

    print("Saved standalone legend")

def run_bell_curve_walk_distribution_experiment():
    """Generate the bell-curve graph, sample walks per picker, and write all figures."""
    print("Generating graph...")
    graph = generate_bell_curve_temporal_graph()  # (sources, targets, timestamps)

    # One sampling run per walk-bias strategy, in plotting order
    strategies = ("ExponentialIndex", "Linear", "Uniform")
    results = [tuple(run_walk_sampling(strategy, *graph)) for strategy in strategies]

    # One figure per strategy, then the legend they share
    plot_individual_distributions(results)
    create_walk_distribution_legend()

    summary_lines = (
        "Created separated walk distribution figures:",
        "- plot_6_walk_distribution_exponentialindex.pdf",
        "- plot_6_walk_distribution_linearindex.pdf",
        "- plot_6_walk_distribution_uniform.pdf",
        "- plot_6_walk_distribution_legend.pdf (standalone legend)",
        "\nAll figures saved in figures/ directory",
        "Note: Titles and legends removed from individual figures",
    )
    for line in summary_lines:
        print(line)

# Run the whole experiment when this cell executes: generates the graph, samples
# walks for each picker, and writes the figures.
run_bell_curve_walk_distribution_experiment()

Generating graph...
ExponentialIndex → Avg walk length: 96.31
Linear → Avg walk length: 15.26
Uniform → Avg walk length: 9.96
Saved plot_6_walk_distribution_exponentialindex
Saved plot_6_walk_distribution_linearindex
Saved plot_6_walk_distribution_uniform
Saved standalone legend
Created separated walk distribution figures:
- plot_6_walk_distribution_exponentialindex.pdf
- plot_6_walk_distribution_linearindex.pdf
- plot_6_walk_distribution_uniform.pdf
- plot_6_walk_distribution_legend.pdf (standalone legend)

All figures saved in figures/ directory
Note: Titles and legends removed from individual figures


In [11]:
from temporal_random_walk import LinearRandomPicker, ExponentialIndexRandomPicker, UniformRandomPicker

N_TRIALS = 100_000
HI = 20

def test_pickers(picker, prioritize_end, hi, n_trials=N_TRIALS):
    counts = [0] * hi

    for _ in range(n_trials):
        picked_num = picker.pick_random(0, hi, prioritize_end)
        counts[picked_num] += 1

    return counts


# Histogram every picker twice, first with prioritize_end=True and then with
# prioritize_end=False, using a fresh picker instance for each run.
counts_linear_end_prioritized, counts_linear_start_prioritized = [
    test_pickers(LinearRandomPicker(), favour_end, hi=HI)
    for favour_end in (True, False)
]

counts_exp_end_prioritized, counts_exp_start_prioritized = [
    test_pickers(ExponentialIndexRandomPicker(), favour_end, hi=HI)
    for favour_end in (True, False)
]

counts_uniform_end_prioritized, counts_uniform_start_prioritized = [
    test_pickers(UniformRandomPicker(), favour_end, hi=HI)
    for favour_end in (True, False)
]

In [12]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import os

# Make sure the output folder for the saved figures is present
os.makedirs('figures', exist_ok=True)

# Matplotlib defaults for an academic paper (EuroSys format), grouped by concern
_typography_rc = {
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
}
_resolution_rc = {
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update({**_typography_rc, **_resolution_rc})

def create_clean_axis(ax):
    """Give ``ax`` the shared look and restrict x-axis major ticks to integers."""
    frame_width = 0.8

    ax.grid(True, alpha=0.3, linewidth=0.5)

    # Open up the plot by hiding the upper and right borders
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)

    # Keep the remaining borders, drawn with a thin line
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(frame_width)

    ax.tick_params(direction='out', length=3, width=frame_width)

    # The x values are picked indices, so only whole-number ticks make sense
    integer_ticks = MaxNLocator(integer=True)
    ax.xaxis.set_major_locator(integer_ticks)

def create_picker_validation_figures():
    """Plot each picker histogram as its own figure and save it as PDF and PNG.

    Reads the six module-level ``counts_*`` histograms and writes
    ``figures/plot_1_picker_validation_<suffix>.pdf`` / ``.png`` for each one.
    """
    # (file-name suffix, human-readable description, histogram)
    # The description is kept for reference only; figures carry no title.
    cases = (
        ("exponential_end", "Exponential (End Prioritized)", counts_exp_end_prioritized),
        ("exponential_start", "Exponential (Start Prioritized)", counts_exp_start_prioritized),
        ("linear_end", "Linear (End Prioritized)", counts_linear_end_prioritized),
        ("linear_start", "Linear (Start Prioritized)", counts_linear_start_prioritized),
        ("uniform_end", "Uniform (End Prioritized)", counts_uniform_end_prioritized),
        ("uniform_start", "Uniform (Start Prioritized)", counts_uniform_start_prioritized),
    )

    line_color = '#1f77b4'  # blue

    for suffix, _description, histogram in cases:
        _fig, axis = plt.subplots(1, 1, figsize=(6, 4))

        # Times picked (y) against the picked index (x, implicit 0..len-1)
        axis.plot(histogram, color=line_color, linewidth=2, alpha=0.8, markeredgewidth=0.5)
        axis.set_xlabel("Number")
        axis.set_ylabel("Times Picked")
        create_clean_axis(axis)

        plt.tight_layout()

        output_filename = f"plot_1_picker_validation_{suffix}"
        for ext in ('pdf', 'png'):
            plt.savefig(f'figures/{output_filename}.{ext}', format=ext, bbox_inches='tight')
        plt.close()

        print(f"Saved {output_filename}")

def run_picker_validation_analysis():
    """Produce every picker validation figure, then print a summary of the files."""
    create_picker_validation_figures()

    summary_lines = (
        "\nCreated separated picker validation figures:",
        "- plot_1_picker_validation_exponential_end.pdf",
        "- plot_1_picker_validation_exponential_start.pdf",
        "- plot_1_picker_validation_linear_end.pdf",
        "- plot_1_picker_validation_linear_start.pdf",
        "- plot_1_picker_validation_uniform_end.pdf",
        "- plot_1_picker_validation_uniform_start.pdf",
        "\nAll figures saved in figures/ directory",
        "Note: Titles and legends removed from individual figures",
    )
    for line in summary_lines:
        print(line)

# Run the analysis.
# The guard is true when this code is the entry point (__name__ == "__main__"),
# so the figures are generated; it is skipped if the code is imported as a module.
if __name__ == "__main__":
    run_picker_validation_analysis()

Saved plot_1_picker_validation_exponential_end
Saved plot_1_picker_validation_exponential_start
Saved plot_1_picker_validation_linear_end
Saved plot_1_picker_validation_linear_start
Saved plot_1_picker_validation_uniform_end
Saved plot_1_picker_validation_uniform_start

Created separated picker validation figures:
- plot_1_picker_validation_exponential_end.pdf
- plot_1_picker_validation_exponential_start.pdf
- plot_1_picker_validation_linear_end.pdf
- plot_1_picker_validation_linear_start.pdf
- plot_1_picker_validation_uniform_end.pdf
- plot_1_picker_validation_uniform_start.pdf

All figures saved in figures/ directory
Note: Titles and legends removed from individual figures
