In [1]:
import pickle

results_dir = 'results/'

# Load the Alibaba streaming benchmark results for both devices.
# Each file is opened in a `with` block so the handle is closed as soon as the
# pickle has been read (the previous bare open() calls leaked both handles).
# NOTE(review): pickle executes arbitrary code on load; only use trusted result files.
with open(f"{results_dir}/result_alibaba_streaming_GPU.pkl", "rb") as gpu_file:
    alibaba_data_gpu = pickle.load(gpu_file)
with open(f"{results_dir}/result_alibaba_streaming_CPU.pkl", "rb") as cpu_file:
    alibaba_data_cpu = pickle.load(cpu_file)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import os

# === Config ===
fig_dir = 'figures'
os.makedirs(fig_dir, exist_ok=True)

# Style for EuroSys/ACM format
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 9,
    'axes.labelsize': 10,
    'axes.titlesize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 8,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# === Constants ===
FIGSIZE = (3.3, 2.4)  # single-column figure
ANNOTATION_BOX_STYLE = dict(boxstyle="round,pad=0.3", alpha=0.15)
COLOR_GPU = '#1f77b4'   # consistent GPU color
COLOR_CPU = '#ff7f0e'   # consistent CPU color
MINUTES_PER_DAY = 1440

# === Extract metrics ===
edge_addition_time_cpu = alibaba_data_cpu['edge_addition_time']
walk_sampling_time_cpu = alibaba_data_cpu['walk_sampling_time']
total_edges_cpu = alibaba_data_cpu['total_edges']

edge_addition_time_gpu = alibaba_data_gpu['edge_addition_time']
walk_sampling_time_gpu = alibaba_data_gpu['walk_sampling_time']
total_edges_gpu = alibaba_data_gpu['total_edges']

# Time axis
minutes_per_step = 3


def _steps_to_days(n_steps):
    """Return the elapsed time in days after each of steps 1..n_steps."""
    return np.array([s * minutes_per_step / MINUTES_PER_DAY for s in range(1, n_steps + 1)])


steps = range(1, len(edge_addition_time_gpu) + 1)
days_gpu = _steps_to_days(len(edge_addition_time_gpu))
# Built from the CPU series' own length rather than sliced from days_gpu, so the
# x-values always match the CPU data even if the CPU run has more steps than the GPU run.
days_cpu = _steps_to_days(len(edge_addition_time_cpu))

# Averages
mean_edge_time_gpu = np.mean(edge_addition_time_gpu)
mean_edge_time_cpu = np.mean(edge_addition_time_cpu)
mean_walk_time_gpu = np.mean(walk_sampling_time_gpu)
mean_walk_time_cpu = np.mean(walk_sampling_time_cpu)

# === Helpers ===
def create_clean_axis(ax):
    """Give ``ax`` the shared look: faint grid, no top/right spines, thin outward ticks."""
    ax.grid(True, alpha=0.3, linewidth=0.5)
    # Hide the frame edges that carry no axis.
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    # Keep the two remaining edges, drawn thin.
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

def create_secondary_axis(ax, total_edges, days):
    """Add a top x-axis that annotates the day axis with cumulative edge counts.

    Tick positions are obtained by scaling edge counts linearly onto the day
    axis: ``max(total_edges)`` is placed at ``max(days)``. Ticks are fixed at
    0B, 20B, 40B, 60B and 80B edges.

    Args:
        ax: primary axes whose x-axis is in days.
        total_edges: cumulative edge counts; only the maximum is used.
        days: x-values of the primary axis; only the maximum is used.

    Returns:
        The twin axes created with ``ax.twiny()``.
    """
    ax2 = ax.twiny()
    max_edges_b = max(total_edges) / 1e9
    tick_edges_b = np.arange(0, 90, 20)
    tick_days = (tick_edges_b / max_edges_b) * max(days)
    ax2.set_xticks(tick_days)
    ax2.set_xticklabels([f'{int(t)}B' for t in tick_edges_b])
    # set_xticks() widens the view limits to include every tick, so the primary
    # axis limits are copied only afterwards; otherwise a tick beyond the data
    # range (e.g. 80B when fewer edges were added) would shift the top scale
    # out of alignment with the bottom one.
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xlabel("Total Edges Added (Billions)")
    ax2.tick_params(axis='x', labelsize=9, pad=5)
    return ax2

# === Plot 1: Edge Addition ===
fig1, ax1 = plt.subplots(figsize=FIGSIZE)

# Keyword sets shared by the GPU and CPU artists of this figure.
edge_line_kw = dict(markersize=2, linestyle='-', linewidth=1.2)
edge_mean_kw = dict(linestyle='--', linewidth=1.5, alpha=0.6)
edge_note_kw = dict(transform=ax1.transAxes, ha='right', fontsize=8)

# Per-step timings (GPU first, then CPU, so the draw order is GPU below CPU).
ax1.plot(days_gpu, edge_addition_time_gpu, marker='o', color=COLOR_GPU, **edge_line_kw)
ax1.plot(days_cpu, edge_addition_time_cpu, marker='s', color=COLOR_CPU, **edge_line_kw)

# Dashed horizontal lines at each device's mean time.
ax1.axhline(mean_edge_time_gpu, color=COLOR_GPU, **edge_mean_kw)
ax1.axhline(mean_edge_time_cpu, color=COLOR_CPU, **edge_mean_kw)

# Boxed annotations, positioned in axes coordinates on the right-hand side.
ax1.text(0.99, 0.68, f'GPU Mean: {mean_edge_time_gpu:.3f}s',
         bbox=dict(ANNOTATION_BOX_STYLE, facecolor=COLOR_GPU), **edge_note_kw)
ax1.text(0.99, 0.55, f'CPU Mean: {mean_edge_time_cpu:.3f}s',
         bbox=dict(ANNOTATION_BOX_STYLE, facecolor=COLOR_CPU), **edge_note_kw)

ax1.set_xlabel("Days of Data Processed")
ax1.set_ylabel("Time (seconds)")
ax1.set_yscale('log')
ax1.set_xticks(np.arange(0, 16, 4))
create_clean_axis(ax1)
# The top axis copies ax1's current x-limits, so it is added after the ticks are set.
create_secondary_axis(ax1, total_edges_gpu, days_gpu)

fig1.tight_layout()
fig1.savefig(f'{fig_dir}/plot_5_alibaba_edge_addition_performance.pdf')
fig1.savefig(f'{fig_dir}/plot_5_alibaba_edge_addition_performance.png')
plt.close(fig1)

# === Plot 2: Walk Sampling ===
fig2, ax2 = plt.subplots(figsize=FIGSIZE)

# Keyword sets shared by the GPU and CPU artists of this figure.
walk_line_kw = dict(markersize=2, linestyle='-', linewidth=1.2)
walk_mean_kw = dict(linestyle='--', linewidth=1.5, alpha=0.6)
walk_note_kw = dict(transform=ax2.transAxes, ha='left', fontsize=8)

# Per-step timings (GPU first, then CPU).
ax2.plot(days_gpu, walk_sampling_time_gpu, marker='o', color=COLOR_GPU, **walk_line_kw)
ax2.plot(days_cpu, walk_sampling_time_cpu, marker='s', color=COLOR_CPU, **walk_line_kw)

# Dashed horizontal lines at each device's mean time.
ax2.axhline(mean_walk_time_gpu, color=COLOR_GPU, **walk_mean_kw)
ax2.axhline(mean_walk_time_cpu, color=COLOR_CPU, **walk_mean_kw)

# Boxed annotations, positioned in axes coordinates in the top-left corner.
ax2.text(0.01, 0.92, f'GPU Mean: {mean_walk_time_gpu:.3f}s',
         bbox=dict(ANNOTATION_BOX_STYLE, facecolor=COLOR_GPU), **walk_note_kw)
ax2.text(0.01, 0.80, f'CPU Mean: {mean_walk_time_cpu:.3f}s',
         bbox=dict(ANNOTATION_BOX_STYLE, facecolor=COLOR_CPU), **walk_note_kw)

ax2.set_xlabel("Days of Data Processed")
ax2.set_ylabel("Time (seconds)")
ax2.set_xticks(np.arange(0, 16, 4))
create_clean_axis(ax2)
# The top axis copies ax2's current x-limits, so it is added after the ticks are set.
create_secondary_axis(ax2, total_edges_gpu, days_gpu)

fig2.tight_layout()
fig2.savefig(f'{fig_dir}/plot_5_alibaba_walk_sampling_performance.pdf')
fig2.savefig(f'{fig_dir}/plot_5_alibaba_walk_sampling_performance.png')
plt.close(fig2)

# === Standalone Legend ===
fig_legend = plt.figure(figsize=(3.3, 0.4))

# Proxy handles only: no data is drawn, the lines exist to feed the legend.
legend_elements = [
    mlines.Line2D([], [], color=device_color, marker=device_marker,
                  linestyle='-', markersize=5, label=device_label)
    for device_color, device_marker, device_label in (
        (COLOR_GPU, 'o', 'GPU'),
        (COLOR_CPU, 's', 'CPU'),
    )
]

legend_layout = dict(
    loc='center', ncol=2,
    frameon=True, fancybox=False, edgecolor='black',
    columnspacing=1.2, handletextpad=0.4, handlelength=1.8,
    borderpad=0.2, labelspacing=0.3,
    fontsize=8,
)
legend = fig_legend.legend(handles=legend_elements, **legend_layout)
legend.get_frame().set_linewidth(0.8)

# Render once, then measure the legend so the saved file is cropped to it.
fig_legend.canvas.draw()
legend_extent_px = legend.get_window_extent(fig_legend.canvas.get_renderer())
bbox = legend_extent_px
bbox_inches = bbox.transformed(fig_legend.dpi_scale_trans.inverted())

fig_legend.savefig(f'{fig_dir}/plot_5_alibaba_performance_legend.pdf',
                   bbox_inches=bbox_inches, pad_inches=0.02)
fig_legend.savefig(f'{fig_dir}/plot_5_alibaba_performance_legend.png',
                   bbox_inches=bbox_inches, pad_inches=0.02)
plt.close(fig_legend)

print("✅ Saved all Alibaba performance figures with consistent GPU/CPU colors and common legend.")

✅ Saved all Alibaba performance figures with consistent GPU/CPU colors and common legend.


In [3]:
import pickle
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import os

# === Load memory results ===
with open('results/memory_benchmarking.pickle', 'rb') as memory_file:
    results = pickle.load(memory_file)

# === Setup output folder ===
os.makedirs('figures', exist_ok=True)

# === Global font config for exact half-column plots ===
_memory_rc = {
    'font.family': 'serif',
    'font.size': 9,
    'text.usetex': False,
    # label / tick / legend text all share one size
    'axes.labelsize': 8,
    'axes.titlesize': 8,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    # output resolution
    'figure.dpi': 600,
    'savefig.dpi': 600,
    'savefig.bbox': 'tight',
}
plt.rcParams.update(_memory_rc)

# === Plot styles ===
# One row per graph configuration: (result-key suffix, legend label, colour, marker, linestyle).
_memory_series = [
    ('directed_with_weights', 'Directed (w/ weights)', '#1f77b4', 'o', '-'),
    ('directed_without_weights', 'Directed (w/o weights)', '#ff7f0e', 's', '--'),
    ('undirected_with_weights', 'Undirected (w/ weights)', '#2ca02c', '^', '-.'),
    ('undirected_without_weights', 'Undirected (w/o weights)', '#d62728', 'D', ':'),
]
# Transpose the table into the parallel lists the plotting functions index by position.
configs, config_labels, colors, markers, linestyles = (
    list(column) for column in zip(*_memory_series)
)

def create_clean_axis(ax, xticks=None, yticks=None):
    """Apply the compact axis style and optionally pin explicit tick positions.

    Args:
        ax: axes to style.
        xticks: optional sequence of x tick positions; ``None`` or an empty
            sequence leaves the x ticks untouched.
        yticks: optional sequence of y tick positions, same convention.
    """
    ax.grid(True, alpha=0.3, linewidth=0.4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.6)
    ax.spines['bottom'].set_linewidth(0.6)
    ax.tick_params(direction='out', length=2.5, width=0.6)
    # Explicit None/length checks instead of `if xticks:` — truth-testing a
    # multi-element NumPy array raises ValueError.
    if xticks is not None and len(xticks) > 0:
        ax.set_xticks(xticks)
    if yticks is not None and len(yticks) > 0:
        ax.set_yticks(yticks)

def plot_memory(prefix, filename, xlabel, xticks=None, semilogx=True):
    """Plot memory usage for every configuration of one sweep and save it as PDF and PNG.

    For each entry of ``configs`` the mapping ``results[f"{prefix}_{config}"]``
    is plotted with its keys on the x-axis and its values divided by 1024 on
    the y-axis. Missing keys are skipped.

    Args:
        prefix: result-key prefix selecting the sweep.
        filename: output file stem inside ``figures/``.
        xlabel: x-axis label.
        xticks: optional explicit x tick positions.
        semilogx: use a logarithmic x-axis when True, a linear one otherwise.

    Raises:
        ValueError: if no data was found for ``prefix`` (previously this
            surfaced as an opaque infinite-axis-limit error from matplotlib).
    """
    fig, ax = plt.subplots(figsize=(1.8, 1.4))
    plot_fn = ax.semilogx if semilogx else ax.plot

    plotted_y = []
    for i, config in enumerate(configs):
        key = f"{prefix}_{config}"
        if key not in results:
            continue
        data = results[key]
        x = list(data.keys())
        y = [v / 1024 for v in data.values()]  # MB → GB
        plot_fn(x, y, color=colors[i], marker=markers[i], linestyle=linestyles[i],
                linewidth=1, markersize=2.5, markeredgewidth=0.5)
        plotted_y.extend(y)

    if not plotted_y:
        # Nothing to scale the axis with: release the figure and fail clearly.
        plt.close(fig)
        raise ValueError(f"no memory results found for prefix {prefix!r}")

    min_y = min(plotted_y)
    max_y = max(plotted_y)

    # Calculate Y-tick range: pad by 15% of the span, never going below zero,
    # and show exactly three ticks (bottom, middle, top).
    padding = (max_y - min_y) * 0.15
    lower = max(0, round(min_y - padding, 3))
    upper = round(max_y + padding, 3)
    midpoint = round((lower + upper) / 2, 3)

    ax.set_ylim(lower, upper)
    ax.set_yticks([lower, midpoint, upper])
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))

    ax.set_xlabel(xlabel)
    create_clean_axis(ax, xticks)
    fig.tight_layout()
    fig.savefig(f'figures/{filename}.pdf')
    fig.savefig(f'figures/{filename}.png')
    # Close this specific figure rather than whatever happens to be current.
    plt.close(fig)

def plot_streaming_window(filename="plot_7_memory_memory_streaming_window"):
    """Plot per-batch memory usage of the streaming-window run and save it as PDF and PNG.

    For each entry of ``configs`` the sequence
    ``results[f"streaming_window_{config}"]`` is plotted against the 1-based
    batch index, with values divided by 1024. Missing keys are skipped.

    Args:
        filename: output file stem inside ``figures/``.

    Raises:
        ValueError: if no streaming-window data was found (previously this
            surfaced as an opaque infinite-axis-limit error from matplotlib).
    """
    fig, ax = plt.subplots(figsize=(1.8, 1.4))

    plotted_y = []
    for i, config in enumerate(configs):
        key = f"streaming_window_{config}"
        if key not in results:
            continue
        y = [v / 1024 for v in results[key]]
        x = list(range(1, len(y) + 1))
        ax.plot(x, y, color=colors[i], marker=markers[i], linestyle=linestyles[i],
                linewidth=1, markersize=1, markeredgewidth=0.5)
        plotted_y.extend(y)

    if not plotted_y:
        # Nothing to scale the axis with: release the figure and fail clearly.
        plt.close(fig)
        raise ValueError("no streaming_window results found")

    min_y = min(plotted_y)
    max_y = max(plotted_y)

    # Pad Y range by 15% of the span and set 3 ticks (bottom, middle, top).
    padding = (max_y - min_y) * 0.15
    lower = round(min_y - padding, 3)
    upper = round(max_y + padding, 3)
    midpoint = round((lower + upper) / 2, 3)

    ax.set_ylim(lower, upper)
    ax.set_yticks([lower, midpoint, upper])
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))

    ax.set_xlabel("Batch")
    create_clean_axis(ax)
    fig.tight_layout()
    fig.savefig(f'figures/{filename}.pdf')
    fig.savefig(f'figures/{filename}.png')
    # Close this specific figure rather than whatever happens to be current.
    plt.close(fig)

# === Save all 4 plots individually ===
# (result-key prefix, output stem, x-axis label, x ticks) for the three log-x sweeps.
_memory_sweeps = (
    ("increasing_edges", "plot_7_memory_memory_vs_edges", "Edge Count", [1e3, 1e6, 1e9]),
    ("increasing_nodes", "plot_7_memory_memory_vs_nodes", "Node Count", [1e2, 1e4, 1e6]),
    ("increasing_timestamps", "plot_7_memory_memory_vs_timestamps", "TS Count", [1e2, 1e4, 1e6]),
)
for _sweep_prefix, _sweep_file, _sweep_label, _sweep_ticks in _memory_sweeps:
    plot_memory(_sweep_prefix, _sweep_file, _sweep_label, xticks=_sweep_ticks)
plot_streaming_window("plot_7_memory_memory_streaming_window")

# === Create standalone legend ===
fig_legend = plt.figure(figsize=(5.5, 0.4))

# Proxy handles, one per configuration, styled like the plotted series.
legend_elements = []
for idx, _ in enumerate(configs):
    legend_elements.append(
        plt.Line2D([0], [0], marker=markers[idx], color=colors[idx],
                   linestyle=linestyles[idx], markersize=4, linewidth=1,
                   label=config_labels[idx], markeredgewidth=0.5)
    )

_memory_legend_layout = dict(
    loc='center', ncol=2, frameon=True, edgecolor='black',
    columnspacing=1.2, handletextpad=0.4, handlelength=1.8,
    borderpad=0.2, labelspacing=0.3,
)
legend = fig_legend.legend(handles=legend_elements, **_memory_legend_layout)
legend.get_frame().set_linewidth(0.6)

# Render once, then measure the legend so the saved file is cropped to it.
fig_legend.canvas.draw()
bbox = legend.get_window_extent(fig_legend.canvas.get_renderer())
bbox_inches = bbox.transformed(fig_legend.dpi_scale_trans.inverted())

fig_legend.savefig("figures/plot_7_memory_memory_legend.pdf",
                   bbox_inches=bbox_inches, pad_inches=0.02)
fig_legend.savefig("figures/plot_7_memory_memory_legend.png",
                   bbox_inches=bbox_inches, pad_inches=0.02)
plt.close(fig_legend)

print("🎯 All memory usage figures and legend saved with final LaTeX-safe dimensions.")

🎯 All memory usage figures and legend saved with final LaTeX-safe dimensions.


In [4]:
import numpy as np


def human_format(x, pos=None):
    """Format a tick value compactly, e.g. 2_500_000 -> '2.5M', 50_000 -> '50K'.

    Args:
        x: numeric tick value.
        pos: tick position; unused, accepted so the function can serve as a
            matplotlib tick-formatter callback.

    Returns:
        The value as a short string with an 'M' or 'K' suffix where applicable.
    """
    if x >= 1_000_000:
        # Trim the trailing ".0" BEFORE appending the suffix; stripping after
        # the 'M' was added never matched, so 1_000_000 rendered as '1.0M'.
        millions = f'{x / 1_000_000:.1f}'.rstrip('0').rstrip('.')
        return f'{millions}M'
    elif x >= 1_000:
        return f'{x/1_000:.0f}K'
    else:
        return str(int(x))

def compute_mean_std(data):
    """Return the per-row mean and standard deviation of a 2-D collection.

    ``data`` is converted to a NumPy array and both statistics are reduced
    along axis 1, giving one value per row.
    """
    samples = np.array(data)
    row_means = np.mean(samples, axis=1)
    row_stds = np.std(samples, axis=1)
    return row_means, row_stds

In [5]:
import pickle

results_dir = 'results/'

# Load the benchmark results
def load_results():
    """Load the CPU, GPU and Raphtory benchmark pickles from ``results_dir``.

    Returns:
        A ``(cpu_results, gpu_results, raphtory_results)`` tuple of dicts. The
        CPU and GPU dicts have the keys "edges", "walks", "max_walk_lens" and
        "incremental"; the Raphtory dict has "edges" and "incremental".
    """
    def _load_pickle(stem):
        # Open inside `with` so every handle is closed right after reading;
        # the previous bare open() calls leaked one handle per file.
        # NOTE(review): pickle executes arbitrary code on load; only use trusted files.
        with open(f"{results_dir}/{stem}.pkl", "rb") as handle:
            return pickle.load(handle)

    cpu_results = {
        "edges": _load_pickle("result_edges_CPU"),
        "walks": _load_pickle("result_walks_CPU"),
        "max_walk_lens": _load_pickle("result_max_walk_lens_CPU"),
        "incremental": _load_pickle("result_incremental_sliding_CPU")
    }

    gpu_results = {
        "edges": _load_pickle("result_edges_GPU"),
        "walks": _load_pickle("result_walks_GPU"),
        "max_walk_lens": _load_pickle("result_max_walk_lens_GPU"),
        "incremental": _load_pickle("result_incremental_sliding_GPU")
    }

    raphtory_results = {
        "edges": _load_pickle("result_edges_raphtory"),
        "incremental": _load_pickle("result_incremental_raphtory")
    }

    return cpu_results, gpu_results, raphtory_results

# X-axis values of the benchmark sweeps: sub-million sizes are listed
# explicitly, the rest are expressed in millions.
_MILLION = 1_000_000

edge_counts = [10_000, 50_000, 100_000, 500_000] + [
    millions * _MILLION for millions in (1, 2, 5, 10, 20, 30, 40, 50, 60)
]

walk_counts = [10_000, 50_000, 100_000, 200_000, 500_000] + [
    millions * _MILLION for millions in (1, 2, 5, 10)
]

# Maximum walk lengths 10, 20, ..., 300.
walk_lengths = [10 * step for step in range(1, 31)]

# Load every benchmark pickle once; later plotting cells read these three dicts
# (e.g. cpu_results["edges"], gpu_results["walks"], raphtory_results["edges"]).
cpu_results, gpu_results, raphtory_results = load_results()

In [6]:
# StellarGraph baseline measurements, stored separately from the other results.
stellargraph_results_path = "results/stellargraph_results.pkl"
with open(stellargraph_results_path, "rb") as stellargraph_file:
    stellargraph_results = pickle.load(stellargraph_file)

In [14]:
import matplotlib.pyplot as plt
import os

# Output directory for every figure of this cell.
os.makedirs('figures', exist_ok=True)

# Font and style configuration for EuroSys
_edge_plot_rc = {
    'font.family': 'serif',
    'font.size': 9,
    'text.usetex': False,
    'axes.labelsize': 10,
    'axes.titlesize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update(_edge_plot_rc)

# One colour per plotted system; CPU in oranges, GPU in blues.
colors = dict(
    cpu_no_weights='orange',
    cpu_weights='darkorange',
    gpu_no_weights='blue',
    gpu_weights='darkblue',
    raphtory='green',
    stellargraph='teal',
)

# Per-edge-count mean and standard deviation over the repeated runs of each system.
_cpu_edge_runs = cpu_results["edges"]
_gpu_edge_runs = gpu_results["edges"]

cpu_mean_wo, cpu_std_wo = compute_mean_std(_cpu_edge_runs["edge_addition_time_without_weights"])
cpu_mean_w, cpu_std_w = compute_mean_std(_cpu_edge_runs["edge_addition_time_with_weights"])
gpu_mean_wo, gpu_std_wo = compute_mean_std(_gpu_edge_runs["edge_addition_time_without_weights"])
gpu_mean_w, gpu_std_w = compute_mean_std(_gpu_edge_runs["edge_addition_time_with_weights"])
raphtory_mean, raphtory_std = compute_mean_std(raphtory_results["edges"])
stellargraph_mean, stellargraph_std = compute_mean_std(
    stellargraph_results['edge_addition_stellargraph']
)

# Clean axis formatting
def create_clean_axis(ax):
    """Style ``ax`` with a faint grid, only left/bottom spines and short outward ticks."""
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

# === Create Plot ===
fig, ax = plt.subplots(1, 1, figsize=(3.3, 2.2))  # ~Single-column width

# One (mean, std, line format, colour, legend label) entry per system,
# listed in draw/legend order.
edge_addition_series = [
    (cpu_mean_wo, cpu_std_wo, 'o-', colors['cpu_no_weights'], "CPU (w/o Weights)"),
    (cpu_mean_w, cpu_std_w, 's--', colors['cpu_weights'], "CPU (w/ Weights)"),
    (gpu_mean_wo, gpu_std_wo, 'o-', colors['gpu_no_weights'], "GPU (w/o Weights)"),
    (gpu_mean_w, gpu_std_w, 's--', colors['gpu_weights'], "GPU (w/ Weights)"),
    (raphtory_mean, raphtory_std, 'd-', colors['raphtory'], "Raphtory"),
    (stellargraph_mean, stellargraph_std, 'd-', colors['stellargraph'], "Stellargraph"),
]

# Plot error bars (mean ± one standard deviation at every edge count)
for series_mean, series_std, series_fmt, series_color, series_label in edge_addition_series:
    ax.errorbar(edge_counts, series_mean, yerr=series_std,
                fmt=series_fmt, color=series_color, label=series_label,
                markersize=3, linewidth=1.2, capsize=2, markeredgewidth=0.5)

# X-axis: custom spaced ticks.
# Labels are derived from the tick values so the two can never get out of
# sync; the hand-written list had '50M' and '40M' swapped.
xticks = [1_000_000, 10_000_000, 20_000_000, 30_000_000, 40_000_000, 50_000_000, 60_000_000]
xtick_labels = [f'{tick // 1_000_000}M' for tick in xticks]
ax.set_xticks(xticks)
ax.set_xticklabels(xtick_labels)

# Labels
ax.set_xlabel("Number of Edges")
ax.set_ylabel("Time (seconds)")
create_clean_axis(ax)

# Shared legend below
fig.legend(loc='lower center', ncol=2, fontsize=8, frameon=True,
           edgecolor='black', bbox_to_anchor=(0.5, -0.22))
fig.tight_layout(rect=[0, 0.05, 1, 1])  # Leave space for legend

# Save
fig.savefig('figures/plot_2_incremental_edge_addition_absolute.pdf')
fig.savefig('figures/plot_2_incremental_edge_addition_absolute.png')
plt.close(fig)

print("✅ Saved figure:")
print("- figures/plot_2_incremental_edge_addition_absolute.pdf/png")

✅ Saved figure:
- figures/plot_2_incremental_edge_addition_absolute.pdf/png


In [8]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import ticker
import os

# Output directory for every figure of this cell.
os.makedirs('figures', exist_ok=True)

# Configure fonts for EuroSys-style paper
_walk_plot_rc = {
    'font.family': 'serif',
    'font.size': 9,
    'text.usetex': False,
    'axes.labelsize': 10,
    'axes.titlesize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update(_walk_plot_rc)

# Color mapping: CPU in oranges, GPU in blues; darker shade for weight-based sampling.
colors = dict(
    cpu_index='orange',
    cpu_weight='darkorange',
    gpu_index='blue',
    gpu_weight='darkblue',
)

# Format large tick labels
def human_format(x, pos=None):
    """Format a tick value with a whole-number 'M' or 'K' suffix (e.g. 5_000_000 -> '5M').

    ``pos`` is unused; it is accepted so the function fits the matplotlib
    tick-formatter callback signature. Values below 1000 are truncated to int.
    """
    for threshold, suffix in ((1_000_000, 'M'), (1_000, 'K')):
        if x >= threshold:
            return f'{x / threshold:.0f}{suffix}'
    return str(int(x))

# Style helper
def create_clean_axis(ax):
    """Style ``ax`` with a faint grid, only left/bottom spines and short outward ticks."""
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

# Mean and standard deviation per walk count for each device / picker family.
_cpu_walk_runs = cpu_results["walks"]
_gpu_walk_runs = gpu_results["walks"]

cpu_mean_index, cpu_std_index = compute_mean_std(_cpu_walk_runs["walk_sampling_time_index_based"])
cpu_mean_weight, cpu_std_weight = compute_mean_std(_cpu_walk_runs["walk_sampling_time_weight_based"])
gpu_mean_index, gpu_std_index = compute_mean_std(_gpu_walk_runs["walk_sampling_time_index_based"])
gpu_mean_weight, gpu_std_weight = compute_mean_std(_gpu_walk_runs["walk_sampling_time_weight_based"])

# === Plot: Absolute Runtime ===
fig, ax = plt.subplots(1, 1, figsize=(3.3, 2.2))

# Marker / line / cap sizes shared by all four series.
_walk_errorbar_kw = dict(markersize=3, linewidth=1.2, capsize=2, markeredgewidth=0.5)

ax.errorbar(walk_counts, cpu_mean_index, yerr=cpu_std_index, fmt='o-',
            color=colors['cpu_index'], label="CPU (Index Based)", **_walk_errorbar_kw)
ax.errorbar(walk_counts, cpu_mean_weight, yerr=cpu_std_weight, fmt='s--',
            color=colors['cpu_weight'], label="CPU (Weight Based)", **_walk_errorbar_kw)
ax.errorbar(walk_counts, gpu_mean_index, yerr=gpu_std_index, fmt='o-',
            color=colors['gpu_index'], label="GPU (Index Based)", **_walk_errorbar_kw)
ax.errorbar(walk_counts, gpu_mean_weight, yerr=gpu_std_weight, fmt='s--',
            color=colors['gpu_weight'], label="GPU (Weight Based)", **_walk_errorbar_kw)

# Hand-picked x ticks with matching short labels.
xticks = [50_000, 1_000_000, 3_000_000, 5_000_000, 7_000_000, 10_000_000]
xtick_labels = ['50K', '1M', '3M', '5M', '7M', '10M']
ax.set_xticks(xticks)
ax.set_xticklabels(xtick_labels)

ax.set_xlabel("Number of Walks")
ax.set_ylabel("Time (seconds)")
create_clean_axis(ax)

# Legend goes inside the axes, top-left.
ax.legend(loc='upper left', frameon=True, edgecolor='black')

fig.tight_layout()
fig.savefig('figures/plot_3_incremental_walk_sampling_absolute.pdf')
fig.savefig('figures/plot_3_incremental_walk_sampling_absolute.png')
plt.close(fig)

print("✅ Saved walk sampling plot:")
print("- figures/plot_3_incremental_walk_sampling_absolute.pdf/png")

✅ Saved walk sampling plot:
- figures/plot_3_incremental_walk_sampling_absolute.pdf/png


In [8]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Output directory for every figure of this cell.
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format
_walk_length_rc = {
    'font.family': 'serif',
    'font.size': 11,
    'text.usetex': False,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update(_walk_length_rc)

# One colour per curve: the two devices and their time ratio.
colors = dict(cpu='orange', gpu='blue', ratio='teal')

# Mean and std of the sampling time at each maximum walk length.
cpu_mean, cpu_std = compute_mean_std(cpu_results["max_walk_lens"]["walk_sampling_time"])
gpu_mean, gpu_std = compute_mean_std(gpu_results["max_walk_lens"]["walk_sampling_time"])

# CPU/GPU time ratio; its uncertainty combines the two relative errors in quadrature.
ratios = cpu_mean / gpu_mean
_cpu_rel_err = cpu_std / cpu_mean
_gpu_rel_err = gpu_std / gpu_mean
ratios_std = ratios * np.sqrt(_cpu_rel_err ** 2 + _gpu_rel_err ** 2)

def create_clean_axis(ax):
    """Style ``ax`` with a faint grid, only left/bottom spines and short outward ticks."""
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

# Figure 1: Walk Length Absolute Performance
fig1, ax1 = plt.subplots(1, 1, figsize=(6, 4))

# Mean sampling time ± one standard deviation for each device.
ax1.errorbar(walk_lengths, cpu_mean, yerr=cpu_std,
             fmt='o-', color=colors['cpu'], label="CPU",
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

ax1.errorbar(walk_lengths, gpu_mean, yerr=gpu_std,
             fmt='o-', color=colors['gpu'], label="GPU",
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

ax1.set_xlabel("Maximum Walk Length")
ax1.set_ylabel("Time (seconds)")
create_clean_axis(ax1)

# Add legend inside the plot
legend1 = ax1.legend(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=10)
legend1.get_frame().set_linewidth(0.8)

fig1.tight_layout()
fig1.savefig('figures/plot_4_incremental_walk_length_absolute.pdf', format='pdf', bbox_inches='tight')
fig1.savefig('figures/plot_4_incremental_walk_length_absolute.png', format='png', bbox_inches='tight')
plt.close(fig1)

# Figure 2: Walk Length Performance Ratios
fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))

ax2.errorbar(walk_lengths, ratios, yerr=ratios_std,
             fmt='o-', color=colors['ratio'], label='CPU / GPU',
             markersize=4, linewidth=1.5, capsize=3, markeredgewidth=0.5)

# Reference line at ratio 1, i.e. where CPU and GPU times are equal.
ax2.axhline(y=1, color='black', linestyle=':', alpha=0.7, linewidth=1.5)

# Add mean ratio line
mean_ratio = np.mean(ratios)
ax2.axhline(y=mean_ratio, color='red', linestyle='--', alpha=0.8, linewidth=2,
            label=f'Mean Ratio: {mean_ratio:.1f}×')

ax2.set_xlabel("Maximum Walk Length")
ax2.set_ylabel("Time Ratio")
create_clean_axis(ax2)

# Add legend inside the plot
legend2 = ax2.legend(loc='upper left', frameon=True, fancybox=False, edgecolor='black', fontsize=10)
legend2.get_frame().set_linewidth(0.8)

fig2.tight_layout()
fig2.savefig('figures/plot_4_incremental_walk_length_ratios.pdf', format='pdf', bbox_inches='tight')
fig2.savefig('figures/plot_4_incremental_walk_length_ratios.png', format='png', bbox_inches='tight')
plt.close(fig2)

# The reported names now match the files written above; the old messages
# omitted the "incremental_" part and pointed at files that do not exist.
print("Created separate walk length figures (no titles, legends inside):")
print("- plot_4_incremental_walk_length_absolute.pdf")
print("- plot_4_incremental_walk_length_ratios.pdf")
print("\nAll figures saved in figures/ directory")
print("Note: Titles removed, legends kept inside plots")

Created separate walk length figures (no titles, legends inside):
- plot_4_walk_length_absolute.pdf
- plot_4_walk_length_ratios.pdf

All figures saved in figures/ directory
Note: Titles removed, legends kept inside plots


In [9]:
def generate_bell_curve_temporal_graph(n_nodes=1000, n_edges=1_000_000, seed=42, time_range=1_000_000):
    """Generate a random temporal edge list whose endpoints favour middle node ids.

    Endpoints are drawn independently with Gaussian-shaped probabilities
    centred on ``n_nodes / 2`` (width ``n_nodes / 6``). Self-loops are dropped,
    so fewer than ``n_edges`` edges may be returned. Each remaining edge gets a
    uniform integer timestamp in ``[0, time_range)``.

    Note: seeds NumPy's global random state with ``seed``.

    Returns:
        ``(sources, targets, timestamps)`` arrays of equal length.
    """
    np.random.seed(seed)

    # Unnormalised bell curve over node indices, then normalised to sum to 1.
    center = n_nodes / 2
    spread = n_nodes / 6
    z_scores = (np.arange(n_nodes) - center) / spread
    weights = np.exp(-0.5 * z_scores ** 2)
    node_probs = weights / weights.sum()

    # Draw order matters for reproducibility: sources first, then targets.
    src = np.random.choice(n_nodes, size=n_edges, p=node_probs)
    dst = np.random.choice(n_nodes, size=n_edges, p=node_probs)

    # Keep only edges whose endpoints differ.
    keep = src != dst
    src, dst = src[keep], dst[keep]

    stamps = np.random.randint(0, high=time_range, size=len(src))
    return src, dst, stamps

In [15]:
import numpy as np
import matplotlib.pyplot as plt
from temporal_random_walk import TemporalRandomWalk
import os

# Output directory for every figure of this cell.
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format
_walk_distribution_rc = {
    'font.family': 'serif',
    'font.size': 9,
    'text.usetex': False,
    'axes.labelsize': 10,
    'axes.titlesize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
}
plt.rcParams.update(_walk_distribution_rc)

def run_walk_sampling(edge_picker, sources, targets, timestamps, n_nodes=1000):
    """Sample temporal random walks with one bias and count node usage.

    Builds an undirected CPU ``TemporalRandomWalk`` from the edge list, samples
    1,000,000 forward-in-time walks of at most 100 steps with ``edge_picker``
    as the walk bias, and prints the average walk length.

    Args:
        edge_picker: walk-bias name passed as ``walk_bias``.
        sources, targets, timestamps: parallel edge arrays.
        n_nodes: number of node ids; every id must be below this value.

    Returns:
        ``(edge_node_counts, walk_node_counts, avg_walk_length)`` where the two
        count arrays have length ``n_nodes`` and are scaled so their maximum is 1.
    """
    trw = TemporalRandomWalk(
        is_directed=False,
        use_gpu=False,
        max_time_capacity=-1,
        enable_weight_computation=True
    )
    trw.add_multiple_edges(sources, targets, timestamps)

    walks, _, walk_lengths = trw.get_random_walks_and_times(
        max_walk_len=100,
        walk_bias=edge_picker,
        num_walks_total=1_000_000,
        initial_edge_bias="Uniform",
        walk_direction="Forward_In_Time"
    )

    avg_walk_length = walk_lengths.mean()
    print(f"{edge_picker} → Avg walk length: {avg_walk_length:.2f}")

    # How often each node appears as an edge endpoint. np.add.at accumulates
    # repeated indices exactly like the former `counts[node] += 1` Python loop,
    # without iterating element by element in the interpreter.
    edge_node_counts = np.zeros(n_nodes)
    np.add.at(edge_node_counts, np.concatenate([sources, targets]), 1)

    # How often each node is visited by a walk; -1 entries are excluded
    # (presumably padding for walks shorter than max_walk_len — confirm against the library).
    valid_steps = walks[walks != -1]
    walk_node_counts = np.zeros(n_nodes)
    np.add.at(walk_node_counts, valid_steps, 1)

    # Scale both histograms to a peak of 1 so they share one y-axis.
    edge_node_counts /= edge_node_counts.max()
    walk_node_counts /= walk_node_counts.max()

    return edge_node_counts, walk_node_counts, avg_walk_length

def create_clean_axis(ax):
    """Style ``ax`` with a faint grid, only left/bottom spines and short outward ticks."""
    ax.grid(True, alpha=0.3, linewidth=0.5)
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)
    ax.tick_params(direction='out', length=3, width=0.8)

def plot_individual_distributions(results, n_nodes=1000):
    """Save one edge-vs-walk node-frequency figure per entry of ``results``.

    ``results`` holds ``(edge_counts, walk_counts, avg_walk_length)`` tuples;
    the i-th entry is written to ``figures/plot_6_walk_distribution_<name>``
    (PDF and PNG), where ``<name>`` is the lower-cased i-th name of the fixed
    list below. The average walk length is not used in the figure.
    """
    pickers = ["ExponentialIndex", "LinearIndex", "Uniform"]
    edge_color, walk_color = '#1f77b4', '#ff7f0e'  # Blue, Orange

    node_ids = np.arange(n_nodes)
    # Same five x ticks and four y ticks on every figure.
    xticks = np.linspace(0, n_nodes - 1, 5, dtype=int)
    yticks = np.linspace(0, 1, 4)

    for i, result in enumerate(results):
        edge_freq, walk_freq, _avg_len = result
        fig, ax = plt.subplots(figsize=(2.5, 1.7))

        ax.plot(node_ids, edge_freq,
                linewidth=1.5, color=edge_color, label='Edge Participation')
        ax.plot(node_ids, walk_freq,
                linewidth=1.5, color=walk_color, label='Walk Visit Count')

        ax.set_xlabel("Node ID", fontsize=10)
        ax.set_ylabel("Normalized Freq", fontsize=10)
        ax.set_xticks(xticks)
        ax.set_yticks(yticks)
        create_clean_axis(ax)

        fig.tight_layout()

        filename = f"plot_6_walk_distribution_{pickers[i].lower()}"
        fig.savefig(f'figures/{filename}.pdf', format='pdf')
        fig.savefig(f'figures/{filename}.png', format='png')
        plt.close(fig)

        print(f"✅ Saved {filename}")

def create_walk_distribution_legend():
    """Save a standalone two-entry legend for the walk-distribution figures.

    Writes ``figures/plot_6_walk_distribution_legend.pdf`` and ``.png``,
    cropped to the legend box.
    """
    fig_legend = plt.figure(figsize=(5, 0.5))

    # Academic color scheme
    colors = ['#1f77b4', '#ff7f0e']  # Blue, Orange

    # Proxy handles only: no data is drawn, the lines exist to feed the legend.
    legend_elements = [
        plt.Line2D([0], [0], color=colors[0], linewidth=2,
                   label='Edge Participation', markeredgewidth=0.5),
        plt.Line2D([0], [0], color=colors[1], linewidth=2,
                   label='Walk Visit Count', markeredgewidth=0.5)
    ]

    # Create legend directly on figure with tight parameters
    legend = fig_legend.legend(handles=legend_elements,
                              loc='center',
                              ncol=2,
                              frameon=True,
                              fancybox=False,
                              edgecolor='black',
                              columnspacing=1.2,
                              handletextpad=0.4,
                              handlelength=1.8,
                              borderpad=0.2,
                              labelspacing=0.3)

    legend.get_frame().set_linewidth(0.8)

    # Render once before measuring, as the other standalone-legend exports in
    # this notebook do, so the extent reflects the laid-out legend.
    fig_legend.canvas.draw()

    # Get exact bounding box and save with minimal whitespace
    bbox = legend.get_window_extent(fig_legend.canvas.get_renderer())
    bbox_inches = bbox.transformed(fig_legend.dpi_scale_trans.inverted())

    # Save and close this figure explicitly instead of relying on it being
    # pyplot's current figure.
    fig_legend.savefig('figures/plot_6_walk_distribution_legend.pdf', format='pdf',
                       bbox_inches=bbox_inches, pad_inches=0.02)
    fig_legend.savefig('figures/plot_6_walk_distribution_legend.png', format='png',
                       bbox_inches=bbox_inches, pad_inches=0.02)
    plt.close(fig_legend)

    print("Saved standalone legend")

def run_bell_curve_walk_distribution_experiment():
    """Generate the bell-curve graph, sample walks per bias, and write all figures.

    Runs ``run_walk_sampling`` once for each of the three walk biases on the
    same generated edge list, then saves one figure per bias plus the
    standalone legend and prints a summary of the written files.
    """
    print("Generating graph...")
    sources, targets, timestamps = generate_bell_curve_temporal_graph()

    # Sampled in this order; the plotting step names the output files by position.
    pickers = ["ExponentialIndex", "Linear", "Uniform"]
    results = [
        tuple(run_walk_sampling(picker, sources, targets, timestamps))
        for picker in pickers
    ]

    plot_individual_distributions(results)
    create_walk_distribution_legend()

    summary_lines = (
        "Created separated walk distribution figures:",
        "- plot_6_walk_distribution_exponentialindex.pdf",
        "- plot_6_walk_distribution_linearindex.pdf",
        "- plot_6_walk_distribution_uniform.pdf",
        "- plot_6_walk_distribution_legend.pdf (standalone legend)",
        "\nAll figures saved in figures/ directory",
        "Note: Titles and legends removed from individual figures",
    )
    for line in summary_lines:
        print(line)

# Run the full experiment: generates the graph, samples walks for each bias,
# and writes the plot_6_* figures (the sampling itself is done on the CPU).
run_bell_curve_walk_distribution_experiment()

Generating graph...
ExponentialIndex → Avg walk length: 96.33
Linear → Avg walk length: 15.26
Uniform → Avg walk length: 9.96
✅ Saved plot_6_walk_distribution_exponentialindex
✅ Saved plot_6_walk_distribution_linearindex
✅ Saved plot_6_walk_distribution_uniform
Saved standalone legend
Created separated walk distribution figures:
- plot_6_walk_distribution_exponentialindex.pdf
- plot_6_walk_distribution_linearindex.pdf
- plot_6_walk_distribution_uniform.pdf
- plot_6_walk_distribution_legend.pdf (standalone legend)

All figures saved in figures/ directory
Note: Titles and legends removed from individual figures


In [11]:
from temporal_random_walk import LinearRandomPicker, ExponentialIndexRandomPicker, UniformRandomPicker

N_TRIALS = 100_000
HI = 20

def test_pickers(picker, prioritize_end, hi, n_trials=N_TRIALS):
    counts = [0] * hi

    for _ in range(n_trials):
        picked_num = picker.pick_random(0, hi, prioritize_end)
        counts[picked_num] += 1

    return counts


# Sample every picker type under both prioritisation modes (end-prioritised first).
# Each run builds a fresh picker instance and uses test_pickers' default trial count;
# the run order is linear, exponential-index, uniform.
counts_linear_end_prioritized, counts_linear_start_prioritized = (
    test_pickers(LinearRandomPicker(), prioritize_end, hi=HI)
    for prioritize_end in (True, False)
)

counts_exp_end_prioritized, counts_exp_start_prioritized = (
    test_pickers(ExponentialIndexRandomPicker(), prioritize_end, hi=HI)
    for prioritize_end in (True, False)
)

counts_uniform_end_prioritized, counts_uniform_start_prioritized = (
    test_pickers(UniformRandomPicker(), prioritize_end, hi=HI)
    for prioritize_end in (True, False)
)

In [12]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import os

# Ensure figures directory exists (the save calls in this cell write to 'figures/...').
os.makedirs('figures', exist_ok=True)

# Configure fonts and style for academic paper - EuroSys format.
# This updates matplotlib's global rcParams, so the settings stay in effect for
# later cells until another cell overrides them.
# NOTE(review): the font sizes here (11/12/13/10) are larger than the 9/10 used by
# the first plotting cell — presumably to suit the larger 6x4 figures; confirm.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 13,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

def create_clean_axis(ax):
    """Apply the shared figure styling to ``ax`` in place.

    Turns on a faint grid, hides the top and right spines, sets the left and
    bottom spines to 0.8 wide, draws ticks outward, and restricts major x
    ticks to integer positions. Returns ``None``.

    Note: an earlier cell defines a function with the same name; running this
    cell replaces that definition for the rest of the session.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)

    # Open-box look: drop the top/right edges, keep the two data axes.
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)

    ax.tick_params(direction='out', length=3, width=0.8)

    # The x axis holds discrete picked numbers, so only integer ticks are shown.
    integer_locator = MaxNLocator(integer=True)
    ax.xaxis.set_major_locator(integer_locator)

def create_picker_validation_figures():
    """Create individual figures for each picker validation test.

    Reads the six module-level ``counts_*`` lists and, for each one, plots the
    count at every index as a line and saves it as
    ``figures/plot_1_picker_validation_<name>.pdf`` and ``.png``. No title or
    legend is drawn on the figures. Prints one "Saved ..." line per figure and
    returns ``None``.

    Each figure is handled through its own ``fig`` object (layout, save, close)
    rather than pyplot's implicit "current figure", and is closed in a
    ``finally`` block so a failed save does not leave the figure open.
    """
    # (file-name suffix, descriptive label, pick counts per number)
    plot_data = [
        ("exponential_end", "Exponential (End Prioritized)", counts_exp_end_prioritized),
        ("exponential_start", "Exponential (Start Prioritized)", counts_exp_start_prioritized),
        ("linear_end", "Linear (End Prioritized)", counts_linear_end_prioritized),
        ("linear_start", "Linear (Start Prioritized)", counts_linear_start_prioritized),
        ("uniform_end", "Uniform (End Prioritized)", counts_uniform_end_prioritized),
        ("uniform_start", "Uniform (Start Prioritized)", counts_uniform_start_prioritized),
    ]

    # Academic color scheme
    color = '#1f77b4'  # Blue

    # The descriptive label is kept in plot_data for reference but deliberately
    # not drawn: the figures are saved without titles.
    for filename, _title, data in plot_data:
        output_filename = f"plot_1_picker_validation_{filename}"
        fig, ax = plt.subplots(1, 1, figsize=(6, 4))
        try:
            # x is the implicit index of each count, i.e. the picked number.
            ax.plot(data, color=color, linewidth=2, alpha=0.8, markeredgewidth=0.5)

            ax.set_xlabel("Number")
            ax.set_ylabel("Times Picked")
            create_clean_axis(ax)

            fig.tight_layout()

            # Save individual figure in both vector and raster form.
            fig.savefig(f'figures/{output_filename}.pdf', format='pdf', bbox_inches='tight')
            fig.savefig(f'figures/{output_filename}.png', format='png', bbox_inches='tight')
        finally:
            # Release this specific figure even when plotting or saving raises.
            plt.close(fig)

        print(f"Saved {output_filename}")

def run_picker_validation_analysis():
    """Generate all picker validation figures, then print a summary of the outputs.

    Delegates the plotting to ``create_picker_validation_figures()``. The file
    names printed afterwards are a hard-coded list, not read back from disk.
    Returns ``None``.
    """
    create_picker_validation_figures()

    summary_lines = (
        "\nCreated separated picker validation figures:",
        "- plot_1_picker_validation_exponential_end.pdf",
        "- plot_1_picker_validation_exponential_start.pdf",
        "- plot_1_picker_validation_linear_end.pdf",
        "- plot_1_picker_validation_linear_start.pdf",
        "- plot_1_picker_validation_uniform_end.pdf",
        "- plot_1_picker_validation_uniform_start.pdf",
        "\nAll figures saved in figures/ directory",
        "Note: Titles and legends removed from individual figures",
    )
    for summary_line in summary_lines:
        print(summary_line)

# Run the analysis.
# The guard only fires when this code runs as the main module; the captured output
# below shows that it did when the cell was executed.
if __name__ == "__main__":
    run_picker_validation_analysis()

Saved plot_1_picker_validation_exponential_end
Saved plot_1_picker_validation_exponential_start
Saved plot_1_picker_validation_linear_end
Saved plot_1_picker_validation_linear_start
Saved plot_1_picker_validation_uniform_end
Saved plot_1_picker_validation_uniform_start

Created separated picker validation figures:
- plot_1_picker_validation_exponential_end.pdf
- plot_1_picker_validation_exponential_start.pdf
- plot_1_picker_validation_linear_end.pdf
- plot_1_picker_validation_linear_start.pdf
- plot_1_picker_validation_uniform_end.pdf
- plot_1_picker_validation_uniform_start.pdf

All figures saved in figures/ directory
Note: Titles and legends removed from individual figures


In [15]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import ticker
import os

# Ensure figures directory exists (the figure saved at the end of this cell goes to 'figures/...').
os.makedirs('figures', exist_ok=True)

# Configure fonts for EuroSys-style plots.
# This updates matplotlib's global rcParams, replacing whatever sizes an earlier
# cell set, and stays in effect for any cell run afterwards.
plt.rcParams.update({
    'font.family': 'serif',
    'font.size': 9,
    'axes.labelsize': 10,
    'axes.titlesize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
    'text.usetex': False,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight'
})

# Line colour per system, keyed by the identifiers used when plotting.
# NOTE(review): the first plotting cell defines COLOR_GPU = '#1f77b4' and
# COLOR_CPU = '#ff7f0e' as the "consistent" GPU/CPU colours; the named colours
# here ('blue', 'orange') are different shades — confirm which is intended.
colors = {
    'trw_cpu': 'orange',
    'trw_gpu': 'blue',
    'stellargraph': 'green',
}

def compute_mean_std(data):
    """Return the per-row mean and standard deviation of a 2-D array-like.

    Args:
        data: Nested sequence or array with at least two dimensions; it is
            converted with ``np.array`` before reduction.

    Returns:
        tuple: ``(means, stds)``, each reduced along axis 1. The standard
        deviation uses NumPy's default ``ddof=0`` (population form).
    """
    samples = np.array(data)
    row_means = np.mean(samples, axis=1)
    row_stds = np.std(samples, axis=1)
    return row_means, row_stds

def human_format(x, pos=None):
    """Abbreviate a number with a ``K`` (thousands) or ``M`` (millions) suffix.

    The ``(x, pos)`` signature is the one matplotlib's ``FuncFormatter`` calls
    tick formatters with; ``pos`` is accepted but ignored.

    Args:
        x: Numeric value to format.
        pos: Tick position (unused).

    Returns:
        str: ``'<n>M'`` for magnitudes of a million or more, ``'<n>K'`` for
        magnitudes of a thousand or more, otherwise the value truncated to an
        integer. The scaled number is rounded to zero decimals.

    Fixes over the previous version:
      * Values just below one million (e.g. 999_999) round up to 1000 thousands;
        they are now shown as ``'1M'`` instead of ``'1000K'``.
      * Negative values are abbreviated too (``-5000`` -> ``'-5K'``) instead of
        falling through to the plain-integer branch.
    """
    magnitude = abs(x)
    if magnitude < 1_000:
        # int() truncates toward zero and carries the sign itself.
        return str(int(x))

    sign = '-' if x < 0 else ''
    thousands = f'{magnitude / 1_000:.0f}'
    # '1000' here means rounding pushed the value across the K/M boundary.
    if magnitude < 1_000_000 and thousands != '1000':
        return f'{sign}{thousands}K'
    return f'{sign}{magnitude / 1_000_000:.0f}M'

def create_clean_axis(ax):
    """Apply the shared figure styling to ``ax`` in place.

    Turns on a faint grid, hides the top and right spines, sets the left and
    bottom spines to 0.8 wide, and draws ticks outward. Returns ``None``.

    Note: earlier cells define a function with the same name; running this
    cell replaces that definition for the rest of the session.
    """
    ax.grid(True, alpha=0.3, linewidth=0.5)

    # Open-box look: drop the top/right edges, keep the two data axes.
    for hidden_side in ('top', 'right'):
        ax.spines[hidden_side].set_visible(False)
    for kept_side in ('left', 'bottom'):
        ax.spines[kept_side].set_linewidth(0.8)

    ax.tick_params(direction='out', length=3, width=0.8)

# Load results
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# result files produced by your own benchmark runs.
with open("results/stellargraph_results.pkl", "rb") as f:
    results = pickle.load(f)

# Edge counts for edge addition
# NOTE(review): not referenced by the walk-sampling plot in this cell —
# presumably left over from an edge-addition plot; confirm before removing.
edge_counts = [
    10_000, 50_000, 100_000, 500_000, 1_000_000, 2_000_000,
    5_000_000, 10_000_000, 20_000_000, 30_000_000, 40_000_000,
    50_000_000, 60_000_000
]

# Edge sizes used in walk sampling; these are the x values of the plot below.
edge_sizes = [1_000, 2_000, 5_000, 10_000, 20_000, 30_000, 50_000, 75_000, 100_000]

# ============================================================================
# WALK SAMPLING
# ============================================================================

# Each entry of `results` is looked up by experiment name and then metric name,
# and reduced along axis 1 by compute_mean_std into a (mean, std) pair.
# The means/stds are plotted against `edge_sizes` below, so each must have one
# entry per edge size.
# NOTE(review): axis 1 presumably holds repeated trials — confirm against the
# script that wrote the results file.

# Tempest on GPU (index-based walk sampling time).
gpu_walk_index, gpu_walk_index_std = compute_mean_std(
    results['walk_sampling_vs_edge_size_trw_gpu']['walk_sampling_time_index_based']
)

# Tempest on CPU (index-based walk sampling time).
cpu_walk_index, cpu_walk_index_std = compute_mean_std(
    results['walk_sampling_vs_edge_size_trw_cpu']['walk_sampling_time_index_based']
)

# StellarGraph baseline.
stellar_walk, stellar_walk_std = compute_mean_std(
    results['walk_sampling_vs_edge_size_stellargraph']['walk_sampling_time']
)

# Single-column figure comparing absolute walk-sampling time of the three systems.
fig2, ax2 = plt.subplots(figsize=(3.3, 2.2))

# (mean, std, line/marker format, key into `colors`, legend label), in legend order.
_walk_series = [
    (gpu_walk_index, gpu_walk_index_std, 'o-', 'trw_gpu', "Tempest GPU"),
    (cpu_walk_index, cpu_walk_index_std, 's--', 'trw_cpu', "Tempest CPU"),
    (stellar_walk, stellar_walk_std, 'd-', 'stellargraph', "StellarGraph"),
]
for _series_mean, _series_std, _series_fmt, _color_key, _series_label in _walk_series:
    # Error bars show one standard deviation either side of the mean.
    ax2.errorbar(edge_sizes, _series_mean, yerr=_series_std,
                 fmt=_series_fmt, color=colors[_color_key], label=_series_label,
                 markersize=3, linewidth=1.2, capsize=2, markeredgewidth=0.5)

# Label only a handful of x positions to keep the narrow axis readable.
ax2.set_xticks([5_000, 20_000, 50_000, 100_000])
ax2.set_xticklabels(['5K', '20K', '50K', '100K'])

# NOTE(review): the x values come from `edge_sizes` and the result keys are
# '..._vs_edge_size_...', yet the label says "Number of Walks" — confirm which is right.
ax2.set_xlabel("Number of Walks")
ax2.set_ylabel("Time (seconds)")
ax2.set_yscale('log')
create_clean_axis(ax2)

# Anchor the legend's right-centre to the axes' right edge, mid-height,
# so it stays inside the axes bounds.
ax2.legend(loc='center right', frameon=True, edgecolor='black',
           bbox_to_anchor=(1.0, 0.5), bbox_transform=ax2.transAxes)

fig2.tight_layout()
fig2.savefig('figures/plot_10_stellargraph_walk_sampling_absolute.pdf')
fig2.savefig('figures/plot_10_stellargraph_walk_sampling_absolute.png')
plt.close(fig2)

print("✅ Generated plots:")
print("- figures/plot_10_stellargraph_walk_sampling_absolute.pdf/png")

✅ Generated plots:
- figures/plot_9_stellargraph_edge_addition_absolute.pdf/png
- figures/plot_10_stellargraph_walk_sampling_absolute.pdf/png
