## Analyze e2e latencies for the single_client_e2e experiment

In [None]:
import numpy as np
import glob

def get_append_metrics(path):
    file_pattern = path + "append_metrics*.csv"

    total_throughput = 0
    latency_values = []

    for file in glob.glob(file_pattern):
        with open(file, 'r') as f:
            lines = f.readlines()[1:]
            for line in lines:
                parts = line.strip().split(',')
                gsn, latency, throughput = int(parts[0]), float(parts[1]), float(parts[2])
                latency_values.append(latency)
            
            total_throughput += throughput

    latency_array = np.array(latency_values)

    mean_latency = np.mean(latency_array)
    p50_latency = np.percentile(latency_array, 50)
    p99_latency = np.percentile(latency_array, 99)

    print("results for computation time " + path.split("_")[-1].split("/")[0] + " us")
    print("statistic/metric, latency (us)")
    print(f"mean, {mean_latency:.2f}")
    print(f"p50, {p50_latency:.2f}")
    print(f"p99, {p99_latency:.2f}")
    print(f"total throughput, {total_throughput:.2f}")

    return int(path.split("_")[-1].split("/")[0]), mean_latency, total_throughput 

def get_e2e_metrics(path):
    file_pattern = path + "e2e_metrics*.csv"

    delivery_latency_values = []
    e2e_latency_values = []
    queuing_delay_values = []

    for file in glob.glob(file_pattern):
        with open(file, 'r') as f:
            lines = f.readlines()[1:]
            for line in lines:
                parts = line.strip().split(',')
                delivery, e2e, queuing_delay = float(parts[1]), float(parts[2]), float(parts[3])
                delivery_latency_values.append(delivery)
                e2e_latency_values.append(e2e)
                queuing_delay_values.append(queuing_delay)

    
    delivery_latency_array = np.array(delivery_latency_values)
    e2e_latency_array = np.array(e2e_latency_values)
    queuing_delay_array = np.array([x for x in queuing_delay_values if x > 0])

    print("statistic/metric, delivery latency (us), e2e latency (us), queuing delay (us)")
    print(f"mean, {np.mean(delivery_latency_array):.2f}, {np.mean(e2e_latency_array):.2f}, {np.mean(queuing_delay_array):.2f}")
    print(f"std, {np.std(delivery_latency_array):.2f}, {np.std(e2e_latency_array):.2f}, {np.std(queuing_delay_array):.2f}")
    print(f"p50, {np.percentile(delivery_latency_array, 50):.2f}, {np.percentile(e2e_latency_array, 50):.2f}, {np.percentile(queuing_delay_array, 50):.2f}")
    print(f"p99, {np.percentile(delivery_latency_array, 99):.2f}, {np.percentile(e2e_latency_array, 99):.2f}, {np.percentile(queuing_delay_array, 99):.2f}")

    return int(path.split("_")[-1].split("/")[0]), np.mean(delivery_latency_array), np.mean(e2e_latency_array), np.mean(queuing_delay_array)


def get_splits(path):
    file_pattern = path + "e2e_metrics*.csv"

    delivery_latency_values = []
    e2e_latency_values = []
    queuing_delay_values = []

    for file in glob.glob(file_pattern):
        with open(file, 'r') as f:
            lines = f.readlines()[1:]
            for line in lines:
                parts = line.strip().split(',')
                delivery, e2e, queuing_delay = float(parts[1]), float(parts[2]), float(parts[3])
                delivery_latency_values.append(delivery)
                e2e_latency_values.append(e2e)
                queuing_delay_values.append(queuing_delay)

    
    delivery_latency_array = np.array(delivery_latency_values)
    e2e_latency_array = np.array(e2e_latency_values)
    queuing_delay_array = np.array([x for x in queuing_delay_values if x > 0])

    min_size = min(len(delivery_latency_array), len(e2e_latency_array), len(queuing_delay_array))

    delivery_latency_array = delivery_latency_array[:min_size]
    e2e_latency_array = e2e_latency_array[:min_size]
    queuing_delay_array = queuing_delay_array[:min_size]

    compute = e2e_latency_array - delivery_latency_array - queuing_delay_array

    print("results for computation time " + path.split("_")[-1].split("/")[0] + " us")
    print("statistic/metric, delivery latency (us), computation time (us), queuing delay (us)")
    print(f"mean, {np.mean(delivery_latency_array):.2f}, {np.mean(compute):.2f}, {np.mean(queuing_delay_array):.2f}")
    print(f"p50, {np.percentile(delivery_latency_array, 50):.2f}, {np.percentile(compute, 50):.2f}, {np.percentile(queuing_delay_array, 50):.2f}")
    print(f"p99, {np.percentile(delivery_latency_array, 99):.2f}, {np.percentile(compute, 99):.2f}, {np.percentile(queuing_delay_array, 99):.2f}")

In [None]:
path = "../results/"

for dir in glob.glob(path + "*/"):
    get_append_metrics(dir)
    get_e2e_metrics(dir)


In [None]:
path = "../results/e2e_4shard_scalog/*"
for dir in glob.glob(path + "*/"):
    get_splits(dir)

In [None]:
import re
import pandas as pd

# Input data
data = """
PLEASE PASTE ABOVE DATA HERE
"""

# Split data into lines
lines = data.splitlines()

# Parsing logic
results = []
current_time = None
append_latency = None

for i, line in enumerate(lines):
    line = line.strip()
    # Detect computation time
    if line.startswith("results for computation time"):
        match = re.search(r"computation time (\d+) us", line)
        if match:
            current_time = int(match.group(1))
    # Extract append latency (first "latency (us)" mean value)
    elif "statistic/metric, latency (us)" in line:
        append_line = lines[i + 1]  # Look at the next line
        append_match = re.match(r"mean,\s*([\d.]+)", append_line)
        if append_match:
            append_latency = float(append_match.group(1))
    # Extract other latencies (mean values)
    elif line.startswith("mean,") and current_time is not None:
        parts = line.split(",")
        if len(parts) == 4:  # Ensure correct format
            delivery_latency, e2e_latency, queueing_delay = map(float, parts[1:])
            results.append({
                "computation_time": current_time,
                "append_latency": append_latency,
                "delivery_latency": delivery_latency,
                "e2e_latency": e2e_latency,
                "queueing_delay": queueing_delay,
            })

# Create a DataFrame
df = pd.DataFrame(results)

df.to_csv("output.csv", index=False)

# Print the CSV content
print(df.to_csv(index=False))

## Analyze order server log from the reconfiguration experiment

In [None]:
import re
from datetime import datetime

def analyze_reconfig_log(path):
    # Sample log data (you can replace this with the contents of your log file)
    with open(path, 'r') as f:
        log_data = f.read()

    tput = []
    for line in log_data.splitlines():
        if "[real-time tput]:" in line:
            tput.append(int(line.split("ops/sec")[0].split()[-1]))

    
    return tput

In [None]:
tputs = analyze_reconfig_log("../results/reconfig_1000/order-0.log")
# tputs = tputs[::2]

In [None]:
import re
import matplotlib.pyplot as plt
from datetime import datetime

# File path
log_file = "../results/reconfig_1000/order-0.log"

# Regex patterns
tput_pattern = r"\[real-time tput\]: (\d+) ops/sec"
timestamp_pattern = r"(\d{2}:\d{2}:\d{2}\.\d{6})"

# Data storage
timestamps = []
tput_values = []
shard_added = [] # when did the shards send their first cut to the OL
first_cut_committed = [] # when did the first cut get committed
shard_leave_request = [] 
shard_finalized = []
replica_2_added = []
replica_3_added = []
replica_2_committed = []
replica_3_committed = []

# Parse the log file
with open(log_file, "r") as f:
    for line in f:
        # Extract real-time throughput
        tput_match = re.search(tput_pattern, line)
        timestamp_match = re.search(timestamp_pattern, line)
        
        if tput_match and timestamp_match:
            tput_values.append(int(tput_match.group(1)))
            timestamps.append(datetime.strptime(timestamp_match.group(1), "%H:%M:%S.%f"))
        
        # Extract events for annotation
        if replica_2_added == [] and "Replica 2 added" in line:
            replica_2_added.append(timestamp_match.group(1))

        if replica_3_added == [] and "Replica 3 added" in line:
            replica_3_added.append(timestamp_match.group(1))

        if replica_2_committed == [] and "cut:<key:2" in line:
            replica_2_committed.append(timestamp_match.group(1))
        
        if replica_3_committed == [] and "cut:<key:3" in line:
            replica_3_committed.append(timestamp_match.group(1))
        
        if shard_finalized == [] and "finalizeShards:<shardIDs:1 >" in line:
            shard_finalized.append(timestamp_match.group(1))

        if shard_leave_request == [] and "Shard 1 to be finalized" in line:
            shard_leave_request.append(timestamp_match.group(1))

shard_added.append(max(replica_2_added[0], replica_3_added[0]))
first_cut_committed.append(max(replica_2_committed[0], replica_3_committed[0]))

# Convert timestamps to seconds since the start
start_time = timestamps[0]
time_in_seconds = [(ts - start_time).total_seconds() for ts in timestamps]

# Plot the data
plt.figure(figsize=(10, 6))
plt.plot(time_in_seconds, tput_values, label="Throughput (ops/sec)", color="blue")
plt.xlabel("Time (seconds)")
plt.ylabel("Throughput (ops/sec)")
plt.title("Real-time Throughput vs Time")
plt.grid()

# Event times
shard_join_request = (datetime.strptime(shard_added[0], "%H:%M:%S.%f") - start_time).total_seconds()
first_cut_committed_time = (datetime.strptime(first_cut_committed[0], "%H:%M:%S.%f") - start_time).total_seconds()
shard_leave_request_time = (datetime.strptime(shard_leave_request[0], "%H:%M:%S.%f") - start_time).total_seconds()
shard_finalized_time = (datetime.strptime(shard_finalized[0], "%H:%M:%S.%f") - start_time).total_seconds()

# Add vertical lines for events
plt.axvline(shard_join_request, color="red", linestyle="--", alpha=0.7, label="shard requests to join")
plt.axvline(first_cut_committed_time, color="purple", linestyle="--", alpha=0.7, label="first cut committed from new shard")
plt.axvline(shard_leave_request_time, color="orange", linestyle="--", alpha=0.7, label="shard requests to leave")
plt.axvline(shard_finalized_time, color="black", linestyle="--", alpha=0.7, label="shard finalized, last committed cut")

# Zoom in to the relevant range (adjust as needed)
# plt.xlim(22.5, 24)
# plt.xlim(10, 65)
plt.xlim(52, 55)

# Add legend outside the plot area
plt.legend(loc='upper left', bbox_to_anchor=(1, 1), title="Events")

# Adjust layout to ensure the legend doesn't overlap with the plot
plt.tight_layout()

# Save the plot
plt.savefig("adding_reconfig_throughput_annotated.png", dpi=600)

plt.show()


## Analyze real-time tput from emulation

In [None]:
import re
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Helper function to parse timestamps
def parse_timestamp(ts):
    return datetime.strptime(ts, "%H:%M:%S.%f")

# File path
log_file = "../results/emulation_20/order-0.log"

# Regex patterns
tput_pattern = r"\[real-time tput\]: (\d+) ops/sec"
timestamp_pattern = r"(\d{2}:\d{2}:\d{2}\.\d{6})"

timestamps = []
tput_values = []

with open(log_file, "r") as f:
    for line in f:
        # Extract throughput
        tput_match = re.search(tput_pattern, line)
        timestamp_match = re.search(timestamp_pattern, line)
        
        if tput_match and timestamp_match:
            tput_values.append(int(tput_match.group(1)))
            timestamps.append(parse_timestamp(timestamp_match.group(1)))


min_timestamp = min(timestamps)

def to_relative_ms(t):
    return (t - min_timestamp).total_seconds() * 1000

# Plot
fig, ax = plt.subplots(figsize=(10, 6))

# Plot throughput
ax.plot([to_relative_ms(t) for t in timestamps], tput_values, label="Throughput (ops/sec)", color="blue")

# Labels and legend
ax.set_xlabel("Relative Time (ms)")
ax.set_ylabel("Throughput (ops/sec)")
ax.set_title("Throughput vs Time")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.5)

plt.tight_layout()
# plt.savefig("emulation_tput.png", dpi=600)

In [149]:
import numpy as np
import re
import glob
from datetime import datetime
from scipy.ndimage import gaussian_filter1d

# Helper function to compute moving average
def moving_average(values, window_size):
    return np.convolve(values, np.ones(window_size)/window_size, mode='valid')

def parse_timestamp(ts):
    return datetime.strptime(ts, "%H:%M:%S.%f")

def get_mean_tput_filtered(path):
    # File path
    log_file = path + "order-0.log"

    # Regex patterns
    tput_pattern = r"\[real-time tput\]: (\d+) ops/sec"
    timestamp_pattern = r"(\d{2}:\d{2}:\d{2}\.\d{6})"

    timestamps = []
    tput_values = []

    with open(log_file, "r") as f:
        for line in f:
            # Extract throughput
            tput_match = re.search(tput_pattern, line)
            total_tput_match = re.search(total_tput_pattern, line)
            timestamp_match = re.search(timestamp_pattern, line)
            
            if tput_match and timestamp_match:
                tput_values.append(int(tput_match.group(1)))
                timestamps.append(parse_timestamp(timestamp_match.group(1)))
                
    min_timestamp = min(timestamps)

    def to_relative_ms(t):
        return (t - min_timestamp).total_seconds() * 1000

    
    # Choose smoothing method: Moving Average or Gaussian
    window_size = 20  # Adjust window size for smoothing
    smoothed_tput = moving_average(tput_values, window_size)

    # Adjust timestamps for the reduced size after smoothing
    smoothed_timestamps = [to_relative_ms(t) for t in timestamps][window_size-1:]

    # # Plot smoothed data
    # plt.figure(figsize=(10, 6))
    # plt.plot(smoothed_timestamps, smoothed_tput, label="Smoothed Throughput (ops/sec)", color="blue")
    # plt.plot(smoothed_total_timestamps, smoothed_total_tput, label="Smoothed Total Throughput (ops/sec)", color="green")

    # plt.xlabel("Relative Time (ms)")
    # plt.ylabel("Throughput (ops/sec)")
    # plt.title("Smoothed Real-time Throughput vs Time")
    # plt.grid()
    # plt.legend()
    # plt.tight_layout()
    # plt.savefig("smoothed_output.png", dpi=600)

    # Filter throughput data based on relative time condition
    filtered_timestamps = []
    filtered_tput_values = []

    for t, v in zip(smoothed_timestamps, smoothed_tput):
        relative_time = t
        if 25000 <= relative_time <= 130000:
            filtered_timestamps.append(t)
            filtered_tput_values.append(v)

    return np.mean(filtered_tput_values)

In [151]:
import glob
for file in glob.glob("../results/emulation_*/"):
    mean_tput = get_mean_tput_filtered(file)
    print(file.split("/")[-2].split("_")[1] + "," + str(mean_tput))

15,292098.3361904762
20,389823.5238095238
40,738037.1152380953
5,97653.64761904762
30,582040.8633333333
35,673107.0180952381
10,194921.69476190477
25,487563.54285714286


In [152]:
import os
import re
import fnmatch

def average_emulation_metrics(directory):
    """
    Calculates the average of the `mean`, `p50`, and `p99` metrics across all files in a directory matching the pattern `data*.log`.

    Args:
        directory (str): Path to the directory containing log files.

    Returns:
        dict: A dictionary containing the averages of `mean`, `p50`, and `p99` metrics.
    """
    data_regex = re.compile(r"(mean|p50|p99),\s*([\d.e+-]+),\s*([\d.e+-]+)")
    
    append_metrics = {"mean": [], "p50": [], "p99": []}
    delivery_metrics = {"mean": [], "p50": [], "p99": []}

    for filename in os.listdir(directory):
        if fnmatch.fnmatch(filename, 'data*.log'):  
            filepath = os.path.join(directory, filename)
            
            # Temporary dictionaries to track the last occurrence in the current file
            last_append_metrics = {}
            last_delivery_metrics = {}

            with open(filepath, 'r') as file:
                for line in file:
                    match = data_regex.search(line)
                    if match:
                        metric, append_latency, delivery_latency = match.groups()
                        # Update the last seen values for the current file
                        last_append_metrics[metric] = float(append_latency)
                        last_delivery_metrics[metric] = float(delivery_latency)

            # Append the last occurrence values from the current file to the global lists
            for metric in append_metrics.keys():
                if metric in last_append_metrics:
                    append_metrics[metric].append(last_append_metrics[metric])
                if metric in last_delivery_metrics:
                    delivery_metrics[metric].append(last_delivery_metrics[metric])

    # Compute averages across all files for the last occurrences
    averages = {
        "append": {
            metric: sum(values) / len(values) if values else None
            for metric, values in append_metrics.items()
        },
        "delivery": {
            metric: sum(values) / len(values) if values else None
            for metric, values in delivery_metrics.items()
        }
    }
    print("found values across " + str(len(append_metrics["mean"])) + " files")
    
    return averages

In [153]:
for num_shards in [5, 10, 15, 20, 25, 30, 35, 40]:
    # Define input directory
    input_directory = "../results/emulation_" + str(num_shards) + "/"

    print(f"\nAverage metrics for {num_shards} shards:")
    # Calculate averages
    averages = average_emulation_metrics(input_directory)

    # Print the results
    print("append/confirmation latency (us):")
    print(f"mean: {averages['append']['mean']}")
    print(f"p50: {averages['append']['p50']}")
    print(f"p99: {averages['append']['p99']}")

    print("delivery latency (us):")
    print(f"mean: {averages['delivery']['mean']}")
    print(f"p50: {averages['delivery']['p50']}")
    print(f"p99: {averages['delivery']['p99']}")


Average metrics for 5 shards:
found values across 10 files
append/confirmation latency (us):
mean: 2575.677237815214
p50: 2573.6
p99: 3469.3
delivery latency (us):
mean: 2576.714402993392
p50: 2574.5
p99: 3470.7

Average metrics for 10 shards:
found values across 20 files
append/confirmation latency (us):
mean: 2559.802638455046
p50: 2557.6
p99: 3634.95
delivery latency (us):
mean: 2560.8813955902274
p50: 2558.65
p99: 3636.4

Average metrics for 15 shards:
found values across 30 files
append/confirmation latency (us):
mean: 2688.454130050981
p50: 2649.8333333333335
p99: 4031.133333333333
delivery latency (us):
mean: 2689.5205374509756
p50: 2650.766666666667
p99: 4032.733333333333

Average metrics for 20 shards:
found values across 40 files
append/confirmation latency (us):
mean: 2669.032442097973
p50: 2649.5
p99: 3973.075
delivery latency (us):
mean: 2670.019698822411
p50: 2650.425
p99: 3974.425

Average metrics for 25 shards:
found values across 50 files
append/confirmation latency (