# WRR Latency CDF Plotting

This notebook calculates latency from sender and receiver logs and plots CDF curves for each flow.

**Features:**
- Automatically detects measurement window from receiver logs
- Excludes the first 5 seconds and the last 5 seconds of received traffic to avoid startup/teardown effects
- Works with any outputs folder (configurable in Cell 2)
- Matches the logic used in `measure_bandwidth_allocation.py`

**Usage:**
1. Update `outputs_dir` in Cell 2 if needed
2. Run all cells to generate the latency CDF plot and the per-flow summary statistics
3. Measurement window is automatically determined from the data

In [None]:
import re
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from collections import defaultdict

In [None]:
# Configuration
# Directory holding the sender_h<N>.txt and receiver_h_r<N>.txt logs that the
# cells below read (N = 1..3).
# Can be changed to any outputs folder
outputs_dir = Path('../program/qos/outputs')

In [None]:
def parse_sender_log(sender_file):
    """Map each packet count logged by a sender to its logged timestamp.

    Only lines containing ``This host has sent <N> packets until now : <time>``
    contribute an entry (``N`` as int, ``time`` as float); all other lines are
    ignored. When the same count is logged more than once, the last line wins.

    Args:
        sender_file: Path-like object (must support ``exists()``) of the log.

    Returns:
        Dict of packet count -> timestamp. Empty if the file does not exist,
        in which case a warning is printed.
    """
    if not sender_file.exists():
        print(f"Warning: {sender_file} not found")
        return {}

    # Pattern: "This host has sent X packets until now : timestamp"
    line_re = re.compile(r'This host has sent\s+(\d+)\s+packets until now\s+:\s+([\d.]+)')

    with open(sender_file, 'r') as log:
        # Lazily search every line; non-matching lines yield None and are dropped.
        hits = (line_re.search(text) for text in log)
        return {int(hit.group(1)): float(hit.group(2)) for hit in hits if hit}

In [None]:
def parse_receiver_log(receiver_file):
    """Collect the receive timestamps logged by a receiver, in file order.

    Only lines containing ``packet is received at time : <time>`` contribute a
    value; all other lines are ignored.

    Args:
        receiver_file: Path-like object (must support ``exists()``) of the log.

    Returns:
        List of timestamps as floats. Empty if the file does not exist, in
        which case a warning is printed.
    """
    if not receiver_file.exists():
        print(f"Warning: {receiver_file} not found")
        return []

    # Pattern: "packet is received at time : timestamp"
    line_re = re.compile(r'packet is received at time\s+:\s+([\d.]+)')

    with open(receiver_file, 'r') as log:
        # Non-matching lines produce None and are filtered out.
        hits = map(line_re.search, log)
        return [float(hit.group(1)) for hit in hits if hit]


# Auto-detect measurement window from receiver logs
# Exclude first 5 seconds and last 5 seconds to avoid startup/teardown effects
# This matches the logic in measure_bandwidth_allocation.py
def get_measurement_window(outputs_dir, exclude_start=5, exclude_end=5):
    """Derive the measurement window from the receiver logs in *outputs_dir*.

    Receive timestamps from ``receiver_h_r1.txt`` .. ``receiver_h_r3.txt``
    (whichever exist) are pooled. The window begins ``exclude_start`` after the
    earliest pooled timestamp and ends ``exclude_end`` before the latest one.
    If trimming leaves nothing (end <= start), the end is instead placed 10
    after the start; note this fallback only fires for an empty window, it
    does not stretch a short-but-valid one.

    Returns:
        Tuple ``(measurement_start, measurement_end, first_packet_time,
        last_packet_time)``.

    Raises:
        ValueError: if no receive timestamp could be collected.
    """
    candidate_logs = (
        outputs_dir / f"receiver_h_r{flow_id + 1}.txt" for flow_id in range(3)
    )
    pooled_times = [
        stamp
        for log_path in candidate_logs
        if log_path.exists()
        for stamp in parse_receiver_log(log_path)
    ]

    if not pooled_times:
        raise ValueError(f"No receiver logs found in {outputs_dir}")

    first_packet_time, last_packet_time = min(pooled_times), max(pooled_times)

    # Trim both ends to avoid startup/teardown effects
    measurement_start = first_packet_time + exclude_start
    measurement_end = last_packet_time - exclude_end

    # Trimming consumed the whole capture: fall back to a 10-unit window
    if measurement_end <= measurement_start:
        measurement_end = measurement_start + 10

    return measurement_start, measurement_end, first_packet_time, last_packet_time

# Auto-detect the measurement window once; later cells reuse these four values
(measurement_start,
 measurement_end,
 first_packet_time,
 last_packet_time) = get_measurement_window(outputs_dir)

# Report the raw data span and the trimmed window, one item per output line
print(
    f"Data time range: {first_packet_time:.2f} - {last_packet_time:.2f} seconds",
    f"Measurement window: {measurement_start:.2f} - {measurement_end:.2f} seconds",
    "  (Excluded first 5s and last 5s to avoid startup/teardown effects)",
    f"  Duration: {measurement_end - measurement_start:.2f} seconds",
    sep="\n",
)

In [None]:
def calculate_latencies(send_times, receive_times, flow_id, measurement_start, measurement_end):
    """Compute per-packet latencies (ms) for receptions inside the window.

    Packets are paired purely by position: the i-th entry of ``receive_times``
    is assumed to belong to sender packet ``min(send_times) + i``, i.e.
    in-order delivery without loss. With loss or reordering the pairing can be
    off.

    Args:
        send_times: dict of packet number -> send timestamp.
        receive_times: list of receive timestamps in arrival order.
        flow_id: used only in the diagnostic messages.
        measurement_start: inclusive lower bound for receive timestamps.
        measurement_end: inclusive upper bound for receive timestamps.

    Returns:
        List of latencies, each ``(receive - send) * 1000``, keeping only
        values strictly between 0 and 10000. Empty (with a printed message)
        when there are no send times or no receptions inside the window.
    """
    in_window = [stamp for stamp in receive_times
                 if measurement_start <= stamp <= measurement_end]

    if not send_times:
        print(f"Flow {flow_id}: No send times found")
        return []

    if not in_window:
        print(f"Flow {flow_id}: No receive times in measurement window")
        return []

    # Position (in the unfiltered list) of the first reception at or after the
    # window start; this is the offset that aligns receptions with sender
    # packet numbers.
    first_receive_idx = next(
        (pos for pos, stamp in enumerate(receive_times) if stamp >= measurement_start),
        0,
    )
    base_packet_num = min(send_times) + first_receive_idx

    latencies = []
    for packet_num, recv_time in enumerate(in_window, start=base_packet_num):
        if packet_num not in send_times:
            continue

        send_time = send_times[packet_num]
        # A packet cannot arrive before it was sent; such pairs are mismatches
        if not send_time < recv_time:
            continue

        latency_ms = (recv_time - send_time) * 1000  # Convert to milliseconds
        # Keep only plausible values: strictly between 0 and 10 seconds
        if 0 < latency_ms < 10000:
            latencies.append(latency_ms)

    return latencies

In [None]:
# Build flow_latencies: flow id -> list of latencies (ms) inside the window
flow_latencies = {}

for flow_id in range(3):
    print(f"\nProcessing Flow {flow_id}...")

    # Log files are numbered from 1 while flow ids start at 0
    sender_file = outputs_dir / f"sender_h{flow_id + 1}.txt"
    receiver_file = outputs_dir / f"receiver_h_r{flow_id + 1}.txt"

    # Parse both logs before reporting so any "not found" warnings come first
    send_times = parse_sender_log(sender_file)
    receive_times = parse_receiver_log(receiver_file)

    print(f"  Send times: {len(send_times)} packets")
    print(f"  Receive times: {len(receive_times)} packets")

    latencies = calculate_latencies(
        send_times, receive_times, flow_id, measurement_start, measurement_end
    )
    flow_latencies[flow_id] = latencies

    print(f"  Latencies calculated: {len(latencies)} packets")
    if not latencies:
        # Nothing to summarise for this flow
        continue

    print(f"  Min latency: {min(latencies):.2f} ms")
    print(f"  Max latency: {max(latencies):.2f} ms")
    print(f"  Mean latency: {np.mean(latencies):.2f} ms")
    print(f"  Median latency: {np.median(latencies):.2f} ms")

In [None]:
def plot_cdf(latencies_list, labels, title="Latency CDF", xlabel="Latency (ms)", ylabel="CDF"):
    """Plot one empirical CDF curve per flow on a new 10x6 figure.

    Args:
        latencies_list: sequence of latency collections, one per flow.
        labels: legend labels, paired positionally with ``latencies_list``.
        title: figure title.
        xlabel: x-axis label.
        ylabel: y-axis label.

    Returns:
        The matplotlib figure that was drawn on.

    Flows with no latencies are skipped with a printed warning. Colors are
    taken from a three-entry palette that repeats, so any number of flows can
    be plotted.
    """
    fig = plt.figure(figsize=(10, 6))

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # Blue, Orange, Green

    plotted_any = False
    for idx, (latencies, label) in enumerate(zip(latencies_list, labels)):
        if len(latencies) == 0:
            print(f"Warning: No latencies for {label}")
            continue

        # Empirical CDF: the k-th smallest sample sits at height k / n
        sorted_latencies = np.sort(latencies)
        n = len(sorted_latencies)
        y = np.arange(1, n + 1) / n

        # Wrap around the palette so a fourth (or later) flow does not raise
        # IndexError
        color = colors[idx % len(colors)]
        plt.plot(sorted_latencies, y, label=label, color=color, linewidth=2)
        plotted_any = True

    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3)
    # A legend without any labelled curve only triggers a matplotlib warning
    if plotted_any:
        plt.legend(fontsize=11)
    plt.tight_layout()

    return fig

In [None]:
# Plot CDF for all flows
# Labels are generic - update quantums/percentages based on your configuration
# (edit them manually to reflect your bandwidth allocation if needed)
labels = [
    'Flow 0 (High Weight)',
    'Flow 1 (Medium Weight)',
    'Flow 2 (Low Weight)',
]

# Latency lists in flow order 0, 1, 2 so they pair up with the labels above
latencies_list = [flow_latencies[flow] for flow in range(3)]

# The title reports how long the measurement window is
window_duration = measurement_end - measurement_start
fig = plot_cdf(
    latencies_list,
    labels,
    title=f"WRR Latency CDF (Measurement Window: {window_duration:.1f}s)",
    xlabel="Latency (ms)",
    ylabel="CDF",
)

plt.show()

In [None]:
# Statistics summary: banner first, then one section per flow
window_duration = measurement_end - measurement_start
print(
    "\n" + "="*60,
    f"Latency Statistics Summary (Measurement Window: {window_duration:.1f}s)",
    f"  Time range: {measurement_start:.2f} - {measurement_end:.2f} seconds",
    "="*60,
    sep="\n",
)

for flow_id in range(3):
    latencies = flow_latencies[flow_id]
    if not latencies:
        print(f"\nFlow {flow_id}: No latency data")
        continue

    # From here on the flow is known to have at least one sample
    print(f"\nFlow {flow_id}:")
    print(f"  Packets: {len(latencies)}")
    print(f"  Min:     {min(latencies):.2f} ms")
    print(f"  25th:    {np.percentile(latencies, 25):.2f} ms")
    print(f"  Median:  {np.median(latencies):.2f} ms")
    print(f"  75th:    {np.percentile(latencies, 75):.2f} ms")
    print(f"  95th:    {np.percentile(latencies, 95):.2f} ms")
    print(f"  99th:    {np.percentile(latencies, 99):.2f} ms")
    print(f"  Max:     {max(latencies):.2f} ms")
    print(f"  Mean:    {np.mean(latencies):.2f} ms")
    print(f"  Std:     {np.std(latencies):.2f} ms")

In [None]:
# Optional: Save the figure
# fig.savefig('wrr_latency_cdf_window1-5.png', dpi=300, bbox_inches='tight')
# print("\nFigure saved as 'wrr_latency_cdf_window1-5.png'")