In [175]:
#!pip install pandas matplotlib

In [246]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [247]:
import pandas as pd
import numpy as np

def calculate_throughput(df, container_name=None):
    # Create a copy of the dataframe
    df = df.copy()
    
    # Convert timestamp to seconds from nanoseconds
    df['Timestamp_sec'] = df['Timestamp'] / 1e9
    
    # Filter for specific container if provided
    if container_name:
        df = df[df['ContainerName'] == container_name]
    
    # Calculate time range
    start_time = df['Timestamp_sec'].min()
    end_time = df['Timestamp_sec'].max()
    total_duration = end_time - start_time
    
    # Calculate total bytes
    total_bytes = df['Length'].sum()
    
    # Calculate bytes per second
    bytes_per_sec = total_bytes / total_duration
    
    # Calculate throughput in different units
    results = {
        'bytes_per_sec': bytes_per_sec,
        'kilobytes_per_sec': bytes_per_sec / 1024,
        'megabytes_per_sec': bytes_per_sec / (1024 * 1024),
        'total_duration_seconds': total_duration,
        'total_bytes': total_bytes
    }
    
    return results

# If you want to see throughput over time intervals (e.g., per second)
def calculate_interval_throughput(df, interval_seconds=1, container_name=None):
    df = df.copy()
    
    # Convert timestamp to seconds and filter if needed
    df['Timestamp_sec'] = df['Timestamp'] / 1e9
    if container_name:
        df = df[df['ContainerName'] == container_name]
    
    # Create time bins
    df['time_bin'] = pd.cut(df['Timestamp_sec'], 
                           bins=np.arange(df['Timestamp_sec'].min(),
                                        df['Timestamp_sec'].max() + interval_seconds,
                                        interval_seconds))
    
    # Calculate throughput for each bin
    throughput = df.groupby('time_bin')['Length'].sum() / interval_seconds
    return throughput

In [248]:
def print_container_stats(container_name, direction, df):
    """
    Print formatted container throughput statistics.
    
    Args:
        container_name (str): Name of the container
        direction (str): Traffic direction (e.g., 'tc_egress', 'skb_ingress')
        df (pd.DataFrame): DataFrame containing network statistics
    """
    data = df[
              (df['Hook'] == direction) & 
              (df['ContainerName'] == container_name)]
    
    container_results = calculate_throughput(data, container_name)
    interval_results = calculate_interval_throughput(data, 
                                                   interval_seconds=1, 
                                                   container_name=container_name)
    
    print(f"\nContainer Network Analysis")
    print("=" * 50)
    
    print(f"\nContainer Information:")
    print("-" * 50)
    print(f"Name:           {container_name}")
    print(f"Direction:      {direction}")
    
    print(f"\nAggregate Throughput:")
    print("-" * 50)
    print(f"Bytes/second:   {container_results['bytes_per_sec']:>15,.2f} B/s")
    print(f"Kilobytes/sec:  {container_results['kilobytes_per_sec']:>15,.2f} KB/s")
    print(f"Megabytes/sec:  {container_results['megabytes_per_sec']:>15,.2f} MB/s")
    
    print(f"\nTransfer Summary:")
    print("-" * 50)
    print(f"Total Duration: {container_results['total_duration_seconds']:>15,.2f} seconds")
    print(f"Total Bytes:    {container_results['total_bytes']:>15,d} bytes")
    
    print(f"\nThroughput Time Series (per second):")
    print("-" * 50)
    
    # Format the time series data, handling Interval objects
    for interval, throughput in interval_results.items():
        # Extract the start time from the interval object
        if hasattr(interval, 'left'):
            # If it's an Interval object, use the left boundary
            time_point = interval.left
        else:
            # If it's already a number, use it directly
            time_point = interval
            
        print(f"Second {int(time_point):3d}: {throughput:>15,.2f} B/s")

def print_multi_container_comparison(containers_data):
    """
    Print a comparison of multiple containers' throughput statistics.
    
    Args:
        containers_data (list): List of dictionaries containing container results
    """
    print("\nContainer Throughput Comparison")
    print("=" * 50)
    
    # Print header
    print("\nAggregate Statistics:")
    print("-" * 50)
    
    # Print each container's stats
    for container in containers_data:
        name = container['container_name']
        results = container['results']
        
        print(f"\n{name}")
        print(f"  Throughput:      {results['megabytes_per_sec']:>8,.2f} MB/s")
        print(f"  Total Transfer:  {results['total_bytes']:>8,d} bytes")
        print(f"  Duration:        {results['total_duration_seconds']:>8,.2f} seconds")

In [249]:
import pandas as pd
import numpy as np

def analyze_container_packet_latencies(df, container_names):
    """
    Analyze packet latencies across different stages of packet processing for specific containers.
    
    Args:
        df (pd.DataFrame): DataFrame with columns:
            - PacketHash
            - Timestamp
            - Hook (tc_egress, skb_ingress, skb_egress)
            - ContainerName
        container_names (tuple): Tuple of container names to analyze (without prefixes)
            
    Returns:
        dict: Dictionary where keys are container names and values are tuples of
             (detailed_latencies, summary_stats)
    """
    results = {}
    
    for container_name in container_names:
        # Filter for both container:/ and host:/ prefixes
        container_df = df[
            (df['ContainerName'] == f'container:/{container_name}') |
            (df['ContainerName'] == f'host:/{container_name}')
        ]
        
        # Skip if no data for this container
        if len(container_df) == 0:
            results[container_name] = (pd.DataFrame(), {
                'avg_tc_to_ingress_ns': np.nan,
                'avg_ingress_to_egress_ns': np.nan,
                'avg_end_to_end_ns': np.nan,
                'total_packets_analyzed': 0
            })
            continue
            
        # Create a results container for this container's packet latencies
        container_results = []
        
        # Group by PacketHash to analyze each packet's journey
        for packet_hash, group in container_df.groupby('PacketHash'):
            # Sort by timestamp to ensure correct order
            group = group.sort_values('Timestamp')
            
            # Initialize latency values
            tc_to_ingress = np.nan
            ingress_to_egress = np.nan
            end_to_end = np.nan
            
            # Find timestamps for each stage
            tc_egress_time = group[group['Hook'] == 'tc_egress']['Timestamp'].iloc[0] if any(group['Hook'] == 'tc_egress') else None
            skb_ingress_time = group[group['Hook'] == 'skb_ingress']['Timestamp'].iloc[0] if any(group['Hook'] == 'skb_ingress') else None
            skb_egress_time = group[group['Hook'] == 'skb_egress']['Timestamp'].iloc[0] if any(group['Hook'] == 'skb_egress') else None
            
            # Calculate latencies if timestamps exist
            if tc_egress_time is not None and skb_ingress_time is not None:
                tc_to_ingress = skb_ingress_time - tc_egress_time
                
            if skb_ingress_time is not None and skb_egress_time is not None:
                ingress_to_egress = skb_egress_time - skb_ingress_time
                
            if tc_egress_time is not None and skb_egress_time is not None:
                end_to_end = skb_egress_time - tc_egress_time
            
            # Store results
            container_results.append({
                'PacketHash': packet_hash,
                'tc_to_ingress_ns': tc_to_ingress,
                'ingress_to_egress_ns': ingress_to_egress,
                'end_to_end_ns': end_to_end
            })
        
        # Create DataFrame with detailed results
        detailed_latencies = pd.DataFrame(container_results)
        
        # Calculate summary statistics
        summary_stats = {
            'avg_tc_to_ingress_ns': detailed_latencies['tc_to_ingress_ns'].mean(),
            'avg_ingress_to_egress_ns': detailed_latencies['ingress_to_egress_ns'].mean(),
            'avg_end_to_end_ns': detailed_latencies['end_to_end_ns'].mean(),
            'total_packets_analyzed': len(detailed_latencies)
        }
        
        # Store results for this container
        results[container_name] = (detailed_latencies, summary_stats)
    
    return results

def print_latency_stats(stats_dict):
    """
    Format and print latency statistics in a clear, readable way.
    Converts nanoseconds to microseconds for better readability.
    
    Args:
        stats_dict (dict): Dictionary containing latency statistics
    """
    print("Network Packet Latency Analysis")
    print("=" * 40)
    print(f"\nLatency Measurements (in microseconds):")
    print("-" * 40)
    print(f"TC Egress → SKB Ingress:    {stats_dict['avg_tc_to_ingress_ns']/1000:>8.2f} μs")
    print(f"SKB Ingress → SKB Egress:   {stats_dict['avg_ingress_to_egress_ns']/1000:>8.2f} μs")
    print(f"End-to-End Latency:         {stats_dict['avg_end_to_end_ns']/1000:>8.2f} μs")
    print("\nSample Size:")
    print("-" * 40)
    print(f"Total Packets Analyzed:     {stats_dict['total_packets_analyzed']:>8,d}")

In [250]:
def print_container_statistics(container_name, df):
    """
    Print detailed network statistics for all traffic directions of a container.
    
    Args:
        container_name (str): Base name of the container (without prefixes)
        df (pd.DataFrame): DataFrame containing network statistics
    """
    # Define all possible traffic directions and container prefixes
    directions = ['skb_egress', 'skb_ingress', 'xdp_egress', 
                 'xdp_ingress', 'tc_ingress', 'tc_egress']
    prefixes = ['container:/', 'host:/']
    
    def get_throughput(full_container_name, direction):
        data = df[
            (df['Hook'] == direction) & 
            (df['ContainerName'] == full_container_name)
        ]
        if len(data) == 0:
            return None
            
        results = calculate_throughput(data, full_container_name)
        return results
    
    print("Container Network Analysis")
    print("=" * 50)
    
    # Process each prefix (container side and host side)
    for prefix in prefixes:
        full_container_name = f"{prefix}{container_name}"
        
        print(f"\n{prefix.rstrip(':').title()} Network Statistics")
        print("-" * 50)
        print(f"Container Name: {full_container_name}")
        
        # Check each direction
        for direction in directions:
            results = get_throughput(full_container_name, direction)
            
            print(f"\nDirection: {direction}")
            print("-" * 25)
            
            if results is not None and results['total_bytes'] > 0:
                print(f"Throughput:")
                print(f"  Bytes/second:    {results['bytes_per_sec']:>15,.2f} B/s")
                print(f"  Kilobytes/sec:   {results['kilobytes_per_sec']:>15,.2f} KB/s")
                print(f"  Megabytes/sec:   {results['megabytes_per_sec']:>15,.2f} MB/s")
                print(f"\nTransfer Summary:")
                print(f"  Total Duration:  {results['total_duration_seconds']:>15,.2f} seconds")
                print(f"  Total Bytes:     {results['total_bytes']:>15,d} bytes")
            else:
                print(f"Throughput:")
                print(f"  Bytes/second:    {0:>15,.2f} B/s")
                print(f"  Kilobytes/sec:   {0:>15,.2f} KB/s")
                print(f"  Megabytes/sec:   {0:>15,.2f} MB/s")
                print(f"\nTransfer Summary:")
                print(f"  Total Duration:  {0:>15,.2f} seconds")
                print(f"  Total Bytes:     {0:>15,d} bytes")

In [301]:
def print_container_summary(container_name, df):
    """
    Print a concise summary of all traffic directions for a container.
    Shows throughput rates and total bytes for each direction.
    
    Args:
        container_name (str): Base name of the container (without prefixes)
        df (pd.DataFrame): DataFrame containing network statistics
    """
    # Define all possible traffic directions and container prefixes
    directions = ['skb_egress', 'skb_ingress', 'xdp_egress', 
                 'xdp_ingress', 'tc_ingress', 'tc_egress']
    prefixes = ['container:/', 'host:/']
    
    # Function to get throughput for a specific container and direction
    def get_throughput(full_container_name, direction):
        data = df[
            (df['Hook'] == direction) & 
            (df['ContainerName'] == full_container_name)
        ]
        if len(data) == 0:
            return None
            
        results = calculate_throughput(data, full_container_name)
        return results
    
    # Print container name
    print(f"container name = {container_name}")

    if "host" in container_name:
        # Check each direction
        for direction in directions:
            results = get_throughput(container_name, direction)
            if results is not None and results['total_bytes'] > 0:
                print(f"* {direction:<10} {results['megabytes_per_sec']:>8.2f} MB/s "
                      f"({results['kilobytes_per_sec']:,.2f} KB/s) - "
                      f"Total: {results['total_bytes']:,d} bytes")
            else:
                print(f"* {direction:<10} {0:>8.2f} MB/s "
                      f"({0:.2f} KB/s) - "
                      f"Total: 0 bytes")
    else:
        # Process each prefix (container side and host side)
        for prefix in prefixes:
            full_container_name = f"{prefix}{container_name}"
            print(f"\n{prefix.rstrip(':')}_side ({full_container_name})")
            
            # Check each direction
            for direction in directions:
                results = get_throughput(full_container_name, direction)
                if results is not None and results['total_bytes'] > 0:
                    print(f"* {direction:<10} {results['megabytes_per_sec']:>8.2f} MB/s "
                          f"({results['kilobytes_per_sec']:,.2f} KB/s) - "
                          f"Total: {results['total_bytes']:,d} bytes")
                else:
                    print(f"* {direction:<10} {0:>8.2f} MB/s "
                          f"({0:.2f} KB/s) - "
                          f"Total: 0 bytes")

In [314]:
def print_container_statistics(container_name, df):
    """
    Print detailed network statistics for all traffic directions of a container.
    
    Args:
        container_name (str): Base name of the container (without prefixes)
        df (pd.DataFrame): DataFrame containing network statistics
    """
    print("CN= " + container_name)
    # Define all possible traffic directions and container prefixes
    directions = ['skb_egress', 'skb_ingress', 'xdp_egress', 
                 'xdp_ingress', 'tc_ingress', 'tc_egress']
    prefixes = ['container:/', 'host:/']
    
    def get_throughput(full_container_name, direction):
        data = df[
            (df['Hook'] == direction) & 
            (df['ContainerName'] == full_container_name)
        ]
        if len(data) == 0:
            return None
            
        results = calculate_throughput(data, full_container_name)
        return results
    
    print("Container Network Analysis")
    print("=" * 50)

    if "host" in container_name:
        print(f"\n{prefix.rstrip(':').title()} Network Statistics")
        print("-" * 50)
        print(f"Container Name: {container_name}")
        
        # Check each direction
        for direction in directions:
            print(f"\n{prefix.rstrip(':').title()} Network Statistics")
            print("-" * 50)
            
            results = get_throughput(container_name, direction)
            
            print(f"\nDirection: {direction}")
            print("-" * 25)
            
            if results is not None and results['total_bytes'] > 0:
                print(f"Throughput:")
                print(f"  Bytes/second:    {results['bytes_per_sec']:>15,.2f} B/s")
                print(f"  Kilobytes/sec:   {results['kilobytes_per_sec']:>15,.2f} KB/s")
                print(f"  Megabytes/sec:   {results['megabytes_per_sec']:>15,.2f} MB/s")
                print(f"\nTransfer Summary:")
                print(f"  Total Duration:  {results['total_duration_seconds']:>15,.2f} seconds")
                print(f"  Total Bytes:     {results['total_bytes']:>15,d} bytes")
            else:
                print(f"Throughput:")
                print(f"  Bytes/second:    {0:>15,.2f} B/s")
                print(f"  Kilobytes/sec:   {0:>15,.2f} KB/s")
                print(f"  Megabytes/sec:   {0:>15,.2f} MB/s")
                print(f"\nTransfer Summary:")
                print(f"  Total Duration:  {0:>15,.2f} seconds")
                print(f"  Total Bytes:     {0:>15,d} bytes")        
    else:        
        for prefix in prefixes:
            full_container_name = f"{prefix}{container_name}"
            
            print(f"\n{prefix.rstrip(':').title()} Network Statistics")
            print("-" * 50)
            print(f"Container Name: {full_container_name}")
            
            # Check each direction
            for direction in directions:
                results = get_throughput(full_container_name, direction)
                
                print(f"\nDirection: {direction}")
                print("-" * 25)
                
                if results is not None and results['total_bytes'] > 0:
                    print(f"Throughput:")
                    print(f"  Bytes/second:    {results['bytes_per_sec']:>15,.2f} B/s")
                    print(f"  Kilobytes/sec:   {results['kilobytes_per_sec']:>15,.2f} KB/s")
                    print(f"  Megabytes/sec:   {results['megabytes_per_sec']:>15,.2f} MB/s")
                    print(f"\nTransfer Summary:")
                    print(f"  Total Duration:  {results['total_duration_seconds']:>15,.2f} seconds")
                    print(f"  Total Bytes:     {results['total_bytes']:>15,d} bytes")
                else:
                    print(f"Throughput:")
                    print(f"  Bytes/second:    {0:>15,.2f} B/s")
                    print(f"  Kilobytes/sec:   {0:>15,.2f} KB/s")
                    print(f"  Megabytes/sec:   {0:>15,.2f} MB/s")
                    print(f"\nTransfer Summary:")
                    print(f"  Total Duration:  {0:>15,.2f} seconds")
                    print(f"  Total Bytes:     {0:>15,d} bytes")

In [315]:
# 90.43125 MB/s at tcpreplay

In [316]:
import pandas as pd

# Load the CSV file
#df = pd.read_csv('data/intra/1_one_to_one_tcpreplay.csv')
df = pd.read_csv('data/inter/1_one_to_one_src.csv')

# Set more readable display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [317]:
df["ContainerName"].unique()

array([nan, 'host_lo', 'container:/vigilant_bhaskara', 'docker0',
       'host_nic', 'host:/vigilant_bhaskara'], dtype=object)

In [318]:
print_container_summary('vigilant_bhaskara', df)

container name = vigilant_bhaskara

container:/_side (container:/vigilant_bhaskara)
* skb_egress     0.00 MB/s (0.26 KB/s) - Total: 595 bytes
* skb_ingress     0.00 MB/s (0.26 KB/s) - Total: 595 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.26 KB/s) - Total: 595 bytes
* tc_ingress     0.00 MB/s (0.30 KB/s) - Total: 693 bytes
* tc_egress     63.78 MB/s (65,312.24 KB/s) - Total: 151,400,951 bytes

host:/_side (host:/vigilant_bhaskara)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress    63.19 MB/s (64,708.40 KB/s) - Total: 150,000,881 bytes
* tc_ingress    63.78 MB/s (65,312.41 KB/s) - Total: 151,400,951 bytes
* tc_egress      0.00 MB/s (0.30 KB/s) - Total: 693 bytes


In [319]:
print_container_summary('docker0', df)

container name = docker0

container:/_side (container:/docker0)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_egress      0.00 MB/s (0.00 KB/s) - Total: 0 bytes

host:/_side (host:/docker0)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_egress      0.00 MB/s (0.00 KB/s) - Total: 0 bytes


In [320]:
print_container_summary('host_nic', df)

container name = host_nic
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress      inf MB/s (inf KB/s) - Total: 60 bytes
* tc_egress     28.03 MB/s (28,705.16 KB/s) - Total: 151,400,042 bytes


In [321]:
import pandas as pd

# Load the CSV file
#df = pd.read_csv('data/intra/1_one_to_one_tcpreplay.csv')
df = pd.read_csv('data/inter/1_one_to_one_dst.csv')

# Set more readable display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [322]:
df["ContainerName"].unique()

array(['container:/trusting_jemison', 'host_lo', nan, 'docker0',
       'host:/trusting_jemison', 'host_nic'], dtype=object)

In [323]:
print_container_summary('trusting_jemison', df)

container name = trusting_jemison

container:/_side (container:/trusting_jemison)
* skb_egress    72.76 MB/s (74,505.76 KB/s) - Total: 150,000,000 bytes
* skb_ingress    72.76 MB/s (74,505.66 KB/s) - Total: 150,000,000 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress    72.76 MB/s (74,504.82 KB/s) - Total: 150,000,000 bytes
* tc_ingress    28.71 MB/s (29,400.08 KB/s) - Total: 151,398,528 bytes
* tc_egress       inf MB/s (inf KB/s) - Total: 42 bytes

host:/_side (host:/trusting_jemison)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress      inf MB/s (inf KB/s) - Total: 42 bytes
* tc_egress     28.71 MB/s (29,400.09 KB/s) - Total: 151,398,528 bytes


In [324]:
print_container_summary('docker0', df)

container name = docker0

container:/_side (container:/docker0)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_egress      0.00 MB/s (0.00 KB/s) - Total: 0 bytes

host:/_side (host:/docker0)
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* tc_egress      0.00 MB/s (0.00 KB/s) - Total: 0 bytes


In [325]:
print_container_summary('host_nic', df)

container name = host_nic
* skb_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* skb_ingress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_egress     0.00 MB/s (0.00 KB/s) - Total: 0 bytes
* xdp_ingress    72.75 MB/s (74,498.59 KB/s) - Total: 150,000,000 bytes
* tc_ingress    28.03 MB/s (28,704.29 KB/s) - Total: 151,394,004 bytes
* tc_egress       inf MB/s (inf KB/s) - Total: 42 bytes


In [282]:
df[(df["ContainerName"] == "container:/mystifying_elbakyan")]

Unnamed: 0,Timestamp,PacketHash,IfIndex,Length,ContainerName,ContainerID,VethName,Hook
