In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns


import time
import math
import random
import re
import os
import sys


from utils.util import find_files_with_extension
from utils.util import  get_dataframe_from_filepath,calculate_statistics, extract_unique_mbps_and_ms
from utils.util import remove_outliers_iqr, comma_format, create_directory_if_not_exists,print_rtt_stats,save_rtt_stats


# Load data
mainpth="./Data"

stats_directory = os.path.join(mainpth, "Stats")
create_directory_if_not_exists(stats_directory)


folderpaths = [mainpth+'./client1_data',mainpth+'./client2_data']
siftrfilenames, siftrfilepaths, siftrfiledict = find_files_with_extension(folderpaths, '.log')


Directory already exists: ./Data\Stats


In [2]:
# main.py
import os
from Kernel_Data_Analysis.utils.data_preprocessing import fq_pie_pre_process_extract, l4s_pre_process_extract, trim_df
import pandas as pd

columns_to_use = [
    'queue_type', 'burst_allowance', 'drop_probability', 'current_queue_delay',
    'accumulated_probability', 'average_dequeue_time', 'length_in_bytes', 'total_drops', 'packet_length'
]

def get_stats(df, column):
    median = df[column].median()
    mean = df[column].mean()
    q25 = df[column].quantile(0.25)
    q75 = df[column].quantile(0.75)
    minimum = df[column].min()
    maximum = df[column].max()

    # Printing all the values
    print("RTT in microseconds")
    print(f"Median: {median}")
    print(f"Mean: {mean}")
    print(f"25th Percentile (Q1): {q25}")
    print(f"75th Percentile (Q3): {q75}")
    print(f"Minimum: {minimum}")
    print(f"Maximum: {maximum}")


def functionfilenameandpath(path='./', extension='.txt'):
    filepaths = []
    filenames = []

    for root, _, files_list in os.walk(path):
        for file_name in files_list:
            if file_name.endswith(extension):
                file_name_path = os.path.join(root, file_name)
                filenames.append(file_name)
                filepaths.append(file_name_path)

    return filenames, filepaths

In [3]:
kernelfilenames, kernelfilepaths = functionfilenameandpath(path='./Data/router_data', extension='.txt')

In [4]:
# def save_rtt_stats(filepaths, columns, output_file="rtt_stats.csv"):
#     all_stats = []

#     for filepath in filepaths:
#         if os.path.exists(filepath):
#             try:
#                 df = l4s_pre_process_extract(input_file=filepath,aqm='l4s')
#                 if all(col in df.columns for col in columns):
#                     stats = get_stats(df, columns)
#                     stats["File"] = os.path.basename(filepath)
#                     all_stats.append(stats)
#                 else:
#                     missing_cols = [col for col in columns if col not in df.columns]
#                     print(f"Missing columns {missing_cols} in {filepath}")
#             except Exception as e:
#                 print(f"Error processing {filepath}: {e}")
#         else:
#             print(f"File not found: {filepath}")

#     if all_stats:
#         stats_df = pd.DataFrame(all_stats)
#         stats_df.to_csv(output_file, index=False)
#         print(f"Statistics saved to {output_file}")
#     else:
#         print("No valid data to save.")

In [5]:
# save_rtt_stats(filepaths, [columns_to_use[3]], output_file="rtt_stats.csv")

In [6]:
from utils.util import extract_unique_mbps_and_ms
unique_combinations = extract_unique_mbps_and_ms(['./Data/router_data'])
print("Unique Mbps and ms Combinations:")
for mbps, ms in sorted(unique_combinations):
    print(f'{mbps} Mbps, {ms} ms')


Unique Mbps and ms Combinations:
10 Mbps, 0 ms
10 Mbps, 1 ms
10 Mbps, 10 ms
10 Mbps, 20 ms
10 Mbps, 5 ms
10 Mbps, 7 ms
20 Mbps, 0 ms
20 Mbps, 1 ms
20 Mbps, 10 ms
20 Mbps, 20 ms
20 Mbps, 5 ms
20 Mbps, 7 ms
5 Mbps, 0 ms
5 Mbps, 1 ms
5 Mbps, 10 ms
5 Mbps, 20 ms
5 Mbps, 5 ms
5 Mbps, 7 ms
8 Mbps, 0 ms
8 Mbps, 1 ms
8 Mbps, 10 ms
8 Mbps, 20 ms
8 Mbps, 5 ms
8 Mbps, 7 ms


In [7]:
unique_scenarios_dict={}
for index in range(len(siftrfilenames)):
    scenario = "_".join(siftrfilenames[index].split('_')[:5])
    if "l4s" in scenario:
        scenario = "_".join(siftrfilenames[index].split('_')[:4])
    if scenario in unique_scenarios_dict:
        unique_scenarios_dict[scenario].append(siftrfiledict[siftrfilenames[index]])
    else:
        unique_scenarios_dict[scenario]=[siftrfiledict[siftrfilenames[index]]]


In [8]:
unique_kernel_dict = {}

for scenario in unique_scenarios_dict:
    kernel_files = []
    for kernel_file in kernelfilepaths:
        if scenario in kernel_file:
            kernel_files.append(kernel_file)
    unique_kernel_dict[scenario] = kernel_files

In [9]:
unique_kernel_dict

{'1_fq_codel_10Mbps_0ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_0ms_ecn.txt'],
 '1_fq_codel_10Mbps_10ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_10ms_ecn.txt'],
 '1_fq_codel_10Mbps_1ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_1ms_ecn.txt'],
 '1_fq_codel_10Mbps_20ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_20ms_ecn.txt'],
 '1_fq_codel_10Mbps_5ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_5ms_ecn.txt'],
 '1_fq_codel_10Mbps_7ms': ['./Data/router_data\\kernel_data_1_fq_codel_10Mbps_7ms_ecn.txt'],
 '1_fq_codel_20Mbps_0ms': ['./Data/router_data\\kernel_data_1_fq_codel_20Mbps_0ms_ecn.txt'],
 '1_fq_codel_20Mbps_10ms': ['./Data/router_data\\kernel_data_1_fq_codel_20Mbps_10ms_ecn.txt'],
 '1_fq_codel_20Mbps_1ms': ['./Data/router_data\\kernel_data_1_fq_codel_20Mbps_1ms_ecn.txt'],
 '1_fq_codel_20Mbps_20ms': ['./Data/router_data\\kernel_data_1_fq_codel_20Mbps_20ms_ecn.txt'],
 '1_fq_codel_20Mbps_5ms': ['./Data/router_data\\kernel_data_1_

In [10]:
unique_scenarios_dict

{'1_fq_codel_10Mbps_0ms': ['./Data./client1_data\\1_fq_codel_10Mbps_0ms_ecn_newreno_src1.siftr.log',
  './Data./client2_data\\1_fq_codel_10Mbps_0ms_ecn_dctcp_src2.siftr.log'],
 '1_fq_codel_10Mbps_10ms': ['./Data./client1_data\\1_fq_codel_10Mbps_10ms_ecn_newreno_src1.siftr.log',
  './Data./client2_data\\1_fq_codel_10Mbps_10ms_ecn_dctcp_src2.siftr.log'],
 '1_fq_codel_10Mbps_1ms': ['./Data./client1_data\\1_fq_codel_10Mbps_1ms_ecn_newreno_src1.siftr.log',
  './Data./client2_data\\1_fq_codel_10Mbps_1ms_ecn_dctcp_src2.siftr.log'],
 '1_fq_codel_10Mbps_20ms': ['./Data./client1_data\\1_fq_codel_10Mbps_20ms_ecn_newreno_src1.siftr.log',
  './Data./client2_data\\1_fq_codel_10Mbps_20ms_ecn_dctcp_src2.siftr.log'],
 '1_fq_codel_10Mbps_5ms': ['./Data./client1_data\\1_fq_codel_10Mbps_5ms_ecn_newreno_src1.siftr.log',
  './Data./client2_data\\1_fq_codel_10Mbps_5ms_ecn_dctcp_src2.siftr.log'],
 '1_fq_codel_10Mbps_7ms': ['./Data./client1_data\\1_fq_codel_10Mbps_7ms_ecn_newreno_src1.siftr.log',
  './Data./cl

In [11]:
for scenario in unique_kernel_dict:
    if 'l4s' in scenario:
        try:
            print("Start New Scenario")
            print(scenario)
            print(f"Scenario: {scenario}")
            print("Kernel Data:",unique_kernel_dict[scenario])
            kernel_df = l4s_pre_process_extract(unique_kernel_dict[scenario][0],aqm='l4s')
            get_stats(kernel_df, columns_to_use[3])    

            tcp1 = "CUBIC"
            tcp2 = "NEWRENO"
            tcp3 = "DCTCP"
            print()
            

            classif_traffic = unique_scenarios_dict[scenario][0]
            l4s_traffic = unique_scenarios_dict[scenario][1]

            ctf = get_dataframe_from_filepath(classif_traffic)
            ltf = get_dataframe_from_filepath(l4s_traffic)

            ctf1 = ctf[ctf['ForeignPort'] == "5101"]
            ctf2 = ctf[ctf['ForeignPort'] == "5102"]
            ltf =  ltf[ltf['ForeignPort'] == "5103"]

            # Define paths
            paths = {
                tcp1: ctf1,
                tcp2: ctf2,
                tcp3: ltf,
            }

            print_rtt_stats(paths=paths,scenario=scenario)

        except Exception as e:
            print(f"Error processing {scenario}: {e}")
        

Start New Scenario
1_l4s_10Mbps_0ms
Scenario: 1_l4s_10Mbps_0ms
Kernel Data: ['./Data/router_data\\kernel_data_1_l4s_10Mbps_0ms_ecn.txt']
RTT in microseconds
Median: 0.0
Mean: 2522.0787289752807
25th Percentile (Q1): 0.0
75th Percentile (Q3): 0.0
Minimum: 0
Maximum: 360000

./Data./client1_data\1_l4s_10Mbps_0ms_ecn_newreno_src1.siftr.log
./Data./client2_data\1_l4s_10Mbps_0ms_ecn_dctcp_src2.siftr.log

RTT *-*-*RTT *-*-*RTT *-*-*RTT *-*-*RTT *-*-*
RTT statistics in millisecond (ms) for scenario 1_l4s_10Mbps_0ms:
Statistics for CUBIC:
Median: 85.0
Mean: 111.36189187024428
25th Percentile: 65.625
75th Percentile: 117.5
Minimum: 20.0
Maximum: 879.375
Statistics for NEWRENO:
Median: 86.875
Mean: 106.3593610871441
25th Percentile: 65.703
75th Percentile: 115.625
Minimum: 20.0
Maximum: 529.687
Statistics for DCTCP:
Median: 63.75
Mean: 75.49593946863872
25th Percentile: 47.5
75th Percentile: 88.125
Minimum: 17.5
Maximum: 340.0
END RTT *-*-*END RTT *-*-*END RTT *-*-*END RTT *-*-*END RTT *-*-*

St

In [None]:
for filepath in kernelfilepaths:
    
    l4s_df = l4s_pre_process_extract(input_file=filepath,aqm='l4s')
    if l4s_df['current_queue_delay'].mean() < 1000:
        print()
        print("*** Start ***"*3)
        print(filepath)
        print("dequeu_actions",l4s_df['dequeue_action'].value_counts())
        print("queue_type",l4s_df['queue_type'].value_counts())
        print("Classic Traffic")
        get_stats(l4s_df[l4s_df['queue_type'] == 0], columns_to_use[3])
        print("L4S Traffic")
        get_stats(l4s_df[l4s_df['queue_type'] == 1], columns_to_use[3])
        # get_stats(l4s_df, columns_to_use[3])
        print()
        print("*** END ***"*3)
    


In [None]:
for filepath in kernelfilepaths:
    
    l4s_df = l4s_pre_process_extract(input_file=filepath,aqm='l4s')
    if l4s_df['current_queue_delay'].mean() < 40000 and len(l4s_df['dequeue_action'].unique()) > 1:
        print()
        print("*** Start ***"*3)
        print(filepath)
        print("dequeu_actions",l4s_df['dequeue_action'].value_counts())
        print("queue_type",l4s_df['queue_type'].value_counts())
        get_stats(l4s_df[l4s_df['queue_type'] == 0], columns_to_use[3])
        get_stats(l4s_df[l4s_df['queue_type'] == 1], columns_to_use[3])
        # get_stats(l4s_df, columns_to_use[3])
        print()
        print("*** END ***"*3)
    


In [None]:
# input_file = './Data/llmrawdata-llmused.txt'
input_file = './Data/llmrawdata.txt'
input_file = kernelfilepaths[0]

In [None]:
# fq_pie_df = fq_pie_pre_process_extract(input_file=input_file,aqm='fq_pie')
l4s_df = l4s_pre_process_extract(input_file=input_file,aqm='l4s')

In [None]:
kernelfilepaths[0]

In [None]:
siftrfilepaths

In [None]:
get_stats(l4s_df, columns_to_use[3])

In [None]:
len(l4s_df['dequeue_action'].unique())

In [None]:
l4s_df['queue_type'].value_counts()

In [None]:
l4s_df['dequeue_action'].value_counts()

In [None]:
print(l4s_df[columns_to_use[3]].describe())
print("queue delay median",l4s_df[columns_to_use[3]].median())

In [None]:
get_stats(l4s_df, columns_to_use[3])

In [None]:
get_stats(l4s_df[l4s_df['queue_type'] == 0], columns_to_use[3])

In [None]:
get_stats(l4s_df[l4s_df['queue_type'] == 1], columns_to_use[3])

In [None]:
for filepath in kernelfilepaths:
    
    fq_pie_df = fq_pie_pre_process_extract(input_file=filepath,aqm='fq_pie')
    if l4s_df['current_queue_delay'].mean() < 40000 and len(l4s_df['dequeue_action'].unique()) > 1:
        print()
        print("*** Start ***"*3)
        print(filepath)
        print("dequeu_actions",l4s_df['dequeue_action'].value_counts())
        print("queue_type",l4s_df['queue_type'].value_counts())
        get_stats(l4s_df[l4s_df['queue_type'] == 0], columns_to_use[3])
        get_stats(l4s_df[l4s_df['queue_type'] == 1], columns_to_use[3])
        # get_stats(l4s_df, columns_to_use[3])
        print()
        print("*** END ***"*3)
    


In [None]:
import pandas as pd

# Set the display format for floating point numbers
pd.options.display.float_format = '{:,.4f}'.format

# Now display the summary statistics
print(fq_pie_df[columns_to_use[3]].describe())
print("queue delay mean",fq_pie_df[columns_to_use[3]].mean())


In [None]:
get_stats(fq_pie_df, columns_to_use[3])