In [177]:
import io
import os
import re
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causallearn.utils.GraphUtils import GraphUtils
plt.style.use('seaborn')

In [178]:
# Specify the data path and experiment name
path = "../linear/data/pcm/random_input"
exp = "exp-7"

In [179]:
# Find all the csv files under the target directory.
def list_csv_files(directory):
    """
    List all CSV files in the specified directory without traversing into subdirectories.

    Args:
        directory (str): The path to the directory where the search should be performed.

    Returns:
        list: A list of paths to CSV files found in the directory.
    """
    # Pattern to match all CSV files in the directory
    pattern = f"{directory}/*.csv"
    
    # Use glob.glob to find all files matching the pattern
    csv_files = glob.glob(pattern)
    
    return csv_files

csv_files = list_csv_files(os.path.join(path, exp))
print("CSV files found:")
for csv_file in csv_files:
    print(csv_file)

CSV files found:
../linear/data/pcm/random_input/exp-7/rx-pcm.csv
../linear/data/pcm/random_input/exp-7/bridge-pcm.csv
../linear/data/pcm/random_input/exp-7/pcm-pcie.csv
../linear/data/pcm/random_input/exp-7/ndpi_stats-pcm.csv
../linear/data/pcm/random_input/exp-7/nf_router-pcm.csv
../linear/data/pcm/random_input/exp-7/tx_stats.csv
../linear/data/pcm/random_input/exp-7/tx-pcm.csv
../linear/data/pcm/random_input/exp-7/rx_stats.csv
../linear/data/pcm/random_input/exp-7/firewall-pcm.csv
../linear/data/pcm/random_input/exp-7/latency.csv
../linear/data/pcm/random_input/exp-7/pcm-memory.csv
../linear/data/pcm/random_input/exp-7/payload_scan-pcm.csv
../linear/data/pcm/random_input/exp-7/nf_out.csv


In [180]:
vnfs = []
for f in csv_files:
    if os.path.basename(f) == "latency.csv": path_latency = f
    elif os.path.basename(f) == "rx_stats.csv": path_rx = f
    elif os.path.basename(f) == "tx_stats.csv": path_tx = f
    elif os.path.basename(f) == "nf_out.csv": path_nfout = f
    elif os.path.basename(f) == "pcm-memory.csv": path_mem = f
    elif os.path.basename(f) == "pcm-pcie.csv": path_pcie = f
    else:
        vnfs.append(f)

### Extract PCM data for each VNF

In [181]:
# Load the PCM files
df_list = []
for p in vnfs:
    vnf = os.path.basename(p).split("-")[0]
    print("VNF: ", vnf)
    
    # Read the PCM csv file for each function, using the first two rows as header fields
    df = pd.read_csv(p, header=[0,1])
    
    # Define the relevant fields that need to be extracted from the source.
    columns_filtered = [col for col in df.columns if re.match(r'Core\d',col[0])]
    
    columns_new = []
    for c in columns_filtered:
        columns_new.append(vnf + '-' + c[1])
    
    df_1 = df[columns_filtered]
    df_1.columns = columns_new
    df_list.append(df_1)

VNF:  rx
VNF:  bridge
VNF:  ndpi_stats
VNF:  nf_router
VNF:  tx
VNF:  firewall
VNF:  payload_scan


In [182]:
# Step 1: Find the length of the shortest DataFrame
min_length = min(len(df) for df in df_list)

# Step 2: Truncate each DataFrame to the length of the shortest one
truncated_dfs = [df.head(min_length) for df in df_list]

# Step 3: Concatenate the DataFrames column-wise
pcm = pd.concat(truncated_dfs, axis=1)
pcm

Unnamed: 0,rx-EXEC,rx-IPC,rx-FREQ,rx-AFREQ,rx-L3MISS,rx-L2MISS,rx-L3HIT,rx-L2HIT,rx-L3MPI,rx-L2MPI,...,payload_scan-C6res%,payload_scan-C7res%,payload_scan-TEMP,payload_scan-INST,payload_scan-ACYC,payload_scan-TIME(ticks),payload_scan-PhysIPC,payload_scan-PhysIPC%,payload_scan-INSTnom,payload_scan-INSTnom%
0,0.21,1.38,0.15,1.12,0.04,2.24,0.98,0.2,0.0001,0.0041,...,0.0,0.0,46,1650.17,503.29,2602.73,6.56,163.94,1.27,31.70
1,0.22,1.40,0.15,1.12,0.04,2.28,0.98,0.2,0.0001,0.0041,...,0.0,0.0,46,1080.03,394.15,2592.26,5.48,137.01,0.83,20.83
2,0.22,1.44,0.15,1.12,0.04,2.30,0.98,0.2,0.0001,0.0040,...,0.0,0.0,46,1132.69,405.40,2601.26,5.59,139.70,0.87,21.77
3,0.21,1.41,0.15,1.12,0.04,2.23,0.98,0.2,0.0001,0.0041,...,0.0,0.0,47,1113.67,404.43,2597.69,5.51,137.68,0.86,21.44
4,0.22,1.44,0.15,1.12,0.04,2.31,0.98,0.2,0.0001,0.0040,...,0.0,0.0,46,1113.63,403.55,2597.67,5.52,137.98,0.86,21.44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,0.30,1.92,0.16,1.12,0.00,2.33,1.00,0.2,0.0000,0.0030,...,0.0,0.0,34,1235.78,409.96,2597.13,6.03,150.72,0.95,23.79
1479,0.29,1.87,0.16,1.12,0.01,2.29,1.00,0.2,0.0000,0.0030,...,0.0,0.0,35,1220.22,412.06,2597.03,5.92,148.07,0.94,23.49
1480,0.30,1.91,0.15,1.12,0.00,2.27,1.00,0.2,0.0000,0.0030,...,0.0,0.0,35,1194.32,399.06,2597.21,5.99,149.64,0.92,22.99
1481,0.30,1.91,0.16,1.12,0.00,2.33,1.00,0.2,0.0000,0.0030,...,0.0,0.0,34,1217.94,412.11,2597.31,5.91,147.77,0.94,23.45


### Extract the TX/RX/Latency data

In [183]:
df_tx = pd.read_csv(path_tx)
df_rx = pd.read_csv(path_rx)

tx = df_tx[["PacketRate", "Mbit"]]
tx.columns = ["TX-PacketRate", "TX-Mbit"]
rx = df_rx[["PacketRate", "Mbit"]]
rx.columns = ["RX-PacketRate", "Rx-Mbit"]

In [184]:
tmp = list()
with open(path_latency) as f:
    for l in f:
        tmp.append(l.strip().split()[-1])

lat = pd.DataFrame(tmp, columns=["latency"])
lat

Unnamed: 0,latency
0,15000000
1,15000000
2,15000000
3,15000000
4,10762
...,...
1488,11257
1489,12761
1490,10742
1491,11796


### PCM Memory

In [185]:
pcm_mem = pd.read_csv(path_mem, header=[0,1])
pcm_mem = pcm_mem[[c for c in pcm_mem.columns if c[0]=="SKT0"]]
columns = pcm_mem.columns
pcm_mem.columns = [c[1] for c in columns]

In [186]:
pcm_mem

Unnamed: 0,Ch0Read,Ch0Write,Ch1Read,Ch1Write,Ch2Read,Ch2Write,Ch3Read,Ch3Write,Mem Read (MB/s),Mem Write (MB/s),P. Write (T/s),Memory (MB/s)
0,148.94,207.59,543.79,403.84,,,,,692.73,611.43,2012765306,1304.16
1,153.33,212.78,563.77,417.56,,,,,717.11,630.35,2133370598,1347.45
2,151.62,209.93,559.26,413.68,,,,,710.88,623.62,2133103792,1334.49
3,149.99,206.26,554.59,408.73,,,,,704.58,614.99,2133069675,1319.57
4,148.97,205.01,550.76,406.23,,,,,699.73,611.24,2132932266,1310.97
...,...,...,...,...,...,...,...,...,...,...,...,...
1478,74.90,150.58,267.02,253.25,,,,,341.92,403.83,2132420003,745.74
1479,84.39,162.86,297.82,276.33,,,,,382.21,439.19,2132642497,821.40
1480,80.34,162.15,284.61,271.44,,,,,364.95,433.59,2134862016,798.54
1481,75.01,151.83,266.82,254.68,,,,,341.84,406.51,2132497060,748.34


### PCM PCIe data

In [187]:
df = pd.read_csv(path_pcie)

# Filter rows where Skt == 0
df_filtered = df[df['Skt'] == '0']

# Further filter rows where 'WiL' ends with '(Miss)' or '(Hit)'
# This assumes 'WiL' is the name of the last column containing the strings ending with '(Miss)' or '(Hit)'
pcie_miss = df_filtered[df_filtered['WiL'].str.endswith(('(Miss)'))].reset_index()
pcie_hit = df_filtered[df_filtered['WiL'].str.endswith(('(Hit)'))].reset_index()

pcie_miss.columns = ["misses_" +c  for c in pcie_miss.columns]
pcie_hit.columns = ["hits_"+c for c in pcie_hit.columns]

pcie_miss, pcie_hit

(      misses_index misses_Skt misses_PCIRdCur misses_RFO misses_CRd  \
 0                1          0      2085216700    1479744      13790   
 1                8          0           12166    1371482      15568   
 2               15          0            9758    1296316      18256   
 3               22          0            7028    1240624      29498   
 4               29          0            5768    1182748      19418   
 ...            ...        ...             ...        ...        ...   
 1467         10270          0             336     659680      65324   
 1468         10277          0             322     654990      12642   
 1469         10284          0            1120     755930      51982   
 1470         10291          0             322     718718      18634   
 1471         10298          0             378     714168      23562   
 
      misses_DRd misses_ItoM misses_PRd    misses_WiL  
 0       1499624     4817064        140  404390(Miss)  
 1       1677466     4

In [188]:
# Step 1: Find the length of the shortest DataFrame
min_length = min(len(pcm), len(pcm_mem), len(pcie_miss), len(pcie_hit), len(tx), len(rx), len(lat))
dataset = pd.concat([pcm, pcm_mem, pcie_miss, pcie_hit, tx, rx, lat], axis=1)[:min_length]
dataset

Unnamed: 0,rx-EXEC,rx-IPC,rx-FREQ,rx-AFREQ,rx-L3MISS,rx-L2MISS,rx-L3HIT,rx-L2HIT,rx-L3MPI,rx-L2MPI,...,hits_CRd,hits_DRd,hits_ItoM,hits_PRd,hits_WiL,TX-PacketRate,TX-Mbit,RX-PacketRate,Rx-Mbit,latency
0,0.21,1.38,0.15,1.12,0.04,2.24,0.98,0.20,0.0001,0.0041,...,4501392,93003386,3427536,0,6552(Hit),0.223695,633.850641,0.157123,437.098079,15000000
1,0.22,1.40,0.15,1.12,0.04,2.28,0.98,0.20,0.0001,0.0041,...,2429938,95400172,3595732,0,6566(Hit),0.163790,461.976003,0.184404,518.488288,15000000
2,0.22,1.44,0.15,1.12,0.04,2.30,0.98,0.20,0.0001,0.0040,...,1680042,95688166,3594150,0,12390(Hit),0.194785,548.198136,0.163170,453.943473,15000000
3,0.21,1.41,0.15,1.12,0.04,2.23,0.98,0.20,0.0001,0.0041,...,1661296,99475054,3701796,0,3528(Hit),0.241951,680.744695,0.247730,697.441342,15000000
4,0.22,1.44,0.15,1.12,0.04,2.31,0.98,0.20,0.0001,0.0040,...,1667946,99203748,3665550,0,4144(Hit),1.061914,2996.165706,1.155552,3261.797215,10762
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1467,0.28,1.76,0.16,1.12,0.00,3.34,1.00,0.19,0.0000,0.0046,...,1652672,124126002,3518410,0,14406(Hit),2.384976,6747.726857,2.385341,6747.901442,15309
1468,0.28,1.79,0.16,1.12,0.00,3.38,1.00,0.19,0.0000,0.0046,...,1576904,124253794,2646756,0,24080(Hit),2.383988,6747.750242,2.384221,6746.844005,18467
1469,0.26,1.72,0.15,1.12,0.01,3.15,1.00,0.20,0.0000,0.0046,...,2189502,124659668,3487554,0,0(Hit),2.385533,6744.443323,2.385157,6745.678019,23350
1470,0.28,1.78,0.16,1.12,0.00,3.37,1.00,0.19,0.0000,0.0046,...,2200254,125293042,3517878,0,0(Hit),2.381860,6736.320823,2.382483,6735.684616,15162


In [192]:
dataset.to_csv("random_rate_" + exp + ".csv")