In [79]:
import io
import os
import re
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causallearn.utils.GraphUtils import GraphUtils
plt.style.use('seaborn')

In [80]:
# Specify the data path and experiment name
path = "../linear/data/pcm/random_input"
exp = "exp-7"

In [81]:
# Find all the csv files under the target directory.
def list_csv_files(directory):
    """
    List all CSV files in the specified directory without traversing into subdirectories.

    Args:
        directory (str): The path to the directory where the search should be performed.

    Returns:
        list: A list of paths to CSV files found in the directory.
    """
    # Pattern to match all CSV files in the directory
    pattern = f"{directory}/*.csv"
    
    # Use glob.glob to find all files matching the pattern
    csv_files = glob.glob(pattern)
    
    return csv_files

csv_files = list_csv_files(os.path.join(path, exp))
print("CSV files found:")
for csv_file in csv_files:
    print(csv_file)

CSV files found:
../linear/data/pcm/random_input/exp-7/rx-pcm.csv
../linear/data/pcm/random_input/exp-7/bridge-pcm.csv
../linear/data/pcm/random_input/exp-7/pcm-pcie.csv
../linear/data/pcm/random_input/exp-7/ndpi_stats-pcm.csv
../linear/data/pcm/random_input/exp-7/nf_router-pcm.csv
../linear/data/pcm/random_input/exp-7/tx_stats.csv
../linear/data/pcm/random_input/exp-7/tx-pcm.csv
../linear/data/pcm/random_input/exp-7/rx_stats.csv
../linear/data/pcm/random_input/exp-7/firewall-pcm.csv
../linear/data/pcm/random_input/exp-7/latency.csv
../linear/data/pcm/random_input/exp-7/pcm-memory.csv
../linear/data/pcm/random_input/exp-7/payload_scan-pcm.csv
../linear/data/pcm/random_input/exp-7/nf_out.csv


In [82]:
vnfs = []
for f in csv_files:
    if os.path.basename(f) == "latency.csv": path_latency = f
    elif os.path.basename(f) == "rx_stats.csv": path_rx = f
    elif os.path.basename(f) == "tx_stats.csv": path_tx = f
    elif os.path.basename(f) == "nf_out.csv": path_nfout = f
    elif os.path.basename(f) == "pcm-memory.csv": path_mem = f
    elif os.path.basename(f) == "pcm-pcie.csv": path_pcie = f
    else:
        vnfs.append(f)

### Extract PCM data for each VNF

In [83]:
# Load the PCM files
df_list = []
for p in vnfs:
    vnf = os.path.basename(p).split("-")[0]
    print("VNF: ", vnf)
    
    # Read the PCM csv file for each function, using the first two rows as header fields
    df = pd.read_csv(p, header=[0,1])
    
    # Define the relevant fields that need to be extracted from the source.
    columns_filtered = [col for col in df.columns if re.match(r'Core\d',col[0])]
    
    columns_new = []
    for c in columns_filtered:
        columns_new.append(vnf + '-' + c[1])
    
    df_1 = df[columns_filtered]
    df_1.columns = columns_new
    df_list.append(df_1)

VNF:  rx
VNF:  bridge
VNF:  ndpi_stats
VNF:  nf_router
VNF:  tx
VNF:  firewall
VNF:  payload_scan


In [84]:
# Step 1: Find the length of the shortest DataFrame
min_length = min(len(df) for df in df_list)

# Step 2: Truncate each DataFrame to the length of the shortest one
truncated_dfs = [df.head(min_length) for df in df_list]

# Step 3: Concatenate the DataFrames column-wise
pcm = pd.concat(truncated_dfs, axis=1)
pcm

Unnamed: 0,rx-EXEC,rx-IPC,rx-FREQ,rx-AFREQ,rx-L3MISS,rx-L2MISS,rx-L3HIT,rx-L2HIT,rx-L3MPI,rx-L2MPI,...,payload_scan-C6res%,payload_scan-C7res%,payload_scan-TEMP,payload_scan-INST,payload_scan-ACYC,payload_scan-TIME(ticks),payload_scan-PhysIPC,payload_scan-PhysIPC%,payload_scan-INSTnom,payload_scan-INSTnom%
0,0.21,1.38,0.15,1.12,0.04,2.24,0.98,0.2,0.0001,0.0041,...,0.0,0.0,46,1650.17,503.29,2602.73,6.56,163.94,1.27,31.70
1,0.22,1.40,0.15,1.12,0.04,2.28,0.98,0.2,0.0001,0.0041,...,0.0,0.0,46,1080.03,394.15,2592.26,5.48,137.01,0.83,20.83
2,0.22,1.44,0.15,1.12,0.04,2.30,0.98,0.2,0.0001,0.0040,...,0.0,0.0,46,1132.69,405.40,2601.26,5.59,139.70,0.87,21.77
3,0.21,1.41,0.15,1.12,0.04,2.23,0.98,0.2,0.0001,0.0041,...,0.0,0.0,47,1113.67,404.43,2597.69,5.51,137.68,0.86,21.44
4,0.22,1.44,0.15,1.12,0.04,2.31,0.98,0.2,0.0001,0.0040,...,0.0,0.0,46,1113.63,403.55,2597.67,5.52,137.98,0.86,21.44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1478,0.30,1.92,0.16,1.12,0.00,2.33,1.00,0.2,0.0000,0.0030,...,0.0,0.0,34,1235.78,409.96,2597.13,6.03,150.72,0.95,23.79
1479,0.29,1.87,0.16,1.12,0.01,2.29,1.00,0.2,0.0000,0.0030,...,0.0,0.0,35,1220.22,412.06,2597.03,5.92,148.07,0.94,23.49
1480,0.30,1.91,0.15,1.12,0.00,2.27,1.00,0.2,0.0000,0.0030,...,0.0,0.0,35,1194.32,399.06,2597.21,5.99,149.64,0.92,22.99
1481,0.30,1.91,0.16,1.12,0.00,2.33,1.00,0.2,0.0000,0.0030,...,0.0,0.0,34,1217.94,412.11,2597.31,5.91,147.77,0.94,23.45


### Extract the TX/RX/Latency data

In [85]:
df_tx = pd.read_csv(path_tx)
df_rx = pd.read_csv(path_rx)
df_lat = pd.read_csv(path_latency)

len(df_tx), len(df_rx), len(df_lat)

(1488, 1488, 1492)

### PCM Memory

In [89]:
pcm_mem = pd.read_csv(path_mem, header=[0,1])
pcm_mem = pcm_mem.drop([c for c in pcm_mem.columns if c[0]=="SKT1"], axis=1)

### PCM PCIe data