# 1. Extract useful information to CSV files

In [None]:
import os
import pandas as pd

# Load every CSV in the results folder, drop the rows whose "Operation"
# mentions "Scan", and preview what is left of each file.
folder_path = "running-results"

# os.listdir() yields entries in arbitrary order; sorting keeps the dict (and
# the preview printed below) in the same order on every run and platform.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".csv"))

# file name -> rows of that file whose "Operation" does not contain "Scan"
filtered_dataframes = {}

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; skip it the same way a
        # header-only file is skipped below.
        continue

    if df.empty:
        continue

    # na=False: rows with a missing Operation are not treated as scans, so
    # they are kept.
    df_filtered = df[~df['Operation'].str.contains("Scan", case=False, na=False)]

    if not df_filtered.empty:
        filtered_dataframes[file] = df_filtered

for file_name, df in filtered_dataframes.items():
    print(f"\n=== {file_name} (Filtered) ===")
    print(df.head())


In [None]:
import os
import pandas as pd

# Load every CSV in the results folder, drop the rows whose "Operation"
# mentions "Scan", and merge the remainder into two frames: one for files
# whose name contains "ceb" and one for all other files.
folder_path = "running-results"

# os.listdir() yields entries in arbitrary order; sorting keeps the row order
# of the merged frames identical on every run and platform.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".csv"))

ceb_data = []
other_data = []

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; skip it the same way a
        # header-only file is skipped below.
        continue

    if df.empty:
        continue

    # na=False: rows with a missing Operation are not treated as scans, so
    # they are kept.
    df_filtered = df[~df['Operation'].str.contains("Scan", case=False, na=False)]
    if df_filtered.empty:
        continue

    if "ceb" in file:
        ceb_data.append(df_filtered)
    else:
        other_data.append(df_filtered)

# Always (re)bind both names: when this cell is re-run and a group has no
# files, an empty frame replaces whatever an earlier run left behind instead
# of letting stale data linger in the session.
ceb_merged_df = pd.concat(ceb_data, ignore_index=True) if ceb_data else pd.DataFrame()
other_merged_df = pd.concat(other_data, ignore_index=True) if other_data else pd.DataFrame()

if ceb_data:
    print("\n=== Merged CEB Data ===")
    print(ceb_merged_df.head())

if other_data:
    print("\n=== Merged Other Data ===")
    print(other_merged_df.head())

In [None]:
# Show the merged CEB frame. Guard against the frame never having been
# created (e.g. no CEB files were found) instead of failing with NameError.
if 'ceb_merged_df' in locals():
    print(ceb_merged_df)
else:
    print("No merged CEB data available.")

In [None]:
import os

# Write each merged frame that exists and is non-empty to its own CSV file
# inside the output folder (created on demand).
output_folder = "2-merged-results"
os.makedirs(output_folder, exist_ok=True)

ceb_frame = locals().get('ceb_merged_df')
if ceb_frame is not None and not ceb_frame.empty:
    ceb_target = os.path.join(output_folder, "merged_ceb.csv")
    ceb_frame.to_csv(ceb_target, index=False)
    print("✅ 'merged_ceb.csv'")

other_frame = locals().get('other_merged_df')
if other_frame is not None and not other_frame.empty:
    other_target = os.path.join(output_folder, "merged_other.csv")
    other_frame.to_csv(other_target, index=False)
    print("✅ 'merged_other.csv'")


# 2. Compute R and R0

In [None]:
import os
import pandas as pd

# Paths of the two merged CSV files inside the merged-results folder.
folder_path = "2-merged-results"
ceb_file, other_file = (
    os.path.join(folder_path, csv_name)
    for csv_name in ("merged_ceb.csv", "merged_other.csv")
)

def extract_last_column(file_path):
    """Return the values of the last column of a CSV file as a list.

    A path that does not exist yields an empty list.
    """
    if not os.path.exists(file_path):
        return []

    frame = pd.read_csv(file_path)
    # Positional access to the right-most column.
    return frame.iloc[:, -1].tolist()

# Last-column values of each merged file: the CEB file first, then the other one.
ceb_values, job_values = map(extract_last_column, (ceb_file, other_file))

In [None]:
# Copy from e-computation => Alignment
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde

try:
    from scipy.integrate import simps
except ImportError:
    # SciPy 1.14 removed `simps`; bind its replacement so this cell still
    # imports. Note that `simpson` takes `x` as a keyword-only argument.
    from scipy.integrate import simpson as simps

def compute_tv(R, R0, plot=True):
    """Estimate the total variation distance (TVD) between two 1-D samples.

    Each sample is smoothed with a Gaussian KDE (Scott's rule bandwidth) and
    ``0.5 * integral |q(x) - p(x)| dx`` is evaluated with Simpson's rule on
    1000 evenly spaced points spanning the combined data range. KDE mass
    outside that range is not integrated.

    Args:
        R: Sample behind p(x) (labelled "R - CEB" in the plot).
        R0: Sample behind q(x) (labelled "R0 - JOB" in the plot).
        plot: When true (the default), also draw both densities and the area
            between them with matplotlib.

    Returns:
        The estimated total variation distance.

    Raises:
        ValueError: If either sample holds fewer than two values, which is
            too few to fit a KDE.
    """
    # `simps` was removed in SciPy 1.14; prefer `simpson` and fall back to the
    # legacy name only on SciPy versions that predate it.
    try:
        from scipy.integrate import simpson
    except ImportError:
        from scipy.integrate import simps as simpson

    R = np.asarray(R, dtype=float)
    R0 = np.asarray(R0, dtype=float)
    if R.size < 2 or R0.size < 2:
        raise ValueError(
            "compute_tv needs at least two values in each sample "
            f"(got {R.size} and {R0.size})"
        )

    # 1. Fit the two KDEs (probability density functions)
    kde_p = gaussian_kde(R, bw_method='scott')   # p(x)
    kde_q = gaussian_kde(R0, bw_method='scott')  # q(x)

    # 2. Define the integration interval: the combined range of both samples
    x_vals = np.linspace(min(R.min(), R0.min()), max(R.max(), R0.max()), 1000)
    pdf_p = kde_p(x_vals)
    pdf_q = kde_q(x_vals)

    # 3. Compute the TVD. `x` is passed by keyword because recent SciPy
    #    releases make it keyword-only.
    integrand = np.abs(pdf_q - pdf_p)
    TV_distance = 0.5 * simpson(integrand, x=x_vals)

    print(f"Total Variation Distance (TVD) = {TV_distance:.5f}")

    if plot:
        plt.figure(figsize=(8, 5))
        plt.plot(x_vals, pdf_p, label="p(x) (KDE of R - CEB)", color="blue", linewidth=2)
        plt.plot(x_vals, pdf_q, label="q(x) (KDE of R0 - JOB)", color="red", linewidth=2)
        plt.fill_between(x_vals, pdf_p, pdf_q, color="gray", alpha=0.3, label="|q(x) - p(x)|")
        plt.xlabel("X values")
        plt.ylabel("Density")
        plt.title("Kernel Density Estimation (KDE) and TV Distance")
        plt.legend()
        plt.show()

    return TV_distance

In [None]:
# TVD between the CEB values (as R) and the other-workload values (as R0).
tv = compute_tv(R=ceb_values, R0=job_values)