In [1]:
import pandas as pd
import json
import os
from IPython.display import display

In [12]:
import pandas as pd
import json
import os
from IPython.display import display

def load_and_merge_test_data(test_ids, max_time_diff="30s"):
    """Load each test's performance CSV and JSONL log and join them on time.

    For every test ID the function reads
    ``./traffic_generator_csvs/performance_<test_id>.csv`` and
    ``<log_folder>/<test_id>.jsonl``. The log folder is ``./naive_logs`` when
    the ID contains ``"naive"`` and ``./marla_logs`` when it contains
    ``"marla"`` (``"naive"`` is checked first). Each CSV row is matched to the
    JSONL record whose ``timestamp`` is nearest to the row's ``Time``.

    Both time columns are parsed with ``utc=True`` so that timezone-aware and
    timezone-naive inputs end up with the same dtype; naive values are
    interpreted as UTC.

    Args:
        test_ids: Iterable of test-ID strings.
        max_time_diff: Largest allowed gap between ``Time`` and the matched
            ``timestamp``, in any form accepted by ``pd.Timedelta``. A falsy
            value (``None``, ``""``) disables the limit. CSV rows with no log
            record inside the limit keep their own columns and get missing
            values in the log columns.

    Returns:
        A list with one merged DataFrame per test that could be loaded, in
        the order the IDs were given. Tests are skipped (with a printed
        message) when the strategy is unknown, when either input file is
        missing, or when the JSONL file yields no ``timestamp`` column.

    Side effects:
        Prints a banner per test and displays the head of each merged frame.
    """
    merged_dfs = []
    # Loop-invariant, so convert once up front.
    tolerance = pd.Timedelta(max_time_diff) if max_time_diff else None

    for test_id in test_ids:
        print(f"\n===== Test ID: {test_id} =====")

        # === Determine source folder for JSONL based on test ID ===
        if "naive" in test_id:
            log_folder = "./naive_logs"
        elif "marla" in test_id:
            log_folder = "./marla_logs"
        else:
            print(f"Unknown strategy in test_id: {test_id}. Skipping.")
            continue

        # === Paths ===
        csv_path = f"./traffic_generator_csvs/performance_{test_id}.csv"
        jsonl_path = os.path.join(log_folder, f"{test_id}.jsonl")

        # === Load CSV ===
        if not os.path.exists(csv_path):
            print(f"CSV not found for {test_id}: {csv_path}")
            continue
        df_perf = pd.read_csv(csv_path)
        df_perf["Time"] = pd.to_datetime(df_perf["Time"], utc=True)

        # === Load JSONL ===
        if not os.path.exists(jsonl_path):
            print(f"JSONL not found for {test_id}: {jsonl_path}")
            continue
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(jsonl_path, "r", encoding="utf-8") as f:
            # json.loads ignores surrounding whitespace, so only blank lines
            # need to be filtered out.
            records = [json.loads(line) for line in f if line.strip()]
        df_dist = pd.DataFrame(records)
        # A blank file (or records without the key) gives a frame with no
        # "timestamp" column; skip instead of failing with a KeyError.
        if "timestamp" not in df_dist.columns:
            print(f"No timestamped records in JSONL for {test_id}: {jsonl_path}. Skipping.")
            continue
        df_dist["timestamp"] = pd.to_datetime(df_dist["timestamp"], utc=True)

        # === Merge by nearest timestamp ===
        # merge_asof needs both keys sorted and free of nulls.
        df_perf_sorted = df_perf.dropna(subset=["Time"]).sort_values("Time")
        df_dist_sorted = df_dist.dropna(subset=["timestamp"]).sort_values("timestamp")

        merged_df = pd.merge_asof(
            df_perf_sorted,
            df_dist_sorted,
            left_on="Time",
            right_on="timestamp",
            direction="nearest",
            tolerance=tolerance,
        )

        print("→ Merged Data (Closest Time Match):")
        display(merged_df.head())

        merged_dfs.append(merged_df)

    return merged_dfs


In [14]:
# Load and merge the naive-strategy run. The loader itself prints a banner and
# displays the head of the merged frame before returning.
merged_dfs = load_and_merge_test_data(["cpu_naive_v01"])
print("\nMerged DataFrames:")
# Plain-text preview of the first rows of every merged frame returned above.
for df in merged_dfs:
    print(df.head())
    print("\n")
# NOTE: this rebinds `merged_dfs`, so the naive-run frames loaded above are no
# longer reachable through that name once this line has run.
merged_dfs = load_and_merge_test_data(["cpu_marla_v01"])


===== Test ID: cpu_naive_v01 =====
→ Merged Data (Closest Time Match):


Unnamed: 0,Test_ID,Minute,Time,RPS,Throughput,Avg_Latency,P50_Latency,P75_Latency,P90_Latency,P95_Latency,P99_Latency,Max_Latency,Min_Latency,Errors,timestamp,rps,desired_replicas,replica_distribution
0,FinalTest,1,2025-07-22 10:43:46.658831+00:00,500,500.009928,0.114,0.095,0.0,0.156,0.164,0.197,9.4,0.06,0,2025-07-22 10:43:36.510764+00:00,206,2,"{'minikube-m02': 1, 'minikube': 1}"
1,FinalTest,2,2025-07-22 10:44:46.846653+00:00,650,650.012795,0.1,0.086,0.0,0.147,0.156,0.186,1.602,0.057,0,2025-07-22 10:44:36.511310+00:00,626,1,{'minikube-m02': 1}
2,FinalTest,3,2025-07-22 10:45:47.076340+00:00,800,800.01887,0.096,0.086,0.0,0.143,0.151,0.173,2.23,0.066,0,2025-07-22 10:45:31.498237+00:00,664,1,{'minikube-m02': 1}
3,FinalTest,4,2025-07-22 10:46:47.344304+00:00,950,950.004583,0.09,0.085,0.0,0.105,0.117,0.164,3.482,0.058,0,2025-07-22 10:46:31.500334+00:00,913,1,{'minikube-m02': 1}
4,FinalTest,5,2025-07-22 10:47:47.664710+00:00,1100,1100.011578,0.092,0.086,0.0,0.11,0.135,0.192,2.593,0.054,0,2025-07-22 10:47:36.518162+00:00,1072,3,"{'minikube-m02': 2, 'minikube': 1}"



Merged DataFrames:
     Test_ID  Minute                             Time   RPS   Throughput  \
0  FinalTest       1 2025-07-22 10:43:46.658831+00:00   500   500.009928   
1  FinalTest       2 2025-07-22 10:44:46.846653+00:00   650   650.012795   
2  FinalTest       3 2025-07-22 10:45:47.076340+00:00   800   800.018870   
3  FinalTest       4 2025-07-22 10:46:47.344304+00:00   950   950.004583   
4  FinalTest       5 2025-07-22 10:47:47.664710+00:00  1100  1100.011578   

   Avg_Latency  P50_Latency  P75_Latency  P90_Latency  P95_Latency  \
0        0.114        0.095          0.0        0.156        0.164   
1        0.100        0.086          0.0        0.147        0.156   
2        0.096        0.086          0.0        0.143        0.151   
3        0.090        0.085          0.0        0.105        0.117   
4        0.092        0.086          0.0        0.110        0.135   

   P99_Latency  Max_Latency  Min_Latency  Errors  \
0        0.197        9.400        0.060       0  