In [None]:
# !pip install ujson

In [None]:
import common_utils
import os
import pandas as pd

# Find the run directories under the warehouse root via the marker file name.
# NOTE(review): presumably find_subfolders_with_file returns the directories
# that contain `filename` -- confirm against common_utils.
root_folder = '../../../data_warehouse/minimized_warehouse_3'
filename = 'worker1.feather'
subfolders = common_utils.find_subfolders_with_file(root_folder, filename)
print(subfolders)

# Key each run by the last component of its directory path.
prom_data_paths = {os.path.basename(run_dir): run_dir for run_dir in subfolders}

# Per-run paths to the worker-side and master-side QoS feather files, built by
# joining the fixed file names onto each run directory.
worker_qos_paths = {
    run_name: os.path.join(run_dir, "worker_qos.feather")
    for run_name, run_dir in prom_data_paths.items()
}
master_qos_paths = {
    run_name: os.path.join(run_dir, "master_qos.feather")
    for run_name, run_dir in prom_data_paths.items()
}


In [None]:
# Build one merged master/worker QoS frame per run, then stack all runs into
# `combined_df`. A run whose master or worker file cannot be read is reported
# on stdout and skipped, so a single bad run does not abort the whole load.
dfs = []
for key in worker_qos_paths:
    try:
        master_df = common_utils.read_feather_cached(master_qos_paths[key])
    except Exception as e:
        print(f"Failed to read master for {key}: {e}")
        continue
    try:
        worker_df = common_utils.read_feather_cached(worker_qos_paths[key])
    except Exception as e:
        print(f"Failed to read worker for {key}: {e}")
        continue

    # Prefix every column with its origin, then rename the prefixed id column
    # ('master_id' / 'worker_id') to the shared join key 'msg_id'.
    master_df = master_df.add_prefix("master_").rename(columns={"master_id": "msg_id"})
    worker_df = worker_df.add_prefix("worker_").rename(columns={"worker_id": "msg_id"})

    # Inner join: only messages present on both the master and worker side survive.
    # A distinct name is used so the per-run frame does not shadow the final
    # `combined_df` built after the loop.
    run_df = pd.merge(master_df, worker_df, on="msg_id", how="inner")

    # Tag every row with the run's configuration. The helper also returns a
    # timestamp, which is not needed here.
    _timestamp, workers, resolution = common_utils.path_to_workers_and_pcl_size(key)
    run_df["workers"] = workers
    run_df["resolution"] = resolution
    run_df["name"] = f"({workers},{resolution})"
    dfs.append(run_df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but say what actually went wrong.
if not dfs:
    raise ValueError(
        "No run could be loaded: every master/worker QoS file failed to read "
        "or no run directories were found."
    )

# Stack all runs. ignore_index is intentionally left at its default, so each
# run keeps the index labels produced by its own merge.
combined_df = pd.concat(dfs)

In [None]:
# Show the distinct values of the "name" column in the combined frame.
combined_df["name"].unique()

In [None]:
# Distinct worker_source values among the rows whose name is "(6,10000)".
# Rows are selected with a boolean mask through .loc: DataFrame.where() keeps
# every row and replaces the non-matching ones with NaN, which made unique()
# report a spurious NaN entry.
# NOTE(review): assumes a 'worker_source' column exists in the combined frame -- confirm.
combined_df.loc[combined_df["name"] == "(6,10000)", "worker_source"].unique()