In [2]:
from pathlib import Path

import pandas as pd

import pre_processing as prep


# Preprocess
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Rename, filter and rate-convert the columns of one metrics frame.

    Steps, in order:

    1. Rename every column with ``prep.rename_workers`` followed by
       ``prep.rename_container_columns`` (more generalized names, e.g.
       "worker" instead of "worker1").
    2. Keep only the columns accepted by all of ``prep.filter_namespaces``,
       ``prep.filter_go_specific``, ``prep.filter_process_specific`` and
       ``prep.filter_durations`` (drops columns that are useless or difficult
       to use, e.g. process specific metrics).
    3. Pass the columns reported by ``prep.get_counters`` to
       ``prep.convert_to_rates`` (monotonically increasing counters to
       gauges, e.g. total joules to joules per unit of time) and drop the
       original counter columns from the result.

    The frame passed in is not modified; a new frame is returned.

    Args:
        df: Frame whose column labels are understood by the ``prep`` helpers.

    Returns:
        A new DataFrame with renamed, filtered and rate-converted columns.
    """
    # Relabel a shallow copy so the new column names do not leak back into the
    # caller's frame (assigning to ``df.columns`` directly would mutate it).
    # NOTE(review): assumes the prep.rename_* helpers are pure per-column
    # functions, so composing them per column equals two full passes.
    df = df.copy(deep=False)
    df.columns = [
        prep.rename_container_columns(prep.rename_workers(col)) for col in df.columns
    ]

    # One pass over the columns; ``all`` short-circuits in the same order the
    # filters were originally applied. Selecting with a positional boolean
    # mask (rather than by label) keeps each column exactly once even if the
    # renaming above produced duplicate labels.
    column_filters = (
        prep.filter_namespaces,
        prep.filter_go_specific,
        prep.filter_process_specific,
        prep.filter_durations,
    )
    keep = [all(accept(col) for accept in column_filters) for col in df.columns]
    df = df.loc[:, keep]

    # Convert counters to rates, then drop the raw counter columns.
    counters = prep.get_counters(df)
    df = prep.convert_to_rates(df, counters)
    df = df.loc[:, [col not in counters for col in df.columns]]

    # Return a defragmented copy of the frame after all the edits.
    return df.copy()


# Save feather

In [6]:
# Get all model subfolders (discovered via prep.find_subfolders_with_file,
# looking for `filename` below `root_folder`).
root_folder = '../data/minimized_run_5'
filename = 'worker1.feather'
model_folders = prep.find_subfolders_with_file(root_folder, filename)
print(model_folders)

# For each worker feather: read it, preprocess it, and write the result to a
# mirrored "<save_folder>/<model_folder>/instances/" location.
save_folder = "preprocessed/"
for model_folder in model_folders:
    # NOTE(review): model_folder starts with "../" (it is derived from
    # root_folder), so this joined path contains ".." and resolves outside
    # save_folder — confirm that is the intended output location.
    save_path = f"{save_folder}/{model_folder}/instances/"
    # to_feather does not create missing directories; make sure the target
    # exists so a run on a fresh checkout does not fail on the first write.
    Path(save_path).mkdir(parents=True, exist_ok=True)

    # NOTE(review): range(5) covers worker0..worker4, while discovery above
    # looks for worker1.feather — confirm worker0.feather exists.
    for worker in range(5):
        print(f"Preprocessing worker {worker}")
        path = f"{model_folder}/instances/worker{worker}.feather"
        df = preprocess(pd.read_feather(path))
        df.to_feather(f"{save_path}worker{worker}.feather")
        print(f"Saved worker{worker} to {save_path}worker{worker}.feather")

['../data/minimized_run_5/1732144183_yolov8m_640', '../data/minimized_run_5/1732123252_yolo11m_320', '../data/minimized_run_5/1732156444_yolov8n_1280', '../data/minimized_run_5/1732124777_yolov10s_320', '../data/minimized_run_5/1732120270_yolov9m_160', '../data/minimized_run_5/1732129367_yolov8l_320', '../data/minimized_run_5/1732159264_yolov8x_1280', '../data/minimized_run_5/1732129943_yolov8x_320', '../data/minimized_run_5/1732142717_yolov8n_640', '../data/minimized_run_5/1732151684_yolov10m_1280', '../data/minimized_run_5/1732154682_yolov9m_1280', '../data/minimized_run_5/1732149252_yolo11l_1280', '../data/minimized_run_5/1732118879_yolov10m_160', '../data/minimized_run_5/1732121677_yolov8m_160', '../data/minimized_run_5/1732143492_yolov8s_640', '../data/minimized_run_5/1732135028_yolov10s_640', '../data/minimized_run_5/1732127347_yolov9c_320', '../data/minimized_run_5/1732130352_yolo11n_640', '../data/minimized_run_5/1732157808_yolov8m_1280', '../data/minimized_run_5/1732122570_yol