In [3]:
import common_utils
import os
import pandas as pd
import difflib

# Locate the experiment folders under the warehouse root that hold a worker1 dump
root_folder = '../../../data_warehouse/minimized_warehouse_3'
filename = 'worker1.feather'
subfolders = common_utils.find_subfolders_with_file(root_folder, filename)
print(subfolders)

# Folder basename -> full folder path
prom_data_paths = {
    os.path.basename(run_dir): run_dir
    for run_dir in subfolders
}

# Folder basename -> path of the yolo_qos.feather file inside that folder
yolo_data_paths = {
    run_name: os.path.join(run_dir, "yolo_qos.feather")
    for run_name, run_dir in prom_data_paths.items()
}

['../../../data_warehouse/minimized_warehouse_3/1735649744_(3.1000)', '../../../data_warehouse/minimized_warehouse_3/1735650171_(3.10000)', '../../../data_warehouse/minimized_warehouse_3/1735652627_(4.10000)', '../../../data_warehouse/minimized_warehouse_3/1735647513_(2.1000)', '../../../data_warehouse/minimized_warehouse_3/1735647009_(1.10000)', '../../../data_warehouse/minimized_warehouse_3/1735652869_(5.1000)', '../../../data_warehouse/minimized_warehouse_3/1735655731_(6.10000)', '../../../data_warehouse/minimized_warehouse_3/1735651459_(4.5000)', '../../../data_warehouse/minimized_warehouse_3/1735653779_(6.1000)', '../../../data_warehouse/minimized_warehouse_3/1735653082_(5.5000)', '../../../data_warehouse/minimized_warehouse_3/1735649957_(3.5000)', '../../../data_warehouse/minimized_warehouse_3/1735645847_(1.5000)', '../../../data_warehouse/minimized_warehouse_3/1735650657_(4.1000)', '../../../data_warehouse/minimized_warehouse_3/1735645043_(1.1000)', '../../../data_warehouse/mini

In [5]:
# Clean dataframe and calculate power
def get_total_joules(dataframe):
    """Sum the energy counted by the Kepler package-joules columns of one frame.

    The (at most two) column names that most closely resemble
    'kepler node package joules total dynamic' are selected with
    ``difflib.get_close_matches``; for each of them the spread
    ``max - min`` over all rows is taken, and the spreads are added up.

    Args:
        dataframe: pandas DataFrame with a "timestamp" column and the metric
            columns. It is NOT modified by this function.

    Returns:
        The summed ``max - min`` of the selected columns (0 when no column
        name matches).

    Raises:
        KeyError: if ``dataframe`` has no "timestamp" column.
    """
    # Sort into a new frame: the previous in-place sort reordered the caller's
    # DataFrame as a hidden side effect. max/min do not depend on row order;
    # the sort is kept so a frame without "timestamp" still fails loudly.
    cleaned_df = dataframe.sort_values(by="timestamp")

    # Fuzzy-match the column names; the very low cutoff means the two best
    # candidates are taken even when the naming differs noticeably.
    target_word = 'kepler node package joules total dynamic'
    closest_matches = difflib.get_close_matches(target_word, cleaned_df.columns, n=2, cutoff=0.05)

    # Spread of each selected column over the whole frame
    joules_per_match = [
        cleaned_df[match].max() - cleaned_df[match].min()
        for match in closest_matches
    ]

    total_joules = sum(joules_per_match)
    return total_joules

# resolution -> {num_vehicles -> joules per image}
total_joules_per_model = {}
for key in prom_data_paths:
    run_dir = prom_data_paths[key]

    # Feather dumps of worker1 .. worker5 inside this run folder
    paths = [os.path.join(run_dir, f"worker{work_num}.feather") for work_num in range(1, 6)]

    # Joules of every worker, added up and divided by the image count
    joules_per_worker = []
    for worker_path in paths:
        worker_df = common_utils.get_cleaned_df(worker_path)
        joules_per_worker.append(get_total_joules(worker_df))
    joules_total = sum(joules_per_worker)
    num_images = 1000 #common_utils.get_number_of_images(key)  # TODO: Get from somewhere
    joules_per_image = joules_total / num_images

    # File the result under the run's resolution and vehicle count
    model_info = common_utils.path_to_workers_and_pcl_size(key)
    per_resolution = total_joules_per_model.setdefault(model_info.resolution, {})
    per_resolution[model_info.num_vehicles] = joules_per_image

# One single-column DataFrame per resolution; the column carries the resolution as its name
max_joules = {}
for resolution in sorted(total_joules_per_model):
    joules = pd.DataFrame.from_dict(
        total_joules_per_model[resolution],
        orient='index',
        columns=[f'{resolution}'],
    )
    max_joules[resolution] = joules



Loaded 14 rows and 501 columns
Removing 0 static columns (501 remaining)
Unable to read timestamp as json
Loaded 14 rows and 364 columns
Removing 0 static columns (364 remaining)
Unable to read timestamp as json
Loaded 14 rows and 544 columns
Removing 0 static columns (544 remaining)
Unable to read timestamp as json
Loaded 14 rows and 481 columns
Removing 0 static columns (481 remaining)
Unable to read timestamp as json
Loaded 14 rows and 411 columns
Removing 0 static columns (411 remaining)
Unable to read timestamp as json
Loaded 192 rows and 663 columns
Removing 0 static columns (663 remaining)
Unable to read timestamp as json
Loaded 192 rows and 640 columns
Removing 0 static columns (640 remaining)
Unable to read timestamp as json
Loaded 192 rows and 707 columns
Removing 0 static columns (707 remaining)
Unable to read timestamp as json
Loaded 192 rows and 628 columns
Removing 0 static columns (628 remaining)
Unable to read timestamp as json
Loaded 192 rows and 612 columns
Removing 0

In [9]:
# Grouped bars
import plotly.express as px

# Define width based on resolution
# resolution_to_width = {160: 0.2, 320: 0.4, 640: 0.6, 1280: 0.8}
# Put the per-resolution columns side by side in one wide frame
max_joules_df = pd.concat(max_joules.values(), axis=1)
max_joules_df_sorted = max_joules_df
# max_joules_df_sorted = common_utils.sort_by_model_size_then_version(max_joules_df)

# Wide-form labels used for axes and hover text. The plotted values are joules
# per image (not watts), and the x-axis of this cell is titled 'Num_workers'.
grouped_bar_labels = {'value': 'Joules', 'index': 'Num_workers'}

# Same grouped chart twice: linear y-axis first, then logarithmic
for chart_title, layout_extras in (
    ('Joules per PCL', {}),
    ('Joules per PCL (Log Scale)', {'yaxis_type': 'log'}),
):
    fig = px.bar(max_joules_df_sorted, barmode='group', title=chart_title, labels=grouped_bar_labels)
    fig.update_layout(
        xaxis_title='Num_workers',
        yaxis_title='Joules',
        legend_title_text='Resolution',
        **layout_extras,
    )
    fig.show()

In [4]:
# Stacked bars, adjusted to the correct heights for each resolution

# Every resolution above the lowest keeps only its increment over the
# next-lower resolution, so that stacking the segments rebuilds the totals.
diff_joules_per_model = {}
resolutions = sorted(total_joules_per_model.keys())

for position, resolution in enumerate(resolutions):
    increments = total_joules_per_model[resolution].copy()
    diff_joules_per_model[resolution] = increments
    if position == 0:
        # Lowest resolution: nothing below it to subtract
        continue
    prev_resolution = resolutions[position - 1]
    baseline = total_joules_per_model[prev_resolution]
    for model in increments:
        # Entries missing from the lower resolution keep their absolute value
        if model in baseline:
            increments[model] -= baseline[model]

# One single-column frame per resolution, later joined column-wise
diff_max_joules = {}
for resolution in resolutions:
    diff_max_joules[resolution] = pd.DataFrame.from_dict(
        diff_joules_per_model[resolution],
        orient='index',
        columns=[f'{resolution}'],
    )

stacked_diff_max_joules_df = pd.concat(diff_max_joules.values(), axis=1)

# Stacked bar chart of the increments
fig = px.bar(
    stacked_diff_max_joules_df,
    barmode='relative',
    title='Joules per image (Adjusted for Resolution Differences)',
    labels={'value': 'Joules', 'index': 'Model'},
)
fig.update_layout(xaxis_title='Model', yaxis_title='Joules', legend_title_text='Resolution')
fig.show()

In [5]:


max_joules_df_sorted = common_utils.sort_by_model_size_then_version(max_joules_df)

# The plotted values are joules per image, so the value label must say joules
# (it previously read 'Max Power (Watts)', which showed up in the hover text).
stacked_bar_labels = {'value': 'Joules', 'index': 'Model'}

# Same chart twice: linear y-axis first, then logarithmic
for layout_extras in ({}, {'yaxis_type': 'log'}):
    fig = px.bar(max_joules_df_sorted, title='Joules per image (relative stacked)', labels=stacked_bar_labels)
    fig.update_layout(
        xaxis_title='Model',
        yaxis_title='Joules',
        legend_title_text='Resolution',
        **layout_extras,
    )
    fig.show()

In [6]:

# Idea not applied yet: per-resolution bar widths, e.g.
# resolution_to_width = {160: 0.2, 320: 0.4, 640: 0.6, 1280: 0.8}
diff_joules_df_sorted = common_utils.sort_by_model_size_then_version(stacked_diff_max_joules_df)

# The plotted values are joules per image, so the value label must say joules
# (it previously read 'Max Power (Watts)', which showed up in the hover text).
joules_bar_labels = {'value': 'Joules', 'index': 'Model'}

# Increment chart twice: linear y-axis first, then logarithmic
for layout_extras in ({}, {'yaxis_type': 'log'}):
    fig = px.bar(
        diff_joules_df_sorted,
        title='Joules per image (overlapping stacked)',
        labels=joules_bar_labels,
    )
    fig.update_layout(
        xaxis_title='Model',
        yaxis_title='Joules',
        legend_title_text='Resolution',
        **layout_extras,
    )
    fig.show()

# Grouped chart of the absolute values on a log axis, for comparison
fig = px.bar(max_joules_df_sorted, barmode='group', title='Joules per image (Log Scale)', labels=joules_bar_labels)
fig.update_layout(xaxis_title='Model', yaxis_title='Joules', yaxis_type='log', legend_title_text='Resolution')
fig.show()

In [7]:
# Show the sorted per-resolution joule increments as this cell's output
diff_joules_df_sorted

Unnamed: 0,160,320,640,1280
yolo8n,0.165642,0.135784,0.673568,2.603084
yolo9n,0.174703,0.136223,0.682131,2.600894
yolo10n,0.167173,0.120309,0.570643,2.449582
yolo11n,0.152009,0.11596,0.572286,2.345751
yolo8s,0.270859,0.425566,1.866956,7.249903
yolo9s,0.278651,0.406514,1.833307,6.999466
yolo10s,0.24935,0.346692,1.556392,6.275852
yolo11s,0.242423,0.336378,1.541513,6.14715
yolo8m,0.575169,1.092686,4.737581,19.578121
yolo9m,0.572958,1.126932,4.905687,20.193819


In [8]:
# Write the sorted increment table to a CSV file (relative path, so it lands in the working directory)
diff_joules_df_sorted.to_csv("data_diff_joules.csv")