In [10]:
import common_utils
import os
import pandas as pd

# Example usage
root_folder = '../../../data_warehouse/minimized_warehouse_3'
filename = 'worker1.feather'
subfolders = common_utils.find_subfolders_with_file(root_folder, filename)
print(subfolders)
prom_data_paths = {os.path.basename(x): x for x in subfolders}
yolo_data_paths = {key: os.path.join(val, "worker_qos.feather") for key, val in prom_data_paths.items()}


['../../../data_warehouse/minimized_warehouse_3/1735649744_(3.1000)', '../../../data_warehouse/minimized_warehouse_3/1735650171_(3.10000)', '../../../data_warehouse/minimized_warehouse_3/1735652627_(4.10000)', '../../../data_warehouse/minimized_warehouse_3/1735647513_(2.1000)', '../../../data_warehouse/minimized_warehouse_3/1735647009_(1.10000)', '../../../data_warehouse/minimized_warehouse_3/1735652869_(5.1000)', '../../../data_warehouse/minimized_warehouse_3/1735655731_(6.10000)', '../../../data_warehouse/minimized_warehouse_3/1735651459_(4.5000)', '../../../data_warehouse/minimized_warehouse_3/1735653779_(6.1000)', '../../../data_warehouse/minimized_warehouse_3/1735653082_(5.5000)', '../../../data_warehouse/minimized_warehouse_3/1735649957_(3.5000)', '../../../data_warehouse/minimized_warehouse_3/1735645847_(1.5000)', '../../../data_warehouse/minimized_warehouse_3/1735650657_(4.1000)', '../../../data_warehouse/minimized_warehouse_3/1735645043_(1.1000)', '../../../data_warehouse/mini

In [11]:
"""
Get corresponding yolo stats for each model 
"""
response_time = {}
for key in prom_data_paths.keys():
    try:
        yolo_df = common_utils.read_feather_cached(yolo_data_paths[key])
    except:
        print(f"Failed to read {key}")
        continue
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']
    yolo_df['end'] = pd.to_datetime(yolo_df['end_time'], unit='ms')  # Convert to datetime (optional)
    yolo_df.set_index('end', inplace=True)
    resampled_df = yolo_df.resample('5S')
    model_info = common_utils.path_to_workers_and_pcl_size(key)
    if model_info.resolution not in response_time:
        response_time[model_info.resolution] = {}
    response_time[model_info.resolution][model_info.num_vehicles] = resampled_df.agg({'end_to_end_response_time': 'count'}).reset_index()['end_to_end_response_time'].rename(key) / 5

Failed to read 1735649744_(3.1000)
Failed to read 1735650171_(3.10000)
Failed to read 1735652869_(5.1000)
Failed to read 1735653082_(5.5000)
Failed to read 1735649957_(3.5000)
Failed to read 1735653296_(5.10000)


In [13]:
import plotly.express as px

for resolution in sorted(response_time.keys()):
    queue_df = pd.DataFrame.from_dict(response_time[resolution])
    fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
    fig.update_layout(title=f'PCL throughput (Resolution: {resolution})', xaxis_title='Time', yaxis_title='PCL per second', yaxis_type='linear')
    fig.show()

In [9]:

import plotly.express as px
import pandas as pd

# Dictionary to store max throughput for each model
max_throughput = {}

for resolution in sorted(response_time.keys()):
    queue_df = pd.DataFrame.from_dict(response_time[resolution])
    
    # Calculate the maximum throughput for each model
    max_throughput[resolution] = queue_df.max()

# Create a DataFrame from the max throughput dictionary
max_throughput_df = pd.DataFrame(max_throughput)

# Sort 
max_throughput_df_sorted = common_utils.sort_by_model_size_then_version(max_throughput_df)

# Plot the maximum throughput for each model
fig = px.bar(max_throughput_df_sorted, barmode='group')
fig.update_layout(title='Maximum Throughput per Model', xaxis_title='Model', yaxis_title='Max Images per Second', yaxis_type='linear')
fig.show()

fig = px.bar(max_throughput_df_sorted, barmode='group')
fig.update_layout(title='Maximum Throughput per Model', xaxis_title='Model', yaxis_title='Max Images per Second', yaxis_type='log')
fig.show()

AttributeError: 'int' object has no attribute 'replace'

In [7]:
max_throughput_df_sorted.to_csv("table_plots/data_throughput.csv")

In [8]:
max_throughput_df_sorted

Unnamed: 0,160,320,640,1280
yolo8n,454.2,326.4,145.6,42.4
yolo9n,431.8,309.8,140.6,41.6
yolo10n,447.6,333.4,160.8,45.6
yolo11n,467.8,346.6,163.4,47.4
yolo8s,348.2,183.8,59.2,15.8
yolo9s,334.4,182.2,60.2,16.0
yolo10s,362.4,204.4,70.6,18.2
yolo11s,363.0,208.2,70.8,18.2
yolo8m,219.2,88.4,24.4,6.6
yolo9m,217.4,85.4,23.8,6.0
