In [5]:
import common_utils
import os
import pandas as pd

# Example usage
root_folder = '../../../data_warehouse/minimized_warehouse_4'
filename = 'worker1.feather'
subfolders = common_utils.find_subfolders_with_file(root_folder, filename)
print(subfolders)
prom_data_paths = {os.path.basename(x): x for x in subfolders}
yolo_data_paths = {key: os.path.join(val, "master_qos.feather") for key, val in prom_data_paths.items()}


['../../../data_warehouse/minimized_warehouse_4/1735854367_(5.5000)', '../../../data_warehouse/minimized_warehouse_4/1735863576_(1.5000)', '../../../data_warehouse/minimized_warehouse_4/1735851224_(7.5000)', '../../../data_warehouse/minimized_warehouse_4/1735849172_(8.1000)', '../../../data_warehouse/minimized_warehouse_4/1735844045_(30.10000)', '../../../data_warehouse/minimized_warehouse_4/1735860195_(2.5000)', '../../../data_warehouse/minimized_warehouse_4/1735853373_(6.10000)', '../../../data_warehouse/minimized_warehouse_4/1735842931_(30.1000)', '../../../data_warehouse/minimized_warehouse_4/1735845629_(20.10000)', '../../../data_warehouse/minimized_warehouse_4/1735859344_(2.1000)', '../../../data_warehouse/minimized_warehouse_4/1735852779_(6.5000)', '../../../data_warehouse/minimized_warehouse_4/1735852257_(6.1000)', '../../../data_warehouse/minimized_warehouse_4/1735848689_(9.10000)', '../../../data_warehouse/minimized_warehouse_4/1735853852_(5.1000)', '../../../data_warehouse/m

In [6]:
"""
Get corresponding yolo stats for each model 
"""
response_time = {}
for key in prom_data_paths.keys():
    try:
        yolo_df = common_utils.read_feather_cached(yolo_data_paths[key])
    except:
        print(f"Failed to read {key}")
        continue
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']
    yolo_df['end'] = pd.to_datetime(yolo_df['end_time'], unit='ms')  # Convert to datetime (optional)
    yolo_df.set_index('end', inplace=True)
    resampled_df = yolo_df.resample('5s')
    model_info = common_utils.path_to_workers_and_pcl_size(key)
    if model_info.resolution not in response_time:
        response_time[model_info.resolution] = {}
    response_time[model_info.resolution][model_info.num_vehicles] = resampled_df.agg({'end_to_end_response_time': 'count'}).reset_index()['end_to_end_response_time'].rename(key) / 5

In [7]:
import plotly.express as px

for resolution in sorted(response_time.keys()):
    queue_df = pd.DataFrame.from_dict(response_time[resolution])
    fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
    fig.update_layout(title=f'PCL throughput (Resolution: {resolution})', xaxis_title='Time', yaxis_title='Images per second', yaxis_type='linear')
    fig.show()

In [8]:

import plotly.express as px
import pandas as pd

# Dictionary to store max throughput for each model
max_throughput = {}

for resolution in sorted(response_time.keys()):
    queue_df = pd.DataFrame.from_dict(response_time[resolution])
    
    # Calculate the maximum throughput for each model
    max_throughput[resolution] = queue_df.max()

# Create a DataFrame from the max throughput dictionary
max_throughput_df = pd.DataFrame(max_throughput)

# Sort 
max_throughput_df_sorted = common_utils.sort_by_model_size_then_version(max_throughput_df)

# Plot the maximum throughput for each model
fig = px.bar(max_throughput_df_sorted, barmode='group')
fig.update_layout(title='Maximum Throughput per Model', xaxis_title='Model', yaxis_title='Max Images per Second', yaxis_type='linear')
fig.show()

fig = px.bar(max_throughput_df_sorted, barmode='group')
fig.update_layout(title='Maximum Throughput per Model', xaxis_title='Model', yaxis_title='Max Images per Second', yaxis_type='log')
fig.show()

AttributeError: 'int' object has no attribute 'replace'

In [None]:
max_throughput_df_sorted.to_csv("table_plots/data_throughput.csv")

In [None]:
max_throughput_df_sorted