In [None]:
import os

# Find every subfolder that directly contains a file with the given name
def find_subfolders_with_file(root_folder, filename):
    """Return the directories under ``root_folder`` that directly contain ``filename``.

    The tree is traversed with ``os.walk``; a directory is reported when
    ``filename`` appears among its immediate (non-recursive) file entries.
    ``root_folder`` itself is included when it qualifies.

    Args:
        root_folder: Path of the directory tree to search.
        filename: Exact file name to look for (no globbing).

    Returns:
        A list of directory paths, in the order ``os.walk`` visits them.
        Empty when nothing matches.
    """
    return [
        folder
        for folder, _subdirs, names in os.walk(root_folder)
        if filename in names
    ]

# Locate every run directory under the data root that holds a
# 'worker1.feather' file. `subfolders` is consumed further down the notebook
# to build the per-run path dictionaries, so this cell must run first.
root_folder = '../../../data/minimized'
filename = 'worker1.feather'
subfolders = find_subfolders_with_file(root_folder, filename)
# Show the discovered directories so the selected runs can be checked by eye.
print(subfolders)

In [None]:
# import os
# 
# # Function to find the subfolders with the file names
# def find_subfolders_with_file(root_folder, filename):
#     result = []
#     for root, dirs, files in os.walk(root_folder):
#         if filename in files:
#             result.append(root)
#     return result
# 
# # Example usage
# root_folder = '../../../data/yolo'
# filename = 'worker1.feather'
# subfolders = find_subfolders_with_file(root_folder, filename)
# print(subfolders)

In [None]:
from utils.header_cleaner import *
import difflib

# It's really quite simple - we're comparing PyTorch and OpenVINO YOLOv8n performance.
# Some easy things to compare are power and latency.

def clean_and_calculate_power(dataframe):
    """Clean a raw metrics frame and derive energy and power columns.

    The frame is passed through ``clean_df``. Up to six columns whose names
    are the closest fuzzy matches to ``'kepler node joules total'`` are summed
    row-wise into ``total_joules``. ``power_consumed`` is the row-to-row change
    of ``total_joules`` divided by the sampling interval, where the interval is
    taken as the gap between the first two ``timestamp`` values (i.e. the
    sampling rate is assumed to be constant).

    Args:
        dataframe: Raw frame. After ``clean_df`` it must expose a
            ``timestamp`` column plus the energy counter columns.

    Returns:
        The cleaned frame with ``total_joules`` and ``power_consumed`` added.
        The first ``power_consumed`` value is NaN because it comes from
        ``diff()``.

    Raises:
        ValueError: If the cleaned frame has fewer than two rows, so no
            sampling interval can be computed.
    """
    cleaned_df = clean_df(dataframe)

    # Fuzzy-match the energy counter columns by name.
    # NOTE(review): cutoff=0.05 accepts almost any column name, so the result
    # is only meaningful when at least six real energy columns exist - confirm
    # against the actual column names produced by clean_df.
    target_word = 'kepler node joules total'
    # The time axis must never be summed into the energy total; with such a
    # permissive cutoff it would otherwise be picked whenever fewer than six
    # better-matching columns are available.
    candidates = [col for col in cleaned_df.columns if col != "timestamp"]
    closest_matches = difflib.get_close_matches(target_word, candidates, n=6, cutoff=0.05)
    cleaned_df['total_joules'] = cleaned_df[closest_matches].sum(axis=1)

    ts = cleaned_df["timestamp"]
    if len(ts) < 2:
        raise ValueError("need at least two rows to derive the sampling interval")
    # Positional access: the first two rows are wanted regardless of how the
    # index is labelled after cleaning (ts[1] would be a label lookup).
    interval = ts.iloc[1] - ts.iloc[0]
    # presumably joules / seconds = watts - TODO confirm the timestamp unit
    cleaned_df['power_consumed'] = cleaned_df['total_joules'].diff() / interval
    return cleaned_df


# Map each run name to the directory holding its Prometheus exports.
# The runs come from the `subfolders` search; an earlier revision hard-coded
# them instead (and was immediately overwritten, so it had no effect):
#   'ov_cpu_path'  -> "../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/"
#   'pytorch_path' -> "../../../data/processed/ov_vs_pytorch/prom/pytorch_2mbps/"
# basename(normpath(...)) yields the leaf directory name regardless of the
# platform's path separator or a trailing slash.
# NOTE(review): two runs sharing the same leaf directory name would collide.
prom_data_paths = {os.path.basename(os.path.normpath(x)): x for x in subfolders}
# The YOLO QoS log is read from the same directory as the worker files.
yolo_data_paths = {key: val + "/yolo_qos.feather" for key, val in prom_data_paths.items()}

total_power = {}
response_time = {}
# For every run, average the power series of workers 1-5 row by row.
# The per-worker series are collected first and combined once per run;
# assigning inside the worker loop would keep only the last worker's data.
for key, prom_dir in prom_data_paths.items():
    worker_power = []
    for work_num in range(1, 6):
        temp_path = prom_dir + "/" + f"worker{work_num}.feather"
        print(temp_path)
        worker_df = clean_and_calculate_power(pd.read_feather(temp_path))
        worker_power.append(worker_df['power_consumed'])
    # One column per worker, aligned on the row index; mean(axis=1) skips NaN,
    # so workers with shorter series do not blank out the average.
    concatenated_power = pd.concat(worker_power, axis=1)
    total_power[key] = concatenated_power.mean(axis=1)

# For every run, compute the mean end-to-end response time per 5-second window.
for key in prom_data_paths.keys():
    yolo_df = pd.read_feather(yolo_data_paths[key])
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']
    # resample() needs a datetime-like index, so 'start_time' (read here as
    # epoch milliseconds) is converted and installed as the index.
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')
    # Lower-case 's' is the seconds alias; upper-case 'S' is deprecated in
    # recent pandas releases.
    resampled_df = yolo_df.resample('5s')
    # Drop the datetime index so every run is stored against the window
    # number (0, 1, 2, ...) and the series is named after its run.
    response_time[key] = (
        resampled_df.agg({'end_to_end_response_time': 'mean'})
        .reset_index()['end_to_end_response_time']
        .rename(key)
    )






In [None]:
#plot the graphs
import plotly.express as px
from matplotlib import pyplot as plt

# Power: one line per run, plotted against the row index of the power series
# (NOTE(review): the x axis is a sample position, not wall-clock time - confirm
# that the 'Time' label is what is intended).
power_df = pd.DataFrame.from_dict(total_power)
fig = px.line(power_df)
fig.update_layout(title='Average Power Over Time', xaxis_title='Time', yaxis_title='Power in Watts',
                  yaxis_range=[-20,80])
fig.show()

# Response time: one line per run, x is the 5-second window number; a log
# y axis is used for the response-time values.
queue_df = pd.DataFrame.from_dict(response_time)
fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
fig.update_layout(title='Average Response Time Over Time', xaxis_title='Time', yaxis_title='Response Time in MS', yaxis_type='log')
fig.show()