In [3]:
from utils.header_cleaner import *
import difflib

# It's really quite simple - we're comparing PyTorch and OpenVINO YOLOv8n performance.
# Two easy things to compare are power and latency.

def clean_and_calculate_power(dataframe):
    """Clean a raw metrics frame and derive energy columns from it.

    The frame is first passed through ``clean_df`` (imported from
    ``utils.header_cleaner``). Two columns are then added to the cleaned
    frame, which is returned:

    * ``total_joules`` - row-wise sum of the (at most six) columns whose
      names are the closest fuzzy matches to ``'kepler node joules total'``.
    * ``power_consumed`` - difference between consecutive ``total_joules``
      rows; the first row is therefore NaN.

    NOTE(review): ``cutoff=0.05`` is extremely permissive, so the six most
    similar column names are selected even if they are only loosely related
    to the target phrase - confirm the matched columns are the intended ones.
    NOTE(review): ``power_consumed`` is an energy delta per sample; it only
    equals Watts if samples are one second apart - confirm the interval.
    """
    frame = clean_df(dataframe)

    # Fuzzy-match column names against the target metric phrase.
    joule_columns = difflib.get_close_matches(
        'kepler node joules total',
        frame.columns,
        n=6,
        cutoff=0.05,
    )

    frame['total_joules'] = frame[joule_columns].sum(axis=1)
    frame['power_consumed'] = frame['total_joules'].diff()
    return frame


# Input locations, keyed by run. Both dicts share the same keys so a single
# key can be used to look up the matching Prometheus directory (one feather
# file per worker inside it) and the YOLO timing feather file.
prom_data_paths = dict(
    ov_cpu_path="../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/",
    pytorch_path="../../../data/processed/ov_vs_pytorch/prom/pytorch_2mbps/",
)
yolo_data_paths = dict(
    ov_cpu_path="../../../data/processed/ov_vs_pytorch/yolo/ov-cpu_2mbps-rerun.feather",
    pytorch_path="../../../data/processed/ov_vs_pytorch/yolo/pytorch_2mbps.feather",
)

# Per-run results, filled in below and consumed by the plotting cell.
total_power = {}
response_time = {}

# For each run, load every worker's Prometheus dump, derive its
# 'power_consumed' series, and average those series across the workers.
#
# The series must be collected for ALL workers before averaging: assigning
# total_power[key] inside the worker loop (as this cell previously did) keeps
# only the last worker's data, so the "mean" was just worker5.
for key, prom_dir in prom_data_paths.items():
    per_worker_power = []
    for work_num in range(1, 6):
        temp_path = prom_dir + f"worker{work_num}.feather"
        print(temp_path)
        worker_df = clean_and_calculate_power(pd.read_feather(temp_path))
        # Name each series after its worker so the combined frame has
        # distinct, readable columns.
        per_worker_power.append(worker_df['power_consumed'].rename(f"worker{work_num}"))

    # Column-wise concat aligns the workers on their index; mean(axis=1)
    # skips NaN, so rows missing for some workers average over the rest.
    # NOTE(review): assumes the cleaned frames have unique, comparable
    # indexes across workers - confirm against clean_df.
    concatenated_power = pd.concat(per_worker_power, axis=1)
    total_power[key] = concatenated_power.mean(axis=1)

# Build, per run, a series of mean end-to-end response time in 5-second bins.
for key in prom_data_paths.keys():
    yolo_df = pd.read_feather(yolo_data_paths[key])
    # Inference time is pre-processing + model inference + post-processing;
    # end-to-end time additionally includes the time spent queued.
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']
    # 'start_time' is interpreted as epoch milliseconds and becomes the
    # DatetimeIndex that resample() needs.
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')
    resampled_df = yolo_df.resample('5S')
    # reset_index() discards the timestamps so runs recorded at different
    # times can be overlaid by bin number. The series is renamed to the run
    # key; otherwise every run's series is called 'end_to_end_response_time'
    # and concatenating them column-wise yields duplicate column names.
    response_time[key] = (
        resampled_df.agg({'end_to_end_response_time': 'mean'})
        .reset_index()['end_to_end_response_time']
        .rename(key)
    )
   
#data has now been loaded, time to graph



../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/worker1.feather
Loaded 736 rows and 125119 columns
Removing 124314 static columns (805 remaining)
Unable to read timestamp as json
../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/worker2.feather
Loaded 736 rows and 110642 columns
Removing 109849 static columns (793 remaining)
Unable to read timestamp as json
../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/worker3.feather
Loaded 736 rows and 133502 columns
Removing 132751 static columns (751 remaining)
Unable to read timestamp as json
../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/worker4.feather
Loaded 736 rows and 110454 columns
Removing 109538 static columns (916 remaining)
Unable to read timestamp as json
../../../data/processed/ov_vs_pytorch/prom/ov-cpu_2mbps-rerun/worker5.feather
Loaded 736 rows and 119047 columns
Removing 118271 static columns (776 remaining)
Unable to read timestamp as json
../../../data/processed/ov_vs_pytor

In [4]:
# Per run: mean end-to-end response time in 5-second bins, stored under the
# run key and named after it so the runs stay distinguishable when combined.
for key in prom_data_paths:
    yolo_df = pd.read_feather(yolo_data_paths[key])

    # pre + inference + post processing, then add queueing delay on top.
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']

    # Index by request start (epoch milliseconds -> datetime) for resampling.
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')

    resampled_df = yolo_df.resample('5S')
    binned_means = resampled_df.agg({'end_to_end_response_time': 'mean'})
    # Drop the timestamps (positional index) and label the series by run.
    flattened = binned_means.reset_index()
    response_time[key] = flattened['end_to_end_response_time'].rename(key)
    
    

In [7]:
# Plot the two comparisons: averaged power and mean response time, one line
# per run in each figure.
import plotly.express as px

# Legend label for each run key. Passing these as `keys=` to pd.concat names
# the columns explicitly, so the labels are correct whether or not the input
# series already carry a name (renaming positional columns 0/1 silently does
# nothing once the series are named).
run_labels = {'ov_cpu_path': 'ov_cpu', 'pytorch_path': 'pytorch'}

power_df = pd.concat(
    [total_power[run_key] for run_key in run_labels],
    axis=1,
    keys=list(run_labels.values()),
)
fig = px.line(power_df, x=power_df.index, y=power_df.columns)
# NOTE(review): the plotted values are per-sample differences of a joules
# total; they are Watts only if samples are one second apart - confirm.
fig.update_layout(title='Average Power Over Time', xaxis_title='Time', yaxis_title='Power in Watts')
fig.show()

queue_df = pd.concat(
    [response_time[run_key] for run_key in run_labels],
    axis=1,
    keys=list(run_labels.values()),
)
fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
# Log scale keeps both runs readable when their response times differ by
# orders of magnitude.
fig.update_layout(title='Average Response Time Over Time', xaxis_title='Time', yaxis_title='Response Time in MS', yaxis_type='log')
fig.show()

