In [None]:
import os

from pandas.core.interchange.dataframe_protocol import DataFrame


# Function to find the subfolders with the file names
def find_subfolders_with_file(root_folder, filename):
    """Return every directory under ``root_folder`` that directly contains ``filename``.

    Args:
        root_folder: Directory at which the recursive walk starts (it is
            itself a candidate).
        filename: Exact file name to look for; no wildcard matching is done.

    Returns:
        list[str]: Matching directory paths, in the order ``os.walk`` visits them.
    """
    return [
        current_dir
        for current_dir, _subdirs, file_names in os.walk(root_folder)
        if filename in file_names
    ]

# Example usage: collect every folder below the minimized-data directory that
# contains a 'worker1.feather' file. Later cells key their lookups on the
# basename of each folder found here.
root_folder = '../../../../data/minimized'
filename = 'worker1.feather'
subfolders = find_subfolders_with_file(root_folder, filename)
# Optional filter: uncomment to keep only folders whose path mentions yolov9 or yolov10.
# subfolders = [x for x in subfolders if "yolov9" in x or "yolov10" in x]
print(subfolders)

In [None]:
import os
import pandas as pd
from functools import lru_cache
from collections import namedtuple

def read_feather_cached(file_path):
    """Return a private copy of the feather file stored at ``file_path``.

    The parsed frame is memoised by ``read_feather_cached2``; handing out a
    copy lets callers add or modify columns without altering the shared
    cached object.
    """
    cached_frame = read_feather_cached2(file_path)
    return cached_frame.copy()

@lru_cache(maxsize=None)
def read_feather_cached2(file_path):
    """Read a feather file once per distinct ``file_path`` and memoise the result.

    The cache is unbounded (``maxsize=None``) and every call with the same
    path returns the *same* DataFrame object, so callers must not mutate the
    returned frame in place.
    """
    frame = pd.read_feather(file_path)
    return frame

# Defined once at module level so every parsed result shares the same type
# (building the namedtuple class inside the function created a new class per call).
ModelInfo = namedtuple('ModelInfo', ['timestamp', 'model', 'resolution'])


def path_to_name_and_resolution(path):
    """Parse a run folder name of the form ``<timestamp>_<model>_<resolution>``.

    Run_3 specific naming: '1730280141_yolov9e_1280' -> timestamp_model_resolution.
    The first underscore-separated field is the timestamp and the last one is
    the resolution; everything in between is the model name, so model names
    that themselves contain underscores are accepted.

    Args:
        path: Folder base name (not a full path) to parse.

    Returns:
        ModelInfo: ``(timestamp: str, model: str, resolution: int)``.

    Raises:
        ValueError: If ``path`` has fewer than three underscore-separated
            fields or the resolution field is not an integer.
    """
    parts = path.split("_")
    if len(parts) < 3:
        raise ValueError(
            f"Expected '<timestamp>_<model>_<resolution>', got {path!r}"
        )
    timestamp = parts[0]
    model = "_".join(parts[1:-1])
    resolution = int(parts[-1])
    return ModelInfo(timestamp, model, resolution)

In [None]:
# Display hit/miss/size statistics of the lru_cache wrapped around read_feather_cached2.
read_feather_cached2.cache_info()

In [None]:
from utils.header_cleaner import *
import difflib
import os

# It's really quite simple - we're comparing PyTorch and OpenVINO YOLOv8n performance
# Some easy things to compare would be power, and latency

def clean_and_calculate_power(dataframe):
    """Clean a metrics frame and derive a per-sample power column from energy counters.

    Compute power as sum of cpu package dynamic and cpu package idle:

    - Package should include power from 'CPU cores' and 'CPU uncore'.
    - Kepler has separated total power to 'dynamic' and 'idle'.

    The two energy columns are picked by fuzzy name matching against
    'kepler node package joules total dynamic' (the two closest column names
    win), so the selection depends on the column names produced by
    ``clean_df``; the chosen names are printed for inspection.

    Args:
        dataframe: Raw metrics frame. After ``clean_df`` it must contain a
            ``timestamp`` column.

    Returns:
        A new frame sorted by ``timestamp`` with rows lacking energy data
        removed and two extra columns: ``total_joules`` (row-wise sum of the
        matched columns) and ``power_consumed`` (difference of consecutive
        ``total_joules`` divided by the difference of consecutive
        ``timestamp`` values; the first row is NaN).
        NOTE(review): this is watts only if ``timestamp`` is in seconds - confirm.
    """
    # sort_values/dropna return new frames, so the object handed back by
    # clean_df (and potentially the caller's frame) is never mutated in place.
    cleaned_df = clean_df(dataframe).sort_values(by="timestamp")

    # Target word matching
    target_word = 'kepler node package joules total dynamic'
    closest_matches = difflib.get_close_matches(target_word, cleaned_df.columns, n=2, cutoff=0.05)
    print(closest_matches)

    # Drop rows where any column from closest_matches is NaN (otherwise power
    # will be close to infinite when data is missing).
    cleaned_df = cleaned_df.dropna(subset=closest_matches)
    cleaned_df['total_joules'] = cleaned_df[closest_matches].sum(axis=1)

    time_diff = cleaned_df['timestamp'].diff()
    cleaned_df['power_consumed'] = cleaned_df['total_joules'].diff() / time_diff
    return cleaned_df

"""
Fetch paths to the data
"""
# Folder base name -> full folder path, for every folder discovered earlier.
prom_data_paths = dict(zip(map(os.path.basename, subfolders), subfolders))
# Folder base name -> path of the yolo_qos.feather file inside that folder.
yolo_data_paths = {
    run_name: os.path.join(run_folder, "yolo_qos.feather")
    for run_name, run_folder in prom_data_paths.items()
}

"""
Compute avg powers from prom data
"""
# total_power[resolution][model] -> Series holding, per row, the summed
# 'power_consumed' of the worker1..worker5 feather files of that run.
total_power = {}
for key, run_folder in prom_data_paths.items():
    paths = [os.path.join(run_folder, f"worker{work_num}.feather") for work_num in range(1, 6)]
    for temp_path in paths:
        print(temp_path)

    # One power column per worker file, placed side by side (aligned on index).
    worker_power = [
        clean_and_calculate_power(read_feather_cached(worker_path))['power_consumed']
        for worker_path in paths
    ]
    concatenated_power = pd.concat(worker_power, axis=1)

    model_info = path_to_name_and_resolution(key)
    per_resolution = total_power.setdefault(model_info.resolution, {})
    per_resolution[model_info.model] = concatenated_power.sum(axis=1)

"""
Get corresponding yolo stats for each model 
"""
# response_time[resolution][model] -> Series of mean end-to-end response time
# per 5-second bin, named after the run folder.
response_time = {}
for key in prom_data_paths.keys():
    yolo_df = read_feather_cached(yolo_data_paths[key])
    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']

    # resample() requires a datetime-like index, so the conversion is mandatory.
    # 'start_time' is interpreted as epoch milliseconds.
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')

    # Lower-case '5s': the upper-case 'S' alias is deprecated since pandas 2.2.
    resampled_df = yolo_df.resample('5s')
    mean_latency = resampled_df.agg({'end_to_end_response_time': 'mean'})

    # NOTE(review): reset_index() discards the bin timestamps, leaving a
    # positional 0..N-1 index; any later index-based alignment with the power
    # series is therefore positional - confirm this is intended.
    mean_latency = mean_latency.reset_index()['end_to_end_response_time'].rename(key)

    model_info = path_to_name_and_resolution(key)
    response_time.setdefault(model_info.resolution, {})[model_info.model] = mean_latency


In [None]:
import plotly.express as px
from matplotlib import pyplot as plt

# Function to compute simple moving average
def moving_average(data, window_size):
    """Return the simple moving average of ``data`` over ``window_size`` samples.

    Args:
        data: pandas Series or DataFrame to smooth.
        window_size: Number of consecutive samples in each averaging window.

    Returns:
        Object of the same shape as ``data``; the first ``window_size - 1``
        entries are NaN because their window is incomplete.
    """
    rolling_window = data.rolling(window=window_size)
    return rolling_window.mean()

# You can adjust the window size for different levels of smoothing.
# NOTE(review): window_size is not referenced by the plotting loop below.
window_size = 3

# One figure per resolution, one trace per model, log-scaled y axis.
for resolution in sorted(response_time.keys()):
    queue_df = pd.DataFrame.from_dict(response_time[resolution])
    fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
    fig.update_layout(
        title=f'End-to-end latency (Resolution: {resolution})',
        xaxis_title='Time',
        yaxis_title='Response Time in MS',  # fixed typo: was 'Reponse'
        yaxis_type='log',
    )
    fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Latency per watt: response time divided by total power, per resolution and model.
power_response_ratio = {}
for resolution, models in total_power.items():
    power_response_ratio[resolution] = {}
    for model in models.keys():
        power_df = pd.DataFrame(total_power[resolution][model])
        response_df = pd.DataFrame(response_time[resolution][model])
        if power_df.empty or response_df.empty:
            continue
        # Keep only the index labels present in both frames.
        response_df_aligned, power_df_aligned = response_df.align(power_df, join='inner', axis=0)
        if response_df_aligned.empty or power_df_aligned.empty:
            continue
        # Divide by the raw values so the differing column labels of the two
        # frames do not trigger label alignment.
        power_response_ratio[resolution][model] = response_df_aligned.div(power_df_aligned.values, axis=0, level=0)

for resolution, models in sorted(power_response_ratio.items()):
    if not models:
        # pd.concat raises ValueError on an empty mapping; report and move on
        # instead of aborting the remaining plots.
        print(f"No aligned power/latency data for resolution {resolution}; skipping plot")
        continue
    ratio_df = pd.concat(models, axis=1)
    ratio_df.columns = [f"{model}_ratio" for model in models.keys()]
    fig = px.line(ratio_df, title=f'Latency per Watt (Resolution: {resolution})')
    fig.update_layout(xaxis_title='Time', yaxis_title='Response Time per Watt')
    fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Watt per latency: total power divided by response time, per resolution and model.
power_response_ratio = {}
for resolution, models in total_power.items():
    power_response_ratio[resolution] = {}
    for model in models.keys():
        power_df = pd.DataFrame(total_power[resolution][model])
        response_df = pd.DataFrame(response_time[resolution][model])
        if power_df.empty or response_df.empty:
            continue
        # Keep only the index labels present in both frames.
        response_df_aligned, power_df_aligned = response_df.align(power_df, join='inner', axis=0)
        if response_df_aligned.empty or power_df_aligned.empty:
            continue
        # Divide by the raw values so the differing column labels of the two
        # frames do not trigger label alignment.
        power_response_ratio[resolution][model] = power_df_aligned.div(response_df_aligned.values, axis=0, level=0)

for resolution, models in sorted(power_response_ratio.items()):
    if not models:
        # pd.concat raises ValueError on an empty mapping; report and move on
        # instead of aborting the remaining plots.
        print(f"No aligned power/latency data for resolution {resolution}; skipping plot")
        continue
    ratio_df = pd.concat(models, axis=1)
    ratio_df.columns = [f"{model}_ratio" for model in models.keys()]
    fig = px.line(ratio_df, title=f'Watt per latency (Resolution: {resolution})')
    fig.update_layout(xaxis_title='Time', yaxis_title='Watt per latency')
    fig.show()