In [None]:
import os

from pandas.core.interchange.dataframe_protocol import DataFrame


def find_subfolders_with_file(root_folder, filename):
    """Return every directory under ``root_folder`` that directly contains ``filename``.

    The tree is traversed with ``os.walk``; ``root_folder`` itself is included
    when it holds the file. Paths are returned in traversal order, and an
    empty list is returned when nothing matches.
    """
    return [
        current_dir
        for current_dir, _subdirs, file_names in os.walk(root_folder)
        if filename in file_names
    ]

# Discover the run folders: any directory below `root_folder` that holds a
# `worker1.feather` file is treated as one run. `subfolders` is consumed by
# later cells to build the per-run path tables.
root_folder = '../../../data/minimized'
filename = 'worker1.feather'
subfolders = find_subfolders_with_file(root_folder=root_folder, filename=filename)
print(subfolders)

In [None]:
import os
import pandas as pd
from functools import lru_cache

@lru_cache(maxsize=None)
def _read_feather_shared(file_path):
    """Read ``file_path`` from disk once and keep the resulting frame cached."""
    return pd.read_feather(file_path)


def read_feather_cached(file_path):
    """Return the contents of a feather file, reading it from disk only once.

    Each call returns an independent copy of the cached frame. Callers in this
    notebook sort and add columns to the frames they receive; handing out the
    cached object itself would let those edits leak into every later read of
    the same path.

    Args:
        file_path: Path of the feather file. It is used as the cache key, so
            it must be hashable (e.g. ``str``).

    Returns:
        A ``pandas.DataFrame`` that the caller may freely mutate.
    """
    return _read_feather_shared(file_path).copy()


# Keep the cache-management helpers that ``lru_cache`` used to expose directly
# on ``read_feather_cached``.
read_feather_cached.cache_clear = _read_feather_shared.cache_clear
read_feather_cached.cache_info = _read_feather_shared.cache_info

In [None]:
from utils.header_cleaner import *
import difflib
import os

# The goal is simple: compare the performance of YOLOv8n under PyTorch and OpenVINO.
# Two easy metrics to compare are power consumption and latency.

def clean_and_calculate_power(dataframe):
    """Derive a per-sample power series from the Kepler energy columns.

    The frame is passed through ``clean_df``, ordered by ``timestamp``, and
    the (up to six) columns whose names best resemble
    ``'kepler node joules total dynamic'`` are summed into ``total_joules``.
    ``power_consumed`` is the change in ``total_joules`` between consecutive
    samples divided by the time elapsed between those same two samples.

    Args:
        dataframe: Metrics frame; after ``clean_df`` it must contain a
            ``timestamp`` column.

    Returns:
        A new, timestamp-sorted DataFrame with ``total_joules`` and
        ``power_consumed`` added. The first row's ``power_consumed`` is NaN
        (it has no predecessor), as is any row whose timestamp equals the
        previous one.
    """
    # sort_values (without inplace) returns a new frame, so neither the
    # caller's frame nor whatever clean_df handed back is reordered or
    # extended as a side effect.
    cleaned_df = clean_df(dataframe).sort_values(by="timestamp")

    # Target word matching: the cutoff is so permissive that almost any column
    # name qualifies, so keep the time axis itself out of the candidate list.
    # NOTE(review): with fewer than six energy columns, other unrelated
    # columns can still be matched and summed - check the printed list.
    target_word = 'kepler node joules total dynamic'
    candidates = [col for col in cleaned_df.columns if col != "timestamp"]
    closest_matches = difflib.get_close_matches(target_word, candidates, n=6, cutoff=0.05)
    print(closest_matches)
    cleaned_df['total_joules'] = cleaned_df[closest_matches].sum(axis=1)

    # Divide by the real gap between consecutive samples. Looking up ts[0] and
    # ts[1] would go by index *label*, which after sorting need not be the
    # first two samples (and may not exist at all).
    elapsed = cleaned_df["timestamp"].diff()
    elapsed = elapsed.mask(elapsed == 0)  # a zero gap has no defined rate
    # Presumably joules over seconds, i.e. watts - TODO confirm timestamp units.
    cleaned_df['power_consumed'] = cleaned_df['total_joules'].diff() / elapsed
    return cleaned_df


# One entry per discovered run folder, keyed by the folder's own name.
# NOTE(review): two run folders with the same name under different parents
# would overwrite each other here.
prom_data_paths = {os.path.basename(x): x for x in subfolders}
yolo_data_paths = {key: os.path.join(val, "yolo_qos.feather") for key, val in prom_data_paths.items()}

total_power = {}
response_time = {}

# Per run: compute the power series of each of the five workers and average
# them row-wise (pandas aligns the five series on their index labels).
# NOTE(review): run discovery only checks for worker1.feather; workers 2-5 are
# assumed to exist alongside it.
for key, run_folder in prom_data_paths.items():
    paths = []
    for work_num in range(1, 6):
        temp_path = os.path.join(run_folder, f"worker{work_num}.feather")
        print(temp_path)
        paths.append(temp_path)
    worker_power = [clean_and_calculate_power(read_feather_cached(x))['power_consumed'] for x in paths]
    concatenated_power = pd.concat(worker_power, axis=1)
    total_power[key] = concatenated_power.mean(axis=1)

# Per run: mean end-to-end response time in 5-second bins.
for key, yolo_path in yolo_data_paths.items():
    yolo_raw = read_feather_cached(yolo_path)
    total_inference_time = yolo_raw['inf'] + yolo_raw['post'] + yolo_raw['pre']
    # assign/set_index build a new frame, so the frame held by the read cache
    # is never modified.
    yolo_df = yolo_raw.assign(
        total_inference_time=total_inference_time,
        end_to_end_response_time=total_inference_time + yolo_raw['queue'],
        start=pd.to_datetime(yolo_raw['start_time'], unit='ms'),  # epoch milliseconds -> datetime
    ).set_index('start')
    # Lowercase 's' is the current spelling of the seconds alias ('S' is deprecated).
    resampled_df = yolo_df.resample('5s')
    # The datetime index is dropped on purpose: each run is then indexed by
    # bin number, so runs recorded at different times line up on one axis.
    response_time[key] = resampled_df['end_to_end_response_time'].mean().reset_index(drop=True).rename(key)

In [None]:
#plot the graphs
import plotly.express as px
from matplotlib import pyplot as plt

# Average power per run: one line per key of `total_power`.
power_df = pd.DataFrame.from_dict(total_power)
fig = px.line(power_df)
fig.update_layout(title='Average Power Over Time', xaxis_title='Time', yaxis_title='Power in Watts',
                  yaxis_range=[-20,80])
fig.show()

# Average response time per run on a logarithmic y-axis: one line per key of
# `response_time`, plotted against the frame's index.
queue_df = pd.DataFrame.from_dict(response_time)
fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
fig.update_layout(title='Average Response Time Over Time', xaxis_title='Time', yaxis_title='Response Time in MS', yaxis_type='log')
fig.show()

In [None]:
#plot the graphs
import plotly.express as px
from matplotlib import pyplot as plt

# NOTE(review): this cell repeats the plotting cell directly above it;
# consider keeping only one of the two.

# Average power per run: one line per key of `total_power`.
power_df = pd.DataFrame.from_dict(total_power)
fig = px.line(power_df)
fig.update_layout(title='Average Power Over Time', xaxis_title='Time', yaxis_title='Power in Watts',
                  yaxis_range=[-20,80])
fig.show()

# Average response time per run on a logarithmic y-axis.
queue_df = pd.DataFrame.from_dict(response_time)
fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
fig.update_layout(title='Average Response Time Over Time', xaxis_title='Time', yaxis_title='Response Time in MS', yaxis_type='log')
fig.show()

In [None]:
import os
import difflib
import pandas as pd
import plotly.express as px
from utils.header_cleaner import *

def calculate_yolo_input_rate(dataframe):
    """Compute the per-sample rate of change of the summed ``yolo_input`` columns.

    Rows are ordered by ``timestamp``; the (up to six) columns whose names
    best resemble ``'kafka_server_brokertopicmetrics_bytesin_total yolo_input'``
    are summed, and the change of that sum between consecutive samples is
    divided by the time elapsed between those same two samples.

    Args:
        dataframe: Metrics frame containing a ``timestamp`` column. It is not
            modified.

    Returns:
        A Series named ``yolo_input_rate`` that keeps the original index
        labels, in timestamp order. The first value is NaN (no predecessor),
        as is any value whose timestamp equals the previous one.
    """
    # sort_values returns a new frame; the caller's frame is left untouched.
    ordered = dataframe.sort_values(by="timestamp")

    # The cutoff is so permissive that almost any column name qualifies, so
    # keep the time axis itself out of the candidate list.
    # NOTE(review): up to six fuzzy matches are summed here, whereas
    # calculate_rate uses only the single best match - confirm every matched
    # column really belongs to the yolo_input topic.
    target_word = 'kafka_server_brokertopicmetrics_bytesin_total yolo_input'
    candidates = [col for col in ordered.columns if col != "timestamp"]
    closest_matches = difflib.get_close_matches(target_word, candidates, n=6, cutoff=0.05)
    total_yolo_input = ordered[closest_matches].sum(axis=1)

    # Divide by the real gap between consecutive samples. Looking up ts[0] and
    # ts[1] would go by index *label*, which after sorting need not be the
    # first two samples (and may not exist at all).
    elapsed = ordered["timestamp"].diff()
    elapsed = elapsed.mask(elapsed == 0)  # a zero gap has no defined rate
    return (total_yolo_input.diff() / elapsed).rename('yolo_input_rate')

# Per-run yolo_input rate, computed from each run's merged metrics file.
download_rate = {}
for key, run_folder in prom_data_paths.items():
    path = f"{run_folder}/intermediate/full.feather"
    download_rate[key] = calculate_yolo_input_rate(read_feather_cached(path))

# One column per run.
yolo_input_df = pd.DataFrame.from_dict(download_rate)

fig_download_rate = px.line(yolo_input_df, x=yolo_input_df.index, y=yolo_input_df.columns)
download_rate_layout = {
    'title': 'Download Rate Over Time',
    'xaxis_title': 'Time',
    'yaxis_title': 'Download Rate',
}
fig_download_rate.update_layout(**download_rate_layout)
fig_download_rate.show()

In [None]:
# Average power per run.
power_df = pd.DataFrame.from_dict(total_power)
fig_power = px.line(power_df, title='Average Power Over Time', labels={'value': 'Power in Watts'})
fig_power.show()

# yolo_input rate per run. The frame is plotted in wide form (one column per
# run), so the y-axis label is keyed by 'value'; keying it by a column name
# such as 'yolo_input_rate' has no effect.
fig_download_rate = px.line(yolo_input_df,
                            title='Download Rate Over Time', labels={'value': 'Download Rate'})
fig_download_rate.show()

# Average response time per run on a logarithmic y-axis.
queue_df = pd.DataFrame.from_dict(response_time)
fig = px.line(queue_df, x=queue_df.index, y=queue_df.columns)
fig.update_layout(title='Average Response Time Over Time', xaxis_title='Time', yaxis_title='Response Time in MS', yaxis_type='log')
fig.show()

In [None]:
def calculate_rate(dataframe, target_word):
    """Compute the per-sample rate of change of the column best matching ``target_word``.

    Rows are ordered by ``timestamp``; the single column whose name is the
    closest fuzzy match to ``target_word`` is selected (and its name printed),
    and its change between consecutive samples is divided by the time elapsed
    between those same two samples.

    Args:
        dataframe: Metrics frame containing a ``timestamp`` column. It is not
            modified.
        target_word: Text to fuzzy-match against the column names.

    Returns:
        A Series named ``rate`` that keeps the original index labels, in
        timestamp order. The first value is NaN (no predecessor), as is any
        value whose timestamp equals the previous one.

    Raises:
        IndexError: If no column name resembles ``target_word`` at all.
    """
    # sort_values returns a new frame; the caller's frame is left untouched.
    ordered = dataframe.sort_values(by="timestamp")

    # Only the best match is used, so asking for one candidate is enough.
    closest_matches = difflib.get_close_matches(target_word, ordered.columns, n=1, cutoff=0.05)
    if not closest_matches:
        raise IndexError(f"no column name resembles {target_word!r}")
    match = closest_matches[0]
    print(match)

    # Divide by the real gap between consecutive samples. Looking up ts[0] and
    # ts[1] would go by index *label*, which after sorting need not be the
    # first two samples (and may not exist at all).
    elapsed = ordered["timestamp"].diff()
    elapsed = elapsed.mask(elapsed == 0)  # a zero gap has no defined rate
    return (ordered[match].diff() / elapsed).rename('rate')

In [None]:

def plot_rate(keyword, title):
    """Plot, for every run, the rate of the metric whose column best matches ``keyword``.

    For each entry of ``prom_data_paths`` the run's
    ``intermediate/full.feather`` file is loaded and passed to
    ``calculate_rate``; the resulting series are drawn as one line per run in
    a single figure titled ``title``, which is shown immediately.
    """
    rates_by_run = {
        run_name: calculate_rate(
            read_feather_cached(run_folder + "/" + "intermediate/full.feather"),
            keyword,
        )
        for run_name, run_folder in prom_data_paths.items()
    }
    rate_frame = pd.DataFrame.from_dict(rates_by_run)

    figure = px.line(rate_frame, x=rate_frame.index, y=rate_frame.columns)
    figure.update_layout(title=title, xaxis_title='Time')
    figure.show()

In [None]:
# Rate of the column that best matches the yolo_output bytes-in metric, one line per run.
plot_rate(
    keyword="kafka_server_brokertopicmetrics_bytesin_total yolo_output",
    title="yolo output rate",
)

In [None]:
import plotly.graph_objects as go

# Overlay power (left axis) and response time (right, logarithmic axis) for
# every run in a single figure.
fig = go.Figure()

# One power trace per run, drawn against the primary y-axis.
for column in power_df.columns:
    power_trace = go.Scatter(
        x=power_df.index,
        y=power_df[column],
        mode='lines',
        name=f'Power {column}',
    )
    fig.add_trace(power_trace)

# One response-time trace per run, bound to the secondary y-axis.
for column in queue_df.columns:
    response_trace = go.Scatter(
        x=queue_df.index,
        y=queue_df[column],
        mode='lines',
        name=f'Response Time {column}',
        yaxis="y2",
    )
    fig.add_trace(response_trace)

# Dual y-axis layout: the second axis overlays the first and sits on the right.
dual_axis_layout = {
    'title': 'Power and Response Time Over Time',
    'xaxis_title': 'Time',
    'yaxis': {
        'title': 'Power in Watts',
        'range': [-20, 80],
    },
    'yaxis2': {
        'title': 'Response Time in MS',
        'overlaying': 'y',
        'side': 'right',
        'type': 'log',
    },
}
fig.update_layout(**dual_axis_layout)

fig.show()