In [None]:
import pandas as pd



def clean_df(dataframe, max_unique=2):
    """Drop (near-)constant columns from ``dataframe`` and clean up its headers.

    A column is treated as "static" when it has at most ``max_unique`` distinct
    values according to ``DataFrame.nunique()`` (which, by default, does not
    count NaN). The input frame itself is not modified; ``drop`` returns a new
    frame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Raw frame to clean.
    max_unique : int, default 2
        Largest number of distinct values a column may have and still be
        removed as static. The default keeps the original threshold.

    Returns
    -------
    The result of ``clean_up_headers`` applied to the reduced frame
    (presumably a DataFrame with human-readable column names -- the helper is
    defined outside this notebook cell).
    """
    df = dataframe
    # Report the size of the frame as loaded
    print(f"Loaded {len(df)} rows and {len(df.columns)} columns")

    # Number of distinct values per column
    unique_counts = df.nunique()
    # Columns with too few distinct values to carry useful signal
    static_columns = unique_counts[unique_counts <= max_unique].index
    df = df.drop(columns=static_columns)
    print(f"Removing {len(static_columns)} static columns ({len(df.columns)} remaining)")

    if len(df.columns) < 100:
        # Only preview small frames (thousands of columns stall the IDE).
        # A bare `df.head()` inside a function is evaluated and thrown away,
        # so the preview has to be emitted explicitly.
        print(df.head())

    # Change the dataframe headers to a more human-readable format
    return clean_up_headers(df)



In [None]:

#Next we will create plots of power consumption vs quality of service for 1 (low), 2 (stable), and 4 (high) mbps input of data
from utils.header_cleaner import clean_up_headers
import difflib


def clean_and_calculate_power(dataframe):
    """Clean ``dataframe`` and append energy/power columns to it.

    The frame is first passed through ``clean_df``. Up to six column names
    that most resemble 'kepler node joules total' are then picked by fuzzy
    string matching and summed row-wise into ``total_joules``;
    ``power_consumed`` is the row-to-row difference of that sum (so its first
    row is NaN).

    NOTE(review): the 0.05 cutoff is very permissive, so whichever columns
    happen to be the closest are used even if they are only loosely related to
    the target phrase -- confirm the selected columns against the data.

    Returns the cleaned frame with the two extra columns added.
    """
    frame = clean_df(dataframe)

    # Fuzzy-match the header names against the target phrase
    joule_columns = difflib.get_close_matches(
        'kepler node joules total',
        frame.columns,
        n=6,
        cutoff=0.05,
    )

    # Row-wise total across the matched columns, then its first difference
    row_totals = frame[joule_columns].sum(axis=1)
    frame['total_joules'] = row_totals
    frame['power_consumed'] = frame['total_joules'].diff()
    return frame



In [None]:

# Maps input speed (as a string, e.g. '9') to the row-wise mean of
# 'power_consumed' across all workers of that run.
average_power = {}

input_speeds = [9, 1, 2, 4]
for mbps in input_speeds:
    # The 9 mbps run lives in a different dataset directory than the others
    if mbps == 9:
        prom_path = "../../../data/processed/19.8_ajo/prom/linear_9/"
    else:
        path = "../../../data/processed/8.8_ajo/"
        prom_path = path + f"prom/{mbps} mbps/"

    # Collect every worker's power series first. Previously the average was
    # recomputed (and overwritten) inside the worker loop from a single
    # worker's series, so only the last worker ended up in `average_power`.
    worker_power = []
    for work_num in range(1, 6):
        temp_path = prom_path + f"worker{work_num}.feather"
        print(temp_path)
        worker_df = clean_and_calculate_power(pd.read_feather(temp_path))
        worker_power.append(worker_df['power_consumed'])

    # One column per worker, aligned on the row index, then averaged per row
    concatenated_power = pd.concat(worker_power, axis=1)
    average_power[f'{mbps}'] = concatenated_power.mean(axis=1)
# 1. yolo total time vs average power consumption per worker



In [None]:
import plotly.express as px
import plotly.graph_objs as go

def min_max_norm(dataframe, col_name):
    """Return column ``col_name`` rescaled linearly so its min is 0 and its max is 1.

    The input frame is left untouched. A column whose min equals its max has a
    zero range, so the division yields NaN for it.
    """
    column = dataframe[col_name]
    lowest = column.min()
    highest = column.max()
    return (column - lowest) / (highest - lowest)

# One frame per input speed (average power + queue time). They are joined once
# after the loop instead of re-concatenating a growing frame on every iteration.
power_frames = []
for mbps in input_speeds:
    if mbps == 9:
        yolo_df = pd.read_feather("../../../data/processed/19.8_ajo/yolo/yolo_qos.feather")
        # Keep only requests that started after this epoch-millisecond cutoff.
        # `.copy()` makes the filtered frame independent of the original, so
        # adding the 'start' column below is not an assignment on a slice
        # (SettingWithCopyWarning).
        yolo_df = yolo_df.loc[yolo_df['start_time'] > 1724070345000].copy()
    else:
        yolo_df = pd.read_feather(f"../../../data/processed/8.8_ajo/yolo/{mbps}_mbps/yolo_qos.feather")

    result_df = pd.DataFrame({f'average_power_consumed_{mbps}': average_power[f'{mbps}']})
    # NOTE(review): this assignment aligns on index labels, i.e. power sample i
    # is paired with the yolo row labelled i. For the filtered 9 mbps frame the
    # labels no longer start at 0 -- confirm this pairing is the intended one.
    result_df[f'queue_{mbps}'] = yolo_df['queue']
    power_frames.append(result_df)

    # 2. queue time, average power consumption, and requests processed per
    #    interval, all in one graph
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')

    # Group into 5-second intervals ('s' is the non-deprecated spelling of the
    # seconds alias); the same resampler serves both aggregations below.
    intervals = yolo_df.resample('5s')
    # Number of rows (processed requests) per interval
    interval_counts_df = intervals.size().reset_index(name='yolo_instances_processed')
    # Mean queue time per interval
    average_queue_df = (
        intervals.agg({'queue': 'mean'})
        .reset_index()
        .rename(columns={'queue': 'queue_time'})
    )

    # Normalise each series to [0, 1] so they can share one y-axis
    queue_norm = min_max_norm(average_queue_df, 'queue_time')
    power_norm = min_max_norm(result_df, f'average_power_consumed_{mbps}')
    count_norm = min_max_norm(interval_counts_df, 'yolo_instances_processed')

    # NOTE(review): the three series are joined on their positional row index
    # (5-second interval number vs. power sample number); this only lines up in
    # time if the power samples are also 5 seconds apart -- TODO confirm.
    queue_power_images_df = pd.concat([queue_norm, power_norm, count_norm], axis=1)
    fig = px.line(queue_power_images_df, x=queue_power_images_df.index, y=queue_power_images_df.columns)
    fig.update_layout(title='Queue Time, Average Power, and Instances Processed Over Time', xaxis_title='Time', yaxis_title='Normalized')
    fig.show()

# Side-by-side join of all per-speed frames (outer join on the row index)
power_consumption_df = pd.concat(power_frames, axis=1)

# Scatter of power consumption against queue time, one trace per input speed
pcd = power_consumption_df
traces = [
    go.Scatter(
        x=pcd[f'average_power_consumed_{mbps}'],
        y=pcd[f'queue_{mbps}'],
        mode='markers',
        name=f'{mbps}_mbps',
    )
    for mbps in reversed(input_speeds)
]
# Combine traces into a figure
fig = go.Figure(data=traces)
# Customize the layout
fig.update_layout(
    title='Power Consumption vs Queue Time for 1, 2, 4, 9 mbps input speed',
    xaxis_title='Power Consumption',
    yaxis_title='Queue Time'
)

# Show the plot
fig.show()

#jupyter nbconvert --clear-output --inplace src/dataset-tools/plot_testing/roope_plotting.ipynb


In [None]:
# Scripts for looking at multiple measures of QoS:
# - inf + pre + post (total inference time)
# - inf + pre + post + queue (end-to-end response time)
# - images per second processed vs energy expenditure (system throughput with respect to energy usage)
# The idea is to compare the different QoS metrics under different severities of load.

import numpy as np
import scipy.stats as stats

# Per-speed QoS series keyed by output column name. The frame is built once
# after the loop: assigning columns one by one into an empty DataFrame pins its
# index to the first series and silently truncates/pads every later one.
qos_columns = {}

for mbps in input_speeds:

    if mbps == 9:
        yolo_df = pd.read_feather("../../../data/processed/19.8_ajo/yolo/yolo_qos.feather")
        # Keep only requests started after this epoch-millisecond cutoff; copy so
        # the column assignments below are not made on a slice of the original.
        yolo_df = yolo_df.loc[yolo_df['start_time'] > 1724070345000].copy()
    else:
        yolo_df = pd.read_feather(f"../../../data/processed/8.8_ajo/yolo/{mbps}_mbps/yolo_qos.feather")

    yolo_df['total_inference_time'] = yolo_df['inf'] + yolo_df['post'] + yolo_df['pre']
    yolo_df['end_to_end_response_time'] = yolo_df['total_inference_time'] + yolo_df['queue']
    yolo_df['start'] = pd.to_datetime(yolo_df['start_time'], unit='ms')
    yolo_df = yolo_df.set_index('start')

    # Mean end-to-end response time per 5-second interval, re-indexed 0..n-1.
    # Divided by 1000 -- presumably milliseconds to seconds; confirm the unit.
    resampled_df = yolo_df.resample('5s')
    end_to_end = (
        resampled_df.agg({'end_to_end_response_time': 'mean'})
        .reset_index()['end_to_end_response_time']
        / 1000
    )
    qos_columns[f'end_to_end_{mbps}'] = end_to_end
    # NOTE(review): despite the column name this is latency divided by power,
    # paired by positional row index with the power samples -- confirm that the
    # power samples are on the same 5-second grid.
    qos_columns[f'throughput_{mbps}'] = end_to_end / pcd[f'average_power_consumed_{mbps}']

qos_df = pd.DataFrame(qos_columns)

# bar chart:
# x: the different runs (e.g. the mbps values)
# y: avg_latency, avg_power, avg_latency_per_avg_power (these can be plotted with
#    some kind of error bar so that one bar shows min, max and avg, or
#    alternatively the 95% confidence interval and the average)

confidence = 0.95
qos_map = {}
errors = {}
for mbps in input_speeds:
    metrics = [
        qos_df[f'end_to_end_{mbps}'],
        pcd[f'average_power_consumed_{mbps}'],
        qos_df[f'throughput_{mbps}'],
    ]
    # pandas' mean() skips NaN on its own
    qos_map[f'{mbps}'] = [data.mean() for data in metrics]

    errors[f'{mbps}'] = []
    for data in metrics:
        # stats.sem propagates NaN by default and the series contain NaN
        # (first diff() row, empty intervals), so drop them first; the degrees
        # of freedom must likewise count only the valid observations.
        valid = data.dropna()
        sem = stats.sem(valid)  # Standard error of the mean
        conf_interval = sem * stats.t.ppf((1 + confidence) / 2., len(valid) - 1)  # half-width of the CI
        errors[f'{mbps}'].append(conf_interval)

fig = go.Figure()

# One bar group per input speed; one trace per metric, in the order the
# metrics were appended to qos_map / errors above.
categories = ['latency', 'power', 'latency per power']

for i, category in enumerate(categories):
    y_means = [qos_map[key][i] for key in qos_map]
    y_errors = [errors[key][i] for key in qos_map]

    fig.add_trace(go.Bar(
        x=list(qos_map.keys()),
        y=y_means,
        name=category,
        error_y=dict(
            type='data',
            array=y_errors,
            visible=True
        )
    ))

# Update layout for better readability
fig.update_layout(
    barmode='group',  # Group bars for each x value
    title='Grouped Bar Graph with 95% Confidence Intervals',
    xaxis_title='Keys',
    yaxis_title='Queue Time (s) / Power (W) / latency per watt',
    legend_title='Categories'
)

# Show the figure
fig.show()



