In [None]:
import packages
import pandas as pd
import os

In [2]:
# Read the performance log CSV that lives under the application path.
performance_log_path = f"{packages.APP_PATH}/data/logs/performance_logs.csv"
df = pd.read_csv(performance_log_path)

In [None]:
# Show the dtype pandas assigned to each column of the loaded frame.
df.dtypes

In [None]:
# Quick look at the log: one line per column, in a fixed order.

# Metadata columns: list every distinct value.
for column in (
    "metadata_algorithm",
    "metadata_dataset",
    "metadata_inference_server",
    "metadata_model",
):
    print(f"{column}: {df[column].unique()}")

# Usage columns: preview only the first 50 characters of the first row.
# str() keeps the preview working when that cell is not a string (e.g. NaN
# for an empty CSV field), where slicing directly would raise TypeError.
for column in ("performance_data_cpu_usage", "performance_data_ram_usage"):
    print(f"{column}: {str(df[column].iloc[0])[:50]}")

# Remaining columns: print the first-row value as-is.
# Each entry is (column name, label to print); only peak_memory uses a label
# that differs from its column name.
first_row_samples = [
    ("performance_data_peak_memory", "performance_data_peak_memory (float)"),
    ("performance_data_system_performance_cpu_percent_avg", "performance_data_system_performance_cpu_percent_avg"),
    ("performance_data_system_performance_cpu_percent_peak", "performance_data_system_performance_cpu_percent_peak"),
    ("performance_data_system_performance_cpu_system", "performance_data_system_performance_cpu_system"),
    ("performance_data_system_performance_execution_time", "performance_data_system_performance_execution_time"),
    ("performance_data_system_performance_ram_peak", "performance_data_system_performance_ram_peak"),
    ("system_info_cpu", "system_info_cpu"),
    ("system_info_cpu_count", "system_info_cpu_count"),
    ("system_info_device_name", "system_info_device_name"),
    ("system_info_os", "system_info_os"),
    ("system_info_python_version", "system_info_python_version"),
    ("system_info_total_ram", "system_info_total_ram"),
    ("performance_data_llm_performance_execution_time", "performance_data_llm_performance_execution_time"),
    ("performance_data_llm_performance_first_token_time", "performance_data_llm_performance_first_token_time"),
    ("performance_data_llm_performance_tokens_per_second", "performance_data_llm_performance_tokens_per_second"),
]
for column, label in first_row_samples:
    # .iloc[0] is positional, so it reads the first row whatever the index
    # labels are (a plain [0] is a label lookup).
    print(f"{label}: {df[column].iloc[0]}")


In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ast

# Load the data (fresh read of the performance log CSV)
performance_log_csv = f"{packages.APP_PATH}/data/logs/performance_logs.csv"
df = pd.read_csv(performance_log_csv)

# Function to safely evaluate and calculate mean of list-like strings
def safe_mean(x):
    """Return the arithmetic mean of a list of numbers stored as text.

    Parameters
    ----------
    x : str
        Text containing a Python list literal, e.g. ``"[1.5, 2, 3]"``.
        Any other kind of value is treated as unparseable.

    Returns
    -------
    float or None
        The mean of the parsed values, or ``None`` when ``x`` cannot be
        parsed, does not evaluate to a list, evaluates to an empty list,
        or contains an element that is not an ``int``/``float``.
    """
    try:
        values = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only parse failures are treated as "no data"; anything else
        # (KeyboardInterrupt, genuine bugs) is allowed to propagate.
        return None

    # An empty list has no mean; reject it explicitly instead of relying on
    # a swallowed ZeroDivisionError.
    if not isinstance(values, list) or not values:
        return None
    if not all(isinstance(item, (int, float)) for item in values):
        return None

    try:
        return sum(values) / len(values)
    except OverflowError:
        # Integers too large to be represented as a float.
        return None

# Turn the text-encoded measurement columns into plain floats.

# Stringified lists of samples -> their mean (None when unparseable).
for samples_column in ('performance_data_cpu_usage', 'performance_data_ram_usage'):
    df[samples_column] = df[samples_column].apply(safe_mean)

# Values with trailing '%' characters -> float.
for percent_column in (
    'performance_data_system_performance_cpu_percent_avg',
    'performance_data_system_performance_cpu_percent_peak',
):
    stripped = df[percent_column].str.rstrip('%')
    df[percent_column] = stripped.astype('float')

# Whitespace-separated values -> keep the first token and convert it to float.
for token_column in (
    'performance_data_system_performance_cpu_system',
    'performance_data_system_performance_execution_time',
    'performance_data_system_performance_ram_peak',
):
    first_token = df[token_column].str.split().str[0]
    df[token_column] = first_token.astype('float')

# Build the 3x3 subplot grid: box plots in rows 1-2 (last cell of row 2 is
# left empty) and a row of three tables at the bottom.
panel_titles = (
    "<b>Total Execution Time</b>",
    "<b>LLM Tokens per Second</b>",
    "<b>Time to First Token</b>",
    "<b>CPU Usage</b>",
    "<b>RAM Usage</b>",
    "<b>Model and Dataset Info</b>",
    "<b>System Info</b>",
    "<b>Inference Info</b>",
)
panel_specs = [
    [{"type": "box"} for _ in range(3)],
    [{"type": "box"}, {"type": "box"}, None],
    [{"type": "table"} for _ in range(3)],
]
fig = make_subplots(
    rows=3,
    cols=3,
    subplot_titles=panel_titles,
    specs=panel_specs,
    vertical_spacing=0.07,
    row_heights=[0.38, 0.38, 0.24],
)

# Function to add box plot
def add_box_plot(fig, df, y_column, row, col, x_column='metadata_model'):
    """Add a box trace of ``df[y_column]`` grouped by ``df[x_column]`` to ``fig``.

    Parameters
    ----------
    fig
        Figure that receives the trace through ``fig.add_trace``.
    df
        Table providing the ``x_column`` and ``y_column`` columns.
    y_column : str
        Column plotted on the y axis; also used as the trace name.
    row, col
        Subplot grid position, forwarded unchanged to ``fig.add_trace``.
    x_column : str, optional
        Column used for the x-axis grouping. Defaults to ``'metadata_model'``,
        which was the previously hard-coded grouping.

    Returns
    -------
    None
        The figure is modified in place.
    """
    box_trace = go.Box(x=df[x_column], y=df[y_column], name=y_column)
    fig.add_trace(box_trace, row=row, col=col)

# Add box plots: one (metric column, grid row, grid column) entry per panel
box_plot_panels = [
    ('performance_data_llm_performance_execution_time', 1, 1),
    ('performance_data_llm_performance_tokens_per_second', 1, 2),
    ('performance_data_llm_performance_first_token_time', 1, 3),
    ('performance_data_system_performance_cpu_percent_avg', 2, 1),
    ('performance_data_system_performance_ram_peak', 2, 2),
]
for metric_column, grid_row, grid_col in box_plot_panels:
    add_box_plot(fig, df, metric_column, grid_row, grid_col)

def _add_info_table(figure, frame, header_labels, table_col):
    """Add ``frame`` as a table trace in row 3, column ``table_col`` of ``figure``.

    ``header_labels`` supplies the header cells; every column of ``frame``
    becomes one column of table cells.
    """
    table_trace = go.Table(
        header=dict(values=header_labels, align="left", font=dict(size=11)),
        cells=dict(
            values=[frame[column_name] for column_name in frame.columns],
            align="left",
            font=dict(size=10),
        ),
    )
    figure.add_trace(table_trace, row=3, col=table_col)


# Model and Dataset Info: number of log rows per (model, dataset) pair
model_dataset_info = (
    df.groupby(['metadata_model', 'metadata_dataset'])
    .size()
    .reset_index(name='count')
)
_add_info_table(
    fig,
    model_dataset_info,
    ["<b>Model</b>", "<b>Dataset</b>", "<b>Count</b>"],
    1,
)

# System Info: distinct system configurations found in the log
system_info_columns = [
    'system_info_cpu',
    'system_info_cpu_count',
    'system_info_os',
    'system_info_python_version',
    'system_info_total_ram',
]
system_info = df[system_info_columns].drop_duplicates()
_add_info_table(
    fig,
    system_info,
    ["<b>CPU</b>", "<b>CPU Count</b>", "<b>OS</b>", "<b>Python Version</b>", "<b>Total RAM</b>"],
    2,
)

# Inference Info: number of log rows per (inference server, algorithm) pair
inference_info = (
    df.groupby(['metadata_inference_server', 'metadata_algorithm'])
    .size()
    .reset_index(name='algo_count')
)
_add_info_table(
    fig,
    inference_info,
    ["<b>Inference Server</b>", "<b>Algorithm</b>", "<b>Count</b>"],
    3,
)

# Figure-wide layout: size, title, margins, and no legend
layout_settings = dict(
    title_text="LLM Performance Analysis",
    width=1500,
    height=1700,
    margin=dict(l=50, r=50, t=100, b=50),
    showlegend=False,
)
fig.update_layout(**layout_settings)

# Label the x-axis of each of the five box-plot panels
box_panel_cells = [(1, 1), (1, 2), (1, 3), (2, 1), (2, 2)]
for grid_row, grid_col in box_panel_cells:
    fig.update_xaxes(title_text="Model", row=grid_row, col=grid_col)

# Adjust subplot titles: give every layout annotation the same font
for title_annotation in fig.layout.annotations:
    title_annotation['font'] = dict(size=14, color="black")

# Render the finished figure
fig.show()