In [12]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

# Load the benchmark results. The columns used below are "model", "gpu"
# and "tokens_per_second".
df = pd.read_csv("../data/llm_results.csv")

# Rows whose model name starts with "text-" or "gpt-" get the gpu label "Cloud".
# na=False makes a missing model name count as "no match"; without it the mask
# would contain NaN and the .loc assignment would raise.
is_cloud = df["model"].str.startswith("text-", na=False) | df["model"].str.startswith(
    "gpt-", na=False
)
df.loc[is_cloud, "gpu"] = "Cloud"

# Models excluded from this chart: the explicit list below plus every model
# whose name contains "gpt2". A separate "gpt2-medium" filter is unnecessary
# because the "gpt2" substring match already covers it.
drop_models = ["text-davinci-002", "text-ada-001", "text-babbage-001", "text-curie-001"]
is_excluded = df["model"].isin(drop_models) | df["model"].str.contains(
    "gpt2", regex=False, na=False
)
df = df[~is_excluded]

# Fastest rows first, so the first row seen for any model/GPU is its best one.
df_sorted = df.sort_values(by="tokens_per_second", ascending=False)

# Group the rows by (model, gpu) pair.
grouped = df_sorted.groupby(["model", "gpu"])

# Distinct models and GPUs, in order of first appearance in the sorted frame.
models = df_sorted["model"].unique()
gpus = df_sorted["gpu"].unique()

# Fixed colour for each GPU label.
gpu_colors = {
    "RTX3090": "rgb(31, 119, 180)",  # Blue
    "Cloud": "rgb(255, 127, 14)",  # Orange
    "A10": "rgb(44, 160, 44)",  # Green
    "H100": "rgb(214, 39, 40)",  # Red
}

# Horizontal grouped bar chart: one trace per GPU, one bar slot per model.
fig = go.Figure()

# Fail with a readable message instead of a ZeroDivisionError further down.
if len(gpus) == 0:
    raise ValueError("No benchmark rows left to plot after filtering")

# Each model gets a band 0.8 units tall, shared evenly between the GPUs.
bar_width = 0.8 / len(gpus)

# Best tokens/second for every (model, gpu) pair as a model x gpu table.
# Using max() does not rely on the frame having been sorted beforehand;
# pairs with no measurement become 0 so every trace has one value per model.
best_tps = (
    df_sorted.groupby(["model", "gpu"])["tokens_per_second"]
    .max()
    .unstack("gpu")
    .reindex(index=models, columns=gpus)
    .fillna(0)
)

# One bar trace per GPU column.
for gpu, tokens in best_tps.items():
    fig.add_trace(
        go.Bar(
            y=models,
            x=tokens.to_numpy(),
            name=str(gpu),
            orientation="h",
            width=bar_width,
            # GPUs missing from the palette fall back to Plotly's default
            # colour instead of raising KeyError.
            marker_color=gpu_colors.get(gpu),
        )
    )

# Titles and grouped layout.
fig.update_layout(
    barmode="group",
    title="Model Inference Comparison",
    xaxis_title="Tokens per Second",
    yaxis_title="Model",
)

# Display the chart
fig.show()

# Optional interactive export, left disabled:
# pio.write_html(fig, file="benchmarks_all.html", auto_open=True)

# Save as PNG; width/height are the layout size and scale multiplies the
# output resolution.
fig.write_image("benchmarks_all_models.png", format="png", width=400, height=500, scale=5)

In [15]:
import pandas as pd
import plotly.graph_objects as go

# Load the benchmark results. The columns used below are "model", "gpu"
# and "tokens_per_second".
# NOTE(review): the earlier cell reads "../data/llm_results.csv" while this one
# reads from the working directory - confirm both paths are intended.
df = pd.read_csv("llm_results.csv")

# Rows whose model name starts with "gpt-" get the gpu label "Cloud".
# "text-" models need no relabelling here because every "text..." row is
# dropped just below. na=False makes a missing model name count as "no match"
# instead of putting NaN into the mask, which would make .loc raise.
df.loc[df["model"].str.startswith("gpt-", na=False), "gpu"] = "Cloud"

# Remove any rows with models starting with 'gpt2' or 'text'.
is_excluded = df["model"].str.startswith("gpt2", na=False) | df["model"].str.startswith(
    "text", na=False
)
df = df[~is_excluded]

# Fastest rows first, so the first row seen for any model/GPU is its best one.
df_sorted = df.sort_values(by="tokens_per_second", ascending=False)

# Group the rows by (model, gpu) pair.
grouped = df_sorted.groupby(["model", "gpu"])

# Distinct models and GPUs, in order of first appearance in the sorted frame.
models = df_sorted["model"].unique()
gpus = df_sorted["gpu"].unique()

# Fixed colour for each GPU label.
gpu_colors = {
    "RTX3090": "rgb(31, 119, 180)",  # Blue
    "Cloud": "rgb(255, 127, 14)",  # Orange
    "A10": "rgb(44, 160, 44)",  # Green
    "H100": "rgb(214, 39, 40)",  # Red
}

# Horizontal grouped bar chart: one trace per GPU, one bar slot per model.
fig = go.Figure()

# Fail with a readable message instead of a ZeroDivisionError further down.
if len(gpus) == 0:
    raise ValueError("No benchmark rows left to plot after filtering")

# Each model gets a band 0.8 units tall, shared evenly between the GPUs.
bar_width = 0.8 / len(gpus)

# Best tokens/second for every (model, gpu) pair as a model x gpu table.
# Using max() does not rely on the frame having been sorted beforehand;
# pairs with no measurement become 0 so every trace has one value per model.
best_tps = (
    df_sorted.groupby(["model", "gpu"])["tokens_per_second"]
    .max()
    .unstack("gpu")
    .reindex(index=models, columns=gpus)
    .fillna(0)
)

# One bar trace per GPU column.
for gpu, tokens in best_tps.items():
    fig.add_trace(
        go.Bar(
            y=models,
            x=tokens.to_numpy(),
            name=str(gpu),
            orientation="h",
            width=bar_width,
            # GPUs missing from the palette fall back to Plotly's default
            # colour instead of raising KeyError.
            marker_color=gpu_colors.get(gpu),
        )
    )

# Titles and grouped layout.
fig.update_layout(
    barmode="group",
    title="Token Performance Comparison",
    xaxis_title="Tokens per Second",
    yaxis_title="Model",
)

# Display the chart
fig.show()

# Save as PNG; width/height are the layout size and scale multiplies the
# output resolution.
fig.write_image("benchmarks_large_models.png", format="png", width=800, height=400, scale=5)