# IMPORTS

In [1]:
import re
import polars as pl
from pathlib import Path
import plotly.express as px

# CONFIGS

In [2]:
pl.Config.set_fmt_str_lengths(900)
pl.Config.set_tbl_width_chars(900)

polars.config.Config

# VARIABLES

In [3]:
RESULTS_PATH = Path("results")
STATS_GLOB = "*_stats.csv"
FAILURES_GLOB = "*_failures.csv"
EXCEPTIONS_GLOB = "*_exceptions.csv"
HISTORY_GLOB = "*_stats_history.csv"

# DATASETS

In [None]:
def load_dataset(root: Path, glob: str, pattern: re.Pattern = r"^(?P<approach>.+?)_(?P<model>.+?)_.+\.csv$") -> pl.DataFrame:
    dataframes = []
    for file in root.glob(glob):
        match = re.search(pattern, file.name)
        if match:
            dataframe = pl.read_csv(file, try_parse_dates=True)
            dataframe = dataframe.with_columns([
                pl.lit(match.group("approach")).alias("approach"),
                pl.lit(match.group("model")).alias("model"),
                pl.lit(f"{match.group('approach')} - {match.group('model')}").alias("test"),
                pl.arange(0, dataframe.height).alias("index")
            ])
            dataframes.append(dataframe)
    return pl.concat(dataframes, how="vertical")

In [5]:
stats = load_dataset(RESULTS_PATH, STATS_GLOB)
stats

Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,approach,model,index
str,str,i64,i64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,str,str,i64
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/us-central1/endpoints/2990351669667037184:predict""",1213,311,28000.0,29287.715304,2225.500275,60491.58465,831.317395,4.049889,1.038347,28000,46000,60000,60000,60000,60000,60000,60000,60000,60000,60000,"""VertexAIEndpoint""","""gemma-3-12b-it""",0
,"""Aggregated""",1213,311,28000.0,29287.715304,2225.500275,60491.58465,831.317395,4.049889,1.038347,28000,46000,60000,60000,60000,60000,60000,60000,60000,60000,60000,"""VertexAIEndpoint""","""gemma-3-12b-it""",1
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash-lite-preview-06-17:generateContent""",156894,153610,210.0,275.777045,102.164956,3885.674632,446.343436,524.603762,513.623108,210,240,260,280,370,670,1000,1800,3400,3700,3900,"""VertexAIMaaS""","""gemini-2.5-flash-lite-preview-06-17-thinking""",0
,"""Aggregated""",156894,153610,210.0,275.777045,102.164956,3885.674632,446.343436,524.603762,513.623108,210,240,260,280,370,670,1000,1800,3400,3700,3900,"""VertexAIMaaS""","""gemini-2.5-flash-lite-preview-06-17-thinking""",1
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent""",139640,137688,200.0,311.450341,104.102484,10104.550656,378.308973,465.570523,459.062405,200,230,250,260,320,610,1000,3400,8400,9200,10000,"""VertexAIMaaS""","""gemini-2.5-flash-thinking""",0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
,"""Aggregated""",163993,160879,190.0,264.983806,102.714846,4958.318723,479.88008,546.984741,536.598258,190,220,240,260,320,620,1100,2100,3500,4000,5000,"""VertexAIMaaS""","""gemini-2.5-flash-lite-preview-06-17""",1
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent""",112776,110780,190.0,2760.920479,103.059125,3.0859e7,352.599826,3.624184,3.560041,190,220,240,250,340,700,1100,2600,8700,10000,30859000,"""VertexAIMaaS""","""gemini-2.5-flash""",0
,"""Aggregated""",112776,110780,190.0,2760.920479,103.059125,3.0859e7,352.599826,3.624184,3.560041,190,220,240,250,340,700,1100,2600,8700,10000,30859000,"""VertexAIMaaS""","""gemini-2.5-flash""",1
"""POST""","""/api/generate?key=secret""",145810,145649,150.0,290.910375,118.192629,122661.092766,323.85586,487.405383,486.867201,150,170,190,200,290,620,960,1200,21000,75000,123000,"""CloudRunOllama""","""gemma3:12b""",0


In [15]:
history = load_dataset(RESULTS_PATH, HISTORY_GLOB)
history = history.with_columns(
    pl.from_epoch("Timestamp", time_unit="s"),
    # % of failure vs total requesr
    (pl.col("Total Failure Count") / pl.col("Total Request Count") * 100).alias("Failure %"),
)
#  'User Count',
#  'Requests/s',
#  'Failures/s',
#  'Total Request Count',
#  'Total Failure Count',
# ]
history

Timestamp,User Count,Type,Name,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,Total Request Count,Total Failure Count,Total Median Response Time,Total Average Response Time,Total Min Response Time,Total Max Response Time,Total Average Content Size,approach,model,test,index,Failure %
datetime[μs],i64,str,str,f64,f64,str,str,str,str,str,str,str,str,str,str,str,i64,i64,f64,f64,f64,f64,f64,str,str,str,i64,f64
2025-07-13 19:24:27,0,,"""Aggregated""",0.0,0.0,"""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""",0,0,0.0,0.0,0.0,0.0,0.0,"""VertexAIEndpoint""","""gemma-3-12b-it""","""VertexAIEndpoint - gemma-3-12b-it""",0,
2025-07-13 19:24:28,1,,"""Aggregated""",0.0,0.0,"""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""",0,0,0.0,0.0,0.0,0.0,0.0,"""VertexAIEndpoint""","""gemma-3-12b-it""","""VertexAIEndpoint - gemma-3-12b-it""",1,
2025-07-13 19:24:29,2,,"""Aggregated""",0.0,0.0,"""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""",0,0,0.0,0.0,0.0,0.0,0.0,"""VertexAIEndpoint""","""gemma-3-12b-it""","""VertexAIEndpoint - gemma-3-12b-it""",2,
2025-07-13 19:24:30,3,,"""Aggregated""",0.0,0.0,"""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""","""N/A""",0,0,0.0,0.0,0.0,0.0,0.0,"""VertexAIEndpoint""","""gemma-3-12b-it""","""VertexAIEndpoint - gemma-3-12b-it""",3,
2025-07-13 19:24:31,4,"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/us-central1/endpoints/2990351669667037184:predict""",0.0,0.0,"""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""",1,0,2467.049646,2467.049646,2467.049646,2467.049646,949.0,"""VertexAIEndpoint""","""gemma-3-12b-it""","""VertexAIEndpoint - gemma-3-12b-it""",4,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-07-14 10:33:02,250,,"""Aggregated""",900.8,900.6,"""180""","""200""","""220""","""240""","""430""","""680""","""870""","""910""","""11000""","""88000""","""88000""",140157,139997,150.0,293.654948,118.192629,105778.587566,324.020841,"""CloudRunOllama""","""gemma3:12b""","""CloudRunOllama - gemma3:12b""",585,99.885842
2025-07-14 10:33:03,250,"""POST""","""/api/generate?key=secret""",891.8,891.6,"""170""","""190""","""210""","""220""","""310""","""510""","""810""","""860""","""11000""","""123000""","""123000""",141483,141322,150.0,293.172475,118.192629,122661.092766,324.556851,"""CloudRunOllama""","""gemma3:12b""","""CloudRunOllama - gemma3:12b""",586,99.886205
2025-07-14 10:33:03,250,,"""Aggregated""",891.8,891.6,"""170""","""190""","""210""","""220""","""310""","""510""","""810""","""860""","""11000""","""123000""","""123000""",141483,141322,150.0,293.172475,118.192629,122661.092766,324.556851,"""CloudRunOllama""","""gemma3:12b""","""CloudRunOllama - gemma3:12b""",587,99.886205
2025-07-14 10:33:04,250,"""POST""","""/api/generate?key=secret""",889.6,889.4,"""170""","""190""","""210""","""220""","""290""","""440""","""810""","""860""","""11000""","""123000""","""123000""",142478,142317,150.0,292.695944,118.192629,122661.092766,324.389218,"""CloudRunOllama""","""gemma3:12b""","""CloudRunOllama - gemma3:12b""",588,99.887


In [7]:
failures = load_dataset(RESULTS_PATH, FAILURES_GLOB)
failures

Method,Name,Error,Occurrences,approach,model,index
str,str,str,i64,str,str,i64
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/us-central1/endpoints/2990351669667037184:predict""","""RemoteDisconnected('Remote end closed connection without response')""",311,"""VertexAIEndpoint""","""gemma-3-12b-it""",0
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash-lite-preview-06-17:generateContent""","""HTTPError('429 Client Error: Too Many Requests for url: /v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash-lite-preview-06-17:generateContent')""",153610,"""VertexAIMaaS""","""gemini-2.5-flash-lite-preview-06-17-thinking""",0
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent""","""HTTPError('429 Client Error: Too Many Requests for url: /v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent')""",137688,"""VertexAIMaaS""","""gemini-2.5-flash-thinking""",0
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash-lite-preview-06-17:generateContent""","""HTTPError('429 Client Error: Too Many Requests for url: /v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash-lite-preview-06-17:generateContent')""",160879,"""VertexAIMaaS""","""gemini-2.5-flash-lite-preview-06-17""",0
"""POST""","""/v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent""","""HTTPError('429 Client Error: Too Many Requests for url: /v1/projects/gcp-llm-serving-benchmarks/locations/global/publishers/google/models/gemini-2.5-flash:generateContent')""",110780,"""VertexAIMaaS""","""gemini-2.5-flash""",0
"""POST""","""/api/generate?key=secret""","""HTTPError('500 Server Error: Internal Server Error for url: /api/generate?key=secret')""",9,"""CloudRunOllama""","""gemma3:12b""",0
"""POST""","""/api/generate?key=secret""","""HTTPError('503 Server Error: Service Unavailable for url: /api/generate?key=secret')""",143821,"""CloudRunOllama""","""gemma3:12b""",1
"""POST""","""/api/generate?key=secret""","""HTTPError('429 Client Error: Too Many Requests for url: /api/generate?key=secret')""",1819,"""CloudRunOllama""","""gemma3:12b""",2


In [8]:
exceptions = load_dataset(RESULTS_PATH, EXCEPTIONS_GLOB)
exceptions

Count,Message,Traceback,Nodes,approach,model,index
str,str,str,str,str,str,i64


# FAILURES

In [None]:
px.line(
    history,
    x="index",
    y="Failure %",
    color="test",
    title="Failure % by Index"
)

In [None]:
px.scatter(
    history,
    x="User Count",
    y="Failures/s",
    color="test",
    title="User Count vs Failures/s"
)

In [None]:
# for (approach, model), group in history.group_by(["approach", "model"]):
#     fig =px.line(
#         group,
#         x="index",
#         y=["Total Request Count", "Total Failure Count"],
#         title=f"{approach} - {model}",
#     )
#     fig.show()