In [1]:
import os
import subprocess
import sys
from datetime import datetime
from urllib.parse import urlencode

import hvplot.pandas  # noqa
import pandas as pd

sys.path.append("..")

import helpers.dataframe as dataframe_helpers
import helpers.eodc_hub_role as eodc_hub_role

## Specify CMRBackend test results file

In [2]:
# Location of the stored CMRBackend test results to compare against.
uri = "s3://nasa-eodc-data-store/test-results/20240814010546_CMRTileTest_C2723754850-GES_DISC.json"
# The test id is the file name of `uri` up to its first ".".
test_id = uri.rsplit("/", 1)[-1].partition(".")[0]
# CSV file that receives one tile request path per line for locust to replay.
urls_output_file = f"urls/{test_id}.csv"
# Extra query parameters appended to every generated tile request.
kwargs = dict(rescale="0,455", colormap_name="coolwarm")

## Define utility functions for building the API queries that correspond to the CMRBackend tests

In [3]:
def gen_datetime_query(temporal):
    """Format the first entry of ``temporal`` as a UTC ISO 8601 timestamp.

    Parameters
    ----------
    temporal : sequence
        Sequence whose first element is anything ``pd.to_datetime`` accepts.
        Only ``temporal[0]`` is used.

    Returns
    -------
    str
        Timestamp of the form ``YYYY-MM-DDTHH:MM:SS`` followed by ``"Z"``.
    """
    start = pd.to_datetime(temporal[0])
    # A timezone-aware timestamp would otherwise be rendered with an explicit
    # offset (e.g. "+00:00") and then get "Z" appended on top of it, which is
    # not a valid timestamp. Normalise to UTC and drop the offset first;
    # naive timestamps are formatted unchanged.
    if start.tzinfo is not None:
        start = start.tz_convert("UTC").tz_localize(None)
    return start.isoformat("T") + "Z"


def gen_query(
    concept_id,
    variable,
    temporal,
    tile,
    *,
    tms: str = "WebMercatorQuad",
    backend: str = "xarray",
    scale: int = 1,
    return_mask: str = "true",
    **kwargs,
):
    """Build the relative tile request path (with query string) for one tile.

    Parameters
    ----------
    concept_id, variable
        Sent as the ``concept_id`` and ``variable`` query parameters.
    temporal
        Passed to ``gen_datetime_query``; the result becomes the ``datetime``
        query parameter.
    tile
        Three-element sequence unpacked as ``(x, y, z)``.
    tms, backend, scale, return_mask
        ``tms`` is placed in the path; the others are sent as query parameters.
    **kwargs
        Additional query parameters. A key that matches one of the parameters
        above replaces its value.

    Returns
    -------
    str
        ``/tiles/{tms}/{z}/{x}/{y}?{url-encoded query}``
    """
    datetime_param = gen_datetime_query(temporal)
    col, row, zoom = tile
    params = dict(
        concept_id=concept_id,
        variable=variable,
        datetime=datetime_param,
        backend=backend,
        scale=scale,
        return_mask=return_mask,
    )
    params.update(kwargs)
    query_string = urlencode(params)
    return f"/tiles/{tms}/{zoom}/{col}/{row}?{query_string}"


def generate_locust_urls(uri, *, output_file=None, query_kwargs=None):
    """Load stored test results and write one tile request path per row to CSV.

    Parameters
    ----------
    uri : str
        Location of the results file, passed to
        ``dataframe_helpers.load_all_into_dataframe``.
    output_file : str, optional
        Destination CSV. Defaults to the notebook-level ``urls_output_file``.
    query_kwargs : dict, optional
        Extra query parameters forwarded to ``gen_query``. Defaults to the
        notebook-level ``kwargs``.

    Returns
    -------
    pandas.DataFrame
        The loaded results with added ``temporal`` and ``query`` columns.
    """
    if output_file is None:
        output_file = urls_output_file
    if query_kwargs is None:
        query_kwargs = kwargs

    credentials = eodc_hub_role.fetch_and_set_credentials()
    df = dataframe_helpers.load_all_into_dataframe(credentials, [uri], use_boto3=False)
    df = dataframe_helpers.expand_timings(df).reset_index()

    # Column-wise operations instead of row-wise ``apply``: they also behave
    # sensibly when the frame has no rows.
    df["temporal"] = df["cmr_query"].map(lambda cmr_query: cmr_query["temporal"])
    df["query"] = [
        gen_query(dataset_id, variable, temporal, tile, **query_kwargs)
        for dataset_id, variable, temporal, tile in zip(
            df["dataset_id"], df["variable"], df["temporal"], df["tile"]
        )
    ]

    # ``to_csv`` does not create missing parent directories.
    os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
    df["query"].to_csv(output_file, index=False, header=False)
    return df

## Use locust to time tile generation

In [4]:
df = generate_locust_urls(uri)
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# Prefix handed to locust's --csv option; the timestamp keeps runs separate.
results_output = f"results/{test_id}-{current_datetime}"
# Pass the arguments as a list and run without a shell, so that file names
# derived from `uri` are never interpreted by the shell.
command = [
    "locust",
    "-f",
    "locust_titiler_cmr.py",
    "--headless",
    "--users",
    "1",
    "--iterations",
    "1",
    "--csv",
    results_output,
    "--urls-file",
    urls_output_file,
    "--csv-full-history",
    "--host",
    "https://dev-titiler-cmr.delta-backend.com",
]
# The cell output is locust's exit code.
subprocess.call(command)

[2024-08-14 01:06:54,484] jupyter-maxrjones--tile-2dbenchmarking/INFO/locust.main: No run time limit set, use CTRL+C to interrupt
[2024-08-14 01:06:54,484] jupyter-maxrjones--tile-2dbenchmarking/INFO/locust.main: Starting Locust 2.30.0
Type     Name                                                                          # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
--------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------
--------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------
         Aggregated                                                                         0     0(0.00%) |      0       0       0      0 |    0.00        0.00

[2024-08-14 01:06:54,505] jupyter-maxrjones--tile-2dbenchmarking/INFO/locust.runners: Ramping to 1 users at a rate of 1

0

## Define utility functions for processing results

In [5]:
def calc_individual_response_times(df, query):
    """Recover per-request response times from locust's running averages.

    The history rows for ``query`` carry the cumulative request count and the
    running average response time. Multiplying the two gives the cumulative
    total, and the difference between consecutive totals is the response time
    of the request(s) completed in between.

    Parameters
    ----------
    df : pandas.DataFrame
        Stats history with the columns ``Name``, ``Total Request Count``,
        ``Total Average Response Time``, ``Total Min Response Time`` and
        ``Total Max Response Time``.
    query : str
        Value of ``Name`` to select.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name`` and ``Response Time``, indexed by
        ``Total Request Count``. Empty if ``query`` has no rows.

    Notes
    -----
    * If the first recorded count is 2, the two requests cannot be separated
      from the average alone; the recorded max is assigned to request 2 and
      the recorded min to a synthesised request 1.
    * If the history skips counts elsewhere, the row receives the mean
      response time of the requests completed since the previous row, because
      the individual values are not recoverable.
    """
    history = (
        df[df["Name"] == query]
        .drop_duplicates(subset="Total Request Count", keep="last")
        .set_index("Total Request Count")
        .sort_index()
    )
    # Rows without any completed request carry no timing information.
    history = history[history.index > 0].copy()

    counts = history.index.to_series()
    cumulative_time = counts * history["Total Average Response Time"]
    # For the first row the "previous" state is zero requests / zero time.
    completed = counts.diff().fillna(counts)
    elapsed = cumulative_time.diff().fillna(cumulative_time)
    history["Response Time"] = elapsed / completed

    if len(history) and history.index[0] == 2:
        history.loc[2, "Response Time"] = history.loc[2, "Total Max Response Time"]
        history.loc[1] = history.loc[2]
        history.loc[1, "Response Time"] = history.loc[2, "Total Min Response Time"]

    return history[["Name", "Response Time"]]

In [6]:
def split_aggregated_results(df, full_df):
    """Expand aggregated locust stats into one row per request.

    Parameters
    ----------
    df : pandas.DataFrame
        Aggregated stats with the columns ``Name``, ``Request Count``,
        ``Average Response Time`` and ``Min Response Time``.
    full_df : pandas.DataFrame
        Stats history, passed to ``calc_individual_response_times`` for every
        query that was requested more than once.

    Returns
    -------
    pandas.DataFrame
        Rows for queries requested exactly once (with ``Average Response
        Time`` renamed to ``Response Time``), followed by the per-request rows
        computed for each repeated query.
    """
    repeated_queries = df.loc[df["Request Count"] > 1, "Name"].to_list()
    single_requests = df[df["Request Count"] == 1].rename(
        columns={"Average Response Time": "Response Time"}
    )[["Name", "Request Count", "Response Time", "Min Response Time"]]

    # Collect all pieces and concatenate once rather than growing the frame
    # inside the loop.
    frames = [single_requests]
    frames.extend(
        calc_individual_response_times(full_df, query) for query in repeated_queries
    )
    return pd.concat(frames, axis=0)

In [7]:
def process_locust_results(results_location):
    """Load locust CSV output and return one row per GET request.

    Parameters
    ----------
    results_location : str
        Prefix of the locust CSV files; ``{results_location}_stats.csv`` and
        ``{results_location}_stats_history.csv`` are read.

    Returns
    -------
    pandas.DataFrame
        Output of ``split_aggregated_results`` with ``Name`` renamed to
        ``query`` and three added columns: ``zoom`` (int, fourth path
        segment), ``tile`` (list of the three path segments starting at the
        zoom, kept as strings) and ``method`` (always ``"FastAPI"``).
    """
    stats = pd.read_csv(f"{results_location}_stats.csv")
    history = pd.read_csv(f"{results_location}_stats_history.csv")
    requests = split_aggregated_results(stats[stats["Type"] == "GET"], history)

    # Work on the "Name" column directly: unlike a row-wise ``apply`` this
    # also yields a Series when there are no rows, and ``assign`` returns a
    # new frame instead of writing into a derived one.
    path_segments = requests["Name"].map(lambda name: name.split("?")[0].split("/"))
    return requests.assign(
        zoom=path_segments.map(lambda segments: int(segments[3])),
        tile=path_segments.map(lambda segments: segments[3:6]),
        method="FastAPI",
    ).rename(columns={"Name": "query"})

## Process locust results and merge with CMRBackend results

In [8]:
locust_df = process_locust_results(results_output)
# Derive the CMRBackend frame without modifying or rebinding `df`, so this
# cell can be re-run without first regenerating `df`.
# NOTE(review): locust reports response times in milliseconds; confirm that
# the first entry of `timings` uses the same unit before comparing the two.
cmr_df = df.assign(
    method="CMRBackend",
    **{"Response Time": df["timings"].map(lambda timings: float(timings[0]))},
)[["query", "Response Time", "method", "tile", "zoom"]]
combined_df = pd.concat([cmr_df, locust_df], axis=0).reset_index()

## Plot results

In [10]:
# Two colours for the box plot, which is coloured by "method".
cmap = [
    "#E1BE6A",
    "#40B0A6",
]
# Options applied to the plot through `.opts(...)`.
plt_opts = dict(width=1800, height=400, xrotation=90)

In [11]:
# Box plot of the response times, grouped by zoom level and method and
# coloured by method.
# NOTE(review): locust reports response times in milliseconds; confirm that
# the "(s)" unit in the y-axis label matches the plotted values.
response_time_boxes = combined_df.hvplot.box(
    y="Response Time",
    by=["zoom", "method"],
    c="method",
    cmap=cmap,
    ylabel="Response time (s)",
    xlabel="Zoom level",
    legend=False,
)
response_time_boxes.opts(**plt_opts)

