Benchmark table schema

model_metadata: STRUCT<uc_model_name: STRING, uc_model_version: INT, model_uri: STRING >

overall_metrics: STRUCT<f1: FLOAT, recall: FLOAT, precision: FLOAT>

per_class_metrics: STRUCT<ct: STRUCT<f1: FLOAT, recall: FLOAT, precision: FLOAT>>

benchmarked_at: TIMESTAMP

# ASSUMPTIONS
Only models from the new Ultralytics lib will be benchmarked

The benchmark dataset does not change frequently (so converting it to MDS files isn't too cumbersome)

In [0]:
from copy import copy

import mlflow
from tsdb.ml.types import Hyperparameters
from tsdb.ml.utils import UCModelName
from tsdb.ml.datasets import get_dataloader
from tsdb.preprocessing.preprocess import convert_to_mds
from ultralytics.nn.tasks import attempt_load_one_weight, DetectionModel

import torch
from torch.utils.data import DataLoader

from tsdb.ml.validate import ModifiedDetectionValidator

import os
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [0]:
# Columns selected from the scored benchmark table, reshaped to align with the MDS data types
benchmark_select_exprs = [
    "substring(path, 6, length(path)) as image_path",  # drop the leading 'dbfs:' from each image path
    "results.bboxes as 2D_bboxes",
    "transform(2D_bboxes, x -> float(0)) AS cls",  # one float(0) entry per bounding box
    "flatten(transform(2D_bboxes, x -> array(x.x1, x.y1, x.x2, x.y2))) AS bboxes",  # [x1, y1, x2, y2, ...] as one flat array
]

df = (
    spark.read.format("delta")
    .table("edav_dev_csels.towerscout.benchmark_scored")
    .selectExpr(*benchmark_select_exprs)
)

display(df)

In [0]:
@F.udf(T.StringType())
def get_location(file_path: str) -> str:
    """
    Spark UDF that labels an image with the benchmark location it belongs to.

    Only the final component of `file_path` (the image file name) is used; the
    location is the first benchmark image directory that contains a file with
    that name.

    Args:
        file_path: path of the image to look up

    Returns:
        "nyc" if the file exists in the NYC benchmark directory, "denver" if it
        exists in the Denver benchmark directory, otherwise "error" (this includes
        a null or empty `file_path`)
    """

    # Null column values reach the UDF as None; return the sentinel rather than
    # raising, since one exception inside a UDF fails the whole Spark job.
    if not file_path:
        return "error"

    img_filename = file_path.split("/")[-1]

    # Checked in order: NYC first, then Denver (dicts preserve insertion order)
    location_dirs = {
        "nyc": "/Volumes/edav_dev_csels/towerscout/images/okr1/benchmark/",
        "denver": "/Volumes/edav_dev_csels/towerscout/images/okr1/benchmark-denver/",
    }

    for location, directory in location_dirs.items():
        if os.path.isfile(os.path.join(directory, img_filename)):
            return location

    return "error"

In [0]:
# Tag every benchmark image with the value returned by the get_location UDF
image_location = get_location(F.col("image_path"))
df_locations = df.withColumn("location", image_location)

# Number of images per location value (including any "error" rows)
location_counts = df_locations.groupBy("location").count()
display(location_counts)

In [0]:
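# Conversion of the Denver benchmark subset to MDS files. Left commented out, presumably so the
# MDS files are not rewritten on every run of the notebook; uncomment to regenerate them.
# convert_to_mds(df_locations.filter("location = 'denver'"), "/Volumes/edav_dev_csels/towerscout/data/mds_benchmark/denver")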

In [0]:
def benchmark_model(
    model: DetectionModel, mds_benchmark_dir: str, cache_dir: str, batch_size: int
) -> tuple[dict[str, float], dict[str, dict[str, float]]]:
    """
    This function takes a YOLO model from the newer Ultralytics library and benchmarks it against the MDS dataset
    at the directory supplied. To do this we use the ModifiedDetectionValidator object from the tsdb library.

    Args:
        model: the DetectionModel we want to benchmark
        mds_benchmark_dir: the directory containing the MDS files of the benchmark dataset
        cache_dir: the directory for the dataloader to use
        batch_size: the batch size for the dataloader

    Returns:
        overall_metrics: a dictionary of the overall metrics (f1, precision, recall) for the model
        per_class_metrics: a dictionary of the metrics (f1, precision, recall) for each class for the model,
                           keyed by class name. Classes for which the validator produced no per-class
                           results are omitted.
    """

    device = "cuda" if torch.cuda.is_available() else "cpu"

    dataloader = get_dataloader(
        local_dir=cache_dir,
        remote_dir=mds_benchmark_dir,
        hyperparams=Hyperparameters(batch_size=batch_size),
        split=None,
        transform=False,
    )

    # copy the model args so the validator works on its own copy of them
    validator = ModifiedDetectionValidator(
        dataloader=dataloader, training=False, device=device, args=copy(model.args)
    )

    metrics = validator(model)
    per_class = validator.metrics.box

    overall_metrics = {
        "f1": metrics["metrics/f1(B)"],
        "precision": metrics["metrics/precision(B)"],
        "recall": metrics["metrics/recall(B)"],
    }

    # Ultralytics box metrics keep one f1/p/r entry per class that was actually evaluated,
    # ordered as in `ap_class_index`, so the arrays must not be indexed by the raw class label:
    # a class with no instances in the benchmark set would shift or overflow the index.
    # NOTE(review): assumes validator.metrics.box follows the Ultralytics Metric layout; if it
    # exposes no `ap_class_index` we fall back to indexing by class label as before.
    ap_class_index = getattr(per_class, "ap_class_index", None)
    if ap_class_index is None:
        label_to_position = {class_label: class_label for class_label in model.names}
    else:
        label_to_position = {
            int(class_label): position
            for position, class_label in enumerate(ap_class_index)
        }

    per_class_metrics = {
        class_name: {
            "f1": per_class.f1[label_to_position[class_label]],
            "precision": per_class.p[label_to_position[class_label]],
            "recall": per_class.r[label_to_position[class_label]],
        }
        for class_label, class_name in model.names.items()
        if class_label in label_to_position
    }

    return overall_metrics, per_class_metrics


def update_benchmark_table(
    benchmark_table: str,
    overall_metrics: dict[str, float],
    per_class_metrics: dict[str, dict[str, float]],
    model_metadata: dict,
) -> None:
    """
    This function updates the benchmark table with the supplied overall metrics, per class
    metrics and model metadata for a benchmark run of the model. One row is appended and its
    benchmarked_at column is set to the current timestamp.

    Args:
        benchmark_table: the name of the benchmark table to update in the format:
                         {catalog}.{schema}.{table_name}
        overall_metrics: a dictionary of the overall metrics (f1, precision, recall) for the model
        per_class_metrics: a dictionary of the metrics (f1, precision, recall) for each class for the model
        model_metadata: a dictionary of the model metadata (uc_model_name, uc_model_version, model_uri) to include in the benchmark table

    Raises:
        ValueError: if the benchmark table is missing one of the columns this function writes
    """

    # must cast to float from numpy.float64 because PySpark FloatType can't take numpy floats.
    overall_metrics = {key: float(value) for key, value in overall_metrics.items()}
    per_class_metrics = {
        key: {k: float(v) for k, v in value.items()}
        for key, value in per_class_metrics.items()
    }

    # reuse the table's own schema so the new row matches the stored column types
    schema = spark.read.format("delta").table(benchmark_table).schema

    # Build the row by column name rather than by position, so the values land in the right
    # columns regardless of the order in which the table's columns were declared.
    # benchmarked_at is left out here (it becomes null) and is filled in below.
    row = {
        "model_metadata": model_metadata,
        "overall_metrics": overall_metrics,
        "per_class_metrics": per_class_metrics,
    }

    table_columns = schema.fieldNames()
    missing_columns = [
        column for column in (*row, "benchmarked_at") if column not in table_columns
    ]
    if missing_columns:
        raise ValueError(
            f"Benchmark table '{benchmark_table}' is missing expected columns: {missing_columns}"
        )

    new_row_df = spark.createDataFrame([row], schema)
    new_row_df = new_row_df.withColumn("benchmarked_at", F.current_timestamp())
    new_row_df.write.format("delta").mode("append").saveAsTable(benchmark_table)


In [0]:
# Read the benchmark results table; the schema is the cell's displayed output
bm_df = (
    spark.read
    .format("delta")
    .table("edav_dev_csels.towerscout.benchmark_results")
)
bm_df.schema

In [0]:
# %sql

# USE CATALOG edav_dev_csels;
# USE SCHEMA towerscout;

# CREATE TABLE IF NOT EXISTS benchmark_results (
#   model_metadata STRUCT<uc_model_name: STRING, uc_model_version: INT, model_uri: STRING >,
#   benchmarked_at TIMESTAMP,
#   overall_metrics STRUCT<f1: FLOAT, recall: FLOAT, precision: FLOAT>,
#   per_class_metrics STRUCT<ct: STRUCT<f1: FLOAT, recall: FLOAT, precision: FLOAT>>
#   )
#   USING delta

In [0]:
# Use "databricks-uc" as the MLflow model registry
mlflow.set_registry_uri("databricks-uc")

# Dataloader cache location and the MDS benchmark dataset to evaluate against
cache_dir = "/local/cache/path1"
mds_benchmark_dir = "/Volumes/edav_dev_csels/towerscout/data/mds_benchmark/nyc"

# Run on the GPU when one is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the model logged under the MLflow run and place it on the chosen device
model = mlflow.pytorch.load_model(
    "runs:/ec50663136d5413b89b1600e6d66c7f2/towerscout_model",
    map_location=device,
)
model.to(device)

overall_metrics, per_class_metrics = benchmark_model(
    model=model,
    mds_benchmark_dir=mds_benchmark_dir,
    cache_dir=cache_dir,
    batch_size=16,
)

In [0]:
# Show the overall and per-class metrics returned by benchmark_model
print(overall_metrics, per_class_metrics)

In [0]:
# NOTE(review): these look like placeholder values; confirm the real Unity Catalog model
# name, version and URI of the benchmarked model before recording results.
model_metadata = {"uc_model_name": "Model A", "uc_model_version": 1, "model_uri": "s3://models/model_a"}

# update_benchmark_table returns None, so it is called directly rather than wrapped in display()
update_benchmark_table("edav_dev_csels.towerscout.benchmark_results", overall_metrics, per_class_metrics, model_metadata)

# Show the table with the most recent benchmark runs first to confirm the row was appended
display(
    spark.read.format("delta")
    .table("edav_dev_csels.towerscout.benchmark_results")
    .orderBy(F.col("benchmarked_at").desc())
)