# Compute Metrics
Precomputes every (model viz, benchmark viz) pair and performs an analysis.
For visualization similarity, uses the structural similarity index measure (SSIM). 

In [9]:
from server.model_setup import get_ncNetInstance, get_nl4dv_instance
from server.scripts import config

# Parallel processing
from dask.distributed import Client, LocalCluster
import multiprocessing
import dask

# Logging
import logging
import warnings
warnings.filterwarnings("ignore")

# Image processing
import skimage.metrics as skm
from PIL import Image
import numpy as np
import subprocess

import json
import os
import uuid


In [9]:
# from vega import VegaLite
# import altair as alt
# import panel as pn
# import json

pn.extension("vega")

bench_spec = {
    "data": {
        "values": [
            {
                "aircraft_id": 1,
                "fleet_series": "444-464 (21)",
                "fuel_propulsion": "Diesel",
                "manufacturer": "Gillig",
                "model": "Phantom (High Floor)",
                "order_year": 1992,
                "powertrain": "DD S50EGR Allison WB-400R",
            },
            {
                "aircraft_id": 2,
                "fleet_series": "465-467 (3)",
                "fuel_propulsion": "Diesel",
                "manufacturer": "Gillig",
                "model": "Phantom (High Floor)",
                "order_year": 1996,
                "powertrain": "DD S50 Allison WB-400R",
            },
            {
                "aircraft_id": 3,
                "fleet_series": "468-473 (6)",
                "fuel_propulsion": "Diesel",
                "manufacturer": "Gillig",
                "model": "Phantom (High Floor)",
                "order_year": 1998,
                "powertrain": "DD S50 Allison WB-400R",
            },
            {
                "aircraft_id": 4,
                "fleet_series": "474-481 (8)",
                "fuel_propulsion": "Diesel",
                "manufacturer": "Gillig",
                "model": "Advantage (Low Floor)",
                "order_year": 2000,
                "powertrain": "Cummins ISC Allison WB-400R",
            },
            {
                "aircraft_id": 5,
                "fleet_series": "482-492 (11)",
                "fuel_propulsion": "Diesel",
                "manufacturer": "Gillig",
                "model": "Advantage (Low Floor)",
                "order_year": 2002,
                "powertrain": "Cummins ISL Allison WB-400R",
            },
            {
                "aircraft_id": 6,
                "fleet_series": "300-309 (10)",
                "fuel_propulsion": "Hybrid",
                "manufacturer": "NFI",
                "model": "GE40LFR",
                "order_year": 2010,
                "powertrain": "Ford Triton V10 ISE-Thundervolt TB40-HG",
            },
            {
                "aircraft_id": 7,
                "fleet_series": "310-329 (20)",
                "fuel_propulsion": "CNG",
                "manufacturer": "NFI",
                "model": "C40LFR",
                "order_year": 2011,
                "powertrain": "Cummins Westport ISL-G Allison WB-400R",
            },
        ]
    },
    "encoding": {
        "color": {"field": "model", "type": "nominal"},
        "theta": {
            "aggregate": "count",
            "field": "description",
            "type": "quantitative",
        },
    },
    "mark": "arc",
}

spec = {
    "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
    "mark": {"type": "line", "tooltip": True},
    "encoding": {
        "x": {
            "field": "DepartmentID",
            "type": "quantitative",
            "aggregate": None,
            "bin": True,
            "axis": {"format": "s"},
        },
        "y": {
            "field": "DepartmentID",
            "type": "quantitative",
            "aggregate": "count",
            "axis": {"format": "s"},
        },
    },
    "transform": [],
    "data": {
        "url": "server\\assets\\benchmark\\data\\department.csv",
        "format": {"type": "csv"},
    },
}

viz = VegaLite(spec)

# print(alt.VegaLite(spec).save("test.html"))
# Save the panel to a png
# pn.panel(spec, width=600, height=400).save("test.json")
with open("test.json", "w") as f:
    json.dump(spec, f)

# subprocess.run("npm install vega-lite", shell=True)
subprocess.run(
    "npx -p vega -p vega-lite vl2png test.json test.png".split(),
    shell=True,
    capture_output=True,
)
# print(viz.render_type)


CompletedProcess(args=['npx', '-p', 'vega', '-p', 'vega-lite', 'vl2png', 'test.json', 'test.png'], returncode=0, stdout=b'', stderr=b'')

Define the process to get the visualization comparison metrics. It is kind of a
pain to convert a VegaLite spec to an image in Python, so we first have to save
the spec as a JSON object, then use the `vega-lite` CLI (Node) to convert the spec into
a png.

In [10]:
def get_viz_metrics(spec1, spec2, cleanup=True):
    if spec1 is None:
        return {"ssim": 0}

    spec1["autosize"] = "fit"
    spec1["width"] = 500
    spec1["height"] = 500
    spec1["background"] = "#fafafa"

    spec2["autosize"] = "fit"
    spec2["width"] = 500
    spec2["height"] = 500
    spec2["background"] = "#fafafa"

    pair_id = str(uuid.uuid4())

    try:
        os.listdir("tmp")
    except FileNotFoundError:
        os.mkdir("tmp")

    tmp_dir = lambda x: os.path.join("tmp", x)

    # Model result spec
    spec1_json_path = tmp_dir(f"spec1-{pair_id}.json")
    with open(spec1_json_path, "w") as f:
        json.dump(spec1, f)

    # Benchmark spec
    spec2_json_path = tmp_dir(f"spec2-{pair_id}.json")
    with open(spec2_json_path, "w") as f:
        json.dump(spec2, f)

    # Convert the specs to pngs using the vega-lite CLI
    spec1_png_path = tmp_dir(f"spec1-{pair_id}.png")
    subprocess.run(
        f"npx -p vega -p vega-lite vl2png {spec1_json_path} {spec1_png_path}".split(),
        shell=True,
    )
    spec2_png_path = tmp_dir(f"spec2-{pair_id}.png")
    subprocess.run(
        f"npx -p vega -p vega-lite vl2png {spec2_json_path} {spec2_png_path}".split(),
        shell=True,
    )

    # Load the pngs into PIL and compute the metrics
    viz1 = np.array(Image.open(spec1_png_path).convert("RGB"))
    viz2 = np.array(Image.open(spec2_png_path).convert("RGB"))

    score_ssim = skm.structural_similarity(viz1, viz2, multichannel=True)

    result = {
        "ssim": score_ssim,
    }

    if not cleanup:
        return result

    os.remove(spec1_json_path)
    os.remove(spec2_json_path)
    os.remove(spec1_png_path)
    os.remove(spec2_png_path)

    return result


In [24]:
get_viz_metrics(spec, spec)

{'ssim': 1.0}

Here we create a JSON lookup mapping datasets to the NL queries from the benchmark.

In [5]:
with open(config.BENCHMARK_META_PATH, "r") as file:
    benchmark_metadata: dict = json.load(file)

with open(config.TABLE_TO_BENCHMARK_LOOKUP_PATH, "r") as file:
    lookup = json.load(file)

dataset_to_queries_lookup = {}
for dataset_name, benchmark_ids in lookup.items():
    # Get all of the NL queries for the dataset
    nl_queries = [
        nl_query
        for benchmark_id in benchmark_ids
        for nl_query in benchmark_metadata[benchmark_id]["nl_queries"]
    ]
    
    dataset_to_queries_lookup[dataset_name] = nl_queries
    
# Save the dataset_to_queries_lookup to a file
with open(os.path.join(config.BENCHMARK_DIR_PATH, "dataset_to_queries_lookup.json"), "w") as file:
    json.dump(dataset_to_queries_lookup, file, indent=4)
    

Create the Dask client for multiprocessing

In [6]:
# Get the number of cores
n_cores = multiprocessing.cpu_count()
print("Number of cores we have: ", n_cores)

# Create a cluster and client
print("> Creating a cluster and client...")
cluster = LocalCluster(
    ip=None,
    n_workers=n_cores,
    processes=True,
    silence_logs=logging.ERROR,
    # interface="lo",
)
client = Client(cluster)
client

Number of cores we have:  12
> Creating a cluster and client...


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 12
Total threads: 12,Total memory: 15.79 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:55677,Workers: 12
Dashboard: http://127.0.0.1:8787/status,Total threads: 12
Started: Just now,Total memory: 15.79 GiB

0,1
Comm: tcp://127.0.0.1:55793,Total threads: 1
Dashboard: http://127.0.0.1:55794/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55688,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-79nzjbma,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-79nzjbma

0,1
Comm: tcp://127.0.0.1:55778,Total threads: 1
Dashboard: http://127.0.0.1:55779/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55690,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-y1imlmp3,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-y1imlmp3

0,1
Comm: tcp://127.0.0.1:55772,Total threads: 1
Dashboard: http://127.0.0.1:55773/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55691,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-102pj70w,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-102pj70w

0,1
Comm: tcp://127.0.0.1:55803,Total threads: 1
Dashboard: http://127.0.0.1:55806/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55684,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-8k_8ue0r,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-8k_8ue0r

0,1
Comm: tcp://127.0.0.1:55796,Total threads: 1
Dashboard: http://127.0.0.1:55797/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55689,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-lt_phk98,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-lt_phk98

0,1
Comm: tcp://127.0.0.1:55787,Total threads: 1
Dashboard: http://127.0.0.1:55788/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55685,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-gafq5wwe,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-gafq5wwe

0,1
Comm: tcp://127.0.0.1:55782,Total threads: 1
Dashboard: http://127.0.0.1:55784/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55683,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-rykfg91r,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-rykfg91r

0,1
Comm: tcp://127.0.0.1:55781,Total threads: 1
Dashboard: http://127.0.0.1:55783/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55682,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-uz0rkcx0,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-uz0rkcx0

0,1
Comm: tcp://127.0.0.1:55798,Total threads: 1
Dashboard: http://127.0.0.1:55800/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55680,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-73ptup_x,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-73ptup_x

0,1
Comm: tcp://127.0.0.1:55802,Total threads: 1
Dashboard: http://127.0.0.1:55804/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55686,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-em0jv5cl,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-em0jv5cl

0,1
Comm: tcp://127.0.0.1:55790,Total threads: 1
Dashboard: http://127.0.0.1:55791/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55681,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-0yj3thva,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-0yj3thva

0,1
Comm: tcp://127.0.0.1:55775,Total threads: 1
Dashboard: http://127.0.0.1:55776/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:55687,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-uop6al81,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-uop6al81


In [19]:
def compute_metrics(model_name, dataset_name):
    """The primary computation function to be parallelized."""

    def save_result():
        with open(
            os.path.join(
                config.BENCHMARK_EVAL_DIR_PATH, f"eval_{model_name}_{dataset_name}.json"
            ),
            "w",
        ) as f:
            json.dump(result, f, indent=4)

    dataset = dataset_name.replace(".csv", "") + ".csv"
    data_path = os.path.join(config.BENCHMARK_DATA_DIR_PATH, dataset)

    # Get the queries for the dataset
    nl_queries = dataset_to_queries_lookup[dataset_name]

    # Get the benchmarks for this dataset
    with open(config.BENCHMARK_META_PATH, "r") as file:
        benchmark_metadata: dict = json.load(file)
    with open(config.TABLE_TO_BENCHMARK_LOOKUP_PATH, "r") as file:
        lookup = json.load(file)
    b_ids = lookup[dataset_name]
    benchmarks_with_dataset = [
        benchmark for b_id, benchmark in benchmark_metadata.items() if b_id in b_ids
    ]

    result = {
        "model_name": model_name,
        "dataset_name": dataset_name,
        "results": [],
        "errors": [],
    }

    # Create the model isntances
    try:
        if model_name == "nl4dv":
            model = get_nl4dv_instance(data_path=data_path)
        elif model_name == "ncNet":
            model = get_ncNetInstance(data_path=data_path)
    except FileNotFoundError:
        result["errors"].append("Data path not found when trying to set up model")
        save_result()
        return result

    # For each query, execute it and get the similarity metrics between the
    # model output and the benchmark
    for i, nl_query in enumerate(nl_queries):
        # Get the benchmark for this query, and get its spec
        benchmarks_with_query = [
            benchmark
            for benchmark in benchmarks_with_dataset
            if nl_query in benchmark["nl_queries"]
        ]
        num_benchmarks = len(benchmarks_with_query)
        if num_benchmarks != 1:
            result["errors"].append(
                f'"{nl_query}" has {num_benchmarks} benchmarks associated with it'
            )
        if num_benchmarks == 0:
            # No benchmark, there is no point in executing the query
            continue
        benchmark_spec = benchmarks_with_query[0]["vega_spec"]

        # Execute the query
        produced_spec = False
        model_vl_spec = None
        if model_name == "nl4dv":
            print(f'EXECUTING QUERY: "{nl_query}" on nl4dv')
            model_result = model.analyze_query(nl_query)
            # Get the first VegaLite spec
            vis_list = model_result["visList"]
            if len(vis_list) > 0:
                produced_spec = True
                model_vl_spec = vis_list[0]["vlSpec"]

        elif model_name == "ncNet":
            print(f'EXECUTING QUERY: "{nl_query}" on ncNet')
            try:
                viz = model.nl2vis(nl_query)[
                    0
                ]  # nl2vis will return a list a [Vis, VegaLiteSpec]
            except Exception as e:
                result["errors"].append(f"Error when executing query: \"{nl_query}\"")
                produced_spec = False
            else:
                model_vl_spec = viz.spec
                produced_spec = True

        metrics = get_viz_metrics(model_vl_spec, benchmark_spec)

        result["results"].append(
            {
                "query": nl_query,
                "produced_spec": produced_spec,
                "metrics": metrics,
            }
        )

    # Write the results to a file
    save_result()

    return result


In [20]:
parameters = [
    (model_name, dataset_name)
    for model_name in ["ncNet", "nl4dv"]
    for dataset_name in dataset_to_queries_lookup.keys()
]

lazy_results = []
for i, (model_name, dataset_name) in enumerate(parameters):
    # Temporary stopping measure
    lazy_result = dask.delayed(compute_metrics)(model_name, dataset_name)
    lazy_results.append(lazy_result)
    print(lazy_result)

# for result in dask.compute(*lazy_results):
#     print(result)
result = dask.compute(*lazy_results)
# with open(os.path.join(config.BENCHMARK_DIR_PATH, "evaluation.json"), "w", encoding='utf-8') as f:
#     json.dump(result, f, indent=4)

Delayed('compute_metrics-fde82521-9a75-4c3c-938f-699e0db26fd3')
Delayed('compute_metrics-88003398-6d2a-465e-8409-a0d890c8f124')
Delayed('compute_metrics-f13da7d1-63e7-4b56-a6ff-7cefeb1630ac')
Delayed('compute_metrics-806d3a67-471d-46e7-95ef-604ef74cc67b')
Delayed('compute_metrics-908c4f5c-109d-4184-92d9-0aaa959a51c1')
Delayed('compute_metrics-de715d24-8bad-42f8-9fb8-9c7f64e6de71')
Delayed('compute_metrics-3ba439cb-2a15-456e-81f5-25d2c287cb69')
Delayed('compute_metrics-4deb9d91-6622-4f4d-94e4-3fb2fb690baf')
Delayed('compute_metrics-3c7a5618-7ba1-4126-b859-3fafae62ff63')
Delayed('compute_metrics-f53b12ed-974a-4ba9-bc70-2656d1eb7e4e')
Delayed('compute_metrics-59e78fd6-4bc9-446b-a932-932d5e76c8e2')
Delayed('compute_metrics-205ad429-2edc-4423-809c-4c98e6e3d3b7')
Delayed('compute_metrics-ce0fd69a-942c-4d33-8065-1c9a331509c4')
Delayed('compute_metrics-fc50cad7-cdac-4c96-bbf2-58af460f4600')
Delayed('compute_metrics-0add620a-48df-4c3a-a7bf-32e1b5f62c48')
Delayed('compute_metrics-3cbe72cf-fd52-4

AttributeError: module 'server.scripts.config' has no attribute 'BENCHMARK_EVAL_DIR_PATH'

In [46]:
client.close()