# Compute Metrics
Precomputes every (model viz, benchmark viz) pair and performs an analysis.
For visualization similarity, uses the structural similarity index measure (SSIM). 

In [1]:
# Standard library
import json
import logging
import multiprocessing
import os
import shutil
import subprocess
import time
import uuid
import warnings

# Silence warnings before the third-party imports below can emit any
warnings.filterwarnings("ignore")

# Parallel processing
import dask
from dask.distributed import Client, LocalCluster

# Image processing
import numpy as np
import skimage.metrics as skm
from PIL import Image

# Project-local
from server.model_setup import get_ncNetInstance, get_nl4dv_instance
from server.scripts import config


c:\Users\casil\Documents\Spring_2022\6.S079\final-project\server\model_setup.py


Define the process to get the visualization comparison metrics. Converting a
Vega-Lite spec to an image directly in Python is awkward, so we first save each
spec as a JSON file, then use the `vega-lite` CLI (`vl2png`, run through Node's
`npx`) to render that file to a PNG.

In [2]:
# Rendering options forced onto both specs so the two PNGs are comparable.
_VL_RENDER_OPTIONS = {
    "autosize": "fit",
    "width": 500,
    "height": 500,
    "background": "#fafafa",
}


def _render_vl_spec_to_png(spec, json_path, png_path):
    """Write ``spec`` to ``json_path`` and render it to ``png_path`` with vl2png.

    Returns:
        None when the CLI exits with status 0, otherwise a short message
        describing why the render failed.
    """
    with open(json_path, "w") as f:
        json.dump(spec, f)

    # Resolve the launcher explicitly (on Windows this finds `npx.cmd`) so the
    # command can be run as an argument list without going through a shell.
    npx = shutil.which("npx")
    if npx is None:
        return "npx executable not found on PATH"

    # subprocess.run blocks until vl2png exits, so the PNG (if any) is fully
    # written by the time this returns; no sleep/retry is needed afterwards.
    completed = subprocess.run(
        [npx, "-p", "vega", "-p", "vega-lite", "vl2png", json_path, png_path],
        capture_output=True,
        text=True,
        errors="replace",
    )
    if completed.returncode != 0:
        return (
            f"vl2png exited with code {completed.returncode}: "
            f"{completed.stderr.strip()}"
        )
    return None


def get_viz_metrics(spec1, spec2, cleanup=True):
    """Render two Vega-Lite specs to PNG and score their similarity with SSIM.

    Args:
        spec1: The model's Vega-Lite spec (dict), or None if the model did not
            produce one.
        spec2: The benchmark Vega-Lite spec (dict).
        cleanup: When True, the temporary JSON/PNG files written under
            ``tmp/`` are removed before returning.

    Returns:
        ``{"metrics": {"ssim": <score>}, "errors": [<str>, ...]}`` where the
        score is 0 when ``spec1`` is None and -1 when a PNG could not be
        produced or the similarity computation raised.
    """
    if spec1 is None:
        return {"metrics": {"ssim": 0}, "errors": []}

    pair_id = str(uuid.uuid4())

    # exist_ok avoids the check-then-create race when several workers start
    # at the same time.
    os.makedirs("tmp", exist_ok=True)

    paths = {
        label: (
            os.path.join("tmp", f"{label}-{pair_id}.json"),
            os.path.join("tmp", f"{label}-{pair_id}.png"),
        )
        for label in ("spec1", "spec2")
    }

    render_errors = []
    try:
        # spec1 is the model result, spec2 the benchmark.
        for label, spec in (("spec1", spec1), ("spec2", spec2)):
            json_path, png_path = paths[label]
            # Apply the render options to a copy so the caller's dict is
            # left untouched.
            failure = _render_vl_spec_to_png(
                {**spec, **_VL_RENDER_OPTIONS}, json_path, png_path
            )
            if failure is not None:
                render_errors.append(f"{label}: {failure}")

        # Load the pngs into PIL and convert them to RGB arrays
        try:
            with Image.open(paths["spec1"][1]) as image1:
                viz1 = np.array(image1.convert("RGB"))
            with Image.open(paths["spec2"][1]) as image2:
                viz2 = np.array(image2.convert("RGB"))
        except FileNotFoundError:
            # Keep the original "FileNotFoundError" marker first; any render
            # failure reasons follow it.
            return {
                "metrics": {"ssim": -1},
                "errors": ["FileNotFoundError", *render_errors],
            }

        try:
            # NOTE(review): `multichannel` was deprecated in scikit-image 0.19
            # in favour of `channel_axis=-1` - confirm the installed version
            # before switching.
            score_ssim = skm.structural_similarity(viz1, viz2, multichannel=True)
            errors = []
        except Exception as e:
            score_ssim = -1
            errors = [f"Exception when finding similarity - {type(e)}: {e}"]

        return {
            "metrics": {
                "ssim": score_ssim,
            },
            "errors": errors,
        }
    finally:
        # Runs on every exit path, so failed renders no longer leak temp files.
        if cleanup:
            for file_pair in paths.values():
                for path in file_pair:
                    try:
                        os.remove(path)
                    except FileNotFoundError:
                        pass


Here we create a JSON lookup mapping datasets to the NL queries from the benchmark.

In [3]:
# Benchmark metadata: benchmark id -> benchmark entry (with its "nl_queries")
with open(config.BENCHMARK_META_PATH, "r") as meta_file:
    benchmark_metadata: dict = json.load(meta_file)

# Dataset name -> ids of the benchmarks that use that dataset
with open(config.TABLE_TO_BENCHMARK_LOOKUP_PATH, "r") as lookup_file:
    lookup = json.load(lookup_file)

# Flatten the NL queries of every benchmark of a dataset into one list
dataset_to_queries_lookup = {}
for dataset_name, benchmark_ids in lookup.items():
    queries_for_dataset = []
    for benchmark_id in benchmark_ids:
        queries_for_dataset.extend(benchmark_metadata[benchmark_id]["nl_queries"])
    dataset_to_queries_lookup[dataset_name] = queries_for_dataset

# Persist the lookup next to the other benchmark files
lookup_save_path = os.path.join(
    config.BENCHMARK_DIR_PATH, "dataset_to_queries_lookup.json"
)
with open(lookup_save_path, "w") as out_file:
    json.dump(dataset_to_queries_lookup, out_file, indent=4)
    

Create the Dask client for multiprocessing

In [4]:
# One worker process per CPU core reported by the OS
n_cores = multiprocessing.cpu_count()
print("Number of cores we have: ", n_cores)

# Start a local multi-process cluster and connect a client to it
print("> Creating a cluster and client...")
cluster_options = dict(
    ip=None,
    n_workers=n_cores,
    processes=True,
    silence_logs=logging.ERROR,
    # interface="lo",
)
cluster = LocalCluster(**cluster_options)
client = Client(cluster)

# Last expression: display the client summary in the notebook
client

Number of cores we have:  12
> Creating a cluster and client...


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 12
Total threads: 12,Total memory: 15.79 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:58302,Workers: 12
Dashboard: http://127.0.0.1:8787/status,Total threads: 12
Started: Just now,Total memory: 15.79 GiB

0,1
Comm: tcp://127.0.0.1:58413,Total threads: 1
Dashboard: http://127.0.0.1:58415/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58311,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-1k3w9vzg,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-1k3w9vzg

0,1
Comm: tcp://127.0.0.1:58422,Total threads: 1
Dashboard: http://127.0.0.1:58423/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58305,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-q0gxb2ik,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-q0gxb2ik

0,1
Comm: tcp://127.0.0.1:58392,Total threads: 1
Dashboard: http://127.0.0.1:58393/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58306,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-z10bz02j,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-z10bz02j

0,1
Comm: tcp://127.0.0.1:58410,Total threads: 1
Dashboard: http://127.0.0.1:58411/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58310,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-pq1gr7og,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-pq1gr7og

0,1
Comm: tcp://127.0.0.1:58407,Total threads: 1
Dashboard: http://127.0.0.1:58408/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58308,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-_9y5iyc7,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-_9y5iyc7

0,1
Comm: tcp://127.0.0.1:58414,Total threads: 1
Dashboard: http://127.0.0.1:58416/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58315,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-_sraf_va,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-_sraf_va

0,1
Comm: tcp://127.0.0.1:58395,Total threads: 1
Dashboard: http://127.0.0.1:58396/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58313,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-l4n0fsi3,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-l4n0fsi3

0,1
Comm: tcp://127.0.0.1:58419,Total threads: 1
Dashboard: http://127.0.0.1:58420/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58309,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-ioxcx1ra,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-ioxcx1ra

0,1
Comm: tcp://127.0.0.1:58401,Total threads: 1
Dashboard: http://127.0.0.1:58402/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58307,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-nxdhqtps,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-nxdhqtps

0,1
Comm: tcp://127.0.0.1:58398,Total threads: 1
Dashboard: http://127.0.0.1:58399/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58316,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-ozw36peu,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-ozw36peu

0,1
Comm: tcp://127.0.0.1:58404,Total threads: 1
Dashboard: http://127.0.0.1:58405/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58314,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-nbiboxw6,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-nbiboxw6

0,1
Comm: tcp://127.0.0.1:58375,Total threads: 1
Dashboard: http://127.0.0.1:58378/status,Memory: 1.32 GiB
Nanny: tcp://127.0.0.1:58312,
Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-0vgu3qpi,Local directory: c:\Users\casil\Documents\Spring_2022\6.S079\final-project\dask-worker-space\worker-0vgu3qpi


In [5]:
def _write_eval_result(result, save_path):
    """Write ``result`` as JSON to ``save_path`` and return it unchanged."""
    with open(save_path, "w", encoding="utf-8") as file:
        json.dump(result, file, indent=4)
    return result


def compute_metrics(model_name, dataset_name):
    """The primary computation function to be parallelized.

    Runs every benchmark NL query of ``dataset_name`` through the model named
    ``model_name``, scores each produced visualization against the benchmark
    spec with ``get_viz_metrics``, and writes the outcome to
    ``eval_{model_name}_{dataset_name}.json`` in the evaluation directory.

    Args:
        model_name: Either "nl4dv" or "ncNet".
        dataset_name: A key of ``dataset_to_queries_lookup``.

    Returns:
        The result dict that was written to disk, or None when the output
        file already exists and the work is skipped.

    Raises:
        ValueError: If ``model_name`` is not a supported model. Previously an
            unknown name silently produced a file of all-zero scores.
    """
    if model_name not in ("nl4dv", "ncNet"):
        raise ValueError(
            f'Unknown model name "{model_name}"; expected "nl4dv" or "ncNet"'
        )

    save_path = os.path.join(
        config.BENCHMARK_EVAL_DIR_PATH,
        f"eval_{model_name}_{dataset_name}.json",
    )
    if os.path.exists(save_path):
        # We already computed this, so skip it
        return

    # Normalise to exactly one ".csv" suffix
    dataset = dataset_name.replace(".csv", "") + ".csv"
    data_path = os.path.join(config.BENCHMARK_DATA_DIR_PATH, dataset)

    # Get the queries for the dataset
    nl_queries = dataset_to_queries_lookup[dataset_name]

    # Get the benchmarks for this dataset
    with open(config.BENCHMARK_META_PATH, "r") as file:
        benchmark_metadata: dict = json.load(file)
    with open(config.TABLE_TO_BENCHMARK_LOOKUP_PATH, "r") as file:
        lookup = json.load(file)
    b_ids = set(lookup[dataset_name])
    benchmarks_with_dataset = [
        benchmark for b_id, benchmark in benchmark_metadata.items() if b_id in b_ids
    ]

    result = {
        "model_name": model_name,
        "dataset_name": dataset_name,
        "results": [],
        "errors": [],
    }

    # Create the model instance. Any failure is recorded in the result file
    # instead of aborting the whole parallel run.
    try:
        if model_name == "nl4dv":
            model = get_nl4dv_instance(data_path=data_path)
        else:
            model = get_ncNetInstance(data_path=data_path, table_name=dataset_name)
    except Exception as e:
        if model_name == "nl4dv" and isinstance(e, FileNotFoundError):
            message = "Data path not found when trying to set up model"
        else:
            message = f"Exception when creating model - {type(e)}: {e}"
        result["errors"].append(message)
        return _write_eval_result(result, save_path)

    # For each query, execute it and get the similarity metrics between the
    # model output and the benchmark
    for nl_query in nl_queries:
        # Get the benchmark for this query, and get its spec
        benchmarks_with_query = [
            benchmark
            for benchmark in benchmarks_with_dataset
            if nl_query in benchmark["nl_queries"]
        ]
        num_benchmarks = len(benchmarks_with_query)
        if num_benchmarks != 1:
            result["errors"].append(
                f'"{nl_query}" has {num_benchmarks} benchmarks associated with it'
            )
        if num_benchmarks == 0:
            # No benchmark, there is no point in executing the query
            continue
        # With several matches, the first one is used
        benchmark_spec = benchmarks_with_query[0]["vega_spec"]

        # Execute the query; a failing query is recorded and scored as
        # "no spec produced" for both models.
        produced_spec = False
        model_vl_spec = None
        try:
            if model_name == "nl4dv":
                print(f'EXECUTING QUERY: "{nl_query}" on nl4dv')
                model_result = model.analyze_query(nl_query)
                # Get the first VegaLite spec
                vis_list = model_result["visList"]
                if len(vis_list) > 0:
                    model_vl_spec = vis_list[0]["vlSpec"]
                    produced_spec = True
            else:
                print(f'EXECUTING QUERY: "{nl_query}" on ncNet')
                # nl2vis will return a list a [Vis, VegaLiteSpec]
                viz = model.nl2vis(nl_query)[0]
                model_vl_spec = viz.spec
                produced_spec = True
        except Exception as e:
            result["errors"].append(
                f'Error when executing "{nl_query}" - {type(e)}: {e}'
            )
            produced_spec = False
            model_vl_spec = None

        metrics = get_viz_metrics(model_vl_spec, benchmark_spec)
        result["errors"].extend(metrics["errors"])

        result["results"].append(
            {
                "query": nl_query,
                "produced_spec": produced_spec,
                "metrics": metrics["metrics"],
            }
        )

    # Write the results to a file
    return _write_eval_result(result, save_path)


In [6]:
# Every (model, dataset) combination to evaluate, grouped by model.
# To debug a single pair, replace this with e.g. [("ncNet", "Invoices")].
parameters = []
for model_name in ("ncNet", "nl4dv"):
    for dataset_name in dataset_to_queries_lookup:
        parameters.append((model_name, dataset_name))

# Build one delayed task per combination; nothing runs until dask.compute
lazy_results = []
for model_name, dataset_name in parameters:
    lazy_result = dask.delayed(compute_metrics)(model_name, dataset_name)
    print(lazy_result)
    lazy_results.append(lazy_result)

# Execute all tasks; each task writes its own eval_*.json file
result = dask.compute(*lazy_results)

Delayed('compute_metrics-05411de0-4688-48f5-b00a-12a4e8771cbd')
Delayed('compute_metrics-d1ef2366-c89c-411f-92fc-5097d0d9da03')
Delayed('compute_metrics-ddc0e730-8338-4649-8e4f-d17c941585ea')
Delayed('compute_metrics-411c4fae-94c1-4ca8-b601-53c2f0ecbfd7')
Delayed('compute_metrics-aec97b39-920e-481c-be74-c4a884e1b06d')
Delayed('compute_metrics-98d0c92d-ac69-47dd-adf1-26e21af8e7b9')
Delayed('compute_metrics-060f3bbd-7638-4cf8-9186-d71fd1951192')
Delayed('compute_metrics-8fdf5d6e-7b07-4eca-a133-630ecbcb71e3')
Delayed('compute_metrics-c85325d3-e729-4bc8-8bc0-34f231c2b946')
Delayed('compute_metrics-f467a5a4-ffe5-4d7d-bfc4-492ee4513db5')
Delayed('compute_metrics-98d8c5a8-14f8-47e5-bec9-ded7de169132')
Delayed('compute_metrics-b9a3871a-be7c-4f72-a96e-bd73acad69bc')
Delayed('compute_metrics-4565e6b4-f33f-40f1-87ab-91be02ef4c11')
Delayed('compute_metrics-875ce14e-b8b6-4444-a4f0-80d723705aa9')
Delayed('compute_metrics-adeb3ecc-9af8-43e3-ab63-b40a8d38e57c')
Delayed('compute_metrics-f4c21794-ace5-4

In [None]:
# Disconnect the client, then shut down the LocalCluster itself: the cluster
# was created explicitly, so closing the client alone leaves its worker
# processes running.
client.close()
cluster.close()