In [None]:
%%bash
python - <<'PY'
import random, numpy as np, torch, os
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
print("Seeds fixed to", SEED)
PY

In [None]:
!pip install -q transformers==4.52.2 accelerate==1.7.0 datasets==2.20.0 sqlalchemy==2.0.30 sqlite-utils==3.38 evaluate==0.4.3

In [None]:
!pip list | grep -E 'transformers|accelerate|datasets|sqlalchemy|sqlite-utils|moz-sql-parser|evaluate'

accelerate                            1.7.0
datasets                              2.20.0
evaluate                              0.4.3
sentence-transformers                 4.1.0
sqlite-utils                          3.38
tensorflow-datasets                   4.9.8
transformers                          4.52.2
vega-datasets                         0.9.0


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import login

login()

In [None]:
from datasets import load_dataset

spider_train   = load_dataset("spider")["train"]
spider_val = load_dataset("spider")["validation"]

print(spider_train)
print(spider_val)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "meta-llama/Llama-3.1-8B"  # use preferred model
tokenizer  = AutoTokenizer.from_pretrained(model_name)
model      = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)
model.eval()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_

In [None]:
!pip install -q kagglehub            # tiny, pure-python package
import kagglehub, os, json, zipfile
from pathlib import Path

# Either:
#   kagglehub.login()                              # will prompt
#   – or –
os.environ["KAGGLE_USERNAME"] = ""        # <- set once per session
os.environ["KAGGLE_KEY"]     = ""


In [None]:
import kagglehub, zipfile
from pathlib import Path

root = Path( kagglehub.dataset_download(
           "jeromeblanchet/yale-universitys-spider-10-nlp-dataset") )

# 1️⃣  Look for a zip first (old mirror versions)
zip_files = list(root.rglob("spider.zip"))
if zip_files:
    print("Found spider.zip – extracting…")
    with zipfile.ZipFile(zip_files[0], "r") as zf:
        zf.extractall("spider")          # creates ./spider/database/…
    SPIDER_DIR = Path("spider")

# 2️⃣  Otherwise assume the folder is already there (current mirror)
else:
    print("No spider.zip – using unpacked folder")
    # the mirror root itself is usually “…/spider/”, but we search just in case
    try:
        SPIDER_DIR = next(root.rglob("spider/database")).parent
    except StopIteration as e:
        raise FileNotFoundError(
            "Could not locate Spider data inside the mirror download") from e

DB_ROOT = SPIDER_DIR / "database"
print("Spider databases live at:", DB_ROOT.resolve())


No spider.zip – using unpacked folder
Spider databases live at: /root/.cache/kagglehub/datasets/jeromeblanchet/yale-universitys-spider-10-nlp-dataset/versions/1/spider/database


In [None]:
DB_ROOT = Path("/kaggle/input/yale-universitys-spider-10-nlp-dataset/spider/database")

In [None]:
#   reflect every .sqlite file to create a plain-text schema description


import sqlalchemy as sa

_schema_cache: dict[str, str] = {}            # {db_id: "…schema string…"}

def get_schema_string(db_id: str) -> str:
    """Return a compact textual schema for the given Spider database."""
    if db_id in _schema_cache:
        return _schema_cache[db_id]

    db_file = DB_ROOT / db_id / f"{db_id}.sqlite"
    engine  = sa.create_engine(f"sqlite:///{db_file}")
    insp    = sa.inspect(engine)

    parts   = []
    for tbl in sorted(insp.get_table_names()):
        cols = [c["name"] for c in insp.get_columns(tbl)]
        parts.append(f"{tbl}({', '.join(cols)})")

    schema_str = ", ".join(parts)
    _schema_cache[db_id] = schema_str
    return schema_str

In [None]:
from datasets import load_dataset

# Use any split you like – here we sample 5 from Spider-train
NUM_SHOTS      = 5
spider_train   = load_dataset("spider")["train"]

random.seed(42
            )
# Turn each row into a normal dict and attach the schema string
DEMO_SET = [
    {
        **ex,                                  # copy all original fields
        "schema_str": get_schema_string(ex["db_id"])
    }
    for ex in spider_train.shuffle(seed=42).select(range(NUM_SHOTS))
]


In [None]:
def build_prompt(nl_question: str, target_schema: str) -> str:
    """
    Compose a 5-shot prompt:
        [global instruction]
        For i = 1..5:   ### Example-i Schema: …   ### Example-i Question: …   ### Example-i SQL: …
        ### Database Schema:  [schema for *this* db]
        ### Question:         [new NL question]
        ### SQL:
    """
    parts = [
        "### Instruction:\n"
        "You are an expert SQL developer. Given a database schema and a natural-language\n"
        "question, write ONE syntactically correct SQL query that answers the question.\n"
        "Return **only** the SQL; do not repeat the schema or add explanations.\n"
    ]

    for i, ex in enumerate(DEMO_SET, 1):
        parts.append(f"### Example {i} Schema:\n{ex['schema_str']}\n")
        parts.append(f"### Example {i} Question:\n{ex['question']}\n")
        parts.append(f"### Example {i} SQL:\n{ex['query'].strip()}\n")

    parts.append(f"### Database Schema:\n{target_schema}\n")
    parts.append(f"### Question:\n{nl_question}\n")
    parts.append("### SQL:\n")

    return "\n".join(parts)


In [None]:
import re


def generate_sql(nl_question: str, schema: str) -> str:
    prompt     = build_prompt(nl_question, schema)
    inputs     = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_len  = inputs["input_ids"].shape[1]

    model.config.pad_token_id = tokenizer.eos_token_id

    out        = model.generate(
        **inputs,
        max_new_tokens=128,
        num_beams=5,
        early_stopping=True,
        do_sample=False,
    )

    gen_tokens = out[0, input_len:]
    sql        = tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()

    # trim markdown fences etc.
    #sql = sql.split("```")[-1].strip()
    sql = sql.split(";")[0].replace("\n", " ").strip()
    #return sql

    # keep everything *before* first sentinel
    sql = re.split(r"(###|\n\n)", sql, maxsplit=1)[0]
    # remove prefix for llama3.1
    prefix = "```sql "
    if sql.startswith(prefix):
      sql = sql[len(prefix):]
    else:
      sql = sql


    return sql




In [None]:
example      = spider_val[1]
db_id        = example["db_id"]
schema_str   = get_schema_string(db_id)
predicted_sql= generate_sql(example["question"], schema_str)
print("Gold SQL: ", example["query"])
print("Predicted SQL: ", predicted_sql)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Gold SQL:  SELECT count(*) FROM singer
Predicted SQL:  SELECT COUNT(DISTINCT T1.Singer_ID) FROM singer AS T1 JOIN singer_in_concert AS T2 ON T1.Singer_ID = T2.Singer_ID


In [None]:
def exact_match(pred, gold):
    return pred.strip().lower() == gold.strip().lower()

In [None]:
from sqlglot import parse_one, expressions

def flatten_ast(node):
    """
    Recursively collect all node‐type names and literal values as lowercase strings.
    """
    out = set()

    def walk(n):
        # record the AST node type
        out.add(type(n).__name__.lower())

        # record any literal (e.g. identifiers, strings, numbers)
        if hasattr(n, "this") and isinstance(n.this, (str, int, float)):
            out.add(str(n.this).lower())

        # recurse into child expressions
        for arg in n.args.values():
            if isinstance(arg, list):
                for child in arg:
                    if isinstance(child, expressions.Expression):
                        walk(child)
            elif isinstance(arg, expressions.Expression):
                walk(arg)

    walk(node)
    return out

def component_match(pred_sql, gold_sql):
    try:
        pred_ast = parse_one(pred_sql)
        gold_ast = parse_one(gold_sql)
    except Exception:
        return 0.0

    pred_set = flatten_ast(pred_ast)
    gold_set = flatten_ast(gold_ast)
    if not gold_set:
        return 0.0
    return len(pred_set & gold_set) / len(gold_set)


In [None]:
import sqlite3, pandas as pd, numpy as np
from tqdm import tqdm

def run_query(sql: str, db_path: Path):
    """Return query result as a sorted list of tuples (order-independent)."""
    try:
        with sqlite3.connect(db_path) as conn:
            df = pd.read_sql_query(sql, conn)
        # sort rows + cols for order-invariant comparison
        return tuple(map(tuple, df.sort_index(axis=1).sort_values(list(df.columns)).to_numpy()))
    except Exception as e:
        # any failure counts as wrong
        return f"ERROR-{e}"

def execution_accuracy(dataset):
    """Compute Spider-style Execution Accuracy on a HF split (e.g. validation)."""
    correct = 0
    for ex in tqdm(dataset, desc="Evaluating"):
        db_id   = ex["db_id"]
        schema  = get_schema_string(db_id)
        pred_sql= generate_sql(ex["question"], schema)

        db_file = DB_ROOT / db_id / f"{db_id}.sqlite"
        gold    = run_query(ex["query"],     db_file)
        pred    = run_query(pred_sql,        db_file)

        if gold == pred:
            correct += 1

    return correct / len(dataset)

In [None]:
spider_val

Dataset({
    features: ['db_id', 'query', 'question', 'query_toks', 'query_toks_no_value', 'question_toks'],
    num_rows: 1034
})

In [None]:
p = 835
print(spider_val[p]['query'])
print(generate_sql(spider_val[p]['question'], get_schema_string(spider_val[p]['db_id'])))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


SELECT count(DISTINCT Nationality) FROM conductor
SELECT COUNT(DISTINCT T1.Nationality) FROM conductor AS T1


In [None]:
exact_match(generate_sql(spider_val[p]['question'], get_schema_string(spider_val[p]['db_id'])), spider_val[p]['query'])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


False

In [None]:
component_match(generate_sql(spider_val[p]['question'], get_schema_string(spider_val[p]['db_id'])), spider_val[p]['query'])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


1.0

# Quick Sanity Check

In [None]:
# 1) Size of a 20 % slice (Spider dev has 1 034 samples)
num_samples = int(0.20 * len(spider_val))      # → 206

sample_val  = spider_val.shuffle(seed=42).select(range(num_samples))

example      = sample_val[1]
db_id        = example["db_id"]
schema_str   = get_schema_string(db_id)
predicted_sql= generate_sql(example["question"], schema_str)
print("Gold SQL: ", example["query"])
print("Predicted SQL: ", predicted_sql)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Gold SQL:  SELECT count(*) FROM Documents AS T1 JOIN Templates AS T2 ON T1.Template_ID  =  T2.Template_ID WHERE T2.Template_Type_Code  =  'PPT'
Predicted SQL:  SELECT COUNT(*) FROM documents AS T1 JOIN templates AS T2 ON T1.template_id = T2.template_id WHERE T2.template_type_code = "PPT"  


In [None]:
sample_val[1]

{'db_id': 'cre_Doc_Template_Mgt',
 'query': "SELECT count(*) FROM Documents AS T1 JOIN Templates AS T2 ON T1.Template_ID  =  T2.Template_ID WHERE T2.Template_Type_Code  =  'PPT'",
 'question': "How many documents are using the template with type code 'PPT'?",
 'query_toks': ['SELECT',
  'count',
  '(',
  '*',
  ')',
  'FROM',
  'Documents',
  'AS',
  'T1',
  'JOIN',
  'Templates',
  'AS',
  'T2',
  'ON',
  'T1.Template_ID',
  '=',
  'T2.Template_ID',
  'WHERE',
  'T2.Template_Type_Code',
  '=',
  "'PPT",
  "'"],
 'query_toks_no_value': ['select',
  'count',
  '(',
  '*',
  ')',
  'from',
  'documents',
  'as',
  't1',
  'join',
  'templates',
  'as',
  't2',
  'on',
  't1',
  '.',
  'template_id',
  '=',
  't2',
  '.',
  'template_id',
  'where',
  't2',
  '.',
  'template_type_code',
  '=',
  'value'],
 'question_toks': ['How',
  'many',
  'documents',
  'are',
  'using',
  'the',
  'template',
  'with',
  'type',
  'code',
  "'PPT",
  "'",
  '?']}

In [None]:
print(exact_match(predicted_sql, example["query"]))
print(component_match(predicted_sql, example["query"]))

False
0.9473684210526315


# Simple Random Sampling of 20% of the validation set.



In [None]:
# Evaluation on a fixed 20% Spider dev sample
import time, torch, numpy as np
from tqdm import tqdm
import warnings
from transformers import logging
import random

logging.set_verbosity_error()
warnings.filterwarnings('ignore')

random.seed(42)

# 1) Size of a 20 % slice (Spider dev has 1 034 samples)
num_samples = int(0.20 * len(spider_val))      # → 206

sample_val  = spider_val.shuffle(seed=42).select(range(num_samples))


# 2) containers
em_scores, cm_scores, ex_scores, times = [], [], [], []

# 3) start fresh CUDA-peak tracking
if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()

# 4) main loop
for ex in tqdm(sample_val, desc="Evaluating"):
    db_id   = ex["db_id"]
    schema  = get_schema_string(db_id)

    t0      = time.perf_counter()
    pred_sql= generate_sql(ex["question"], schema)
    times.append(time.perf_counter() - t0)

    gold_sql= ex["query"]

    # exact + component match
    em_scores.append( float(exact_match(pred_sql, gold_sql)) )
    cm_scores.append( component_match(pred_sql, gold_sql) )

    # execution accuracy
    db_path = DB_ROOT / db_id / f"{db_id}.sqlite"
    gold_res= run_query(gold_sql,  db_path)
    pred_res= run_query(pred_sql, db_path)
    ex_scores.append( int(gold_res == pred_res) )

# 5) aggregate & report
print("\n\nExact Match    :", np.mean(em_scores))
print("Component Match    :", np.mean(cm_scores))
print("Execution Accuracy :", np.mean(ex_scores))
print("Avg. Latency  (s)  :", np.mean(times))
print("95% Latency  (s)   :", np.percentile(times, 95))
if torch.cuda.is_available():
    print("GPU Mem Peak       :", torch.cuda.max_memory_allocated() / 1e9, "GB")
else:
    print("GPU Mem Peak       : N/A (CPU)")


Evaluating: 100%|██████████| 206/206 [16:09<00:00,  4.71s/it]



Exact Match        : 0.009708737864077669
Component Match    : 0.9260067581323649
Execution Accuracy : 0.470873786407767
Avg. Latency  (s)  : 4.621980702470875
95% Latency  (s)   : 6.426491680250251
GPU Mem Peak       : 18.3346304 GB





# Full Spider Validation data commented out

In [None]:
# # Evaluation on complete Spider dev sample
# import time, torch, numpy as np
# from tqdm import tqdm
# import warnings
# warnings.filterwarnings('ignore')

# # 1) Full spider val
# sample_val = spider_val

# # 2) containers
# em_scores, cm_scores, ex_scores, times = [], [], [], []

# # 3) start fresh CUDA-peak tracking
# if torch.cuda.is_available():
#     torch.cuda.reset_peak_memory_stats()

# # 4) main loop
# for ex in tqdm(sample_val, desc="Evaluating"):
#     db_id   = ex["db_id"]
#     schema  = get_schema_string(db_id)

#     t0      = time.perf_counter()
#     pred_sql= generate_sql(ex["question"], schema)
#     times.append(time.perf_counter() - t0)

#     gold_sql= ex["query"]

#     # exact + component match
#     em_scores.append( float(exact_match(pred_sql, gold_sql)) )
#     cm_scores.append( component_match(pred_sql, gold_sql) )

#     # execution accuracy
#     db_path = DB_ROOT / db_id / f"{db_id}.sqlite"
#     gold_res= run_query(gold_sql,  db_path)
#     pred_res= run_query(pred_sql, db_path)
#     ex_scores.append( int(gold_res == pred_res) )

# # 5) aggregate & report
# print("\n\nExact Match        :", np.mean(em_scores))
# print("Component Match    :", np.mean(cm_scores))
# print("Execution Accuracy :", np.mean(ex_scores))
# print("Avg. Latency  (s)  :", np.mean(times))
# print("95% Latency  (s)   :", np.percentile(times, 95))
# if torch.cuda.is_available():
#     print("GPU Mem Peak       :", torch.cuda.max_memory_allocated() / 1e9, "GB")
# else:
#     print("GPU Mem Peak       : N/A (CPU)")


In [None]:
# !pip install --upgrade accelerate
# !pip install --upgrade transformers
# !pip install --upgrade sentence-transformers

# Introducing MiniLM Setence Transformer for Few Shot Prompting

In [None]:

# 1 ▪▪▪  Dynamic-retrieval helper: get_k_shots
from sentence_transformers import SentenceTransformer, util

# (Run this exactly once.)  ──────────────────────────────────────────────
embedder   = SentenceTransformer("all-MiniLM-L6-v2")
spider_train = load_dataset("spider")["train"]          # if not in memory
train_emb  = embedder.encode(spider_train["question"], convert_to_tensor=True)

def get_k_shots(nl_question: str, k: int = 5):
    """
    Return k training examples whose NL questions are semantically closest
    to `nl_question`.  Each example is a plain dict containing question,
    gold SQL, db_id, and a pre-computed schema string.
    """
    q_emb  = embedder.encode(nl_question, convert_to_tensor=True)
    hits   = util.semantic_search(q_emb, train_emb, top_k=k)[0]   # [{corpus_id, score}, …]

    demos  = []
    for h in hits:
        idx = h["corpus_id"]
        ex  = spider_train[idx]
        demos.append({
            "question"    : ex["question"],
            "query"       : ex["query"],
            "db_id"       : ex["db_id"],
            "schema_str"  : get_schema_string(ex["db_id"]),
        })
    return demos


# ║ 2 ▪▪▪  (Optional) schema-pruning stub                                ║
def prune_schema(query_sql: str, full_schema: str) -> str:
    """
    Return the subset of `full_schema` that is actually referenced in `query_sql`.
    For now we simply return full_schema; refine later if you like.
    """
    return full_schema

# 3 Build prompt

def build_prompt(nl_question, target_schema):

    demos = get_k_shots(nl_question, k=5)      # dynamically chosen

    parts = ["### Instruction:\nReturn ONE SQL query only.\n"]

    for i, ex in enumerate(demos, 1):

        used_schema = prune_schema(ex['query'], ex['schema_str'])  # optional

        parts += [

            f"### Example {i} Schema:\n{used_schema}",

            f"### Example {i} Question:\n{ex['question']}",

            f"### Example {i} SQL:\n{ex['query'].strip()}",

            "### End\n"

        ]

    parts += [f"### Database Schema:\n{target_schema}",

              f"### Question:\n{nl_question}",

              "### SQL:\n"]

    return "\n".join(parts)


# 3  Fix the small typo in generate_sql
import re

def generate_sql(nl_question: str, schema: str) -> str:
    prompt     = build_prompt(nl_question, schema)
    inputs     = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_len  = inputs["input_ids"].shape[1]

    model.config.pad_token_id = tokenizer.eos_token_id
    out = model.generate(
        **inputs,
        max_new_tokens=128,
        num_beams=5,
        early_stopping=True,
        do_sample=False,
    )

    gen_text = tokenizer.decode(out[0, input_len:], skip_special_tokens=True)

    # keep everything before the first sentinel (`###`, blank line, or back-ticks)
    sql = re.split(r"(###|\n\s*\n|```)", gen_text, maxsplit=1)[0]
    sql = sql.split(";", 1)[0]                 # Spider queries are single-stmt
    return sql.strip()                         # ← RETURN VARIABLE WAS “SQL” BEFORE


In [None]:
# import re


# def generate_sql(nl_question: str, schema: str) -> str:
#     prompt     = build_prompt(nl_question, schema)
#     inputs     = tokenizer(prompt, return_tensors="pt").to(model.device)
#     input_len  = inputs["input_ids"].shape[1]

#     model.config.pad_token_id = tokenizer.eos_token_id

#     out        = model.generate(
#         **inputs,
#         max_new_tokens=128,
#         num_beams=5,
#         early_stopping=True,
#         do_sample=False,
#     )

#     gen_tokens = out[0, input_len:]
#     sql        = tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()

#     # trim markdown fences etc.
#     #sql = sql.split("```")[-1].strip()
#     sql = sql.split(";")[0].replace("\n", " ").strip()
#     #return sql

#     # keep everything *before* first sentinel
#     sql = re.split(r"(###|\n\n)", sql, maxsplit=1)[0]
#     # remove prefix for llama3.1
#     prefix = "```sql "
#     if sql.startswith(prefix):
#       sql = sql[len(prefix):]
#     else:
#       sql = sql


#     return sql

In [None]:
# Evaluation on a fixed 20% Spider dev sample
import time, torch, numpy as np
from tqdm import tqdm
import warnings
from transformers import logging
import random

logging.set_verbosity_error()
warnings.filterwarnings('ignore')

random.seed(42)

# 1) Size of a 20 % slice (Spider dev has 1 034 samples)
num_samples = int(0.20 * len(spider_val))      # → 206

sample_val  = spider_val.shuffle(seed=42).select(range(num_samples))


# 2) containers
em_scores, cm_scores, ex_scores, times = [], [], [], []

# 3) start fresh CUDA-peak tracking
if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()

# 4) main loop
for ex in tqdm(sample_val, desc="Evaluating"):
    db_id   = ex["db_id"]
    schema  = get_schema_string(db_id)

    t0      = time.perf_counter()
    pred_sql= generate_sql(ex["question"], schema)
    times.append(time.perf_counter() - t0)

    gold_sql= ex["query"]

    # exact + component match
    em_scores.append( float(exact_match(pred_sql, gold_sql)) )
    cm_scores.append( component_match(pred_sql, gold_sql) )

    # execution accuracy
    db_path = DB_ROOT / db_id / f"{db_id}.sqlite"
    gold_res= run_query(gold_sql,  db_path)
    pred_res= run_query(pred_sql, db_path)
    ex_scores.append( int(gold_res == pred_res) )

# 5) aggregate & report
print("\n\nExact Match    :", np.mean(em_scores))
print("Component Match    :", np.mean(cm_scores))
print("Execution Accuracy :", np.mean(ex_scores))
print("Avg. Latency  (s)  :", np.mean(times))
print("95% Latency  (s)   :", np.percentile(times, 95))
if torch.cuda.is_available():
    print("GPU Mem Peak       :", torch.cuda.max_memory_allocated() / 1e9, "GB")
else:
    print("GPU Mem Peak       : N/A (CPU)")


Evaluating: 100%|██████████| 206/206 [20:00<00:00,  5.83s/it]



Exact Match    : 0.1553398058252427
Component Match    : 0.9015262503611605
Execution Accuracy : 0.5631067961165048
Avg. Latency  (s)  : 5.71617229285437
95% Latency  (s)   : 6.922948900249821
GPU Mem Peak       : 21.245072896 GB



