# DS 677-004 - GROUP PROJECT: <br/>SQL Instruction-Response Generation Pipeline using vLLM Self-Alignment


# STEP 1: Seed Dataset Gathering & Curation

In [None]:
# Mount Google Drive so later cells can read and write under /content/drive.
# `google.colab` is only importable inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!git clone https://github.com/bigcode-project/starcoder2-self-align.git

fatal: destination path 'starcoder2-self-align' already exists and is not an empty directory.


In [None]:
!pip install --quiet -r "/content/drive/MyDrive/DS 677/Project/requirements.txt"

In [None]:
# !pip install --upgrade "numpy<2.0"
# !pip install "pybind11>=2.12"
# !pip install --upgrade --force-reinstall pandas

!pip install --quiet tree-sitter
!pip install --quiet tree-sitter-language-pack
!pip install --quiet tree-sitter-sql
!pip install --quiet boto3
!pip install sqlparse



## Imports & Setup

In [None]:
# tree_sitter_parser.py
import tree_sitter_sql as tssql
from tree_sitter import Language, Parser
from tree_sitter_language_pack import get_parser, get_language

import torch
import gc

import sys
sys.path.append("starcoder2-self-align/seed_gathering")

from huggingface_hub import login

# SECURITY: never hard-code a Hugging Face access token in a notebook; every
# shared or committed copy leaks it. The token is read from the HF_TOKEN
# environment variable, falling back to an interactive prompt that does not
# echo or store the value. Any token that was ever pasted into a saved copy
# of this notebook should be revoked and re-issued.
import os
from getpass import getpass

login(os.environ.get("HF_TOKEN") or getpass("Hugging Face token: "))

import os, time, gzip, json
import pandas as pd
import boto3
from botocore import UNSIGNED
from botocore.config import Config
import smart_open
from datasets import load_dataset

from tqdm import tqdm
import random
import argparse

import json
from tree_sitter_language_pack import get_parser

import sqlparse
import sqlite3

LANGUAGE = get_language("sql")   # tree_sitter.Language for the SQL grammar
_PARSER = get_parser("sql")      # tree_sitter.Parser for SQL, created once here


def make_parser():
    """
    Return the module-level Tree-Sitter parser for SQL.

    The same `_PARSER` object is handed back on every call; no new parser
    is constructed here.
    """
    return _PARSER

def node_to_string(node, source_bytes: bytes) -> str:
    """
    Recover the source text covered by a Tree-Sitter node.

    The node's `start_byte`/`end_byte` offsets are used to slice
    `source_bytes`; the slice is decoded as UTF-8, with undecodable bytes
    replaced rather than raising.
    """
    begin, end = node.start_byte, node.end_byte
    fragment = source_bytes[begin:end]
    return fragment.decode("utf8", errors="replace")



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.11/dist-package

## Config & Setup

In [None]:
# Upper bound on how many dataset records the download loop visits.
MAX_FILES  = 1000
# Passed to load_dataset(cache_dir=...); a relative path.
CACHE_DIR  = "adp232/stack"
# Folder on the mounted Drive where the raw download (1_sql_raw.jsonl) is written.
OUTPUT_DIR = "/content/drive/MyDrive/DS 677/Project/output_sql"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# S3 client configured with UNSIGNED signatures, i.e. requests carry no AWS credentials.
s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
def download_contents(blob_id, src_encoding):
    """
    Fetch one blob from the `softwareheritage` S3 bucket and return it as text.

    The object stored under `content/<blob_id>` is read in full through the
    module-level `s3` client, gunzipped, and decoded with `src_encoding`;
    bytes that cannot be decoded are replaced instead of raising.
    """
    with smart_open.open(
        f"s3://softwareheritage/content/{blob_id}",
        "rb",
        transport_params={"client": s3},
    ) as remote:
        compressed = remote.read()
    raw = gzip.decompress(compressed)
    return raw.decode(src_encoding, errors="replace")

## Load Metadata

In [None]:
# Load the "SQL" configuration of bigcode/the-stack-v2-dedup (train split).
# Judging by the record keys printed below, rows are metadata (blob_id, path,
# src_encoding, ...); the file text itself is fetched later with
# download_contents().
# streaming=False is required here: len(ds) and ds[0] below need an indexable dataset.
ds = load_dataset(
    "bigcode/the-stack-v2-dedup",
    "SQL",
    cache_dir=CACHE_DIR,
    split="train",
    streaming=False
)

print(f"✅ Metadata records available: {len(ds)}")
print("Example record keys:", list(ds[0].keys()))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Resolving data files:   0%|          | 0/757 [00:00<?, ?it/s]

✅ Metadata records available: 4245523
Example record keys: ['blob_id', 'directory_id', 'path', 'content_id', 'detected_licenses', 'license_type', 'repo_name', 'snapshot_id', 'revision_id', 'branch_name', 'visit_date', 'revision_date', 'committer_date', 'github_id', 'star_events_count', 'fork_events_count', 'gha_license_id', 'gha_event_created_at', 'gha_created_at', 'gha_language', 'src_encoding', 'language', 'is_vendor', 'is_generated', 'length_bytes', 'extension', 'filename']


## Sub-step 1: Gather Unfiltered Seed SQL Files

In [None]:
# Phase A: download the text of (at most) the first MAX_FILES records and
# store them as JSONL rows of {"path", "content"}.
rows = []
n_failed = 0
for i, item in enumerate(ds):
    if i >= MAX_FILES:
        break
    blob_id = item.get("blob_id")
    if not blob_id:
        continue
    try:
        # `src_encoding` can be present but null, in which case
        # `.get(key, default)` would hand None to `.decode()`.
        sql = download_contents(blob_id, item.get("src_encoding") or "utf-8").strip()
    except Exception as exc:
        # One unreachable or corrupt blob must not abort the whole run:
        # rows are only persisted after the loop, so an uncaught error here
        # would throw away everything downloaded so far.
        n_failed += 1
        print(f"  ⚠️ Skipping blob {blob_id}: {type(exc).__name__}: {exc}")
        sql = ""
    if sql:
        rows.append({"path": item.get("path",""), "content": sql})
    if (i+1) % 20 == 0:
        print(f"  Downloaded {i+1}/{MAX_FILES}")
    # Small pause between requests to stay gentle on the public bucket.
    time.sleep(0.05)

if n_failed:
    print(f"  {n_failed} blob(s) could not be downloaded and were skipped")

raw_path = os.path.join(OUTPUT_DIR, "1_sql_raw.jsonl")
pd.DataFrame(rows).to_json(raw_path, orient="records", lines=True)
print(f"✅ Phase A complete → {raw_path} ({len(rows)} rows)")

In [None]:
def extract_sql_texts_with_delimiter(input_jsonl_path: str, output_txt_path: str, delimiter: str = "\n" + "-"*80 + "\n") -> list:
    """
    Dump the 'content' field of every JSONL record into one delimited text file.

    Args:
        input_jsonl_path: JSONL file; each usable line is a JSON object with a
            non-empty 'content' value.
        output_txt_path: text file to (over)write.
        delimiter: separator written between snippets (not after the last one).

    Returns:
        The list of snippets, in file order, exactly as written.

    Lines that are not valid JSON, are not JSON objects, or have no/empty
    'content' are skipped. Both files are handled as UTF-8 so the result does
    not depend on the platform's default encoding.
    """
    contents = []
    with open(input_jsonl_path, 'r', encoding='utf-8') as infile:
        for line in infile:
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            content = rec.get('content') if isinstance(rec, dict) else None
            if content:
                # json.loads has already decoded JSON escapes; this additionally
                # turns a literal backslash + 'n' inside the SQL text into a
                # real newline (documented behaviour of this helper).
                contents.append(content.replace('\\n', '\n'))

    # join() places the delimiter between snippets only, never after the last.
    with open(output_txt_path, 'w', encoding='utf-8') as outfile:
        outfile.write(delimiter.join(contents))

    return contents

# input_path = '/content/drive/MyDrive/DS 677/Project/output_sql/1_sql_raw.jsonl'
# output_path = '/content/drive/MyDrive/DS 677/Project/output_sql/1_sql_content_list.json'
# sql_contents = extract_sql_texts_with_delimiter(input_path, output_path)

In [None]:
def jsonl_to_pipe_txt(
    input_path: str,
    output_path: str,
    fields: list[str],
    delimiter: str = "|",
    write_header: bool = True
):
    """
    Convert a JSONL file to a delimiter-separated TXT, one record per line.

    Args:
      input_path:  path to the .jsonl input (read as UTF-8)
      output_path: path to write the .txt output (written as UTF-8)
      fields:      list of top-level JSON keys to extract (in order)
      delimiter:   the field separator (default '|')
      write_header: if True, writes a header line with field names

    Missing keys become empty strings and list values are comma-joined.
    Line breaks inside a value are written as the two-character escapes
    `\\n` / `\\r` so that every record stays on a single physical line.
    Blank input lines are ignored. Occurrences of `delimiter` inside a value
    are NOT escaped.
    """
    with open(input_path,  "r", encoding="utf-8") as in_f, \
         open(output_path, "w", encoding="utf-8") as out_f:

        if write_header:
            out_f.write(delimiter.join(fields) + "\n")

        for line in in_f:
            # A blank (e.g. trailing) line is not a record; json.loads would raise on it.
            if not line.strip():
                continue
            rec = json.loads(line)
            vals = []
            for key in fields:
                v = rec.get(key, "")
                # join lists with commas, stringify everything else
                v = ",".join(map(str, v)) if isinstance(v, list) else str(v)
                # escape carriage returns as well as newlines: a raw '\r'
                # is also treated as a line break by text-mode readers
                v = v.replace("\r", "\\r").replace("\n", "\\n")
                vals.append(v)
            out_f.write(delimiter.join(vals) + "\n")

    print(f"✅ Wrote {len(fields)}‑column {delimiter!r}-delimited file → {output_path}")

## Sub-step 2: Parsing and Syntax Checking -> High Quality Subset

In [None]:
def preprocess_sql(sql: str) -> str:
    """
    Pass-through hook applied to SQL text before the parse/syntax checks.

    Currently returns `sql` unchanged: every normalization step listed below
    is commented out, so no BOM removal, line-ending normalization,
    whitespace collapsing or semicolon handling takes place.
    """
    # Disabled normalization steps, kept for reference.
    # NOTE(review): steps 3-4 use `re`, which is not among the imports visible in this notebook.
    # # 1. Remove BOM
    # sql = sql.lstrip('\ufeff')
    # # 2. Normalize line breaks
    # sql = sql.replace('\r\n', '\n').replace('\r', '\n')
    # # 3. Remove control chars (except newline & tab)
    # sql = re.sub(r'[^\x09\x0A\x0D\x20-\x7E]', ' ', sql)
    # # 4. Collapse runs of spaces/tabs
    # sql = re.sub(r'[ \t]+', ' ', sql)
    # # 5. Trim and ensure terminator
    # sql = sql.strip()
    # if not sql.endswith(';'):
    #     sql += ';'


    # sql = sql.replace("`", "'").replace(";",'')
    return sql

def checking_parse(sql: str) -> bool:
    """
    Report whether sqlparse splits the preprocessed text into at least one statement.

    Returns True when `sqlparse.parse` yields a non-empty result and False
    when it yields nothing. If parsing raises, the exception is printed and
    False is returned.
    """
    prepared = preprocess_sql(sql)
    try:
        parsed = sqlparse.parse(prepared)
    except Exception as err:
        print(err)
        return False
    return bool(parsed)

def checking_syntax(sql):
    """
    Dry-run the SQL text against a fresh in-memory SQLite database.

    Args:
        sql: one or more SQL statements as a single string.

    Returns:
        bool: True when the script executes, or when it fails with an error
        whose message does not contain "syntax error" (for example a missing
        table, which says nothing about the syntax). False when SQLite
        reports a syntax error.

    Note: the check uses SQLite's grammar, so constructs specific to other
    dialects can be reported as syntax errors.
    """
    sql_clean = preprocess_sql(sql)
    # Connect before the try block so the `finally` clause never sees an unbound name.
    conn = sqlite3.connect(":memory:")
    try:
        conn.executescript(sql_clean)
        return True
    except sqlite3.Error as e:
        # Returning the error message itself (a non-empty, truthy string)
        # made every failing statement pass `checking_parse(...) and
        # checking_syntax(...)`. Return a real boolean instead.
        return "syntax error" not in str(e).lower()
    finally:
        conn.close()

In [None]:
IN_FILE    = "/content/drive/MyDrive/DS 677/Project/output_sql/1_sql_raw.jsonl"
OUT_FILE = "/content/drive/MyDrive/DS 677/Project/output_sql/2_sql_parsed.jsonl"

# Shared Tree-Sitter SQL parser (prebuilt grammar)
parser = make_parser()

# Phase B: split every raw file into top-level statements and keep the ones
# that pass both checks, one JSON object {"path", "content"} per statement.
with open(IN_FILE, encoding="utf-8") as rf, open(OUT_FILE, "w", encoding="utf-8") as wf:
    for line in tqdm(rf, desc="Parsing and Filtering for Valid SQL"):
        rec = json.loads(line)

        # Tree-Sitter works on bytes; node offsets are byte offsets into `src`.
        src = rec["content"].encode("utf8")
        tree = parser.parse(src)

        for node in tree.root_node.children:
            # Only top-level 'statement' nodes are extracted. The earlier
            # `node.type == "statement" in node.type` was a chained comparison
            # that reduced to exactly this test.
            if node.type != "statement":
                continue
            stmt = node_to_string(node, src).strip()
            if stmt and checking_parse(stmt) and checking_syntax(stmt):
                out = {"path": rec["path"], "content": stmt}
                wf.write(json.dumps(out) + "\n")

print(f"✅ Phase B complete → {OUT_FILE}")

# input_path = OUT_FILE
# output_path = '/content/drive/MyDrive/DS 677/Project/output_sql/2_sql_content_list.json'
# sql_contents = extract_sql_texts_with_delimiter(input_path, output_path)

Parsing and Filtering for Valid SQL: 1000it [07:45,  2.15it/s]

✅ Phase B complete → /content/drive/MyDrive/DS 677/Project/output_sql/2_sql_parsed.jsonl





In [None]:
# dummy_sql = "CREATE TABLE `db` (\n  `id` bigint(20) NOT NULL AUTO_INCREMENT,\n  `db_name` varchar(255) DEFAULT NULL,\n  `driver_class_name` varchar(255) DEFAULT NULL,\n  `jdbc_url` varchar(255) DEFAULT NULL,\n  `password` varchar(255) DEFAULT NULL,\n  `pool_name` varchar(255) DEFAULT NULL,\n  `username` varchar(255) DEFAULT NULL,\n  `group_name` varchar(255) DEFAULT '',\n  `balance_type` varchar(50) DEFAULT '',\n  `minimum_idle` int(11) DEFAULT '1',\n  `maximum_pool_size` int(11) DEFAULT '1',\n  `connection_test_query` varchar(255) DEFAULT 'SELECT 1',\n  PRIMARY KEY (`id`)\n) ENGINE=MyISAM AUTO_INCREMENT=7 DEFAULT CHARSET=utf8;"
# result_check = checking_syntax(dummy_sql)
# print(result_check)


## Sub-step 3: Filter SQL Dataset via vLLM Self-Validation

In [None]:
import numpy
import pandas
from vllm import LLM, SamplingParams

import datasets

In [None]:
IN_FILE   = "/content/drive/MyDrive/DS 677/Project/output_sql/2_sql_parsed.jsonl"
OUT_FILE  = "/content/drive/MyDrive/DS 677/Project/output_sql/3_sql_filtered.jsonl"

# Argument parsing
def parse_args():
    """
    Build the sub-step 3 option namespace with every option at its default.

    The parser is fed an empty argument list, so the notebook kernel's own
    command line is never consulted.
    """
    cli = argparse.ArgumentParser(description="Stage 3 Filter SQL seeds via vLLM")
    option_table = (
        ("--input", str, IN_FILE, "Parsed SQL statements JSONL from sub-step 2"),
        ("--output", str, OUT_FILE, "Output path for filtered JSONL"),
        ("--model", str, "bigcode/starcoder2-15b", "Model for validation"),
        ("--batch-size", int, 16, "Prompts per vLLM call"),
        ("--sample-size", int, None, "Random subsample before filtering"),
        ("--summarize-batch", int, 64, "Batch size for summarization"),
        ("--validate-batch", int, 32, "Batch size for validation"),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)

    # replace [] with sys.argv[1:] in real use
    return cli.parse_args([])

random.seed(42)

def load_sqls(path):
    """
    Lazily yield the stripped 'content' string of each JSONL record in `path`.

    Records whose 'content' is missing or empty after stripping are skipped.
    """
    with open(path) as handle:
        for record in map(json.loads, handle):
            statement = record.get("content", "").strip()
            if not statement:
                continue
            yield statement

def init_model(name):
    """
    Create the vLLM engine for model `name`.

    The engine is built with `enforce_eager=True` and
    `gpu_memory_utilization=0.8`.
    """
    engine_options = {
        "enforce_eager": True,
        "gpu_memory_utilization": 0.8,
    }
    return LLM(name, **engine_options)


In [None]:
# Few‑shot examples for summarization.
# Each entry is a (sql, description) pair; summarize_sqls() renders them as
# "SQL: ... / Explanation: ..." blocks ahead of the statement to describe.
SQL_FEW_SHOTS_SUMMARIZE = [
    (
        "CREATE TABLE users (\n"
        "  id INT PRIMARY KEY,\n"
        "  name VARCHAR(100)\n"
        ");",
        "Creates a table named users with two columns: id (integer primary key) and name (varchar)."
    ),
    (
        "SELECT id, name FROM users WHERE active = 1;",
        "Selects the id and name of all active users from the users table."
    ),
]

# Few-shot examples for SQL validation.
# Each entry is a (sql, answer, rationale) triple consumed by
# build_validation_prompt(); `answer` is the literal "Yes" or "No" shown as
# the example reply, and `rationale` is the text printed under it.
SQL_FEW_SHOTS_VALIDATE = [
    ("CREATE TABLE users (\n  id INT PRIMARY KEY,\n  name VARCHAR(100)\n);",
     "Yes",
     "This creates a users table with columns id (integer primary key) and name (varchar)."),
    ("-- This comment describes the orders table",
     "Yes",
     "This is a valid single-line SQL comment."),
    ("INSERT users VALUES (1, 'Alice');",
     "No",
     "Missing INTO keyword: correct syntax is INSERT INTO users ..."),
    ("SELECT * FROM non_existing_table;",
     "Yes",
     "Selects all columns from non_existing_table; syntax is valid even if table may not exist.")
]


In [None]:
def summarize_sqls(sqls, model, batch_size=32):
    """
    Generate a one-line natural-language description for each SQL string.

    Args:
        sqls: iterable of SQL statement strings; duplicates are allowed.
        model: engine exposing `generate(prompts, sampling_params)` whose
            results carry `.outputs[0].text` (a vLLM `LLM` in this notebook).
        batch_size: number of prompts sent per `generate` call.

    Returns:
        A list of (sql, description) tuples, one per element of `sqls` and in
        the same order. Returns [] for empty input.

    Each distinct statement is sent to the model only once; duplicates reuse
    the description generated for the first occurrence. Decoding is greedy
    (temperature 0), so this is intended to match per-input generation while
    avoiding the repeated work.
    """
    sqls = list(sqls)
    if not sqls:
        return []

    # 1) Pre-build the few-shot prefix once
    few_shot_lines = []
    for ex_sql, ex_desc in SQL_FEW_SHOTS_SUMMARIZE:
        few_shot_lines.append("SQL:")
        few_shot_lines.append(ex_sql)
        few_shot_lines.append(f"Explanation: {ex_desc}")
        few_shot_lines.append("###")
    prefix = "\n".join(few_shot_lines) + "\n"

    # 2) De-duplicate while preserving first-seen order
    unique_sqls = list(dict.fromkeys(sqls))

    # Sampling settings are identical for every batch, so build them once.
    sampling = SamplingParams(
        temperature=0.0,  # deterministic
        max_tokens=100,   # enough for one-sentence summary
        stop="###"        # stop at our sentinel
    )

    # 3) Summarize the distinct statements batch by batch
    desc_by_sql = {}
    for start in tqdm(range(0, len(unique_sqls), batch_size), desc="Summarizing"):
        sql_batch = unique_sqls[start : start + batch_size]
        ctxs = [prefix + "SQL:\n" + sql + "\n" + "Explanation:" for sql in sql_batch]

        # one vLLM call for the entire batch
        outs = model.generate(ctxs, sampling)

        # 4) keep only the very first line (up to sentinel/newline)
        for sql, out in zip(sql_batch, outs):
            txt = out.outputs[0].text.strip()
            desc_by_sql[sql] = txt.split("###")[0].split("\n")[0].strip()

    # 5) Fan the descriptions back out to every input position
    return [(sql, desc_by_sql[sql]) for sql in sqls]




# Prompt templates
def build_validation_prompt(sql, desc):
    """
    Assemble the few-shot validation prompt for one (sql, description) pair.

    Every entry of SQL_FEW_SHOTS_VALIDATE is rendered as an issue/comment
    exchange. For a "Yes" example the first line of its rationale is shown
    as the description; otherwise a fixed placeholder is shown. The prompt
    ends with the pair under test and an unfinished "My answer is:" for the
    model to complete.
    """
    sections = []
    for example_sql, verdict, rationale in SQL_FEW_SHOTS_VALIDATE:
        if verdict == 'Yes':
            shown_desc = rationale.splitlines()[0]
        else:
            shown_desc = 'Incorrect description…'
        sections.append(f"""<issue_start>username_0: Check if this SQL snippet’s description is accurate.
```sql
{example_sql}
```
Description:
```
{shown_desc}
```
Is this accurate? Answer "Yes" or "No".<issue_comment>username_1: My answer is: {verdict}

{rationale}
Upvotes: 42

""")
    sections.append(f"""<issue_start>username_0: Please check this SQL snippet and its description:
```sql
{sql}
```
Description:
```
{desc}
```
Is this accurate? Answer "Yes" or "No".<issue_comment>username_1: My answer is:""")
    return "".join(sections)


def validate_pairs(pairs, model, batch_size, tokenizer):
    """
    Ask the model whether each description matches its SQL statement.

    Args:
        pairs: iterable of (sql, description) tuples.
        model: engine exposing `generate(prompts, sampling_params)` whose
            results carry `.outputs[0].text`.
        batch_size: number of prompts sent per `generate` call.
        tokenizer: object with `encode(text)` and `model_max_length`, used to
            estimate prompt length.

    Returns:
        A list of booleans aligned with `pairs`. An entry is True only when
        the model's short answer contains "yes" and does not contain "no".
        Pairs whose estimated prompt length exceeds `model_max_length` are
        never sent to the model and are reported as False.
    """
    pairs = list(pairs)

    dummy = "SELECT 1;"
    dummy_pr = build_validation_prompt(dummy, dummy)
    # Tokens contributed by the template and few-shot examples (estimate).
    fewshot = len(tokenizer.encode(dummy_pr)) - len(tokenizer.encode(dummy))
    max_len = tokenizer.model_max_length
    print(f"Few-shot overhead ≈ {fewshot} tokens")

    print("Building prompts…")
    prompts = []
    kept = []  # index into `pairs` for each prompt actually sent
    for idx, (sql, desc) in enumerate(tqdm(pairs, desc="Prompts")):
        # The description is part of the prompt too, so it counts towards the budget.
        needed = fewshot + len(tokenizer.encode(sql)) + len(tokenizer.encode(desc))
        if needed > max_len:
            # Substituting the dummy prompt here would make an over-long
            # statement inherit the dummy's verdict; reject it outright.
            continue
        prompts.append(build_validation_prompt(sql, desc))
        kept.append(idx)

    print("Generating validations via vLLM…")
    valid = [False] * len(pairs)
    # Greedy decoding, just long enough for a "Yes"/"No", stopping at end of line.
    sampling = SamplingParams(temperature=0.0, max_tokens=4, stop="\n")
    for i in tqdm(range(0, len(prompts), batch_size), desc="Validating"):
        outs = model.generate(prompts[i:i+batch_size], sampling)
        for idx, out in zip(kept[i:i+batch_size], outs):
            txt = out.outputs[0].text.strip().lower()
            valid[idx] = ("yes" in txt) and ("no" not in txt)
    return valid

In [None]:
# Release cached CUDA memory and run a garbage-collection pass before the model is loaded.
torch.cuda.empty_cache()
gc.collect()

args = parse_args()
# Load the SQL statements written by the previous step into memory.
sqls = list(load_sqls(args.input))
print(f"→ load dataset from previous step: {len(sqls)} records")

# Initialize vLLM
print(f"Initializing vLLM model {args.model}…")

model = init_model(args.model)
# Tokenizer belonging to the loaded engine.
tokenizer = model.get_tokenizer()

→ load dataset from previous step: 221848 records
Initializing vLLM model bigcode/starcoder2-7b…




INFO 05-11 23:16:20 llm_engine.py:98] Initializing an LLM engine (v0.4.1) with config: model='bigcode/starcoder2-7b', speculative_config=None, tokenizer='bigcode/starcoder2-7b', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0)
INFO 05-11 23:16:21 utils.py:608] Found nccl from library /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 05-11 23:16:21 selector.py:77] Cannot use FlashAttention backend because the flash_attn package is not found. Please install it for better performance.
INFO 05-11 23:16:21 selector.py:33] Using XFormers backend.
INFO 05-11 23:16:23 weight_utils.py:193] Using model we

In [None]:
# SQL-Description Pairs: a list of (sql, description) tuples from summarize_sqls(),
# generated in batches of args.summarize_batch prompts.
sql_descs = summarize_sqls(sqls, model, args.summarize_batch)

Summarizing:   0%|          | 0/3467 [00:00<?, ?it/s]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:55,  1.13it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:01<00:05, 10.43it/s][A
Processed prompts:  33%|███▎      | 21/64 [00:01<00:01, 29.10it/s][A
Processed prompts:  45%|████▌     | 29/64 [00:01<00:01, 30.52it/s][A
Processed prompts:  55%|█████▍    | 35/64 [00:01<00:00, 35.19it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:01<00:00, 38.33it/s][A
Processed prompts:  73%|███████▎  | 47/64 [00:01<00:00, 32.37it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:02<00:00, 22.41it/s][A
Processed prompts:  88%|████████▊ | 56/64 [00:02<00:00, 22.46it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 23.53it/s]
Summarizing:   0%|          | 1/3467 [00:02<2:39:20,  2.76s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:01<01:08,  1.09s/it]




Processed prompts:   2%|▏         | 1/64 [00:00<01:00,  1.04it/s][A




Processed prompts:   5%|▍         | 3/64 [00:02<00:39,  1.54it/s][A
Processed prompts:   6%|▋         | 4/64 [00:02<00:45,  1.33it/s][A
Processed prompts:  14%|█▍        | 9/64 [00:03<00:13,  4.12it/s][A
Processed prompts:  31%|███▏      | 20/64 [00:03<00:03, 11.16it/s][A
Processed prompts:  38%|███▊      | 24/64 [00:03<00:03, 12.56it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:03<00:02, 13.93it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:03<00:01, 16.72it/s][A
Processed prompts:  55%|█████▍    | 35/64 [00:03<00:01, 19.80it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:04<00:01, 19.62it/s][A
Processed prompts:  70%|███████   | 45/64 [00:04<00:00, 25.29it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:04<00:00, 25.10it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:04<00:00, 26.07it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:04<00:00, 18.52it/s][A
Processed prompts:  91%|█████████ | 58/64 [00:05<00:00, 13.92it/s][A
Processed prompts:  94




Processed prompts:   2%|▏         | 1/64 [00:01<02:03,  1.96s/it][A
Processed prompts:   3%|▎         | 2/64 [00:02<01:07,  1.09s/it][A
Processed prompts:   5%|▍         | 3/64 [00:02<00:40,  1.49it/s][A
Processed prompts:  20%|██        | 13/64 [00:02<00:04, 10.23it/s][A
Processed prompts:  28%|██▊       | 18/64 [00:02<00:03, 13.36it/s][A
Processed prompts:  36%|███▌      | 23/64 [00:03<00:02, 17.31it/s][A
Processed prompts:  55%|█████▍    | 35/64 [00:03<00:00, 31.79it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:03<00:00, 47.43it/s][A
Processed prompts: 100%|██████████| 64/64 [00:04<00:00, 13.26it/s]
Summarizing:   1%|          | 32/3467 [01:20<3:41:07,  3.86s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<01:01,  1.03it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:01<00:06,  8.88it/s][A
Processed prompts: 100%|██████████| 64/64 [00:01<00:00, 50.08it/s]
Summarizing:   1%|          | 33/3467 [01:21




Processed prompts:   2%|▏         | 1/64 [00:04<05:03,  4.81s/it][A




Processed prompts:   5%|▍         | 3/64 [00:05<01:20,  1.32s/it][A
Processed prompts:  11%|█         | 7/64 [00:05<00:30,  1.88it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:05<00:25,  2.20it/s][A
Processed prompts:  41%|████      | 26/64 [00:05<00:03, 12.33it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:06<00:01, 21.46it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:07<00:01, 13.50it/s][A
Processed prompts:  84%|████████▍ | 54/64 [00:07<00:00, 12.02it/s][A
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.42it/s]
Summarizing:   4%|▎         | 125/3467 [04:04<5:11:55,  5.60s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:48,  1.73s/it][A




Processed prompts:   5%|▍         | 3/64 [00:02<00:53,  1.14it/s][A
Processed prompts:   6%|▋         | 4/64 [00:03<00:54,  1.10it/s][A
Processed prompts:  27%|██▋       | 17/64 [00:04<00:06,  7.53it/s][A
Processed prompts:  52%|█████▏    | 33/64 [00:04<00:01, 17.56it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:05<00:02, 11.30it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:05<00:01, 12.69it/s][A
Processed prompts:  80%|███████▉  | 51/64 [00:06<00:01, 11.82it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:06<00:00, 11.95it/s][A
Processed prompts:  92%|█████████▏| 59/64 [00:06<00:00, 12.59it/s][A
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.97it/s]
Summarizing:   4%|▎         | 126/3467 [04:12<5:47:37,  6.24s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:53,  1.18it/s][A
Processed prompts:   8%|▊         | 5/64 [00:00<00:08,  6.73it/s][A
Processed prompts:  16%|█▌        | 10/




Processed prompts:   2%|▏         | 1/64 [00:00<00:47,  1.34it/s][A
Processed prompts:   3%|▎         | 2/64 [00:00<00:28,  2.21it/s][A
Processed prompts:   6%|▋         | 4/64 [00:01<00:12,  4.69it/s][A
Processed prompts:  14%|█▍        | 9/64 [00:01<00:04, 12.47it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:01<00:00, 41.25it/s][A
Processed prompts:  61%|██████    | 39/64 [00:01<00:00, 55.15it/s][A
Processed prompts:  73%|███████▎  | 47/64 [00:01<00:00, 57.44it/s][A
Processed prompts:  84%|████████▍ | 54/64 [00:01<00:00, 45.63it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 22.48it/s]
Summarizing:   4%|▎         | 128/3467 [04:18<4:12:57,  4.55s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:50,  1.24it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:00<00:04, 11.49it/s][A
Processed prompts:  38%|███▊      | 24/64 [00:01<00:01, 36.16it/s][A
Processed prompts:  52%|█████▏    | 33/64




Processed prompts:   2%|▏         | 1/64 [00:01<01:09,  1.10s/it][A
Processed prompts:   3%|▎         | 2/64 [00:01<00:48,  1.29it/s][A
Processed prompts:   5%|▍         | 3/64 [00:01<00:29,  2.06it/s][A
Processed prompts:   9%|▉         | 6/64 [00:01<00:11,  5.13it/s][A
Processed prompts:  27%|██▋       | 17/64 [00:02<00:02, 19.51it/s][A
Processed prompts:  53%|█████▎    | 34/64 [00:02<00:00, 41.96it/s][A
Processed prompts:  66%|██████▌   | 42/64 [00:02<00:00, 48.58it/s][A
Processed prompts:  78%|███████▊  | 50/64 [00:02<00:00, 44.44it/s][A
Processed prompts:  89%|████████▉ | 57/64 [00:02<00:00, 30.35it/s][A
Processed prompts: 100%|██████████| 64/64 [00:03<00:00, 17.87it/s]
Summarizing:   4%|▍         | 138/3467 [04:50<3:26:30,  3.72s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:01<01:07,  1.06s/it][A
Processed prompts:   5%|▍         | 3/64 [00:01<00:20,  3.03it/s][A
Processed prompts:  73%|███████▎  | 47/64




Processed prompts:   2%|▏         | 1/64 [00:01<01:08,  1.09s/it][A




Processed prompts:   3%|▎         | 2/64 [00:02<01:08,  1.10s/it][A




Processed prompts:   5%|▍         | 3/64 [00:02<00:41,  1.46it/s][A
Processed prompts:   9%|▉         | 6/64 [00:03<00:22,  2.56it/s][A
Processed prompts:  16%|█▌        | 10/64 [00:03<00:11,  4.79it/s][A
Processed prompts:  31%|███▏      | 20/64 [00:03<00:03, 12.55it/s][A
Processed prompts:  45%|████▌     | 29/64 [00:03<00:01, 20.25it/s][A
Processed prompts:  53%|█████▎    | 34/64 [00:03<00:01, 18.32it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:04<00:01, 20.64it/s][A
Processed prompts:  66%|██████▌   | 42/64 [00:04<00:01, 15.73it/s][A
Processed prompts:  70%|███████   | 45/64 [00:04<00:01, 16.00it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:04<00:00, 18.74it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:04<00:00, 19.04it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:05<00:00, 14.19it/s][A
Processed prompts:  89%|████████▉ | 57/64 [00:05<00:00,  9.94it/s][A
Processed prompts:  92%|█████████▏| 59/64 [00:06<00:00,  7.39it/s][A
Processed prompts: 10




Processed prompts:   2%|▏         | 1/64 [00:00<00:54,  1.15it/s][A
Processed prompts:   6%|▋         | 4/64 [00:01<00:16,  3.67it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:01<00:01, 30.82it/s][A
Processed prompts:  56%|█████▋    | 36/64 [00:01<00:00, 36.15it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 22.65it/s]
Summarizing:   6%|▌         | 213/3467 [07:14<3:08:38,  3.48s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:04<05:03,  4.82s/it][A
Processed prompts:  36%|███▌      | 23/64 [00:04<00:06,  6.48it/s][A
Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.73it/s]
Summarizing:   6%|▌         | 214/3467 [07:21<4:02:08,  4.47s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:04<05:13,  4.98s/it][A
Processed prompts:   6%|▋         | 4/64 [00:05<00:58,  1.03it/s][A
Processed prompts:  22%|██▏       | 14/64 [00:05<00:10, 




Processed prompts:   2%|▏         | 1/64 [00:01<01:36,  1.53s/it][A
Processed prompts:   3%|▎         | 2/64 [00:02<01:04,  1.04s/it][A
Processed prompts:   6%|▋         | 4/64 [00:02<00:28,  2.12it/s][A
Processed prompts:   9%|▉         | 6/64 [00:02<00:17,  3.25it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:02<00:12,  4.61it/s][A
Processed prompts:  17%|█▋        | 11/64 [00:03<00:07,  7.08it/s][A
Processed prompts:  22%|██▏       | 14/64 [00:03<00:05,  8.68it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:03<00:03, 13.55it/s][A
Processed prompts:  33%|███▎      | 21/64 [00:03<00:03, 12.39it/s][A
Processed prompts:  50%|█████     | 32/64 [00:03<00:01, 27.12it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:04<00:00, 34.42it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:04<00:00, 31.67it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:04<00:00, 18.50it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:05<00:00, 16.71it/s][A
Processed prompts:  91%|




Processed prompts:   2%|▏         | 1/64 [00:00<00:35,  1.78it/s][A
Processed prompts:   3%|▎         | 2/64 [00:00<00:19,  3.17it/s][A
Processed prompts:   9%|▉         | 6/64 [00:00<00:05, 10.86it/s][A
Processed prompts:  16%|█▌        | 10/64 [00:00<00:03, 17.49it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:01<00:00, 62.85it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:01<00:00, 57.17it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:01<00:00, 48.77it/s][A
Processed prompts:  89%|████████▉ | 57/64 [00:01<00:00, 55.24it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 31.95it/s]
Summarizing:   9%|▉         | 304/3467 [10:06<1:46:26,  2.02s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:52,  1.21it/s][A
Processed prompts:   8%|▊         | 5/64 [00:00<00:08,  6.79it/s][A
Processed prompts:  17%|█▋        | 11/64 [00:01<00:03, 15.65it/s][A
Processed prompts:  73%|███████▎  | 47/6




Processed prompts:   2%|▏         | 1/64 [00:02<02:32,  2.41s/it][A
Processed prompts:   5%|▍         | 3/64 [00:03<00:55,  1.09it/s][A
Processed prompts:   6%|▋         | 4/64 [00:03<00:43,  1.38it/s][A
Processed prompts:  19%|█▉        | 12/64 [00:03<00:08,  6.12it/s][A
Processed prompts:  31%|███▏      | 20/64 [00:03<00:03, 11.01it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:04<00:01, 19.99it/s][A
Processed prompts:  56%|█████▋    | 36/64 [00:04<00:01, 16.21it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:04<00:01, 15.44it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:05<00:01, 15.48it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:05<00:01, 16.69it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:05<00:00, 15.53it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:05<00:00, 15.79it/s][A
Processed prompts:  84%|████████▍ | 54/64 [00:05<00:00, 12.53it/s][A
Processed prompts:  88%|████████▊ | 56/64 [00:06<00:00,  9.02it/s][A
Processed prompts:  91




Processed prompts:   2%|▏         | 1/64 [00:00<00:52,  1.19it/s][A
Processed prompts:   3%|▎         | 2/64 [00:01<00:29,  2.07it/s][A
Processed prompts:  14%|█▍        | 9/64 [00:01<00:04, 11.04it/s][A
Processed prompts:  22%|██▏       | 14/64 [00:01<00:02, 16.97it/s][A
Processed prompts:  36%|███▌      | 23/64 [00:01<00:01, 30.25it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:01<00:00, 52.75it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:01<00:00, 54.68it/s][A
Processed prompts:  88%|████████▊ | 56/64 [00:01<00:00, 64.42it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 22.36it/s]
Summarizing:  24%|██▍       | 846/3467 [21:09<2:01:14,  2.78s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:52,  1.19it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:00<00:05, 10.91it/s][A
Processed prompts:  39%|███▉      | 25/64 [00:01<00:01, 36.50it/s][A
Processed prompts:  59%|█████▉    | 38/6




Processed prompts:   2%|▏         | 1/64 [00:00<00:49,  1.27it/s][A
Processed prompts:   5%|▍         | 3/64 [00:01<00:18,  3.24it/s][A
Processed prompts:  11%|█         | 7/64 [00:01<00:06,  8.56it/s][A
Processed prompts:  14%|█▍        | 9/64 [00:01<00:05,  9.69it/s][A
Processed prompts:  19%|█▉        | 12/64 [00:01<00:03, 13.39it/s][A
Processed prompts:  27%|██▋       | 17/64 [00:01<00:02, 19.88it/s][A
Processed prompts:  39%|███▉      | 25/64 [00:01<00:01, 29.12it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:01<00:00, 48.02it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:02<00:00, 44.40it/s][A
Processed prompts:  80%|███████▉  | 51/64 [00:02<00:00, 35.00it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:02<00:00, 31.24it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 21.66it/s]
Summarizing:  25%|██▌       | 869/3467 [22:00<3:01:21,  4.19s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/6




Processed prompts:   2%|▏         | 1/64 [00:01<01:14,  1.18s/it][A




Processed prompts:   3%|▎         | 2/64 [00:02<01:10,  1.14s/it][A
Processed prompts:  75%|███████▌  | 48/64 [00:02<00:00, 30.47it/s][A
Processed prompts: 100%|██████████| 64/64 [00:04<00:00, 13.39it/s]
Summarizing:  25%|██▌       | 872/3467 [22:17<3:58:46,  5.52s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:00<00:28,  2.21it/s][A
Processed prompts:  41%|████      | 26/64 [00:00<00:00, 48.35it/s][A
Processed prompts:  52%|█████▏    | 33/64 [00:00<00:00, 52.50it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:00<00:00, 60.90it/s][A
Processed prompts:  80%|███████▉  | 51/64 [00:01<00:00, 60.47it/s][A
Processed prompts:  91%|█████████ | 58/64 [00:01<00:00, 34.96it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 28.01it/s]
Summarizing:  25%|██▌       | 873/3467 [22:21<3:45:25,  5.21s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:43,  1.43it/s][A
Processed prompts:   6%|▋         | 4/64 [00:00<00:10,  5.91it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:00<00:01, 31.81it/s][A
Processed prompts:  69%|██████▉   | 44/64 [00:01<00:00, 75.46it/s][A
Processed prompts: 100%|██████████| 64/64 [00:01<00:00, 43.29it/s]
Summarizing:  25%|██▌       | 874/3467 [2




Processed prompts:   2%|▏         | 1/64 [00:00<00:57,  1.10it/s][A




Processed prompts:   3%|▎         | 2/64 [00:02<01:32,  1.49s/it][A




Processed prompts:  12%|█▎        | 8/64 [00:06<00:39,  1.42it/s][A




Processed prompts:  14%|█▍        | 9/64 [00:09<01:04,  1.17s/it][A




Processed prompts:  16%|█▌        | 10/64 [00:10<01:03,  1.17s/it][A
Processed prompts:  28%|██▊       | 18/64 [00:11<00:20,  2.19it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:12<00:19,  2.26it/s][A
Processed prompts:  38%|███▊      | 24/64 [00:12<00:10,  3.82it/s][A
Processed prompts:  47%|████▋     | 30/64 [00:12<00:05,  6.30it/s][A
Processed prompts:  52%|█████▏    | 33/64 [00:12<00:04,  7.61it/s][A
Processed prompts:  58%|█████▊    | 37/64 [00:12<00:02,  9.98it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:12<00:02, 11.21it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:13<00:01, 12.06it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:13<00:02,  8.09it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:13<00:01,  9.60it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:14<00:01, 11.27it/s][A
Processed prompts:  84%|████████▍ | 54/64 [00:14<00:01,  5.95it/s][A
Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.07it/s]
Summarizing:  28%|██▊ 




Processed prompts:   2%|▏         | 1/64 [00:00<00:23,  2.74it/s][A
Processed prompts:   3%|▎         | 2/64 [00:02<01:18,  1.26s/it][A
Processed prompts:   6%|▋         | 4/64 [00:02<00:31,  1.89it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:02<00:12,  4.61it/s][A
Processed prompts:  17%|█▋        | 11/64 [00:02<00:07,  6.63it/s][A
Processed prompts:  20%|██        | 13/64 [00:03<00:08,  6.35it/s][A
Processed prompts:  23%|██▎       | 15/64 [00:03<00:06,  7.34it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:03<00:04, 10.73it/s][A
Processed prompts:  34%|███▍      | 22/64 [00:03<00:03, 12.76it/s][A
Processed prompts:  39%|███▉      | 25/64 [00:03<00:02, 14.60it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:03<00:02, 14.76it/s][A
Processed prompts:  47%|████▋     | 30/64 [00:04<00:02, 15.54it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:04<00:00, 41.19it/s][A
Processed prompts: 100%|██████████| 64/64 [00:05<00:00, 11.45it/s]
Summarizing:  48%|████▊   




Processed prompts:   2%|▏         | 1/64 [00:01<01:04,  1.02s/it][A
Processed prompts:   3%|▎         | 2/64 [00:01<00:53,  1.16it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:02<00:09,  5.69it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:02<00:01, 23.91it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:02<00:00, 45.16it/s][A
Processed prompts: 100%|██████████| 64/64 [00:03<00:00, 17.88it/s]
Summarizing:  49%|████▊     | 1690/3467 [43:26<2:05:22,  4.23s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:03<03:26,  3.27s/it][A
Processed prompts:   5%|▍         | 3/64 [00:03<00:55,  1.10it/s][A
Processed prompts:   9%|▉         | 6/64 [00:03<00:22,  2.59it/s][A
Processed prompts:  17%|█▋        | 11/64 [00:03<00:09,  5.71it/s][A
Processed prompts:  22%|██▏       | 14/64 [00:03<00:06,  7.54it/s][A
Processed prompts:  36%|███▌      | 23/64 [00:04<00:02, 14.95it/s][A
Processed prompts:  55%|█████▍    | 35/6




Processed prompts:   2%|▏         | 1/64 [00:00<00:28,  2.24it/s][A
Processed prompts:  70%|███████   | 45/64 [00:00<00:00, 84.35it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 28.81it/s]
Summarizing:  51%|█████     | 1768/3467 [45:41<2:05:19,  4.43s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:13,  1.16s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:02<00:14,  3.93it/s][A
Processed prompts:  19%|█▉        | 12/64 [00:02<00:08,  5.78it/s][A
Processed prompts:  23%|██▎       | 15/64 [00:02<00:06,  7.40it/s][A
Processed prompts:  27%|██▋       | 17/64 [00:02<00:06,  7.65it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:03<00:05,  8.55it/s][A
Processed prompts:  33%|███▎      | 21/64 [00:03<00:04,  9.49it/s][A
Processed prompts:  36%|███▌      | 23/64 [00:03<00:03, 10.39it/s][A
Processed prompts:  70%|███████   | 45/64 [00:03<00:00, 44.44it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:04<00:00, 27.97it/s][A
Processed prompts:  91%|█████████ | 58/64 [00:05<00:00, 13.12it/s][A
Processed prompts: 100%|██████████| 64/64 [00:05<00:00, 11.11it/s]
Summarizing:  51%|█████     | 1769/3467 [45:48<2:27:09,  5.20s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 




Processed prompts:   2%|▏         | 1/64 [00:01<01:09,  1.10s/it][A




Processed prompts:   3%|▎         | 2/64 [00:01<00:53,  1.17it/s][A
Processed prompts:   6%|▋         | 4/64 [00:02<00:33,  1.79it/s][A
Processed prompts:  11%|█         | 7/64 [00:02<00:15,  3.65it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:02<00:14,  3.91it/s][A
Processed prompts:  16%|█▌        | 10/64 [00:03<00:10,  5.20it/s][A
Processed prompts:  20%|██        | 13/64 [00:03<00:06,  7.59it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:03<00:00, 42.79it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:03<00:00, 37.57it/s][A
Processed prompts:  84%|████████▍ | 54/64 [00:04<00:00, 28.86it/s][A
Processed prompts:  92%|█████████▏| 59/64 [00:04<00:00, 18.15it/s][A
Processed prompts: 100%|██████████| 64/64 [00:05<00:00, 11.29it/s]
Summarizing:  53%|█████▎    | 1833/3467 [47:28<1:37:08,  3.57s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 64/64 [00:01<00:00, 59.31it/s]
Summarizing:  53%|█████▎    | 1834/3467 [




Processed prompts:   2%|▏         | 1/64 [00:23<24:59, 23.80s/it][A
Processed prompts:   3%|▎         | 2/64 [00:26<11:53, 11.51s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:27<01:36,  1.75s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:27<01:21,  1.51s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:27<01:09,  1.32s/it][A
Processed prompts:  20%|██        | 13/64 [00:28<00:46,  1.11it/s][A
Processed prompts:  31%|███▏      | 20/64 [00:28<00:15,  2.79it/s][A
Processed prompts:  34%|███▍      | 22/64 [00:28<00:12,  3.26it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:28<00:07,  4.91it/s][A
Processed prompts:  45%|████▌     | 29/64 [00:28<00:06,  5.51it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:29<00:05,  6.20it/s][A
Processed prompts:  52%|█████▏    | 33/64 [00:29<00:04,  7.31it/s][A
Processed prompts:  56%|█████▋    | 36/64 [00:29<00:03,  7.62it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:29<00:02,  8.84it/s][A
Processed prompts:  64




Processed prompts:   2%|▏         | 1/64 [00:11<12:15, 11.68s/it][A




Processed prompts:   3%|▎         | 2/64 [00:13<06:22,  6.16s/it][A




Processed prompts:   5%|▍         | 3/64 [00:15<04:19,  4.25s/it][A
Processed prompts:   6%|▋         | 4/64 [00:21<04:50,  4.84s/it][A
Processed prompts:   9%|▉         | 6/64 [00:21<02:14,  2.32s/it][A
Processed prompts:  11%|█         | 7/64 [00:22<01:48,  1.90s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:22<01:20,  1.43s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:23<01:00,  1.10s/it][A
Processed prompts:  31%|███▏      | 20/64 [00:23<00:09,  4.75it/s][A
Processed prompts:  44%|████▍     | 28/64 [00:23<00:04,  8.27it/s][A
Processed prompts:  52%|█████▏    | 33/64 [00:23<00:02, 10.79it/s][A
Processed prompts:  58%|█████▊    | 37/64 [00:23<00:02, 10.87it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:24<00:02, 11.07it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:24<00:01, 12.99it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:24<00:01, 11.37it/s][A
Processed prompts:  80%|███████▉  | 51/64 [00:25<00:01,  6.60it/s][A
Processed prompts: 100%|█




Processed prompts:   2%|▏         | 1/64 [00:10<11:31, 10.97s/it][A




Processed prompts:   3%|▎         | 2/64 [00:15<07:12,  6.98s/it][A
Processed prompts:   5%|▍         | 3/64 [00:21<06:51,  6.75s/it][A
Processed prompts:   6%|▋         | 4/64 [00:22<04:26,  4.44s/it][A
Processed prompts:   8%|▊         | 5/64 [00:22<02:56,  2.99s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:23<01:07,  1.21s/it][A
Processed prompts:  34%|███▍      | 22/64 [00:23<00:10,  3.98it/s][A
Processed prompts:  41%|████      | 26/64 [00:23<00:07,  4.99it/s][A
Processed prompts:  45%|████▌     | 29/64 [00:24<00:07,  4.76it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:24<00:06,  5.42it/s][A
Processed prompts:  53%|█████▎    | 34/64 [00:24<00:04,  6.84it/s][A
Processed prompts:  58%|█████▊    | 37/64 [00:24<00:03,  7.52it/s][A
Processed prompts:  61%|██████    | 39/64 [00:24<00:03,  8.01it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:25<00:02,  8.20it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:26<00:05,  3.72it/s][A
Processed prompts: 100%|




Processed prompts:   2%|▏         | 1/64 [00:03<03:38,  3.46s/it][A




Processed prompts:   3%|▎         | 2/64 [00:07<03:42,  3.59s/it][A




Processed prompts:   5%|▍         | 3/64 [00:14<05:26,  5.35s/it][A
Processed prompts:   6%|▋         | 4/64 [00:23<06:40,  6.68s/it][A
Processed prompts:   8%|▊         | 5/64 [00:24<04:33,  4.63s/it][A
Processed prompts:   9%|▉         | 6/64 [00:24<03:08,  3.24s/it][A
Processed prompts:  27%|██▋       | 17/64 [00:25<00:24,  1.91it/s][A
Processed prompts:  34%|███▍      | 22/64 [00:25<00:15,  2.79it/s][A
Processed prompts:  41%|████      | 26/64 [00:25<00:10,  3.70it/s][A
Processed prompts:  48%|████▊     | 31/64 [00:25<00:06,  5.34it/s][A
Processed prompts:  53%|█████▎    | 34/64 [00:26<00:05,  5.51it/s][A
Processed prompts:  58%|█████▊    | 37/64 [00:26<00:03,  6.82it/s][A
Processed prompts:  62%|██████▎   | 40/64 [00:26<00:03,  7.64it/s][A
Processed prompts:  67%|██████▋   | 43/64 [00:26<00:02,  8.83it/s][A
Processed prompts:  70%|███████   | 45/64 [00:27<00:03,  5.02it/s][A
Processed prompts: 100%|██████████| 64/64 [00:28<00:00,  2.22it/s]
Summarizing:  84%|████████




Processed prompts:   2%|▏         | 1/64 [00:02<02:34,  2.45s/it][A




Processed prompts:   3%|▎         | 2/64 [00:13<08:02,  7.77s/it][A
Processed prompts:   5%|▍         | 3/64 [00:17<06:03,  5.96s/it][A
Processed prompts:   8%|▊         | 5/64 [00:17<02:35,  2.64s/it][A
Processed prompts:   9%|▉         | 6/64 [00:18<01:55,  1.98s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:18<00:50,  1.09it/s][A
Processed prompts:  25%|██▌       | 16/64 [00:18<00:16,  2.95it/s][A
Processed prompts:  30%|██▉       | 19/64 [00:18<00:11,  3.78it/s][A
Processed prompts:  41%|████      | 26/64 [00:19<00:06,  6.27it/s][A
Processed prompts:  55%|█████▍    | 35/64 [00:19<00:02, 10.74it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:19<00:02, 11.25it/s][A
Processed prompts:  66%|██████▌   | 42/64 [00:19<00:01, 13.29it/s][A
Processed prompts:  70%|███████   | 45/64 [00:20<00:02,  8.69it/s][A
Processed prompts:  73%|███████▎  | 47/64 [00:21<00:02,  6.13it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:21<00:02,  6.36it/s][A
Processed prompts: 100%|




Processed prompts:   2%|▏         | 1/64 [00:00<00:45,  1.37it/s][A
Processed prompts:   3%|▎         | 2/64 [00:00<00:27,  2.28it/s][A
Processed prompts:   8%|▊         | 5/64 [00:01<00:09,  6.36it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:01<00:05, 10.38it/s][A
Processed prompts:  66%|██████▌   | 42/64 [00:01<00:00, 57.56it/s][A
Processed prompts:  78%|███████▊  | 50/64 [00:01<00:00, 58.26it/s][A
Processed prompts:  88%|████████▊ | 56/64 [00:01<00:00, 49.58it/s][A
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 23.32it/s]
Summarizing:  98%|█████████▊| 3403/3467 [1:24:15<03:26,  3.22s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:55,  1.13it/s][A
Processed prompts:  12%|█▎        | 8/64 [00:00<00:05, 10.56it/s][A
Processed prompts:  25%|██▌       | 16/64 [00:01<00:02, 21.08it/s][A
Processed prompts:  34%|███▍      | 22/64 [00:01<00:01, 25.26it/s][A
Processed prompts:  70%|███████   | 45/6




Processed prompts:   2%|▏         | 1/64 [00:01<01:04,  1.02s/it][A
Processed prompts:   5%|▍         | 3/64 [00:04<01:28,  1.45s/it][A
Processed prompts:   8%|▊         | 5/64 [00:04<00:45,  1.30it/s][A
Processed prompts:  16%|█▌        | 10/64 [00:04<00:15,  3.40it/s][A
Processed prompts:  27%|██▋       | 17/64 [00:04<00:06,  7.15it/s][A
Processed prompts:  42%|████▏     | 27/64 [00:04<00:02, 13.83it/s][A
Processed prompts:  53%|█████▎    | 34/64 [00:05<00:02, 14.60it/s][A
Processed prompts:  59%|█████▉    | 38/64 [00:05<00:02, 12.72it/s][A
Processed prompts:  64%|██████▍   | 41/64 [00:06<00:02, 11.44it/s][A
Processed prompts:  69%|██████▉   | 44/64 [00:06<00:01, 10.36it/s][A
Processed prompts:  72%|███████▏  | 46/64 [00:06<00:01, 11.04it/s][A
Processed prompts:  75%|███████▌  | 48/64 [00:06<00:01, 11.31it/s][A
Processed prompts:  78%|███████▊  | 50/64 [00:07<00:02,  6.64it/s][A
Processed prompts:  81%|████████▏ | 52/64 [00:07<00:01,  6.99it/s][A
Processed prompts: 100




Processed prompts:   2%|▏         | 1/64 [00:01<01:07,  1.07s/it][A
Processed prompts:   5%|▍         | 3/64 [00:01<00:22,  2.77it/s][A
Processed prompts:   6%|▋         | 4/64 [00:01<00:19,  3.05it/s][A
Processed prompts:   9%|▉         | 6/64 [00:01<00:12,  4.78it/s][A
Processed prompts:  23%|██▎       | 15/64 [00:01<00:02, 16.53it/s][A
Processed prompts:  39%|███▉      | 25/64 [00:01<00:01, 29.58it/s][A
Processed prompts:  50%|█████     | 32/64 [00:02<00:00, 36.97it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:02<00:00, 63.54it/s][A
Processed prompts: 100%|██████████| 64/64 [00:03<00:00, 17.71it/s]
Summarizing:  99%|█████████▉| 3431/3467 [1:25:26<01:52,  3.12s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:01<01:12,  1.16s/it][A
Processed prompts: 100%|██████████| 64/64 [00:01<00:00, 48.76it/s]
Summarizing:  99%|█████████▉| 3432/3467 [1:25:27<01:30,  2.59s/it]
Processed prompts:   0%|          | 0/64 [00:




Processed prompts:   2%|▏         | 1/64 [00:00<00:53,  1.18it/s][A
Processed prompts:   3%|▎         | 2/64 [00:01<00:31,  1.97it/s][A
Processed prompts:   8%|▊         | 5/64 [00:01<00:10,  5.74it/s][A
Processed prompts:  14%|█▍        | 9/64 [00:01<00:05,  9.22it/s][A
Processed prompts:  41%|████      | 26/64 [00:01<00:01, 34.44it/s][A
Processed prompts:  55%|█████▍    | 35/64 [00:01<00:00, 32.57it/s][A
Processed prompts:  66%|██████▌   | 42/64 [00:02<00:00, 37.87it/s][A
Processed prompts:  77%|███████▋  | 49/64 [00:02<00:00, 41.84it/s][A
Processed prompts:  86%|████████▌ | 55/64 [00:02<00:00, 29.62it/s][A
Processed prompts:  94%|█████████▍| 60/64 [00:02<00:00, 20.93it/s][A
Processed prompts: 100%|██████████| 64/64 [00:03<00:00, 20.45it/s]
Summarizing:  99%|█████████▉| 3438/3467 [1:25:39<01:09,  2.40s/it]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:00<00:55,  1.13it/s][A
Processed prompts:   6%|▋         | 4/6

In [None]:
# SQL-Instruction Pairs
def _to_instruction(desc):
    """Turn a SQL description into a "Write SQL that ..." instruction.

    Only a sentence-initial capital is lowercased, so table/column names and
    quoted literals mentioned in the description keep the casing used in the
    paired SQL. Surrounding whitespace and trailing periods are removed so the
    instruction ends in exactly one ".".

    Args:
        desc: description text for one SQL snippet.

    Returns:
        The instruction string.
    """
    text = desc.strip().rstrip(". ")
    # "Creates a table" -> "creates a table", but leave leading acronyms or
    # keywords such as "SQL" / "SELECT" untouched.
    if text[:1].isupper() and not text[1:2].isupper():
        text = text[0].lower() + text[1:]
    return f"Write SQL that {text}."


# One (instruction, sql) tuple per (sql, description) pair, in the same order
# as sql_descs.
instr_out_pairs = [(_to_instruction(desc), sql) for sql, desc in sql_descs]

In [None]:
# Validation pass: check each SQL snippet against its description.
pairs_valid = validate_pairs(
    sql_descs,            # iterable of (sql, description) pairs
    model,
    args.validate_batch,  # presumably the per-call prompt batch size; confirm in validate_pairs
    tokenizer,
)

Few-shot overhead ≈ 463 tokens
Building prompts…


Prompts: 100%|██████████| 221848/221848 [01:29<00:00, 2473.22it/s]


Generating validations via vLLM…


Validating:   0%|          | 0/6933 [00:00<?, ?it/s]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.85it/s]
Validating:   0%|          | 1/6933 [00:01<2:31:27,  1.31s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.45it/s]
Validating:   0%|          | 2/6933 [00:02<2:22:58,  1.24s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 23.93it/s]
Validating:   0%|          | 3/6933 [00:03<2:31:46,  1.31s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.80it/s]
Validating:   0%|          | 4/6933 [00:05<2:21:54,  1.23s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:32,  1.05s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.96it/s]
Validating:   0%|          | 5/6933 [00:08<4:08:22,  2.15s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 12.94it/s]
Validating:   0%|          | 6/6933 [00:11<4:34:57,  2.38s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.06it/s]
Validating:   0%|          | 7/6933 [00:12<3:48:55,  1.98s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.23it/s]
Validating:   0%|          | 8/6933 [00:14<3:25:09,  1.78s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 31.02it/s]
Validating:   0%|          | 9/6933 [00:15<3:00:24,  1.56s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 31.02it/s]
Validating:   0%|          | 10/6933 [00:16<2:43:31,  1.42s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [




Processed prompts:   3%|▎         | 1/32 [00:00<00:30,  1.01it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.57it/s]
Validating:   1%|          | 64/6933 [01:28<3:36:18,  1.89s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.48it/s]
Validating:   1%|          | 65/6933 [01:29<3:13:39,  1.69s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.44it/s]
Validating:   1%|          | 66/6933 [01:30<2:57:46,  1.55s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.68it/s]
Validating:   1%|          | 67/6933 [01:31<2:46:17,  1.45s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.73it/s]
Validating:   1%|          | 68/6933 [01:32<2:38:16,  1.38s/it]
Processed prompts:   0%|       




Processed prompts:   3%|▎         | 1/32 [00:00<00:30,  1.01it/s][A




Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 15.04it/s]
Validating:   4%|▎         | 250/6933 [05:45<5:22:41,  2.90s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 19.21it/s]
Validating:   4%|▎         | 251/6933 [05:47<4:44:18,  2.55s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:33,  1.09s/it][A




Processed prompts:   6%|▋         | 2/32 [00:01<00:18,  1.62it/s][A




Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 10.89it/s]
Validating:   4%|▎         | 252/6933 [05:50<5:17:58,  2.86s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.83it/s]
Validating:   4%|▎         | 253/6933 [05:52<4:26:08,  2.39s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.79it/s]
Validating:   4%|▎         | 254/6933 [05:53<3:45:25,  2.03s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:31,  1.00s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 24.94it/s]
Validating:   4%|▎         | 255/6933 [05:54<3:24:39,  1.84s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.17it/s]
Validating:   4%|▎         | 256/6933 [05:55<3:01:56,  1.63s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.81it/s]
Validating:   4%|▎         | 257/6933 [05:57<2:47:59,  1.51s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.56it/s]
Validating:   4%|▎         | 258/6933 [05:58<2:35:43,  1.40s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.79it/s]
Validating:   4%|▎         | 259/6933 [05:59<2:29:32,  1.34s/it]
Processed prompts:   0%|  




Processed prompts:   3%|▎         | 1/32 [00:01<00:34,  1.12s/it][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 15.94it/s]
Validating:   4%|▍         | 275/6933 [06:24<3:54:52,  2.12s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.92it/s]
Validating:   4%|▍         | 276/6933 [06:25<3:24:38,  1.84s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.46it/s]
Validating:   4%|▍         | 277/6933 [06:26<3:04:10,  1.66s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.90it/s]
Validating:   4%|▍         | 278/6933 [06:27<2:50:39,  1.54s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.41it/s]
Validating:   4%|▍         | 279/6933 [06:29<2:40:28,  1.45s/it]
Processed prompts:   0%|  




Processed prompts:   3%|▎         | 1/32 [00:01<00:37,  1.20s/it][A




Processed prompts:   6%|▋         | 2/32 [00:02<00:34,  1.14s/it][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 10.78it/s]
Validating:   6%|▌         | 423/6933 [09:31<3:22:42,  1.87s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 31.47it/s]
Validating:   6%|▌         | 424/6933 [09:32<3:07:17,  1.73s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:00<00:29,  1.05it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.53it/s]
Validating:   6%|▌         | 425/6933 [09:34<3:02:30,  1.68s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.85it/s]
Validating:   6%|▌         | 426/6933 [09:35<2:50:13,  1.57s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 11.08it/s]
Validating:   6%|▌         | 427/6933 [09:38<3:37:25,  2.01s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:03<00:00, 10.09it/s]
Validating:   6%|▌         | 428/6933 [09:41<4:20:05,  2.40s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:03<00:00, 10.29it/s]
Validating:   6%|▌         | 429/6933 [09:45<4:47:51,  2.66s/it]
Processed prompts:   0%|  




Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 14.97it/s]
Validating:   7%|▋         | 493/6933 [11:18<3:03:48,  1.71s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.95it/s]
Validating:   7%|▋         | 494/6933 [11:19<2:45:01,  1.54s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.86it/s]
Validating:   7%|▋         | 495/6933 [11:21<2:31:55,  1.42s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.82it/s]
Validating:   7%|▋         | 496/6933 [11:22<2:22:55,  1.33s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.74it/s]
Validating:   7%|▋         | 497/6933 [11:23<2:16:42,  1.27s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████




Processed prompts:   3%|▎         | 1/32 [00:00<00:30,  1.01it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.86it/s]
Validating:   9%|▉         | 608/6933 [13:30<2:08:28,  1.22s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.36it/s]
Validating:   9%|▉         | 609/6933 [13:31<2:09:00,  1.22s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.39it/s]
Validating:   9%|▉         | 610/6933 [13:32<2:06:38,  1.20s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.51it/s]
Validating:   9%|▉         | 611/6933 [13:33<2:06:06,  1.20s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.59it/s]
Validating:   9%|▉         | 612/6933 [13:34<2:03:19,  1.17s/it]
Processed prompts:   0%|  




Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 10.85it/s]
Validating:  24%|██▍       | 1685/6933 [35:38<2:31:35,  1.73s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:00<00:30,  1.00it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.59it/s]
Validating:  24%|██▍       | 1686/6933 [35:40<2:20:41,  1.61s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.65it/s]
Validating:  24%|██▍       | 1687/6933 [35:41<2:07:28,  1.46s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.48it/s]
Validating:  24%|██▍       | 1688/6933 [35:42<2:00:23,  1.38s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.29it/s]
Validating:  24%|██▍       | 1689/6933 [35:43<1:59:22,  1.37s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.88it/s]
Validating:  24%|██▍       | 1690/6933 [35:45<1:55:22,  1.32s/it]
Processed prompts:   




Processed prompts:   3%|▎         | 1/32 [00:01<00:31,  1.00s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 23.79it/s]
Validating:  24%|██▍       | 1691/6933 [35:46<1:59:34,  1.37s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.90it/s]
Validating:  24%|██▍       | 1692/6933 [35:47<1:55:28,  1.32s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.16it/s]
Validating:  24%|██▍       | 1693/6933 [35:48<1:50:13,  1.26s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.74it/s]
Validating:  24%|██▍       | 1694/6933 [35:50<1:50:14,  1.26s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.51it/s]
Validating:  24%|██▍       | 1695/6933 [35:51<1:46:13,  1.22s/it]
Processed prompts:   




Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.66it/s]
Validating:  25%|██▌       | 1738/6933 [36:47<3:45:27,  2.60s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 12.60it/s]
Validating:  25%|██▌       | 1739/6933 [36:50<3:46:56,  2.62s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.12it/s]
Validating:  25%|██▌       | 1740/6933 [36:51<3:08:02,  2.17s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.61it/s]
Validating:  25%|██▌       | 1741/6933 [36:52<2:40:22,  1.85s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 23.26it/s]
Validating:  25%|██▌       | 1742/6933 [36:53<2:29:50,  1.73s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:36,  1.18s/it][A




Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 19.79it/s]
Validating:  25%|██▌       | 1743/6933 [36:58<3:33:07,  2.46s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.81it/s]
Validating:  25%|██▌       | 1744/6933 [37:01<4:07:45,  2.86s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:00<00:00, 103.99it/s]
Validating:  25%|██▌       | 1745/6933 [37:04<3:57:56,  2.75s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.61it/s]
Validating:  25%|██▌       | 1746/6933 [37:05<3:18:30,  2.30s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.75it/s]
Validating:  25%|██▌       | 1747/6933 [37:06<2:47:32,  1.94s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 31.60it/s]
Validating:  25%|██▌       | 1748/6933 [37:07<2:25:07,  1.68s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.82it/s]
Validating:  25%|██▌       | 1749/6933 [37:09<2:15:29,  1.57s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|████




Processed prompts:   3%|▎         | 1/32 [00:01<01:00,  1.96s/it][A




Processed prompts:   6%|▋         | 2/32 [00:03<00:44,  1.49s/it][A




Processed prompts:  25%|██▌       | 8/32 [00:06<00:17,  1.39it/s][A




Processed prompts: 100%|██████████| 32/32 [00:07<00:00,  4.13it/s]
Validating:  28%|██▊       | 1921/6933 [41:20<5:07:25,  3.68s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.46it/s]
Validating:  28%|██▊       | 1922/6933 [41:21<4:09:39,  2.99s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.10it/s]
Validating:  28%|██▊       | 1923/6933 [41:22<3:23:51,  2.44s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.65it/s]
Validating:  28%|██▊       | 1924/6933 [41:23<2:50:28,  2.04s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.41it/s]
Validating:  28%|██▊       | 1925/6933 [41:25<2:30:13,  1.80s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.53it/s]
Validating:  28%|██▊       | 1926/6933 [41:26<2:13:53,  1.60s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|█████




Processed prompts:   3%|▎         | 1/32 [00:00<00:11,  2.68it/s][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 15.78it/s]
Validating:  48%|████▊     | 3342/6933 [1:12:15<1:26:05,  1.44s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.51it/s]
Validating:  48%|████▊     | 3343/6933 [1:12:16<1:21:33,  1.36s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 30.26it/s]
Validating:  48%|████▊     | 3344/6933 [1:12:17<1:17:11,  1.29s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.11it/s]
Validating:  48%|████▊     | 3345/6933 [1:12:19<1:14:57,  1.25s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.07it/s]
Validating:  48%|████▊     | 3346/6933 [1:12:20<1:14:06,  1.24s/it]
Processed p




Processed prompts:   3%|▎         | 1/32 [00:00<00:30,  1.00it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 24.25it/s]
Validating:  49%|████▊     | 3379/6933 [1:13:08<1:57:12,  1.98s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 19.52it/s]
Validating:  49%|████▉     | 3380/6933 [1:13:10<1:52:46,  1.90s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 11.59it/s]
Validating:  49%|████▉     | 3381/6933 [1:13:13<2:10:17,  2.20s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.85it/s]
Validating:  49%|████▉     | 3382/6933 [1:13:14<1:53:48,  1.92s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.19it/s]
Validating:  49%|████▉     | 3383/6933 [1:13:16<1:40:15,  1.69s/it]
Processed p




Processed prompts: 100%|██████████| 32/32 [00:00<00:00, 104.30it/s]
Validating:  51%|█████     | 3535/6933 [1:16:19<2:06:43,  2.24s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts: 100%|██████████| 32/32 [00:00<00:00, 45.78it/s]
Validating:  51%|█████     | 3536/6933 [1:16:22<2:22:08,  2.51s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:35,  1.15s/it][A
Processed prompts: 100%|██████████| 32/32 [00:02<00:00, 15.76it/s]
Validating:  51%|█████     | 3537/6933 [1:16:25<2:36:12,  2.76s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.23it/s]
Validating:  51%|█████     | 3538/6933 [1:16:27<2:10:27,  2.31s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.50it/s]
Validating:  51%|█████     | 3539/6933 [1:16:28<1:53:02,  2.00s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.54it/s]
Validating:  51%|█████     | 3540/6933 [1:16:29<1:40:45,  1.78s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.48it/s]
Validating:  51%|█████     | 3541/6933 [1:16:30<1:32:09,  1.63s/it]
Processed p




Processed prompts:   3%|▎         | 1/32 [00:01<00:35,  1.15s/it][A



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.49it/s]
Validating:  60%|██████    | 4169/6933 [1:29:34<56:47,  1.23s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.49it/s]
Validating:  60%|██████    | 4170/6933 [1:29:36<56:43,  1.23s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.38it/s]
Validating:  60%|██████    | 4171/6933 [1:29:37<56:45,  1.23s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.47it/s]
Validating:  60%|██████    | 4172/6933 [1:29:38<56:44,  1.23s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.32it/s]
Validating:  60%|██████    | 4173/6933 [1:29:39<56:47,  1.23s/it]
Processed prompts:   0%|  




Processed prompts:   3%|▎         | 1/32 [00:10<05:20, 10.35s/it][A
Processed prompts: 100%|██████████| 32/32 [00:12<00:00,  2.49it/s]
Validating:  84%|████████▍ | 5834/6933 [2:04:41<3:08:13, 10.28s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts:   3%|▎         | 1/32 [00:07<04:00,  7.75s/it][A
Processed prompts: 100%|██████████| 32/32 [00:11<00:00,  2.74it/s]
Validating:  84%|████████▍ | 5835/6933 [2:04:53<3:18:22, 10.84s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:01<00:33,  1.08s/it][A




Processed prompts:   6%|▋         | 2/32 [00:03<00:57,  1.92s/it][A




Processed prompts:   9%|▉         | 3/32 [00:06<01:04,  2.23s/it][A
Processed prompts:  12%|█▎        | 4/32 [00:09<01:16,  2.73s/it][A
Processed prompts: 100%|██████████| 32/32 [00:11<00:00,  2.72it/s]
Validating:  84%|████████▍ | 5836/6933 [2:05:05<3:27:02, 11.32s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:11<00:00,  2.86it/s]
Validating:  84%|████████▍ | 5837/6933 [2:05:17<3:28:33, 11.42s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:00<00:28,  1.10it/s][A
Processed prompts:   9%|▉         | 3/32 [00:04<00:42,  1.46s/it][A




Processed prompts:  12%|█▎        | 4/32 [00:04<00:33,  1.20s/it][A
Processed prompts:  16%|█▌        | 5/32 [00:11<01:19,  2.96s/it][A
Processed prompts: 100%|██████████| 32/32 [00:12<00:00,  2.59it/s]
Validating:  84%|████████▍ | 5838/6933 [2:05:30<3:37:00, 11.89s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:03<01:57,  3.78s/it][A




Processed prompts:   6%|▋         | 2/32 [00:07<01:55,  3.86s/it][A
Processed prompts: 100%|██████████| 32/32 [00:12<00:00,  2.52it/s]
Validating:  84%|████████▍ | 5839/6933 [2:05:43<3:44:43, 12.32s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:02<01:20,  2.61s/it][A
Processed prompts:   6%|▋         | 2/32 [00:04<00:58,  1.95s/it][A
Processed prompts:   9%|▉         | 3/32 [00:12<02:17,  4.73s/it][A
Processed prompts: 100%|██████████| 32/32 [00:12<00:00,  2.58it/s]
Validating:  84%|████████▍ | 5840/6933 [2:05:56<3:48:02, 12.52s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:02<01:20,  2.59s/it][A
Processed prompts:   6%|▋         | 2/32 [00:07<01:51,  3.71s/it][A
Processed prompts: 100%|██████████| 32/32 [00:14<00:00,  2.13it/s]
Validating:  84%|████████▍ | 5841/6933 [2:06:12<4:06:18, 13.53s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:00<00:28,  1.11it/s][A
Processed prompts: 100%|██████████| 32/32 [00:04<00:00,  7.16it/s]
Validating:  84%|████████▍ | 5842/6933 [2:06:17<3:17:59, 10.89s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 27.05it/s]
Validating:  84%|████████▍ | 5843/6933 [2:06:18<2:25:18,  8.00s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.20it/s]
Validating:  84%|████████▍ | 5844/6933 [2:06:19<1:48:10,  5.96s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.07it/s]
Validating:  84%|████████▍ | 5845/6933 [2:06:20<1:22:13,  4.53s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.89it/s]
Validating:  84%|████████▍ | 5846/6933 [2:06:22<1:04:21,  3.55s/it]
Processed p




Processed prompts:   3%|▎         | 1/32 [00:01<00:31,  1.02s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.65it/s]
Validating:  98%|█████████▊| 6806/6933 [2:25:30<03:03,  1.45s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 29.43it/s]
Validating:  98%|█████████▊| 6807/6933 [2:25:31<02:51,  1.36s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.27it/s]
Validating:  98%|█████████▊| 6808/6933 [2:25:32<02:47,  1.34s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A




Processed prompts:   3%|▎         | 1/32 [00:00<00:18,  1.72it/s][A
Processed prompts:   9%|▉         | 3/32 [00:02<00:28,  1.01it/s][A
Processed prompts: 100%|██████████| 32/32 [00:03<00:00,  8.46it/s]
Validating:  98%|█████████▊| 6809/6933 [2:25:37<04:53,  2.36s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 20.97it/s]
Validating:  98%|█████████▊| 6810/6933 [2:25:39<04:23,  2.15s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 23.64it/s]
Validating:  98%|█████████▊| 6811/6933 [2:25:40<03:56,  1.94s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 21.18it/s]
Validating:  98%|█████████▊| 6812/6933 [2:25:42<03:42,  1.84s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 24.79it/s]
Validating:  98%|█




Processed prompts:   3%|▎         | 1/32 [00:01<00:32,  1.03s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 18.60it/s]
Validating:  99%|█████████▉| 6861/6933 [2:26:56<01:51,  1.55s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.49it/s]
Validating:  99%|█████████▉| 6862/6933 [2:26:57<01:42,  1.44s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.07it/s]
Validating:  99%|█████████▉| 6863/6933 [2:26:58<01:36,  1.37s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.17it/s]
Validating:  99%|█████████▉| 6864/6933 [2:27:00<01:31,  1.32s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.23it/s]
Validating:  99%|█████████▉| 6865/6933 [2:27:01<01:27,  1.29s/it]
Processed prompts:   




Processed prompts:   3%|▎         | 1/32 [00:01<00:31,  1.01s/it][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 24.34it/s]
Validating:  99%|█████████▉| 6876/6933 [2:27:14<01:14,  1.31s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 26.55it/s]
Validating:  99%|█████████▉| 6877/6933 [2:27:16<01:17,  1.38s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.43it/s]
Validating:  99%|█████████▉| 6878/6933 [2:27:17<01:13,  1.33s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 28.16it/s]
Validating:  99%|█████████▉| 6879/6933 [2:27:18<01:09,  1.29s/it]
Processed prompts:   0%|          | 0/32 [00:00<?, ?it/s][A
Processed prompts: 100%|██████████| 32/32 [00:01<00:00, 25.85it/s]
Validating:  99%|█████████▉| 6880/6933 [2:27:20<01:08,  1.30s/it]
Processed prompts:   

In [None]:
# Write every validated (instruction, SQL) pair to a JSONL file. Each kept
# record carries the instruction and the SQL prefixed with its description
# as a /* ... */ block comment.
print(f"Writing filtered seeds to {args.output}…")

# os.path.dirname() returns "" for a bare filename and os.makedirs("") raises,
# so only create the parent directory when the path actually has one.
output_dir = os.path.dirname(args.output)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

kept = 0
# json.dumps() escapes non-ASCII characters by default, so the explicit
# encoding only pins down what the platform default would otherwise decide.
with open(args.output, "w", encoding="utf-8") as wf:
    # NOTE(review): zip() stops at the shortest input, so this assumes the three
    # sequences are index-aligned and equally long — TODO confirm upstream.
    # The first element of each sql_descs pair is not needed here.
    for (instruction, sql), (_sql, desc), is_valid in zip(instr_out_pairs, sql_descs, pairs_valid):
        if not is_valid:
            continue
        # SQL block comments do not nest: a "*/" inside the description would
        # close the comment early and leave the remainder as stray SQL, so
        # break the terminator up before embedding the text.
        safe_desc = str(desc).replace("*/", "* /")
        # Use SQL block comment style to embed the description at the top
        doc_comment = f"/* {safe_desc} */"
        sql_with_doc = f"{doc_comment}\n{sql}"
        # Emit JSON with the commented SQL as the output
        wf.write(json.dumps({
            "instruction": instruction,
            "content": sql_with_doc
        }) + "\n")
        kept += 1

print(f"✅ Wrote {kept} seeds to {args.output}")


# input_path = OUT_FILE
# output_path = '/content/drive/MyDrive/DS 677/Project/output_sql/3_sql_content_list.json'
# sql_contents = extract_sql_texts_with_delimiter(input_path, output_path)

Writing filtered seeds to /content/drive/MyDrive/DS 677/Project/output_sql/3_sql_filtered.jsonl…
✅ Wrote 221575 seeds to /content/drive/MyDrive/DS 677/Project/output_sql/3_sql_filtered.jsonl


## Deduplicate, Rename `content` → `seed` & Save Final Seeds


In [None]:
import json

IN_FILE  = "/content/drive/MyDrive/DS 677/Project/output_sql/3_sql_filtered.jsonl"
OUT_FILE = "/content/drive/MyDrive/DS 677/Project/output_sql/4_sql_final_seeds.jsonl"

# Instructions already emitted; only the first record per instruction is kept.
seen_instructions = set()
# Next sequential id to hand out (ids start at 1).
new_id = 1

with open(IN_FILE) as in_f, open(OUT_FILE, "w") as out_f:
    for raw_line in in_f:
        record = json.loads(raw_line)
        dedup_key = record.get("instruction")

        if dedup_key not in seen_instructions:
            seen_instructions.add(dedup_key)

            # Number the surviving record, then expose its SQL under "seed"
            # instead of "content".
            record["id"] = new_id
            record["seed"] = record.pop("content")
            new_id += 1

            # The instruction was only needed for de-duplication.
            record.pop("instruction", None)

            out_f.write(json.dumps(record) + "\n")


print(f"✅ Final seeds written to {OUT_FILE}")

✅ Final seeds written to /content/drive/MyDrive/DS 677/Project/output_sql/4_sql_final_seeds.jsonl


In [None]:
###################### End of Step 1 ######################