# Dataset Generation Script

This script performs the following operations:
1. Extracts 5000 training questions and 100 testing questions for each domain (trivia, math, general).
2. Saves the selected datasets.
3. Generates responses using 4 different teacher models via OpenRouter.
4. Applies templates in half of the cases.
5. Saves the generated data.

In [11]:
import os
import json
import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
# from tqdm.notebook import tqdm # Causing ImportError with IProgress
from tqdm import tqdm # Use standard tqdm instead
from openai import OpenAI

# --- Configuration ---

# Ensure you have your OpenRouter API key set in your environment
# os.environ["OPENROUTER_API_KEY"] = "sk-or-v1-..."
API_KEY = os.getenv("OPENROUTER_API_KEY")

# Stop immediately when the key is missing instead of failing later on the
# first API request.
if not API_KEY:
    raise ValueError("Please set OPENROUTER_API_KEY environment variable.")

# OpenAI SDK client redirected to the OpenRouter endpoint; this single
# module-level instance is used for every chat-completion request below.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
)

# Each domain name doubles as the sub-directory of experiments/ that holds the
# domain's raw/selected data files and its template.txt.
DOMAINS = ["trivia", "math", "general"]
# Model names passed verbatim as the `model` argument of each request.
TEACHER_MODELS = [
    "qwen/qwen-2.5-72b-instruct",
    "qwen/qwen-2.5-7b-instruct",
    "meta-llama/llama-3.1-70b-instruct",
    "meta-llama/llama-3.1-8b-instruct",
]
# Number of questions to keep per domain for the train and test splits.
TRAIN_SIZE = 5000
TEST_SIZE = 100
# Seed for the random sampling of questions, so the selection is repeatable.
SEED = 42
# Concurrency is two-level: up to MAX_PARALLEL_CONFIGS configurations run at
# once, each with its own pool of MAX_WORKERS request threads, i.e. at most
# MAX_WORKERS * MAX_PARALLEL_CONFIGS requests in flight.
MAX_WORKERS = 5 # reduced per-config workers since we run configs in parallel
MAX_PARALLEL_CONFIGS = 4 # Number of configurations to run simultaneously


In [12]:
# Smoke test: send one small chat request so that a bad API key or an
# unreachable endpoint shows up here, before the long generation run starts.
try:
    print("Testing OpenRouter connection...")
    completion = client.chat.completions.create(
        model="meta-llama/llama-3.1-8b-instruct",
        messages=[
            {"role": "user", "content": "Hello, world!"}
        ]
    )
    print("Connection successful!")
    print("Response:", completion.choices[0].message.content)
except Exception as e:
    # Report the failure, then re-raise so the error is not silently ignored.
    print("Connection failed:", str(e))
    raise e

Testing OpenRouter connection...
Connection successful!
Response: Hello! It's nice to meet you! Is there something I can help you with or would you like to chat?


In [13]:
def _read_jsonl(path):
    """Read a JSON Lines file into a list of objects, ignoring blank lines."""
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def load_and_select_data(domain, split, count):
    """
    Return the question set for one domain/split, sampling it on first use.

    Files live under experiments/{domain}/:
      * {split}_selected.jsonl - cached selection; returned as-is when present.
      * {split}_raw.jsonl      - full pool, sampled when no selection exists.

    Args:
        domain: Domain name, used as the sub-directory of experiments/.
        split: Split name used as the file-name prefix (e.g. "train", "test").
        count: Number of items wanted. When the raw pool holds `count` items
            or fewer, the whole pool is used.

    Returns:
        A list of the parsed JSON objects, one per non-blank line.

    Raises:
        FileNotFoundError: neither the selected nor the raw file exists.
        json.JSONDecodeError: a non-blank line is not valid JSON.
    """
    base_path = os.path.join("experiments", domain)
    selected_file = os.path.join(base_path, f"{split}_selected.jsonl")
    raw_file = os.path.join(base_path, f"{split}_raw.jsonl")

    # Reuse an earlier selection so repeated runs work on the same questions.
    if os.path.exists(selected_file):
        print(f"Loading existing selected data for {domain}/{split}...")
        data = _read_jsonl(selected_file)
        # Verify size (warn if mismatch, but allow reuse)
        if len(data) != count:
            print(f"Warning: Existing file has {len(data)} items, expected {count}.")
        return data

    print(f"Sampling new data for {domain}/{split}...")
    data = _read_jsonl(raw_file)

    if len(data) > count:
        # A dedicated generator yields the same sample as seeding the module
        # RNG with SEED, without resetting global random state for other code.
        selected_data = random.Random(SEED).sample(data, count)
    else:
        selected_data = data

    # Write to a temporary file and rename it into place, so an interrupted
    # run cannot leave a truncated selection that later runs would reuse.
    tmp_file = selected_file + ".tmp"
    with open(tmp_file, 'w', encoding='utf-8') as f:
        for item in selected_data:
            f.write(json.dumps(item, ensure_ascii=False) + "\n")
    os.replace(tmp_file, selected_file)

    return selected_data

def load_template(domain):
    """Return the text of experiments/{domain}/template.txt, stripped of surrounding whitespace."""
    template_path = os.path.join("experiments", domain, "template.txt")
    with open(template_path, 'r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents.strip()

In [14]:
def generate_single_response(item, model, template_text, use_template,
                             max_retries=3, backoff_base=2):
    """
    Ask one teacher model to answer one question, retrying on failure.

    Args:
        item: Dict holding at least a 'question' string. It is not modified;
            a shallow copy is returned.
        model: Model name passed to the chat-completions API.
        template_text: Prompt template containing a "[QUESTION]" placeholder.
            May be empty or None, in which case the bare question is sent.
        use_template: Whether to wrap the question in `template_text`.
        max_retries: Total number of attempts; values below 1 are treated as 1.
        backoff_base: Attempt k (0-based) is followed by a sleep of
            `backoff_base ** k` seconds before the next attempt.

    Returns:
        A copy of `item` with 'model', 'template_applied' and 'prompt' added,
        plus 'response' on success or 'error' (the last exception's message)
        when every attempt failed. 'template_applied' mirrors the
        `use_template` flag as given.

    Raises:
        KeyError: `item` has no 'question' key.
    """
    question = item['question']

    if use_template and template_text:
        # Assume template has [QUESTION] placeholder based on file inspection
        prompt = template_text.replace("[QUESTION]", question)
    else:
        prompt = question

    # Recorded on both success and failure so that failed rows can still be
    # traced back to the exact model and prompt that produced them.
    metadata = {
        'model': model,
        'template_applied': use_template,
        'prompt': prompt,
    }

    # Always make at least one attempt so the function never falls through
    # and returns None.
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            result = item.copy()
            result['response'] = completion.choices[0].message.content
            result.update(metadata)
            return result

        except Exception as e:
            if attempt == attempts - 1:
                print(f"Failed to generate for prompt: {prompt[:50]}... Error: {e}")
                result = item.copy()
                result['error'] = str(e)
                result.update(metadata)
                return result
            time.sleep(backoff_base ** attempt)  # Exponential backoff

In [15]:
def run_generation_process(domain, split, data, model, use_template):
    """
    Generate responses for one (domain, split, model, template) configuration.

    Results go to
    experiments/{domain}/{split}_generated_{model_clean}_{template}.jsonl,
    where `model_clean` is `model` with "/" replaced by "_" and `template` is
    "with_template" or "no_template". If that file already exists the
    configuration is skipped and nothing else is read or written.

    Args:
        domain: Domain name (sub-directory of experiments/).
        split: Split name used as the output file-name prefix.
        data: List of question dicts to send to the model.
        model: Model name passed through to generate_single_response.
        use_template: Whether prompts are wrapped in the domain template.

    Returns:
        None.
    """
    # Construct output filename
    # Format: experiments/{domain}/{split}_generated_{model_clean}_{template}.jsonl
    model_clean = model.replace("/", "_")
    template_str = "with_template" if use_template else "no_template"
    output_filename = f"{split}_generated_{model_clean}_{template_str}.jsonl"
    output_path = os.path.join("experiments", domain, output_filename)

    # Check for existing output first, so a finished configuration is skipped
    # without touching any other file.
    if os.path.exists(output_path):
        print(f"  [Skipping] {output_filename} already exists.")
        return

    # Only read the template when it is used: a domain without template.txt
    # can still run its no-template configurations.
    template_text = load_template(domain) if use_template else None

    print(f"  [Generating] {domain} | {split} | {model} | Template={use_template}")

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(generate_single_response, item, model, template_text, use_template)
            for item in data
        ]

        # as_completed only drives the progress bar here.
        for _ in tqdm(as_completed(futures), total=len(futures), desc="    Progress", leave=False):
            pass

        # Collect in submission order so output line i matches input item i,
        # independent of which request happened to finish first.
        results = [future.result() for future in futures]

    # Write to a temporary file and rename it into place: the existence check
    # above must never mistake a half-written file for a finished run.
    tmp_path = output_path + ".tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        for res in results:
            f.write(json.dumps(res, ensure_ascii=False) + "\n")
    os.replace(tmp_path, output_path)
    print(f"  [Saved] {output_filename}")

In [None]:
# --- Main Execution Loop ---

# Data selection runs once, up front and single-threaded, so the parallel
# workers below never race while reading or creating the selected files.
domain_data = {}
for domain in DOMAINS:
    print(f"Loading data for {domain}...")
    try:
        # Both splits must load; otherwise the whole domain is left out.
        domain_data[domain] = {
            "train": load_and_select_data(domain, "train", TRAIN_SIZE),
            "test": load_and_select_data(domain, "test", TEST_SIZE),
        }
    except Exception as e:
        print(f"Skipping domain {domain} due to error: {e}")

# One task per (domain, model, template flag, split) combination.
tasks = [
    {
        "domain": domain,
        "split": split_name,
        "data": datasets[split_name],
        "model": model,
        "use_template": use_template
    }
    for domain, datasets in domain_data.items()
    for model in TEACHER_MODELS
    for use_template in [True, False]
    for split_name in ["train", "test"]
]

print(f"\nTotal configurations to run: {len(tasks)}")

def process_task(task):
    """
    Run one generation configuration and report the outcome as a string.

    Args:
        task: Dict with keys 'domain', 'split', 'data', 'model' and
            'use_template', forwarded to run_generation_process.

    Returns:
        "Success: <label>" when the run finished (or was skipped), otherwise
        "Error in <label>: <exception>". The label names domain, split, model
        and template flag, so each configuration's status line is distinct.
    """
    # .get keeps label construction from raising, so a malformed task is
    # reported through the error string below rather than crashing the caller.
    label = (
        f"{task.get('domain')} | {task.get('split')} | "
        f"{task.get('model')} | Template={task.get('use_template')}"
    )
    try:
        run_generation_process(
            domain=task["domain"],
            split=task["split"],
            data=task["data"],
            model=task["model"],
            use_template=task["use_template"]
        )
        return f"Success: {label}"
    except Exception as e:
        # Failures are returned, not raised, so one bad configuration does
        # not abort the remaining ones.
        return f"Error in {label}: {e}"

print("Starting parallel execution...")
# Run up to MAX_PARALLEL_CONFIGS configurations at once and print each
# status line as soon as its configuration finishes.
with ThreadPoolExecutor(max_workers=MAX_PARALLEL_CONFIGS) as pool:
    pending = [pool.submit(process_task, job) for job in tasks]
    for finished in as_completed(pending):
        status = finished.result()
        print(status)

print("\nAll generation tasks completed!")

Loading data for trivia...
Loading existing selected data for trivia/train...
Loading existing selected data for trivia/test...
Loading data for math...
Loading existing selected data for math/train...
Loading existing selected data for math/test...
Loading data for general...
Loading existing selected data for general/train...
Loading existing selected data for general/test...

Total configurations to run: 48
Starting parallel execution...
  [Skipping] test_generated_qwen_qwen-2.5-72b-instruct_with_template.jsonl already exists.
  [Generating] trivia | train | qwen/qwen-2.5-72b-instruct | Template=False
  [Skipping] train_generated_qwen_qwen-2.5-72b-instruct_with_template.jsonl already exists.
Success: trivia qwen/qwen-2.5-72b-instruct
Success: trivia qwen/qwen-2.5-72b-instruct
  [Skipping] test_generated_qwen_qwen-2.5-72b-instruct_no_template.jsonl already exists.
Success: trivia qwen/qwen-2.5-72b-instruct
  [Skipping] test_generated_qwen_qwen-2.5-7b-instruct_with_template.jsonl alre

    Progress:   0%|          | 0/5000 [00:00<?, ?it/s]
[A

    Progress:   0%|          | 1/5000 [00:00<40:04,  2.08it/s]
[A
[A

[A[A
[A
[A
[A

[A[A
[A
[A

[A[A
[A
[A
    Progress:   0%|          | 2/5000 [00:03<2:43:00,  1.96s/it]
[A

[A[A

[A[A
    Progress:   0%|          | 3/5000 [00:04<1:54:32,  1.38s/it]

[A[A
[A
[A
[A
[A
[A

[A[A
[A

[A[A
[A
[A

[A[A
[A

    Progress:   0%|          | 4/5000 [00:06<2:15:28,  1.63s/it]
[A
[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

[A[A
    Progress:   0%|          | 5/5000 [00:07<2:18:18,  1.66s/it]
    Progress:   0%|          | 6/5000 [00:08<1:42:35,  1.23s/it]

[A[A
[A
[A

[A[A
    Progress:   0%|          | 8/5000 [00:09<1:15:57,  1.10it/s]
[A
[A

    Progress:   0%|          | 9/5000 [00:09<1:06:24,  1.25it/s]
[A

[A[A

[A[A
[A
[A

[A[A
[A

[A[A
[A

    Progress:   0%|          | 10/5000 [00:11<1:32:55,  1.12s/it]
[A
[A

[A[A
[A
[A
[A

    Progress:   0%|          | 11/50

  [Saved] train_generated_meta-llama_llama-3.1-70b-instruct_with_template.jsonl
Success: trivia meta-llama/llama-3.1-70b-instruct
  [Generating] trivia | test | meta-llama/llama-3.1-70b-instruct | Template=False



[A

[A[A
[A
    Progress:  25%|██▍       | 1239/5000 [14:33<1:21:36,  1.30s/it]
[A
[A

[A[A

[A[A
[A
[A
[A

[A[A

[A[A
    Progress:  25%|██▍       | 1241/5000 [14:35<1:14:55,  1.20s/it]

    Progress:  25%|██▍       | 1242/5000 [14:35<59:12,  1.06it/s]  
    Progress:  25%|██▍       | 1243/5000 [14:36<46:36,  1.34it/s]
[A

[A[A

[A[A

[A[A
[A
[A

[A[A

    Progress:  25%|██▍       | 1244/5000 [14:37<1:02:17,  1.00it/s]

[A[A
[A

[A[A
[A
[A

[A[A
[A

[A[A

    Progress:  25%|██▍       | 1245/5000 [14:39<1:22:35,  1.32s/it]
[A

[A[A
[A

[A[A

[A[A
    Progress:  25%|██▍       | 1246/5000 [14:40<1:16:49,  1.23s/it]

[A[A

[A[A

[A[A

    Progress:  25%|██▍       | 1247/5000 [14:42<1:15:36,  1.21s/it]

[A[A

[A[A
[A
    Progress:  25%|██▍       | 1248/5000 [14:43<1:10:06,  1.12s/it]

    Progress:  25%|██▍       | 1249/5000 [14:43<59:03,  1.06it/s]  
[A
[A

[A[A
[A
[A

    Progress:  25%|██▌       | 1251/5000 [14:44<49:25, 

  [Saved] test_generated_meta-llama_llama-3.1-70b-instruct_no_template.jsonl
Success: trivia meta-llama/llama-3.1-70b-instruct
  [Generating] trivia | train | meta-llama/llama-3.1-8b-instruct | Template=True



[A

[A[A

[A[A
[A
    Progress:  26%|██▌       | 1291/5000 [15:14<51:26,  1.20it/s]

[A[A
[A
[A
    Progress:  26%|██▌       | 1292/5000 [15:15<45:20,  1.36it/s]
[A
[A
[A
    Progress:  26%|██▌       | 1294/5000 [15:15<31:25,  1.97it/s]

[A[A
[A

[A[A
    Progress:  26%|██▌       | 1295/5000 [15:16<28:38,  2.16it/s]

    Progress:  26%|██▌       | 1296/5000 [15:16<28:37,  2.16it/s]

[A[A
[A
[A

[A[A
[A
[A

[A[A
[A

[A[A
[A
    Progress:  26%|██▌       | 1297/5000 [15:18<45:00,  1.37it/s]
[A

[A[A
[A
[A

[A[A
[A
[A

[A[A
[A

[A[A
[A
[A
[A

[A[A

[A[A
[A

[A[A

    Progress:  26%|██▌       | 1299/5000 [15:20<52:21,  1.18it/s]  
[A
[A
[A
[A

[A[A
[A

[A[A
[A
[A

[A[A
    Progress:  26%|██▌       | 1300/5000 [15:21<1:02:58,  1.02s/it]
[A
[A
[A

    Progress:  26%|██▌       | 1301/5000 [15:22<53:20,  1.16it/s]  
    Progress:  26%|██▌       | 1302/5000 [15:22<41:03,  1.50it/s]

[A[A
[A
    Progress:  26%|██▌       |

  [Saved] train_generated_meta-llama_llama-3.1-8b-instruct_with_template.jsonl
Success: trivia meta-llama/llama-3.1-8b-instruct
  [Generating] trivia | test | meta-llama/llama-3.1-8b-instruct | Template=True



    Progress:  51%|█████     | 2536/5000 [31:13<36:20,  1.13it/s]
[A
[A
[A

[A[A
[A
[A
[A

    Progress:  51%|█████     | 2538/5000 [31:14<29:17,  1.40it/s]
[A

[A[A
[A
[A
[A
[A
[A
[A
    Progress:  51%|█████     | 2539/5000 [31:16<36:24,  1.13it/s]

[A[A
[A
[A
[A
    Progress:  51%|█████     | 2540/5000 [31:16<35:11,  1.17it/s]
    Progress:  51%|█████     | 2542/5000 [31:17<24:40,  1.66it/s]
[A

    Progress:  51%|█████     | 2543/5000 [31:17<20:52,  1.96it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
    Progress:  51%|█████     | 2544/5000 [31:19<37:17,  1.10it/s]
[A
[A
    Progress:  51%|█████     | 2545/5000 [31:20<37:28,  1.09it/s]
[A
[A

[A[A
[A
    Progress:  51%|█████     | 2546/5000 [31:21<32:11,  1.27it/s]
[A

    Progress:  51%|█████     | 2547/5000 [31:21<26:06,  1.57it/s]
    Progress:  51%|█████     | 2548/5000 [31:21<20:00,  2.04it/s]
    Progress:  51%|█████     | 2550/5000 [31:21<13:14,  3.08it/s]
[A
[A
[A

[A[A
[A

[A[A
[A


  [Saved] test_generated_meta-llama_llama-3.1-8b-instruct_with_template.jsonl
Success: trivia meta-llama/llama-3.1-8b-instruct
  [Generating] trivia | train | meta-llama/llama-3.1-8b-instruct | Template=False



[A

    Progress:  51%|█████     | 2552/5000 [31:23<21:03,  1.94it/s]
[A
[A

[A[A
    Progress:  51%|█████     | 2554/5000 [31:24<16:24,  2.48it/s]

    Progress:  51%|█████     | 2555/5000 [31:24<16:41,  2.44it/s]
[A
[A
[A
    Progress:  51%|█████     | 2556/5000 [31:25<18:16,  2.23it/s]
[A
[A
[A

[A[A
    Progress:  51%|█████     | 2557/5000 [31:26<32:34,  1.25it/s]

[A[A

[A[A
[A

[A[A
[A
[A
[A
[A

[A[A

    Progress:  51%|█████     | 2558/5000 [31:28<47:51,  1.18s/it]

    Progress:  51%|█████     | 2559/5000 [31:29<38:50,  1.05it/s]

[A[A
[A

    Progress:  51%|█████     | 2560/5000 [31:29<32:37,  1.25it/s]
[A

[A[A
[A

[A[A
[A
[A
[A

[A[A
[A

[A[A
    Progress:  51%|█████     | 2561/5000 [31:33<1:05:11,  1.60s/it]

    Progress:  51%|█████     | 2562/5000 [31:33<49:55,  1.23s/it]  
[A
    Progress:  51%|█████▏    | 2564/5000 [31:33<28:27,  1.43it/s]
    Progress:  51%|█████▏    | 2565/5000 [31:33<21:16,  1.91it/s]
    Progress:  51%|██

  [Saved] train_generated_qwen_qwen-2.5-7b-instruct_no_template.jsonl
Success: trivia qwen/qwen-2.5-7b-instruct
  [Generating] trivia | test | meta-llama/llama-3.1-8b-instruct | Template=False




[A[A
[A

[A[A
    Progress:  52%|█████▏    | 2624/5000 [32:16<33:31,  1.18it/s]
[A

    Progress:  52%|█████▎    | 2625/5000 [32:16<27:01,  1.46it/s]
[A

[A[A
[A

[A[A

[A[A
[A

[A[A
[A

[A[A
[A

[A[A
[A
    Progress:  53%|█████▎    | 2626/5000 [32:18<40:21,  1.02s/it]

    Progress:  53%|█████▎    | 2628/5000 [32:19<24:06,  1.64it/s]

[A[A
[A
[A

[A[A
[A

[A[A
[A
    Progress:  53%|█████▎    | 2629/5000 [32:20<31:02,  1.27it/s]
[A
[A
[A

[A[A
[A

[A[A
[A

[A[A
[A
[A

[A[A
    Progress:  53%|█████▎    | 2631/5000 [32:22<36:58,  1.07it/s]

[A[A
[A
    Progress:  53%|█████▎    | 2632/5000 [32:23<33:24,  1.18it/s]
[A
    Progress:  53%|█████▎    | 2634/5000 [32:24<23:59,  1.64it/s]
[A
[A
    Progress:  53%|█████▎    | 2635/5000 [32:24<21:47,  1.81it/s]

[A[A
[A
[A

[A[A
[A
[A

[A[A
[A
    Progress:  53%|█████▎    | 2636/5000 [32:26<37:19,  1.06it/s]

[A[A
[A
[A

    Progress:  53%|█████▎    | 2637/5000 [32:27<36:31,  

  [Saved] test_generated_meta-llama_llama-3.1-8b-instruct_no_template.jsonl
Success: trivia meta-llama/llama-3.1-8b-instruct
  [Generating] math | train | qwen/qwen-2.5-72b-instruct | Template=True




[A[A
    Progress:  54%|█████▎    | 2680/5000 [32:55<31:31,  1.23it/s]
[A
[A
    Progress:  54%|█████▎    | 2681/5000 [32:56<31:49,  1.21it/s]
[A
[A
    Progress:  54%|█████▎    | 2682/5000 [32:57<41:00,  1.06s/it]
[A
    Progress:  54%|█████▎    | 2685/5000 [32:58<20:56,  1.84it/s]
[A
[A
    Progress:  54%|█████▎    | 2686/5000 [32:59<26:46,  1.44it/s]
    Progress:  54%|█████▎    | 2687/5000 [33:00<24:58,  1.54it/s]
    Progress:  54%|█████▍    | 2688/5000 [33:01<27:27,  1.40it/s]
[A
[A
[A
[A
[A

[A[A
[A

[A[A
    Progress:  54%|█████▍    | 2689/5000 [33:02<39:07,  1.02s/it]
[A

    Progress:  54%|█████▍    | 2691/5000 [33:03<26:04,  1.48it/s]
[A
[A
    Progress:  54%|█████▍    | 2692/5000 [33:04<27:48,  1.38it/s]
[A
[A
    Progress:  54%|█████▍    | 2693/5000 [33:05<27:35,  1.39it/s]
[A

[A[A
[A
[A

    Progress:  54%|█████▍    | 2694/5000 [33:06<31:54,  1.20it/s]
[A
[A

    Progress:  54%|█████▍    | 2695/5000 [33:07<35:25,  1.08it/s]
    Progress: 

  [Saved] train_generated_qwen_qwen-2.5-72b-instruct_no_template.jsonl
Success: trivia qwen/qwen-2.5-72b-instruct
  [Generating] math | test | qwen/qwen-2.5-72b-instruct | Template=True


    Progress:   0%|          | 0/100 [00:00<?, ?it/s]
[A
[A
[A
[A

[A[A

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
    Progress:   1%|          | 1/100 [00:09<16:01,  9.72s/it]
[A
[A
    Progress:   2%|▏         | 2/100 [00:11<08:01,  4.91s/it]
    Progress:   3%|▎         | 3/100 [00:11<04:36,  2.85s/it]
[A
    Progress:   4%|▍         | 4/100 [00:12<03:09,  1.97s/it]
[A
    Progress:   5%|▌         | 5/100 [00:13<02:41,  1.70s/it]
[A
[A
[A

[A[A
[A
[A

[A[A
[A
[A
[A
[A
    Progress:   6%|▌         | 6/100 [00:16<03:29,  2.23s/it]
[A
[A
[A
[A
[A

[A[A
[A
    Progress:   7%|▋         | 7/100 [00:19<03:33,  2.30s/it]
[A

[A[A
[A
[A

    Progress:   9%|▉         | 9/100 [00:21<02:24,  1.58s/it]

[A[A
[A

[A[A
[A

[A[A
    Progress:  10%|█         | 10/100 [00:22<02:17,  1.52s/it]

[A[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A
[A
[A
[A
    Progress:  11%|█         | 11/100 [00:30<05:14

  [Saved] train_generated_meta-llama_llama-3.1-8b-instruct_no_template.jsonl
Success: trivia meta-llama/llama-3.1-8b-instruct
  [Generating] math | train | qwen/qwen-2.5-72b-instruct | Template=False



[A

[A[A

[A[A

[A[A

    Progress:  53%|█████▎    | 53/100 [01:36<01:41,  2.15s/it]

    Progress:  55%|█████▌    | 55/100 [01:38<01:10,  1.57s/it]
    Progress:  56%|█████▌    | 56/100 [01:40<01:05,  1.49s/it]
[A

[A[A

[A[A
[A
    Progress:  57%|█████▋    | 57/100 [01:43<01:21,  1.89s/it]
    Progress:  58%|█████▊    | 58/100 [01:44<01:08,  1.62s/it]
[A

    Progress:  59%|█████▉    | 59/100 [01:45<01:06,  1.62s/it]

[A[A
[A

    Progress:  61%|██████    | 61/100 [01:49<01:04,  1.66s/it]

[A[A

[A[A

    Progress:  62%|██████▏   | 62/100 [01:51<01:07,  1.77s/it]
    Progress:  63%|██████▎   | 63/100 [01:51<00:52,  1.43s/it]
[A
    Progress:  65%|██████▌   | 65/100 [01:55<00:54,  1.56s/it]

[A[A
[A
    Progress:  66%|██████▌   | 66/100 [01:58<01:01,  1.82s/it]
[A

[A[A

[A[A

[A[A
[A
    Progress:  67%|██████▋   | 67/100 [02:00<01:04,  1.95s/it]

    Progress:  68%|██████▊   | 68/100 [02:01<00:55,  1.74s/it]
    Progress:  69%|██████▉   | 69/100 [02:

  [Saved] test_generated_qwen_qwen-2.5-72b-instruct_with_template.jsonl
Success: math qwen/qwen-2.5-72b-instruct
  [Generating] math | test | qwen/qwen-2.5-72b-instruct | Template=False


    Progress:   0%|          | 0/100 [00:00<?, ?it/s]
[A
[A

[A[A

    Progress:   1%|          | 1/100 [00:04<06:47,  4.12s/it]
[A

[A[A
[A

[A[A

[A[A

[A[A

[A[A

    Progress:   3%|▎         | 3/100 [00:09<04:15,  2.64s/it]
    Progress:   5%|▌         | 5/100 [00:09<02:07,  1.34s/it]
    Progress:   6%|▌         | 6/100 [00:11<02:08,  1.36s/it]
[A

[A[A
    Progress:   8%|▊         | 8/100 [00:13<01:36,  1.05s/it]

[A[A
[A

[A[A
[A
[A

[A[A

    Progress:   9%|▉         | 9/100 [00:15<02:06,  1.39s/it]

[A[A
[A
[A

[A[A
    Progress:  12%|█▏        | 12/100 [00:21<02:18,  1.57s/it]

[A[A

[A[A

    Progress:  14%|█▍        | 14/100 [00:22<01:18,  1.09it/s]
[A
    Progress:  15%|█▌        | 15/100 [00:24<01:45,  1.24s/it]

[A[A
[A
    Progress:  16%|█▌        | 16/100 [00:25<01:47,  1.27s/it]

[A[A
[A
    Progress:  18%|█▊        | 18/100 [00:27<01:29,  1.09s/it]

    Progress:  19%|█▉        | 19/100 [00:28<01:29,  1.11s/it]

[A[A

[

  [Saved] test_generated_qwen_qwen-2.5-72b-instruct_no_template.jsonl
Success: math qwen/qwen-2.5-72b-instruct
  [Generating] math | train | qwen/qwen-2.5-7b-instruct | Template=True


    Progress:   0%|          | 0/5000 [00:00<?, ?it/s]
[A

[A[A

[A[A
[A

    Progress:   0%|          | 6/5000 [00:06<1:13:12,  1.14it/s]

[A[A

[A[A
[A
    Progress:   0%|          | 7/5000 [00:07<1:20:57,  1.03it/s]
[A

    Progress:   0%|          | 8/5000 [00:09<1:32:06,  1.11s/it]
[A

[A[A
[A
[A

[A[A
    Progress:   0%|          | 9/5000 [00:10<1:43:22,  1.24s/it]

    Progress:   0%|          | 11/5000 [00:12<1:23:50,  1.01s/it]

[A[A

[A[A
    Progress:   0%|          | 12/5000 [00:14<1:45:39,  1.27s/it]

    Progress:   0%|          | 13/5000 [00:14<1:20:37,  1.03it/s]

    Progress:   0%|          | 14/5000 [00:17<2:01:24,  1.46s/it]

[A[A
[A

[A[A
    Progress:   0%|          | 16/5000 [00:19<1:31:27,  1.10s/it]
[A

[A[A
[A

[A[A

    Progress:   0%|          | 18/5000 [00:21<1:32:14,  1.11s/it]
[A

[A[A
    Progress:   0%|          | 20/5000 [00:22<1:09:16,  1.20it/s]
[A

    Progress:   0%|          | 21/5000 [00:24<1:22:22,  1.01it/s

  [Saved] train_generated_meta-llama_llama-3.1-70b-instruct_no_template.jsonl
Success: trivia meta-llama/llama-3.1-70b-instruct
  [Generating] math | test | qwen/qwen-2.5-7b-instruct | Template=True




    Progress:   1%|▏         | 66/5000 [01:07<1:00:51,  1.35it/s]
[A
[A
    Progress:   1%|▏         | 67/5000 [01:09<1:16:24,  1.08it/s]

    Progress:   1%|▏         | 68/5000 [01:09<1:06:01,  1.24it/s]
[A
    Progress:   1%|▏         | 69/5000 [01:10<58:21,  1.41it/s]  

[A[A

    Progress:   1%|▏         | 70/5000 [01:10<52:41,  1.56it/s]
[A

[A[A
    Progress:   1%|▏         | 72/5000 [01:11<51:32,  1.59it/s]  

[A[A

    Progress:   1%|▏         | 73/5000 [01:13<1:07:20,  1.22it/s]
    Progress:   2%|▏         | 77/5000 [01:16<54:56,  1.49it/s]  
    Progress:   2%|▏         | 78/5000 [01:16<45:21,  1.81it/s]

[A[A

[A[A
[A

    Progress:   2%|▏         | 79/5000 [01:17<59:33,  1.38it/s]
[A

    Progress:   2%|▏         | 80/5000 [01:19<1:20:18,  1.02it/s]

[A[A

    Progress:   2%|▏         | 81/5000 [01:20<1:32:12,  1.12s/it]

    Progress:   2%|▏         | 82/5000 [01:21<1:22:39,  1.01s/it]

[A[A

[A[A

[A[A

    Progress:   2%|▏         | 86/5000 [01

  [Saved] test_generated_qwen_qwen-2.5-7b-instruct_with_template.jsonl
Success: math qwen/qwen-2.5-7b-instruct
  [Generating] math | train | qwen/qwen-2.5-7b-instruct | Template=False



[A

    Progress:   3%|▎         | 164/5000 [02:36<1:33:36,  1.16s/it]
    Progress:   3%|▎         | 165/5000 [02:36<1:09:36,  1.16it/s]

    Progress:   3%|▎         | 166/5000 [02:37<1:02:58,  1.28it/s]
[A
    Progress:   3%|▎         | 167/5000 [02:39<1:28:43,  1.10s/it]
    Progress:   3%|▎         | 168/5000 [02:39<1:06:30,  1.21it/s]

[A[A

[A[A
    Progress:   3%|▎         | 170/5000 [02:41<1:17:09,  1.04it/s]
[A

    Progress:   3%|▎         | 171/5000 [02:43<1:30:33,  1.13s/it]
    Progress:   3%|▎         | 172/5000 [02:43<1:14:19,  1.08it/s]

    Progress:   3%|▎         | 173/5000 [02:45<1:30:24,  1.12s/it]

[A[A

    Progress:   3%|▎         | 174/5000 [02:45<1:17:38,  1.04it/s]

[A[A
    Progress:   4%|▎         | 177/5000 [02:47<54:54,  1.46it/s]  

    Progress:   4%|▎         | 178/5000 [02:48<1:06:55,  1.20it/s]

[A[A
[A

    Progress:   4%|▎         | 179/5000 [02:50<1:35:24,  1.19s/it]
    Progress:   4%|▎         | 180/5000 [02:51<1:21:41,  1.02s/it