In [None]:
import json
import random
import requests
from pathlib import Path
import importlib.util

In [None]:
# Download and parse JSON from URL
def load_json_url(url: str, timeout: float = 30.0):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON document to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        Whatever ``Response.json()`` decodes the body to.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to JSON-decode
    # an HTML error page.
    response.raise_for_status()
    return response.json()

# Fetch Com2Sense (100 samples by default)
def fetch_com2sense(num_samples=100) -> list:
    """Download the BIG-bench com2sense data and draw a random subset.

    Args:
        num_samples: How many examples to draw (without replacement).

    Returns:
        A list of dicts with keys ``id`` (``com2sense_<i>``, numbered in
        sampled order), ``task``, ``question`` (the example's ``sent``
        field) and ``answer`` (``True`` exactly when the example's
        ``label`` field equals the string ``"True"``).

    Raises:
        ValueError: From ``random.sample`` if ``num_samples`` exceeds the
            number of available examples.
    """
    # Use the same canonical raw-content host as the other fetchers
    # (raw.github.com is only a legacy redirect to it).
    url = "https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/com2sense/big-bench-data.json"
    task_data = load_json_url(url)
    data = task_data["examples"]
    selected = random.sample(data, num_samples)
    com2sense_samples = [{
            "id": f"com2sense_{i}",
            "task": "com2sense",
            "question": sample["sent"],
            # Labels are compared as strings; anything other than "True" maps to False.
            "answer": sample["label"] == "True"
        } for i, sample in enumerate(selected)]
    return com2sense_samples

# Fetch Cause and Effect (100 samples by default)
def fetch_cause_effect(num_samples=100) -> list:
    """Download the BIG-bench cause_and_effect task and sample statements.

    Every key of every example's ``target_scores`` mapping becomes one
    candidate record; ``answer`` is ``True`` when its score equals ``1.0``.
    Ids are assigned over the full candidate pool *before* sampling, so
    the ids in the returned subset are unique but not contiguous.

    Args:
        num_samples: How many candidates to draw (without replacement).

    Returns:
        A list of ``num_samples`` dicts with keys ``id``, ``task``,
        ``question`` and ``answer``.
    """
    url = "https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/cause_and_effect/one_sentence_no_prompt/task.json"
    examples = load_json_url(url)["examples"]

    # Flatten (statement, score) pairs across all examples, keeping order.
    scored_statements = [
        (statement, target_score)
        for example in examples
        for statement, target_score in example["target_scores"].items()
    ]

    candidates = [
        {
            "id": f"cause_effect_{position}",
            "task": "cause_effect",
            "question": statement,
            "answer": target_score == 1.0,
        }
        for position, (statement, target_score) in enumerate(scored_statements)
    ]
    return random.sample(candidates, num_samples)

# Generate Web of Lies samples (100 samples by default)
def generate_web_of_lies(num_samples=100, num_sentences_per_sample=4) -> list:
    """Generate Web of Lies questions using BIG-bench's own task script.

    The upstream ``task.py`` is downloaded, patched so it no longer needs
    the ``bigbench`` package, written to ``task_web_of_lies.py`` in the
    current working directory, and imported dynamically.

    Args:
        num_samples: Number of question/answer pairs to generate.
        num_sentences_per_sample: Passed to the upstream task's
            ``_get_expression`` method for each sample.

    Returns:
        A list of dicts with keys ``id`` (``web_of_lies_<i>``), ``task``,
        ``question`` and ``answer``; the last two are the pair returned by
        ``_get_expression``.

    Raises:
        requests.HTTPError: If the script download returns a 4xx/5xx status.
    """
    # Retrieve method to generate a Web of Lies sample
    def load_web_of_lies_task():
        """Download, patch and import the upstream task; return an instance."""
        # Download original script
        url = "https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/web_of_lies/task.py"
        # SECURITY NOTE(review): the code fetched here is executed below.
        # It tracks the `main` branch, so its content can change between
        # runs; consider pinning the URL to a commit hash.
        response = requests.get(url, timeout=30)
        # Never write and execute an HTML error page as if it were the script.
        response.raise_for_status()
        code = response.text

        # Patch out the bigbench import and task.Task inheritance
        patched_lines = []
        for line in code.splitlines():
            if "import bigbench.api.task" in line:
                continue
            if "class WebOfLiesTask(task.Task):" in line:
                patched_lines.append("class WebOfLiesTask(object):")
            else:
                patched_lines.append(line)

        path = Path("task_web_of_lies.py")
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly rather than relying on the platform's locale encoding.
        path.write_text("\n".join(patched_lines), encoding="utf-8")

        # Dynamically import patched module
        spec = importlib.util.spec_from_file_location("web_of_lies", path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        return mod.WebOfLiesTask()

    # Generate `num_samples` samples
    task = load_web_of_lies_task()
    web_of_lies_samples = []
    for i in range(num_samples):
        # NOTE(review): `_get_expression` is a private upstream method and
        # may change without notice.
        q, a = task._get_expression(num_sentences_per_sample)
        web_of_lies_samples.append({
            "id": f"web_of_lies_{i}",
            "task": "web_of_lies",
            "question": q,
            "answer": a
        })
    return web_of_lies_samples

# Fetch Arithmetic (100 samples by default)
def fetch_arithmetic(num_samples=100) -> list:
    """Download the BIG-bench 5-digit multiplication task and sample it.

    Args:
        num_samples: How many examples to draw (without replacement).

    Returns:
        A list of dicts with keys ``id`` (``arithmetic_<i>``, numbered in
        sampled order), ``task``, ``question`` (the example's ``input``)
        and ``answer`` (the example's ``target`` converted with ``int``).
    """
    url = "https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/arithmetic/5_digit_multiplication/task.json"
    examples = load_json_url(url)["examples"]
    chosen = random.sample(examples, num_samples)

    records = []
    for position, example in enumerate(chosen):
        records.append({
            "id": f"arithmetic_{position}",
            "task": "arithmetic",
            "question": example["input"],
            "answer": int(example["target"]),
        })
    return records

In [None]:
# Combine all and save
def build_unified_dataset(num_samples_per_task=100):
    """Build the four-task dataset, save it as JSON, and return it.

    Concatenates, in this order, the com2sense, cause_effect, web_of_lies
    and arithmetic samples, then writes them to
    ``unified_bigbench_4x100.json`` in the current working directory.

    Args:
        num_samples_per_task: Number of samples requested from each task.
            The output file name is fixed and does not reflect this value.

    Returns:
        The combined list of sample dicts that was written to disk, so
        callers can inspect it without re-reading the file.
    """
    data = (
        fetch_com2sense(num_samples_per_task) +
        fetch_cause_effect(num_samples_per_task) +
        generate_web_of_lies(num_samples_per_task) +
        fetch_arithmetic(num_samples_per_task)
    )
    with open("unified_bigbench_4x100.json", "w") as f:
        json.dump(data, f, indent=2)
    print("✅ Saved unified_bigbench_4x100.json")
    return data

In [None]:
# Seed Python's global RNG so the `random.sample` draws are repeatable
# across kernel restarts.
# NOTE(review): the downloaded Web of Lies generator may use its own RNG,
# which this seed would not control — verify if exact reproducibility matters.
random.seed(42)
build_unified_dataset(num_samples_per_task=100)

✅ Saved unified_bigbench_4x100.json


In [None]:
# The per-task lists only exist as locals inside the fetch functions, so
# read the saved dataset back and filter it; this works on a fresh kernel.
[s for s in json.loads(Path("unified_bigbench_4x100.json").read_text()) if s["task"] == "arithmetic"][:2]

[{'id': 'arithmetic_0',
  'task': 'arithmetic',
  'question': 'What is 22814 times 78923?',
  'answer': 1800549322},
 {'id': 'arithmetic_1',
  'task': 'arithmetic',
  'question': 'What is 65511 times 33321?',
  'answer': 2182892031}]

In [None]:
# The per-task lists only exist as locals inside the generator function, so
# read the saved dataset back and filter it; this works on a fresh kernel.
[s for s in json.loads(Path("unified_bigbench_4x100.json").read_text()) if s["task"] == "web_of_lies"][:2]

[{'id': 'web_of_lies_0',
  'task': 'web_of_lies',
  'question': 'Kandi tells the truth. Raymond says Kandi lies. Gwenn says Raymond tells the truth. Delfina says Gwenn lies. Does Delfina tell the truth?',
  'answer': True},
 {'id': 'web_of_lies_1',
  'task': 'web_of_lies',
  'question': 'Ka tells the truth. Michaela says Ka lies. Lorine says Michaela tells the truth. Shalonda says Lorine lies. Does Shalonda tell the truth?',
  'answer': True}]

In [None]:
# `causeeffect_samples` is never defined anywhere in the notebook, so read
# the saved dataset back and filter it; this works on a fresh kernel.
[s for s in json.loads(Path("unified_bigbench_4x100.json").read_text()) if s["task"] == "cause_effect"][:2]

[{'id': 'cause_effect_10',
  'task': 'cause_effect',
  'question': 'The boy lifted his eyes from the book because there was a loud noise.',
  'answer': True},
 {'id': 'cause_effect_82',
  'task': 'cause_effect',
  'question': 'My friend celebrated with their family because my friend got a promotion.',
  'answer': True}]

In [None]:
# The per-task lists only exist as locals inside the fetch functions, so
# read the saved dataset back and filter it; this works on a fresh kernel.
[s for s in json.loads(Path("unified_bigbench_4x100.json").read_text()) if s["task"] == "com2sense"][:2]

[{'id': 'com2sense_0',
  'task': 'com2sense',
  'question': "It's easier to make more money in the stock market after 3 years than after 1 month.",
  'answer': True},
 {'id': 'com2sense_1',
  'task': 'com2sense',
  'question': 'If you have a cold, it is less accurate to stick a thermometer under your tongue instead of over your tongue.',
  'answer': False}]