# INSTALLING THE TESTS
Each test battery is stored as a single JSON object formatted as
```js
{ "format": 1 | 2 | 3
, "name": "string"
, ... // other info
, "cases":
  [ { "prompt": "string"
    , "truth": Value
    }
  , ...
  ]
}
```
with potentially other properties specific to the battery stored in that same object.

`"format"` corresponds to the enum `BatteryFormat`:

```python
BatteryFormat = IntEnum(
    "BatteryFormat",
    ["FreeText", "MultipleChoice", "FixedMultipleChoice"]
)
```

For multiple choice batteries, each case carries its own options as a `"targets": ["choice 1", "choice 2", ...]` property stored in the case object, and `"truth"` is an index into that list.

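However, for multiple choice batteries where the choices are the same across all test cases (e.g. boolean), the options are not repeated in each case: a single `"choices": ["choice 1", "choice 2", ...]` list is stored once on the battery object, and `"truth"` is an index into it.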

In [11]:
import json
import os.path
from enum import IntEnum

In [12]:
class BatteryFormat(IntEnum):
    """Answer format of a test battery.

    Members are integers so they can be passed straight to ``json.dumps``.
    Numbering starts at 1, matching the functional ``IntEnum(...)`` API.
    """

    FreeText = 1
    MultipleChoice = 2
    FixedMultipleChoice = 3
def save_battery(name, *, fmt, choices=None, gen=None, cases=None):
    """Serialize a test battery to ``data/compiled/<name>.json``.

    Args:
        name: Battery name; stored under ``"name"`` and used as the file stem.
        fmt: ``BatteryFormat`` member stored under ``"format"``.
        choices: Shared answer options. Required, and only written, when
            ``fmt`` is ``BatteryFormat.FixedMultipleChoice``.
        gen: Object exposing ``TestCases()``; consulted only when ``cases``
            is None.
        cases: Iterable of case dicts. Takes precedence over ``gen``.

    Raises:
        AssertionError: if neither ``cases`` nor ``gen`` is supplied, or if a
            FixedMultipleChoice battery is created without ``choices``.
    """
    if cases is None:
        assert gen, "Expected generator when no cases given"
        cases = gen.TestCases()

    json_object = {
        "format": fmt,
        "name": name,
    }

    if fmt == BatteryFormat.FixedMultipleChoice:
        assert choices, \
            "Expected corresponding choices option when creating FixedMultipleChoice battery"
        json_object["choices"] = choices

    # put cases last for the sake of json human readability
    json_object["cases"] = list(cases)

    content = json.dumps(json_object, separators=(",", ":"))

    # Show only the head and tail of large payloads. Small payloads are
    # printed whole so the omitted count never goes negative and the two
    # slices never overlap.
    if len(content) > 140:
        print(f"{content[:70]}\n... {len(content) - 140} bytes omitted ...\n{content[-70:]}")
    else:
        print(content)

    # Create the target directory on first use instead of failing in open().
    output_dir = os.path.join("data", "compiled")
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{name}.json")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
    print("done")

In [29]:
def load_json(py_object, fixed=False, file_name="task.json"):
    """Convert a JSON task file into a list of battery cases.

    Args:
        py_object: Imported package; the file is looked up under its
            ``__path__``.
        fixed: When True, the option list of the first example is used as the
            shared choice list for every case, and cases carry no
            ``"targets"``. When False, each case stores its own ``"targets"``.
        file_name: Name of the JSON file inside the package directory.

    Returns:
        ``(fixed_choices, results)``. ``fixed_choices`` is the shared option
        list, or None when ``fixed`` is False or there are no examples.
        ``results`` is a list of ``{"prompt", "truth"[, "targets"]}`` dicts
        where ``"truth"`` is an index into the applicable option list.

    Raises:
        ValueError: if the correct answer is not in the applicable option
            list (for example, a fixed battery whose examples do not all share
            the same options).
    """
    path = os.path.join(*py_object.__path__, file_name)
    with open(path, "r", encoding="utf-8") as tasks_file:
        data = json.load(tasks_file)

    results = []
    fixed_choices = None
    for example in data["examples"]:
        target_scores = example["target_scores"]
        targets = list(target_scores.keys())

        if fixed and fixed_choices is None:
            fixed_choices = targets

        # "truth" must index the list the consumer will actually see: the
        # shared list for fixed batteries, otherwise this case's own targets.
        options = fixed_choices if fixed else targets

        if "target" in example:
            # NOTE(review): assumes "target" is a single option key, not a
            # list of acceptable answers. Confirm against the task files.
            truth_key = example["target"]
        else:
            # No explicit target: take the highest-scoring option.
            truth_key = max(target_scores, key=target_scores.get)
        truth = options.index(truth_key)

        task_object = {
            "prompt": example["input"],
            "truth": truth
        }
        if not fixed:
            task_object["targets"] = targets

        results.append(task_object)

    return fixed_choices, results

## `boolean_expressions`

In [24]:
from bigbench.benchmark_tasks.boolean_expressions import task as be_task
import random

# we will simply take the tasks, rather than use bigbench's evaluator
# this converts the task into a json task
class ECBooleanExpressions(be_task.BooleanExpressionsTask):
    """Boolean-expression task that emits plain case dicts.

    Relies on the parent task's ``_yield_expression`` and ``_eval_expression``
    and packages the results as dicts for a JSON battery.
    """

    def _generate_expressions(self, expr_size, shots):
        """Yield few-shot cases for every expression of size ``expr_size``.

        For each entry of ``shots`` the expressions are re-sorted, shuffled and
        cut into consecutive windows of ``shot + 1``. The first ``shot`` items
        of a window become solved examples, the last one is the question.
        """
        pool = list(self._yield_expression(expr_size))

        for shot in shots:
            # Sort first so every shot count shuffles from the same ordering.
            # NOTE(review): this shuffles with the global `random` module;
            # whether the parent's `seed` seeds it is not visible here.
            # Confirm if reproducible output is required.
            pool = sorted(pool)
            random.shuffle(pool)

            window = shot + 1
            for start in range(0, len(pool), window):
                evaluated = [
                    self._eval_expression(expr)
                    for expr in pool[start : start + window]
                ]
                # A short trailing window cannot supply every example; skip it.
                if len(evaluated) != window:
                    continue

                solved, (answer, question_text) = evaluated[:-1], evaluated[-1]
                pieces = [text + str(value) + " . " for (value, text) in solved]
                pieces.append(question_text)
                yield {
                    "prompt": "".join(pieces),
                    "truth": int(answer),
                    "shot": shot,
                    "expr_size": expr_size,
                }

    def expressions_for(self, shots=None):
        """Yield cases for every length in ``self.expression_lengths``.

        ``shots`` defaults to a one-element list holding ``self.num_shots``.
        """
        shot_counts = [self.num_shots] if shots is None else shots

        for expr_size in self.expression_lengths:
            yield from self._generate_expressions(expr_size, shot_counts)

    @staticmethod
    def TestCases(*, seed=13, shots=[0, 1, 2]):
        """Build a task with ``seed`` and yield its cases for each shot count."""
        task = ECBooleanExpressions(seed=seed)
        yield from task.expressions_for(shots=shots)

In [25]:
# Both answers are shared by every case, so they are stored once as fixed choices.
save_battery(
    name="boolean_expressions",
    gen=ECBooleanExpressions,
    choices=["False", "True"],
    fmt=BatteryFormat.FixedMultipleChoice,
)

{"format":3,"name":"boolean_expressions","choices":["False","True"],"c
... 634321 bytes omitted ...
( True and not not not True ) is ","truth":0,"shot":2,"expr_size":8}]}
done


## `code_line_description`

In [31]:
import bigbench.benchmark_tasks.code_line_description as cld_task
# Loaded non-fixed: every case keeps its own "targets" list.
choices, cases = load_json(cld_task, fixed=False)
save_battery(
    name="code_line_description",
    cases=cases,
    fmt=BatteryFormat.MultipleChoice,
)

{"format":2,"name":"code_line_description","cases":[{"prompt":"for i i
... 17376 bytes omitted ...
mbers","prints 5","returns numbers which are multiples of 10 or 5"]}]}
done


## `color`: `color.hex`

In [30]:
import bigbench.benchmark_tasks.color.hex as color_hex_task
# Loaded as fixed: the first example's options become the battery-wide choices.
choices, cases = load_json(color_hex_task, fixed=True)
save_battery(
    name="color.hex",
    cases=cases,
    choices=choices,
    fmt=BatteryFormat.FixedMultipleChoice,
)

{"format":3,"name":"color.hex","choices":["black","blue","brown","gray
... 97992 bytes omitted ...
most closely matching this HEX representation: #f8b434 ?","truth":9}]}
done


## `geometric_shapes`

In [33]:
import bigbench.benchmark_tasks.geometric_shapes as shapes_task
# Loaded as fixed: the first example's options become the battery-wide choices.
choices, cases = load_json(shapes_task, fixed=True)
save_battery(
    name="geometric_shapes",
    cases=cases,
    choices=choices,
    fmt=BatteryFormat.FixedMultipleChoice,
)

{"format":3,"name":"geometric_shapes","choices":["circle","heptagon","
... 57587 bytes omitted ...
lement <path d=\"M 38.35,49.41 L 31.18,9.15\"/> draws a ","truth":4}]}
done


## `program_synthesis`

In [None]:
# TODO: this is more involved

## `python_programming_challenge`

## `semantic_parsing_in_context_sparc`

## `semantic_parsing_spider`

## `data4CopynetV3.zip`

## [https://zenodo.org/records/10491384](https://zenodo.org/records/10491384)

## `auto_debugging`

## `color`: `color.hex` (free text)

## `geometric_shapes` (free text)

## CoDiSum's data4CopynetV3.zip

In [5]:
import requests
import zipfile
import io
import os

# Download the CoDiSum archive and unpack it into `output_directory`.
url = "https://github.com/SoftWiser-group/CoDiSum/raw/master/data4CopynetV3.zip"

output_directory = "./"
os.makedirs(output_directory, exist_ok=True)
# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell never finishes. The value bounds connect time and each read.
response = requests.get(url, timeout=60)

if response.status_code == 200:
    # Unpack straight from memory; the archive is never written to disk.
    zip_content = io.BytesIO(response.content)

    with zipfile.ZipFile(zip_content, 'r') as zip_ref:
        zip_ref.extractall(output_directory)

    print("Extraction successful.")
else:
    print(f"Failed to download the zip file. Status code: {response.status_code}")

Extraction successful.


In [14]:
# put in data dir

def read_json_text(json_path):
    """Load and return the parsed contents of the JSON file at ``json_path``.

    Prints a status line on success. On failure, prints a message naming the
    offending path and re-raises, so callers never receive an undefined result.

    Raises:
        FileNotFoundError: if ``json_path`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    try:
        with open(json_path, "r", encoding="utf-8") as json_file:
            json_data = json.load(json_file)
    except FileNotFoundError:
        print(f"JSON file not found at path: {json_path}")
        raise
    except json.JSONDecodeError:
        print(f"Error decoding JSON file: {json_path}")
        raise
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        raise

    print("JSON file", json_path, "loaded successfully.")
    return json_data

# Version suffix shared by the difftext/msgtext file pair.
VERSION = 12
input_path = os.path.join(output_directory, "data4CopynetV3", f"difftextV{VERSION}.json")
output_path = os.path.join(output_directory, "data4CopynetV3", f"msgtextV{VERSION}.json")

inputs, outputs = read_json_text(input_path), read_json_text(output_path)
# zip() would silently truncate on a length mismatch, so check first.
assert len(inputs) == len(outputs), "Mismatch between input and output test cases"

# Pair each diff with its message by position.
cases = [
    {"prompt": diff, "truth": comment}
    for diff, comment in zip(inputs, outputs)
]

save_battery(
    name="commit_message_generation_codisum",
    cases=cases,
    fmt=BatteryFormat.FreeText,
)

JSON file ./data4CopynetV3/difftextV12.json loaded successfully.
JSON file ./data4CopynetV3/msgtextV12.json loaded successfully.
{"format":1,"name":"commit_message_generation_codisum","cases":[{"prom
... 107692476 bytes omitted ...
create(\"m\");\n \n","truth":"remove extra - from --match-original"}]}
done
