In [75]:
import json
import glob
import imageio.v2 as imageio


def gif_path_for(json_path: str) -> str:
    """Return the path of the GIF stored alongside a trajectory JSON file.

    Only the trailing ``.json`` extension is swapped for ``.gif``. A blanket
    ``str.replace("json", "gif")`` would also rewrite any directory or file
    stem that happens to contain "json".
    """
    suffix = ".json"
    if json_path.endswith(suffix):
        return json_path[: -len(suffix)] + ".gif"
    # Not expected for paths produced by the "*.json" globs below; keep the
    # previous substitution so such inputs behave as before.
    return json_path.replace("json", "gif")


# NOTE(review): `oracle_trajectories_train` and `x` are not referenced anywhere
# else in the visible notebook; kept so the module-level names are unchanged.
oracle_trajectories_train = []
# Maps each glob pattern of oracle run outputs to the trajectories kept from it.
oracle_results = {
    "/plancraft/outputs/oracle_real/train/0/*.json": [],
    "/plancraft/outputs/oracle_real/val/0/*.json": [],
}
x, c = 0, 0
for path in oracle_results.keys():
    # sorted() makes the load order independent of filesystem listing order
    for json_path in sorted(glob.glob(path)):
        with open(json_path, "r") as file:
            traj = json.load(file)
        trace = traj["model_trace"]
        # keep only successful runs that have one recorded inventory per action
        if traj["success"] and len(trace["inventory_history"]) == len(
            trace["action_history"]
        ):
            # find gif of the trajectory
            gif_path = gif_path_for(json_path)
            # load the gif as list of images
            images = imageio.mimread(gif_path)

            assert len(images) == len(trace["action_history"]), (
                f"{gif_path}: {len(images)} frames for "
                f"{len(trace['action_history'])} actions"
            )
            traj["images"] = images
            oracle_results[path].append(traj)

In [76]:
from plancraft.models.prompts import get_system_prompt
from collections import defaultdict

# System prompt placed at the start of every generated dialogue; built for the
# "move" and "smelt" actions only.
SYSTEM_PROMPT = get_system_prompt(actions=["move", "smelt"])

# Bare expression so the notebook displays the prompt text.
SYSTEM_PROMPT

'You are crafting in Minecraft. You need to decide on the next action.\n\nCrafting Grid: The crafting table is organized into a 3x3 grid. Each slot in the grid has a unique identifier:\n    - Top row: [A1] [A2] [A3]\n    - Middle row: [B1] [B2] [B3]\n    - Bottom row: [C1] [C2] [C3]\n\nThe output of the crafting process is placed in a designated output slot labeled [0] You cannot move or smelt items directly into slot [0]\n\nInventory Slots: The remaining inventory slots (outside of the crafting grid) are used for storing items. These slots are labeled as [I1] to [I36]\n\nActions:\n\t- move: Transfer a specific quantity of an item from one slot to another\n\t- smelt: Smelt an item in a furnace and moves the output to a specific slot\n\nFormat\n\t- `move: from [Source] to [Target] with quantity N`\n\t- `smelt: from [Source] to [Target] with quantity N`\n\nExample:\n    - `move: from [I2] to [A1] with quantity 3`\n    - `smelt: from [I5] to [I6] with quantity 1`\n\nConstraints:\n   - You

In [77]:
import os
from plancraft.environments.actions import convert_from_slot_index
from plancraft.models.utils import objective_and_inventory_to_str

def convert_action_to_text(action: dict):
    """Render an action dict as the one-line text command used in the dialogues.

    Reads the "slot_from", "slot_to", "action_type" and "quantity" keys of
    ``action``; both slot values are passed through ``convert_from_slot_index``
    before being formatted.
    """
    source, target = (
        convert_from_slot_index(action[key]) for key in ("slot_from", "slot_to")
    )
    kind = action["action_type"]
    return f"{kind}: from {source} to {target} with quantity {action['quantity']}"


# Turn a trajectory's recorded (inventory, action) steps into a chat dialogue.
def convert_trajectory_to_base_dialogue(traj: dict):
    """Build a chat-style training example from one trajectory.

    The dialogue opens with the system prompt. Each recorded step then adds a
    user message (objective plus the inventory at that step) followed by an
    assistant message (the action rendered as text). Steps are paired with
    ``zip``, so the shorter of the two histories bounds the dialogue length.

    Returns a dict with "messages" (the dialogue) and "example_id".
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    trace = traj["model_trace"]
    objective = trace["objective"]
    steps = zip(trace["action_history"], trace["inventory_history"])
    for action, inventory in steps:
        user_turn = {
            "role": "user",
            "content": objective_and_inventory_to_str(objective, inventory),
        }
        assistant_turn = {
            "role": "assistant",
            "content": convert_action_to_text(action),
        }
        messages.extend((user_turn, assistant_turn))
    return {"messages": messages, "example_id": traj["example_id"]}

In [78]:
# Group (dialogue example, GIF frames) pairs by dataset split.
data = defaultdict(list)
for path, trajs in oracle_results.items():
    # the split name ("train" / "val") is the third-from-last component of the
    # glob pattern used as the key
    split = path.split("/")[-3]
    for traj in trajs:
        text_example = convert_trajectory_to_base_dialogue(traj)
        data[split].append((text_example, traj["images"]))

In [86]:
# Persist every example: the dialogue as JSON and its frames as a GIF.
for split, split_examples in data.items():
    for example, images in split_examples:
        example_id = example["example_id"]

        # dialogue -> data/oracle/{split}/oa/{example_id}.json
        example_path = os.path.join(
            "/plancraft/data/oracle", split, "oa", f"{example_id}.json"
        )
        os.makedirs(os.path.dirname(example_path), exist_ok=True)
        with open(example_path, "w") as f:
            f.write(json.dumps(example, indent=2))

        # frames -> data/oracle/{split}/images/{example_id}.gif
        gif_folder = os.path.join("/plancraft/data/oracle", split, "images")
        os.makedirs(gif_folder, exist_ok=True)
        gif_path = os.path.join(gif_folder, f"{example_id}.gif")
        imageio.mimsave(gif_path, images, duration=0.5)


## Ensure images are correct

In [87]:
import glob
import json

splits = ["train", "val"]

# example ids whose saved GIF frame count differs from their number of
# assistant (action) messages
cannot_solve = []

for split in splits:
    paths = glob.glob(f"/plancraft/data/oracle/{split}/oa/*.json")
    c = 0  # examples in this split whose frame count matches
    for json_example in paths:
        with open(json_example, "r") as f:
            example = json.load(f)
        # one assistant message per action taken
        action_messages = [
            m["content"] for m in example["messages"] if m["role"] == "assistant"
        ]
        gif_path = f"/plancraft/data/oracle/{split}/images/{example['example_id']}.gif"
        images_for_example = imageio.mimread(gif_path)
        if len(images_for_example) != len(action_messages):
            cannot_solve.append(example["example_id"])
            continue
        c += 1

    print(f"Split: {split}, {c} out of {len(paths)} have images")

Split: train, 945 out of 945 have images
Split: val, 470 out of 470 have images


In [82]:
# Display the example ids whose GIF frame count did not match their number of
# assistant messages (empty when every example passed the check).
cannot_solve

[]

### Fix dataset after an oracle run

In [None]:
import glob
import json

def collect_failed_ids(result_paths):
    """Return the set of "example_id" values whose result file is not a success.

    Each path must point to a JSON object with "success" and "example_id" keys.
    Files are opened with a context manager so handles are closed promptly
    instead of being left to the garbage collector.
    """
    failed = set()
    for result_path in result_paths:
        with open(result_path, "r") as f:
            result = json.load(f)
        if not result["success"]:
            failed.add(result["example_id"])
    return failed


# Every oracle result file, across all splits.
examples = glob.glob("/plancraft/outputs/oracle_real/*/0/*.json")
# Ids the oracle failed on. Deliberately does not assign to `data`, which
# holds the per-split examples built earlier in the notebook.
non_valid_ids = collect_failed_ids(examples)

print(len(non_valid_ids))


In [None]:
def filter_dataset_file(dataset_name, invalid_ids):
    """Rewrite a dataset JSON file in place without the examples in ``invalid_ids``.

    The file must contain a JSON list of objects that each have an "id" key.
    Both the read and the write use a context manager, so the rewritten file is
    flushed and closed before this function returns.

    Returns ``(n_before, n_after)``: the number of examples before and after
    filtering.
    """
    with open(dataset_name, "r") as f:
        dataset = json.load(f)
    # NOTE(review): assumes dataset "id" values share an id space with the
    # oracle results' "example_id" — confirm against the dataset files.
    new_dataset = [example for example in dataset if example["id"] not in invalid_ids]
    # replace
    with open(dataset_name, "w") as f:
        json.dump(new_dataset, f, indent=2)
    return len(dataset), len(new_dataset)


for dataset_name in glob.glob("data/*.json"):
    n_before, n_after = filter_dataset_file(dataset_name, non_valid_ids)
    print(dataset_name, n_before, n_after)
