# Use assistant as a teacher

```bash
$ home-assistant-datasets assist collect --dataset ./datasets/device-actions-v2-fixtures/ --model_output_dir=./datasets/device-actions-v2-collect/ --models=assistant
```

# Mark Failed

In [2]:
import pathlib
import yaml
import slugify

# Build a training set from the "assistant" teacher outputs: for every fixture
# file, look up the teacher's collected responses, keep the ones that did not
# fail, and write the tool call the teacher made next to the sentence that
# triggered it.

DATASET_DIR = pathlib.Path("../datasets/")
FIXTURES_DIR = DATASET_DIR / "device-actions-v2-fixtures"
# Backward-compatible alias for the original (misspelled) constant name.
FIXTTURES_DIR = FIXTURES_DIR
COLLECT_DIR = DATASET_DIR / "device-actions-v2-collect"
ASSIST_TEACHER_DIR = COLLECT_DIR / "assistant"
TRAIN_DIR = COLLECT_DIR / "train"

# CSafeLoader only exists when PyYAML was built against libyaml; fall back to
# the pure-Python safe loader so the notebook still runs without it.
YAML_LOADER = getattr(yaml, "CSafeLoader", yaml.SafeLoader)


def _load_yaml(yaml_path):
    """Parse the YAML document stored at `yaml_path`, reading it as UTF-8."""
    return yaml.load(yaml_path.read_text(encoding="utf-8"), Loader=YAML_LOADER)


# parents=True: do not fail when the collect directory has not been created yet.
TRAIN_DIR.mkdir(parents=True, exist_ok=True)

# Per-category counters, read by the reporting cell below.
#   total:   number of teacher output files seen
#   success: number of those whose response does not start with "Sorry"
success = {}
total = {}

# sorted() makes the processing (and therefore the report) order deterministic.
for path in sorted(FIXTURES_DIR.glob("**/*.yaml")):
    if path.name == "_fixtures.yaml":
        continue
    home_id = path.parent.name
    category = path.name.split(".")[0]

    fixture_record = _load_yaml(path)

    file_prefix = "_".join([
        slugify.slugify(home_id, separator="_"),
        slugify.slugify(category, separator="_"),
    ])
    # NOTE(review): this prefix glob would also match outputs of another
    # category whose slug merely starts with this one — confirm the collect
    # step's file naming rules that out.
    assist_outputs = sorted(ASSIST_TEACHER_DIR.glob(f"{file_prefix}*.yaml"))

    # Maps an input sentence to the tool call recorded for it.
    tool_calls = {}
    for filename in assist_outputs:
        total[category] = total.get(category, 0) + 1
        record = _load_yaml(filename)
        sentence = record["task"]["input_text"]
        # Responses starting with "Sorry" are treated as teacher failures.
        if record["response"].startswith("Sorry"):
            continue
        success[category] = success.get(category, 0) + 1

        conversation_trace = record["context"]["conversation_trace"]
        # Only the second trace event is inspected; presumably the first one
        # is the user request — TODO confirm against the trace format.
        if len(conversation_trace) < 2:
            continue
        if conversation_trace[1]["event_type"] != "tool_call":
            continue
        tool_calls[sentence] = conversation_trace[1]["data"]

    # Shallow copy is enough: only the top-level "tests" key is replaced.
    output_record = fixture_record.copy()
    # One output test per sentence that has a non-empty recorded tool call.
    output_record["tests"] = [
        {
            "sentences": [sentence],
            "tool_call": tool_calls[sentence],
        }
        for test in fixture_record["tests"]
        for sentence in test["sentences"]
        if tool_calls.get(sentence)
    ]

    # Skip fixtures for which the teacher produced no usable tool call.
    if output_record["tests"]:
        home_dir = TRAIN_DIR / home_id
        home_dir.mkdir(parents=True, exist_ok=True)
        out_file = home_dir / f"{category}.yaml"
        out_file.write_text(
            yaml.dump(output_record, sort_keys=False, explicit_start=True),
            encoding="utf-8",
        )


In [None]:
# Print one line per category: name, number of successful teacher responses,
# and the success rate as a percentage of all collected responses.
for category, t in total.items():
    # Categories with no successful response have no entry in `success`.
    s = success.get(category, 0)
    print(f"{category} - {s} - {100*(s / t):0.2f}%")