# Device Actions

Generates datasets for performing actions on devices in a synthetic home. This will generate a list
of text / voice commands that you can perform in a home. These are not labeled with the outcome
which is generated in a later step.

In [29]:
import openai
import google.generativeai as genai

from home_assistant_datasets import secrets
from home_assistant_datasets.secrets import get_secret
from home_assistant_datasets import model_client

secrets.DEFAULT_SECRETS_FILE = "../secrets.yaml"

# MODEL_ID = "gpt-3.5-turbo-0125"
# openai = openai.OpenAI(api_key=secrets.get_secret("openai_api_key"))
# model = model_client.ModelClient(openai, MODEL_ID)

# Gemini flash is higher quality and cheaper model than the GPT alternatives.
MODEL_ID = "gemini-1.5-flash"
genai.configure(api_key=secrets.get_secret("google_api_key"))
model = model_client.GoogleClient(MODEL_ID)

# Generate few-shot exapmles

Read the seed data used as a few-shot exampe.

In [30]:
import pathlib
import yaml
from synthetic_home import device_types


DATASET_DIR = pathlib.Path("../datasets/")
DEVICES_DIR = DATASET_DIR / "devices"
SEEDS_DIR = pathlib.Path("./seeds")
SEED_DEVICE_ACTIONS_FILE = SEEDS_DIR / "device-actions.yaml"
SEED_DEVICE_ACTIONS_CAPABILITIES_FILE = SEEDS_DIR / "device-actions-capabilities.yaml"

with open(SEED_DEVICE_ACTIONS_FILE) as f:
    seed_device_actions = list(yaml.load_all(f.read(), Loader=yaml.Loader))

# This is a fixed list of capabilities that any particular synthetic home device type support
with open(SEED_DEVICE_ACTIONS_CAPABILITIES_FILE) as f:
    capabilities = {
        cap["device_type"]: cap["actions"]
        for cap in yaml.load(f.read(), Loader=yaml.Loader)
    }

seed_devices_prompt = "".join(yaml.dump(content, sort_keys=False, explicit_start=True) for content in seed_device_actions)
print(seed_devices_prompt)

registry = device_types.load_device_type_registry()
# Find any devices missing explicit action capabilities definitions
missing_devices = [
    {"device_type": dt, "actions": []}
    for dt in registry.device_types
    if dt not in capabilities
]
if missing_devices:
    print(yaml.dump(missing_devices, sort_keys=False))


---
home: mountain-cabin-us
device:
  name: Kitchen Overhead Light
  area: Kitchen
  device_type: light
  device_info:
    model: Smart LED Bulb
    manufacturer: Philips
    sw_version: 1.2.3
capabilities:
- Turn on
- Turn off
---
actions:
- action: Turn on
  sentences:
  - Please turn on the kitchen overhead light
  - Turn on the kitchen light
  - Kitchen light on
- action: Turn off
  sentences:
  - Please turn off the kitchen overhead light
  - Turn off the kitchen light
  - Kitchen light off



In [31]:
SUMMARY_PROMPT = f"""
You are an expert Smart Home agent who can evaluate the performance of a smart
home, and perform useful actions on behalf of a user.

A device in Home Assistant represents a physical or virtual object, represented
by different entities. A device has attributes for its configuration and state,
for example a thermostat may have a mode attribute, or target or current temperature
attributes.

You generate a simple evaluation dataset for home data. The input dataset
contains the home, description information like location, areas, and devices.
The output data are actions a user may ask to take on a devie.

This is the input yaml document and the output actions yaml document:

{seed_devices_prompt}

Generate a few sentences to control the device. Answer in yaml plain text and do not answer with markdown.
"""

In [32]:
import itertools
import random
from tqdm.auto import tqdm
import shutil
import slugify

homes = []
for path in DEVICES_DIR.glob("*.yaml"):
    with path.open("r") as f:
        content = f.read()
    home_id = path.name.split(".")[0]  # Strip the .yaml extension
    home_data = yaml.load(content, Loader=yaml.Loader)
    homes.append((home_id, home_data))

tasks = []
no_actions = 0
for home_id, home in homes:
    home_template = {
            "home": home_id,
            "location": home["location"],
            "type": home["type"],
    }
    for area, devices in home["devices"].items():
        for device in devices:
            device_type = device["device_type"]
            if not (device_caps := capabilities.get(device_type)):
                # No supported actions
                no_actions += 1
                continue
            device_info = {
                    **home_template,
                    "device": {
                        **device,
                        "area": area,
                    },
                    "capabilities": device_caps,
            }
            tasks.append(device_info)
len(homes), len(tasks), no_actions

(39, 294, 47)

In [33]:
random.shuffle(tasks)
print(yaml.dump(tasks[0], sort_keys=False, explicit_start=True))

---
home: family-retreat-us
location: Suburban neighborhood in Texas
type: Two-story townhouse
device:
  name: Garage Door Opener
  device_type: garage-door
  device_info:
    model: Smart Garage Door Opener
    manufacturer: Chamberlain
    sw_version: 5.6.2
  area: Garage
capabilities:
- Open
- Close



# Generate Output

In [34]:
import slugify

# Total number of records to generate
N_DATAPOINTS = -1

DEVICE_ACTIONS_OUTPUT_DIR = DATASET_DIR / "device-actions"

# Wipe existing summaries
shutil.rmtree(DEVICE_ACTIONS_OUTPUT_DIR, ignore_errors=True)
DEVICE_ACTIONS_OUTPUT_DIR.mkdir(exist_ok=True)

random.shuffle(tasks)
if N_DATAPOINTS > 0 and len(tasks) > N_DATAPOINTS:
    tasks = tasks[:N_DATAPOINTS]

skipped = 0
with tqdm(total=len(tasks)) as pbar:
    for task in tasks:
        home_id =  slugify.slugify(task["home"], separator="-"),
        task_id = "_".join([
              slugify.slugify(task["device"]["area"], separator="-"),
              slugify.slugify(task["device"]["name"], separator="-"),
        ])
        home_dir = DEVICE_ACTIONS_OUTPUT_DIR / home_id
        home_dir.mkdir()
        with open(DEVICE_ACTIONS_OUTPUT_DIR / home_id / f"{task_id}.yaml", "w") as action_output:
            task_yaml = yaml.dump(task, sort_keys=False, explicit_start=True)
            response_obj = None
            for i in range(3):
                response = model.complete(SUMMARY_PROMPT, task_yaml)
                try:
                    response_obj = yaml.safe_load(response)
                except yaml.YAMLError as err:
                    print(err)
                    skipped += 1
                    continue
            if response_obj is not None:
                updated_task = task.copy()
                updated_task.update({"actions": response_obj})
                action_output.write(yaml.dump(updated_task, explicit_start=True, sort_keys=False))
            pbar.set_description(f"Skipped {skipped}")
            pbar.update(1)

Skipped 1:  11%|█         | 32/294 [01:31<12:21,  2.83s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 18, column 1:
    ---
    ^


Skipped 1:  13%|█▎        | 39/294 [01:50<11:10,  2.63s/it]

while parsing a block mapping
  in "<unicode string>", line 3, column 3:
    - action: Turn on
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 7, column 26:
      - "Cour int\xE9rieure" light on
                             ^


Skipped 2:  19%|█▉        | 56/294 [02:33<09:23,  2.37s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 20, column 1:
    --- 
    ^


Skipped 4:  20%|██        | 59/294 [02:43<10:40,  2.72s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 15, column 1:
    ---
    ^


Skipped 4:  31%|███▏      | 92/294 [04:04<08:52,  2.64s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 18, column 1:
    ---
    ^


Skipped 6:  53%|█████▎    | 156/294 [06:50<06:23,  2.78s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 13, column 1:
    --- 
    ^


Skipped 6:  54%|█████▍    | 159/294 [06:57<05:21,  2.38s/it]

while parsing a block mapping
  in "<unicode string>", line 3, column 3:
    - action: Turn on
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 6, column 22:
      - "Salle à manger" light on
                         ^
while parsing a block mapping
  in "<unicode string>", line 3, column 3:
    - action: Turn on
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 7, column 22:
      - "Salle à manger" light on
                         ^


Skipped 9:  54%|█████▍    | 160/294 [06:59<05:18,  2.38s/it]

while parsing a block mapping
  in "<unicode string>", line 3, column 3:
    - action: Turn on
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 7, column 22:
      - "Salle à manger" light on
                         ^


Skipped 9:  70%|███████   | 206/294 [08:55<03:18,  2.26s/it]

while parsing a block mapping
  in "<unicode string>", line 13, column 3:
    - action: Set brightness
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 20, column 26:
      - "Luces del Sal\xF3n" brightness 50%
                             ^


Skipped 11:  76%|███████▌  | 222/294 [09:40<02:57,  2.46s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 13, column 1:
    ---
    ^


Skipped 11: 100%|██████████| 294/294 [12:50<00:00,  2.62s/it]


In [48]:
# Repair old name format
for file in DEVICE_ACTIONS_OUTPUT_DIR.glob("*.yaml"):
    filename = file.name
    parts = filename.split(".")[0].split("-")
    parts = [slugify.slugify(part, separator="-") for part in parts]
    home_id = parts[0]
    task_id = "_".join(parts[1:])
    home_dir = DEVICE_ACTIONS_OUTPUT_DIR / home_id
    if not home_dir.exists():
        home_dir.mkdir()
    task_file = home_dir / f"{task_id}.yaml"
    file.rename(task_file)


In [51]:
for file in DEVICE_ACTIONS_OUTPUT_DIR.glob("*/*"):
    if not file.name.endswith(".yaml"):
        dest = f"{file}.yaml"
        print(dest)
        file.rename(dest)


../datasets/device-actions/home7-nl/rooftop-terrace_terrace-light.yaml
../datasets/device-actions/home7-nl/rooftop-terrace_outdoor-speaker-system.yaml
../datasets/device-actions/home7-nl/living-space_light.yaml
../datasets/device-actions/home7-nl/living-space_smart-thermostat.yaml
../datasets/device-actions/home7-nl/bathroom_bathroom-light.yaml
../datasets/device-actions/home7-nl/bedroom_bedroom-light.yaml
../datasets/device-actions/home8-nl/workshop_workshop-lights.yaml
../datasets/device-actions/home8-nl/carport_carport-light.yaml
../datasets/device-actions/home8-nl/bedroom-2_bedroom-2-light.yaml
../datasets/device-actions/home8-nl/bathroom_bathroom-light.yaml
../datasets/device-actions/home8-nl/sunroom_sunroom-light.yaml
../datasets/device-actions/home8-nl/bedroom-1_bedroom-1-light.yaml
../datasets/device-actions/parisian-apartment-fr/living-room_smart-speaker.yaml
../datasets/device-actions/parisian-apartment-fr/living-room_living-room-light.yaml
../datasets/device-actions/parisian