# Device Actions

Generates datasets for performing actions on devices in a synthetic home. This will generate a list
of text / voice commands that you can perform in a home. These are not labeled with the outcome
which is generated in a later step.

In [68]:
import openai
import google.generativeai as genai

from home_assistant_datasets import secrets
from home_assistant_datasets.secrets import get_secret
from home_assistant_datasets import model_client

secrets.DEFAULT_SECRETS_FILE = "../secrets.yaml"

# MODEL_ID = "gpt-3.5-turbo-0125"
# openai = openai.OpenAI(api_key=secrets.get_secret("openai_api_key"))
# model = model_client.ModelClient(openai, MODEL_ID)

# Gemini flash is higher quality and cheaper model than the GPT alternatives.
MODEL_ID = "gemini-1.5-flash"
genai.configure(api_key=secrets.get_secret("google_api_key"))
model = model_client.GoogleClient(MODEL_ID)

# Generate few-shot exapmles

Read the seed data used as a few-shot exampe.

In [69]:
import pathlib
import yaml
from synthetic_home import device_types


DATASET_DIR = pathlib.Path("../datasets/")
DEVICES_DIR = DATASET_DIR / "devices-v3"
SEEDS_DIR = pathlib.Path("./seeds")
SEED_DEVICE_ACTIONS_FILE = SEEDS_DIR / "device-actions.yaml"
SEED_DEVICE_ACTIONS_CAPABILITIES_FILE = SEEDS_DIR / "device-actions-capabilities.yaml"

with open(SEED_DEVICE_ACTIONS_FILE) as f:
    seed_device_actions = list(yaml.load_all(f.read(), Loader=yaml.Loader))

# This is a fixed list of capabilities that any particular synthetic home device type support
with open(SEED_DEVICE_ACTIONS_CAPABILITIES_FILE) as f:
    capabilities = {
        cap["device_type"]: cap["actions"]
        for cap in yaml.load(f.read(), Loader=yaml.Loader)
    }

seed_devices_prompt = "".join(yaml.dump(content, sort_keys=False, explicit_start=True) for content in seed_device_actions)
print(seed_devices_prompt)

registry = device_types.load_device_type_registry()
# Find any devices missing explicit action capabilities definitions
missing_devices = [
    {"device_type": dt, "actions": []}
    for dt in registry.device_types
    if dt not in capabilities
]
if missing_devices:
    print(yaml.dump(missing_devices, sort_keys=False))


---
home: mountain-cabin-us
device:
  name: Kitchen Overhead Light
  area: Kitchen
  device_type: light
  device_info:
    model: Smart LED Bulb
    manufacturer: Philips
    sw_version: 1.2.3
capabilities:
- Turn on
- Turn off
---
actions:
- action: Turn on
  sentences:
  - Please turn on the kitchen overhead light
  - Turn on the kitchen light
  - Kitchen light on
- action: Turn off
  sentences:
  - Please turn off the kitchen overhead light
  - Turn off the kitchen light
  - Kitchen light off



In [70]:
SUMMARY_PROMPT = f"""
You are an expert Smart Home agent who can evaluate the performance of a smart
home, and perform useful actions on behalf of a user.

A device in Home Assistant represents a physical or virtual object, represented
by different entities. A device has attributes for its configuration and state,
for example a thermostat may have a mode attribute, or target or current temperature
attributes.

You generate a simple evaluation dataset for home data. The input dataset
contains the home, description information like location, areas, and devices.
The output data are actions a user may ask to take on a devie.

This is the input yaml document and the output actions yaml document:

{seed_devices_prompt}

Generate a few sentences to control the device. Answer in yaml plain text and do not answer with markdown.
"""

In [71]:
import itertools
import random
from tqdm.auto import tqdm
import shutil
import slugify

homes = []
for path in DEVICES_DIR.glob("*.yaml"):
    with path.open("r") as f:
        content = f.read()
    home_id = path.name.split(".")[0]  # Strip the .yaml extension
    home_data = yaml.load(content, Loader=yaml.Loader)
    homes.append((home_id, home_data))

tasks = []
no_actions = 0
task_types = {}
for home_id, home in homes:
    home_template = {
            "home": home_id,
            "location": home["location"],
            "type": home["type"],
    }
    for area, devices in home["devices"].items():
        for device in devices or []:
            device_type = device["device_type"]
            if not (device_caps := capabilities.get(device_type)):
                # No supported actions
                no_actions += 1
                continue
            task_types[device_type] = task_types.get(device_type, 0) + 1
            device_info = {
                    **home_template,
                    "device": {
                        **device,
                        "area": area,
                    },
                    "capabilities": device_caps,
            }
            tasks.append(device_info)
print((len(homes), len(tasks), no_actions))
print(yaml.dump(task_types))

(40, 480, 91)
exhaust-fan: 11
fan-oscilating: 2
garage-door: 6
heat-pump: 3
hvac: 30
light: 203
light-dimmable: 85
smart-blinds: 1
smart-lock: 5
smart-plug: 34
smart-speaker: 49
smart-sprinkler: 17
smart-tv: 17
switch: 8
vacuum: 3
water-valve: 6



In [72]:
random.shuffle(tasks)
print(yaml.dump(tasks[0], sort_keys=False, explicit_start=True))

---
home: home4-us
location: Coastal town in Florida
type: Beach house
device:
  name: Kids Bathroom Light
  device_type: light
  device_info:
    model: Smart LED Bulb
    manufacturer: Philips
    sw_version: 1.2.3
  area: Kids Bathroom
capabilities:
- Turn on
- Turn off



# Generate Output

In [75]:
import slugify

# Total number of records to generate
N_DATAPOINTS = -1

DEVICE_ACTIONS_OUTPUT_DIR = DATASET_DIR / "device-actions-v2"

# Wipe existing summaries
shutil.rmtree(DEVICE_ACTIONS_OUTPUT_DIR, ignore_errors=True)
DEVICE_ACTIONS_OUTPUT_DIR.mkdir(exist_ok=True)

random.shuffle(tasks)
if N_DATAPOINTS > 0 and len(tasks) > N_DATAPOINTS:
    tasks = tasks[:N_DATAPOINTS]

skipped = 0
with tqdm(total=len(tasks)) as pbar:
    for task in tasks:
        home_id = slugify.slugify(task["home"], separator="-")
        task_id = "_".join([
              slugify.slugify(task["device"]["area"], separator="-"),
              slugify.slugify(task["device"]["name"], separator="-"),
        ])
        home_dir = DEVICE_ACTIONS_OUTPUT_DIR / home_id
        if not home_dir.exists():
            home_dir.mkdir()
        with open(DEVICE_ACTIONS_OUTPUT_DIR / home_id / f"{task_id}.yaml", "w") as action_output:
            task_yaml = yaml.dump(task, sort_keys=False, explicit_start=True)
            response_obj = None
            for i in range(3):
                response = model.complete(SUMMARY_PROMPT, task_yaml)
                try:
                    response_obj = yaml.safe_load(response)
                except yaml.YAMLError as err:
                    print(err)
                    skipped += 1
                    continue
            if response_obj is not None:
                updated_task = task.copy()
                updated_task.update({"actions": response_obj})
                action_output.write(yaml.dump(updated_task, explicit_start=True, sort_keys=False))
            pbar.set_description(f"Skipped {skipped}")
            pbar.update(1)

  0%|          | 0/480 [00:00<?, ?it/s]

Skipped 0:  10%|█         | 48/480 [02:13<15:24,  2.14s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 20, column 1:
    ---
    ^


Skipped 1:  14%|█▍        | 69/480 [03:14<19:18,  2.82s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 13, column 1:
    ---
    ^


Skipped 2:  21%|██        | 99/480 [04:35<16:23,  2.58s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 13, column 1:
    ---
    ^


Skipped 4:  33%|███▎      | 158/480 [07:12<13:14,  2.47s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 13, column 1:
    ---
    ^


Skipped 4:  39%|███▉      | 186/480 [08:47<12:00,  2.45s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 19, column 1:
    ---
    ^


Skipped 6:  45%|████▌     | 217/480 [10:06<11:23,  2.60s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 22, column 1:
    ---
    ^


Skipped 6:  57%|█████▋    | 272/480 [12:30<07:35,  2.19s/it]

while parsing a block mapping
  in "<unicode string>", line 13, column 3:
    - action: Set position
      ^
expected <block end>, but found '<scalar>'
  in "<unicode string>", line 17, column 16:
      - [position] the shower
                   ^


Skipped 8:  66%|██████▋   | 318/480 [14:27<07:41,  2.85s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 15, column 1:
    ---
    ^


Skipped 9:  80%|████████  | 386/480 [17:46<04:14,  2.70s/it]

expected a single document in the stream
  in "<unicode string>", line 2, column 1:
    actions:
    ^
but found another document
  in "<unicode string>", line 21, column 1:
    ---
    ^


Skipped 9: 100%|██████████| 480/480 [22:00<00:00,  2.75s/it]
