# Summaries

Generate a dataset of summaries or classifications about the current state of the smart home.

In [1]:
import openai

from home_assistant_datasets.secrets import get_secret
from home_assistant_datasets import model_client

MODEL_ID = "gpt-3.5-turbo-0125"

openai = openai.OpenAI(api_key=get_secret("openai_key"))
model = model_client.ModelClient(openai, MODEL_ID)

# Generation

In [35]:
import pathlib
import yaml


DATASET_DIR = pathlib.Path("../datasets/")
DEVICES_DIR = DATASET_DIR / "devices"
SEEDS_DIR = pathlib.Path("./seeds")
SEED_SUMMARIES_FILE = SEEDS_DIR / "summaries.yaml"

with open(SEED_SUMMARIES_FILE) as f:
    seed_summaries = yaml.load(f.read(), Loader=yaml.Loader)

seed_summaries_prompt = yaml.dump(seed_summaries, sort_keys=False, explicit_start=True)
print(seed_summaries_prompt)

---
area_summaries:
- Summarize what is currently happening in the living room
- If the home has a garage, determine if the garage door has been left open and it
  is late at night
- Determine if a smart doorbell has detected a person or package at the door
- Determine if the leak detector found a problem
- Determine if the smart lock has been left unlocked for a long time
- Determine if the battery levels in the sensors are appropriate
home_summaries:
- Evaluate the weather and determine if I need an umbrella today or may be cold
- Notify me when a family member is about to arrive home
- Let me know if the air quality is bad today
- Determine if one room is much warmer or colder than other rooms



In [41]:
SUMMARY_PROMPT = f"""
You are an expert Smart Home agent who can evaluate the performance of a smart
home, and generate useful summaries, or classifying the state of the home
finding unique and novel insights while ignoring irrelevant, redundant, or mundate.

A device in Home Assistant represents a physical or virtual object, represented
by different entities. A device has attributes for its configuration and state,
for example a thermostat may have a mode attribute, or target or current temperature
attributes.

You generate a simple evaluation dataset for home data. The input dataset
contains the home, location, areas, and devices. The output are types of
summaries specific to individual areas or the state of multiple areas combined.
You may add summary types that are not relevant to the home since we can test
those too. Prefer single area summaries.

Here are examples of area or home summaries:
You answer in yaml, and here is an example input.

{seed_summaries_prompt}
"""

In [37]:
response = model.complete(SUMMARY_PROMPT, "Generate 20 possible summaries in a yaml list.\n---\n")

In [38]:
print(response)


area_summaries:
- Check if the lights in the kitchen are left turned on
- Summarize the current energy usage in the home office
- Monitor if the living room TV has been on for an unusually long time
- Verify if the temperature in the nursery is within a safe range
- Alert if the front door has been open for an extended period
- Analyze if the washing machine in the laundry room has finished its cycle
- Check the humidity levels in the bathroom
- Determine if the home security system has been armed before leaving
- Monitor the CO2 levels in the basement
- Check if the sprinklers in the backyard are scheduled to run today
- Verify if the pet feeder in the pet area needs to be refilled
- Determine if the garage temperature is suitable for storing items
- Check the status of the windows in the study room
- Monitor if the plants in the conservatory need watering
- Verify if the coffee machine in the kitchen needs maintenance
- Determine if the trash bin in the front yard needs to be emptie

In [43]:
import itertools
import random
from tqdm.auto import tqdm
import shutil

N_DATAPOINTS = 40

homes = []
for path in DEVICES_DIR.glob("*.yaml"):
    with path.open("r") as f:
        content = f.read()
    home_id = path.name.split(".")[0]  # Strip the .yaml extension
    home_data = yaml.load(content, Loader=yaml.Loader)
    homes.append((home_id, home_data))

SUMMARIES_OUTPUT_DIR = DATASET_DIR / "summaries"

# Wipe existing summaries
shutil.rmtree(SUMMARIES_OUTPUT_DIR, ignore_errors=True)
SUMMARIES_OUTPUT_DIR.mkdir(exist_ok=True)

random.shuffle(homes)
if len(homes) > N_DATAPOINTS:
    homes = homes[:N_DATAPOINTS]

skipped = 0

with tqdm(total=len(homes)) as pbar:
    for home_id, home in homes:
        with open(SUMMARIES_OUTPUT_DIR / f"{home_id}.yaml", "w") as summary_output:
            home_yaml = yaml.dump(home, sort_keys=False)
            prompt = f"Generate 20 possible summaries for this home in a yaml.\n\n{home_yaml}\n\Output\n---"
            response_obj = None
            for i in range(3):
                response = model.complete(SUMMARY_PROMPT, prompt)
                try:
                    response_obj = yaml.safe_load(response)
                except yaml.YAMLError:
                    skipped += 1
                    continue
            if response_obj is not None:
                updated_home = home.copy()
                updated_home.update({"summaries": response_obj})
                summary_output.write(yaml.dump(updated_home, explicit_start=True, sort_keys=False))
            pbar.set_description(f"Skipped {skipped}")
            pbar.update(1)

Skipped 1: 100%|██████████| 40/40 [08:08<00:00, 12.22s/it]
