### Synthetic data

Based on this Eversys manual: https://www.coffema.de/content/uploads/User-Manual-Legacy-english-compressed.pdf

In [1]:
"""
Synthetic telemetry for an Eversys-style 'Legacy' coffee machine.
- Produces exactly 10,000 time-ordered rows (ticks) of sensor + state data
- Emits plausible brewing/cleaning/product behavior with occasional injected faults
- Saves to synthetic_legacy_timeseries.csv

Python 3.9+  (pip install pandas numpy)
"""

from __future__ import annotations
import math
import random
from dataclasses import dataclass, field, asdict
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
import pandas as pd
import numpy as np

# --------------------------- Configuration ---------------------------

SEED = 42
# Seed both random sources used by the simulation so every run is reproducible.
random.seed(SEED)
np.random.seed(SEED)

N_ROWS = 10_000    # number of rows (ticks) to generate and write
TICK_MS = 500      # simulated time covered by one row, in milliseconds
START_TIME = datetime(year=2025, month=1, day=1, hour=6)  # first timestamp (06:00)

# Opening window as offsets from midnight; products are only started inside it.
OPEN = timedelta(hours=6)
CLOSE = timedelta(hours=22)

# Chance that some product is started within one minute
# (converted to a per-tick probability further down).
PROD_PER_MIN = 0.8
CLEAN_DAILY_PROB = 0.9  # NOTE(review): not referenced in the visible code

# Fault probabilities. Each value is compared against a fresh random draw
# every time the corresponding check runs.
RATES = {
    "flow_low_left": 0.001,
    "flow_low_right": 0.001,
    "brew_init_fail_left": 0.0005,
    "brew_init_fail_right": 0.0005,
    "boiler_overheat": 0.0002,
    "hopper_missing": 0.0001,
    "display_comm_error": 0.00005,
    "milk_unit_disconnected": 0.0001,
    "invalid_recipe": 0.0002,
    "cleaning_light_barrier": 0.0003,
}

# Recipe table; each entry is
# (id, target_ml, has_milk, has_powder, foam_mode).
RECIPES = [
    ("espresso", 30, False, False, "OFF"),
    ("lungo", 60, False, False, "OFF"),
    ("americano", 150, False, False, "OFF"),
    ("cappuccino", 180, True, False, "AUTO"),
    ("latte", 250, True, False, "AUTO"),
    ("choco", 220, True, True, "EVERFOAM"),
]


def per_tick_prob(per_min: float, tick_ms: Optional[float] = None) -> float:
    """Convert a per-minute event probability into a per-tick probability.

    Assuming independent ticks, the per-tick probability ``p`` that yields a
    chance of ``per_min`` for "at least one event within a minute" satisfies
    ``1 - (1 - p) ** ticks_per_min == per_min``.

    Args:
        per_min: Probability of at least one event per minute. Values outside
            ``[0, 1]`` are clamped to that range.
        tick_ms: Tick length in milliseconds. Defaults to the module-level
            ``TICK_MS`` when omitted.

    Returns:
        The equivalent per-tick probability in ``[0, 1]``.

    Raises:
        ValueError: If ``tick_ms`` is not positive.
    """
    if tick_ms is None:
        tick_ms = TICK_MS
    if tick_ms <= 0:
        raise ValueError("tick_ms must be positive")

    # Clamp first: with per_min > 1 the base (1 - per_min) is negative, and a
    # negative float raised to a fractional power is a complex number in
    # Python 3, which would later break comparisons against random draws.
    p = min(max(per_min, 0.0), 1.0)

    ticks_per_min = 60_000 / tick_ms
    return 1 - (1 - p) ** (1 / ticks_per_min)


# Keep the configured per-minute probability within [0, 0.9999] before the
# conversion, then derive the per-tick chance of starting a product.
PERD = min(max(PROD_PER_MIN, 0.0), 0.9999)
PROD_PER_TICK = per_tick_prob(PERD)


# --------------------------- Data structures ---------------------------

def clip(x, lo, hi):
    """Clamp ``x`` to the interval ``[lo, hi]``.

    The upper bound is tested first, so ``hi`` is returned whenever
    ``x > hi``, even if the bounds happen to be inverted.
    """
    if x > hi:
        return hi
    if x < lo:
        return lo
    return x

def n(mu, sigma):
    """Draw one sample from a normal distribution as a plain Python float.

    Uses NumPy's global random state, so results follow ``np.random.seed``.
    """
    sample = np.random.normal(loc=mu, scale=sigma)
    return float(sample)

@dataclass
class Grinder:
    """State of one grinder and its bean hopper.

    The defaults describe a stopped grinder with a present, closed and full
    hopper.
    """
    rpm: float = 0.0                  # grinder speed; 0 while stopped
    motor_current_a: float = 0.0      # motor current (presumably amperes)
    temperature_c: float = 25.0       # grinder temperature (presumably deg C)
    hopper_present: bool = True       # cleared by the "hopper missing" fault
    hopper_lid_closed: bool = True    # not modified anywhere in the visible code
    hopper_level_pct: float = 100.0   # remaining beans, kept within 0..100

@dataclass
class BrewSide:
    """State of one brew unit (the machine has a left and a right one)."""
    unit_connected: bool = True   # not modified anywhere in the visible code
    init_ok: bool = True          # cleared by the brew-init-fail faults
    pressure_bar: float = 0.0     # brew pressure
    temperature_c: float = 25.0   # brew temperature
    flow_ml_s: float = 0.0        # current flow through this unit
    shot_timer_s: float = 0.0  # accumulates while extracting

@dataclass
class Boiler:
    """Boiler readings: one temperature per side plus a shared pressure."""
    temp_left_c: float = 88.0    # left boiler temperature
    temp_right_c: float = 88.0   # right boiler temperature
    pressure_bar: float = 1.0    # boiler pressure

@dataclass
class Hydraulic:
    """Water-path state: inlet, pump, per-side flow readings and the boiler."""
    water_inlet_open: bool = True   # not modified anywhere in the visible code
    pump_on: bool = False           # True from pre-infusion until the product ends
    flow_left_ml_s: float = 0.0     # copied from the left brew unit's flow
    flow_right_ml_s: float = 0.0    # copied from the right brew unit's flow
    # default_factory gives every Hydraulic its own Boiler instance.
    boiler: Boiler = field(default_factory=Boiler)

@dataclass
class MilkModule:
    """State of the milk unit (fridge, milk supply, pump and foam setting)."""
    connected: bool = True        # cleared by the "milk unit disconnected" fault
    fridge_temp_c: float = 3.0    # not modified anywhere in the visible code
    milk_temp_c: float = 3.5      # overwritten with a heated value while frothing
    milk_level_pct: float = 100.0 # remaining milk, kept within 0..100
    milk_pump_on: bool = False    # True while milk is being frothed
    foam_mode: str = "OFF"        # copied from the active recipe's foam_mode

@dataclass
class SteamWand:
    """Steam wand readings.

    Only the temperature and pressure are updated by the visible simulation
    code; the remaining fields keep their defaults.
    """
    wand_connected: bool = True
    nozzle_clean: bool = True
    steam_temp_c: float = 100.0       # redrawn every tick by the simulation loop
    steam_pressure_bar: float = 1.0   # redrawn every tick by the simulation loop
    purge_cycles_count: int = 0

@dataclass
class HotWater:
    """Hot-water outlet state."""
    outlet_enabled: bool = True   # not modified anywhere in the visible code
    temperature_c: float = 96.0   # not modified anywhere in the visible code
    volume_ml: float = 0.0        # reset to 0 when a product ends

@dataclass
class CleaningDispenser:
    """Cleaning-agent dispenser state and cleaning-cycle timestamps."""
    present: bool = True            # not modified anywhere in the visible code
    level_pct: float = 100.0        # remaining cleaning agent, kept within 0..100
    light_barrier_ok: bool = True   # cleared by the light-barrier fault during cleaning
    # Timestamps stay None until a cleaning cycle has started / completed.
    last_clean_started_ts: Optional[datetime] = None
    last_clean_completed_ts: Optional[datetime] = None

@dataclass
class Machine:
    """Complete simulated machine: identity, subsystems and operating state.

    Every subsystem field uses ``default_factory`` so each Machine instance
    owns independent subsystem objects.
    """
    # Identity / power (constant in the visible simulation code)
    machine_id: str = "LEGACY-UNIT-001"
    model: str = "L'2m"
    firmware_version: str = "1.9.4"
    mains_connected: bool = True
    main_switch_state: str = "ON"
    standby_state: str = "ACTIVE"
    econnect_enabled: bool = True

    # Subsystems
    # Two grinders, addressed by the keys "G1" and "G2".
    grinders: Dict[str, Grinder] = field(default_factory=lambda: {
        "G1": Grinder(), "G2": Grinder()
    })
    brewing_left: BrewSide = field(default_factory=BrewSide)
    brewing_right: BrewSide = field(default_factory=BrewSide)
    hydraulic: Hydraulic = field(default_factory=Hydraulic)
    milk: MilkModule = field(default_factory=MilkModule)
    steam: SteamWand = field(default_factory=SteamWand)
    hotwater: HotWater = field(default_factory=HotWater)
    cleaning: CleaningDispenser = field(default_factory=CleaningDispenser)

    # State
    operating_mode: str = "IDLE"  # OFF, BOOTING, IDLE, PREHEAT, RINSE, BREWING_*, MILK_FROTHING, etc.
    # Dict describing the product in progress (recipe, stage, side, remaining
    # ticks, ...), or None when no product is being made.
    product_active: Optional[Dict[str, Any]] = None
    # Single slots: a newer error/warning replaces the previous code.
    active_error_code: Optional[str] = None
    active_warning_code: Optional[str] = None

# --------------------------- Simulation helpers ---------------------------

def within_business_hours(t: datetime) -> bool:
    """Return True when the time of day of ``t`` lies within ``[OPEN, CLOSE]``.

    Both ends of the window are inclusive.
    """
    midnight = t.replace(hour=0, minute=0, second=0, microsecond=0)
    since_midnight = t - midnight
    return not (since_midnight < OPEN or since_midnight > CLOSE)

def choose_recipe() -> Dict[str, Any]:
    """Pick one entry of ``RECIPES`` uniformly at random and return it as a dict.

    The result always carries the fixed version tag ``ver="v1"``.
    """
    recipe_id, volume_ml, uses_milk, uses_powder, foam_setting = random.choice(RECIPES)
    return {
        "id": recipe_id,
        "target_ml": volume_ml,
        "has_milk": uses_milk,
        "has_powder": uses_powder,
        "foam_mode": foam_setting,
        "ver": "v1",
    }

def pick_grinder_for_recipe(recipe_id: str) -> str:
    """Return the grinder key for a recipe.

    "espresso" and "lungo" use "G1"; every other recipe id uses "G2".
    """
    uses_first_grinder = recipe_id in {"espresso", "lungo"}
    return "G1" if uses_first_grinder else "G2"

def product_duration(recipe: Dict[str, Any]) -> Dict[str, int]:
    """Draw random stage lengths, in ticks, for one product.

    "espresso" and "lungo" use the shorter ranges; all other recipes use the
    longer ones. The milk stage is only drawn when the recipe has milk and is
    0 otherwise.

    Returns:
        Dict with the keys ``grind``, ``preinf``, ``extract`` and ``milk``.
    """
    if recipe["id"] in {"espresso", "lungo"}:
        bounds = ((4, 8), (4, 10), (18, 42))
    else:
        bounds = ((6, 12), (6, 12), (22, 60))

    # Draw in the fixed order grind -> preinf -> extract (-> milk) so seeded
    # runs stay reproducible.
    grind_ticks, preinf_ticks, extract_ticks = (
        random.randint(low, high) for low, high in bounds
    )
    milk_ticks = random.randint(8, 40) if recipe["has_milk"] else 0

    return {
        "grind": grind_ticks,
        "preinf": preinf_ticks,
        "extract": extract_ticks,
        "milk": milk_ticks,
    }

def start_product(m: Machine, now: datetime):
    """Begin a new randomly chosen product on machine ``m``.

    Fills ``m.product_active`` with the recipe, the starting stage "GRIND",
    the brew side, the remaining stage durations, the start timestamp, the
    grinder key and a zeroed volume counter. Also sets the operating mode and
    copies the recipe's foam mode to the milk module.
    """
    recipe = choose_recipe()
    # The random draws keep a fixed order (recipe, side, durations) so seeded
    # runs stay reproducible.
    side = random.choices(["LEFT", "RIGHT", "BOTH"], [0.45, 0.45, 0.10])[0]
    durations = product_duration(recipe)

    m.product_active = {
        "recipe": recipe,
        "stage": "GRIND",
        "side": side,
        "remaining": durations,
        "start_ts": now,
        "grinder": pick_grinder_for_recipe(recipe["id"]),
        "actual_ml": 0.0,
    }

    if side == "BOTH":
        m.operating_mode = "BREWING_DOUBLE"
    else:
        m.operating_mode = f"BREWING_{side}"
    m.milk.foam_mode = recipe["foam_mode"]

def end_product(m: Machine):
    """Finish the active product and return the machine to "IDLE".

    Switches both pumps off, zeroes both shot timers and the hot-water
    volume, and clears ``m.product_active``.
    """
    # Actuators off.
    m.hydraulic.pump_on = False
    m.milk.milk_pump_on = False

    # Per-product counters back to zero.
    for brew_unit in (m.brewing_left, m.brewing_right):
        brew_unit.shot_timer_s = 0.0
    m.hotwater.volume_ml = 0.0

    m.operating_mode = "IDLE"
    m.product_active = None

def ramp(v: float, target: float, step: float) -> float:
    """Move ``v`` towards ``target`` by at most ``step`` without overshooting."""
    if v < target:
        return min(target, v + step)
    if v > target:
        return max(target, v - step)
    return v

# --------------------------- Fault injection ---------------------------

def with_rate_shot(rate: float) -> bool:
    """Return True with probability ``rate``, consuming one ``random.random()`` draw."""
    draw = random.random()
    return draw < rate

def maybe_inject_faults(m: Machine, stage: str):
    """Randomly raise warnings/errors on ``m`` for the current product stage.

    Flow and brew-init warnings are only considered during the "PREINFUSE"
    and "EXTRACT" stages; the remaining faults are considered on every call.
    Each check consumes one random draw, in a fixed order. The machine has a
    single warning slot and a single error slot, so a later hit replaces an
    earlier code.
    """
    def fires(rate_key: str) -> bool:
        # One random draw against the configured rate for this fault.
        return with_rate_shot(RATES[rate_key])

    # --- Brew-stage warnings -------------------------------------------
    if stage in {"PREINFUSE", "EXTRACT"}:
        if fires("flow_low_left"):
            m.active_warning_code = "W-004"  # low flow left
        if fires("flow_low_right"):
            m.active_warning_code = "W-005"  # low flow right
        init_checks = (
            ("brew_init_fail_left", "W-011", m.brewing_left),
            ("brew_init_fail_right", "W-012", m.brewing_right),
        )
        for rate_key, code, brew_unit in init_checks:
            if fires(rate_key):
                m.active_warning_code = code
                brew_unit.init_ok = False

    # --- Faults checked regardless of stage ----------------------------
    if fires("boiler_overheat"):
        m.active_warning_code = "W-002"
        boiler = m.hydraulic.boiler
        # Each side gets its own jump (left drawn first, then right).
        boiler.temp_left_c += random.uniform(6, 12)
        boiler.temp_right_c += random.uniform(6, 12)

    if fires("hopper_missing"):
        m.grinders["G1"].hopper_present = False
        m.active_error_code = "E-000"

    if fires("display_comm_error"):
        m.active_error_code = "E-199"
        m.operating_mode = "ERROR_LOCKOUT"

    if fires("milk_unit_disconnected"):
        m.milk.connected = False
        m.active_error_code = "E-012"

    if fires("invalid_recipe"):
        m.active_warning_code = "W-010"

# --------------------------- Stage simulation ---------------------------

def step_grind(m: Machine):
    """Advance the grinder used by the active product by one tick.

    Redraws speed and motor current, warms the grinder slightly and, while
    the hopper is present and not empty, consumes a small amount of beans.
    """
    grinder = m.grinders[m.product_active["grinder"]]

    grinder.rpm = clip(n(1500, 60), 1300, 1700)
    grinder.motor_current_a = clip(n(2.0, 0.3), 1.0, 3.5)
    warmed = grinder.temperature_c + n(0.1, 0.05)
    grinder.temperature_c = clip(warmed, 20, 60)

    # No beans are consumed when the hopper is missing or already empty.
    if not grinder.hopper_present or not (grinder.hopper_level_pct > 0):
        return
    consumed = random.uniform(0.02, 0.08)
    grinder.hopper_level_pct = clip(grinder.hopper_level_pct - consumed, 0, 100)

def step_preinfuse(m: Machine):
    """Advance pre-infusion by one tick: pump on, ramp pressure and temperature.

    Only the brew unit(s) used by the active product are ramped, using the
    same side selection as ``step_extract`` ("BOTH" -> left and right,
    otherwise the single named side). Previously both units were ramped
    unconditionally, so the unused side reported brew pressure during a
    single-sided product. If no product is active, both units are ramped,
    which matches the earlier behaviour.
    """
    m.hydraulic.pump_on = True

    pa = m.product_active
    product_side = pa["side"] if pa else "BOTH"
    sides = ["LEFT", "RIGHT"] if product_side == "BOTH" else [product_side]

    target_p = 9.0
    for side in sides:
        brew = m.brewing_left if side == "LEFT" else m.brewing_right
        # Gentle ramp-up: at most 1 bar and 0.6 degrees per tick.
        brew.pressure_bar = ramp(brew.pressure_bar, target_p, step=1.0)
        brew.temperature_c = clip(ramp(brew.temperature_c, 92.0, step=0.6), 80, 100)

def step_extract(m: Machine):
    """Advance extraction by one tick on the active brew side(s).

    For every active side the flow, pressure and temperature are redrawn,
    the shot timer advances by one tick and the dispensed volume is added to
    the product's ``actual_ml``. While the recipe has milk ticks remaining,
    the milk pump runs as well.
    """
    product = m.product_active
    recipe = product["recipe"]
    tick_s = TICK_MS / 1000.0
    warning = m.active_warning_code

    if product["side"] == "BOTH":
        active_sides = ["LEFT", "RIGHT"]
    else:
        active_sides = [product["side"]]

    # Espresso runs at a lower nominal flow than all other recipes.
    nominal_flow = 1.6 if recipe["id"] in {"espresso"} else 2.2

    for side_name in active_sides:
        unit = m.brewing_left if side_name == "LEFT" else m.brewing_right

        # A low-flow warning only throttles the side it refers to.
        throttled = (warning == "W-004" and side_name == "LEFT") or \
                    (warning == "W-005" and side_name == "RIGHT")
        factor = 0.6 if throttled else 1.0
        unit.flow_ml_s = clip(nominal_flow * factor + n(0.0, 0.15), 0.2, 4.0)

        # Pressure and temperature hover around 9 bar / 92 degrees.
        unit.pressure_bar = clip(n(9.0, 0.4), 7.5, 10.5)
        unit.temperature_c = clip(n(92.0, 1.2), 88, 98)
        unit.shot_timer_s += tick_s

        product["actual_ml"] += unit.flow_ml_s * tick_s

    # Milk frothing while the recipe still has milk ticks left.
    if recipe["has_milk"] and product["remaining"]["milk"] > 0:
        milk = m.milk
        milk.milk_pump_on = True
        milk.milk_temp_c = clip(n(58.0, 3.0), 45, 70)
        milk.milk_level_pct = clip(milk.milk_level_pct - random.uniform(0.02, 0.06), 0, 100)

def step_cleaning(m: Machine, now: datetime):
    """Advance a cleaning cycle by one tick.

    Sets the mode to "CLEANING_COFFEE", records the start timestamp if none
    is stored yet, consumes some cleaning agent and occasionally raises the
    light-barrier warning "W-006".
    """
    dispenser = m.cleaning
    m.operating_mode = "CLEANING_COFFEE"

    if dispenser.last_clean_started_ts is None:
        dispenser.last_clean_started_ts = now

    # Cleaning agent used during this tick.
    used = random.uniform(0.05, 0.12)
    dispenser.level_pct = clip(dispenser.level_pct - used, 0, 100)

    # Occasional light-barrier problem.
    barrier_fault = random.random() < RATES["cleaning_light_barrier"]
    if barrier_fault:
        dispenser.light_barrier_ok = False
        m.active_warning_code = "W-006"

# --------------------------- Row encoding ---------------------------

def make_row(m: Machine, now: datetime) -> Dict[str, Any]:
    """Flatten the machine state at time ``now`` into one telemetry row.

    The row always has the same keys in the same order. Boolean signals are
    encoded as 0/1 integers, and missing error/warning codes as empty
    strings. When no product is active the product fields are empty strings
    or zeros.
    """
    g1, g2 = m.grinders["G1"], m.grinders["G2"]
    left, right = m.brewing_left, m.brewing_right
    hyd, boiler = m.hydraulic, m.hydraulic.boiler
    milk, steam, hot, clean = m.milk, m.steam, m.hotwater, m.cleaning

    # Timestamp and overall status.
    row: Dict[str, Any] = {
        "ts": now.isoformat(),
        "mode": m.operating_mode,
        "error_code": m.active_error_code or "",
        "warning_code": m.active_warning_code or "",
    }

    # Grinders
    row.update({
        "G1_rpm": g1.rpm,
        "G1_current_a": g1.motor_current_a,
        "G1_temp_c": g1.temperature_c,
        "G1_hopper_pct": g1.hopper_level_pct,
        "G1_present": int(g1.hopper_present),
        "G2_rpm": g2.rpm,
        "G2_current_a": g2.motor_current_a,
        "G2_temp_c": g2.temperature_c,
        "G2_hopper_pct": g2.hopper_level_pct,
        "G2_present": int(g2.hopper_present),
    })

    # Brewing L/R
    row.update({
        "brewL_pressure_bar": left.pressure_bar,
        "brewL_temp_c": left.temperature_c,
        "brewL_flow_ml_s": left.flow_ml_s,
        "brewL_timer_s": left.shot_timer_s,
        "brewL_init_ok": int(left.init_ok),
        "brewR_pressure_bar": right.pressure_bar,
        "brewR_temp_c": right.temperature_c,
        "brewR_flow_ml_s": right.flow_ml_s,
        "brewR_timer_s": right.shot_timer_s,
        "brewR_init_ok": int(right.init_ok),
    })

    # Hydraulic / boiler
    row.update({
        "pump_on": int(hyd.pump_on),
        "hyd_flow_left_ml_s": hyd.flow_left_ml_s,
        "hyd_flow_right_ml_s": hyd.flow_right_ml_s,
        "boiler_temp_left_c": boiler.temp_left_c,
        "boiler_temp_right_c": boiler.temp_right_c,
        "boiler_pressure_bar": boiler.pressure_bar,
    })

    # Milk / steam / hot water
    row.update({
        "milk_connected": int(milk.connected),
        "milk_temp_c": milk.milk_temp_c,
        "milk_level_pct": milk.milk_level_pct,
        "milk_pump_on": int(milk.milk_pump_on),
        "foam_mode": milk.foam_mode,
        "steam_temp_c": steam.steam_temp_c,
        "steam_pressure_bar": steam.steam_pressure_bar,
        "hotwater_temp_c": hot.temperature_c,
        "hotwater_volume_ml": hot.volume_ml,
    })

    # Cleaning
    row.update({
        "clean_level_pct": clean.level_pct,
        "clean_light_barrier_ok": int(clean.light_barrier_ok),
    })

    # Product fields: real values while a product runs, placeholders otherwise.
    product = m.product_active
    if product:
        product_fields = {
            "prod_id": product["recipe"]["id"],
            "prod_ml_target": product["recipe"]["target_ml"],
            "prod_side": product["side"],
            "prod_stage": product["stage"],
            "prod_actual_ml": product["actual_ml"],
        }
    else:
        product_fields = {
            "prod_id": "",
            "prod_ml_target": 0,
            "prod_side": "",
            "prod_stage": "",
            "prod_actual_ml": 0.0,
        }
    row.update(product_fields)
    return row

# --------------------------- Main simulation loop ---------------------------

def simulate(n_rows: int) -> pd.DataFrame:
    """Run the tick-based machine simulation and return one row per tick.

    Every loop iteration represents ``TICK_MS`` of simulated time, starting
    at ``START_TIME``. Per tick the loop may start a product, may start a
    cleaning cycle, advances whichever activity is running (cleaning takes
    precedence over a product; otherwise idle drift applies), adds a little
    sensor noise and appends the flattened state as one row.

    Args:
        n_rows: Number of rows to produce. Zero or a negative number yields
            an empty DataFrame.

    Returns:
        DataFrame with exactly ``n_rows`` time-ordered rows (for positive
        ``n_rows``), one column per key produced by ``make_row``.
    """
    m = Machine()
    rows: List[Dict[str, Any]] = []
    now = START_TIME

    # Set initial temps slightly sub-target to allow preheat dynamics
    m.brewing_left.temperature_c = 70.0
    m.brewing_right.temperature_c = 70.0

    cleaning_ticks_remaining = 0

    # The loop condition alone guarantees exactly n_rows rows.
    while len(rows) < n_rows:
        # Decide whether to start a product (when idle, within hours, no hard error)
        if (m.product_active is None
            and within_business_hours(now)
            and m.operating_mode == "IDLE"
            and m.active_error_code not in {"E-199", "E-012"}  # lockouts to simulate service needed
        ):
            if random.random() < PROD_PER_TICK:
                start_product(m, now)

        # Cleaning (ad-hoc light model): triggered occasionally near "close"
        hour = now.hour + now.minute / 60
        if (m.product_active is None
            and cleaning_ticks_remaining == 0
            and 21.0 <= hour <= 22.5
            and random.random() < 0.0015):
            cleaning_ticks_remaining = random.randint(60, 240)  # 30s to 2 minutes
            m.cleaning.last_clean_started_ts = now
            m.operating_mode = "CLEANING_COFFEE"

        # Advance states
        if cleaning_ticks_remaining > 0:
            step_cleaning(m, now)
            cleaning_ticks_remaining -= 1
            if cleaning_ticks_remaining == 0:
                # Cycle finished: back to idle with cleaning-related flags reset.
                m.cleaning.last_clean_completed_ts = now
                m.operating_mode = "IDLE"
                m.cleaning.light_barrier_ok = True
                m.active_warning_code = None
        elif m.product_active:
            pa = m.product_active
            stage = pa["stage"]
            maybe_inject_faults(m, stage)

            if stage == "GRIND":
                step_grind(m)
                pa["remaining"]["grind"] -= 1
                if pa["remaining"]["grind"] <= 0:
                    pa["stage"] = "PREINFUSE"

            elif stage == "PREINFUSE":
                step_preinfuse(m)
                pa["remaining"]["preinf"] -= 1
                if pa["remaining"]["preinf"] <= 0:
                    pa["stage"] = "EXTRACT"

            elif stage == "EXTRACT":
                step_extract(m)
                # reflect hydraulic flows (approximate mirror of brew flows)
                m.hydraulic.flow_left_ml_s = m.brewing_left.flow_ml_s
                m.hydraulic.flow_right_ml_s = m.brewing_right.flow_ml_s
                m.hydraulic.boiler.pressure_bar = clip(n(1.2, 0.1), 0.8, 1.6)

                pa["remaining"]["extract"] -= 1
                if pa["remaining"]["extract"] <= 0:
                    if pa["recipe"]["has_milk"] and pa["remaining"]["milk"] > 0:
                        # stay in EXTRACT stage while the remaining milk ticks run down
                        pa["remaining"]["milk"] -= 1
                        if pa["remaining"]["milk"] <= 0:
                            end_product(m)
                    else:
                        end_product(m)

        else:
            # Idle drift / cool down a bit; reset some signals
            m.grinders["G1"].rpm = ramp(m.grinders["G1"].rpm, 0, 200)
            m.grinders["G2"].rpm = ramp(m.grinders["G2"].rpm, 0, 200)
            m.grinders["G1"].motor_current_a = ramp(m.grinders["G1"].motor_current_a, 0.1, 0.2)
            m.grinders["G2"].motor_current_a = ramp(m.grinders["G2"].motor_current_a, 0.1, 0.2)
            m.hydraulic.pump_on = False
            m.brewing_left.flow_ml_s = ramp(m.brewing_left.flow_ml_s, 0.0, 0.5)
            m.brewing_right.flow_ml_s = ramp(m.brewing_right.flow_ml_s, 0.0, 0.5)
            # Keep the hydraulic flow readings in step with the brew flows so
            # they do not stay frozen at the last extraction value while the
            # pump is off.
            m.hydraulic.flow_left_ml_s = m.brewing_left.flow_ml_s
            m.hydraulic.flow_right_ml_s = m.brewing_right.flow_ml_s
            m.hotwater.volume_ml = ramp(m.hotwater.volume_ml, 0.0, 10)

            # Boiler temps relax towards 90C
            m.hydraulic.boiler.temp_left_c = ramp(m.hydraulic.boiler.temp_left_c, 90.0, 0.3)
            m.hydraulic.boiler.temp_right_c = ramp(m.hydraulic.boiler.temp_right_c, 90.0, 0.3)

            # Error recovery. There is only one error-code slot, so a later
            # fault can overwrite the code of an earlier one whose subsystem
            # flag is still down. Whenever the visible error clears, restore
            # both fault flags so neither stays stuck with no error reported.

            # Hopper missing sometimes gets fixed "between ticks"
            if m.active_error_code == "E-000" and random.random() < 0.05:
                m.active_error_code = None
                m.grinders["G1"].hopper_present = True
                m.milk.connected = True

            # Display / milk-unit errors clear less often
            if m.active_error_code in {"E-199", "E-012"} and random.random() < 0.01:
                m.active_error_code = None
                m.operating_mode = "IDLE"
                m.milk.connected = True
                m.grinders["G1"].hopper_present = True

            # clear warnings occasionally
            if m.active_warning_code and random.random() < 0.08:
                m.active_warning_code = None
                m.brewing_left.init_ok = True
                m.brewing_right.init_ok = True

        # Minor stochastic noise/updates every tick
        for g in m.grinders.values():
            g.temperature_c = clip(g.temperature_c + n(0.02, 0.03), 18, 65)

        m.steam.steam_temp_c = clip(n(100, 0.7), 96, 104)
        m.steam.steam_pressure_bar = clip(n(1.2, 0.08), 0.9, 1.6)

        # Encode row and advance clock
        rows.append(make_row(m, now))
        now += timedelta(milliseconds=TICK_MS)

    df = pd.DataFrame(rows)
    return df


if __name__ == "__main__":
    df = simulate(N_ROWS)

    # Store the 0/1 flag columns compactly as int8.
    flag_suffixes = ("_ok", "_on", "_present")
    flag_columns = [col for col in df.columns if col.endswith(flag_suffixes)]
    for col in flag_columns:
        df[col] = df[col].astype("int8")

    # Write the CSV without the index column, then print a short summary.
    df.to_csv("synthetic_legacy_timeseries.csv", index=False)
    print("Wrote synthetic_legacy_timeseries.csv with", len(df), "rows")
    print("Columns:", len(df.columns))
    print(df.head(5).to_string(index=False))



Wrote synthetic_legacy_timeseries.csv with 10000 rows
Columns: 46
       2025-01-01T06:00:00 IDLE                             0.0           0.1  25.034901          100.0           1     0.0           0.1  25.015852          100.0           1                 0.0          70.0              0.0            0.0              1                 0.0          70.0              0.0            0.0              1        0                 0.0                  0.0                88.3                 88.3                  1.0               1          3.5           100.0             0       OFF    100.453382            1.321842             96.0                 0.0            100.0                       1                       0                                  0.0
2025-01-01T06:00:00.500000 IDLE                             0.0           0.1  25.047877          100.0           1     0.0           0.1  25.028828          100.0           1                 0.0          70.0              0.0            0.0 

In [2]:
import pandas as pd
# Reload the generated CSV and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="synthetic_legacy_timeseries.csv")
data.head(5)

Unnamed: 0,ts,mode,error_code,warning_code,G1_rpm,G1_current_a,G1_temp_c,G1_hopper_pct,G1_present,G2_rpm,...,steam_pressure_bar,hotwater_temp_c,hotwater_volume_ml,clean_level_pct,clean_light_barrier_ok,prod_id,prod_ml_target,prod_side,prod_stage,prod_actual_ml
0,2025-01-01T06:00:00,IDLE,,,0.0,0.1,25.034901,100.0,1,0.0,...,1.321842,96.0,0.0,100.0,1,,0,,,0.0
1,2025-01-01T06:00:00.500000,IDLE,,,0.0,0.1,25.047877,100.0,1,0.0,...,1.261395,96.0,0.0,100.0,1,,0,,,0.0
2,2025-01-01T06:00:01,IDLE,,,0.0,0.1,25.053793,100.0,1,0.0,...,1.162742,96.0,0.0,100.0,1,,0,,,0.0
3,2025-01-01T06:00:01.500000,IDLE,,,0.0,0.1,25.081051,100.0,1,0.0,...,1.155017,96.0,0.0,100.0,1,,0,,,0.0
4,2025-01-01T06:00:02,IDLE,,,0.0,0.1,25.070667,100.0,1,0.0,...,1.087016,96.0,0.0,100.0,1,,0,,,0.0
