# Perform a value-based search and calculation

With this script, you can run a value-based search and apply calculations on the results directly within the MLHub notebook environment. The output is a Pandas DataFrame containing both your search hits and computed metrics—perfect for downstream analysis or for feeding into a DashHub dashboard for interactive visualization.

In [1]:
import os
import time
import re
import requests
import pandas as pd
from functools import lru_cache
from datetime import datetime, timedelta
import pytz

def value_search_df(
    conditions,
    calculations,
    days_back=10,
    min_duration=400,
    operator="AND",
    tz_name="Europe/Brussels",
    page_size=600
):
    """
    Run a TrendMiner VALUE_BASED_SEARCH and return a DataFrame with
    your requested calculations and search hits, timestamped in your timezone.

    The search window ends "now" and starts `days_back` days earlier. Both
    bounds are computed in UTC because they are sent with a literal ``Z``
    suffix; `tz_name` only affects how the returned ``start``/``end`` columns
    are displayed.

    Parameters
    ----------
    conditions : list of tuple
        List of (tag, op, val) tuples, e.g. ``("[CS]BA:TEMP.1", "<", 28)``.
        `op` must be one of <, >, <=, >=, ==, != or CONSTANT. Tag names are
        not validated locally; they are resolved through the tag-details
        endpoint.
    calculations : list of tuple
        List of (name, type, tag) tuples. Types must be one of MEAN, MIN, MAX, RANGE,
        START, END, DELTA, INTEGRAL, or STDEV.
    days_back : int, optional
        Number of days before now to start the search window. Default is 10.
    min_duration : int, optional
        Minimum duration of each hit, sent as ``minimumDuration``. It should be
        at least twice the index resolution (presumably in seconds -- confirm
        against the API documentation). Default is 400.
    operator : str, optional
        How to combine conditions ("AND"/"OR"). Default is "AND". The value is
        passed through to the service without local validation.
    tz_name : str, optional
        Timezone for converting start/end timestamps. Default is "Europe/Brussels".
    page_size : int, optional
        Page size for pagination. Default is 600.

    Returns
    -------
    pandas.DataFrame
        DataFrame of search hits with calculated values and timezone-converted
        timestamps. Nested fields are flattened with ``_`` as separator. An
        empty DataFrame (no columns) is returned when the search has no hits.

    Raises
    ------
    ValueError
        If input validation fails (wrong container type, invalid op, or
        invalid calc type).
    KeyError
        If KERNEL_USER_TOKEN or KERNEL_SERVER_URL is missing from the environment.
    RuntimeError
        If any HTTP request returns an error status, or the search itself
        ends in a FAILED/ERROR state.
    """
    # ─── Validation ───────────────────────────────────────────────────
    ALLOWED_OPS = {"<", ">", "<=", ">=", "==", "!=", "CONSTANT"}
    ALLOWED_CALCS = {
        "MEAN", "MIN", "MAX", "RANGE", "START",
        "END", "DELTA", "INTEGRAL", "STDEV"
    }

    # validate conditions
    if not isinstance(conditions, list):
        raise ValueError("`conditions` must be a list of (tag, op, val) tuples")
    for _tag, op, _val in conditions:
        if op not in ALLOWED_OPS:
            raise ValueError(f"Invalid operator {op!r}; must be one of {sorted(ALLOWED_OPS)}")

    # validate calculations
    if not isinstance(calculations, list):
        raise ValueError("`calculations` must be a list of (name, type, tag) tuples")
    for _name, ctype, _tag in calculations:
        if ctype not in ALLOWED_CALCS:
            raise ValueError(f"Invalid calc type {ctype!r}; must be one of {sorted(ALLOWED_CALCS)}")

    # ─── Setup ─────────────────────────────────────────────────────────
    token    = os.environ["KERNEL_USER_TOKEN"]
    base_url = os.environ["KERNEL_SERVER_URL"]
    tz       = pytz.timezone(tz_name)

    def _checked_json(response, action):
        # Surface HTTP failures as the documented RuntimeError instead of an
        # opaque KeyError/JSON error further down.
        try:
            response.raise_for_status()
        except requests.HTTPError as exc:
            raise RuntimeError(f"{action} failed: {exc}") from exc
        return response.json()

    # ─── Time window ───────────────────────────────────────────────────
    # The timestamps carry a literal "Z", so they must be formatted from UTC
    # time; formatting local wall-clock time would shift the window by the
    # zone's UTC offset.
    stamp_format = "%Y-%m-%dT%H:%M:%S.000Z"
    now_utc = datetime.now(pytz.utc)
    start   = (now_utc - timedelta(days=days_back)).strftime(stamp_format)
    end     = now_utc.strftime(stamp_format)

    # The session is closed on exit, including when an error is raised.
    with requests.Session() as session:
        session.headers.update({"Authorization": f"Bearer {token}"})

        @lru_cache(maxsize=None)
        def _tag_detail(name):
            # Cached so a tag used in several conditions/calculations is
            # looked up only once per call.
            detail = _checked_json(
                session.get(
                    f"{base_url}/hps/api/tags/details",
                    params={"tagName": name}
                ),
                f"Tag lookup for {name!r}"
            )
            return detail["id"], detail["interpolationType"]

        # ─── Build payload ─────────────────────────────────────────────
        queries = []
        for tag, op, val in conditions:
            _tag_id, interpolation = _tag_detail(tag)
            queries.append({
                "reference": {"name": tag, "interpolationType": interpolation, "shift": 0},
                "condition": op,
                "values": [val]
            })

        calcs = []
        for name, ctype, tag in calculations:
            tag_id, interpolation = _tag_detail(tag)
            calcs.append({
                "name": name,
                "unit": "",
                "type": ctype,
                "reference": {"id": tag_id, "name": tag,
                              "interpolationType": interpolation, "shift": 0}
            })

        body = {
            "contextTimePeriod": {"startDate": start, "endDate": end},
            "definition": {"type": "VALUE_BASED_SEARCH", "queries": queries,
                           "parameters": {"minimumDuration": min_duration, "operator": operator},
                           "calculations": calcs}
        }

        # ─── Submit & Poll ─────────────────────────────────────────────
        submitted = _checked_json(
            session.post(f"{base_url}/compute/search-requests", json=body),
            "Search submission"
        )
        req_id = submitted["id"]
        status_url = f"{base_url}/compute/search-requests/{req_id}"

        # NOTE(review): there is no upper bound on polling; a search that never
        # reaches DONE/FAILED/ERROR keeps this loop running indefinitely.
        while True:
            st = _checked_json(session.get(status_url), "Search status check").get("status")
            if st == "DONE":
                break
            if st in {"FAILED", "ERROR"}:
                raise RuntimeError(f"Search failed ({st}) for ID {req_id}")
            time.sleep(1)

        # ─── Fetch All Pages ───────────────────────────────────────────
        hits = []
        results_url = f"{base_url}/compute/search-requests/{req_id}/results"
        page = 0
        while True:
            # The first page is always fetched; its metadata tells us how
            # many pages exist in total.
            payload = _checked_json(
                session.get(results_url, params={"page": page, "size": page_size}),
                "Fetching search results"
            )
            hits.extend(payload["content"])
            page += 1
            if page >= payload["page"]["totalPages"]:
                break

    # No hits means no "start"/"end" columns to convert.
    if not hits:
        return pd.DataFrame()

    # — normalize nested fields and convert timestamps to the requested zone —
    df = pd.json_normalize(hits, sep="_")
    df = df.assign(
        start=pd.to_datetime(df["start"], utc=True).dt.tz_convert(tz),
        end=pd.to_datetime(df["end"], utc=True).dt.tz_convert(tz)
    )

    return df

In [2]:
# Search conditions as (tag, op, val); combined with the default "AND" operator.
conds = [
    ("[CS]BA:CONC.1", ">", 1),
    ("[CS]BA:TEMP.1", "<", 28),
]
# Per-hit calculations as (name, type, tag).
calcs = [
    ("mean_temp", "MEAN", "[CS]BA:TEMP.1"),
    ("mean_conc", "MEAN", "[CS]BA:CONC.1"),
]

df = value_search_df(conditions=conds, calculations=calcs)
df  # your search results + calculated columns, ready to go

Unnamed: 0,id,type,start,end,duration,links,calculations_mean_conc,calculations_mean_temp,properties_openEnded
0,0e251fc3-82c6-45ce-b204-64710a4129f0,SEARCH_RESULT,2025-05-22 14:31:00+02:00,2025-05-22 14:46:00+02:00,900,[],44.268864,10.879795,True
1,0d175dd9-426e-4151-8091-92a4c67379ff,SEARCH_RESULT,2025-05-22 13:38:00+02:00,2025-05-22 14:15:00+02:00,2220,[],16.384157,18.925861,False
2,4c95e9c5-6995-49d3-81ef-d31acbac97ef,SEARCH_RESULT,2025-05-22 13:12:00+02:00,2025-05-22 13:26:00+02:00,840,[],43.739925,14.504011,False
3,bf8778b4-c235-4d00-a052-4a3b92a9b810,SEARCH_RESULT,2025-05-22 12:16:00+02:00,2025-05-22 12:53:00+02:00,2220,[],16.257261,19.432762,False
4,8c3a226d-42b4-4f7c-bcdb-6084b7f6783c,SEARCH_RESULT,2025-05-22 11:50:00+02:00,2025-05-22 12:05:00+02:00,900,[],42.757004,10.541932,False
...,...,...,...,...,...,...,...,...,...
348,fc87c46f-3f70-480b-8b02-7b4e28b331bd,SEARCH_RESULT,2025-05-12 19:44:00+02:00,2025-05-12 19:59:00+02:00,900,[],44.956585,12.521190,False
349,32569560-7bd3-43e0-8e96-382ea71e76dd,SEARCH_RESULT,2025-05-12 18:49:00+02:00,2025-05-12 19:27:00+02:00,2280,[],17.741660,18.087347,False
350,4ff942f3-ccdf-4cca-825a-5932eec90bfc,SEARCH_RESULT,2025-05-12 18:18:00+02:00,2025-05-12 18:38:00+02:00,1200,[],39.968506,7.815891,False
351,713e1bfd-1ca3-44b6-bedb-f0e76a919132,SEARCH_RESULT,2025-05-12 17:28:00+02:00,2025-05-12 18:05:00+02:00,2220,[],14.849813,16.496546,False
