# Get interpolated data

Rather than getting time series data from a view, it is possible to retrieve time series data directly using requests. Below is a general function that will return a pandas DataFrame of regularly interpolated time series data for given input tags. You can paste this function definition in your MLHub scripts to have an easier and more flexible way to retrieve time series data.

In [14]:
import os
import time
from typing import List, Union
import pandas as pd
import requests
import urllib3


def get_data(
    tagnames: List[str],
    start: pd.Timestamp,
    end: pd.Timestamp,
    freq: pd.Timedelta,
    tz: Union[str, None] = "UTC",
    verify: bool = True,
    timeout: Union[float, None] = None,
) -> pd.DataFrame:
    """Return regularly interpolated time series data for the given tags.

    For every tag, the tag details are looked up first, after which the paged
    interpolated-data endpoint is queried repeatedly, following the
    ``nextStartDate`` field of each response until it is absent or reaches
    ``end``.

    The server URL and bearer token are read from the ``KERNEL_SERVER_URL``
    and ``KERNEL_USER_TOKEN`` environment variables.

    Args:
        tagnames: Names of the tags to retrieve; each becomes one column.
        start: Start of the period. Localized to ``tz`` when naive and ``tz``
            is set, then rounded to ``freq``.
        end: End of the period; treated like ``start``.
        freq: Sampling interval. It is sent to the API as a whole number of
            seconds, so it must be at least one second.
        tz: Time zone used to localize naive ``start``/``end`` and to convert
            the resulting index. Pass ``None`` to skip both steps.
        verify: Passed to ``requests`` as ``verify``. When false, urllib3's
            insecure-request warnings are silenced as well.
        timeout: Passed to ``requests`` as ``timeout`` (seconds). ``None``
            waits indefinitely.

    Returns:
        A DataFrame indexed by timestamp (index name ``"ts"``) with one column
        per tag. Tags for which no values were returned yield an all-NaN
        column; an empty ``tagnames`` yields an empty DataFrame.

    Raises:
        ValueError: If ``freq`` is shorter than one second.
        KeyError: If a required environment variable is missing.
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the server returns a ``nextStartDate`` that does not
            advance, which would otherwise loop forever.
    """
    # The API takes the step as integer seconds; a sub-second freq would
    # truncate to 0 and disagree with the rounding applied below.
    step = int(freq.total_seconds())
    if step < 1:
        raise ValueError("freq must be at least one second")

    # 1) Localize naive bounds, then snap both to the sampling grid.
    if tz:
        if start.tzinfo is None:
            start = start.tz_localize(tz)
        if end.tzinfo is None:
            end = end.tz_localize(tz)
    start = start.round(freq)
    end = end.round(freq)

    # Nothing to fetch: return an empty frame instead of failing in concat.
    if not tagnames:
        return pd.DataFrame(
            index=pd.DatetimeIndex([], tz=tz or start.tz, name="ts")
        )

    # 2) Auth + optional SSL-skip
    base_url = os.environ["KERNEL_SERVER_URL"]
    headers = {"Authorization": f"Bearer {os.environ['KERNEL_USER_TOKEN']}"}
    if not verify:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    end_iso = end.isoformat(timespec="milliseconds")
    # Index used for tags without data, so the column-wise concat below still
    # produces a DatetimeIndex that can be converted to ``tz``.
    empty_index = pd.DatetimeIndex([], tz=start.tz, name="ts")

    series_list = []
    for tagname in tagnames:
        # Fetch tag metadata (id, interpolation type, optional states).
        response = requests.get(
            f"{base_url}/hps/api/tags/details",
            headers=headers,
            params={"tagName": tagname},
            verify=verify,
            timeout=timeout,
        )
        # Fail here with the HTTP status instead of a KeyError further down.
        response.raise_for_status()
        details = response.json()

        tag_spec = {
            "id": details["id"],
            "shift": 0,
            "interpolationType": details["interpolationType"],
        }

        chunks = []
        next_start = start

        while True:
            payload = {
                "tag": tag_spec,
                "timePeriod": {
                    "startDate": next_start.isoformat(timespec="milliseconds"),
                    "endDate": end_iso,
                },
                "step": step,
            }
            response = requests.post(
                f"{base_url}/compute/interpolatedData/paged",
                headers=headers,
                json=payload,
                verify=verify,
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()

            # Only build a chunk when the page actually carries values.
            vals = data.get("values")
            if vals:
                chunk = (
                    pd.DataFrame(vals)
                    .assign(ts=lambda page: pd.to_datetime(page["ts"]))
                    .set_index("ts")["value"]
                )
                chunks.append(chunk)

            # Advance to the next page, or stop when there is none.
            next_sd = data.get("nextStartDate")
            if not next_sd:
                break

            candidate = pd.to_datetime(next_sd)
            if candidate >= end:
                break
            if candidate <= next_start:
                raise RuntimeError(
                    f"Paging for tag {tagname!r} did not advance past "
                    f"{next_start.isoformat()}"
                )
            next_start = candidate

            # Short pause between consecutive page requests.
            time.sleep(0.1)

        if chunks:
            tag_series = pd.concat(chunks)
            # Defensive: a timestamp repeated across page boundaries would
            # make the column-wise concat below fail on a non-unique index.
            tag_series = tag_series[~tag_series.index.duplicated(keep="first")]
        else:
            tag_series = pd.Series(dtype=float, index=empty_index)

        tag_series.name = tagname

        # Map state codes to state names for STRING/DIGITAL tags.
        # NOTE(review): "States"/"Code"/"Name" are capitalized unlike the
        # other keys read from the details payload; confirm against the API.
        if details.get("type") in ("DIGITAL", "STRING"):
            states = {s["Code"]: s["Name"] for s in details.get("States") or []}
            tag_series = tag_series.map(states)

        series_list.append(tag_series)

    # Combine into one DataFrame and express the index in the requested zone.
    df = pd.concat(series_list, axis=1)
    if tz:
        df.index = df.index.tz_convert(tz)

    return df

Below is an example call to our `get_data` function. We retrieve the time series data for 3 tags for January 2023, at a 5-minute resolution, in our local timezone.

In [17]:
# Example: three tags over January 2023, sampled every 5 minutes.
# The naive start/end timestamps are localized to the given time zone by
# get_data, and the returned index is converted to that zone as well.
tags = ["TM4-BP2-PRODUCT.1", "TM4-BP2-LEVEL.1", "TM4-BP2-CONC.1"]
period_start = pd.Timestamp("2023-01-01")
period_end = pd.Timestamp("2023-02-01")

df = get_data(
    tagnames=tags,
    start=period_start,
    end=period_end,
    freq=pd.Timedelta(minutes=5),
    tz="Europe/Brussels",
)

# Show the first rows of the result.
df.head()