# Creating histograms
Histograms can be used to display the distribution of tag values over a given period. Histograms of tag values are available in the TrendHub scatter plot feature, but these do not offer customization options. For fully customizable histograms, we can use Plotly in MLHub instead. This notebook contains some histogram examples.

## Function for getting tag data
As a starting point, we implement a function to get time-series data directly from a tag. This function outputs DataFrame objects which we will use to create histograms.

In [1]:
import os
import time
from typing import List, Union
import pandas as pd
import requests
import urllib3


def get_data(
    tagnames: List[str],
    start: pd.Timestamp,
    end: pd.Timestamp,
    freq: pd.Timedelta,
    tz: Union[str, None] = "UTC",
    verify: bool = False,
) -> pd.DataFrame:
    """
    Retrieve interpolated time series data from selected tags, paging transparently
    for long time ranges by following the API's `nextStartDate`.

    Parameters
    ----------
    tagnames
        Names of the tags to fetch. Each tag becomes one column of the result.
    start, end
        Boundaries of the requested period. When `tz` is set, naive timestamps
        are localized to it. Both are rounded to a multiple of `freq`.
    freq
        Spacing between points; sent to the API as a whole number of seconds.
    tz
        Timezone used to localize naive `start`/`end` and to convert the index
        of the result. Pass `None` to skip both steps.
    verify
        Forwarded to `requests` as the `verify` argument. When False, urllib3's
        `InsecureRequestWarning` is silenced.

    Returns
    -------
    pd.DataFrame
        One column per tag, indexed by timestamp. For tags whose details report
        type DIGITAL or STRING, values are mapped from state codes to state names.
        The frame is empty (with a timezone-aware datetime index) when
        `tagnames` is empty or when no tag returned any values.

    Raises
    ------
    KeyError
        If `KERNEL_USER_TOKEN` or `KERNEL_SERVER_URL` is missing from the environment.
    requests.HTTPError
        If the server answers any request with an error status.
    RuntimeError
        If the API returns a `nextStartDate` that does not advance between pages.
    """
    # Nothing requested: return an empty frame instead of letting pd.concat fail
    if not tagnames:
        return pd.DataFrame(index=pd.DatetimeIndex([], tz=tz or "UTC"))

    # 1) Localize / round inputs
    if tz:
        if start.tzinfo is None:
            start = start.tz_localize(tz)
        if end.tzinfo is None:
            end = end.tz_localize(tz)
    start = start.round(freq)
    end = end.round(freq)

    # 2) Auth + optional SSL-skip
    base_url = os.environ["KERNEL_SERVER_URL"]
    headers = {"Authorization": f"Bearer {os.environ['KERNEL_USER_TOKEN']}"}
    if not verify:
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    step_seconds = int(freq.total_seconds())
    end_date = end.isoformat(timespec="milliseconds")

    series_list = []
    for tagname in tagnames:
        # fetch tag metadata
        details_response = requests.get(
            f"{base_url}/hps/api/tags/details",
            headers=headers,
            params={"tagName": tagname},
            verify=verify,
        )
        # fail here with the HTTP status rather than later with a KeyError
        details_response.raise_for_status()
        details = details_response.json()

        chunks = []
        next_start = start
        previous_page_start = None

        while True:
            payload = {
                "tag": {
                    "id": details["id"],
                    "shift": 0,
                    "interpolationType": details["interpolationType"],
                },
                "timePeriod": {
                    "startDate": next_start.isoformat(timespec="milliseconds"),
                    "endDate": end_date,
                },
                "step": step_seconds,
            }
            page_response = requests.post(
                f"{base_url}/compute/interpolatedData/paged",
                headers=headers,
                json=payload,
                verify=verify,
            )
            page_response.raise_for_status()
            data = page_response.json()

            # only build a chunk when the page actually contains values
            vals = data.get("values", [])
            if vals:
                chunk = (
                    pd.DataFrame(vals)
                    .assign(ts=lambda df: pd.to_datetime(df["ts"]))
                    .set_index("ts")["value"]
                )
                chunks.append(chunk)

            # advance to next page (or break if none)
            next_sd = data.get("nextStartDate")
            if not next_sd:
                break

            next_start = pd.to_datetime(next_sd)
            if next_start >= end:
                break

            # a cursor that does not move forward would make this loop endless
            if previous_page_start is not None and next_start <= previous_page_start:
                raise RuntimeError(
                    f"Paging for tag {tagname!r} did not advance past {next_sd}"
                )
            previous_page_start = next_start

            # short pause between consecutive page requests
            time.sleep(0.1)

        # if we never got *any* data, make an empty Series
        if chunks:
            tag_series = pd.concat(chunks)
        else:
            tag_series = pd.Series(dtype=float, name=tagname)

        tag_series.name = tagname

        # map codes -> names for STRING/DIGITAL tags
        if details.get("type") in ("DIGITAL", "STRING"):
            states = {s["Code"]: s["Name"] for s in details.get("States", [])}
            tag_series = tag_series.map(states)

        series_list.append(tag_series)

    # Give empty series the same kind of (empty) datetime index as the
    # non-empty ones, so the combined index stays a DatetimeIndex and the
    # timezone conversion below also works when some or all tags have no data.
    empty_index = next(
        (s.index[:0] for s in series_list if not s.empty),
        pd.DatetimeIndex([], tz="UTC"),
    )
    series_list = [
        pd.Series(dtype=s.dtype, index=empty_index, name=s.name) if s.empty else s
        for s in series_list
    ]

    # combine into one DataFrame, convert tz
    df = pd.concat(series_list, axis=1)
    if tz:
        df.index = df.index.tz_convert(tz)

    return df

## Histograms of single tag
A simple histogram made with Plotly Express. The percentage distribution is added on top of the histogram bars.

Note that the `nbins` parameter is a suggestion for the plot, and not completely hard-coded. The resulting number of bins can differ.

In [2]:
import pandas as pd
import plotly.express as px

# Fetch one month of 15-minute data for a single tag
df = get_data(
    tagnames=["TM4-HEX-PDI0620"],
    start=pd.Timestamp("2023-04-01"),
    end=pd.Timestamp("2023-05-01"),
    freq=pd.Timedelta("15m"),
    tz="Europe/Brussels"
)

# The frame holds exactly one column: the requested tag
tag_column = df.columns[0]

# Histogram normalized so that bar heights are percentages of all samples
fig = px.histogram(
    df,
    x=tag_column,
    nbins=10,
    histnorm="percent",
    labels={tag_column: "Value"},
    title=f"Histogram of {tag_column}"
)

# Print each bar's percentage above it and outline the bars in black
bar_style = dict(
    texttemplate="%{y:.3f}%",
    textposition="outside",
    marker_line_color="black",
    marker_line_width=1
)
fig.update_traces(**bar_style)

# Gap between bars and overall figure size
figure_layout = dict(bargap=0.1, height=500, width=1200)
fig.update_layout(**figure_layout)

fig.show()

## Histogram with tag selection dropdown
A fully customized plot in which we can choose the tag for which to show the histogram.

In [3]:
import pandas as pd
import plotly.graph_objects as go

# Fetch one month of 15-minute data for all tags offered in the dropdown
df = get_data(
    tagnames=["TM4-HEX-PDI0620", "TM4-HEX-FI0620", "TM4-HEX-PI06201"],
    start=pd.Timestamp("2023-04-01"),
    end=pd.Timestamp("2023-05-01"),
    freq=pd.Timedelta("15m"),
    tz="Europe/Brussels"
)

fig = go.Figure()
buttons = []
tag_count = len(df.columns)

# One pass per tag: add its histogram trace and the dropdown entry that selects it
for idx, tag in enumerate(df.columns):
    histogram = go.Histogram(
        x=df[tag].dropna(),
        nbinsx=10,
        histnorm="percent",
        marker_line_color="black",
        marker_line_width=1,
        texttemplate="%{y:.3f}%",
        textposition="outside",
        visible=(idx == 0),   # start with only the first tag shown
        name=tag
    )
    fig.add_trace(histogram)

    # Visibility mask that shows this tag's trace and hides all others
    visible = [position == idx for position in range(tag_count)]
    button = dict(
        label=tag,
        method="update",
        args=[
            {"visible": visible},
            {"title": f"Histogram of {tag}"}
        ]
    )
    buttons.append(button)

# Dropdown centered horizontally, placed above the title
dropdown = dict(
    buttons=buttons,
    direction="down",
    showactive=True,
    x=0.5,
    y=1.25,
    xanchor="center",
    yanchor="top"
)

fig.update_layout(
    updatemenus=[dropdown],
    title=f"Histogram of {df.columns[0]}",
    xaxis_title="Value",
    yaxis_title="%",
    bargap=0.1,
    margin=dict(t=140),   # extra top margin leaves room for the dropdown
    height=500,
    width=1200,
)

fig.show()

## Histogram with interval selection dropdown

In [4]:
import pandas as pd
import plotly.graph_objects as go

# Periods to compare; each key doubles as the dropdown label and trace name.
# Each interval covers one calendar month, matching its label.
intervals = {
    "april 23": pd.Interval(
        left=pd.Timestamp("2023-04-01", tz="Europe/Brussels"),
        right=pd.Timestamp("2023-05-01", tz="Europe/Brussels")
    ),
    "may 23": pd.Interval(
        left=pd.Timestamp("2023-05-01", tz="Europe/Brussels"),
        right=pd.Timestamp("2023-06-01", tz="Europe/Brussels")
    ),
}

tagname = "TM4-HEX-PI06201"

# Load the data for one tag, for multiple intervals
df_dict = {
    key: get_data(
        tagnames=[tagname],
        start=interval.left,
        end=interval.right,
        freq=pd.Timedelta("15m"),
        tz=interval.left.tz
    ) for key, interval in intervals.items()
}

# Build the Figure with one go.Histogram per interval
fig = go.Figure()
for position, (key, df) in enumerate(df_dict.items()):
    fig.add_trace(
        go.Histogram(
            x=df[tagname].dropna(),
            # Explicit bins keep the x-axis identical across intervals so the
            # distributions can be compared; nbinsx is omitted because Plotly
            # ignores it when xbins.size is given.
            xbins=dict(start=3.5, end=6.5, size=0.1),
            histnorm="percent",
            marker_line_color="black",
            marker_line_width=1,
            texttemplate="%{y:.3f}%",
            textposition="outside",
            visible=(position == 0),   # only first interval visible initially
            name=key
        )
    )

# Add dropdown buttons: each one shows its own interval's trace and hides the rest
buttons = []
for key in intervals:
    visible = [key == interval_key for interval_key in intervals]
    buttons.append(dict(
        label=key,
        method="update",
        args=[
            {"visible": visible},
        ]
    ))

fig.update_layout(
    updatemenus=[dict(
        buttons=buttons,
        direction="down",
        showactive=True,
        x=0.5,            # 50% across the paper
        y=1.25,           # 125% up the paper (i.e. above the title)
        xanchor="center", # center the dropdown on x
        yanchor="top"     # align its top at y=1.25
    )],
    title=f"Histogram of {tagname}",
    xaxis_title="Value",
    yaxis_title="%",
    bargap=0.1,
    margin=dict(t=140),   # add extra top margin so nothing overlaps
    height=500,
    width=1200,
)

fig.show()