# Step 3: build OCS data views, map metadata to streams 

#### This notebook synchronizes OCS with the graph: tags and metadata are added to the target OCS streams, and the associated Data Views are created.

#### All Data Views share the same structure. The information needed to create one is:

* Database name (asset_db)
* List of Asset IDs
* OCS tag 

Data View sample below with: 

* `asset_db:brewey`
* `asset_id:FV31`
* `hub__all_columns` as tag 

In [1]:
import asyncio
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import httpx
import json
import time
import urllib3
from ocs_academic_hub import HubClient
import yaml

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
# Name of the YAML configuration file loaded by the next cell; the
# commented-out lines are alternative configuration files.
config_file = "config-delmar.yaml"
# config_file = "config-windfarm.yaml"  # "config-acad-prod-desc-v2.yaml"
# config_file = "config-prod-ucd-v2.yaml"

In [3]:
# Parse the YAML configuration into `config`, which the rest of the notebook
# reads (OCS namespace, asset database, GraphQL endpoint).
with open(config_file) as f:
    config = yaml.safe_load(f)
# config

In [4]:
# Set OCS_HUB_CONFIG before creating the hub client
# (presumably HubClient() reads this variable - TODO confirm).
%env OCS_HUB_CONFIG=config-dv.ini
hub = HubClient()

env: OCS_HUB_CONFIG=config-dv.ini
> configuration file: None
@ --- authorization granted ---
@ Hub data file: hub_datasets.json


In [5]:
# Target OCS namespace and asset database, both taken from the YAML configuration.
namespace_id = config["ocs"]["configuration"]["namespace"]  # "academic_hub_01"
asset_db = config["ocs"]["configuration"]["asset_db"]  # "deschutes"

# Prefix prepended by update_tags() to every tag it adds; empty means tags are used as-is.
tag_prefix = ""  # "hub__"
# Timeout handed to httpx for every call made through request().
timeout = 45.0

## streams = hub.Streams.getStreams(namespace_id, query="name:*", count=20000)
## len(streams), streams[0].Id, streams[0].Name

In [6]:
## name2id = {s.Name: s.Id for s in streams}
## len(name2id)

In [7]:
async def request(method, url, params=None, data=None, headers=None, **kwargs):
    """Send one HTTP request through a short-lived ``httpx.AsyncClient``.

    When ``headers`` is falsy (``None`` or empty), the headers returned by the
    hub's base client (``sdsHeaders()``) are used instead. The module-level
    ``timeout`` is applied to every call, and any extra keyword arguments are
    forwarded unchanged to ``httpx.AsyncClient.request``.

    Returns:
        The ``httpx`` response object.
    """
    effective_headers = headers or hub._OCSClient__baseClient.sdsHeaders()
    call_options = dict(
        params=params, data=data, headers=effective_headers, timeout=timeout
    )

    # A fresh client per call: it is closed as soon as the response is received.
    async with httpx.AsyncClient() as session:
        response = await session.request(method, url, **call_options, **kwargs)
    return response


async def update_tags(namespace_id, stream_id, new_tags, hub_clean=False):
    """Merge ``new_tags`` into a stream's tags, or strip the hub-generated ones.

    The current tags are read from ``<stream>/Tags``, a new list is computed
    and written back with a PUT to the same URL.

    Args:
        namespace_id: OCS namespace holding the stream; must not be ``None``.
        stream_id: Id of the stream to update.
        new_tags: Tags to add; each one is prefixed with the module-level
            ``tag_prefix``. Ignored when ``hub_clean`` is true.
        hub_clean: When true, nothing is added; instead every existing tag
            containing ``"|"`` is removed.

    Returns:
        The tag list that was written, or ``None`` when either HTTP call
        answered with an error status (``update_stream`` treats that as a
        failure).

    Raises:
        TypeError: if ``namespace_id`` is ``None``.
    """
    if namespace_id is None:
        raise TypeError("namespace_id must not be None")

    ## try:
    ##     streamId = name2id[stream_name]
    ## except KeyError:
    ##    return
    tags_url = (
        hub._OCSClient__Streams._Streams__streamsPath.format(
            tenant_id=hub.tenant, namespace_id=namespace_id, stream_id=stream_id
        )
        + "/Tags"
    )

    response = await request("get", tags_url)
    if response.status_code >= 400:
        # An error payload is not a tag list; merging it would corrupt the tags.
        return None
    current_tags = json.loads(response.text)

    if hub_clean:
        tags = [tag for tag in current_tags if "|" not in tag]
    else:
        # dict.fromkeys() drops duplicates while keeping first-seen order, so
        # re-running the notebook does not pile up repeated tags.
        tags = list(dict.fromkeys(current_tags + [tag_prefix + i for i in new_tags]))

    response = await request("put", tags_url, data=json.dumps(tags))
    if response.status_code >= 400:
        return None
    # print(f"[{stream_name}]-tags={tags}")
    return tags

In [8]:
async def update_metadata(namespace_id, stream_id, new_meta):
    """Merge ``new_meta`` into the metadata of one stream.

    The current metadata is read from ``<stream>/Metadata``, updated with
    ``new_meta`` (new values win on key clashes) and written back with a PUT
    to the same URL.

    Args:
        namespace_id: OCS namespace holding the stream; must not be ``None``.
        stream_id: Id of the stream to update.
        new_meta: Mapping of metadata keys/values to add or overwrite.

    Returns:
        The merged metadata dict that was written, or ``None`` when either
        HTTP call answered with an error status (``update_stream`` treats that
        as a failure).

    Raises:
        TypeError: if ``namespace_id`` is ``None``.
    """
    if namespace_id is None:
        raise TypeError("namespace_id must not be None")

    ## try:
    ##     streamId = name2id[stream_name]
    ## except KeyError:
    ##    return None
    metadata_url = (
        hub._OCSClient__Streams._Streams__streamsPath.format(
            tenant_id=hub.tenant, namespace_id=namespace_id, stream_id=stream_id
        )
        + "/Metadata"
    )

    response = await request("get", metadata_url)
    if response.status_code >= 400:
        # Never merge an error payload into the metadata and write it back.
        return None
    metadata = json.loads(response.text)
    metadata.update(new_meta)

    response = await request("put", metadata_url, data=json.dumps(metadata))
    if response.status_code >= 400:
        return None
    return metadata

In [9]:
async def update_stream(stream_info):
    """Push the metadata and tags derived from one graph record to its stream.

    Metadata written: ``asset_db``, ``asset_id`` and, for every attribute,
    ``"<parent>|column"`` mapped to the attribute name (a later attribute with
    the same parent overwrites an earlier one). Tags written: one
    ``"<parent>|element"`` per distinct parent, in first-seen order.

    Args:
        stream_info: Mapping with the keys ``asset_db``, ``asset_id``,
            ``stream_id`` and ``attributes`` (a list of mappings with ``name``
            and ``parent``).

    Returns:
        ``True`` when both updates returned a result, ``False`` when either
        returned ``None``. An empty tag list is a valid result, not a failure.
    """
    meta = {
        "asset_db": stream_info["asset_db"],
        "asset_id": stream_info["asset_id"],
    }
    tags = []
    for attr in stream_info["attributes"]:
        parent = attr["parent"]
        meta[f"{parent}|column"] = attr["name"]
        element_tag = f"{parent}|element"
        # Several attributes can share a parent; send each tag only once.
        if element_tag not in tags:
            tags.append(element_tag)
    m = await update_metadata(namespace_id, stream_info["stream_id"], meta)
    ## dv = stream_info["dataviews"]
    ## new_tags = [i["ocs_tag"] for i in dv]
    t = await update_tags(namespace_id, stream_info["stream_id"], tags)
    return m is not None and t is not None

In [10]:
async def update_streams(streams_info):
    """Update every stream of ``streams_info`` one after the other.

    A ``+`` is printed (without newline) before each stream is processed, and
    an ``@error(<stream_name>)`` line is printed for each stream whose
    ``update_stream`` result is falsy.
    """
    for info in streams_info:
        # print(f"-[{info['stream_name']}]- ", end="")
        print("+", end="")
        succeeded = await update_stream(info)
        if succeeded:
            continue
        print(f"@error({info['stream_name']})")

## Input Parameters

In [11]:
# GraphQL client for the endpoint named in the configuration. TLS certificate
# verification is turned off (verify=False) and the transport is configured
# with retries=3.
sample_transport = RequestsHTTPTransport(
    url=config["graphql"]["endpoint"], verify=False, retries=3
)
client = Client(transport=sample_transport, fetch_schema_from_transport=True)

In [12]:
# Fetch every PIPoint of the configured asset database, ordered by asset id
# then name, together with its attributes (name + parent). Each record carries
# the keys update_stream() reads.
streams_query = gql(
    """
query PIPoint_tags($asset_db: String) {
      PIPoint(asset_db: $asset_db, orderBy: [asset_id_asc, name_asc]) {
        asset_db
        asset_id
        name
        stream_id
        stream_name
        attributes(orderBy: name_asc) {
          name
          parent
        }
      }
    }
"""
)
streams = client.execute(
    streams_query,
    variable_values={"asset_db": config["ocs"]["configuration"]["asset_db"]},
)
# Show the first record only, as a sample of the structure.
print(streams["PIPoint"][0])
# print(json.dumps(dataviews, indent=4))

{'asset_db': 'pilot.plant', 'asset_id': 'HOT-1', 'name': 'E-101 Air Cooler Outlet Temperature', 'stream_id': 'PI_acad-pida-vm0_12246', 'stream_name': 'delmar.DELMAR_UNIT1_TI-107.PV', 'attributes': [{'name': 'E-101 Air Cooler Outlet Temperature', 'parent': 'TK-101'}, {'name': 'E-101 Air Cooler Outlet Temperature', 'parent': 'HOT-1'}, {'name': 'E-101 Outlet Temperature', 'parent': 'E-101'}]}


In [13]:
# Try the update on the first stream only, before running the full batch.
r = await update_stream(streams["PIPoint"][0])

In [14]:
r

True

In [15]:
def chunks(lst, n):
    """Generate consecutive slices of ``lst`` holding at most ``n`` items each.

    Every slice has exactly ``n`` items except possibly the last one, which
    holds whatever remains.
    """
    yield from (lst[start : start + n] for start in range(0, len(lst), n))


async def gather_func(f, items, ndiv):
    """Run the coroutine function ``f`` concurrently over slices of ``items``.

    ``items`` is cut into slices of ``len(items) // ndiv + 1`` elements, which
    yields at most ``ndiv`` slices (fewer when ``items`` is short, none when it
    is empty). ``f`` is called once per slice and the resulting coroutines are
    awaited together with ``asyncio.gather``.

    Prints ``-OK-`` when at least one slice was processed, ``@oops`` when
    there was nothing to run, then the elapsed wall-clock time.

    Args:
        f: Coroutine function taking one list slice.
        items: Sequence to distribute.
        ndiv: Maximum number of concurrent slices; must be non-zero.
    """
    div = (len(items) // ndiv) + 1
    # One coroutine per slice that actually exists: indexing a fixed
    # range(ndiv) raised IndexError whenever fewer than ndiv slices were made.
    coroutines = [f(chunk) for chunk in chunks(items, div)]
    start_time = time.perf_counter()
    results = await asyncio.gather(*coroutines)
    if results:
        print("-OK-")
    else:
        print("@oops")
    print(f"> runtime {time.perf_counter() - start_time:.2f} secs")

In [16]:
# Run the bulk update unless the configuration sets `update_streams` to a
# falsy value (it defaults to True); the streams are split into at most 4
# slices that are processed concurrently.
if config["ocs"]["configuration"].get("update_streams", True):
    await gather_func(update_streams, streams["PIPoint"], 4)

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-OK-
> runtime 35.54 secs


In [27]:
# Fetch the DataView nodes of the configured asset database whose `ocs_sync`
# flag is false, ordered by id, with the streams attached to each of them.
dataview_query = gql(
    """
query DataViews($asset_db: String) {
  DataView(ocs_sync: false, asset_db: $asset_db, orderBy: id_asc) {
    name
    id
    asset_db
    asset_id
    ocs_tag
    description
    has_stream {
      name
      stream_name
    }
  }
}
"""
)
dataviews = client.execute(
    dataview_query,
    variable_values={"asset_db": config["ocs"]["configuration"]["asset_db"]},
)
# print(dataviews["DataView"][0])
print(json.dumps(dataviews, indent=4))

{
    "DataView": [
        {
            "name": "Default",
            "id": "pilot.plant_hot-1",
            "asset_db": "pilot.plant",
            "asset_id": [
                "HOT-1"
            ],
            "ocs_tag": "HOT-1",
            "description": "Hub DV for Asset HOT-1 - Default",
            "has_stream": [
                {
                    "name": "P-103 Run Status",
                    "stream_name": "delmar.DELMAR_UNIT1_XB-103"
                },
                {
                    "name": "P-102 Run Status",
                    "stream_name": "delmar.DELMAR_UNIT1_XB-102"
                },
                {
                    "name": "P-102 Discharge pressure",
                    "stream_name": "delmar.DELMAR_UNIT1_PI-108.PV"
                },
                {
                    "name": "P-101B Run Status",
                    "stream_name": "delmar.DELMAR_UNIT1_XB-101B"
                },
                {
                    "name": "P-101A Run Status

In [28]:
def dv_header(asset_id, dv_name, dv_id, description):
    """Return the leading part of a data view definition.

    ``dv_id`` is used for both ``Id`` and ``Name``; ``asset_id`` and
    ``dv_name`` are accepted but not used. The index field is a fixed
    ``Timestamp`` column.
    """
    index_field = {"Source": "NotApplicable", "Keys": [], "Label": "Timestamp"}
    header = dict(Id=dv_id, Name=dv_id, Description=description)
    header["IndexField"] = index_field
    return header


def dv_query(asset_db, asset_id, tag, asset_type, value_addition=""):
    """Build one stream query of a data view definition.

    The query value always starts with ``asset_db:"<asset_db>"`` and is
    narrowed by an asset clause:

    * one asset equal to ``tag``: ``"<tag>|element"``
    * one asset different from ``tag``: ``"<asset>|element" AND "<tag>|element"``
    * any other number of assets: ``("<a1>|element" OR "<a2>|element" ...)``;
      ``tag`` is not part of the clause in this case.

    Args:
        asset_db: Asset database name.
        asset_id: List of asset ids.
        tag: Tag compared with the asset id in the single-asset case.
        asset_type: Suffix of the query id (``Asset_<asset_type>``).
        value_addition: Text appended verbatim to the query value.

    Returns:
        A dict with the keys ``Id``, ``Kind`` and ``Value``.
    """
    if len(asset_id) == 1:
        asset = asset_id[0]
        if tag != asset:
            asset_clause = f'"{asset}|element" AND "{tag}|element"'
        else:
            asset_clause = f'"{tag}|element"'
    else:
        # Interpolate each individual asset; the previous code formatted the
        # whole `asset_id` list into every term of the OR clause.
        asset_clause = (
            "(" + " OR ".join(f'"{asset}|element"' for asset in asset_id) + ")"
        )
    return {
        "Id": f"Asset_{asset_type}",
        "Kind": "Stream",
        "Value": f'asset_db:"{asset_db}" AND {asset_clause}{value_addition}',
    }


def dv_datafield(asset_type, key, column_key, label_suffix=""):
    """Build one data field set bound to the query ``Asset_<asset_type>``.

    The set exposes the stream property ``key`` and labels each column with
    the ``{IdentifyingValue}`` placeholder followed by ``label_suffix``. The
    identifying field is the metadata key ``"<column_key>|column"``.
    """
    property_field = {
        "Source": "PropertyId",
        "Keys": [key],
        "Label": f"{{IdentifyingValue}}{label_suffix}",
    }
    identifying_field = {
        "Source": "Metadata",
        "Keys": [f"{column_key}|column"],
        "Label": "{IdentifyingValue} {FirstKey}",
    }
    field_set = {"QueryId": f"Asset_{asset_type}"}
    field_set["DataFields"] = [property_field]
    field_set["IdentifyingField"] = identifying_field
    return field_set


def dv_footer(
    default_start_index="2017-02-07T00:00",
    default_end_index="2017-02-27T00:00",
    default_interval="00:05:00",
):
    """Return the trailing part of a data view definition.

    Args:
        default_start_index: Value stored under ``DefaultStartIndex``.
        default_end_index: Value stored under ``DefaultEndIndex``.
        default_interval: Value stored under ``DefaultInterval``.

    The defaults reproduce the values that used to be hard-coded, so calling
    ``dv_footer()`` without arguments returns the same definition as before.

    Returns:
        A dict with the grouping field (metadata key ``Asset_Id``), the
        default index range and interval, the index type and the shape.
    """
    # NOTE(review): update_stream() writes the metadata key as "asset_id"
    # (lower case) while the grouping key here is "Asset_Id" - confirm that
    # the lookup is case-insensitive.
    return {
        "GroupingFields": [
            {
                "Source": "Metadata",
                "Keys": ["Asset_Id"],
                "Label": "{IdentifyingValue} {FirstKey}",
            }
        ],
        "DefaultStartIndex": default_start_index,
        "DefaultEndIndex": default_end_index,
        "DefaultInterval": default_interval,
        "IndexTypeCode": "DateTime",
        "Shape": "Standard",
    }

In [29]:
def build_dv(asset_id, tag, dv_id, dv_name, description):
    """Assemble a complete data view definition as one flat dict.

    The result is the header, followed by two stream queries ("value" and
    "digital", the latter restricted with ``TypeId:PI-Digital``), the two
    matching data field sets (``Value`` and ``DigitalStateName`` with the
    ``__ds`` label suffix) and the footer. The asset database comes from the
    module-level ``asset_db``.
    """
    ## tag = "hub__" + tag
    definition = dict(dv_header(asset_id, dv_name, dv_id, description))
    definition["Queries"] = [
        dv_query(asset_db, asset_id, tag, "value"),
        dv_query(asset_db, asset_id, tag, "digital", " AND TypeId:PI-Digital"),
    ]
    definition["DataFieldSets"] = [
        dv_datafield("value", "Value", tag),
        dv_datafield("digital", "DigitalStateName", tag, "__ds"),
    ]
    definition.update(dv_footer())
    return definition

In [30]:
## dv = build_dv(["FV31"], "all_columns", "test-dv", "Default", "Test Description")
## print(json.dumps(dv, indent=2))
# Sample build with one asset (HOT-1) and a different tag (HTR-1), so both
# appear in the generated stream queries.
dv = build_dv(["HOT-1"], "HTR-1", "test-dv", "Default", "Test Description")
print(json.dumps(dv, indent=2))

{
  "Id": "test-dv",
  "Name": "test-dv",
  "Description": "Test Description",
  "IndexField": {
    "Source": "NotApplicable",
    "Keys": [],
    "Label": "Timestamp"
  },
  "Queries": [
    {
      "Id": "Asset_value",
      "Kind": "Stream",
      "Value": "asset_db:\"pilot.plant\" AND \"HOT-1|element\" AND \"HTR-1|element\""
    },
    {
      "Id": "Asset_digital",
      "Kind": "Stream",
      "Value": "asset_db:\"pilot.plant\" AND \"HOT-1|element\" AND \"HTR-1|element\" AND TypeId:PI-Digital"
    }
  ],
  "DataFieldSets": [
    {
      "QueryId": "Asset_value",
      "DataFields": [
        {
          "Source": "PropertyId",
          "Keys": [
            "Value"
          ],
          "Label": "{IdentifyingValue}"
        }
      ],
      "IdentifyingField": {
        "Source": "Metadata",
        "Keys": [
          "HTR-1|column"
        ],
        "Label": "{IdentifyingValue} {FirstKey}"
      }
    },
    {
      "QueryId": "Asset_digital",
      "DataFields": [
        

In [31]:
from ocs_sample_library_preview import DataView

In [32]:
# GraphQL mutation that sets `ocs_sync` to true on the DataView node with the
# given id and returns its id and flag.
dataview_mutation = gql(
    """
mutation SyncDV($id: ID!) {
  MergeDataView(id: $id, ocs_sync: true) {
    id
    ocs_sync
  }
}
"""
)


def sync_dataview(dv_def):
    """Create or replace one data view in OCS, then flag it as synced in the graph.

    ``dv_def`` is one record of the DataView query and must provide the keys
    ``asset_id``, ``ocs_tag``, ``id``, ``name`` and ``description``. The
    definition built by ``build_dv`` is PUT into ``namespace_id``; afterwards
    the ``dataview_mutation`` is executed for ``dv_def["id"]`` and its result
    is printed between square brackets.
    """
    dv_id = dv_def["id"]
    definition = build_dv(
        dv_def["asset_id"],
        dv_def["ocs_tag"],
        dv_id,
        dv_def["name"],
        dv_def["description"],
    )
    # print(json.dumps(definition, indent=2))
    hub.DataViews.putDataView(namespace_id, DataView.fromDictionary(definition))
    # The graph is only updated once the PUT above has returned without raising.
    result = client.execute(dataview_mutation, variable_values={"id": dv_id})
    print(f"[{result}]")

In [33]:
# Push every data view returned by the DataView query (those with
# ocs_sync: false) to OCS and flag it as synced.
for dv_def in dataviews["DataView"]:
    sync_dataview(dv_def)

[{'MergeDataView': {'id': 'pilot.plant_hot-1', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_e-101', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_e-102', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_htr-101', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_tk-101', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_tk-102', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-1_tk-103', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3_bottoms_product', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3_energy_in', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3_energy_out', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3_overhead_product', 'ocs_sync': True}}]
[{'MergeDataView': {'id': 'pilot.plant_hot-3_pressure', 'ocs_sync': True}}]
[{'MergeDataView': {'id':