Step 2: Augment database with asset data view and supporting metadata

#### For all elements/assets with PIPoints, create a default Data View having as columns all those PIPoints

Data View column names are the attribute names in AF, so elements sharing a template have identical Data Views modulo missing streams

This notebook also creates specialized Data Views:

* Some are subsets of the default Data View, per asset
* Others are multi-asset versions of the above

#### After running this notebook, the graph has all the necessary information for Step 2: projection of tags/metadata on streams and Data View creation 

In [None]:
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
import json
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import yaml
import pprint
from collections import OrderedDict

## Input Parameters

In [None]:
# Path of the YAML configuration file that drives this notebook.
config_file = "config-windfarm.yaml"
# Alternative configuration, kept for reference:
# config_file = "config-acad-prod-desc-v2.yaml"

In [None]:
# Load the notebook configuration.  The encoding is stated explicitly so the
# file is decoded the same way regardless of the platform's locale default.
with open(config_file, encoding="utf-8") as f:
    config = yaml.safe_load(f)
# config

In [None]:
# Input parameters

database_name = config["db"]["database_name"]
root_element = config["db"]["root_element"]
dv_id_prefix = config["ocs"]["configuration"]["asset_db"]

# Each asset in a Hub dataset is endowed with a default Data View (DV) tagged
# "all_columns" that has all the asset streams.
# dv_defs maps a DV name to (ocs_tag, column list, predicate on the asset id).
# If the column list is the empty list [], all streams go into the Data View.
dv_defs = {"Default": ("all_columns", [], lambda _asset_id: True)}
# Predicate on the asset id: an asset for which it returns True gets no DV.
has_no_dv = lambda _asset_id: False


# The YAML config file may embed additional code to define custom Data Views
# (presumably by updating dv_defs / has_no_dv — verify against the config).
# SECURITY NOTE: exec() runs arbitrary Python taken from the config file; only
# use configuration files from a trusted source.
custom_dvs_code = config.get("custom_dvs")
if custom_dvs_code:
    try:
        exec(custom_dvs_code)
    except Exception as exc:
        # Stay best-effort (the defaults above remain in force) but report the
        # failure instead of hiding it.
        print(f"[custom_dvs ignored: {type(exc).__name__}: {exc}]")

dv_defs

In [None]:
# HTTP transport to the GraphQL endpoint configured under neo4j.graphql,
# with up to 3 retries.
# NOTE(review): verify=False disables TLS certificate verification (the
# corresponding urllib3 warning is silenced next to the imports) — confirm
# this is acceptable for the target environment.
sample_transport = RequestsHTTPTransport(
    url=config["neo4j"]["graphql"], verify=False, retries=3
)
# GraphQL client used by every query and mutation below.
client = Client(transport=sample_transport, fetch_schema_from_transport=True)

In [None]:
# Look up the Database node by name to retrieve its id and asset_db fields.
db_query = gql(
    """
query DatabaseId($database: String) {
    Database(name: $database) {
        name
        id
        asset_db
    }
}
"""
)
print(f"database_name={database_name}")
db = client.execute(db_query, variable_values={"database": database_name})
# Echo the raw reply for inspection.
print(json.dumps(db, indent=4))

In [None]:
# Use the first database returned by the lookup; an empty reply raises
# IndexError here.
db_id = db["Database"][0]["id"]
asset_db = db["Database"][0]["asset_db"]
asset_db, db_id

In [None]:
# Fetch an element by name together with its direct children (has_element)
# and, for each child, its has_dynamic entries (name, stream_name, id,
# pointsource).  Executed first for the configured root element.
root_query = gql(
    """
query RootElementTree($root : String) {
    Element(name: $root) {
        name
        id
        has_element {
            name
            id
            has_dynamic {
                  name
                  stream_name
                  id
                  pointsource
            }
        }
    }
}
"""
)
j = client.execute(root_query, variable_values={"root": root_element})
print(json.dumps(j, indent=4))

In [None]:
# Direct children of the first element matching the root name.
elements = j["Element"][0]["has_element"]
len(elements), elements

In [None]:
# Walk the hierarchy below the root.  all_elements is the SAME list object as
# elements, so the children appended during the scan are themselves visited
# later on; the scan stops once every visited element has been queried.
all_elements = elements
position = 0
while position < len(all_elements):
    element = all_elements[position]
    j = client.execute(root_query, variable_values={"root": element["name"]})
    all_elements.extend(j["Element"][0]["has_element"])
    position += 1
all_elements

In [None]:
# Display how many elements were collected, followed by the elements.
len(all_elements), all_elements

In [None]:
# From here on, work with the full collection under the name `elements`.
elements = all_elements
len(elements), elements

In [None]:
# Number of distinct element names (compare with len(elements) above).
len({element["name"] for element in elements})

In [None]:
# Index the elements by name; when a name repeats, the last occurrence wins.
d = {element["name"]: element for element in elements}
d.keys()

In [None]:
# One entry per distinct name, ordered by name.
elements = [d[name] for name in sorted(d)]
elements

In [None]:
# Build (name, id, streams) triples for every element that has at least one
# has_dynamic entry.  Streams whose pointsource is "AZURE" (future tags) are
# dropped; note the emptiness test is made BEFORE that filtering, so an
# element may be kept with an empty stream list.
elements_info = []
for element in elements:
    if len(element["has_dynamic"]) > 0:
        kept_streams = [
            stream
            for stream in element["has_dynamic"]
            if stream["pointsource"] != "AZURE"
        ]
        elements_info.append((element["name"], element["id"], kept_streams))
elements_info

In [None]:
# Link every element that carries streams to the database through the
# asset_with_dv relationship.  The mutation document is identical for all
# elements, so it is parsed once, outside the loop.
mutation = gql(
    """
mutation AssetWithDataView($from: _DatabaseInput!, $to: _ElementInput!) {
    MergeDatabaseAsset_with_dv(
        from: $from,
        to: $to
    ) {
        from {
            name
        }
        to {
            name
        }
    }
}
"""
)
for element, element_id, _ in elements_info:
    reply = client.execute(
        mutation, variable_values={"from": {"id": db_id}, "to": {"id": element_id}}
    )
    print(json.dumps(reply, indent=4))

In [None]:
def dataview_streams_transaction(dv_id, dv_streams):
    """Attach streams to a Data View with a single batched GraphQL mutation.

    One aliased ``AddDataViewHas_stream`` field (``stream0``, ``stream1``, ...)
    is generated per stream and all of them are sent in one request.

    Args:
        dv_id: id of the Data View node.
        dv_streams: list of stream dicts; only each dict's ``"id"`` is used.

    Returns:
        None.  With no streams, a marker line is printed and nothing is sent.
    """
    if not dv_streams:
        print(f"@@ dv_id={dv_id}")
        return

    # Ids are embedded in the query text, so they are rendered as properly
    # escaped string literals (JSON string escapes are valid in GraphQL);
    # a quote or backslash in an id would otherwise break the document.
    dv_literal = json.dumps(str(dv_id), ensure_ascii=False)
    mutation_template = """
stream{0}:
    AddDataViewHas_stream(
        from: {{id: {1} }}
        to: {{id: {2} }}
    ) {{
        from {{
            name
        }}
        to {{
            name
        }}
    }}
    """
    mutations = [
        mutation_template.format(
            index, dv_literal, json.dumps(str(stream["id"]), ensure_ascii=False)
        )
        for index, stream in enumerate(dv_streams)
    ]

    dv_stream_mutation = gql(
        "mutation dataview_streams {\n" + "".join(mutations) + "\n}"
    )

    reply = client.execute(dv_stream_mutation)
    # Progress marker: the Data View id and the size of the serialized reply.
    print(f"[add-dv-streams: {dv_id} ({len(json.dumps(reply, indent=4))})]", end="")

In [None]:
# Merge (create or update) a DataView node from its properties.  ocs_sync is
# always written as false by this mutation.
mutation_dv_node = gql(
    """
mutation AssetDataView($asset_db: String!, $asset_id: [String]!, $columns: String!, $description: String, $id: ID!, $name: String!, $ocs_tag: String!) {
    MergeDataView(
        asset_db: $asset_db
        asset_id: $asset_id
        id: $id
        columns: $columns 
        description: $description
        name: $name
        ocs_sync: false
        ocs_tag: $ocs_tag
    ) {
        name
        id 
    }
}
    """
)


# Add the has_dataview relationship from an Element to a DataView.
mutation_dv_rel = gql(
    """
mutation ElementDataView($from: _ElementInput!, $to: _DataViewInput!) {
    AddElementHas_dataview(
        from: $from
        to: $to
    ) {
        from {
            name
        }
        to {
            name
            id
        }
    }
}
    """
)

def dataview_id(asset_ids, ocs_tag, multiple=None):
    """Build the identifier of a Data View.

    The id is ``<dv_id_prefix>-<asset part>``, followed by ``-<ocs_tag>``
    unless the tag is ``"all_columns"``.

    Args:
        asset_ids: asset names covered by the Data View.
        ocs_tag: tag of the Data View definition.
        multiple: asset part to use when more than one asset is given.

    Returns:
        The Data View id as a string.
    """
    if len(asset_ids) > 1:
        dv_asset = multiple
    else:
        # Single asset: normalise its name (lower case, " " -> ".", "/" -> "__").
        dv_asset = asset_ids[0].lower().replace(" ", ".").replace("/", "__")
    tag_suffix = "" if ocs_tag == "all_columns" else f"-{ocs_tag}"
    return f"{dv_id_prefix}-{dv_asset}{tag_suffix}"

def create_asset_dataview(
    dv_name, asset_ids, elem_ids, all_streams, ocs_tag, columns, multiple=None
):
    """Create a Data View node, link it to its elements and attach its streams.

    Args:
        dv_name: display name of the Data View.
        asset_ids: asset names covered by the Data View.
        elem_ids: ids of the elements to link with ``has_dataview``.
        all_streams: stream dicts (``"name"``, ``"id"``) available for the asset.
        ocs_tag: tag of the Data View definition.
        columns: stream names to include; an empty list means every stream.
        multiple: label used in the id and description for multi-asset views.
    """
    dv_id = dataview_id(asset_ids, ocs_tag, multiple)

    if len(asset_ids) == 1:
        asset_desc = f"Asset {asset_ids[0]}"
    else:
        asset_desc = f"Assets {multiple.upper()}"

    include_everything = len(columns) == 0
    if include_everything:
        column_names = sorted({stream["name"] for stream in all_streams})
    else:
        column_names = sorted(columns)

    # 1. Data View node; the column list is stored as the str() of a list.
    node_variables = {
        "asset_db": asset_db,
        "asset_id": asset_ids,
        "columns": str(column_names),
        "description": f"Hub DV for {asset_desc} - {dv_name}",
        "id": dv_id,
        "name": dv_name,
        "ocs_tag": ocs_tag,
    }
    client.execute(mutation_dv_node, variable_values=node_variables)

    # 2. Element -> Data View relationships.
    for elem_id in elem_ids:
        client.execute(
            mutation_dv_rel,
            variable_values={"from": {"id": elem_id}, "to": {"id": dv_id}},
        )

    # 3. Data View -> stream relationships.
    if include_everything:
        dv_streams = all_streams
    else:
        dv_streams = [stream for stream in all_streams if stream["name"] in columns]

    dataview_streams_transaction(dv_id, dv_streams)

# asset name -> element id, for every asset that is not excluded by has_no_dv
element_ids = {}

for dv_name, (ocs_tag, columns, has_custom_dv) in dv_defs.items():
    print(f"dv_name={dv_name}")
    for asset_id, elem_id, all_streams in elements_info:
        if has_no_dv(asset_id):
            continue
        element_ids[asset_id] = elem_id
        # A definition with an empty column list applies to every asset; one
        # with explicit columns only to assets accepted by its predicate.
        if len(columns) == 0 or has_custom_dv(asset_id):
            create_asset_dataview(
                dv_name, [asset_id], [elem_id], all_streams, ocs_tag, columns
            )

In [None]:
def extract_columns(asset_ids, ocs_tag):
    """Return the sorted column names stored on the first asset's Data View.

    Only ``asset_ids[0]`` is consulted: the single-asset Data View for
    ``ocs_tag`` is fetched and its ``columns`` property decoded.

    ``create_asset_dataview`` writes that property as ``str(<list of names>)``,
    so it is decoded as a Python literal.  This keeps names containing commas
    or apostrophes intact and maps ``"[]"`` to an empty list.  Text that is
    not a list literal falls back to a plain comma split.

    Args:
        asset_ids: asset names; only the first one is used.
        ocs_tag: tag of the Data View definition.

    Returns:
        Sorted list of column names.
    """
    import ast  # local import: only needed by this function

    query = gql(
        """
    query DataView($id: ID!) {
        DataView(id: $id) {
            columns
        }
    }
    """
    )
    reply = client.execute(
        query, variable_values={"id": dataview_id([asset_ids[0]], ocs_tag)}
    )
    raw_columns = reply["DataView"][0]["columns"]

    try:
        decoded = ast.literal_eval(raw_columns)
    except (ValueError, SyntaxError):
        decoded = None
    if isinstance(decoded, (list, tuple)):
        return sorted(str(name) for name in decoded)

    # Fallback for text that is not a Python list literal: strip the
    # surrounding brackets and quotes, split on commas, drop empty entries.
    return sorted(
        name.strip()
        for name in raw_columns[1:-1].replace("'", "").split(",")
        if name.strip()
    )


# List of (asset_ids, dv_suffix) pairs describing the multi-asset Data Views.
multi_asset_dvs = []
# The YAML config file may embed code that fills multi_asset_dvs.
# SECURITY NOTE: exec() runs arbitrary Python taken from the config file; only
# use configuration files from a trusted source.
multi_asset_dvs_code = config.get("multi_asset_dvs")
if multi_asset_dvs_code:
    try:
        exec(multi_asset_dvs_code)
    except Exception as exc:
        # Stay best-effort (no multi-asset Data View) but report the failure.
        print(f"[multi_asset_dvs ignored: {type(exc).__name__}: {exc}]")
# Prints None when the configuration has no multi_asset_dvs entry.
print(multi_asset_dvs_code)

for asset_ids, dv_suffix in multi_asset_dvs:
    elem_ids = [element_ids[i] for i in asset_ids]
    for dv_name, (ocs_tag, columns, _) in dv_defs.items():
        if len(columns) == 0:
            # No explicit columns: reuse those of the first asset's Data View.
            columns = extract_columns(asset_ids, ocs_tag)
        create_asset_dataview(
            dv_name + "+", asset_ids, elem_ids, [], ocs_tag, columns, multiple=dv_suffix
        )

### Asset metadata gathering and update, plus geo-data if applicable

In [None]:
# Fetch, for the configured database, every asset linked through asset_with_dv
# together with its attributes (name, value, type), both ordered by name.
# The database name is passed as a variable instead of being hard-coded, so
# the cell follows the selected configuration file.
meta_query = gql(
    """
query AssetMetadata($database: String) {
  Database(name: $database) {
    asset_with_dv(orderBy: name_asc) {
      name
      id
      has_attribute(orderBy: name_asc) {
        name
        value
        type
      }
    }
  }
}
"""
)
result = client.execute(meta_query, variable_values={"database": database_name})



In [None]:
def convert2value(s, vtype):
    """Convert an attribute value according to its declared type.

    Values of type ``"Double"`` become floats; any other type is returned as
    a string.
    """
    return float(s) if vtype == "Double" else str(s)


def extract_meta(js, sub=False):
    """Turn an element's attributes into a ``{name: value}`` dictionary.

    Args:
        js: element dict with a ``"has_attribute"`` list of attribute dicts
            (``"name"``, ``"value"``, ``"type"``).
        sub: when true, each key is prefixed with the lower-cased element
            name followed by a dot.

    Returns:
        Dictionary of attribute names to values converted by
        ``convert2value``.
    """
    attributes = js["has_attribute"]
    prefix = js["name"].lower() + "." if sub else ""
    meta = {}
    for attribute in attributes:
        key = f"{prefix}{attribute['name']}"
        meta[key] = convert2value(attribute["value"], attribute["type"])
    return meta

In [None]:
# Quick check: extract the metadata of the first asset returned by the query.
d = extract_meta(result["Database"][0]["asset_with_dv"][0])
d

In [None]:
# Store a metadata string in the asset_metadata property of an Element.
meta_mutation = gql(
    """
mutation UpdateMeta($id: ID!, $meta: String) {
  MergeElement(id: $id, asset_metadata: $meta) {
    id
    name
    asset_metadata
  }
}
"""
)

# Store coordinates on an Element, both as the latitude/longitude properties
# and as the location field.
geo_mutation = gql(
    """
mutation UpdateGeo($id: ID!, $lat: Float, $long: Float)  {
    MergeElement(id: $id, latitude: $lat, longitude: $long, location: {latitude: $lat, longitude: $long}) {
        name
        id
        latitude
        longitude
        location {
            latitude
            longitude
        }
    }
}    
"""
)


def update_meta(wid, meta):
    """Write ``meta`` to the element ``wid`` and return the server reply.

    The metadata is sent as ``str(meta)``.
    """
    variables = {"id": wid, "meta": str(meta)}
    return client.execute(meta_mutation, variable_values=variables)


def update_geo(wid, lat, long):
    """Write the coordinates of the element ``wid`` and return the server reply."""
    variables = {"id": wid, "lat": lat, "long": long}
    return client.execute(geo_mutation, variable_values=variables)

In [None]:
# For every asset: gather its attributes (plus those of any has_element
# children present in the reply, prefixed with the child name), store them as
# metadata and, when geodata is enabled in the configuration, store its
# coordinates (0.0 when the attribute is absent).
for dai in result["Database"][0]["asset_with_dv"]:
    d = extract_meta(dai)
    for child in dai.get("has_element", []):
        d.update(extract_meta(child, True))
    print(dai["name"], dai["id"], d)
    update_meta(dai["id"], d)
    if config["neo4j"].get("has_geodata", False):
        update_geo(dai["id"], d.get("Latitude", 0.0), d.get("Longitude", 0.0))