In [1]:
# Notebook configuration.
# exampleName is used for the namespace and the output file name,
# exampleDescription becomes the ontology comment, and testing=True
# limits the run to the first rows of the point list.
exampleName = "IBM_B3"
exampleDescription = "Example ontology for IBM Building 3, Dublin"
testing = False

In [2]:
#!pip install rdflib
#!pip install pydot2
#!pip install pydotplus

In [3]:
import rdflib
from rdflib.namespace import RDFS
from rdflib import URIRef, BNode, Literal
import re
from collections import defaultdict
import numpy as np
import pandas as pd

In [4]:
def ns(url):
  """Remove the Brick, BrickFrame and BrickTag namespace URIs from `url`.

  Every occurrence of each namespace string is deleted; whatever is left
  (normally the bare local name) is returned. Strings that contain none of
  the three namespaces come back unchanged.
  """
  for namespace_uri in (
      "http://buildsys.org/ontologies/Brick#",
      "http://buildsys.org/ontologies/BrickFrame#",
      "http://buildsys.org/ontologies/BrickTag#",
  ):
    url = url.replace(namespace_uri, "")
  return url

# Namespaces of the three Brick schema files.
BRICKF = rdflib.Namespace('http://buildsys.org/ontologies/BrickFrame#')
TAGS = rdflib.Namespace('http://buildsys.org/ontologies/BrickTag#')
TS = rdflib.Namespace('http://buildsys.org/ontologies/Brick#')

# A single graph receives all three schema files; the prefixes bound here
# (bf, tag, ts) are the ones the SPARQL queries in this notebook rely on.
g = rdflib.Graph()
for prefix, namespace in (('bf', BRICKF), ('tag', TAGS), ('ts', TS)):
  g.bind(prefix, namespace)
for schema_file in ('BrickFrame.ttl', 'BrickTag.ttl', 'Brick.ttl'):
  result = g.parse('../../../Brick/' + schema_file, format='n3')
print(len(g))

29283


define your own namespace

In [5]:
# Namespace for the individuals generated below; its URI is derived from exampleName.
MyNS = rdflib.Namespace('http://buildsys.org/ontologies/examples/{}#'.format(exampleName))
g.bind(':', MyNS)

### Load TagSets

In [6]:
# Local names of every class that is a (transitive) subclass of bf:Tag.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:Tag . }""")
brickTags = {ns(row['ts']) for row in qres}
len(brickTags)

314

In [7]:
# Local names of every (transitive) subclass of bf:TagSet, plus a lookup from
# each tag set name to the set of '_'-separated tags it is composed of.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:TagSet . }""")
tagset_names = [ns(row['ts']) for row in qres]
brickTagSets = set(tagset_names)
brickTagSetTags = {name: set(name.split('_')) for name in tagset_names}
len(brickTagSets)

1952

In [8]:
# 'Location' itself plus the local names of all its (transitive) subclasses.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Location . }""")
loc_tags = {'Location'}.union(ns(row['ts']) for row in qres)
len(loc_tags)

14

In [9]:
# 'Point' plus the local names of all its (transitive) subclasses.
# pointTagSetTags maps each subclass name (not 'Point' itself) to its set of
# '_'-separated tags and keeps the order in which the query returned them.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Point . }""")
point_names = [ns(row['ts']) for row in qres]
point_tags = {'Point'}.union(point_names)
pointTagSetTags = {name: set(name.split('_')) for name in point_names}
len(point_tags)

1336

In [10]:
# 'MeasurementProperty' plus the local names of all its (transitive) subclasses.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:MeasurementProperty . }""")
measurment_tags = {'MeasurementProperty'}.union(ns(row['ts']) for row in qres)
len(measurment_tags)

766

In [11]:
# Local names of all (transitive) subclasses of ts:Equipment, seeded with 'Asset'.
# NOTE(review): the seed is 'Asset' although the query root is ts:Equipment;
# the other tag cells seed with the root class name - confirm this is intended.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Equipment . }""")
asset_tags = {'Asset'}.union(ns(row['ts']) for row in qres)
len(asset_tags)

155

Remove hierarchical tags

In [12]:
# Drop every class whose bf:isHierarchical value is the empty string literal
# from all tag / tag set collections built above.
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts bf:isHierarchical "". }""")
for row in qres:
  ts = ns(row['ts'])
  for tag_collection in (brickTags, brickTagSets, point_tags, asset_tags, measurment_tags):
    tag_collection.discard(ts)
  for tagset_lookup in (brickTagSetTags, pointTagSetTags):
    tagset_lookup.pop(ts, None)
# Only the last expression of a cell is displayed.
len(brickTagSets)
len(point_tags)

1335

In [13]:
def IndivName(name):
  """Turn a free-text label into an identifier for a generated individual.

  Characters that are neither word characters nor whitespace are removed,
  every remaining whitespace character becomes an underscore, and a 'd' is
  prepended when the result would otherwise start with a digit.

  Args:
    name: the label to sanitize (a string).

  Returns:
    The sanitized string. It is empty when `name` contains no word
    characters and no whitespace (including when `name` itself is empty).
  """
  rname = re.sub(r'\s', '_', re.sub(r'[^\d\w\s]', '', name))
  # Slice rather than index: an empty result must not raise IndexError.
  if rname[:1].isdigit():
    rname = 'd' + rname
  return rname

### Load Data

In [14]:
# Read the ';'-separated point list of the building.
df = pd.read_csv('IBM_B3_points.csv', sep=";")
if testing:
  # Trial run: continue with the first 20 rows only.
  df = df.head(20)
df.head()

Unnamed: 0,Label,TagSet,AssetType,Asset,AssetParent,isFedBy,City,Building,Wing,Floor,Room,Zone,AssetType2,Asset2
0,1F_MID_OPENOFF_CO2,CO2_Sensor,,,,AHU1;Boiler1;Boiler2;Chiller1,Dublin,B3,SOR42,FirstFloor,OpenOffice,Middle,,
1,1F_NRTH_OPENOFF_CO2,CO2_Sensor,,,,AHU1;Boiler1;Boiler2;Chiller1,Dublin,B3,SOR42,FirstFloor,OpenOffice,North,,
2,1F_STH_OPENOFF_CO2,CO2_Sensor,,,,AHU1;Boiler1;Boiler2;Chiller1,Dublin,B3,SOR42,FirstFloor,OpenOffice,South,,
3,421_U10_CLG_VLV,FCU_Cooling_Valve_Command,FCU,42-FCU10,,AHU1;Boiler1;Boiler2;Chiller1,Dublin,B3,SOR42,FirstFloor,SOR42_1_U10,,,
4,421_U10_DAT,FCU_Supply_Air_Temperature_Sensor,FCU,42-FCU10,,AHU1;Boiler1;Boiler2;Chiller1,Dublin,B3,SOR42,FirstFloor,SOR42_1_U10,,,


In [15]:
# Number of rows in the point list.
df.shape[0]

2154

Analyze Dataset

In [16]:
# Distinct tag sets used in the point list (rows without a TagSet are ignored)
# and the individual '_'-separated tags they are made of.
# Series.unique() is used directly; Series.ravel() is deprecated in pandas.
dfTagSets = set(df.TagSet.dropna().unique())
dfTags = set()
for ts in dfTagSets:
  dfTags.update(ts.split('_'))
len(dfTags)

73

In [17]:
# Tags that occur in the point list but are not part of the Brick tag vocabulary.
dfMissingTags = dfTags.difference(brickTags)
print("Missing Tags from Bricks:" + str(len(dfMissingTags)))
print(dfMissingTags)

Missing Tags from Bricks:6
{'Condensator', 'Fresh', 'Boiler', 'Minimum', 'Number', 'Gas'}


Find best matching tag sets for the defined ones

In [18]:
def _closestTagSets(tags, candidates):
  """Return the names of the candidate tag sets that best match `tags`.

  Args:
    tags: set of tags to match.
    candidates: dict mapping a tag set name to its set of tags.

  Returns:
    A list of candidate names, in the iteration order of `candidates`.
    Candidates sharing the largest number of tags with `tags` are kept; if
    more than one remains, only those with the fewest tags that are not in
    `tags` are returned.
  """
  bestOverlap = 0
  best = []
  for name, candidateTags in candidates.items():
    overlap = len(tags & candidateTags)
    if overlap > bestOverlap:
      bestOverlap = overlap
      best = []  # a larger overlap supersedes everything collected so far
    if overlap == bestOverlap:
      best.append(name)
  if len(best) > 1:
    # The tie-break threshold is the minimum actually observed, so the result
    # is never emptied when every candidate carries many extra tags.
    extras = [len(candidates[name] - tags) for name in best]
    fewestExtras = min(extras)
    best = [name for name, extra in zip(best, extras) if extra == fewestExtras]
  return best


# closestMatches: tag set used in the data -> list of best matching Brick point tag sets.
# fullMatches: tag sets used in the data that exist verbatim as Brick point tag sets.
closestMatches = {}
fullMatches = {}
for ts in df.TagSet.dropna().unique():
  if ts in pointTagSetTags:
    closestMatches[ts] = [ts]
    fullMatches[ts] = ts
  else:
    closestMatches[ts] = _closestTagSets(set(ts.split('_')), pointTagSetTags)

Manual Mapping based on the results.

In [19]:
# Show each tag set without an exact Brick match together with its closest candidates.
for ts, candidates in closestMatches.items():
  if ts in fullMatches:
    continue
  print(ts+":"+str(candidates))

Boiler_Start_Number_Sensor:['Boiler_Start_Stop_Status', 'Boiler_Run_Time_Sensor']
Boiler_Condensator_Setpoint:['Condensator_Setpoint']


write header

In [30]:
# Turtle documents are UTF-8, so the encoding is fixed here instead of being
# left to the platform default. The handle is deliberately left open: the
# cells below keep writing to `fo` until it is closed explicitly.
fo = open(exampleName+'.ttl', 'w', encoding='utf-8')
# Prefix declarations.
fo.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n""")
fo.write("@prefix bf:  <http://buildsys.org/ontologies/BrickFrame#> .\n")
fo.write("@prefix tag: <http://buildsys.org/ontologies/BrickTag#> .\n")
fo.write("@prefix ts:  <http://buildsys.org/ontologies/Brick#> .\n")
fo.write("@prefix : <"+str(MyNS)+"> .\n\n")
# Ontology header: the ontology IRI is the namespace without its '#'.
fo.write("<"+str(MyNS).replace("#","")+">  a owl:Ontology ;\n")
fo.write("\towl:imports <http://buildsys.org/ontologies/Brick> ;\n")
# Kept as the last expression so the cell still displays the write() result.
fo.write('\trdfs:comment "'+exampleDescription+'"@en .\n\n')

66

write locations

In [31]:
# create location individuals: one per distinct non-empty value of each
# location column, typed with the column name and labelled with the value
locationCols=["City","Building","Wing","Floor","Room","Zone"]
locations={}
for col in locationCols:
  # Series.unique() replaces the deprecated Series.ravel() + pd.unique()
  for indiv in df[col].dropna().unique():
    if indiv!="":
      indivLocName=IndivName(col+"_"+indiv)
      fo.write("\n :"+indivLocName+"  a   ts:"+col+";")
      fo.write('\n\t\t\t rdfs:label "'+indiv+'"@en .\n')
      # NOTE(review): keyed by the raw value only - the same value in two
      # different columns would share one entry; confirm values are unique.
      locations[indiv]=indivLocName
# add relations between each location and the one in the preceding column
writtenLinks=set()  # most rows repeat the same hierarchy; write each link once
for idx in df.index:
  for parentCol, childCol in zip(locationCols, locationCols[1:]):
    childIndiv =df.loc[idx,childCol]
    parentIndiv=df.loc[idx,parentCol]
    if childIndiv in locations and parentIndiv in locations:
      link=(locations[childIndiv], locations[parentIndiv])
      if link in writtenLinks:
        continue
      writtenLinks.add(link)
      fo.write("\n :"+locations[childIndiv]+"  bf:isPartOf   :"+locations[parentIndiv]+".")
      fo.write("\n :"+locations[parentIndiv]+"  bf:hasPart   :"+locations[childIndiv]+".\n")

write assets

In [32]:
# Declare one individual per distinct value of the Asset column and link it
# to its parent asset when the row names one.
assets = {}
for idx in df.index:
  assetName = str(df.loc[idx, "Asset"])
  # Skip rows without an asset and assets that were already declared.
  if assetName in ("", "nan") or assetName in assets:
    continue
  assetType = df.loc[idx, "AssetType"]
  assetIndiv = IndivName(assetType+"_"+assetName)
  fo.write("\n :"+assetIndiv+"  a  ts:"+assetType+";")
  fo.write('\n\t\t\t rdfs:label "'+assetName+'"@en .\n')
  assets[assetName] = assetIndiv

  assetParent = str(df.loc[idx, "AssetParent"])
  if assetParent in ("", "nan"):
    continue
  if assetParent not in assets:
    # A parent that has not been seen yet is declared with the child's asset type.
    parentIndiv = IndivName(assetType+"_"+assetParent)
    fo.write("\n :"+parentIndiv+"  a   ts:"+assetType+";")
    fo.write('\n\t\t\t rdfs:label "'+assetParent+'"@en .\n')
    assets[assetParent] = parentIndiv
  childRef = assets[assetName]
  parentRef = assets[assetParent]
  fo.write("\n :"+childRef+"  bf:isPartOf   :"+parentRef+".")
  fo.write("\n :"+parentRef+"  bf:hasPart   :"+childRef+".\n")

In [33]:
# Write the bf:isFedBy / bf:feeds relations between each asset and the
# ';'-separated assets listed in its isFedBy column.
writtenFeeds = set()  # an asset appears on many rows; write each pair once
for idx in df.index:
  assetName = str(df.loc[idx, "Asset"])
  if assetName=="" or assetName=="nan":
    continue
  assetFeed = str(df.loc[idx, "isFedBy"])
  if assetFeed=="" or assetFeed=="nan":
    continue
  for feed in assetFeed.split(";"):
    # A trailing or doubled ';' yields an empty token, which is never a key
    # of `assets`; skip it instead of failing with a KeyError.
    if feed=="" or (assetName, feed) in writtenFeeds:
      continue
    writtenFeeds.add((assetName, feed))
    fo.write("\n :"+assets[assetName]+"  bf:isFedBy   :"+assets[feed]+".")
    fo.write("\n :"+assets[feed]+"  bf:feeds   :"+assets[assetName]+".\n")

In [34]:
# Declare the assets named in the Asset2 column that are not in `assets` yet.
for idx in df.index:
  assetName = str(df.loc[idx, "Asset2"])
  alreadyDeclared = assetName in assets
  if alreadyDeclared or assetName in ("", "nan"):
    continue
  assetType = df.loc[idx, "AssetType2"]
  assetIndiv = IndivName(assetType+"_"+assetName)
  fo.write("\n :"+assetIndiv+"  a  ts:"+assetType+";")
  fo.write('\n\t\t\t rdfs:label "'+assetName+'"@en .\n')
  assets[assetName] = assetIndiv

Write points (one individual per row, typed by its tag set)

In [36]:
# Write one individual per row of the point list, typed by its tag set and
# linked to its most specific location and to its asset(s).
declaredTagSets = set()  # custom tag set classes already written to the file
for idx in df.index:
  label = df.loc[idx, "Label"]
  pointIndivName = IndivName(label)
  pointType = df.loc[idx, "TagSet"]
  if pointType in fullMatches:
    # The tag set exists in Brick: type the point with the Brick class.
    fo.write("\n :"+pointIndivName+"  a  bf:Label, bf:TagSet, ts:"+fullMatches[pointType]+";")
  else:
    # Unknown tag set: declare it in the example namespace, once per tag set
    # rather than once per row.
    if pointType not in declaredTagSets:
      declaredTagSets.add(pointType)
      fo.write("\n :"+pointType+"  rdfs:subClassOf   bf:TagSet.")
    fo.write("\n :"+pointIndivName+"  a   bf:Label, bf:TagSet, :"+pointType+";")
  fo.write('\n \t\t\t rdfs:label "'+label+'"@en ;')
  # bf:hasTag triples for the individual tags are not written.
  # write location: the last non-empty location column of the row
  # (.iloc[-1] replaces the deprecated Series.ravel()[-1])
  loc = df.loc[idx, locationCols].dropna().iloc[-1]
  fo.write("\n \t\t\t bf:isPointOf :"+locations[loc]+';')
  fo.write("\n \t\t\t bf:isLocatedIn :"+locations[loc]+'. \n')
  fo.write("\n :"+locations[loc]+"  bf:hasPoint  :"+pointIndivName+".")
  # write assets: the same relations are emitted for Asset and Asset2
  for nameCol, typeCol in (("Asset", "AssetType"), ("Asset2", "AssetType2")):
    assetName = str(df.loc[idx, nameCol])
    assetType = str(df.loc[idx, typeCol])
    if assetName=="" or assetName=="nan":
      continue
    fo.write("\n :"+assets[assetName]+"  bf:hasPoint  :"+pointIndivName+".")
    fo.write("\n :"+pointIndivName+"  bf:isPointOf  :"+assets[assetName]+".\n")
    # FCU assets and lighting asset types additionally feed the point's location.
    if "FCU" in assetName or "Lighting" in assetType:
      fo.write("\n :"+assets[assetName]+"  bf:feeds  :"+locations[loc]+".")
      fo.write("\n :"+locations[loc]+"  bf:isFedBy   :"+assets[assetName]+".")

In [37]:
# Close the output file opened in the header cell; the next cell parses it back from disk.
fo.close()

In [38]:
# Parse the generated file into a fresh graph (replacing the schema graph in
# `g`) and write it back to the same path in rdflib's Turtle serialization.
ttl_path = exampleName + '.ttl'
g = rdflib.Graph()
result = g.parse(ttl_path, format='n3')
g.serialize(destination=ttl_path, format='turtle')

In [39]:
# Total number of distinct subject/object pairs over the relation properties
# written by this notebook.
ln = 0
for prop in ("hasPoint", "isPointOf", "isFedBy", "feeds", "isPartOf", "hasPart", "isLocatedIn"):
  qres = g.query("SELECT DISTINCT ?a ?b WHERE {  ?a bf:" + prop + " ?b . }")
  ln += len(qres)
print(ln)

14074
