In [None]:
# config: values that parameterise the whole notebook
# exampleName is used to build the example namespace URI and the name of the output .ttl file
exampleName="IBM_B3"
# exampleDescription is written as the rdfs:comment of the generated ontology
exampleDescription="Example ontology for IBM Building 3, Dublin"
# when True, only the first 20 rows of the points CSV are processed
testing=False

In [None]:
#!pip install rdflib
#!pip install pydot2
#!pip install pydotplus

In [None]:
import rdflib
from rdflib.namespace import RDFS
from rdflib import URIRef, BNode, Literal
import re
from collections import defaultdict
import numpy as np
import pandas as pd

In [None]:
def ns(url):
  """Strip the Brick 1.0.1 namespace URIs from a string.

  Every occurrence of the Brick, BrickFrame and BrickTag namespace URIs is
  removed, which reduces a full class URI to its local name. Text that does
  not contain any of the three namespaces is returned unchanged.
  """
  for namespaceUri in (
    "https://brickschema.org/schema/1.0.1/Brick#",
    "https://brickschema.org/schema/1.0.1/BrickFrame#",
    "https://brickschema.org/schema/1.0.1/BrickTag#",
  ):
    url = url.replace(namespaceUri, "")
  return url

# Namespaces of the three Brick 1.0.1 vocabularies
BRICKF = rdflib.Namespace('https://brickschema.org/schema/1.0.1/BrickFrame#')
TAGS   = rdflib.Namespace('https://brickschema.org/schema/1.0.1/BrickTag#')
TS     = rdflib.Namespace('https://brickschema.org/schema/1.0.1/Brick#')

# New graph with the prefixes bf, tag and ts bound to those namespaces
g = rdflib.Graph()
for prefix, namespace in (('bf', BRICKF), ('tag', TAGS), ('ts', TS)):
  g.bind(prefix, namespace)

# Parse the schema files into the graph: BrickFrame, then BrickTag, then Brick
for schemaFile in (
  '../../../Brick/BrickFrame.ttl',
  '../../../Brick/BrickTag.ttl',
  '../../../Brick/Brick.ttl',
):
  result = g.parse(schemaFile, format='n3')

# Number of triples loaded
print(len(g))

Define your own namespace

In [None]:
# Namespace for the individuals of this example; its URI is derived from exampleName
MyNS = rdflib.Namespace('https://brickschema.org/schema/1.0.1/examples/'+exampleName+'#')
# Register that namespace on the graph under the prefix string ':'
g.bind(':', MyNS)

### Load TagSets

In [None]:
# Local names of all (transitive) subclasses of bf:Tag
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:Tag . }""")
brickTags = {ns(row['ts']) for row in qres}
len(brickTags)

In [None]:
# Local names of all (transitive) subclasses of bf:TagSet, plus the tags each
# name is made of (the name split on '_')
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ bf:TagSet . }""")
brickTagSetTags = {
  tagSetName: set(tagSetName.split('_'))
  for tagSetName in (ns(row['ts']) for row in qres)
}
brickTagSets = set(brickTagSetTags)
len(brickTagSets)

In [None]:
# 'Location' itself plus the local names of all its (transitive) subclasses
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Location . }""")
loc_tags = {'Location'}.union(ns(row['ts']) for row in qres)
len(loc_tags)

In [None]:
# Tag sets below ts:Point: each local name mapped to the tags it is made of
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Point . }""")
pointTagSetTags = {
  tagSetName: set(tagSetName.split('_'))
  for tagSetName in (ns(row['ts']) for row in qres)
}
# 'Point' itself is part of point_tags but has no entry in pointTagSetTags
point_tags = {'Point'}.union(pointTagSetTags)
len(point_tags)

In [None]:
# 'MeasurementProperty' itself plus the local names of all its (transitive) subclasses
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:MeasurementProperty . }""")
measurment_tags = {'MeasurementProperty'}.union(ns(row['ts']) for row in qres)
len(measurment_tags)

In [None]:
# Local names of all (transitive) subclasses of ts:Equipment, seeded with 'Asset'
# NOTE(review): the seed is 'Asset' although the query root is ts:Equipment — confirm this is intended
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts rdfs:subClassOf+ ts:Equipment . }""")
asset_tags = {'Asset'}.union(ns(row['ts']) for row in qres)
len(asset_tags)

Remove hierarchical tags

In [None]:
# Drop every class flagged with bf:isHierarchical "" from the collected tag data
# (loc_tags is left untouched, as before)
qres = g.query("""SELECT DISTINCT ?ts WHERE {  ?ts bf:isHierarchical "". }""")
for row in qres:
  ts=ns(row['ts'])
  # sets: discard() is a no-op when the name is absent
  for tagCollection in (brickTags, brickTagSets, point_tags, asset_tags, measurment_tags):
    tagCollection.discard(ts)
  # dicts: pop() with a default is a no-op when the key is absent
  for tagSetIndex in (brickTagSetTags, pointTagSetTags):
    tagSetIndex.pop(ts, None)
# only the value of the last expression is displayed by the notebook
len(brickTagSets)
len(point_tags)

In [None]:
def IndivName(name):
  """Turn a free-text label into an identifier usable as an individual's local name.

  Characters that are neither word characters nor whitespace are dropped,
  every remaining whitespace character becomes an underscore, and a 'd' is
  prepended when the result starts with a digit.

  Args:
    name: the label text (a string).

  Returns:
    The cleaned identifier.

  Raises:
    ValueError: if nothing is left after cleaning (empty or punctuation-only
      input). Indexing the empty result used to fail with an IndexError that
      did not say which label was at fault.
  """
  rname=re.sub(r'\s','_',re.sub(r'[^\d\w\s]', '', name))
  if not rname:
    raise ValueError("cannot derive an individual name from "+repr(name))
  if rname[0].isdigit(): rname='d'+rname
  return rname

### Load Data

In [None]:
# Read the semicolon-separated point list that drives the rest of the notebook
df=pd.read_csv('IBM_B3_points.csv',sep=";")
# In testing mode keep only the first 20 rows
if testing: df=df.head(20)
# Preview the first rows
df.head()

In [None]:
# Number of rows that will be processed
len(df)

Analyze Dataset

In [None]:
# Distinct tag sets used in the data; rows without a TagSet are ignored.
# Series.unique() replaces pd.unique(Series.ravel()): Series.ravel() is
# deprecated in recent pandas and the Series is one-dimensional anyway.
dfTagSets=set(df.TagSet.dropna().unique())
# Every individual tag occurring in those tag sets (names split on '_')
dfTags=set()
for ts in dfTagSets:
  dfTags.update(ts.split('_'))
len(dfTags)

In [None]:
# Tags used in the data that are not among the tags loaded from the Brick schema
dfMissingTags=dfTags.difference(brickTags)
missingCount=len(dfMissingTags)
print("Missing Tags from Bricks:" + str(missingCount))
print(dfMissingTags)

Find best matching tag sets for the defined ones

In [None]:
# For every tag set used in the data, find the Brick point tag set(s) that fit best.
#   fullMatches:    data tag set -> identical Brick point tag set (exact hits only)
#   closestMatches: data tag set -> list of best-fitting Brick point tag sets
closestMatches={}
fullMatches={}
for ts in df.TagSet.dropna().unique():
  if ts in pointTagSetTags:
    # exact hit: the data tag set is itself a Brick point tag set
    closestMatches[ts]=[ts]
    fullMatches[ts]=ts
    continue
  tss=set(ts.split('_'))
  # Step 1: keep the Brick tag sets that share the most tags with the data tag set
  shared={ts2: len(tss & tags2) for ts2, tags2 in pointTagSetTags.items()}
  mostShared=max(shared.values()) if shared else 0
  candidates=[ts2 for ts2 in pointTagSetTags if shared[ts2]==mostShared]
  # Step 2: among several candidates prefer those with the fewest tags that the
  # data tag set does not have. The minimum is taken over the candidates
  # themselves; starting the search at len(tss) used to produce an empty
  # result whenever every candidate had more surplus tags than that.
  if len(candidates)>1:
    surplus={ts2: len(pointTagSetTags[ts2] - tss) for ts2 in candidates}
    fewestSurplus=min(surplus.values())
    candidates=[ts2 for ts2 in candidates if surplus[ts2]==fewestSurplus]
  closestMatches[ts]=candidates

Manual Mapping based on the results.

In [None]:
# List every data tag set without an exact Brick counterpart, with its candidates
for ts, candidates in closestMatches.items():
  if ts in fullMatches:
    continue
  print(ts+":"+str(candidates))

write header

In [None]:
# Open the Turtle output file. The encoding is fixed to UTF-8 because Turtle
# documents are UTF-8; with the platform default encoding, labels containing
# non-ASCII characters could be written in a form that cannot be parsed back.
# The handle stays open on purpose: the following cells keep writing to it and
# a later cell closes it.
fo = open(exampleName+'.ttl', 'w', encoding='utf-8')
# Standard vocabularies
fo.write("""@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n""")
# Brick vocabularies, with the same prefixes as bound on the graph
fo.write("@prefix bf:  <https://brickschema.org/schema/1.0.1/BrickFrame#> .\n")
fo.write("@prefix tag: <https://brickschema.org/schema/1.0.1/BrickTag#> .\n")
fo.write("@prefix ts:  <https://brickschema.org/schema/1.0.1/Brick#> .\n")
# Default prefix: the namespace of this example
fo.write("@prefix : <"+str(MyNS)+"> .\n\n")
# Ontology declaration (namespace URI without the '#'), importing Brick
fo.write("<"+str(MyNS).replace("#","")+">  a owl:Ontology ;\n")
fo.write("\towl:imports <https://brickschema.org/schema/1.0.1/Brick> ;\n")
fo.write('\trdfs:comment "'+exampleDescription+'"@en .\n\n')

write locations

In [None]:
# create location individuals
# One individual per distinct non-empty value of every location column; the
# column name doubles as the Brick class (ts:City, ts:Building, ...).
locationCols=["City","Building","Wing","Floor","Room","Zone"]
locations={}  # raw cell value -> individual name
for col in locationCols:
  # Series.unique() replaces pd.unique(Series.ravel()); Series.ravel() is deprecated
  for indiv in df[col].dropna().unique():
    if indiv!="":
      indivLocName=IndivName(col+"_"+indiv)
      fo.write("\n :"+indivLocName+"  a   ts:"+col+";")
      fo.write('\n\t\t\t rdfs:label "'+indiv+'"@en .\n')
      # NOTE(review): the key is the raw value only, so the same value occurring
      # in two columns shares one entry (the later column wins) — confirm that
      # values are unique across the location columns
      locations[indiv]=indivLocName
# add relations
# Link each location to the value in the column directly to its left, when both
# are known. Each child/parent pair is written once instead of once per row.
writtenPartRelations=set()
for idx in df.index:
  for ci in range(1,len(locationCols)):
    childIndiv =df.loc[idx,locationCols[ci]]
    parentIndiv=df.loc[idx,locationCols[ci-1]]
    if childIndiv in locations and parentIndiv in locations:
      relation=(locations[childIndiv],locations[parentIndiv])
      if relation in writtenPartRelations:
        continue
      writtenPartRelations.add(relation)
      fo.write("\n :"+locations[childIndiv]+"  bf:isPartOf   :"+locations[parentIndiv]+".")
      fo.write("\n :"+locations[parentIndiv]+"  bf:hasPart   :"+locations[childIndiv]+".\n")

write assets

In [None]:
# create asset individuals
assets={}            # asset label -> individual name (parents are registered here too)
assetOwnType={}      # asset label -> AssetType of the first row that states one for it
handledAssets=set()  # assets whose own row has already been processed

def _declareAsset(label, typeName):
  """Write the individual for an asset once and record its name in assets."""
  if label in assets:
    return
  indivLocName=IndivName(typeName+"_"+label)
  fo.write("\n :"+indivLocName+"  a  ts:"+typeName+";")
  fo.write('\n\t\t\t rdfs:label "'+label+'"@en .\n')
  assets[label] = indivLocName

# First pass: remember the type every asset states for itself, so that a parent
# that is mentioned before its own row still gets its own type.
for idx in df.index:
  assetName = str(df.loc[idx, "Asset"])
  if assetName=="" or assetName=="nan":
    continue
  ownType = df.loc[idx, "AssetType"]
  if isinstance(ownType, str):
    assetOwnType.setdefault(assetName, ownType)

# Second pass: declare each asset from the first row that mentions it and link
# it to its parent. Progress is tracked in handledAssets rather than in assets:
# an asset that was first registered as somebody's parent must still have its
# own row processed, otherwise its own AssetParent link would be lost.
for idx in df.index:
  assetName = str(df.loc[idx, "Asset"])
  if assetName=="" or assetName=="nan" or assetName in handledAssets:
    continue
  handledAssets.add(assetName)
  assetType = df.loc[idx, "AssetType"]
  _declareAsset(assetName, assetType)
  assetParent = str(df.loc[idx, "AssetParent"])
  if assetParent!="" and assetParent!="nan":
    # Use the parent's own type when the data states one; fall back to the
    # child's type only for parents that never appear in the Asset column.
    _declareAsset(assetParent, assetOwnType.get(assetParent, assetType))
    fo.write("\n :"+assets[assetName]+"  bf:isPartOf   :"+assets[assetParent]+".")
    fo.write("\n :"+assets[assetParent]+"  bf:hasPart   :"+assets[assetName]+".\n")

In [None]:
# Write the feeds / isFedBy relations between assets.
# The isFedBy cell may list several feeding assets separated by ';'.
unknownFeeds=set()   # feeder labels with no registered asset individual
writtenFeeds=set()   # (asset, feeder) pairs already written
for idx in df.index:
  assetName = str(df.loc[idx, "Asset"])
  if assetName=="" or assetName=="nan":
    continue
  assetFeed = str(df.loc[idx, "isFedBy"])
  if assetFeed=="" or assetFeed=="nan":
    continue
  for feed in assetFeed.split(";"):
    # tolerate padding around the separator and empty segments (e.g. a trailing ';')
    feed=feed.strip()
    if feed=="":
      continue
    if feed not in assets:
      # report unknown feeders after the loop instead of aborting with a KeyError
      unknownFeeds.add(feed)
      continue
    if (assetName,feed) in writtenFeeds:
      # the same relation appears on every row (point) of the asset
      continue
    writtenFeeds.add((assetName,feed))
    fo.write("\n :"+assets[assetName]+"  bf:isFedBy   :"+assets[feed]+".")
    fo.write("\n :"+assets[feed]+"  bf:feeds   :"+assets[assetName]+".\n")
if unknownFeeds:
  print("isFedBy refers to assets that are not registered (relations skipped): "+str(sorted(unknownFeeds)))

In [None]:
# Declare the assets named in the "Asset2" column that are not registered yet
for idx in df.index:
  assetName = str(df.loc[idx, "Asset2"])
  if assetName in ("", "nan"):
    continue  # no second asset on this row
  if assetName in assets:
    continue  # already declared
  assetType = df.loc[idx, "AssetType2"]
  indivLocName = IndivName(assetType+"_"+assetName)
  fo.write(
    "\n :"+indivLocName+"  a  ts:"+assetType+";"
    + '\n\t\t\t rdfs:label "'+assetName+'"@en .\n'
  )
  assets[assetName] = indivLocName

write points (tag set, location and assets of every point)

In [None]:
# Write one individual per row (point): its type, label, location and assets.
declaredTagSets=set()  # custom tag set classes that have already been declared
for idx in df.index:
  pointLabel = df.loc[idx, "Label"]
  pointIndivName = IndivName(pointLabel)
  pointType = df.loc[idx, "TagSet"]
  if pointType in fullMatches:
    # the tag set exists in Brick: type the point with the Brick class
    fo.write("\n :"+pointIndivName+"  a  bf:Label, bf:TagSet, ts:"+fullMatches[pointType]+";")
  else:
    # no exact Brick class: declare the tag set as a class in the example
    # namespace (once, not on every row) and type the point with it
    if pointType not in declaredTagSets:
      declaredTagSets.add(pointType)
      fo.write("\n :"+pointType+"  rdfs:subClassOf   bf:TagSet;")
      fo.write('\n\t\t\t rdf:type  owl:Class .')
    fo.write("\n :"+pointIndivName+"  a   bf:Label, bf:TagSet, :"+pointType+";")
  fo.write('\n \t\t\t rdfs:label "'+pointLabel+'"@en ;')
  # write location: the last non-empty location column of the row.
  # .iloc[-1] replaces .ravel()[-1]; Series.ravel() is deprecated.
  loc=df.loc[idx, locationCols].dropna().iloc[-1]
  locIndivName=locations[loc]
  fo.write("\n \t\t\t bf:isPointOf :"+locIndivName+';')
  fo.write("\n \t\t\t bf:isLocatedIn :"+locIndivName+'. \n')
  fo.write("\n :"+locIndivName+"  bf:hasPoint  :"+pointIndivName+".")
  # write assets: the same relations for the primary and the secondary asset
  for nameCol, typeCol in (("Asset","AssetType"),("Asset2","AssetType2")):
    assetName = str(df.loc[idx, nameCol])
    assetType = str(df.loc[idx, typeCol])
    if assetName=="" or assetName=="nan":
      continue
    fo.write("\n :"+assets[assetName]+"  bf:hasPoint  :"+pointIndivName+".")
    fo.write("\n :"+pointIndivName+"  bf:isPointOf  :"+assets[assetName]+".\n")
    # assets with "FCU" in their name or "Lighting" in their type are also
    # recorded as feeding the point's location
    if "FCU" in assetName or "Lighting" in assetType:
      fo.write("\n :"+assets[assetName]+"  bf:feeds  :"+locIndivName+".")
      fo.write("\n :"+locIndivName+"  bf:isFedBy   :"+assets[assetName]+".")

In [None]:
# Close the output file so that everything written so far is flushed before the file is re-read
fo.close()

In [None]:
# Start from a new, empty graph (this rebinds g, replacing the graph that held the Brick schema)
g = rdflib.Graph()
# Parse the generated file back in
result = g.parse(exampleName+'.ttl', format='n3')
# Overwrite the same file with rdflib's Turtle serialization of the parsed graph
g.serialize(destination=exampleName+'.ttl', format='turtle')

In [None]:
# Total number of distinct subject/object pairs over the relationship
# predicates written by this notebook
ln=0
for predicate in ("hasPoint", "isPointOf", "isFedBy", "feeds", "isPartOf", "hasPart", "isLocatedIn"):
  qres = g.query("""SELECT DISTINCT ?a ?b WHERE {  ?a bf:"""+predicate+""" ?b . }""")
  ln+=len(qres)
print(ln)