# Paths to our dataset files

In [27]:
# Input locations for the Site B training set. All five values are passed,
# in this order, to DBManager when the manager is instantiated below.
# NOTE(review): `data` is rebound to a DataFrame further down the notebook,
# so re-run this cell before re-creating the manager.
data = "../datasets/bts_site_b_train/train.zip"  # zip archive with the stream data
mapper = "../datasets/bts_site_b_train/mapper_TrainOnly.csv"  # mapper CSV
model = "../datasets/bts_site_b_train/Site_B.ttl"  # building model graph (.ttl)
schema = "../datasets/bts_site_b_train/Brick_v1.2.1.ttl"  # Brick schema graph (.ttl)
building = "B"  # building code; presumably matches the mapper's Building column — TODO confirm

# Load the Dataset

Hack for demo purposes: add the `src/analytics` directory to `sys.path` so that `dbmgr` can be imported:

In [28]:
import sys
from itertools import islice

# Make the project's analytics modules importable from this notebook.
# The entry is a relative path, so it is resolved against the kernel's
# current working directory. The membership guard keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
ANALYTICS_SRC = "../src/analytics"
if ANALYTICS_SRC not in sys.path:
    sys.path.append(ANALYTICS_SRC)

Import and instantiate the manager:

In [29]:
from dbmgr import DBManager

# Build the manager from the five settings defined in the first cell
# (stream archive, mapper CSV, building model, Brick schema, building code).
# NOTE(review): the "Reading stream data" progress bar below appears to be
# emitted by this call, i.e. the streams are loaded at construction time.
db = DBManager(data, mapper, model, schema, building)

Reading stream data      : 100%|██████████| 8654/8654 [00:04<00:00, 1915.30it/s]


This `db` object is what is passed to each analytics module.

## Number of streams in the dataset

In [30]:
# len() on the manager reports how many streams it holds.
len(db)

668

## Iterating over the streams in the dataset

In [31]:
# Iterating the manager yields stream IDs. islice caps the preview at the
# first five without a hand-maintained counter and break.
for stream_id in islice(db, 5):
    print(stream_id)

6f502ba0_77fb_43db_b6dc_48e68d5c822b
3c9ca09b_8952_45da_a066_bcb645cbbc68
d60c44c3_62fc_409a_a751_8b3ddeb9f197
eb0d5916_d942_4e2d_8129_60b83d984fdd
78078042_e16e_425b_9ec8_605aa149274e


## Check if a stream ID is in the dataset:

Not in the dataset:

In [32]:
# Membership test with a made-up ID; evaluates to False for this dataset.
"tim" in db

False

In the dataset:

In [33]:
# Membership test with an ID that is present; evaluates to True.
"835a86d6_c235_4caf_8071_31b9bf24960b" in db

True

## Get a stream by its ID:

By indexing into the manager:

In [34]:
# Look the stream up by ID with [] and preview its first rows.
db["835a86d6_c235_4caf_8071_31b9bf24960b"].head()

Unnamed: 0,time,value,brick_class
0,2021-01-01 00:03:09.662,0.0,Enable_Status
1,2021-01-01 00:13:39.017,0.0,Enable_Status
2,2021-01-01 00:23:10.073,0.0,Enable_Status
3,2021-01-01 00:33:09.637,0.0,Enable_Status
4,2021-01-01 00:43:09.639,0.0,Enable_Status


By calling the getter:

In [35]:
# Fetch the stream through the explicit getter method and preview its first rows.
db.get_stream("835a86d6_c235_4caf_8071_31b9bf24960b").head()

Unnamed: 0,time,value,brick_class
0,2021-01-01 00:03:09.662,0.0,Enable_Status
1,2021-01-01 00:13:39.017,0.0,Enable_Status
2,2021-01-01 00:23:10.073,0.0,Enable_Status
3,2021-01-01 00:33:09.637,0.0,Enable_Status
4,2021-01-01 00:43:09.639,0.0,Enable_Status


## Set a stream by its ID:

In [36]:
import pandas as pd

# Small synthetic stream used by the following cells to demonstrate
# overwriting a stream: four samples one minute apart, all tagged with a
# made-up Brick class.
# NOTE: this rebinds `data`, which held the dataset path in the first cell.
_sample_times = (
    "2017-03-22 15:16:45",
    "2017-03-22 15:17:45",
    "2017-03-22 15:18:45",
    "2017-03-22 15:19:45",
)
data = pd.DataFrame(
    {
        "time": [pd.Timestamp(stamp) for stamp in _sample_times],
        "value": [22.0, 23.0, 24.0, 25.0],
        "brick_class": ["Fake_Class"] * len(_sample_times),
    }
)

By indexing into the manager:

In [37]:
# Overwrite an existing stream through item assignment, then read it back
# to confirm the manager now returns the replacement frame.
overwritten_id = "835a86d6_c235_4caf_8071_31b9bf24960b"
db[overwritten_id] = data
db[overwritten_id].head()

Unnamed: 0,time,value,brick_class
0,2017-03-22 15:16:45,22.0,Fake_Class
1,2017-03-22 15:17:45,23.0,Fake_Class
2,2017-03-22 15:18:45,24.0,Fake_Class
3,2017-03-22 15:19:45,25.0,Fake_Class


By calling the setter:

In [38]:
# Same overwrite through the explicit setter, read back through the getter.
replaced_id = "22543d51_3c38_46d3_98fe_a77838976df9"
db.set_stream(replaced_id, data)
db.get_stream(replaced_id).head()

Unnamed: 0,time,value,brick_class
0,2017-03-22 15:16:45,22.0,Fake_Class
1,2017-03-22 15:17:45,23.0,Fake_Class
2,2017-03-22 15:18:45,24.0,Fake_Class
3,2017-03-22 15:19:45,25.0,Fake_Class


## Get multiple streams by their IDs:

In [39]:
# Fetch several streams in one call. The result is iterated with .items(),
# yielding (stream ID, frame) pairs; each preview is printed under a banner
# and followed by a blank line.
stream_ids = [
    "22543d51_3c38_46d3_98fe_a77838976df9",
    "3010a24a_2deb_43c8_9db4_cfc94c5faa5c",
    "405896a1_c195_422d_a0aa_18d2eca6a61d",
]
streams = db.get_streams(stream_ids)
for stream_id, frame in streams.items():
    banner = f"========= {stream_id} ========="
    print(banner)
    print(frame.head(), end="\n\n")

                 time  value brick_class
0 2017-03-22 15:16:45   22.0  Fake_Class
1 2017-03-22 15:17:45   23.0  Fake_Class
2 2017-03-22 15:18:45   24.0  Fake_Class
3 2017-03-22 15:19:45   25.0  Fake_Class

                     time  value              brick_class
0 2021-07-13 01:26:18.083 -0.107  Electrical_Power_Sensor
1 2021-07-13 01:31:17.035 -0.042  Electrical_Power_Sensor
2 2021-07-13 01:36:13.143  0.054  Electrical_Power_Sensor
3 2021-07-13 01:50:12.735 -0.016  Electrical_Power_Sensor
4 2021-07-13 01:55:12.150 -0.164  Electrical_Power_Sensor

                     time  value                   brick_class
0 2021-01-01 00:03:16.153   23.5  Cooling_Temperature_Setpoint
1 2021-01-01 00:13:44.730   23.5  Cooling_Temperature_Setpoint
2 2021-01-01 00:23:16.055   23.5  Cooling_Temperature_Setpoint
3 2021-01-01 00:33:16.093   23.5  Cooling_Temperature_Setpoint
4 2021-01-01 00:43:16.129   23.5  Cooling_Temperature_Setpoint



## Get the label of a stream:

Stream that does exist:

In [40]:
# Ask the manager for the label of this stream ID.
db.get_stream_label("22543d51_3c38_46d3_98fe_a77838976df9")

'Operating_Mode_Status'

A stream ID that doesn't exist raises a `KeyError`, so calls like this should generally be wrapped in a `try`/`except` block:

In [41]:
# Demonstrate the failure mode instead of leaving the call commented out:
# an unknown stream ID raises KeyError, which is caught and reported here so
# the notebook still runs top to bottom.
try:
    db.get_stream_label("does not exist")
except KeyError as err:
    print(f"KeyError: {err}")

## Query the building model:

An arbitrary SPARQL query:

In [42]:
# Unfiltered triple pattern: matches any (subject, predicate, object) and
# returns at most five rows. There is no ORDER BY, so which five rows come
# back is up to the query engine.
q = """
SELECT ?s ?p ?o WHERE {
    ?s ?p ?o
} LIMIT 5
"""

Get the results as RDFLib rows:

In [43]:
# With no `graph` argument the query runs against the building model.
# The result is iterable; each item is printed as a raw result row.
res = db.query(q)
for row in res:
    print(row)

(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#3abdac1c_4717_445e_8248_0143c701015a.b9f01054_e14c_4d95_a7f5_77130239fa75'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#isPointOf'), rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#3abdac1c_4717_445e_8248_0143c701015a'))
(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#8f7007c1_ae4b_48a4_808c_18696b8c3fda'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#isPartOf'), rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#f7c06302_3c06_4331_b7ee_6f01946aa788'))
(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc

Get the results as a DataFrame:

In [44]:
# return_df=True makes query() hand back a DataFrame instead of result rows;
# in the output below its columns appear as o, p, s.
res = db.query(q, return_df=True)
res.head()

Unnamed: 0,o,p,s
0,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,https://brickschema.org/schema/Brick#isPointOf,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
1,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,https://brickschema.org/schema/Brick#isPartOf,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
2,bb34ad66_61ef_4964_bcb2_d37fe05f79a3,http://senaps.io/schema/1.0/senaps#stream_id,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
3,https://brickschema.org/schema/Brick#Different...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
4,36044d6b_7db1_4d88_8174_5d4b21fa2314,http://senaps.io/schema/1.0/senaps#stream_id,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...


## Query the brick schema:

Get the results as RDFLib rows:

In [45]:
# graph="schema" points the same query at the Brick schema graph rather
# than the building model; each result row is printed as-is.
res = db.query(q, graph="schema")
for row in res:
    print(row)

(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Enthalpy'), rdflib.term.URIRef('http://qudt.org/schema/qudt/applicableUnit'), rdflib.term.URIRef('http://qudt.org/vocab/unit/MegaV-A-HR'))
(rdflib.term.BNode('n11509110293b42febf998d488066a4dbb5588'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'), rdflib.term.BNode('n11509110293b42febf998d488066a4dbb5589'))
(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Max_Occupied_Cooling_Discharge_Air_Flow_Setpoint_Limit'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#hasAssociatedTag'), rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#Parameter'))
(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Pressure'), rdflib.term.URIRef('http://qudt.org/schema/qudt/applicableUnit'), rdflib.term.URIRef('http://qudt.org/vocab/unit/GigaPA'))
(rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#Leaving'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.

Get the results as a DataFrame:

In [46]:
# Query the Brick schema graph and get the result back as a DataFrame.
res = db.query(q, graph="schema", return_df=True)
res.head()

Unnamed: 0,o,p,s
0,http://qudt.org/vocab/unit/MegaV-A-HR,http://qudt.org/schema/qudt/applicableUnit,https://brickschema.org/schema/Brick#Enthalpy
1,n11509110293b42febf998d488066a4dbb5589,http://www.w3.org/1999/02/22-rdf-syntax-ns#rest,n11509110293b42febf998d488066a4dbb5588
2,https://brickschema.org/schema/BrickTag#Parameter,https://brickschema.org/schema/Brick#hasAssoci...,https://brickschema.org/schema/Brick#Max_Occup...
3,http://qudt.org/vocab/unit/GigaPA,http://qudt.org/schema/qudt/applicableUnit,https://brickschema.org/schema/Brick#Pressure
4,https://brickschema.org/schema/Brick#Tag,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://brickschema.org/schema/BrickTag#Leaving


## Remove the RDF prefixes from the query results:

In [47]:
# defrag=True strips the URI prefixes from the result values: in the output
# below, e.g. ".../Brick#isPointOf" is shown as "isPointOf".
res = db.query(q, return_df=True, defrag=True)
res.head()

Unnamed: 0,o,p,s
0,3abdac1c_4717_445e_8248_0143c701015a,isPointOf,3abdac1c_4717_445e_8248_0143c701015a.b9f01054_...
1,f7c06302_3c06_4331_b7ee_6f01946aa788,isPartOf,8f7007c1_ae4b_48a4_808c_18696b8c3fda
2,bb34ad66_61ef_4964_bcb2_d37fe05f79a3,stream_id,8f8996ae_8dd4_4a45_bed9_7757fa6907db.2699824a_...
3,Differential_Pressure_Setpoint,type,f7c06302_3c06_4331_b7ee_6f01946aa788.a65adba9_...
4,36044d6b_7db1_4d88_8174_5d4b21fa2314,stream_id,ca7e8a8e_ee4e_4a39_9c81_5f4ffb0a0189


## Get the building model knowledge graph itself:

In [48]:
# db.model exposes the building-model graph; iterating it yields
# (subject, predicate, object) triples. islice limits the preview to the
# first five without a manual counter and break.
g = db.model
for s, p, o in islice(g, 5):
    print(s, p, o)

dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#3abdac1c_4717_445e_8248_0143c701015a.b9f01054_e14c_4d95_a7f5_77130239fa75 https://brickschema.org/schema/Brick#isPointOf dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#3abdac1c_4717_445e_8248_0143c701015a
dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#8f7007c1_ae4b_48a4_808c_18696b8c3fda https://brickschema.org/schema/Brick#isPartOf dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#f7c06302_3c06_4331_b7ee_6f01946aa788
dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#8f8996ae_8dd4_4a45_bed9_7757fa6907db.2699824a_dedf_4f3a_8

## Get the brick schema knowledge graph itself:

In [49]:
# db.schema exposes the Brick schema graph; iterating it yields
# (subject, predicate, object) triples. islice limits the preview to the
# first five without a manual counter and break.
g = db.schema
for s, p, o in islice(g, 5):
    print(s, p, o)

https://brickschema.org/schema/Brick#Enthalpy http://qudt.org/schema/qudt/applicableUnit http://qudt.org/vocab/unit/MegaV-A-HR
n11509110293b42febf998d488066a4dbb5588 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest n11509110293b42febf998d488066a4dbb5589
https://brickschema.org/schema/Brick#Max_Occupied_Cooling_Discharge_Air_Flow_Setpoint_Limit https://brickschema.org/schema/Brick#hasAssociatedTag https://brickschema.org/schema/BrickTag#Parameter
https://brickschema.org/schema/Brick#Pressure http://qudt.org/schema/qudt/applicableUnit http://qudt.org/vocab/unit/GigaPA
https://brickschema.org/schema/BrickTag#Leaving http://www.w3.org/1999/02/22-rdf-syntax-ns#type https://brickschema.org/schema/Brick#Tag


## Get the time series data itself:

In [50]:
# db.data is the manager's underlying stream container; it is iterated with
# .items(), giving (stream ID, time series) pairs.
# It is bound to its own name rather than `data`, because earlier cells use
# `data` for the dataset path and then for the replacement DataFrame;
# rebinding it here would make re-running those cells use the wrong object.
stream_data = db.data
for stream_key, series in islice(stream_data.items(), 5):
    print(f"{stream_key}: {len(series)} data points")

6f502ba0_77fb_43db_b6dc_48e68d5c822b: 51193 data points
3c9ca09b_8952_45da_a066_bcb645cbbc68: 51194 data points
d60c44c3_62fc_409a_a751_8b3ddeb9f197: 51194 data points
eb0d5916_d942_4e2d_8129_60b83d984fdd: 51200 data points
78078042_e16e_425b_9ec8_605aa149274e: 51188 data points


## Get the mapper itself:

In [51]:
# db.mapper exposes the loaded mapper table (previewed with .head()).
# It is bound to its own name rather than `mapper`, which holds the mapper
# CSV path passed to DBManager; rebinding that name would break a re-run of
# the cell that instantiates the manager.
mapper_df = db.mapper
mapper_df.head()

Unnamed: 0,Building,StreamID,Filename,strBrickLabel
8349,B,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,trainAll_7989.pkl,Temperature_Setpoint
8350,B,77ed01a5_11c3_4607_9bf8_342b947c88de,trainAll_7990.pkl,System_Enable_Command
8351,B,16b768f0_5fb1_4db3_b506_d811b7c67738,trainAll_7991.pkl,Proportional_Gain_Parameter
8353,B,eb0d5916_d942_4e2d_8129_60b83d984fdd,trainAll_7992.pkl,Max_Temperature_Setpoint_Limit
8354,B,82f75178_cc15_4db7_b06c_48432305bd58,trainAll_7993.pkl,Max_Temperature_Setpoint_Limit
