# Paths to our dataset files

In [1]:
data = '../datasets/bts_site_b_train/train.zip'
mapper = '../datasets/bts_site_b_train/mapper_TrainOnly.csv'
model = '../datasets/bts_site_b_train/Site_B.ttl'
schema = '../datasets/bts_site_b_train/Brick_v1.2.1.ttl'

# Load the Dataset

Hack for demo purposes to add the `src` directory to the path:

In [2]:
import sys
sys.path.append("../src")

Import and instantiate the manager:

In [3]:
from dbmgr import DBManager
db = DBManager(data, mapper, model, schema)

This `db` object is what is passed to each analytics module.

## Number of streams in the dataset

In [4]:
len(db)

668

## Iterating over the streams in the dataset

In [5]:
i = 0
for stream_id in db:
    print(stream_id)
    i += 1
    if i == 5:
        break

6f502ba0_77fb_43db_b6dc_48e68d5c822b
3c9ca09b_8952_45da_a066_bcb645cbbc68
d60c44c3_62fc_409a_a751_8b3ddeb9f197
eb0d5916_d942_4e2d_8129_60b83d984fdd
78078042_e16e_425b_9ec8_605aa149274e


## Check if a stream ID is in the dataset:

Not in the dataset:

In [6]:
'tim' in db

False

In the dataset:

In [7]:
'835a86d6_c235_4caf_8071_31b9bf24960b' in db

True

## Get a stream by its ID:

By indexing into the manager:

In [8]:
db['835a86d6_c235_4caf_8071_31b9bf24960b'].head()

Unnamed: 0,time,value,brick_class
0,2021-01-01 00:03:09.662,0.0,Enable_Status
1,2021-01-01 00:13:39.017,0.0,Enable_Status
2,2021-01-01 00:23:10.073,0.0,Enable_Status
3,2021-01-01 00:33:09.637,0.0,Enable_Status
4,2021-01-01 00:43:09.639,0.0,Enable_Status


By calling the getter:

In [9]:
db.get_stream('835a86d6_c235_4caf_8071_31b9bf24960b').head()

Unnamed: 0,time,value,brick_class
0,2021-01-01 00:03:09.662,0.0,Enable_Status
1,2021-01-01 00:13:39.017,0.0,Enable_Status
2,2021-01-01 00:23:10.073,0.0,Enable_Status
3,2021-01-01 00:33:09.637,0.0,Enable_Status
4,2021-01-01 00:43:09.639,0.0,Enable_Status


Set a stream by its ID:

In [10]:
import pandas as pd

db['835a86d6_c235_4caf_8071_31b9bf24960b'] = pd.DataFrame({
            "time": [pd.Timestamp('2017-03-22 15:16:45'), pd.Timestamp('2017-03-22 15:17:45'), pd.Timestamp('2017-03-22 15:18:45'), pd.Timestamp('2017-03-22 15:19:45') ],
            "value": [22.0, 23.0, 24.0, 25.0],
            "brick_class": ["Fake_Class", "Fake_Class", "Fake_Class", "Fake_Class"],
        })

db['835a86d6_c235_4caf_8071_31b9bf24960b'].head()

Unnamed: 0,time,value,brick_class
0,2017-03-22 15:16:45,22.0,Fake_Class
1,2017-03-22 15:17:45,23.0,Fake_Class
2,2017-03-22 15:18:45,24.0,Fake_Class
3,2017-03-22 15:19:45,25.0,Fake_Class


## Get multiple streams by their IDs:

In [11]:
stream_ids = [
    '22543d51_3c38_46d3_98fe_a77838976df9',
    '3010a24a_2deb_43c8_9db4_cfc94c5faa5c',
    '405896a1_c195_422d_a0aa_18d2eca6a61d',
]
streams = db.get_streams(stream_ids)
for stream_id, stream in streams.items():
    print(f'========= {stream_id} =========')
    print(stream.head())
    print()

                     time  value            brick_class
0 2021-01-01 00:03:16.758    1.0  Operating_Mode_Status
1 2021-01-01 00:13:45.383    1.0  Operating_Mode_Status
2 2021-01-01 00:23:16.660    1.0  Operating_Mode_Status
3 2021-01-01 00:33:16.717    1.0  Operating_Mode_Status
4 2021-01-01 00:43:16.769    1.0  Operating_Mode_Status

                     time  value              brick_class
0 2021-07-13 01:26:18.083 -0.107  Electrical_Power_Sensor
1 2021-07-13 01:31:17.035 -0.042  Electrical_Power_Sensor
2 2021-07-13 01:36:13.143  0.054  Electrical_Power_Sensor
3 2021-07-13 01:50:12.735 -0.016  Electrical_Power_Sensor
4 2021-07-13 01:55:12.150 -0.164  Electrical_Power_Sensor

                     time  value                   brick_class
0 2021-01-01 00:03:16.153   23.5  Cooling_Temperature_Setpoint
1 2021-01-01 00:13:44.730   23.5  Cooling_Temperature_Setpoint
2 2021-01-01 00:23:16.055   23.5  Cooling_Temperature_Setpoint
3 2021-01-01 00:33:16.093   23.5  Cooling_Temperature_Setpoint

## Query the building model:

Some arbitrary sparql query:

In [12]:
q = """
SELECT ?s ?p ?o WHERE {
    ?s ?p ?o
} LIMIT 10
"""

Get the results as RDFLib rows:

In [13]:
res = db.query(q)
i = 0
for r in res:
    print(r)
    i += 1
    if i == 5:
        break

(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#520aeaf6_5518_474e_9a94_c8885c6aa1c7'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#feeds'), rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#718e54c6_67a5_4ba6_b293_04bfd531e28f'))
(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#fd5601fa_5513_4b0a_b935_9817c84319e6.3a3f575d_2d3c_4399_a159_9cfabfb29282.292353e0_4db5_4ee1_a0b6_99a2446e5a68'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#Point'))
(rdflib.term.URIRef('dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_9

Get the results as a DataFrame:

In [14]:
res = db.query(q, return_df=True)
res.head()

Unnamed: 0,o,p,s
0,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,https://brickschema.org/schema/Brick#feeds,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
1,https://brickschema.org/schema/Brick#Point,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
2,https://brickschema.org/schema/Brick#Hot_Water...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
3,4f9b0cc9_cbc2_40e2_aecc_57d740cd1ca2,http://senaps.io/schema/1.0/senaps#stream_id,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...
4,n9f0708789aa343fcb721d672be111de3b189,https://brickschema.org/schema/Brick#electrica...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...


## Query the brick schema:

Get the results as RDFLib rows:

In [15]:
res = db.query(q, graph='schema')
i = 0
for r in res:
    print(r)
    i += 1
    if i == 5:
        break

(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Room_Air_Temperature_Setpoint'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#hasAssociatedTag'), rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#Temperature'))
(rdflib.term.BNode('n10a78ec210814897a9093f43032df34bb1698'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'), rdflib.term.BNode('n10a78ec210814897a9093f43032df34bb1699'))
(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Ice_Tank_Leaving_Water_Temperature_Sensor'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#hasAssociatedTag'), rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#Sensor'))
(rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#Disconnect'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('https://brickschema.org/schema/Brick#Tag'))
(rdflib.term.URIRef('https://brickschema.org/schema/Brick#Current_Total_Harmonic_Distortion'), rdflib.t

Get the results as a DataFrame:

In [16]:
res = db.query(q, graph='schema', return_df=True)
res.head()

Unnamed: 0,o,p,s
0,https://brickschema.org/schema/BrickTag#Temper...,https://brickschema.org/schema/Brick#hasAssoci...,https://brickschema.org/schema/Brick#Room_Air_...
1,n10a78ec210814897a9093f43032df34bb1699,http://www.w3.org/1999/02/22-rdf-syntax-ns#rest,n10a78ec210814897a9093f43032df34bb1698
2,https://brickschema.org/schema/BrickTag#Sensor,https://brickschema.org/schema/Brick#hasAssoci...,https://brickschema.org/schema/Brick#Ice_Tank_...
3,https://brickschema.org/schema/Brick#Tag,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://brickschema.org/schema/BrickTag#Discon...
4,CurrentTotalHarmonicDistortion,http://www.w3.org/2000/01/rdf-schema#label,https://brickschema.org/schema/Brick#Current_T...


## Remove the RDF prefixes from the query results:

In [17]:
res = db.query(q, return_df=True, defrag=True)
res.head()

Unnamed: 0,o,p,s
0,718e54c6_67a5_4ba6_b293_04bfd531e28f,feeds,520aeaf6_5518_474e_9a94_c8885c6aa1c7
1,Point,type,fd5601fa_5513_4b0a_b935_9817c84319e6.3a3f575d_...
2,Hot_Water_System,type,8c409529_fddc_41dc_a3a5_92fa14ca1cdb
3,4f9b0cc9_cbc2_40e2_aecc_57d740cd1ca2,stream_id,ebb4f301_f353_446c_9b92_19f131e5181d.f6cf4a1e_...
4,n9f0708789aa343fcb721d672be111de3b189,electricalPhases,1af5f6f9_15b1_4a62_962e_0ede16373202.a8dcfdcf_...


## Get the building model knowledge graph itself:

In [18]:
g = db.model
for s, p, o in g:
    print(s, p, o)

dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#520aeaf6_5518_474e_9a94_c8885c6aa1c7 https://brickschema.org/schema/Brick#feeds dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#718e54c6_67a5_4ba6_b293_04bfd531e28f
dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#fd5601fa_5513_4b0a_b935_9817c84319e6.3a3f575d_2d3c_4399_a159_9cfabfb29282.292353e0_4db5_4ee1_a0b6_99a2446e5a68 http://www.w3.org/1999/02/22-rdf-syntax-ns#type https://brickschema.org/schema/Brick#Point
dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7ce9/building/95d1f7f3_33c6_4bd8_a82c_99c550d83bb8#8c409529_fddc_41dc_a3a5_92fa14ca1cdb http://www.w3.org/1999/02/22-rdf-syntax-ns#type https://brickschema.org/schema/Brick#Hot_Water_System
dch:org/df

## Get the brick schema knowledge graph itself:

In [19]:
g = db.schema
for s, p, o in g:
    print(s, p, o)

https://brickschema.org/schema/Brick#Room_Air_Temperature_Setpoint https://brickschema.org/schema/Brick#hasAssociatedTag https://brickschema.org/schema/BrickTag#Temperature
n10a78ec210814897a9093f43032df34bb1698 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest n10a78ec210814897a9093f43032df34bb1699
https://brickschema.org/schema/Brick#Ice_Tank_Leaving_Water_Temperature_Sensor https://brickschema.org/schema/Brick#hasAssociatedTag https://brickschema.org/schema/BrickTag#Sensor
https://brickschema.org/schema/BrickTag#Disconnect http://www.w3.org/1999/02/22-rdf-syntax-ns#type https://brickschema.org/schema/Brick#Tag
https://brickschema.org/schema/Brick#Current_Total_Harmonic_Distortion http://www.w3.org/2000/01/rdf-schema#label CurrentTotalHarmonicDistortion
n10a78ec210814897a9093f43032df34bb607 http://www.w3.org/2002/07/owl#onProperty https://brickschema.org/schema/Brick#hasTag
n10a78ec210814897a9093f43032df34bb2394 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/

## Get the time series data itself:

In [20]:
data = db.data
for k, v in data.items():
    print(f'{k}: {len(v)} data points')

6f502ba0_77fb_43db_b6dc_48e68d5c822b: 51193 data points
3c9ca09b_8952_45da_a066_bcb645cbbc68: 51194 data points
d60c44c3_62fc_409a_a751_8b3ddeb9f197: 51194 data points
eb0d5916_d942_4e2d_8129_60b83d984fdd: 51200 data points
78078042_e16e_425b_9ec8_605aa149274e: 51188 data points
6ba43136_3c00_4d77_adee_557c4b027fd1: 51198 data points
c1a6f74a_95fe_49fe_8b55_d78de51ff45c: 51193 data points
dbd1311d_2e30_44ad_92fc_345fa477a3e9: 51190 data points
d85d789a_5df8_4035_8a2a_6c032aa6621d: 51196 data points
8be7fb20_8d4b_410c_b580_a9f0a0b0c917: 52376 data points
7a2ff512_fcd4_41eb_9299_379824d3347f: 51196 data points
22543d51_3c38_46d3_98fe_a77838976df9: 51198 data points
3010a24a_2deb_43c8_9db4_cfc94c5faa5c: 52378 data points
405896a1_c195_422d_a0aa_18d2eca6a61d: 51198 data points
9348979b_cad6_46a7_922b_6849412ca5c4: 51193 data points
f6093eec_4b82_4427_b4c7_4b09e62cff58: 51203 data points
1f2f7881_0dff_4580_91cb_93cca0cda361: 51198 data points
76f52c23_8902_444c_9056_47aa867b3af7: 51200 data