In [2]:
from brickschema import Graph
import matplotlib.pyplot as plt
import pandas as pd
import rdflib



No module named 'pkg_resources'


Load the building model:

In [3]:
# Build the graph and read the site's Turtle model into it.
# NOTE(review): load_brick=True presumably also loads the Brick ontology
# definitions into the same graph — confirm against the brickschema docs.
g = Graph(load_brick=True)
g.load_file('../../datasets/bts_site_b_train/Site_B_cleaned.ttl')
# len(g) is the number of triples currently held in the graph.
print(f'Building model has {len(g)} triples')

Building model has 52457 triples


Expand the building model through inference:

In [4]:
# Expand g in place; the triple count printed below is taken after expansion,
# so it can be compared with the count printed when the model was loaded.
g.expand(profile='rdfs') # inference using RDFS reasoning
print(f'Expanded building model has {len(g)} triples')

Expanded building model has 89052 triples


In [None]:
# NOTE: in the recorded run this step also printed a SHACL validation report
# ("Conforms: False", 46 results) before the triple count.
g.expand(profile='shacl') # inference using Brick classes from Brick tags
print(f'Expanded graph has {len(g)} triples')

Conforms: False
Results (46):
Validation Result in SPARQLConstraintComponent (http://www.w3.org/ns/shacl#SPARQLConstraintComponent):
	Source Shape: bsh:DeprecationRuleForInstances
	Focus Node: p33f3e0c2_f2cd_471c_b5a0_4655c2bd4623:08ff7ae0_acef_4f4d_8328_2de484a6d5e1.f31b26ab_52c0_4781_a1dc_34c5096c825b
	Value Node: p33f3e0c2_f2cd_471c_b5a0_4655c2bd4623:08ff7ae0_acef_4f4d_8328_2de484a6d5e1.f31b26ab_52c0_4781_a1dc_34c5096c825b
	Source Constraint: [ rdf:type rdfs:Resource, sh:SPARQLConstraint ; sh:message Literal("{$this} has type {?class} which was deprecated in version {?depver}. For now, it has been updated to also be of type {?newClass}.") ; sh:prefixes <https://brickschema.org/schema/1.3/Brick> ; sh:select Literal("SELECT $this ?class ?newClass ?depver WHERE {
$this a ?class .
?class owl:deprecated true .
?class brick:deprecatedInVersion ?depver .
?class brick:isReplacedBy ?newClass .
}") ]
	Message: dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/site/c495fbd8_7a71_44b4_ad2a_014ac2df7

Play around with some queries:

In [None]:
# Four exploratory SPARQL queries over tags. In SPARQL, "a, b" after a
# predicate is an object list, so "hasTag tag:Air, tag:Temperature" requires
# BOTH tags to be present on the same subject.

# Get all tags for the brick:Building (p33f3e0c2_f2cd_471c_b5a0_4655c2bd4623:13211186_beb4_4227_bd2d_0644e860886e)
query1 = '''
    SELECT ?tag WHERE {
        p33f3e0c2_f2cd_471c_b5a0_4655c2bd4623:13211186_beb4_4227_bd2d_0644e860886e brick:hasTag ?tag .
    }
'''

# Get all entities that have the tags Air and Temperature
query2 = '''
    SELECT ?entity WHERE {
        ?entity  brick:hasTag tag:Air, tag:Temperature .
    }
'''

# Get all classes that are associated with the tags Air and Temperature
query3 = '''
SELECT ?class WHERE {
    ?class  brick:hasAssociatedTag tag:Air, tag:Temperature .
}
'''

# Get all classes that are associated with the tag Room
query4 = '''
SELECT ?class WHERE {
    ?class  brick:hasAssociatedTag tag:Room .
}
'''

# Run each query in turn, echoing the query text followed by its result rows.
for query in [query1, query2, query3, query4]:
    res = g.query(query)
    print(query)
    for row in res:
        print(row)

In [None]:
# Get all classes that are associated with the tag Location
query = '''
SELECT ?class WHERE {
    ?class  brick:hasAssociatedTag tag:Location .
}
'''

res = g.query(query)

# One result row per matching class.
for row in res:
    print(row)

In [None]:
# Get the source and stream of all data streams:
# every subject (?src) that carries a senaps:stream_id, paired with that id (?sid).
query = '''
    SELECT ?src ?sid WHERE  {
        ?src senaps:stream_id ?sid .
    }
'''
res = g.query(query)
# Each row is a (?src, ?sid) pair.
for row in res:
    print(row)

Get the IDs of all conference rooms, together with the air temperature sensor and room air temperature setpoint that are points of each room:

In [None]:
# Join on ?loc: a row is produced only when an Air_Temperature_Sensor and a
# Room_Air_Temperature_Setpoint are both points of the same Conference_Room.
# Column order of each row is (?loc, ?ats, ?atsp).
query = '''
    SELECT ?loc ?ats ?atsp WHERE  {
        ?ats    a                 brick:Air_Temperature_Sensor .
        ?atsp   a                 brick:Room_Air_Temperature_Setpoint .
        ?ats    brick:isPointOf   ?loc .
        ?atsp   brick:isPointOf   ?loc .
        ?loc    a                 brick:Conference_Room.
    }
'''
# Kept in a named variable because later cells iterate over it again.
conference_rooms = g.query(query)
for row in conference_rooms:
    print(row)

Get the ID(s) of any outside air temperature sensor(s) associated with a weather station:

In [None]:
# Select every Outside_Air_Temperature_Sensor that is a point of something
# typed as a Weather_Station; only the sensor (?ats) is returned.
weather_station_query = '''
    SELECT ?ats WHERE  {
        ?ats   a                  brick:Outside_Air_Temperature_Sensor .
        ?ats   brick:isPointOf    ?loc .
        ?loc   a                  brick:Weather_Station
    }
'''
# Kept in a named variable because later cells iterate over it again.
weather_station = g.query(weather_station_query)
for row in weather_station:
    print(row)

In [None]:
# Inspect the shape of a query result: the result object's type, the type of
# one row, and the first column of that row.
print(type(weather_station))
# NOTE: indexing [0] raises IndexError if the query returned no rows.
e = list(weather_station)[0]
print(type(e))
print(e[0])

Get the stream ID of the outside air temperature sensor:

In [None]:
# Stream-id lookup for a single sensor: ?ats is pre-bound through initBindings
# below, so each execution only returns that sensor's senaps:stream_id.
outside_temperature_stream_query = '''
    SELECT ?ats ?sid WHERE  {
        ?ats senaps:stream_id ?sid
    }
'''

# Initialise both names up front so an empty weather_station result produces a
# clear error below instead of a NameError on an unassigned variable.
outside_temperature_stream = None
outside_temperature_stream_id = None

for row in weather_station:
    candidate_stream = g.query(outside_temperature_stream_query, initBindings={'ats': row[0]})
    # Keep the most recent sensor that actually has a stream id. When every
    # sensor has one this is the last sensor, exactly as before; sensors
    # without a stream id no longer clobber an earlier valid match.
    if candidate_stream.bindings:
        outside_temperature_stream = candidate_stream

if outside_temperature_stream is None:
    raise LookupError(
        'No outside air temperature sensor with a senaps:stream_id was found '
        'for any weather station in the building model'
    )

print(outside_temperature_stream.bindings)
binding = outside_temperature_stream.bindings[0]
print(binding)
print(binding['sid'])
# Store the id as a plain string for use in later cells.
outside_temperature_stream_id = str(binding['sid'])
    

Connect to our Database abstraction:

In [12]:
# NOTE(review): this import lives mid-notebook; consider moving it to the
# imports cell at the top so all dependencies are visible in one place.
from dbmanager import DBManager

# Paths are relative to the notebook's working directory.
data_zip_path = '../../datasets/bts_site_b_train/train.zip'
mapping_csv_path = '../../datasets/bts_site_b_train/mapper_TrainOnly.csv'
# db is the handle used by the following cells to fetch stream data.
db = DBManager(data_zip_path, mapping_csv_path)

Test that we can retrieve some data:

In [None]:
# a single stream
df = db.get_stream('664ddc27_ccc5_43a1_97b7_3000c0e948db')
df.head()

In [None]:
# multiple streams
stream_ids = [
    '3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4',
    '77ed01a5_11c3_4607_9bf8_342b947c88de',
    ]

streams = db.get_streams(stream_ids)

for stream_id, df in streams.items():
    print(f'Stream ID: {stream_id}')
    print(df.head())
    print()

In [None]:
# Fetch the outside air temperature data using the stream id resolved in the
# earlier lookup cell (outside_temperature_stream_id must already be set).
outside_air_temperature = db.get_stream(outside_temperature_stream_id)
outside_air_temperature.head()

A quick function to take the id of some entity and return its associated stream id:

In [16]:
def get_stream_id(g, subject):
    """Look up the Senaps stream id attached to an entity in the graph.

    Args:
        g: Graph object exposing ``value(subject, predicate)``.
        subject: Node identifying the entity whose stream id is wanted.

    Returns:
        Whatever ``g.value`` yields for the ``senaps#stream_id`` predicate on
        ``subject``.
    """
    # An alternative (left incomplete) is a SPARQL query of the form
    #   SELECT ?sid WHERE { ?id senaps:stream_id ?sid . }
    # executed with initBindings={'id': rdflib.URIRef(subject)}.
    # The direct predicate lookup below is the approach actually used.
    stream_id_predicate = rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')
    return g.value(subject, stream_id_predicate)

Quick look at the data for the air temperature sensor and setpoint for each conference room:

In [None]:
# Preview the sensor and setpoint data for every conference room.
for i, row in enumerate(conference_rooms, start=1):
    # Row columns follow the SELECT order of the conference-room query:
    # (?loc, ?ats, ?atsp).
    conference_room_id, air_temperature_sensor_id, room_air_temperature_setpoint_id = row

    print(f"Conference Room {i} ({conference_room_id})")

    # Keep entity ids and stream ids in separate names: the setpoint's entity
    # id is no longer overwritten by its stream id.
    air_temperature_sensor_stream_id = get_stream_id(g, air_temperature_sensor_id)
    room_air_temperature_setpoint_stream_id = get_stream_id(g, room_air_temperature_setpoint_id)

    air_temperature_df = db.get_stream(air_temperature_sensor_stream_id)
    room_air_temperature_setpoint_df = db.get_stream(room_air_temperature_setpoint_stream_id)

    print(air_temperature_df.head())
    print(room_air_temperature_setpoint_df.head())
    print()
    

Now let's do the same, but plot the daily median of each conference room's air temperature and setpoint alongside the outside air temperature:

In [18]:
def get_daily_median(df):
    """Return the per-day median of ``value`` for each ``label``.

    The input frame is left untouched: the calendar date is derived on a copy
    produced by ``assign`` rather than being written back into ``df``, so the
    function can be called repeatedly on the same frame.

    Args:
        df: DataFrame with a ``time`` column parseable by ``pd.to_datetime``,
            a ``label`` column and a ``value`` column.

    Returns:
        DataFrame indexed by ``date`` (``datetime.date`` objects) with one
        column per distinct ``label`` holding that day's median ``value``.
        Day/label combinations with no data are NaN (they are not dropped).
    """
    # Alternative worth considering: set 'time' as the index and use
    # resample('D').median() instead of grouping on a derived date column.
    with_date = df.assign(date=pd.to_datetime(df['time']).dt.date)
    return with_date.groupby(['date', 'label'])['value'].median().unstack()

In [None]:
# Preview only: keep the first rows of the daily-median table and display them.
df = get_daily_median(outside_air_temperature).head()
df

In [None]:
# The outside series is the same for every room, so compute it once.
outside_median_df = get_daily_median(outside_air_temperature)

for i, row in enumerate(conference_rooms, start=1):
    # Row columns follow the SELECT order of the conference-room query:
    # (?loc, ?ats, ?atsp).
    conference_room_id, air_temperature_sensor_id, room_air_temperature_setpoint_id = row

    air_temperature_sensor_stream_id = get_stream_id(g, air_temperature_sensor_id)
    room_air_temperature_setpoint_stream_id = get_stream_id(g, room_air_temperature_setpoint_id)

    air_temperature_sensor_df = db.get_stream(air_temperature_sensor_stream_id)
    room_air_temperature_setpoint_df = db.get_stream(room_air_temperature_setpoint_stream_id)

    inside_median_df = get_daily_median(air_temperature_sensor_df)
    setpoint_median_df = get_daily_median(room_air_temperature_setpoint_df)

    # One figure per room, drawn through the explicit Axes API only.
    fig, ax = plt.subplots(figsize=(12, 6))

    # (frame, extra line style) for the inside, outside and setpoint series;
    # the first column of each daily-median frame is plotted and used as label.
    series_to_plot = [
        (inside_median_df, {}),
        (outside_median_df, {'color': 'g', 'alpha': 0.5}),
        (setpoint_median_df, {'color': 'r', 'alpha': 0.5}),
    ]
    for median_df, line_style in series_to_plot:
        first_label = median_df.columns[0]
        ax.plot(median_df.index, median_df[first_label], label=first_label, marker=',', **line_style)

    ax.set_title(f'Conference Room {i}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Daily Median Temperature')
    ax.legend()
    ax.grid(True)
    ax.tick_params(axis='x', labelrotation=45)

    fig.tight_layout()
    plt.show()
    # Release the figure so a long list of rooms does not accumulate open figures.
    plt.close(fig)