In [36]:
from pathlib import Path
import pickle
import zipfile

import numpy as np
import pandas as pd

import rdflib
from rdflib import Graph
from rdflib.namespace import BRICK

In [37]:
# Every input of this notebook lives under a single dataset directory.
dataset_dir = '../../datasets/bts_site_b_train/'

# Zip archive that holds the pickled streams.
dataset_zip = 'train.zip'
dataset_path = Path(dataset_dir, dataset_zip)

# CSV that links each pickle file name to a building, stream id and Brick label.
mapping_csv = 'mapper_TrainOnly.csv'
mapping_path = Path(dataset_dir, mapping_csv)

# Turtle model of the building; the cleaned file is used in place of
# the original 'Site_B.ttl'.
building_ttl = 'Site_B_cleaned.ttl'
building_model = Path(dataset_dir, building_ttl)

# Brick schema file, used later to look up class definitions.
brick_ttl = 'Brick_v1.2.1.ttl'
brick_schema = Path(dataset_dir, brick_ttl)

In [38]:
# Read the mapping CSV; its first column becomes the index.
mapping_df = pd.read_csv(mapping_path, index_col=0)

# To restrict the mapping to building B, filter on the 'Building' column:
# mapping_df = mapping_df[mapping_df['Building'] == 'B']

# Drop the streams that were never written to a file. Rows whose match result
# is missing (NaN) do not equal False either, so they are dropped as well.
not_saved = mapping_df['Filename'].str.contains('FILE NOT SAVED')
mapping_df = mapping_df.loc[not_saved.eq(False)]

mapping_df.head()

Unnamed: 0,Building,StreamID,Filename,strBrickLabel
0,A,9ba955fa_5960_4c9b_b73a_10156da7d083,trainAll_0.pkl,Operating_Mode_Status
2,A,8fd6e75b_88bc_4992_b420_77389969b3c4,trainAll_1.pkl,Mode_Command
3,A,8db6eaa9_bd6c_4f7e_aed0_a47e4e192a6c,trainAll_2.pkl,Active_Power_Sensor
4,A,b2338dec_110a_45cc_8358_1171aaef2c45,trainAll_3.pkl,System_Status
5,A,ec5ff874_0af2_49d8_a6a0_21ea3d077dc8,trainAll_4.pkl,Maintenance_Mode_Command


In [39]:
# Index the mapping by file name once, so each pickle is resolved with a single
# dictionary lookup instead of three full scans of the 'Filename' column.
# keep='first' reproduces a first-match lookup if a file name is repeated.
mapping_by_file = (
    mapping_df
    .drop_duplicates(subset='Filename', keep='first')
    .set_index('Filename')[['Building', 'StreamID', 'strBrickLabel']]
    .to_dict(orient='index')
)

# One summary record (dict) per pickled stream found in the archive.
streams = []

with zipfile.ZipFile(dataset_path, "r") as archive:
    for member in archive.namelist():
        if not member.endswith('.pkl'):
            continue

        # The mapping lists bare file names, so strip any folder prefix.
        filename = member.split('/')[-1]

        mapping_row = mapping_by_file.get(filename)
        if mapping_row is None:
            # Fail with a message that names the file rather than an opaque
            # positional-index error.
            raise KeyError(
                f'{filename!r} (archive member {member!r}) has no row in mapping_df'
            )

        # NOTE(security): unpickling can execute arbitrary code; only run this
        # on an archive from a trusted source.
        data = pickle.loads(archive.read(member))

        # Each pickle is read as a mapping with 't', 'v' and 'y' entries.
        times = data['t']
        values = data['v']

        streams.append({
            'filename': filename,
            'building': mapping_row['Building'],
            'stream_id': mapping_row['StreamID'],
            'brick_class_pkl': data['y'],
            'brick_class_csv': mapping_row['strBrickLabel'],
            'data_points': len(times),
            'min_time': min(times),
            'max_time': max(times),
            'min_value': min(values),
            'max_value': max(values),
            'mean_value': np.mean(values),
            'median_value': np.median(values),
        })

  data = pickle.loads(pkl_data)


In [40]:
# One row per stream record collected from the archive.
summary_df = pd.DataFrame(data=streams)
summary_df.head(n=5)

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,data_points,min_time,max_time,min_value,max_value,mean_value,median_value
0,trainAll_2183.pkl,A,980d7c24_3626_47f0_9430_582c827ee507,Alarm,Alarm,12939,2021-01-01 00:09:56.621,2021-03-31 22:51:21.721,0.0,0.0,0.0,0.0
1,trainAll_5834.pkl,A,3b9ab701_ec53_491f_a943_d82b3e90f130,Mode_Status,Mode_Status,12902,2021-01-01 00:02:30.508,2021-03-31 22:54:07.521,0.0,1.0,0.203069,0.0
2,trainAll_6483.pkl,A,22fd94c6_f39f_49b1_b3af_36b4dffd90a9,Fan_Status,Fan_Status,12904,2021-01-01 00:02:45.031,2021-03-31 22:54:18.431,0.0,1.0,0.288205,0.0
3,trainAll_7945.pkl,A,1e352003_b4c7_43f0_8f8b_099d44897fe6,Status,Status,12893,2021-01-01 00:03:01.414,2021-03-31 22:54:35.893,0.0,1.0,0.581478,1.0
4,trainAll_4294.pkl,A,7260e1bc_3b09_4880_8c68_57b50b4f0f93,Alarm,Alarm,12922,2021-01-01 00:00:18.577,2021-03-31 22:51:45.823,0.0,0.0,0.0,0.0


In [41]:
# Parse the building's Turtle model into an RDF graph.
g = Graph().parse(
    source=building_model,
    format='turtle',
)

In [42]:
def get_brick_class(stream_id, graph):
    """Return the Brick class name of the entity that carries `stream_id`.

    Finds the subject in `graph` whose ``senaps#stream_id`` property equals
    `stream_id`, then reads that subject's ``rdf:type``.

    Args:
        stream_id: Stream identifier; it is wrapped in an RDF literal for the
            lookup.
        graph: Graph to search. Only its ``value()`` method is used.

    Returns:
        The type IRI with the ``https://brickschema.org/schema/Brick#`` prefix
        removed, or ``None`` when no entity carries this stream id or the
        entity has no ``rdf:type``.
    """
    stream_id_pred = rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')
    stream_id_literal = rdflib.term.Literal(stream_id)

    # Query the graph that was passed in, not a notebook-level global.
    entity = graph.value(predicate=stream_id_pred, object=stream_id_literal, any=True)
    if entity is None:
        return None

    type_pred = rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
    brick_class = graph.value(subject=entity, predicate=type_pred, any=True)
    if brick_class is None:
        return None

    return brick_class.replace('https://brickschema.org/schema/Brick#', '')

In [43]:
# Look up each stream's class in the building model, then preview building B.
summary_df['brick_class_ttl'] = summary_df['stream_id'].apply(
    lambda sid: get_brick_class(sid, g)
)
summary_df.loc[summary_df['building'].eq('B')].head()

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,data_points,min_time,max_time,min_value,max_value,mean_value,median_value,brick_class_ttl
19,trainAll_8492.pkl,B,6f502ba0_77fb_43db_b6dc_48e68d5c822b,System_Status,System_Status,51193,2021-01-01 00:03:17.897,2021-12-24 22:59:53.425,0.0,1.0,0.320649,0.0,System_Status
20,trainAll_8486.pkl,B,3c9ca09b_8952_45da_a066_bcb645cbbc68,Min_Discharge_Air_Temperature_Setpoint_Limit,Min_Discharge_Air_Temperature_Setpoint_Limit,51194,2021-01-01 00:03:17.822,2021-12-24 22:59:53.290,0.0,16.0,15.142087,16.0,Min_Discharge_Air_Temperature_Setpoint_Limit
36,trainAll_8479.pkl,B,d60c44c3_62fc_409a_a751_8b3ddeb9f197,Position_Sensor,Position_Sensor,51194,2021-01-01 00:03:17.746,2021-12-24 22:59:53.141,0.0,100.0,1.394574,0.0,Position_Sensor
38,trainAll_7992.pkl,B,eb0d5916_d942_4e2d_8129_60b83d984fdd,Max_Temperature_Setpoint_Limit,Max_Temperature_Setpoint_Limit,51200,2021-01-01 00:03:14.674,2021-12-24 22:59:47.496,0.0,22.5,21.288867,22.5,Max_Temperature_Setpoint_Limit
50,trainAll_8337.pkl,B,78078042_e16e_425b_9ec8_605aa149274e,Damper_Position_Sensor,Damper_Position_Sensor,51188,2021-01-01 00:03:10.523,2021-12-24 22:59:40.477,0.0,90.0,61.405774,89.8584,Damper_Position_Sensor


In [44]:
def get_relative_of_stream_source(stream_id, tag, g):
    """Return the Brick class name of an entity related to a stream's source.

    The lookup walks three steps in `g`: the subject that carries `stream_id`
    (via ``senaps#stream_id``), the object that subject points to through the
    Brick property ``BRICK[tag]``, and finally that object's ``rdf:type``.

    Args:
        stream_id: Stream identifier; it is wrapped in an RDF literal.
        tag: Local name of the Brick property to follow, e.g. ``'isPointOf'``.
        g: Graph to search. Only its ``value()`` method is used.

    Returns:
        The related entity's type IRI with the
        ``https://brickschema.org/schema/Brick#`` prefix removed, or ``None``
        if any of the three steps finds nothing.
    """
    has_stream_id = rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')
    rdf_type = rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')

    # Step 1: the entity the stream belongs to.
    source = g.value(predicate=has_stream_id, object=rdflib.term.Literal(stream_id), any=True)

    # Step 2: the entity reached from the source through the requested property.
    relative = None if source is None else g.value(subject=source, predicate=BRICK[tag], any=True)

    # Step 3: the class of that related entity.
    relative_class = None if relative is None else g.value(subject=relative, predicate=rdf_type, any=True)

    if relative_class is not None:
        return relative_class.replace('https://brickschema.org/schema/Brick#', '')
    return None

In [45]:
# Each new column is named after the Brick property it follows.
for tag in ('isPointOf', 'hasLocation'):
    summary_df[tag] = summary_df['stream_id'].apply(get_relative_of_stream_source, args=(tag, g))
summary_df.loc[summary_df['building'].eq('B')].head()

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,data_points,min_time,max_time,min_value,max_value,mean_value,median_value,brick_class_ttl,isPointOf,hasLocation
19,trainAll_8492.pkl,B,6f502ba0_77fb_43db_b6dc_48e68d5c822b,System_Status,System_Status,51193,2021-01-01 00:03:17.897,2021-12-24 22:59:53.425,0.0,1.0,0.320649,0.0,System_Status,Fan_Coil_Unit,
20,trainAll_8486.pkl,B,3c9ca09b_8952_45da_a066_bcb645cbbc68,Min_Discharge_Air_Temperature_Setpoint_Limit,Min_Discharge_Air_Temperature_Setpoint_Limit,51194,2021-01-01 00:03:17.822,2021-12-24 22:59:53.290,0.0,16.0,15.142087,16.0,Min_Discharge_Air_Temperature_Setpoint_Limit,Fan_Coil_Unit,
36,trainAll_8479.pkl,B,d60c44c3_62fc_409a_a751_8b3ddeb9f197,Position_Sensor,Position_Sensor,51194,2021-01-01 00:03:17.746,2021-12-24 22:59:53.141,0.0,100.0,1.394574,0.0,Position_Sensor,Cooling_Valve,
38,trainAll_7992.pkl,B,eb0d5916_d942_4e2d_8129_60b83d984fdd,Max_Temperature_Setpoint_Limit,Max_Temperature_Setpoint_Limit,51200,2021-01-01 00:03:14.674,2021-12-24 22:59:47.496,0.0,22.5,21.288867,22.5,Max_Temperature_Setpoint_Limit,Fan_Coil_Unit,
50,trainAll_8337.pkl,B,78078042_e16e_425b_9ec8_605aa149274e,Damper_Position_Sensor,Damper_Position_Sensor,51188,2021-01-01 00:03:10.523,2021-12-24 22:59:40.477,0.0,90.0,61.405774,89.8584,Damper_Position_Sensor,Return_Damper,


In [46]:
def get_unit(stream_id, graph):
    """Return the unit attached to the entity that carries `stream_id`.

    Finds the subject in `graph` whose ``senaps#stream_id`` property equals
    `stream_id`, then reads that subject's ``brick:hasUnit`` value.

    Args:
        stream_id: Stream identifier; it is wrapped in an RDF literal for the
            lookup.
        graph: Graph to search. Only its ``value()`` method is used.

    Returns:
        The unit IRI with the ``http://qudt.org/vocab/unit/`` prefix removed,
        or ``None`` when no entity carries this stream id or the entity has no
        unit.
    """
    stream_pred = rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')
    stream_literal = rdflib.term.Literal(stream_id)

    # Query the graph that was passed in, not a notebook-level global.
    stream_source = graph.value(predicate=stream_pred, object=stream_literal, any=True)
    if stream_source is None:
        return None

    unit = graph.value(subject=stream_source, predicate=BRICK.hasUnit, any=True)
    if unit is None:
        return None

    return unit.replace('http://qudt.org/vocab/unit/', '')

In [47]:
# Attach the unit recorded in the building model to each stream.
summary_df['hasUnit'] = summary_df['stream_id'].apply(lambda sid: get_unit(sid, g))
summary_df.loc[summary_df['building'].eq('B')].head()

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,data_points,min_time,max_time,min_value,max_value,mean_value,median_value,brick_class_ttl,isPointOf,hasLocation,hasUnit
19,trainAll_8492.pkl,B,6f502ba0_77fb_43db_b6dc_48e68d5c822b,System_Status,System_Status,51193,2021-01-01 00:03:17.897,2021-12-24 22:59:53.425,0.0,1.0,0.320649,0.0,System_Status,Fan_Coil_Unit,,
20,trainAll_8486.pkl,B,3c9ca09b_8952_45da_a066_bcb645cbbc68,Min_Discharge_Air_Temperature_Setpoint_Limit,Min_Discharge_Air_Temperature_Setpoint_Limit,51194,2021-01-01 00:03:17.822,2021-12-24 22:59:53.290,0.0,16.0,15.142087,16.0,Min_Discharge_Air_Temperature_Setpoint_Limit,Fan_Coil_Unit,,
36,trainAll_8479.pkl,B,d60c44c3_62fc_409a_a751_8b3ddeb9f197,Position_Sensor,Position_Sensor,51194,2021-01-01 00:03:17.746,2021-12-24 22:59:53.141,0.0,100.0,1.394574,0.0,Position_Sensor,Cooling_Valve,,
38,trainAll_7992.pkl,B,eb0d5916_d942_4e2d_8129_60b83d984fdd,Max_Temperature_Setpoint_Limit,Max_Temperature_Setpoint_Limit,51200,2021-01-01 00:03:14.674,2021-12-24 22:59:47.496,0.0,22.5,21.288867,22.5,Max_Temperature_Setpoint_Limit,Fan_Coil_Unit,,
50,trainAll_8337.pkl,B,78078042_e16e_425b_9ec8_605aa149274e,Damper_Position_Sensor,Damper_Position_Sensor,51188,2021-01-01 00:03:10.523,2021-12-24 22:59:40.477,0.0,90.0,61.405774,89.8584,Damper_Position_Sensor,Return_Damper,,


In [48]:
# Parse the Brick schema file into its own graph; the format is left for
# rdflib to determine.
b = Graph().parse(
    source=brick_schema,
)

In [49]:
def get_brick_definition(brick_class, graph):
    """Return the ``skos:definition`` recorded for a Brick class.

    Args:
        brick_class: Local name of the class inside the Brick namespace, or
            ``None``.
        graph: Graph to search. Only its ``value()`` method is used.

    Returns:
        Whatever ``graph.value`` yields for the class's ``skos:definition``
        (``None`` when there is no such triple), or ``None`` when `brick_class`
        is ``None``.
    """
    if brick_class is not None:
        class_iri = BRICK[brick_class]
        definition_pred = rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#definition')
        return graph.value(class_iri, definition_pred)
    return None

In [50]:
# Definitions come from the Brick schema graph, keyed by the class found in the
# building model.
summary_df['brick_definition'] = summary_df['brick_class_ttl'].apply(
    lambda brick_class: get_brick_definition(brick_class, b)
)
summary_df.loc[summary_df['building'].eq('B')].head()

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,data_points,min_time,max_time,min_value,max_value,mean_value,median_value,brick_class_ttl,isPointOf,hasLocation,hasUnit,brick_definition
19,trainAll_8492.pkl,B,6f502ba0_77fb_43db_b6dc_48e68d5c822b,System_Status,System_Status,51193,2021-01-01 00:03:17.897,2021-12-24 22:59:53.425,0.0,1.0,0.320649,0.0,System_Status,Fan_Coil_Unit,,,Indicates properties of the activity of a system
20,trainAll_8486.pkl,B,3c9ca09b_8952_45da_a066_bcb645cbbc68,Min_Discharge_Air_Temperature_Setpoint_Limit,Min_Discharge_Air_Temperature_Setpoint_Limit,51194,2021-01-01 00:03:17.822,2021-12-24 22:59:53.290,0.0,16.0,15.142087,16.0,Min_Discharge_Air_Temperature_Setpoint_Limit,Fan_Coil_Unit,,,A parameter that places a lower bound on the r...
36,trainAll_8479.pkl,B,d60c44c3_62fc_409a_a751_8b3ddeb9f197,Position_Sensor,Position_Sensor,51194,2021-01-01 00:03:17.746,2021-12-24 22:59:53.141,0.0,100.0,1.394574,0.0,Position_Sensor,Cooling_Valve,,,Measures the current position of a component i...
38,trainAll_7992.pkl,B,eb0d5916_d942_4e2d_8129_60b83d984fdd,Max_Temperature_Setpoint_Limit,Max_Temperature_Setpoint_Limit,51200,2021-01-01 00:03:14.674,2021-12-24 22:59:47.496,0.0,22.5,21.288867,22.5,Max_Temperature_Setpoint_Limit,Fan_Coil_Unit,,,A parameter that places an upper bound on the ...
50,trainAll_8337.pkl,B,78078042_e16e_425b_9ec8_605aa149274e,Damper_Position_Sensor,Damper_Position_Sensor,51188,2021-01-01 00:03:10.523,2021-12-24 22:59:40.477,0.0,90.0,61.405774,89.8584,Damper_Position_Sensor,Return_Damper,,,Measures the current position of a damper in t...


In [51]:
# Order the rows for review.
summary_df = summary_df.sort_values(
    by=['building', 'brick_class_pkl', 'brick_class_ttl', 'isPointOf', 'filename'],
)

# Move the graph-derived columns to positions 5-8, one after another.
for position, column in enumerate(
    ('brick_class_ttl', 'isPointOf', 'hasLocation', 'hasUnit'), start=5
):
    summary_df.insert(position, column, summary_df.pop(column))

summary_df.loc[summary_df['building'].eq('B')].head()

Unnamed: 0,filename,building,stream_id,brick_class_pkl,brick_class_csv,brick_class_ttl,isPointOf,hasLocation,hasUnit,data_points,min_time,max_time,min_value,max_value,mean_value,median_value,brick_definition
4298,trainAll_8191.pkl,B,dc2f8f57_1cba_4667_86c4_a089c0371562,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Handler_Unit,,,51196,2021-01-01 00:03:11.975,2021-12-24 22:59:42.631,0.0,1.0,0.946168,1.0,Measures the temperature of air
2982,trainAll_8223.pkl,B,09d686a2_134c_4904_b4c4_8ce3ce582169,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Handler_Unit,,,51196,2021-01-01 00:03:13.600,2021-12-24 22:59:45.564,0.0,1.0,0.946148,1.0,Measures the temperature of air
288,trainAll_8256.pkl,B,a52e12e5_dafa_4c8a_9ce5_0e66125d208c,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Handler_Unit,,,51204,2021-01-01 00:03:14.060,2021-12-24 22:59:46.439,0.0,1.0,0.946157,1.0,Measures the temperature of air
1292,trainAll_8292.pkl,B,84ddec06_b9f3_4f34_936d_0789717cae25,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Handler_Unit,,,51180,2021-01-01 00:03:09.338,2021-12-24 22:59:39.246,0.0,1.0,0.946131,1.0,Measures the temperature of air
622,trainAll_8334.pkl,B,c12eb576_6496_4553_b655_b6a3c465d679,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Temperature_Sensor,Air_Handler_Unit,,,51185,2021-01-01 00:03:10.457,2021-12-24 22:59:40.404,0.0,1.0,0.946156,1.0,Measures the temperature of air


In [52]:
# Write the summary to the working directory; the row index is not saved.
summary_df.to_csv(
    path_or_buf='bts_site_b_train_summary.csv',
    index=False,
)