In [749]:
from pathlib import Path
import brickschema
import numpy as np
import pandas as pd
import rdflib
from rdflib import Namespace
from rdflib.namespace import RDFS, SKOS, BRICK


In [750]:
SENAPS = Namespace("http://senaps.io/schema/1.0/senaps#")
SENAPS['stream_id']

rdflib.term.URIRef('http://senaps.io/schema/1.0/senaps#stream_id')

In [751]:
SKOS['definition']

rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#definition')

In [752]:
SKOS

Namespace('http://www.w3.org/2004/02/skos/core#')

In [753]:
# Directory that holds the training archive, the stream mapping and the TTL models.
dataset_dir = '../../datasets/bts_site_b_train/'

# File names inside `dataset_dir`.
dataset_zip = 'train.zip'
mapping_csv = 'mapper_TrainOnly.csv'
building_ttl = 'Site_B.ttl'  # alternative model file: 'Site_B_tim.ttl'
brick_ttl = 'Brick_v1.2.1.ttl'

# Full paths, each built from the directory and one of the file names above.
dataset_path = Path(dataset_dir, dataset_zip)
mapping_path = Path(dataset_dir, mapping_csv)
building_model = Path(dataset_dir, building_ttl)
brick_schema = Path(dataset_dir, brick_ttl)

In [754]:
# Load the site's building model and the Brick schema file that ships with the dataset.
g_building = brickschema.Graph().load_file(building_model)
g_brick = brickschema.Graph().load_file(brick_schema)
# g_brick_latest = brickschema.Graph(load_brick=True)
# NOTE(review): `load_brick_nightly=True` presumably pulls the nightly Brick build,
# which would need network access and could change results between runs — confirm.
g_brick_latest = brickschema.Graph(load_brick_nightly=True)

# ANALYSIS

In [755]:
def sparql_to_df(g, q, **kwargs):
    """Run a SPARQL query and return its de-duplicated bindings as a DataFrame.

    Parameters
    ----------
    g : object exposing ``query(q, **kwargs)`` whose result has ``.bindings``
    q : str
        The SPARQL query text.
    **kwargs
        Forwarded unchanged to ``g.query``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct binding; column labels are converted to ``str``.
        The original row labels of the surviving rows are kept.
    """
    frame = pd.DataFrame(g.query(q, **kwargs).bindings)
    # Column labels come from the bindings' keys; normalise them to plain strings.
    frame.columns = [str(label) for label in frame.columns]
    return frame.drop_duplicates()

In [756]:
def get_brick_entities(g):
    """Return every Brick-typed entity in the building model with its stream and units.

    The query selects each ``?entity`` whose class IRI starts with the ``brick:``
    prefix and optionally binds:

    * ``stream_id``      -- the ``senaps:stream_id`` value,
    * ``named_unit``     -- a ``brick:hasUnit`` value whose IRI starts with ``unit:``,
    * ``anonymous_unit`` -- the ``brick:value`` of a ``brick:hasUnit`` node.

    The query text declares no PREFIX lines, so ``brick:``, ``senaps:`` and
    ``unit:`` have to be resolvable by ``g`` itself.

    Returns the result of ``sparql_to_df`` for that query.
    """
    entity_query = '''
    SELECT ?entity ?brick_class ?stream_id ?named_unit ?anonymous_unit WHERE {
        ?entity a ?brick_class .
        OPTIONAL { ?entity senaps:stream_id ?stream_id } .
        OPTIONAL { ?entity brick:hasUnit ?named_unit .
                    filter ( strstarts(str(?named_unit),str(unit:)) ) } .
        OPTIONAL { ?entity brick:hasUnit [ brick:value ?anonymous_unit ] } .
        filter ( strstarts(str(?brick_class),str(brick:)) ) .
    }
    '''
    return sparql_to_df(g, entity_query)

get_brick_entities(g_building)

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e
...,...,...,...,...
1110,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,
1111,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,
1112,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,
1113,,https://brickschema.org/schema/Brick#Air_Handl...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,


In [757]:
df = get_brick_entities(g_building)

In [758]:
def class_in_brick(cls, g):
    """Tell whether ``cls`` occurs as the subject of any triple in ``g``.

    The test is ``(cls, None, None) in g``, i.e. a triple pattern with the
    predicate and object left open.
    """
    subject_pattern = (cls, None, None)
    return subject_pattern in g

In [759]:
# Flag each entity whose class appears as a subject in the provided Brick schema,
# then report the size and a preview of both groups.
df['class_in_provided_brick'] = df['brick_class'].apply(class_in_brick, args=(g_brick,))
for flag, caption in (
    (True, 'recognised by provided Brick schema:'),
    (False, 'not recognised by provided Brick schema:'),
):
    subset = df[df['class_in_provided_brick'] == flag]
    print(len(subset), caption)
    print(subset.head())

1060 recognised by provided Brick schema:
  anonymous_unit                                        brick_class  \
0            NaN  https://brickschema.org/schema/Brick#Temperatu...   
1            NaN  https://brickschema.org/schema/Brick#Temperatu...   
2            NaN  https://brickschema.org/schema/Brick#Temperatu...   
3            NaN  https://brickschema.org/schema/Brick#Temperatu...   
4            NaN  https://brickschema.org/schema/Brick#Temperatu...   

                                              entity  \
0  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
1  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
2  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
3  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
4  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   

                              stream_id  class_in_provided_brick  
0  3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4                     True  
1  f9833afd_e8a3_437b_9031_f29b656c94f9                     

In [760]:
# Flag each entity whose class appears as a subject in the latest (nightly) Brick
# schema graph, then report the size and a preview of both groups.
# The captions name the *latest* schema: they were previously copy-pasted from the
# provided-schema cell and mislabelled these counts.
df['class_in_latest_brick'] = df['brick_class'].apply(class_in_brick, args=(g_brick_latest,))
print(len(df[df['class_in_latest_brick'] == True]), 'recognised by latest Brick schema:')
print(df[df['class_in_latest_brick'] == True].head())
print(len(df[df['class_in_latest_brick'] == False]), 'not recognised by latest Brick schema:')
print(df[df['class_in_latest_brick'] == False].head())

1021 recognised by provided Brick schema:
  anonymous_unit                                        brick_class  \
0            NaN  https://brickschema.org/schema/Brick#Temperatu...   
1            NaN  https://brickschema.org/schema/Brick#Temperatu...   
2            NaN  https://brickschema.org/schema/Brick#Temperatu...   
3            NaN  https://brickschema.org/schema/Brick#Temperatu...   
4            NaN  https://brickschema.org/schema/Brick#Temperatu...   

                                              entity  \
0  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
1  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
2  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
3  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   
4  dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...   

                              stream_id  class_in_provided_brick  \
0  3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4                     True   
1  f9833afd_e8a3_437b_9031_f29b656c94f9                   

In [761]:
def get_brick_definition(cls, g, g_alt=None):
    """Find a ``skos:definition`` for ``cls``, walking up ``rdfs:subClassOf`` if needed.

    Parameters
    ----------
    cls : term or None
        The class to describe. ``None`` yields ``None`` immediately.
    g : graph exposing ``value(subject=..., predicate=...)``
        Primary schema graph to search.
    g_alt : graph, optional
        Fallback graph searched from the original ``cls`` when ``g`` yields
        no definition anywhere along the superclass chain.

    Returns
    -------
    The first definition found for ``cls`` or one of its ancestors, or ``None``
    when neither graph provides one.

    Notes
    -----
    Only the single parent returned by ``g.value`` is followed at each step.
    Visited classes are tracked so that a cyclic ``rdfs:subClassOf`` chain
    ends the walk instead of looping forever.
    """
    if cls is None:
        return None

    predicate = SKOS['definition']
    visited = set()
    current = cls
    while current is not None and current not in visited:
        visited.add(current)
        definition = g.value(subject=current, predicate=predicate)
        if definition is not None:
            # Return what was found rather than querying the graph a second time.
            return definition
        current = g.value(subject=current, predicate=RDFS['subClassOf'])

    if g_alt is not None:
        # Restart from the original class in the fallback graph.
        return get_brick_definition(cls, g_alt)
    return None

In [762]:
# Attach a definition to every entity, looked up in the provided Brick schema only.
# (get_brick_definition also accepts a fallback graph as its third argument,
# e.g. g_brick_latest, should the provided schema prove insufficient.)
df['brick_definition'] = df['brick_class'].apply(
    lambda brick_cls: get_brick_definition(brick_cls, g_brick)
)
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature


In [763]:
# The query result omits a column entirely when nothing was bound to it,
# so create any missing unit column before combining them.
for unit_column in ('named_unit', 'anonymous_unit'):
    if unit_column not in df.columns:
        df[unit_column] = None

# Prefer the named unit; fall back to the anonymous one where it is missing.
df = df.assign(unit=df['named_unit'].combine_first(df['anonymous_unit']))

In [764]:
def unit_is_named(r):
    """Classify a row's unit as named (True), not named (False) or absent (None).

    ``r`` must expose ``unit`` and ``named_unit`` attributes. When ``unit`` is
    missing the result is ``None``; otherwise it is ``True`` exactly when
    ``named_unit`` is not missing.
    """
    has_unit = not pd.isna(r.unit)
    if has_unit:
        return not pd.isna(r.named_unit)
    return None

In [765]:
df['unit_is_named'] = df.apply(unit_is_named, axis=1)
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,named_unit,unit,unit_is_named
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,


In [766]:
# df.assign(unit_is_named=lambda x: x['unit'].apply(lambda u: u is not None and u.startswith('unit:')))   

In [767]:
# Read the stream-to-label mapping; the first CSV column becomes the index.
mapping_df = pd.read_csv(mapping_path, index_col=0)

# Keep only streams that were actually written to a file. Rows are kept only where
# the "contains" test is exactly False, so rows with a missing Filename are dropped too.
# (All buildings are kept; filter on mapping_df['Building'] to restrict to one.)
mapping_df = mapping_df.loc[
    mapping_df['Filename'].str.contains('FILE NOT SAVED') == False
]

mapping_df.head()

Unnamed: 0,Building,StreamID,Filename,strBrickLabel
0,A,9ba955fa_5960_4c9b_b73a_10156da7d083,trainAll_0.pkl,Operating_Mode_Status
2,A,8fd6e75b_88bc_4992_b420_77389969b3c4,trainAll_1.pkl,Mode_Command
3,A,8db6eaa9_bd6c_4f7e_aed0_a47e4e192a6c,trainAll_2.pkl,Active_Power_Sensor
4,A,b2338dec_110a_45cc_8358_1171aaef2c45,trainAll_3.pkl,System_Status
5,A,ec5ff874_0af2_49d8_a6a0_21ea3d077dc8,trainAll_4.pkl,Maintenance_Mode_Command


In [768]:
df['stream_id'][0]

rdflib.term.Literal('3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4')

In [769]:
def stream_exists_in_mapping(s, mapping_df):
    """Report whether stream id ``s`` appears in ``mapping_df['StreamID']``.

    ``s`` is converted with ``str`` and stripped of surrounding whitespace
    before the membership test. A missing ``s`` gives ``None`` rather than
    ``False`` so that "no stream" stays distinguishable from "no data".
    """
    if pd.isna(s):
        return None
    candidate = str(s).strip()
    known_ids = mapping_df['StreamID'].values
    return candidate in known_ids

In [770]:
df['stream_exists_in_mapping'] = df['stream_id'].apply(stream_exists_in_mapping, args=(mapping_df,))
df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,named_unit,unit,unit_is_named,stream_exists_in_mapping
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,,True
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,,True
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,,True
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,,True
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,,True


In [771]:
def brick_class_in_mapping(s, mapping_df):
    """Report whether ``s`` appears in ``mapping_df['StreamID']``.

    NOTE(review): despite its name this function does not look up a Brick
    class. It performs the same stream-id membership test as
    ``stream_exists_in_mapping`` — confirm whether it is still needed.

    ``s`` is converted with ``str`` and stripped before the test; a missing
    ``s`` gives ``None``.
    """
    if pd.isna(s):
        return None
    # The stray no-op expression `mapping_df['StreamID']` that used to sit here
    # has been removed; the lookup below is the only use of the column.
    return str(s).strip() in mapping_df['StreamID'].values

In [772]:
# Left-join the mapping's Brick label onto each entity via its stream id.
# The stream ids in df are converted to plain strings first so they compare equal
# to the CSV's StreamID values; the helper join columns are dropped afterwards.
df = (
    df.assign(stream_id_str=df['stream_id'].apply(lambda x: str(x)))
    .merge(
        mapping_df[['StreamID', 'strBrickLabel']],
        how='left',
        left_on='stream_id_str',
        right_on='StreamID',
    )
    .drop(columns=['stream_id_str', 'StreamID'])
    .rename(columns={'strBrickLabel': 'brick_class_in_mapping'})
)

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,named_unit,unit,unit_is_named,stream_exists_in_mapping,brick_class_in_mapping
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,,True,Temperature_Setpoint
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,,True,Temperature_Setpoint
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,,True,Temperature_Setpoint
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,,True,Temperature_Setpoint
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,,True,Temperature_Setpoint


In [773]:
# Compare the fragment of each model class IRI with the label from the mapping file.
# The fragments live in a temporary Series instead of a throw-away DataFrame column.
class_fragments = df['brick_class'].apply(lambda x: str(x.fragment) if x is not None else None)

has_no_mapping_label = pd.isna(df['brick_class_in_mapping'])
df['brick_class_is_consistent'] = np.where(
    has_no_mapping_label,                            # nothing to compare against
    None,                                            # -> leave the verdict empty
    class_fragments == df['brick_class_in_mapping'],  # -> True when both agree
)

df.head()

Unnamed: 0,anonymous_unit,brick_class,entity,stream_id,class_in_provided_brick,class_in_latest_brick,brick_definition,named_unit,unit,unit_is_named,stream_exists_in_mapping,brick_class_in_mapping,brick_class_is_consistent
0,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,3dfa2bab_f8f2_485b_80e9_5afdeeac9dc4,True,True,Sets temperature,,,,True,Temperature_Setpoint,True
1,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,f9833afd_e8a3_437b_9031_f29b656c94f9,True,True,Sets temperature,,,,True,Temperature_Setpoint,True
2,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,141b6d3e_2aaf_4872_81b2_e5be3e5e3b66,True,True,Sets temperature,,,,True,Temperature_Setpoint,True
3,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,4e1f9452_b52f_4ce1_a36f_e8a5cc8c3a5d,True,True,Sets temperature,,,,True,Temperature_Setpoint,True
4,,https://brickschema.org/schema/Brick#Temperatu...,dch:org/df125f8d_93e4_4a4f_bc87_1c4e332f32b8/s...,0ace4d2f_11d3_49f0_a61a_2f839ae3208e,True,True,Sets temperature,,,,True,Temperature_Setpoint,True


In [774]:
def defrag_uri(s):
    """Shorten a URIRef to its local name; return anything else unchanged.

    * URIRef containing ``#`` -> its ``fragment``
    * URIRef containing ``/`` -> the text after the last ``/``
    * URIRef with neither, or any non-URIRef value -> ``s`` itself
    """
    if not isinstance(s, rdflib.term.URIRef):
        return s
    if '#' in s:
        return s.fragment
    if '/' in s:
        return s.rsplit('/', 1)[-1]
    return s

In [775]:
# Replace every URIRef cell with its short local name, one column at a time.
for col in list(df.columns):
    df[col] = df[col].map(defrag_uri)

In [776]:
df.to_csv('model_quality.csv', index=False)

---
# VISUALISATION

In [777]:
# import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Brick Entities in Building Model Recognised by Brick Schema

In [778]:
# One row per entity with its class and recognition flag; unrecognised rows sort first.
entity_in_provided_brick = (
    df[['brick_class', 'entity', 'class_in_provided_brick']]
    .sort_values(by=['class_in_provided_brick', 'brick_class', 'entity'])
)
entity_in_provided_brick.head()

Unnamed: 0,brick_class,entity,class_in_provided_brick
957,Electrical_Circuit,4e5f09a8_0b2e_4c60_a6da_f7d332d6808e,False
960,Electrical_Circuit,4f525a1e_69b2_4fff_be62_16b262d3724b,False
956,Electrical_Circuit,6dc225a8_753e_4e47_8528_a3c03ea4e496,False
961,Electrical_Circuit,a4bcf16d_a84e_414e_b9c1_603c3d10cb3e,False
959,Electrical_Circuit,d8d2f843_6cc8_4b62_892e_5e67b56be41c,False


In [779]:
# Count entities per recognition flag and give the flag a readable label.
to_plot = (
    entity_in_provided_brick[['brick_class', 'class_in_provided_brick']]
    .groupby('class_in_provided_brick')
    .count()
    .reset_index()
)
to_plot['class_in_provided_brick'] = to_plot['class_in_provided_brick'].apply(
    lambda x: 'Recognised' if x else 'Unrecognised'
)
to_plot.head()

Unnamed: 0,class_in_provided_brick,brick_class
0,Unrecognised,55
1,Recognised,1060


In [780]:
import plotly.express as px

# Quick pie of recognised vs unrecognised entities from the aggregated counts.
fig = px.pie(
    to_plot,
    values='brick_class',
    names='class_in_provided_brick',
    title='Proportion of Brick Entities Recognised by Provided Brick Schema',
    labels={'class_in_provided_brick': 'Class', 'brick_class': 'Number of Entities'},
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [781]:
# Per-entity frame (not aggregated) with the recognition flag replaced by a label.
to_plot = entity_in_provided_brick[['brick_class', 'class_in_provided_brick']].assign(
    class_in_provided_brick=lambda frame: frame['class_in_provided_brick'].apply(
        lambda x: 'Recognised' if x else 'Unrecognised'
    )
)
to_plot.head()

Unnamed: 0,brick_class,class_in_provided_brick
957,Electrical_Circuit,Unrecognised
960,Electrical_Circuit,Unrecognised
956,Electrical_Circuit,Unrecognised
961,Electrical_Circuit,Unrecognised
959,Electrical_Circuit,Unrecognised


In [782]:
# Two stacked panels: a recognition pie above a table listing every entity.
fig = make_subplots(
    rows=2, cols=1,
    vertical_spacing=0.03,
    specs=[[{"type": "pie"}],
           [{"type": "table"}]],
)

# Count once and reuse for both the labels and the values.
recognition_counts = to_plot['class_in_provided_brick'].value_counts()
labels = recognition_counts.index
values = recognition_counts.values

recognition_pie = go.Pie(
    labels=labels,
    values=values,
    textposition='inside',
    textinfo='percent+label',
    name="",
)
fig.add_trace(recognition_pie, row=1, col=1)

entity_table = go.Table(
    header=dict(
        values=["Brick Class", "Entity ID", "Class Recognised"],
        font=dict(size=10),
        align="left",
    ),
    cells=dict(
        values=[entity_in_provided_brick[k].tolist() for k in entity_in_provided_brick.columns],
        align="left",
    ),
)
fig.add_trace(entity_table, row=2, col=1)

fig.update_layout(
    height=800,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
)

fig.show()


In [783]:
# Three stacked panels: recognition pie, recognised entities, unrecognised entities.
fig = make_subplots(
    rows=3, cols=1,
    vertical_spacing=0.03,
    subplot_titles=['Proportion of Entities', 'Recognised Entities', 'Unrecognised Entities'],
    specs=[[{"type": "pie"}],
           [{"type": "table"}],
           [{"type": "table"}]],
)

# Count once and reuse for both the labels and the values.
recognition_counts = to_plot['class_in_provided_brick'].value_counts()
labels = recognition_counts.index
values = recognition_counts.values

fig.add_trace(
    go.Pie(
        labels=labels,
        values=values,
        textposition='inside',
        textinfo='percent+label',
        name="",
    ),
    row=1, col=1,
)

entity_in_provided_brick_true = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == True]
entity_in_provided_brick_false = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]

# Both tables share a layout and show only the first two columns of their subset.
for grid_row, subset in ((2, entity_in_provided_brick_true), (3, entity_in_provided_brick_false)):
    fig.add_trace(
        go.Table(
            header=dict(
                values=["Brick Class", "Entity ID"],
                font=dict(size=10),
                align="left",
            ),
            cells=dict(
                values=[subset[k].tolist() for k in subset.columns[:2]],
                align="left",
            ),
        ),
        row=grid_row, col=1,
    )

fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
)

fig.show()

In [784]:
# Dashboard: two pies on the top row, then full-width tables of the unrecognised
# and the recognised entities.
fig = make_subplots(
    rows=3, cols=2,
    vertical_spacing=0.05,
    subplot_titles=['Proportion of Entities', 'Unrecognised by Class', 'Unrecognised Entities', 'Recognised Entities'],
    specs=[[{"type": "pie"}, {"type": "pie"}],
           [{"type": "table", 'colspan': 2}, None],
           [{"type": "table", 'colspan': 2}, None]],
)

entity_in_provided_brick_true = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == True]
entity_in_provided_brick_false = entity_in_provided_brick[entity_in_provided_brick['class_in_provided_brick'] == False]

# Left pie: recognised vs unrecognised entities.
recognition_counts = to_plot['class_in_provided_brick'].value_counts()
labels = recognition_counts.index
values = recognition_counts.values
fig.add_trace(
    go.Pie(
        labels=labels,
        values=values,
        textposition='inside',
        textinfo='percent+label',
        name="",
    ),
    row=1, col=1,
)

# Right pie: unrecognised entities broken down by their class.
unrecognised_class_counts = entity_in_provided_brick_false['brick_class'].value_counts()
labels = unrecognised_class_counts.index
values = unrecognised_class_counts.values
fig.add_trace(
    go.Pie(
        labels=labels,
        values=values,
        textposition='inside',
        textinfo='percent+label',
        name="",
    ),
    row=1, col=2,
)

# Both tables share a layout and show only the first two columns of their subset.
for grid_row, subset in ((2, entity_in_provided_brick_false), (3, entity_in_provided_brick_true)):
    fig.add_trace(
        go.Table(
            header=dict(
                values=["Brick Class", "Entity ID"],
                font=dict(size=10),
                align="left",
            ),
            cells=dict(
                values=[subset[k].tolist() for k in subset.columns[:2]],
                align="left",
            ),
        ),
        row=grid_row, col=1,
    )

fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="Brick Entities in Building Model Recognised by Brick Schema",
    title_x=0.5,
)

fig.show()

In [785]:
entity_in_provided_brick_false.to_csv('01_unrecognised_entities.csv', index=False)

In [786]:
fig.write_html("01_brick_entities_recognised_by_schema.html")

## Data Sources in Building Model with Associated Units

In [787]:
# Entities that have a stream id, sorted by class, each tagged with whether a unit is present.
stream_with_units = (
    df[['brick_class', 'stream_id', 'unit', 'unit_is_named']]
    .dropna(subset=['stream_id'])
    .sort_values(by=['brick_class', 'stream_id'])
    .assign(has_unit=lambda frame: frame['unit'].apply(lambda x: 'No units' if pd.isna(x) else 'Units'))
)
stream_with_units.head()

Unnamed: 0,brick_class,stream_id,unit,unit_is_named,has_unit
491,Air_Temperature_Sensor,021f5ffa_16bc_4efc_bedd_5eb04d8b3837,,,No units
461,Air_Temperature_Sensor,03f84d72_615b_4b71_840c_41c2a5015d76,,,No units
472,Air_Temperature_Sensor,09d686a2_134c_4904_b4c4_8ce3ce582169,,,No units
492,Air_Temperature_Sensor,11328598_25fc_4808_ba70_a4a48ed1d6e4,,,No units
475,Air_Temperature_Sensor,12fd9d33_d6a4_46c3_b647_b58a569a2db5,,,No units


In [788]:
# Subset of streams whose unit is missing, in class / stream-id order.
streams_without_units = (
    stream_with_units.loc[stream_with_units['unit'].isna()]
    .sort_values(by=['brick_class', 'stream_id'])
)
streams_without_units.head()

Unnamed: 0,brick_class,stream_id,unit,unit_is_named,has_unit
491,Air_Temperature_Sensor,021f5ffa_16bc_4efc_bedd_5eb04d8b3837,,,No units
461,Air_Temperature_Sensor,03f84d72_615b_4b71_840c_41c2a5015d76,,,No units
472,Air_Temperature_Sensor,09d686a2_134c_4904_b4c4_8ce3ce582169,,,No units
492,Air_Temperature_Sensor,11328598_25fc_4808_ba70_a4a48ed1d6e4,,,No units
475,Air_Temperature_Sensor,12fd9d33_d6a4_46c3_b647_b58a569a2db5,,,No units


In [789]:
# Count streams with and without units once; reuse for the pie's labels and values.
unit_presence_counts = stream_with_units['has_unit'].value_counts()
number_with_units_labels = unit_presence_counts.index
number_with_units_values = unit_presence_counts.values
print(number_with_units_labels)
print(number_with_units_values)

Index(['No units', 'Units'], dtype='object', name='has_unit')
[730 121]


In [790]:
# Streams that have any unit at all, tagged by whether that unit is a named
# (machine-readable) one. Despite its name, this frame holds every stream with a
# unit, not only those with named units.
stream_with_named_units = stream_with_units.dropna(subset=['unit']).copy()
# Derive the label from this frame's own column. It was previously computed on the
# unfiltered `stream_with_units` and only came out right through index alignment.
stream_with_named_units['has_named_unit'] = stream_with_named_units['unit_is_named'].apply(
    lambda x: 'Machine readable' if x else 'Not machine readable'
)
stream_with_named_units.head()

Unnamed: 0,brick_class,stream_id,unit,unit_is_named,has_unit,has_named_unit
727,Angle_Sensor,9c9c5d83_ba16_4b45_826d_071e226ebbe8,DEG,False,Units,Not machine readable
725,Angle_Sensor,b52d78be_742a_4a1e_a6b4_498de7cca709,DEG,False,Units,Not machine readable
726,Angle_Sensor,d5156069_91d0_468f_bbf7_96070b4325cd,DEG,False,Units,Not machine readable
782,Current_Sensor,02e608c1_09fe_48be_91dc_1419d6bc1fa7,A,False,Units,Not machine readable
783,Current_Sensor,08b1cc70_e437_43e2_b961_9eb2e1e7cf28,A,False,Units,Not machine readable


In [791]:
streams_with_anonymous_units = stream_with_units[stream_with_units['unit_is_named'] == False]
streams_with_anonymous_units.head()

Unnamed: 0,brick_class,stream_id,unit,unit_is_named,has_unit
727,Angle_Sensor,9c9c5d83_ba16_4b45_826d_071e226ebbe8,DEG,False,Units
725,Angle_Sensor,b52d78be_742a_4a1e_a6b4_498de7cca709,DEG,False,Units
726,Angle_Sensor,d5156069_91d0_468f_bbf7_96070b4325cd,DEG,False,Units
782,Current_Sensor,02e608c1_09fe_48be_91dc_1419d6bc1fa7,A,False,Units
783,Current_Sensor,08b1cc70_e437_43e2_b961_9eb2e1e7cf28,A,False,Units


In [792]:
# Count machine-readable vs non-machine-readable units once; reuse for labels and values.
named_unit_counts = stream_with_named_units['has_named_unit'].value_counts()
number_with_named_units_labels = named_unit_counts.index
number_with_named_units_values = named_unit_counts.values
print(number_with_named_units_labels)
print(number_with_named_units_values)

Index(['Not machine readable'], dtype='object', name='has_named_unit')
[121]


In [793]:
# Dashboard: unit-coverage pies on the top row, then full-width tables of the
# streams without units and the streams whose unit is not machine readable.
# A stale copy-paste that rebuilt `entity_in_provided_brick_true/false` here was
# removed; this figure never used those frames.
fig = make_subplots(
    rows=3, cols=2,
    vertical_spacing=0.05,
    subplot_titles = ['Proportion of Streams with Units', 'Units that are Machine Readable', 'Streams without Units', 'Streams with Non-Machine Readable Units'],
    specs=[[{"type": "pie"}, {"type": "pie"}],
           [{"type": "table", 'colspan': 2}, None],
           [{"type": "table", 'colspan': 2}, None]]
)

# Left pie: streams with vs without any unit.
fig.add_trace(
    go.Pie(
        labels=number_with_units_labels,
        values=number_with_units_values,
        textposition='inside',
        textinfo='percent+label',
        name="",
    ),
    row=1, col=1
)

# Right pie: of the streams that have a unit, how many are machine readable.
fig.add_trace(
    go.Pie(
        labels=number_with_named_units_labels,
        values=number_with_named_units_values,
        textposition='inside',
        textinfo='percent+label',
        name="",
    ),
    row=1, col=2
)

# Table of streams without units (first two columns: class and stream id).
fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Stream ID"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            values=[streams_without_units[k].tolist() for k in streams_without_units.columns[:2]],
            align = "left")
    ),
    row=2, col=1,
)

# Table of streams with anonymous units (first three columns, including the unit).
fig.add_trace(
    go.Table(
        header=dict(
            values=["Brick Class", "Stream ID", "Units"],
            font=dict(size=10),
            align="left"
        ),
        cells=dict(
            values=[streams_with_anonymous_units[k].tolist() for k in streams_with_anonymous_units.columns[:3]],
            align = "left")
    ),
    row=3, col=1
)

fig.update_layout(
    height=1200,
    showlegend=False,
    title_text="Data Sources in Building Model with Associated Units",
    title_x=0.5,
)

fig.show()

In [794]:
streams_without_units.to_csv('02_A_streams_without_units.csv', index=False)
streams_with_anonymous_units.to_csv('02_B_streams_with_anonymous_units.csv', index=False)

In [795]:
fig.write_html("02_data_sources_with_associated_units.html")

## Data Sources in Building Model without Timeseries Data

In [796]:
# Entities that have a stream id, tagged by whether that stream is listed in the mapping file.
have_data = (
    df[['brick_class', 'stream_id', 'stream_exists_in_mapping']]
    .dropna(subset=['stream_id'])
    .sort_values(by=['brick_class', 'stream_id'])
    .assign(has_data=lambda frame: frame['stream_exists_in_mapping'].apply(lambda x: 'Data' if x else 'No data'))
)
have_data.head()

Unnamed: 0,brick_class,stream_id,stream_exists_in_mapping,has_data
491,Air_Temperature_Sensor,021f5ffa_16bc_4efc_bedd_5eb04d8b3837,True,Data
461,Air_Temperature_Sensor,03f84d72_615b_4b71_840c_41c2a5015d76,True,Data
472,Air_Temperature_Sensor,09d686a2_134c_4904_b4c4_8ce3ce582169,True,Data
492,Air_Temperature_Sensor,11328598_25fc_4808_ba70_a4a48ed1d6e4,True,Data
475,Air_Temperature_Sensor,12fd9d33_d6a4_46c3_b647_b58a569a2db5,True,Data


In [797]:
# Count the 'Data' / 'No data' labels once and split the result into the
# label and value sequences used by the pie chart.
has_data_counts = have_data['has_data'].value_counts()
number_with_data_labels = has_data_counts.index
number_with_data_values = has_data_counts.values
print(number_with_data_labels)
print(number_with_data_values)

Index(['Data', 'No data'], dtype='object', name='has_data')
[668 183]


In [798]:
# Per Brick class, count the rows whose stream is absent from the mapping.
missing_data_by_class = (
    have_data.loc[have_data['stream_exists_in_mapping'].eq(False)]
    .groupby('brick_class')
    .count()
)
missing_data_by_class.head()

Unnamed: 0_level_0,stream_id,stream_exists_in_mapping,has_data
brick_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cooling_Command,1,1,1
Cooling_Demand_Sensor,1,1,1
Enable_Status,3,3,3
Heating_Command,1,1,1
Heating_Demand_Sensor,1,1,1


In [799]:
# Frequency of each Brick class among rows whose stream is absent from the
# mapping, computed once and split into labels/values for the pie chart.
missing_class_counts = have_data.loc[
    have_data['stream_exists_in_mapping'].eq(False), 'brick_class'
].value_counts()
missing_data_by_class_labels = missing_class_counts.index
missing_data_by_class_values = missing_class_counts.values
print(missing_data_by_class_labels)
print(missing_data_by_class_values)

Index(['Point', 'Mode_Command', 'Temperature_Parameter', 'Reset_Command',
       'Enable_Status', 'Mode_Status', 'System_Status', 'Cooling_Command',
       'Cooling_Demand_Sensor', 'Heating_Command', 'Heating_Demand_Sensor'],
      dtype='object', name='brick_class')
[105  33  18  16   3   2   2   1   1   1   1]


In [800]:
# Split `have_data` on the `stream_exists_in_mapping` flag.
# Naming caveat: the suffix is the flag value, not "is missing" --
# `missing_data_true` holds rows whose flag is True, `missing_data_false`
# holds rows whose flag is False.
missing_data_true = have_data.loc[have_data['stream_exists_in_mapping'].eq(True)]
missing_data_false = have_data.loc[have_data['stream_exists_in_mapping'].eq(False)]

In [801]:
# 3x2 grid: two pies on the top row, then one full-width table on each of
# the remaining rows.
fig = make_subplots(
    rows=3,
    cols=2,
    vertical_spacing=0.05,
    subplot_titles=[
        'Proportion of Data Sources',
        'Missing by Class',
        'Data Sources with Missing Timeseries Data',
        'Data Sources with Available Timeseries Data',
    ],
    specs=[
        [{"type": "pie"}, {"type": "pie"}],
        [{"type": "table", 'colspan': 2}, None],
        [{"type": "table", 'colspan': 2}, None],
    ],
)

# Top row: column 1 shows the Data / No data split, column 2 the per-class
# breakdown of streams absent from the mapping.
for pie_col, (pie_labels, pie_values) in enumerate(
    [
        (number_with_data_labels, number_with_data_values),
        (missing_data_by_class_labels, missing_data_by_class_values),
    ],
    start=1,
):
    fig.add_trace(
        go.Pie(
            labels=pie_labels,
            values=pie_values,
            textposition='inside',
            textinfo='percent+label',
            name="",
        ),
        row=1, col=pie_col,
    )

# Rows 2 and 3: the first two columns of the flag-False frame, then of the
# flag-True frame, each rendered as a table.
for table_row, table_frame in [(2, missing_data_false), (3, missing_data_true)]:
    fig.add_trace(
        go.Table(
            header={
                'values': ["Brick Class", "Stream ID"],
                'font': {'size': 10},
                'align': "left",
            },
            cells={
                'values': [
                    table_frame[column].tolist()
                    for column in table_frame.columns[:2]
                ],
                'align': "left",
            },
        ),
        row=table_row, col=1,
    )

# Overall figure size and a centred title; the legend is hidden.
fig.update_layout(
    height=1200,
    showlegend=False,
    title={
        'text': "Data Sources in Building Model without Timeseries Data",
        'x': 0.5,
    },
)

fig.show()

In [802]:
# Write the rows whose stream is absent from the mapping to CSV (index omitted).
missing_data_false.to_csv(path_or_buf='03_data_sources_with_missing_data.csv', index=False)

In [803]:
# Save the current figure as an HTML file.
fig.write_html(file="03_data_sources_without_data.html")

## Data Sources with Inconsistent Brick Class between Model and Mapper

In [804]:
# Keep only rows that have a class in the mapping, order them, and add a
# human-readable label derived from `brick_class_is_consistent`.
class_consistency = (
    df[['brick_class', 'brick_class_in_mapping', 'entity', 'brick_class_is_consistent']]
    .dropna(subset=['brick_class_in_mapping'])
    .sort_values(by=['brick_class', 'brick_class_in_mapping', 'entity'])
    .assign(
        consistency=lambda frame: frame['brick_class_is_consistent'].apply(
            lambda agrees: 'Consistent' if agrees else 'Inconsistent'
        )
    )
)
class_consistency.head()

Unnamed: 0,brick_class,brick_class_in_mapping,entity,brick_class_is_consistent,consistency
455,Air_Temperature_Sensor,Air_Temperature_Sensor,124295e4_f6a9_4904_baa2_6bb2752d0059.32c2ab64_...,True,Consistent
456,Air_Temperature_Sensor,Air_Temperature_Sensor,124295e4_f6a9_4904_baa2_6bb2752d0059.32fa613f_...,True,Consistent
487,Air_Temperature_Sensor,Air_Temperature_Sensor,3062fe14_682d_481f_a98e_b217c45c2337.a6dd463f_...,True,Consistent
488,Air_Temperature_Sensor,Air_Temperature_Sensor,3062fe14_682d_481f_a98e_b217c45c2337.f129b8da_...,True,Consistent
480,Air_Temperature_Sensor,Air_Temperature_Sensor,3abdac1c_4717_445e_8248_0143c701015a.4c12ffce_...,True,Consistent


In [805]:
# Count the 'Consistent' / 'Inconsistent' labels once and split the result
# into the label and value sequences used by the pie chart.
consistency_counts = class_consistency['consistency'].value_counts()
number_consistency_labels = consistency_counts.index
number_consistency_values = consistency_counts.values
print(number_consistency_labels)
print(number_consistency_values)

Index(['Consistent', 'Inconsistent'], dtype='object', name='consistency')
[645  23]


In [806]:
# Independent copy of the rows flagged as inconsistent between model and mapper.
inconsistent_classes = class_consistency.loc[
    class_consistency['brick_class_is_consistent'].eq(False)
].copy()
inconsistent_classes.head()

Unnamed: 0,brick_class,brick_class_in_mapping,entity,brick_class_is_consistent,consistency
867,Electrical_Energy_Sensor,Electrical_Power_Sensor,1af5f6f9_15b1_4a62_962e_0ede16373202.3ad9be1a_...,False,Inconsistent
866,Electrical_Energy_Sensor,Electrical_Power_Sensor,1af5f6f9_15b1_4a62_962e_0ede16373202.54513e05_...,False,Inconsistent
865,Electrical_Energy_Sensor,Electrical_Power_Sensor,1af5f6f9_15b1_4a62_962e_0ede16373202.983cbacc_...,False,Inconsistent
878,Electrical_Energy_Sensor,Electrical_Power_Sensor,240f89b5_a8f7_4716_ba46_b583227d99a4,False,Inconsistent
870,Electrical_Energy_Sensor,Electrical_Power_Sensor,6b498416_1218_453d_a09e_64a2e2aebe24.3ad9be1a_...,False,Inconsistent


In [807]:
# Frequency of each model-side Brick class among the inconsistent rows,
# computed once and split into labels/values for the pie chart.
inconsistent_class_counts = inconsistent_classes['brick_class'].value_counts()
consistency_by_class_labels = inconsistent_class_counts.index
consistency_by_class_values = inconsistent_class_counts.values
print(consistency_by_class_labels)
print(consistency_by_class_values)

Index(['Electrical_Energy_Sensor', 'b1a78aa6_d50d_4c0a_920e_5ff7019ab663_Sensor'], dtype='object', name='brick_class')
[17  6]


In [808]:
# 2x2 grid: two pies on the top row and one full-width table underneath.
fig = make_subplots(
    rows=2,
    cols=2,
    vertical_spacing=0.05,
    subplot_titles=[
        'Proportion of Data Sources',
        'Inconsistent by Class',
        'Data Sources with Inconsistent Brick Class',
    ],
    specs=[
        [{"type": "pie"}, {"type": "pie"}],
        [{"type": "table", 'colspan': 2}, None],
    ],
)

# Top row: column 1 shows the Consistent / Inconsistent split, column 2 the
# per-class breakdown of the inconsistent rows.
for pie_col, (pie_labels, pie_values) in enumerate(
    [
        (number_consistency_labels, number_consistency_values),
        (consistency_by_class_labels, consistency_by_class_values),
    ],
    start=1,
):
    fig.add_trace(
        go.Pie(
            labels=pie_labels,
            values=pie_values,
            textposition='inside',
            textinfo='percent+label',
            name="",
        ),
        row=1, col=pie_col,
    )

# Bottom row: first three columns of `inconsistent_classes` as a table.
fig.add_trace(
    go.Table(
        header={
            'values': ["Brick Class in Model", "Brick Class in Mapper", "Entity ID"],
            'font': {'size': 10},
            'align': "left",
        },
        cells={
            'values': [
                inconsistent_classes[column].tolist()
                for column in inconsistent_classes.columns[:3]
            ],
            'align': "left",
        },
    ),
    row=2, col=1,
)

# Overall figure size and a centred title; the legend is hidden.
fig.update_layout(
    height=800,
    showlegend=False,
    title={
        'text': "Data Sources with Inconsistent Brick Class between Model and Mapper",
        'x': 0.5,
    },
)

fig.show()

In [809]:
# Write the inconsistent rows to CSV (index omitted).
inconsistent_classes.to_csv(path_or_buf='04_data_sources_with_inconsistent_classes.csv', index=False)

In [810]:
# Save the current figure as an HTML file.
fig.write_html(file="04_data_sources_with_inconsistent_class.html")

---

# SCRATCH

In [None]:
# Entities whose unit is given indirectly: brick:hasUnit points at a node
# (written with blank-node syntax) that carries the unit in brick:value.
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit [ brick:value ?b ] .
}
'''
sparql_to_df(g=g_building, q=q)

In [None]:
# Entities whose brick:hasUnit object has an rdf:type that is `unit:` itself
# or reaches it through zero or more rdfs:subClassOf steps.
# NOTE(review): `unit:` with an empty local name is the bare namespace IRI --
# confirm this is the intended class to match against.
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit ?b .
    ?b rdf:type/rdfs:subClassOf* unit: .
}
'''
sparql_to_df(g=g_building, q=q)

In [None]:
# Entities whose brick:hasUnit object is an IRI that starts with the `unit:`
# namespace (string-prefix test on the IRI).
q = '''
SELECT ?a ?b
WHERE {
    ?a brick:hasUnit ?b .
    filter ( strstarts(str(?b),str(unit:)) ) .
}
'''
sparql_to_df(g=g_building, q=q)

In [None]:
# Print each prefix bound in the building graph next to its namespace IRI.
for prefix, uri in g_building.namespaces():
    print(prefix, uri, sep=': ')

In [245]:
# Write the full working table to CSV (index omitted).
df.to_csv(path_or_buf='model_quality_defrag.csv', index=False)

In [None]:
def get_anonymous_units(s, g):
    """Look up unit values attached to one entity through an intermediate node.

    Runs a SPARQL query on graph `g` with `?entity` pre-bound to `s` and
    selects the `brick:value` of the node that `brick:hasUnit` points at.

    Returns the DataFrame produced by `sparql_to_df` (column `units`).
    """
    units_query = '''
    SELECT ?units
    WHERE {
        ?entity brick:hasUnit [ brick:value ?units ] .
    }
    '''
    bindings = {'entity': s}
    return sparql_to_df(g, units_query, initBindings=bindings)

# One query per row; each cell of the new column holds the DataFrame returned
# for that entity.
df['anonymous_units'] = df['entity'].apply(
    lambda entity: get_anonymous_units(entity, g_building)
)

df.head()

In [None]:
def get_named_units(s, g):
    """Look up every `brick:hasUnit` object of one entity.

    Runs a SPARQL query on graph `g` with `?entity` pre-bound to `s`. The
    namespace filter inside the query text is commented out, so all
    `brick:hasUnit` objects are returned, not only `unit:` IRIs.

    Returns the DataFrame produced by `sparql_to_df` (column `named_units`).
    """
    units_query = '''
    SELECT ?named_units
    WHERE {
        ?entity brick:hasUnit ?named_units .
        # filter ( strstarts(str(?b),str(unit:)) ) .
    }
    '''
    bindings = {'entity': s}
    return sparql_to_df(g, units_query, initBindings=bindings)

# One query per row; each cell of the new column holds the DataFrame returned
# for that entity.
df['named_units'] = df['entity'].apply(
    lambda entity: get_named_units(entity, g_building)
)

df.head()

In [None]:
# Flag, for every row, whether its stream id appears in the mapping table.
# Rows without a usable stream id (missing, or blank after stripping) get
# pd.NA instead of False.
#
# Missingness is tested on the raw column on purpose: wrapping a value in
# str() first always yields a real string (NaN becomes 'nan'), so pd.isna()
# on the stringified value can never be True and missing ids would be
# reported as False.
stream_id_missing = df['stream_id'].isna() | df['stream_id'].map(
    lambda value: str(value).strip() == ''
)

# Compare as plain strings so rdflib Literal values match the mapping's ids.
stream_id_in_mapping = df['stream_id'].map(str).isin(mapping_df['StreamID'])

# Object dtype holding Python bools plus pd.NA for rows without a stream id.
df['stream_exists_in_mapping'] = stream_id_in_mapping.astype(object).mask(
    stream_id_missing, pd.NA
)

# Show the rows that have no stream id; their flag is <NA>.
df[df['stream_id'].isna()]